Skip to main content

reinhardt_utils/utils_core/
input_validation.rs

1//! Input validation and sanitization utilities
2//!
3//! Provides helpers for validating and sanitizing user input to prevent
4//! common security vulnerabilities such as open redirects, log injection,
5//! and identifier-based attacks.
6
7/// Errors returned by [`validate_identifier`].
8#[derive(Debug, thiserror::Error)]
9pub enum IdentifierError {
10	#[error("Identifier is empty")]
11	Empty,
12	#[error("Identifier exceeds maximum length of {max_length} characters")]
13	TooLong { max_length: usize },
14	#[error("Identifier contains invalid character: '{ch}'")]
15	InvalidCharacter { ch: char },
16	#[error("Identifier must start with alphanumeric or underscore, got: '{ch}'")]
17	InvalidStartCharacter { ch: char },
18}
19
/// Validates a URL for safe redirect usage.
///
/// Leading and trailing whitespace is ignored before validation.
///
/// Allows:
/// - Relative paths starting with `/` (absolute paths on same origin)
/// - Same-origin relative paths starting with `./`
/// - Anchor links starting with `#`
/// - `http://` and `https://` URLs (scheme matched ASCII case-insensitively)
///
/// Rejects:
/// - Empty or whitespace-only input
/// - Backslashes and control characters anywhere in the URL (browsers treat
///   `\` like `/` and strip tabs/newlines, so `/\evil.com` would otherwise
///   behave like the protocol-relative `//evil.com`)
/// - Path traversal (`../`)
/// - Protocol-relative URLs (`//host`)
/// - Dangerous protocols (`javascript:`, `data:`, `vbscript:`)
/// - Unknown URL schemes
/// - URLs with embedded credentials (`http://user:pass@host`)
/// - `http(s)` URLs with an empty host (`https:///evil.com`)
///
/// An `@` that appears only in the path, query, or fragment (for example
/// `https://example.com?email=a@b.com`) is not treated as credentials.
///
/// # Examples
///
/// ```
/// use reinhardt_utils::utils_core::input_validation::validate_redirect_url;
///
/// assert!(validate_redirect_url("/dashboard"));
/// assert!(validate_redirect_url("https://example.com/page"));
/// assert!(!validate_redirect_url("javascript:alert(1)"));
/// assert!(!validate_redirect_url("../secret"));
/// assert!(!validate_redirect_url("/\\evil.com"));
/// ```
pub fn validate_redirect_url(url: &str) -> bool {
	let trimmed = url.trim();

	if trimmed.is_empty() {
		return false;
	}

	// Browsers normalise `\` to `/` and silently drop tab/CR/LF while parsing,
	// so these characters can disguise `//host` or smuggle header breaks.
	if trimmed.chars().any(|c| c == '\\' || c.is_control()) {
		return false;
	}

	// Reject path traversal
	if trimmed.starts_with("../") || trimmed.contains("/../") || trimmed.ends_with("/..") {
		return false;
	}

	// Allow anchor links and same-origin relative paths
	if trimmed.starts_with('#') || trimmed.starts_with("./") {
		return true;
	}

	// Allow absolute paths on same origin (must start with a single `/`);
	// a second `/` makes the URL protocol-relative, i.e. an open redirect.
	if let Some(rest) = trimmed.strip_prefix('/') {
		return !rest.starts_with('/');
	}

	// Only http:// and https:// are accepted from here on. Every other
	// scheme, including `javascript:`, `data:` and `vbscript:`, is rejected
	// by falling through to `None`.
	let after_scheme = match strip_prefix_ignore_ascii_case(trimmed, "https://")
		.or_else(|| strip_prefix_ignore_ascii_case(trimmed, "http://"))
	{
		Some(rest) => rest,
		None => return false,
	};

	// The authority ends at the first `/`, `?` or `#` (RFC 3986, section 3.2),
	// so an `@` in the query or fragment is not mistaken for credentials.
	let authority = after_scheme
		.split(|c: char| matches!(c, '/' | '?' | '#'))
		.next()
		.unwrap_or("");

	// An empty authority (`https:///evil.com`) is re-interpreted by browsers
	// as `https://evil.com`; an `@` in the authority marks embedded userinfo.
	!authority.is_empty() && !authority.contains('@')
}

/// Strips `prefix` from the start of `s`, comparing ASCII letters
/// case-insensitively. Returns the remainder, or `None` when `s` does not
/// start with `prefix`.
fn strip_prefix_ignore_ascii_case<'a>(s: &'a str, prefix: &str) -> Option<&'a str> {
	// `get` returns `None` instead of panicking when `s` is shorter than the
	// prefix or the cut would land inside a multi-byte character.
	let head = s.get(..prefix.len())?;
	if head.eq_ignore_ascii_case(prefix) {
		s.get(prefix.len()..)
	} else {
		None
	}
}
107
/// Sanitizes user input for safe inclusion in log messages.
///
/// Each input character maps to exactly one output character:
/// - `\n`, `\r`, `\t` and the Unicode line/paragraph separators
///   (U+2028, U+2029) become a single space, so the value can never
///   start a new log line;
/// - any other control character becomes U+FFFD (replacement character);
/// - everything else is kept as-is.
///
/// At most `max_length` characters (not bytes) of `input` are processed,
/// so truncation never splits a multi-byte UTF-8 character.
///
/// # Examples
///
/// ```
/// use reinhardt_utils::utils_core::input_validation::sanitize_log_input;
///
/// let input = "normal text\ninjected line";
/// let sanitized = sanitize_log_input(input, 100);
/// assert!(!sanitized.contains('\n'));
/// ```
pub fn sanitize_log_input(input: &str, max_length: usize) -> String {
	input
		.chars()
		// Truncate by character count before mapping; the mapping is 1:1.
		.take(max_length)
		.map(|ch| match ch {
			// Line breaks and tabs are flattened to spaces. U+2028/U+2029 are
			// not control characters, but many log consumers split on them.
			'\n' | '\r' | '\t' | '\u{2028}' | '\u{2029}' => ' ',
			// Remaining control characters (NUL, ESC, NEL, ...) are made visible.
			c if c.is_control() => '\u{FFFD}',
			// Keep printable characters as-is
			c => c,
		})
		.collect()
}
145
146/// Validates that a string is a safe identifier.
147///
148/// Allows: ASCII alphanumeric, hyphens, underscores.
149/// First character must be alphanumeric or underscore.
150/// Max length is enforced.
151///
152/// # Errors
153///
154/// Returns [`IdentifierError`] if the identifier is empty, too long,
155/// starts with an invalid character, or contains invalid characters.
156///
157/// # Examples
158///
159/// ```
160/// use reinhardt_utils::utils_core::input_validation::validate_identifier;
161///
162/// assert!(validate_identifier("my-plugin", 64).is_ok());
163/// assert!(validate_identifier("_internal", 64).is_ok());
164/// assert!(validate_identifier("", 64).is_err());
165/// assert!(validate_identifier("-invalid", 64).is_err());
166/// ```
167pub fn validate_identifier(input: &str, max_length: usize) -> Result<(), IdentifierError> {
168	if input.is_empty() {
169		return Err(IdentifierError::Empty);
170	}
171
172	if input.len() > max_length {
173		return Err(IdentifierError::TooLong { max_length });
174	}
175
176	// First character must be alphanumeric or underscore
177	let first = input.chars().next().expect("non-empty string");
178	if !first.is_ascii_alphanumeric() && first != '_' {
179		return Err(IdentifierError::InvalidStartCharacter { ch: first });
180	}
181
182	// Remaining characters: alphanumeric, hyphens, underscores
183	for ch in input.chars() {
184		if !ch.is_ascii_alphanumeric() && ch != '-' && ch != '_' {
185			return Err(IdentifierError::InvalidCharacter { ch });
186		}
187	}
188
189	Ok(())
190}
191
/// Unit tests for the validation and sanitization helpers in this module.
///
/// Tests are grouped by the function under test; each group is introduced
/// by a banner comment.
#[cfg(test)]
mod tests {
	use super::*;
	use rstest::rstest;

	// ===================================================================
	// validate_redirect_url tests
	// ===================================================================

	#[rstest]
	#[case("/dashboard", true)]
	#[case("/path/to/page", true)]
	#[case("./relative", true)]
	#[case("#section", true)]
	#[case("#", true)]
	#[case("https://example.com", true)]
	#[case("http://example.com/page", true)]
	#[case("https://example.com/path?q=1", true)]
	fn test_validate_redirect_url_allows_safe_urls(#[case] url: &str, #[case] expected: bool) {
		// Act
		let result = validate_redirect_url(url);

		// Assert
		assert_eq!(result, expected, "URL {:?} should be allowed", url);
	}

	#[rstest]
	#[case("javascript:alert(1)", false)]
	#[case("JAVASCRIPT:alert(1)", false)]
	#[case("data:text/html,<script>", false)]
	#[case("vbscript:msgbox", false)]
	#[case("../secret", false)]
	#[case("/path/../secret", false)]
	#[case("/path/..", false)]
	#[case("//evil.com", false)]
	#[case("", false)]
	#[case("   ", false)]
	#[case("ftp://files.example.com", false)]
	#[case("http://user:pass@host.com", false)]
	#[case("https://admin:secret@host.com/path", false)]
	fn test_validate_redirect_url_rejects_unsafe_urls(#[case] url: &str, #[case] expected: bool) {
		// Act
		let result = validate_redirect_url(url);

		// Assert
		assert_eq!(result, expected, "URL {:?} should be rejected", url);
	}

	#[rstest]
	fn test_validate_redirect_url_trims_whitespace() {
		// Arrange
		let url = "  /dashboard  ";

		// Act
		let result = validate_redirect_url(url);

		// Assert
		assert!(result);
	}

	// ===================================================================
	// sanitize_log_input tests
	// ===================================================================

	#[rstest]
	fn test_sanitize_log_input_replaces_newlines() {
		// Arrange
		let input = "line1\nline2\rline3\r\nline4";

		// Act
		let result = sanitize_log_input(input, 100);

		// Assert
		assert_eq!(result, "line1 line2 line3  line4");
	}

	#[rstest]
	fn test_sanitize_log_input_replaces_tabs() {
		// Arrange
		let input = "col1\tcol2\tcol3";

		// Act
		let result = sanitize_log_input(input, 100);

		// Assert
		assert_eq!(result, "col1 col2 col3");
	}

	#[rstest]
	fn test_sanitize_log_input_replaces_control_characters() {
		// Arrange
		let input = "before\x00\x01\x07after";

		// Act
		let result = sanitize_log_input(input, 100);

		// Assert
		assert_eq!(result, "before\u{FFFD}\u{FFFD}\u{FFFD}after");
	}

	#[rstest]
	fn test_sanitize_log_input_truncates_to_max_length() {
		// Arrange
		let input = "a".repeat(200);

		// Act
		let result = sanitize_log_input(&input, 50);

		// Assert
		assert_eq!(result.len(), 50);
	}

	#[rstest]
	fn test_sanitize_log_input_preserves_normal_text() {
		// Arrange
		let input = "Hello, World! 123 @#$";

		// Act
		let result = sanitize_log_input(input, 100);

		// Assert
		assert_eq!(result, input);
	}

	#[rstest]
	fn test_sanitize_log_input_empty_input() {
		// Act
		let result = sanitize_log_input("", 100);

		// Assert
		assert_eq!(result, "");
	}

	#[rstest]
	fn test_sanitize_log_input_zero_max_length() {
		// Act
		let result = sanitize_log_input("some text", 0);

		// Assert
		assert_eq!(result, "");
	}

	#[rstest]
	fn test_sanitize_log_input_multibyte_truncation_does_not_panic() {
		// Fixes #762: Use character count instead of byte length for truncation
		// to prevent cutting in the middle of multi-byte UTF-8 characters.
		let input = "あいうえおかきくけこ"; // 10 chars, 30 bytes

		// Act
		let result = sanitize_log_input(input, 5);

		// Assert
		assert_eq!(result.chars().count(), 5);
		assert_eq!(result, "あいうえお");
	}

	#[rstest]
	fn test_sanitize_log_input_mixed_multibyte_truncation() {
		// Fixes #762: Mixed ASCII and multibyte characters
		let input = "aあbいcうdえeお";

		// Act
		let result = sanitize_log_input(input, 6);

		// Assert
		assert_eq!(result.chars().count(), 6);
		assert_eq!(result, "aあbいcう");
	}

	// ===================================================================
	// validate_identifier tests
	// ===================================================================

	#[rstest]
	#[case("my-plugin", 64)]
	#[case("MyPlugin", 64)]
	#[case("plugin_v2", 64)]
	#[case("_internal", 64)]
	#[case("a", 64)]
	#[case("A123-test_name", 64)]
	fn test_validate_identifier_accepts_valid(#[case] input: &str, #[case] max_len: usize) {
		// Act
		let result = validate_identifier(input, max_len);

		// Assert
		assert!(result.is_ok(), "Identifier {:?} should be valid", input);
	}

	#[rstest]
	fn test_validate_identifier_rejects_empty() {
		// Act
		let result = validate_identifier("", 64);

		// Assert
		assert!(matches!(result, Err(IdentifierError::Empty)));
	}

	#[rstest]
	fn test_validate_identifier_rejects_too_long() {
		// Arrange
		let input = "a".repeat(65);

		// Act
		let result = validate_identifier(&input, 64);

		// Assert
		assert!(matches!(
			result,
			Err(IdentifierError::TooLong { max_length: 64 })
		));
	}

	#[rstest]
	#[case("-starts-with-hyphen")]
	fn test_validate_identifier_rejects_invalid_start(#[case] input: &str) {
		// Act
		let result = validate_identifier(input, 64);

		// Assert
		assert!(matches!(
			result,
			Err(IdentifierError::InvalidStartCharacter { .. })
		));
	}

	#[rstest]
	#[case("has space", ' ')]
	#[case("has.dot", '.')]
	#[case("has/slash", '/')]
	#[case("has@at", '@')]
	fn test_validate_identifier_rejects_invalid_characters(
		#[case] input: &str,
		#[case] expected_ch: char,
	) {
		// Act
		let result = validate_identifier(input, 64);

		// Assert
		match result {
			Err(IdentifierError::InvalidCharacter { ch }) => {
				assert_eq!(ch, expected_ch);
			}
			other => panic!("Expected InvalidCharacter, got {:?}", other),
		}
	}

	// ===================================================================
	// IdentifierError Display tests
	// ===================================================================

	#[rstest]
	fn test_identifier_error_display_messages() {
		// Assert
		assert_eq!(IdentifierError::Empty.to_string(), "Identifier is empty");
		assert_eq!(
			IdentifierError::TooLong { max_length: 32 }.to_string(),
			"Identifier exceeds maximum length of 32 characters"
		);
		assert_eq!(
			IdentifierError::InvalidCharacter { ch: '@' }.to_string(),
			"Identifier contains invalid character: '@'"
		);
		assert_eq!(
			IdentifierError::InvalidStartCharacter { ch: '-' }.to_string(),
			"Identifier must start with alphanumeric or underscore, got: '-'"
		);
	}
}
459}