Skip to main content

reinhardt_utils/utils_core/
encoding.rs

1//! Text encoding and decoding utilities
2
3use std::borrow::Cow;
4
5use crate::utils_core::html::escape;
/// Percent-encode a string for use in a URL query component.
///
/// ASCII letters, digits and the characters `-`, `_`, `.`, `~` are copied
/// through unchanged, a space becomes `+`, and every other byte of the
/// UTF-8 encoding is written as `%XX` with uppercase hexadecimal digits.
///
/// # Examples
///
/// ```
/// use reinhardt_utils::utils_core::encoding::urlencode;
///
/// assert_eq!(urlencode("hello world"), "hello+world");
/// assert_eq!(urlencode("hello@world.com"), "hello%40world.com");
/// assert_eq!(urlencode("test&value=1"), "test%26value%3D1");
/// ```
pub fn urlencode(text: &str) -> String {
	const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

	// Worst case: every input byte expands to a three-character escape.
	let mut encoded = String::with_capacity(text.len() * 3);
	for &byte in text.as_bytes() {
		if byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~') {
			encoded.push(char::from(byte));
		} else if byte == b' ' {
			encoded.push('+');
		} else {
			// High nibble first, then low nibble.
			encoded.push('%');
			encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
			encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
		}
	}
	encoded
}
/// Decode a percent-encoded (URL-encoded) string.
///
/// `+` is decoded to a space and `%XX` (two hexadecimal digits, either case)
/// is decoded to the corresponding byte. All other characters are copied
/// through as their UTF-8 bytes. The decoded byte sequence must form valid
/// UTF-8.
///
/// # Errors
///
/// Returns an error message when:
/// - a `%` is followed by fewer than two characters,
/// - the two characters after a `%` are not both hexadecimal digits
///   (sign characters such as `+` are rejected), or
/// - the decoded bytes are not valid UTF-8.
///
/// # Examples
///
/// ```
/// use reinhardt_utils::utils_core::encoding::urldecode;
///
/// assert_eq!(urldecode("hello+world").unwrap(), "hello world");
/// assert_eq!(urldecode("hello%40world.com").unwrap(), "hello@world.com");
/// assert_eq!(urldecode("test%26value%3D1").unwrap(), "test&value=1");
/// assert!(urldecode("%ZZ").is_err());
/// assert!(urldecode("%+1").is_err());
/// ```
pub fn urldecode(text: &str) -> Result<String, String> {
	let mut decoded = Vec::with_capacity(text.len());
	let mut chars = text.chars();

	while let Some(ch) = chars.next() {
		match ch {
			'+' => decoded.push(b' '),
			'%' => {
				let hex: String = chars.by_ref().take(2).collect();
				// Validate each digit individually: integer parsers such as
				// `u8::from_str_radix` accept a leading `+`, which would let
				// malformed escapes like "%+1" through.
				let mut digits = hex.chars().map(|digit| digit.to_digit(16));
				match (digits.next(), digits.next()) {
					(Some(Some(high)), Some(Some(low))) => {
						// Both nibbles are < 16, so the value always fits in a u8.
						decoded.push((high * 16 + low) as u8);
					}
					(Some(_), Some(_)) => {
						return Err(format!("Invalid hex in URL encoding: {}", hex));
					}
					_ => return Err(format!("Invalid URL encoding at '%{}'", hex)),
				}
			}
			// Literal characters (ASCII or not) contribute their UTF-8 bytes.
			_ => decoded.extend_from_slice(ch.encode_utf8(&mut [0u8; 4]).as_bytes()),
		}
	}

	String::from_utf8(decoded).map_err(|e| format!("Invalid UTF-8: {}", e))
}
/// Escape a string so it can be embedded in a quoted JavaScript string literal.
///
/// The following replacements are applied:
/// - `'`, `"` and `\` are backslash-escaped;
/// - newline, carriage return, tab, backspace and form feed become
///   `\n`, `\r`, `\t`, `\b` and `\f`;
/// - `<`, `>` and `&` become `\u003C`, `\u003E` and `\u0026` so the result
///   cannot close a surrounding `<script>` element or start an HTML entity;
/// - U+2028 (line separator) and U+2029 (paragraph separator) become
///   `\u2028` and `\u2029`, because JavaScript treats them as line
///   terminators;
/// - any remaining control character becomes a `\uXXXX` escape.
///
/// Backticks are not escaped, so the output is not intended for use inside
/// template literals.
///
/// # Examples
///
/// ```
/// use reinhardt_utils::utils_core::encoding::escapejs;
///
/// assert_eq!(escapejs("Hello"), "Hello");
/// assert_eq!(escapejs("It's \"quoted\""), "It\\'s \\\"quoted\\\"");
/// assert_eq!(escapejs("Line\nBreak"), "Line\\nBreak");
/// assert_eq!(escapejs("<script>"), "\\u003Cscript\\u003E");
/// assert_eq!(escapejs("a\u{2028}b"), "a\\u2028b");
/// ```
pub fn escapejs(text: &str) -> String {
	let mut result = String::with_capacity(text.len() + 20);
	for ch in text.chars() {
		match ch {
			'\'' => result.push_str("\\'"),
			'"' => result.push_str("\\\""),
			'\\' => result.push_str("\\\\"),
			'\n' => result.push_str("\\n"),
			'\r' => result.push_str("\\r"),
			'\t' => result.push_str("\\t"),
			'\x08' => result.push_str("\\b"),
			'\x0C' => result.push_str("\\f"),
			'<' => result.push_str("\\u003C"),
			'>' => result.push_str("\\u003E"),
			'&' => result.push_str("\\u0026"),
			// These are not control characters (so `is_control` misses them),
			// but JavaScript treats them as line terminators.
			'\u{2028}' => result.push_str("\\u2028"),
			'\u{2029}' => result.push_str("\\u2029"),
			_ if ch.is_control() => {
				result.push_str(&format!("\\u{:04X}", ch as u32));
			}
			_ => result.push(ch),
		}
	}
	result
}
/// Turn arbitrary text into a lowercase, hyphen-separated slug.
///
/// The input is lowercased; ASCII letters and digits are kept, and every
/// other character (spaces, punctuation, underscores, non-ASCII text) acts
/// as a separator. Runs of separators collapse into a single `-`, and the
/// result never starts or ends with `-`.
///
/// # Examples
///
/// ```
/// use reinhardt_utils::utils_core::encoding::slugify;
///
/// assert_eq!(slugify("Hello World"), "hello-world");
/// assert_eq!(slugify("Hello  World"), "hello-world");
/// assert_eq!(slugify("Test 123"), "test-123");
/// assert_eq!(slugify("Special!@#Characters"), "special-characters");
/// ```
pub fn slugify(text: &str) -> String {
	let lowered = text.to_lowercase();
	let mut slug = String::with_capacity(lowered.len());
	// Set when at least one separator has been seen since the last kept character.
	let mut pending_separator = false;

	for ch in lowered.chars() {
		if ch.is_ascii_lowercase() || ch.is_ascii_digit() {
			// Emit a single hyphen between words, never before the first one.
			if pending_separator && !slug.is_empty() {
				slug.push('-');
			}
			pending_separator = false;
			slug.push(ch);
		} else {
			pending_separator = true;
		}
	}

	slug
}
/// Interpret a byte slice as UTF-8 text without ever failing.
///
/// Valid UTF-8 input is returned as a borrowed `&str`. If the input contains
/// invalid sequences, an owned string is returned in which each invalid
/// sequence is replaced by U+FFFD (the Unicode replacement character).
///
/// # Examples
///
/// ```
/// use reinhardt_utils::utils_core::encoding::force_str;
///
/// let bytes = b"Hello, World!";
/// assert_eq!(force_str(bytes), "Hello, World!");
///
/// let invalid = b"Hello\xFF\xFEWorld";
/// let result = force_str(invalid);
/// assert!(result.contains("Hello"));
/// assert!(result.contains("World"));
/// ```
pub fn force_str(bytes: &[u8]) -> Cow<'_, str> {
	match std::str::from_utf8(bytes) {
		// Already valid: hand back a borrow, no allocation needed.
		Ok(valid) => Cow::Borrowed(valid),
		// Otherwise fall back to lossy decoding.
		Err(_) => String::from_utf8_lossy(bytes),
	}
}
/// Copy the UTF-8 bytes of a string into a new `Vec<u8>`.
///
/// # Examples
///
/// ```
/// use reinhardt_utils::utils_core::encoding::force_bytes;
///
/// let text = "Hello, World!";
/// assert_eq!(force_bytes(text), b"Hello, World!");
/// ```
pub fn force_bytes(text: &str) -> Vec<u8> {
	Vec::from(text.as_bytes())
}
/// Truncate text to at most `max_length` characters, marking the cut with `...`.
///
/// Lengths are counted in `char`s, not bytes. Text that already fits is
/// returned unchanged. Otherwise the first `max_length - 3` characters are
/// kept and followed by `...`, so the result is exactly `max_length`
/// characters long. When `max_length` is smaller than 3 there is no room for
/// any content, and the result is just the first `max_length` dots.
///
/// # Examples
///
/// ```
/// use reinhardt_utils::utils_core::encoding::truncate_chars;
///
/// assert_eq!(truncate_chars("Hello World", 20), "Hello World");
/// assert_eq!(truncate_chars("Hello World", 8), "Hello...");
/// assert_eq!(truncate_chars("Test", 10), "Test");
/// ```
pub fn truncate_chars(text: &str, max_length: usize) -> String {
	if text.chars().count() <= max_length {
		return text.to_owned();
	}

	// The ellipsis itself is shortened when fewer than three characters are allowed.
	let ellipsis = &"..."[..max_length.min(3)];
	// `saturating_sub` keeps this at zero instead of underflowing for tiny limits.
	let kept = max_length.saturating_sub(3);

	let mut truncated: String = text.chars().take(kept).collect();
	truncated.push_str(ellipsis);
	truncated
}
/// Keep at most `max_words` whitespace-separated words, appending `...` when cut.
///
/// If the text contains `max_words` words or fewer it is returned exactly as
/// given, including its original whitespace. Otherwise the first `max_words`
/// words are joined with single spaces and followed by `...`.
///
/// # Examples
///
/// ```
/// use reinhardt_utils::utils_core::encoding::truncate_words;
///
/// assert_eq!(truncate_words("Hello World Test", 2), "Hello World...");
/// assert_eq!(truncate_words("One", 5), "One");
/// assert_eq!(truncate_words("A B C D E", 3), "A B C...");
/// ```
pub fn truncate_words(text: &str, max_words: usize) -> String {
	let mut remaining = text.split_whitespace();
	let kept: Vec<&str> = remaining.by_ref().take(max_words).collect();

	// Nothing left after the kept words means the text already fits.
	if remaining.next().is_none() {
		return text.to_owned();
	}

	let mut shortened = kept.join(" ");
	shortened.push_str("...");
	shortened
}
/// Greedily wrap text into lines of at most `width` characters.
///
/// The text is split on whitespace and words are packed onto a line,
/// separated by single spaces, for as long as they fit. Words are never
/// split, so a single word longer than `width` ends up alone on a line that
/// exceeds `width`. Empty or whitespace-only input yields no lines.
///
/// # Examples
///
/// ```
/// use reinhardt_utils::utils_core::encoding::wrap_text;
///
/// let text = "This is a long line that needs to be wrapped";
/// let wrapped = wrap_text(text, 20);
/// assert!(wrapped.len() > 1);
/// assert!(wrapped.iter().all(|line| line.chars().count() <= 20));
/// ```
pub fn wrap_text(text: &str, width: usize) -> Vec<String> {
	let mut wrapped = Vec::new();
	let mut line = String::new();
	// Length of `line` in characters, tracked separately to avoid recounting.
	let mut line_width = 0;

	for word in text.split_whitespace() {
		let word_width = word.chars().count();

		if !line.is_empty() {
			if line_width + word_width + 1 > width {
				// The word (plus its separating space) does not fit: start a new line.
				wrapped.push(std::mem::take(&mut line));
				line_width = 0;
			} else {
				line.push(' ');
				line_width += 1;
			}
		}

		line.push_str(word);
		line_width += word_width;
	}

	if !line.is_empty() {
		wrapped.push(line);
	}

	wrapped
}
259/// Line breaks to `<br>` tags
260///
261/// Input text is HTML-escaped before injecting HTML structure tags
262/// to prevent XSS attacks.
263///
264/// # Examples
265///
266/// ```
267/// use reinhardt_utils::utils_core::encoding::linebreaks;
268///
269/// assert_eq!(
270///     linebreaks("Line 1\nLine 2\n\nLine 3"),
271///     "Line 1<br>\nLine 2<br>\n</p>\n<p><br>\nLine 3"
272/// );
273/// assert_eq!(
274///     linebreaks("<script>alert('xss')</script>"),
275///     "&lt;script&gt;alert(&#x27;xss&#x27;)&lt;/script&gt;"
276/// );
277/// ```
278pub fn linebreaks(text: &str) -> String {
279	// Fixes #798: Escape HTML entities before injecting HTML structure tags
280	let text = escape(text);
281	text.lines()
282		.map(|line| {
283			if line.trim().is_empty() {
284				"</p>\n<p>".to_string()
285			} else {
286				line.to_string()
287			}
288		})
289		.collect::<Vec<_>>()
290		.join("<br>\n")
291}
292/// Line breaks to `<br>` tags without wrapping in `<p>`
293///
294/// Input text is HTML-escaped before injecting `<br>` tags
295/// to prevent XSS attacks.
296///
297/// # Examples
298///
299/// ```
300/// use reinhardt_utils::utils_core::encoding::linebreaksbr;
301///
302/// assert_eq!(linebreaksbr("Line 1\nLine 2"), "Line 1<br>\nLine 2");
303/// assert_eq!(linebreaksbr("Single"), "Single");
304/// assert_eq!(
305///     linebreaksbr("<b>bold</b>"),
306///     "&lt;b&gt;bold&lt;/b&gt;"
307/// );
308/// ```
309pub fn linebreaksbr(text: &str) -> String {
310	// Fixes #798: Escape HTML entities before injecting <br> tags
311	escape(text).replace('\n', "<br>\n")
312}
313
// Unit tests pinning the exact output of each helper in this module,
// including edge cases (empty input, Unicode, very small limits).
#[cfg(test)]
mod tests {
	use super::*;

	#[test]
	fn test_urlencode() {
		assert_eq!(urlencode("hello world"), "hello+world");
		assert_eq!(urlencode("hello@world.com"), "hello%40world.com");
		assert_eq!(urlencode("test&value=1"), "test%26value%3D1");
	}

	#[test]
	fn test_urldecode() {
		assert_eq!(urldecode("hello+world").unwrap(), "hello world");
		assert_eq!(urldecode("hello%40world.com").unwrap(), "hello@world.com");
		assert_eq!(urldecode("test%26value%3D1").unwrap(), "test&value=1");
	}

	// Decoding must be the inverse of encoding, including for `%` itself.
	#[test]
	fn test_urlencode_urldecode_roundtrip() {
		let original = "Hello, World! 123 @#$%";
		let encoded = urlencode(original);
		let decoded = urldecode(&encoded).unwrap();
		assert_eq!(decoded, original);
	}

	#[test]
	fn test_escapejs() {
		assert_eq!(escapejs("Hello"), "Hello");
		assert_eq!(escapejs("It's \"quoted\""), "It\\'s \\\"quoted\\\"");
		assert_eq!(escapejs("Line\nBreak"), "Line\\nBreak");
		assert_eq!(escapejs("<script>"), "\\u003Cscript\\u003E");
	}

	#[test]
	fn test_slugify() {
		assert_eq!(slugify("Hello World"), "hello-world");
		assert_eq!(slugify("Hello  World"), "hello-world");
		assert_eq!(slugify("Hello-World"), "hello-world");
		assert_eq!(slugify("Test 123"), "test-123");
		assert_eq!(slugify("Special!@#Characters"), "special-characters");
	}

	#[test]
	fn test_truncate_chars() {
		assert_eq!(truncate_chars("Hello World", 20), "Hello World");
		assert_eq!(truncate_chars("Hello World", 8), "Hello...");
		assert_eq!(truncate_chars("Test", 10), "Test");
	}

	#[test]
	fn test_truncate_words() {
		assert_eq!(truncate_words("Hello World Test", 2), "Hello World...");
		assert_eq!(truncate_words("One", 5), "One");
		assert_eq!(truncate_words("A B C D E", 3), "A B C...");
	}

	// Every word here is shorter than the width, so no line may exceed it.
	#[test]
	fn test_wrap_text() {
		let text = "This is a long line that needs to be wrapped";
		let wrapped = wrap_text(text, 20);
		assert!(wrapped.len() > 1);
		assert!(wrapped.iter().all(|line| line.chars().count() <= 20));
	}

	#[test]
	fn test_linebreaksbr() {
		assert_eq!(linebreaksbr("Line 1\nLine 2"), "Line 1<br>\nLine 2");
		assert_eq!(linebreaksbr("Single"), "Single");
	}

	// Markup in the input must come out escaped; only the inserted <br> is real HTML.
	#[test]
	fn test_linebreaksbr_escapes_html() {
		assert_eq!(
			linebreaksbr("<script>alert('xss')</script>"),
			"&lt;script&gt;alert(&#x27;xss&#x27;)&lt;/script&gt;"
		);
		assert_eq!(
			linebreaksbr("<b>bold</b>\nnormal"),
			"&lt;b&gt;bold&lt;/b&gt;<br>\nnormal"
		);
	}

	#[test]
	fn test_force_str() {
		let bytes = b"Hello, World!";
		assert_eq!(force_str(bytes), "Hello, World!");

		// Invalid bytes are replaced, but the valid text around them survives.
		let invalid = b"Hello\xFF\xFEWorld";
		let result = force_str(invalid);
		assert!(result.contains("Hello"));
		assert!(result.contains("World"));
	}

	#[test]
	fn test_force_bytes() {
		let text = "Hello, World!";
		assert_eq!(force_bytes(text), b"Hello, World!");
	}

	// The blank line becomes a `</p>\n<p>` marker, joined to its neighbours with `<br>\n`.
	#[test]
	fn test_linebreaks() {
		assert_eq!(
			linebreaks("Line 1\nLine 2\n\nLine 3"),
			"Line 1<br>\nLine 2<br>\n</p>\n<p><br>\nLine 3"
		);
	}

	#[test]
	fn test_linebreaks_single_line() {
		assert_eq!(linebreaks("Single line"), "Single line");
	}

	#[test]
	fn test_linebreaks_escapes_html() {
		assert_eq!(
			linebreaks("<script>alert('xss')</script>"),
			"&lt;script&gt;alert(&#x27;xss&#x27;)&lt;/script&gt;"
		);
		assert_eq!(
			linebreaks("<b>bold</b>\nnormal"),
			"&lt;b&gt;bold&lt;/b&gt;<br>\nnormal"
		);
		// Verify that `<`, `&` and `>` in plain text are each escaped exactly once
		assert_eq!(linebreaks("5 < 10 & 10 > 5"), "5 &lt; 10 &amp; 10 &gt; 5");
	}

	#[test]
	fn test_linebreaks_empty_lines() {
		assert_eq!(
			linebreaks("Line 1\n\nLine 2"),
			"Line 1<br>\n</p>\n<p><br>\nLine 2"
		);
	}

	// Covers both a non-hex escape and an escape cut short by the end of input.
	#[test]
	fn test_urldecode_invalid_hex() {
		assert!(urldecode("%ZZ").is_err());
		assert!(urldecode("%1").is_err());
	}

	#[test]
	fn test_urldecode_invalid_utf8() {
		// The escapes are well-formed, but bytes FF FE are not valid UTF-8,
		// so decoding must report an error
		let result = urldecode("%FF%FE");
		assert!(result.is_err());
	}

	#[test]
	fn test_urlencode_special_chars() {
		assert_eq!(urlencode("a-b_c.d~e"), "a-b_c.d~e");
		assert_eq!(urlencode("!@#$%^&*()"), "%21%40%23%24%25%5E%26%2A%28%29");
	}

	#[test]
	fn test_escapejs_control_chars() {
		assert_eq!(escapejs("\x08"), "\\b");
		assert_eq!(escapejs("\x0C"), "\\f");
		assert_eq!(escapejs("\x01"), "\\u0001");
	}

	#[test]
	fn test_slugify_empty() {
		assert_eq!(slugify(""), "");
	}

	#[test]
	fn test_slugify_unicode() {
		// Non-ASCII characters act as separators; separators at the end are dropped
		assert_eq!(slugify("Hello 世界"), "hello");
	}

	#[test]
	fn test_slugify_multiple_dashes() {
		assert_eq!(slugify("hello---world"), "hello-world");
	}

	#[test]
	fn test_truncate_chars_exact_length() {
		assert_eq!(truncate_chars("Hello", 5), "Hello");
	}

	// Lengths are counted in characters, not bytes.
	#[test]
	fn test_truncate_chars_unicode() {
		assert_eq!(truncate_chars("こんにちは世界", 5), "こん...");
	}

	#[test]
	fn test_truncate_words_empty() {
		assert_eq!(truncate_words("", 5), "");
	}

	// Words are never split, so an over-long word stays on one line.
	#[test]
	fn test_wrap_text_single_word_exceeds_width() {
		let text = "VeryLongWordThatExceedsWidth";
		let wrapped = wrap_text(text, 10);
		assert_eq!(wrapped.len(), 1);
		assert_eq!(wrapped[0], "VeryLongWordThatExceedsWidth");
	}

	#[test]
	fn test_wrap_text_empty() {
		let wrapped = wrap_text("", 10);
		assert_eq!(wrapped.len(), 0);
	}

	#[test]
	fn test_force_str_empty() {
		assert_eq!(force_str(b""), "");
	}

	#[test]
	fn test_truncate_chars_zero_max_length_does_not_panic() {
		// Fixes #764: saturating_sub prevents underflow when max_length < 3
		assert_eq!(truncate_chars("Hello", 0), "");
	}

	#[test]
	fn test_truncate_chars_max_length_one() {
		// Fixes #764: max_length=1 should produce "."
		assert_eq!(truncate_chars("Hello", 1), ".");
	}

	#[test]
	fn test_truncate_chars_max_length_two() {
		// Fixes #764: max_length=2 should produce ".."
		assert_eq!(truncate_chars("Hello", 2), "..");
	}

	#[test]
	fn test_truncate_chars_max_length_three() {
		// Fixes #764: max_length=3 should produce "..."
		assert_eq!(truncate_chars("Hello", 3), "...");
	}

	// Smallest limit that leaves room for one character of content plus "...".
	#[test]
	fn test_truncate_chars_max_length_four() {
		assert_eq!(truncate_chars("Hello World", 4), "H...");
	}
}
554
// Property-based tests: each property is checked against many generated
// inputs matching the given regex / range strategies.
#[cfg(test)]
mod proptests {
	use super::*;
	use proptest::prelude::*;

	proptest! {
		#[test]
		fn prop_slugify_format(s in "[a-zA-Z0-9 -]+") {
			let slug = slugify(&s);
			// Slug should only contain lowercase letters, numbers, and hyphens
			assert!(slug.chars().all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-'));
			// No consecutive hyphens
			assert!(!slug.contains("--"));
		}

		// The result, ellipsis included, must never be longer than the requested limit.
		#[test]
		fn prop_truncate_chars_length(s in "\\PC*", n in 0usize..100) {
			let truncated = truncate_chars(&s, n);
			assert!(truncated.chars().count() <= n);
		}

		#[test]
		fn prop_truncate_words_count(s in "\\w+(\\s+\\w+)*", n in 1usize..20) {
			let truncated = truncate_words(&s, n);
			// A standalone "..." token is not counted as a word
			let word_count = truncated.split_whitespace().filter(|w| *w != "...").count();
			assert!(word_count <= n);
		}

		#[test]
		fn prop_urlencode_ascii_safe(s in "[a-zA-Z0-9._~-]+") {
			let encoded = urlencode(&s);
			// These characters should not be encoded
			assert_eq!(encoded, s);
		}

		// Raw newline, carriage return and tab must never appear in escaped output.
		#[test]
		fn prop_escapejs_no_newlines(s in "\\PC*") {
			let escaped = escapejs(&s);
			assert!(!escaped.contains('\n'));
			assert!(!escaped.contains('\r'));
			assert!(!escaped.contains('\t'));
		}

		#[test]
		fn prop_wrap_text_line_width(s in "[a-zA-Z0-9 ]+", width in 10usize..50) {
			let lines = wrap_text(&s, width);
			for line in lines {
				// wrap_text never splits a word, so a word longer than `width`
				// produces an over-long line; the bound is deliberately loose.
				assert!(line.chars().count() <= width + 20);
			}
		}
	}
}