reinhardt_utils/utils_core/
encoding.rs1use std::borrow::Cow;
4
5use crate::utils_core::html::escape;
/// Percent-encodes `text`, writing spaces as `+`.
///
/// ASCII letters, ASCII digits and the characters `-`, `_`, `.`, `~` are
/// copied through unchanged. A space becomes `+`. Every other byte of the
/// UTF-8 representation is emitted as `%XX` using uppercase hex digits.
pub fn urlencode(text: &str) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    // Worst case: every input byte expands to a three-byte `%XX` sequence.
    let mut encoded = String::with_capacity(text.len() * 3);

    for &byte in text.as_bytes() {
        if byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~') {
            encoded.push(char::from(byte));
        } else if byte == b' ' {
            encoded.push('+');
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
        }
    }

    encoded
}
/// Decodes a string produced by percent-encoding with `+` for spaces.
///
/// `+` becomes a space, `%XX` (two hexadecimal digits, either case) becomes
/// the byte `0xXX`, and every other character is copied through as its UTF-8
/// bytes. The decoded byte sequence must form valid UTF-8.
///
/// # Errors
///
/// Returns a descriptive message when:
/// - a `%` is followed by fewer than two characters,
/// - the two characters after `%` are not both ASCII hex digits,
/// - the decoded bytes are not valid UTF-8.
pub fn urldecode(text: &str) -> Result<String, String> {
    let mut decoded: Vec<u8> = Vec::with_capacity(text.len());
    let mut chars = text.chars();

    // `while let` (not `for`) because the `%` arm pulls extra items from `chars`.
    while let Some(ch) = chars.next() {
        match ch {
            '+' => decoded.push(b' '),
            '%' => {
                let hex: String = chars.by_ref().take(2).collect();
                // Count characters, not bytes: a single two-byte character must
                // not be mistaken for a complete two-digit escape.
                if hex.chars().count() != 2 {
                    return Err(format!("Invalid URL encoding at '%{}'", hex));
                }
                // `u8::from_str_radix` accepts a leading `+` sign, so "%+1" would
                // otherwise decode to 0x01. Require two genuine hex digits.
                if !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
                    return Err(format!("Invalid hex in URL encoding: {}", hex));
                }
                let byte = u8::from_str_radix(&hex, 16)
                    .map_err(|_| format!("Invalid hex in URL encoding: {}", hex))?;
                decoded.push(byte);
            }
            _ => {
                // Covers both ASCII and multi-byte characters without allocating.
                let mut utf8 = [0u8; 4];
                decoded.extend_from_slice(ch.encode_utf8(&mut utf8).as_bytes());
            }
        }
    }

    String::from_utf8(decoded).map_err(|e| format!("Invalid UTF-8: {}", e))
}
/// Escapes `text` so it can be placed inside a JavaScript string literal.
///
/// - `'`, `"` and `\` are backslash-escaped.
/// - Newline, carriage return, tab, backspace and form feed use their short
///   escapes (`\n`, `\r`, `\t`, `\b`, `\f`).
/// - `<`, `>` and `&` are written as `\u003C`, `\u003E` and `\u0026`.
/// - U+2028 and U+2029 are written as `\u2028` and `\u2029`.
/// - Any remaining control character is written as `\uXXXX`.
///
/// All other characters are copied through unchanged.
pub fn escapejs(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len() + 20);

    for ch in text.chars() {
        match ch {
            '\'' => escaped.push_str("\\'"),
            '"' => escaped.push_str("\\\""),
            '\\' => escaped.push_str("\\\\"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            '\x08' => escaped.push_str("\\b"),
            '\x0C' => escaped.push_str("\\f"),
            '<' => escaped.push_str("\\u003C"),
            '>' => escaped.push_str("\\u003E"),
            '&' => escaped.push_str("\\u0026"),
            // LINE SEPARATOR / PARAGRAPH SEPARATOR are not `is_control()` (they
            // are categories Zl/Zp), yet older ECMAScript engines treat them as
            // line terminators that end a string literal. Escape them explicitly.
            '\u{2028}' => escaped.push_str("\\u2028"),
            '\u{2029}' => escaped.push_str("\\u2029"),
            _ if ch.is_control() => {
                escaped.push_str(&format!("\\u{:04X}", u32::from(ch)));
            }
            _ => escaped.push(ch),
        }
    }

    escaped
}
/// Turns `text` into a lowercase, dash-separated slug.
///
/// The input is lowercased (Unicode-aware). Characters in `a-z` and `0-9`
/// are kept; every run of any other characters collapses into a single `-`.
/// Leading and trailing separators are dropped, so the result never starts
/// or ends with `-` and never contains `--`.
pub fn slugify(text: &str) -> String {
    let lowered = text.to_lowercase();
    let mut slug = String::with_capacity(lowered.len());
    // Set when at least one separator character has been seen since the last
    // kept character; the dash is only written once another kept character
    // follows, which trims trailing separators for free.
    let mut separator_pending = false;

    for ch in lowered.chars() {
        if matches!(ch, 'a'..='z' | '0'..='9') {
            if separator_pending && !slug.is_empty() {
                slug.push('-');
            }
            separator_pending = false;
            slug.push(ch);
        } else {
            separator_pending = true;
        }
    }

    slug
}
/// Interprets `bytes` as UTF-8 text, never failing.
///
/// Valid UTF-8 is returned as a borrowed `&str` without copying. Otherwise an
/// owned string is produced in which each invalid sequence is replaced by
/// U+FFFD (the replacement character).
pub fn force_str(bytes: &[u8]) -> Cow<'_, str> {
    match std::str::from_utf8(bytes) {
        Ok(valid) => Cow::Borrowed(valid),
        Err(_) => String::from_utf8_lossy(bytes),
    }
}
/// Returns an owned copy of the UTF-8 bytes of `text`.
pub fn force_bytes(text: &str) -> Vec<u8> {
    Vec::from(text.as_bytes())
}
/// Shortens `text` to at most `max_length` characters, ending with `...`.
///
/// Lengths are measured in `char`s, not bytes. Text that already fits is
/// returned unchanged. Otherwise the first `max_length - 3` characters are
/// kept and followed by the ellipsis. When `max_length` is below three, the
/// ellipsis itself is cut down to `max_length` dots, so the result never
/// exceeds `max_length` characters.
pub fn truncate_chars(text: &str, max_length: usize) -> String {
    if text.chars().count() <= max_length {
        return text.to_owned();
    }

    // Room left for real content once the ellipsis is accounted for.
    let kept_chars = max_length.saturating_sub(3);
    // "..." is pure ASCII, so slicing by byte index is safe.
    let ellipsis = &"..."[..max_length.min(3)];

    let mut shortened: String = text.chars().take(kept_chars).collect();
    shortened.push_str(ellipsis);
    shortened
}
/// Keeps at most the first `max_words` whitespace-separated words of `text`.
///
/// When `text` has `max_words` words or fewer it is returned exactly as given,
/// including its original whitespace. Otherwise the first `max_words` words
/// are joined with single spaces and `...` is appended.
pub fn truncate_words(text: &str, max_words: usize) -> String {
    // A word at index `max_words` exists only if there are more than
    // `max_words` words in total.
    if text.split_whitespace().nth(max_words).is_none() {
        return text.to_owned();
    }

    let mut shortened = String::new();
    for (index, word) in text.split_whitespace().take(max_words).enumerate() {
        if index > 0 {
            shortened.push(' ');
        }
        shortened.push_str(word);
    }
    shortened.push_str("...");
    shortened
}
/// Greedily wraps `text` into lines of at most `width` characters.
///
/// Words are the whitespace-separated pieces of `text` and are re-joined with
/// single spaces. A word is never split: a word longer than `width` is placed
/// on a line of its own, which then exceeds `width`. Empty or whitespace-only
/// input yields an empty vector.
pub fn wrap_text(text: &str, width: usize) -> Vec<String> {
    let mut wrapped: Vec<String> = Vec::new();
    let mut line = String::new();
    let mut line_chars = 0;

    for word in text.split_whitespace() {
        let word_chars = word.chars().count();

        // The first word on a line is always accepted; later words need room
        // for a separating space plus the word itself.
        if !line.is_empty() {
            if line_chars + word_chars + 1 > width {
                wrapped.push(std::mem::take(&mut line));
                line_chars = 0;
            } else {
                line.push(' ');
                line_chars += 1;
            }
        }

        line.push_str(word);
        line_chars += word_chars;
    }

    if !line.is_empty() {
        wrapped.push(line);
    }

    wrapped
}
259pub fn linebreaks(text: &str) -> String {
279 let text = escape(text);
281 text.lines()
282 .map(|line| {
283 if line.trim().is_empty() {
284 "</p>\n<p>".to_string()
285 } else {
286 line.to_string()
287 }
288 })
289 .collect::<Vec<_>>()
290 .join("<br>\n")
291}
292pub fn linebreaksbr(text: &str) -> String {
310 escape(text).replace('\n', "<br>\n")
312}
313
/// Unit tests for the encoding and text helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;
    // Imported by name so the HTML-escaping tests can build their expected
    // values from the same helper the functions under test call.
    use crate::utils_core::html::escape;

    // ---------------------------------------------------------------
    // urlencode / urldecode
    // ---------------------------------------------------------------

    #[test]
    fn test_urlencode() {
        assert_eq!(urlencode("hello world"), "hello+world");
        assert_eq!(urlencode("hello@world.com"), "hello%40world.com");
        assert_eq!(urlencode("test&value=1"), "test%26value%3D1");
    }

    #[test]
    fn test_urldecode() {
        assert_eq!(urldecode("hello+world").unwrap(), "hello world");
        assert_eq!(urldecode("hello%40world.com").unwrap(), "hello@world.com");
        assert_eq!(urldecode("test%26value%3D1").unwrap(), "test&value=1");
    }

    #[test]
    fn test_urlencode_urldecode_roundtrip() {
        let original = "Hello, World! 123 @#$%";
        let encoded = urlencode(original);
        let decoded = urldecode(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn test_urldecode_invalid_hex() {
        assert!(urldecode("%ZZ").is_err());
        assert!(urldecode("%1").is_err());
    }

    #[test]
    fn test_urldecode_invalid_utf8() {
        // 0xFF 0xFE is not a valid UTF-8 sequence.
        let result = urldecode("%FF%FE");
        assert!(result.is_err());
    }

    #[test]
    fn test_urlencode_special_chars() {
        assert_eq!(urlencode("a-b_c.d~e"), "a-b_c.d~e");
        assert_eq!(urlencode("!@#$%^&*()"), "%21%40%23%24%25%5E%26%2A%28%29");
    }

    // ---------------------------------------------------------------
    // escapejs
    // ---------------------------------------------------------------

    #[test]
    fn test_escapejs() {
        assert_eq!(escapejs("Hello"), "Hello");
        assert_eq!(escapejs("It's \"quoted\""), "It\\'s \\\"quoted\\\"");
        assert_eq!(escapejs("Line\nBreak"), "Line\\nBreak");
        assert_eq!(escapejs("<script>"), "\\u003Cscript\\u003E");
    }

    #[test]
    fn test_escapejs_control_chars() {
        assert_eq!(escapejs("\x08"), "\\b");
        assert_eq!(escapejs("\x0C"), "\\f");
        assert_eq!(escapejs("\x01"), "\\u0001");
    }

    // ---------------------------------------------------------------
    // slugify
    // ---------------------------------------------------------------

    #[test]
    fn test_slugify() {
        assert_eq!(slugify("Hello World"), "hello-world");
        // Consecutive separators collapse into a single dash.
        assert_eq!(slugify("Hello  World"), "hello-world");
        assert_eq!(slugify("Hello-World"), "hello-world");
        assert_eq!(slugify("Test 123"), "test-123");
        assert_eq!(slugify("Special!@#Characters"), "special-characters");
    }

    #[test]
    fn test_slugify_empty() {
        assert_eq!(slugify(""), "");
    }

    #[test]
    fn test_slugify_unicode() {
        // Non-ASCII characters are treated as separators and then trimmed.
        assert_eq!(slugify("Hello 世界"), "hello");
    }

    #[test]
    fn test_slugify_multiple_dashes() {
        assert_eq!(slugify("hello---world"), "hello-world");
    }

    // ---------------------------------------------------------------
    // force_str / force_bytes
    // ---------------------------------------------------------------

    #[test]
    fn test_force_str() {
        let bytes = b"Hello, World!";
        assert_eq!(force_str(bytes), "Hello, World!");

        let invalid = b"Hello\xFF\xFEWorld";
        let result = force_str(invalid);
        assert!(result.contains("Hello"));
        assert!(result.contains("World"));
    }

    #[test]
    fn test_force_str_empty() {
        assert_eq!(force_str(b""), "");
    }

    #[test]
    fn test_force_bytes() {
        let text = "Hello, World!";
        assert_eq!(force_bytes(text), b"Hello, World!");
    }

    // ---------------------------------------------------------------
    // truncate_chars
    // ---------------------------------------------------------------

    #[test]
    fn test_truncate_chars() {
        assert_eq!(truncate_chars("Hello World", 20), "Hello World");
        assert_eq!(truncate_chars("Hello World", 8), "Hello...");
        assert_eq!(truncate_chars("Test", 10), "Test");
    }

    #[test]
    fn test_truncate_chars_exact_length() {
        assert_eq!(truncate_chars("Hello", 5), "Hello");
    }

    #[test]
    fn test_truncate_chars_unicode() {
        // Lengths are counted in characters, not bytes.
        assert_eq!(truncate_chars("こんにちは世界", 5), "こん...");
    }

    #[test]
    fn test_truncate_chars_zero_max_length_does_not_panic() {
        assert_eq!(truncate_chars("Hello", 0), "");
    }

    #[test]
    fn test_truncate_chars_max_length_one() {
        assert_eq!(truncate_chars("Hello", 1), ".");
    }

    #[test]
    fn test_truncate_chars_max_length_two() {
        assert_eq!(truncate_chars("Hello", 2), "..");
    }

    #[test]
    fn test_truncate_chars_max_length_three() {
        assert_eq!(truncate_chars("Hello", 3), "...");
    }

    #[test]
    fn test_truncate_chars_max_length_four() {
        assert_eq!(truncate_chars("Hello World", 4), "H...");
    }

    // ---------------------------------------------------------------
    // truncate_words
    // ---------------------------------------------------------------

    #[test]
    fn test_truncate_words() {
        assert_eq!(truncate_words("Hello World Test", 2), "Hello World...");
        assert_eq!(truncate_words("One", 5), "One");
        assert_eq!(truncate_words("A B C D E", 3), "A B C...");
    }

    #[test]
    fn test_truncate_words_empty() {
        assert_eq!(truncate_words("", 5), "");
    }

    // ---------------------------------------------------------------
    // wrap_text
    // ---------------------------------------------------------------

    #[test]
    fn test_wrap_text() {
        let text = "This is a long line that needs to be wrapped";
        let wrapped = wrap_text(text, 20);
        assert!(wrapped.len() > 1);
        assert!(wrapped.iter().all(|line| line.chars().count() <= 20));
    }

    #[test]
    fn test_wrap_text_single_word_exceeds_width() {
        let text = "VeryLongWordThatExceedsWidth";
        let wrapped = wrap_text(text, 10);
        assert_eq!(wrapped.len(), 1);
        assert_eq!(wrapped[0], "VeryLongWordThatExceedsWidth");
    }

    #[test]
    fn test_wrap_text_empty() {
        let wrapped = wrap_text("", 10);
        assert_eq!(wrapped.len(), 0);
    }

    // ---------------------------------------------------------------
    // linebreaks / linebreaksbr
    //
    // The HTML-escaping tests build their expected values with `escape`
    // instead of hard-coding entity strings, so they hold whichever entity
    // spellings `escape` emits.
    // NOTE(review): the multi-line cases assume `escape` leaves '\n' and plain
    // ASCII letters untouched and escapes each character independently —
    // confirm against `utils_core::html::escape`.
    // ---------------------------------------------------------------

    #[test]
    fn test_linebreaksbr() {
        assert_eq!(linebreaksbr("Line 1\nLine 2"), "Line 1<br>\nLine 2");
        assert_eq!(linebreaksbr("Single"), "Single");
    }

    #[test]
    fn test_linebreaksbr_escapes_html() {
        let markup = "<script>alert('xss')</script>";
        assert_eq!(linebreaksbr(markup), escape(markup));
        assert_eq!(
            linebreaksbr("<b>bold</b>\nnormal"),
            format!("{}<br>\nnormal", escape("<b>bold</b>"))
        );
    }

    #[test]
    fn test_linebreaks() {
        assert_eq!(
            linebreaks("Line 1\nLine 2\n\nLine 3"),
            "Line 1<br>\nLine 2<br>\n</p>\n<p><br>\nLine 3"
        );
    }

    #[test]
    fn test_linebreaks_single_line() {
        assert_eq!(linebreaks("Single line"), "Single line");
    }

    #[test]
    fn test_linebreaks_escapes_html() {
        let markup = "<script>alert('xss')</script>";
        assert_eq!(linebreaks(markup), escape(markup));
        assert_eq!(
            linebreaks("<b>bold</b>\nnormal"),
            format!("{}<br>\nnormal", escape("<b>bold</b>"))
        );
        let comparison = "5 < 10 & 10 > 5";
        assert_eq!(linebreaks(comparison), escape(comparison));
    }

    #[test]
    fn test_linebreaks_empty_lines() {
        assert_eq!(
            linebreaks("Line 1\n\nLine 2"),
            "Line 1<br>\n</p>\n<p><br>\nLine 2"
        );
    }
}
554
/// Property-based tests (via `proptest`) for the helpers in this module.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        /// A slug contains only `a-z`, `0-9` and `-`, and never a double dash.
        #[test]
        fn prop_slugify_format(s in "[a-zA-Z0-9 -]+") {
            let slug = slugify(&s);
            let only_slug_chars = slug
                .chars()
                .all(|c| matches!(c, 'a'..='z' | '0'..='9' | '-'));
            assert!(only_slug_chars);
            assert!(slug.find("--").is_none());
        }

        /// The truncated text never has more than `n` characters.
        #[test]
        fn prop_truncate_chars_length(s in "\\PC*", n in 0usize..100) {
            let shortened = truncate_chars(&s, n);
            let char_len = shortened.chars().count();
            assert!(char_len <= n);
        }

        /// The truncated text never has more than `n` words.
        #[test]
        fn prop_truncate_words_count(s in "\\w+(\\s+\\w+)*", n in 1usize..20) {
            let shortened = truncate_words(&s, n);
            let kept_words = shortened
                .split_whitespace()
                .filter(|&word| word != "...")
                .count();
            assert!(kept_words <= n);
        }

        /// Unreserved characters pass through `urlencode` unchanged.
        #[test]
        fn prop_urlencode_ascii_safe(s in "[a-zA-Z0-9._~-]+") {
            assert_eq!(urlencode(&s), s);
        }

        /// The escaped output never contains a raw newline, carriage return or tab.
        #[test]
        fn prop_escapejs_no_newlines(s in "\\PC*") {
            let escaped = escapejs(&s);
            let has_raw_whitespace_control = escaped
                .chars()
                .any(|c| matches!(c, '\n' | '\r' | '\t'));
            assert!(!has_raw_whitespace_control);
        }

        /// Wrapped lines stay within `width` plus a slack of 20 characters
        /// (a single word longer than `width` is kept whole).
        #[test]
        fn prop_wrap_text_line_width(s in "[a-zA-Z0-9 ]+", width in 10usize..50) {
            let longest_allowed = width + 20;
            let within_limit = wrap_text(&s, width)
                .iter()
                .all(|line| line.chars().count() <= longest_allowed);
            assert!(within_limit);
        }
    }
}