// reinhardt_utils/utils_core/html.rs
use reinhardt_core::security::xss::strip_tags_safe;
use std::borrow::Cow;
/// Escapes the five HTML-significant characters in `text`.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&#x27;` respectively; every other character is copied
/// through unchanged.
///
/// Returns a newly allocated `String`; the input is never modified.
pub fn escape(text: &str) -> String {
    // A little headroom so short inputs with a few escapes do not reallocate.
    let mut result = String::with_capacity(text.len() + 10);
    for ch in text.chars() {
        match ch {
            '&' => result.push_str("&amp;"),
            '<' => result.push_str("&lt;"),
            '>' => result.push_str("&gt;"),
            '"' => result.push_str("&quot;"),
            '\'' => result.push_str("&#x27;"),
            _ => result.push(ch),
        }
    }
    result
}
/// Decodes HTML character references in `text`.
///
/// Recognised forms:
/// - the named entities `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&apos;`;
/// - decimal references such as `&#39;`;
/// - hexadecimal references such as `&#x27;` (the `x` may be upper case).
///
/// Anything else is written back as `&` + name + `;`.
///
/// NOTE(review): the entity name is read up to the next `;` *or the end of
/// the input*. Consequently an unterminated known entity at the end of the
/// input is still decoded (`"&lt"` -> `"<"`), and an unrecognised,
/// unterminated one gains a trailing `;` (`"&"` -> `"&;"`). The unit tests in
/// this file pin that behaviour, so it is preserved here.
pub fn unescape(text: &str) -> String {
    let mut result = String::with_capacity(text.len());
    let mut chars = text.chars();

    while let Some(ch) = chars.next() {
        if ch != '&' {
            result.push(ch);
            continue;
        }

        // `take_while` also consumes the terminating ';', so the main loop
        // resumes right after the entity.
        let entity: String = chars.by_ref().take_while(|&c| c != ';').collect();
        let decoded = match entity.as_str() {
            "amp" => Some('&'),
            "lt" => Some('<'),
            "gt" => Some('>'),
            "quot" => Some('"'),
            "apos" => Some('\''),
            other => other.strip_prefix('#').and_then(decode_char_reference),
        };

        match decoded {
            Some(c) => result.push(c),
            None => {
                // Unknown or malformed: emit it in `&name;` form.
                result.push('&');
                result.push_str(&entity);
                result.push(';');
            }
        }
    }
    result
}

/// Parses the part of a numeric character reference that follows `#`
/// (decimal digits, or `x`/`X` followed by hex digits) into a `char`.
/// Returns `None` when the digits do not parse or do not name a valid
/// Unicode scalar value.
fn decode_char_reference(digits: &str) -> Option<char> {
    let hex_digits = digits
        .strip_prefix('x')
        .or_else(|| digits.strip_prefix('X'));
    let code = match hex_digits {
        Some(hex) => u32::from_str_radix(hex, 16).ok()?,
        None => digits.parse::<u32>().ok()?,
    };
    char::from_u32(code)
}
80pub fn strip_tags(html: &str) -> String {
101 strip_tags_safe(html)
103}
/// Removes runs of whitespace that sit directly between a `>` and a `<`.
///
/// Only space, tab, `\n` and `\r` count as whitespace. Whitespace inside a
/// tag, whitespace adjacent to text content, and leading/trailing whitespace
/// of the whole input are all preserved, so `"<p>Hello <b>World</b></p>"` is
/// returned unchanged while `"<div> <span>"` becomes `"<div><span>"`.
pub fn strip_spaces_between_tags(html: &str) -> String {
    let mut result = String::with_capacity(html.len());
    let mut in_tag = false;
    // True while the last character written to `result` was a '>'.
    let mut after_close = false;
    // Whitespace seen outside a tag whose fate depends on what follows it.
    let mut pending = String::new();

    for ch in html.chars() {
        match ch {
            '<' => {
                // Whitespace is dropped only when it is sandwiched between
                // '>' and '<'; otherwise it belongs to the preceding text.
                if !after_close {
                    result.push_str(&pending);
                }
                pending.clear();
                in_tag = true;
                after_close = false;
                result.push(ch);
            }
            ' ' | '\t' | '\n' | '\r' if !in_tag => pending.push(ch),
            _ => {
                // Any other character proves the buffered whitespace was not
                // between two tags, so it must be kept (and kept in order).
                result.push_str(&pending);
                pending.clear();
                if ch == '>' {
                    in_tag = false;
                }
                after_close = ch == '>';
                result.push(ch);
            }
        }
    }

    // Trailing whitespace is not followed by a tag, so it is preserved.
    result.push_str(&pending);
    result
}
150pub fn escape_attr(text: &str) -> String {
164 let escaped = escape(text);
165 let mut result = String::with_capacity(escaped.len());
167 for ch in escaped.chars() {
168 match ch {
169 '\n' => result.push_str(" "),
170 '\r' => result.push_str(" "),
171 '\t' => result.push_str("	"),
172 _ => result.push(ch),
173 }
174 }
175 result
176}
177pub fn format_html(template: &str, args: &[(&str, &str)]) -> String {
212 let mut result = template.to_string();
213 for (key, value) in args {
214 let placeholder = format!("{{{}}}", key);
215 let escaped_value = escape(value);
216 result = result.replace(&placeholder, &escaped_value);
217 }
218 result
219}
220pub fn conditional_escape(text: &str, autoescape: bool) -> Cow<'_, str> {
232 if autoescape {
233 Cow::Owned(escape(text))
234 } else {
235 Cow::Borrowed(text)
236 }
237}
238
/// Newtype wrapper around a `String`.
///
/// The wrapper stores the string exactly as given: nothing here escapes or
/// validates the content. Presumably it marks text as already safe for HTML
/// output, so callers are responsible for only wrapping trusted markup.
#[derive(Debug, Clone)]
pub struct SafeString(String);

impl SafeString {
    /// Wraps `s` without modifying it.
    pub fn new(s: impl Into<String>) -> Self {
        Self(s.into())
    }

    /// Borrows the wrapped text.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

impl From<String> for SafeString {
    /// Takes ownership of `s` without copying.
    fn from(s: String) -> Self {
        Self(s)
    }
}

impl From<&str> for SafeString {
    /// Copies `s` into a newly allocated wrapper.
    fn from(s: &str) -> Self {
        Self(s.to_owned())
    }
}
/// Truncates `html` after `num_words` words of text, keeping tags intact.
///
/// A word is a run of characters outside any tag, delimited by space, tab,
/// `\n`, `\r`, `<` or `>`. Everything between `<` and `>` is copied through
/// and never counted. As soon as the `num_words`-th word is complete the
/// function stops and appends `"..."`; this happens even when no further
/// words follow in the input. Input with fewer words is returned in full
/// with no ellipsis.
///
/// `num_words == 0` yields an empty string.
///
/// NOTE(review): tags that are still open at the cut-off point are not
/// closed, so the result may be unbalanced markup.
pub fn truncate_html_words(html: &str, num_words: usize) -> String {
    if num_words == 0 {
        return String::new();
    }

    let mut result = String::new();
    let mut word_count = 0usize;
    let mut in_tag = false;
    // Characters of the word currently being read (only filled outside tags).
    let mut current_word = String::new();

    for ch in html.chars() {
        let is_space = matches!(ch, ' ' | '\t' | '\n' | '\r');
        let ends_word = ch == '<' || ch == '>' || (is_space && !in_tag);

        if ends_word {
            // Emit the finished word before the delimiter so output order
            // always matches input order.
            if !current_word.is_empty() {
                result.push_str(&current_word);
                current_word.clear();
                word_count += 1;
                if word_count >= num_words {
                    result.push_str("...");
                    return result;
                }
            }
            match ch {
                '<' => in_tag = true,
                '>' => in_tag = false,
                _ => {}
            }
            result.push(ch);
        } else if in_tag {
            result.push(ch);
        } else {
            current_word.push(ch);
        }
    }

    // The limit was never reached, so a trailing partial word is kept.
    result.push_str(&current_word);
    result
}

/// Unit tests for the HTML helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_escape() {
        assert_eq!(escape("Hello, World!"), "Hello, World!");
        assert_eq!(
            escape("<script>alert('XSS')</script>"),
            "&lt;script&gt;alert(&#x27;XSS&#x27;)&lt;/script&gt;"
        );
        assert_eq!(escape("5 < 10 & 10 > 5"), "5 &lt; 10 &amp; 10 &gt; 5");
        assert_eq!(escape("\"quoted\""), "&quot;quoted&quot;");
    }

    #[test]
    fn test_unescape() {
        assert_eq!(unescape("&lt;div&gt;"), "<div>");
        assert_eq!(unescape("&amp;"), "&");
        assert_eq!(unescape("&quot;test&quot;"), "\"test\"");
        assert_eq!(unescape("&#x27;"), "'");
        assert_eq!(unescape("&#39;"), "'");
    }

    #[test]
    fn test_strip_tags() {
        assert_eq!(strip_tags("<p>Hello <b>World</b></p>"), "Hello World");
        assert_eq!(strip_tags("<div><span>Test</span></div>"), "Test");
        assert_eq!(strip_tags("No tags here"), "No tags here");
        assert_eq!(strip_tags("<a href=\"#\">Link</a>"), "Link");
    }

    #[test]
    fn test_strip_spaces_between_tags() {
        assert_eq!(
            strip_spaces_between_tags("<div> <span>Test</span> </div>"),
            "<div><span>Test</span></div>"
        );
    }

    #[test]
    fn test_escape_attr() {
        assert_eq!(escape_attr("value"), "value");
        assert_eq!(
            escape_attr("value with \"quotes\""),
            "value with &quot;quotes&quot;"
        );
        assert_eq!(escape_attr("line\nbreak"), "line&#10;break");
        assert_eq!(escape_attr("tab\there"), "tab&#9;here");
    }

    #[test]
    fn test_format_html() {
        let template = "<div class=\"{class}\">{content}</div>";
        let args = [("class", "container"), ("content", "Hello")];
        assert_eq!(
            format_html(template, &args),
            "<div class=\"container\">Hello</div>"
        );
    }

    #[test]
    fn test_conditional_escape() {
        assert_eq!(conditional_escape("<script>", true), "&lt;script&gt;");
        assert_eq!(conditional_escape("<script>", false), "<script>");
    }

    #[test]
    fn test_safe_string() {
        let safe = SafeString::new("<b>Bold</b>");
        assert_eq!(safe.as_str(), "<b>Bold</b>");
    }

    #[test]
    fn test_truncate_html_words() {
        let html = "<p>This is a <b>test</b> sentence with many words.</p>";
        let truncated = truncate_html_words(html, 5);
        assert!(truncated.contains("This"));
        assert!(truncated.contains("is"));
        assert!(truncated.contains("..."));
    }

    #[test]
    fn test_truncate_html_preserves_tags() {
        let html = "<div>Hello <strong>world</strong> test</div>";
        let truncated = truncate_html_words(html, 2);
        assert!(truncated.contains("<div>"));
        assert!(truncated.contains("<strong>"));
    }

    #[test]
    fn test_safe_string_from_string() {
        let s = String::from("<b>Bold</b>");
        let safe = SafeString::from(s);
        assert_eq!(safe.as_str(), "<b>Bold</b>");
    }

    #[test]
    fn test_safe_string_from_str() {
        let safe = SafeString::from("<i>Italic</i>");
        assert_eq!(safe.as_str(), "<i>Italic</i>");
    }

    #[test]
    fn test_escape_empty_string() {
        assert_eq!(escape(""), "");
    }

    #[test]
    fn test_escape_multibyte() {
        assert_eq!(escape("こんにちは<>&"), "こんにちは&lt;&gt;&amp;");
    }

    #[test]
    fn test_unescape_incomplete_entity() {
        // The entity name is read to the end of the input when no ';' is
        // found, so a known name is still decoded.
        assert_eq!(unescape("&lt"), "<");
        // A bare ampersand has an empty, unknown name and is written back in
        // `&name;` form.
        assert_eq!(unescape("&"), "&;");
    }

    #[test]
    fn test_unescape_unknown_entity() {
        assert_eq!(unescape("&unknown;"), "&unknown;");
    }

    #[test]
    fn test_strip_tags_nested() {
        assert_eq!(strip_tags("<div><p><span>Test</span></p></div>"), "Test");
    }

    #[test]
    fn test_strip_tags_empty() {
        assert_eq!(strip_tags(""), "");
    }

    #[test]
    fn test_strip_tags_quoted_attributes_with_angle_brackets() {
        // '>' inside a double-quoted attribute value
        assert_eq!(strip_tags(r#"<a title="x>y">Link</a>"#), "Link");
        // '>' inside a single-quoted attribute value
        assert_eq!(strip_tags("<a title='x>y'>Link</a>"), "Link");
        // several quoted attributes, each containing '>'
        assert_eq!(
            strip_tags(r#"<a title="a>b" data-value="c>d">Text</a>"#),
            "Text"
        );
        // the other quote character nested inside a quoted value
        assert_eq!(strip_tags(r#"<a title='x"y'>Link</a>"#), "Link");
        assert_eq!(strip_tags(r#"<a title="x'y">Link</a>"#), "Link");
    }

    #[test]
    fn test_strip_spaces_between_tags_multiple_spaces() {
        assert_eq!(
            strip_spaces_between_tags("<div> \n\t <span>Test</span> \n\t </div>"),
            "<div><span>Test</span></div>"
        );
    }

    #[test]
    fn test_escape_attr_carriage_return() {
        assert_eq!(escape_attr("test\rvalue"), "test&#13;value");
    }

    #[test]
    fn test_format_html_multiple_replacements() {
        let template = "<div id=\"{id}\" class=\"{class}\">{content}</div>";
        let args = [("id", "main"), ("class", "container"), ("content", "Hello")];
        assert_eq!(
            format_html(template, &args),
            "<div id=\"main\" class=\"container\">Hello</div>"
        );
    }

    #[test]
    fn test_format_html_no_replacements() {
        let template = "<div>Static content</div>";
        let args: [(&str, &str); 0] = [];
        assert_eq!(format_html(template, &args), "<div>Static content</div>");
    }

    #[test]
    fn test_format_html_xss_prevention_script_tag() {
        let template = "<p>{content}</p>";
        let args = [("content", "<script>alert('xss')</script>")];

        let result = format_html(template, &args);

        // The raw tag must not survive; its escaped form must be present.
        assert!(!result.contains("<script>"));
        assert!(result.contains("&lt;script&gt;"));
        assert!(result.contains("&lt;/script&gt;"));
        assert!(result.contains("&#x27;xss&#x27;"));
    }

    #[test]
    fn test_format_html_xss_prevention_event_handler() {
        let template = r#"<div class="{class}">{content}</div>"#;
        let args = [
            ("class", r#"container" onclick="alert('xss')"#),
            ("content", "Safe content"),
        ];

        let result = format_html(template, &args);

        // Quotes are escaped, so the value cannot break out of the attribute.
        assert!(result.contains("&quot;"));
        assert!(!result.contains(r#"onclick="alert"#));
    }

    #[test]
    fn test_format_html_xss_prevention_ampersand() {
        let template = "<a href=\"/search?q={query}\">Search</a>";
        let args = [("query", "test&redirect=evil.com")];

        let result = format_html(template, &args);

        assert!(result.contains("&amp;"));
        assert!(!result.contains("test&redirect"));
    }

    #[test]
    fn test_format_html_xss_prevention_angle_brackets() {
        let template = "<span>{text}</span>";
        let args = [("text", "<<SCRIPT>alert('XSS');//<</SCRIPT>")];

        let result = format_html(template, &args);

        assert!(!result.contains("<SCRIPT>"));
        assert!(result.contains("&lt;"));
        assert!(result.contains("&gt;"));
    }

    #[test]
    fn test_format_html_safe_values_unchanged() {
        let template = "<div id=\"{id}\" class=\"{class}\">{content}</div>";
        let args = [
            ("id", "main"),
            ("class", "container"),
            ("content", "Hello World"),
        ];

        let result = format_html(template, &args);

        assert_eq!(
            result,
            "<div id=\"main\" class=\"container\">Hello World</div>"
        );
    }

    #[test]
    fn test_truncate_html_words_exact_count() {
        let html = "<p>One two three</p>";
        let truncated = truncate_html_words(html, 3);
        // The ellipsis is appended as soon as the limit is reached, even
        // when no further words follow.
        assert!(truncated.contains("..."));
    }

    #[test]
    fn test_truncate_html_words_empty() {
        let html = "";
        let truncated = truncate_html_words(html, 5);
        assert_eq!(truncated, "");
    }
}

/// Property-based tests (via `proptest`) for the HTML helpers in this module.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        // Input is generated without any of the five special characters, so
        // the output must not contain them either.
        #[test]
        fn prop_escape_no_special_chars(s in "[^<>&\"']*") {
            let escaped = escape(&s);
            assert!(!escaped.contains('<'));
            assert!(!escaped.contains('>'));
            assert!(!escaped.contains('&'));
            assert!(!escaped.contains('"'));
            assert!(!escaped.contains('\''));
        }

        #[test]
        fn prop_strip_tags_no_angle_brackets(s in "\\PC*") {
            let stripped = strip_tags(&s);
            assert!(!stripped.contains('<'));
        }

        #[test]
        fn prop_strip_tags_length_decrease(s in "\\PC*") {
            let stripped = strip_tags(&s);
            assert!(stripped.len() <= s.len());
        }

        #[test]
        fn prop_truncate_html_words_respects_limit(html in "\\PC*", n in 1usize..20) {
            let truncated = truncate_html_words(&html, n);
            let word_count = truncated
                .split(|c: char| c.is_whitespace() || c == '<' || c == '>')
                .filter(|w| !w.is_empty() && !w.starts_with('/'))
                .filter(|w| !w.chars().all(|c| !c.is_alphanumeric()))
                .count();

            // Loose bound: this split also counts fragments that the
            // function itself does not treat as words.
            assert!(word_count <= n + 5);
        }

        #[test]
        fn prop_escape_attr_no_newlines(s in "\\PC*") {
            let escaped = escape_attr(&s);
            assert!(!escaped.contains('\n'));
            assert!(!escaped.contains('\r'));
            assert!(!escaped.contains('\t'));
        }

        #[test]
        fn prop_conditional_escape_when_true(s in "\\PC*") {
            let escaped_cond = conditional_escape(&s, true);
            let escaped_direct = escape(&s);
            assert_eq!(escaped_cond, escaped_direct);
        }

        #[test]
        fn prop_conditional_escape_when_false(s in "\\PC*") {
            let escaped = conditional_escape(&s, false);
            assert_eq!(escaped, s);
        }

        #[test]
        fn prop_safe_string_roundtrip(s in "\\PC*") {
            let safe = SafeString::from(s.clone());
            assert_eq!(safe.as_str(), &s);
        }

        #[test]
        fn prop_format_html_preserves_non_placeholders(template in "\\PC*") {
            let args: [(&str, &str); 0] = [];
            let result = format_html(&template, &args);
            assert_eq!(result, template);
        }

        #[test]
        fn prop_strip_spaces_reduces_whitespace(s in "\\PC*") {
            let stripped = strip_spaces_between_tags(&s);
            // Generous upper bound on the output length.
            assert!(stripped.len() <= s.len() + 100);
        }
    }
}