reinhardt_utils/utils_core/
html.rs1use reinhardt_core::security::xss::strip_tags_safe;
4use std::borrow::Cow;
/// Escapes text for safe inclusion in HTML element content.
///
/// The five HTML-significant characters are replaced with character
/// references; every other character is copied through unchanged:
///
/// | input | output   |
/// |-------|----------|
/// | `&`   | `&amp;`  |
/// | `<`   | `&lt;`   |
/// | `>`   | `&gt;`   |
/// | `"`   | `&quot;` |
/// | `'`   | `&#x27;` |
///
/// Returns a newly allocated `String`; an empty input yields an empty string.
pub fn escape(text: &str) -> String {
    // A little slack so that a handful of replacements does not force a regrow.
    let mut result = String::with_capacity(text.len() + 10);
    for ch in text.chars() {
        match ch {
            '&' => result.push_str("&amp;"),
            '<' => result.push_str("&lt;"),
            '>' => result.push_str("&gt;"),
            '"' => result.push_str("&quot;"),
            '\'' => result.push_str("&#x27;"),
            _ => result.push(ch),
        }
    }
    result
}
/// Decodes HTML character references in `text` back into plain characters.
///
/// Recognised references are the named entities `amp`, `lt`, `gt`, `quot`
/// and `apos`, decimal references such as `&#39;`, and hexadecimal
/// references such as `&#x27;` (the `x` may be upper or lower case).
///
/// Anything else that follows an `&` is written back as `&` + text + `;`.
/// Two consequences of how the reference name is collected (everything up to
/// the next `;`, or the end of input if there is none):
///
/// * a recognised reference with a missing terminator still decodes
///   (`"&lt"` becomes `"<"`);
/// * an unrecognised, unterminated reference gains a trailing `;`
///   (`"&"` becomes `"&;"`).
pub fn unescape(text: &str) -> String {
    let mut result = String::with_capacity(text.len());
    let mut chars = text.chars();

    while let Some(ch) = chars.next() {
        if ch != '&' {
            result.push(ch);
            continue;
        }

        // Consume the reference name; `take_while` also swallows the `;`.
        let entity: String = chars.by_ref().take_while(|&c| c != ';').collect();
        match decode_entity(&entity) {
            Some(decoded) => result.push(decoded),
            None => {
                // Not something we can decode: reproduce it as a reference.
                result.push('&');
                result.push_str(&entity);
                result.push(';');
            }
        }
    }
    result
}

/// Maps a reference name (the text between `&` and `;`) to its character,
/// or `None` when the name is unknown or denotes an invalid code point.
fn decode_entity(entity: &str) -> Option<char> {
    match entity {
        "amp" => Some('&'),
        "lt" => Some('<'),
        "gt" => Some('>'),
        "quot" => Some('"'),
        "apos" => Some('\''),
        _ => {
            let reference = entity.strip_prefix('#')?;
            let hex_digits = reference
                .strip_prefix('x')
                .or_else(|| reference.strip_prefix('X'));
            let code = match hex_digits {
                Some(hex) => u32::from_str_radix(hex, 16).ok()?,
                None => reference.parse::<u32>().ok()?,
            };
            // Rejects surrogates and values above U+10FFFF.
            char::from_u32(code)
        }
    }
}
/// Removes HTML tags from `html`, returning the remaining text.
///
/// This is a thin wrapper: all work is delegated to
/// `reinhardt_core::security::xss::strip_tags_safe`, so the exact parsing
/// rules are defined there. The unit tests in this module expect that nested
/// tags are removed and that a `>` inside a quoted attribute value does not
/// end the tag early.
pub fn strip_tags(html: &str) -> String {
    strip_tags_safe(html)
}
/// Removes whitespace that sits directly between two tags.
///
/// A run of whitespace (space, tab, `\n`, `\r`) is dropped only when it is
/// immediately preceded by `>` and immediately followed by `<`, e.g.
/// `<div> <span>` becomes `<div><span>`. All other whitespace is preserved,
/// including whitespace between text and a tag (`Hello <b>` keeps its space),
/// whitespace inside a tag, and leading or trailing whitespace of the input.
pub fn strip_spaces_between_tags(html: &str) -> String {
    let mut result = String::with_capacity(html.len());
    // Whitespace seen since the last `>`; its fate depends on what comes next.
    let mut pending = String::new();
    // True while the only characters seen since the last `>` are whitespace.
    let mut after_close = false;

    for ch in html.chars() {
        match ch {
            ' ' | '\t' | '\n' | '\r' if after_close => pending.push(ch),
            '<' => {
                // `>` + whitespace + `<`: the run is inter-tag padding, drop it.
                pending.clear();
                after_close = false;
                result.push(ch);
            }
            _ => {
                // The run was followed by text (or another `>`): keep it.
                result.push_str(&pending);
                pending.clear();
                after_close = ch == '>';
                result.push(ch);
            }
        }
    }

    // Whitespace after the final `>` is not between tags, so it is kept.
    result.push_str(&pending);
    result
}
/// Escapes text for safe inclusion in an HTML attribute value.
///
/// Performs the same replacements as plain HTML escaping (`&` → `&amp;`,
/// `<` → `&lt;`, `>` → `&gt;`, `"` → `&quot;`, `'` → `&#x27;`) and
/// additionally encodes line feed, carriage return and tab as the numeric
/// references `&#10;`, `&#13;` and `&#9;`, so the result contains none of
/// those three control characters. All other characters are copied unchanged.
pub fn escape_attr(text: &str) -> String {
    // A little slack so that a handful of replacements does not force a regrow.
    let mut result = String::with_capacity(text.len() + 10);
    for ch in text.chars() {
        match ch {
            '&' => result.push_str("&amp;"),
            '<' => result.push_str("&lt;"),
            '>' => result.push_str("&gt;"),
            '"' => result.push_str("&quot;"),
            '\'' => result.push_str("&#x27;"),
            '\n' => result.push_str("&#10;"),
            '\r' => result.push_str("&#13;"),
            '\t' => result.push_str("&#9;"),
            _ => result.push(ch),
        }
    }
    result
}
180pub fn format_html(template: &str, args: &[(&str, &str)]) -> String {
215 let mut result = template.to_string();
216 for (key, value) in args {
217 let placeholder = format!("{{{}}}", key);
218 let escaped_value = escape(value);
219 result = result.replace(&placeholder, &escaped_value);
220 }
221 result
222}
223pub fn conditional_escape(text: &str, autoescape: bool) -> Cow<'_, str> {
235 if autoescape {
236 Cow::Owned(escape(text))
237 } else {
238 Cow::Borrowed(text)
239 }
240}
241
/// Newtype wrapper around an owned `String`.
///
/// The wrapped text is stored exactly as given: none of the constructors
/// escape or validate it.
// NOTE(review): the name suggests this marks content as already safe for
// HTML output (in the manner of Django's `SafeString`); nothing in this file
// consumes that marker, so confirm against callers.
#[derive(Debug, Clone)]
pub struct SafeString(String);

impl SafeString {
    /// Wraps anything convertible into a `String`, without modification.
    pub fn new(s: impl Into<String>) -> Self {
        let inner: String = s.into();
        SafeString(inner)
    }

    /// Borrows the wrapped text.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl From<String> for SafeString {
    /// Takes ownership of `s` without copying it.
    fn from(s: String) -> Self {
        SafeString(s)
    }
}

impl From<&str> for SafeString {
    /// Copies `s` into a new owned string.
    fn from(s: &str) -> Self {
        SafeString(String::from(s))
    }
}
/// Truncates `html` after `num_words` words of text, keeping tags intact.
///
/// Tags (everything from `<` to the next `>`) are copied through verbatim and
/// never count as words. Outside tags, a word is a run of characters ended
/// by whitespace (space, tab, `\n`, `\r`) or by the start of a tag.
///
/// As soon as the `num_words`-th word has ended, the output collected so far
/// is returned with `"..."` appended. This happens even when no further
/// words follow, and tags that are still open at that point are not closed.
/// If the input runs out before the limit is reached, it is returned
/// unchanged. A limit of `0` yields an empty string.
pub fn truncate_html_words(html: &str, num_words: usize) -> String {
    if num_words == 0 {
        return String::new();
    }

    let mut result = String::with_capacity(html.len());
    let mut word_count = 0usize;
    let mut in_tag = false;
    // True while the characters most recently emitted belong to an unfinished word.
    let mut in_word = false;

    for ch in html.chars() {
        let is_space = !in_tag && matches!(ch, ' ' | '\t' | '\n' | '\r');

        // A word ends at whitespace or where a tag begins.
        if in_word && (is_space || ch == '<') {
            in_word = false;
            word_count += 1;
            if word_count >= num_words {
                result.push_str("...");
                return result;
            }
        }

        match ch {
            '<' => in_tag = true,
            '>' => in_tag = false,
            _ if is_space || in_tag => {}
            _ => in_word = true,
        }

        // Characters are emitted in input order, so a stray `>` in text
        // cannot be moved ahead of the word it appears in.
        result.push(ch);
    }

    result
}
355
/// Unit tests for the HTML helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_escape() {
        assert_eq!(escape("Hello, World!"), "Hello, World!");
        assert_eq!(
            escape("<script>alert('XSS')</script>"),
            "&lt;script&gt;alert(&#x27;XSS&#x27;)&lt;/script&gt;"
        );
        assert_eq!(escape("5 < 10 & 10 > 5"), "5 &lt; 10 &amp; 10 &gt; 5");
        assert_eq!(escape("\"quoted\""), "&quot;quoted&quot;");
    }

    #[test]
    fn test_unescape() {
        assert_eq!(unescape("&lt;div&gt;"), "<div>");
        assert_eq!(unescape("&amp;"), "&");
        assert_eq!(unescape("&quot;test&quot;"), "\"test\"");
        assert_eq!(unescape("&#x27;"), "'");
        assert_eq!(unescape("&#39;"), "'");
    }

    #[test]
    fn test_strip_tags() {
        assert_eq!(strip_tags("<p>Hello <b>World</b></p>"), "Hello World");
        assert_eq!(strip_tags("<div><span>Test</span></div>"), "Test");
        assert_eq!(strip_tags("No tags here"), "No tags here");
        assert_eq!(strip_tags("<a href=\"#\">Link</a>"), "Link");
    }

    #[test]
    fn test_strip_spaces_between_tags() {
        assert_eq!(
            strip_spaces_between_tags("<div> <span>Test</span> </div>"),
            "<div><span>Test</span></div>"
        );
    }

    #[test]
    fn test_escape_attr() {
        assert_eq!(escape_attr("value"), "value");
        assert_eq!(
            escape_attr("value with \"quotes\""),
            "value with &quot;quotes&quot;"
        );
        assert_eq!(escape_attr("line\nbreak"), "line&#10;break");
        assert_eq!(escape_attr("tab\there"), "tab&#9;here");
    }

    #[test]
    fn test_format_html() {
        let template = "<div class=\"{class}\">{content}</div>";
        let args = [("class", "container"), ("content", "Hello")];
        assert_eq!(
            format_html(template, &args),
            "<div class=\"container\">Hello</div>"
        );
    }

    #[test]
    fn test_conditional_escape() {
        assert_eq!(conditional_escape("<script>", true), "&lt;script&gt;");
        assert_eq!(conditional_escape("<script>", false), "<script>");
    }

    #[test]
    fn test_safe_string() {
        let safe = SafeString::new("<b>Bold</b>");
        assert_eq!(safe.as_str(), "<b>Bold</b>");
    }

    #[test]
    fn test_truncate_html_words() {
        let html = "<p>This is a <b>test</b> sentence with many words.</p>";
        let truncated = truncate_html_words(html, 5);
        assert!(truncated.contains("This"));
        assert!(truncated.contains("is"));
        assert!(truncated.contains("..."));
    }

    #[test]
    fn test_truncate_html_preserves_tags() {
        let html = "<div>Hello <strong>world</strong> test</div>";
        let truncated = truncate_html_words(html, 2);
        assert!(truncated.contains("<div>"));
        assert!(truncated.contains("<strong>"));
    }

    #[test]
    fn test_safe_string_from_string() {
        let s = String::from("<b>Bold</b>");
        let safe = SafeString::from(s);
        assert_eq!(safe.as_str(), "<b>Bold</b>");
    }

    #[test]
    fn test_safe_string_from_str() {
        let safe = SafeString::from("<i>Italic</i>");
        assert_eq!(safe.as_str(), "<i>Italic</i>");
    }

    #[test]
    fn test_escape_empty_string() {
        assert_eq!(escape(""), "");
    }

    #[test]
    fn test_escape_multibyte() {
        assert_eq!(escape("こんにちは<>&"), "こんにちは&lt;&gt;&amp;");
    }

    #[test]
    fn test_unescape_incomplete_entity() {
        // A known entity still decodes when its terminating `;` is missing.
        assert_eq!(unescape("&lt"), "<");
        // A bare `&` is treated as an empty, unknown entity and is written
        // back as `&` + "" + `;`.
        assert_eq!(unescape("&"), "&;");
    }

    #[test]
    fn test_unescape_unknown_entity() {
        assert_eq!(unescape("&unknown;"), "&unknown;");
    }

    #[test]
    fn test_strip_tags_nested() {
        assert_eq!(strip_tags("<div><p><span>Test</span></p></div>"), "Test");
    }

    #[test]
    fn test_strip_tags_empty() {
        assert_eq!(strip_tags(""), "");
    }

    #[test]
    fn test_strip_tags_quoted_attributes_with_angle_brackets() {
        // `>` inside a double-quoted attribute value
        assert_eq!(strip_tags(r#"<a title="x>y">Link</a>"#), "Link");
        // `>` inside a single-quoted attribute value
        assert_eq!(strip_tags("<a title='x>y'>Link</a>"), "Link");
        // several quoted attributes, each containing `>`
        assert_eq!(
            strip_tags(r#"<a title="a>b" data-value="c>d">Text</a>"#),
            "Text"
        );
        // a double quote nested inside single quotes
        assert_eq!(strip_tags(r#"<a title='x"y'>Link</a>"#), "Link");
        // a single quote nested inside double quotes
        assert_eq!(strip_tags(r#"<a title="x'y">Link</a>"#), "Link");
    }

    #[test]
    fn test_strip_spaces_between_tags_multiple_spaces() {
        assert_eq!(
            strip_spaces_between_tags("<div>  \n\t  <span>Test</span>  \n\t  </div>"),
            "<div><span>Test</span></div>"
        );
    }

    #[test]
    fn test_escape_attr_carriage_return() {
        assert_eq!(escape_attr("test\rvalue"), "test&#13;value");
    }

    #[test]
    fn test_format_html_multiple_replacements() {
        let template = "<div id=\"{id}\" class=\"{class}\">{content}</div>";
        let args = [("id", "main"), ("class", "container"), ("content", "Hello")];
        assert_eq!(
            format_html(template, &args),
            "<div id=\"main\" class=\"container\">Hello</div>"
        );
    }

    #[test]
    fn test_format_html_no_replacements() {
        let template = "<div>Static content</div>";
        let args: [(&str, &str); 0] = [];
        assert_eq!(format_html(template, &args), "<div>Static content</div>");
    }

    #[test]
    fn test_format_html_xss_prevention_script_tag() {
        let template = "<p>{content}</p>";
        let args = [("content", "<script>alert('xss')</script>")];

        let result = format_html(template, &args);

        // The raw tag must be gone and its escaped form present instead.
        assert!(!result.contains("<script>"));
        assert!(result.contains("&lt;script&gt;"));
        assert!(result.contains("&lt;/script&gt;"));
        assert!(result.contains("&#x27;xss&#x27;"));
    }

    #[test]
    fn test_format_html_xss_prevention_event_handler() {
        let template = r#"<div class="{class}">{content}</div>"#;
        let args = [
            ("class", r#"container" onclick="alert('xss')"#),
            ("content", "Safe content"),
        ];

        let result = format_html(template, &args);

        // Escaped quotes keep the value from breaking out of the attribute.
        assert!(result.contains("&quot;"));
        assert!(!result.contains(r#"onclick="alert"#));
    }

    #[test]
    fn test_format_html_xss_prevention_ampersand() {
        let template = "<a href=\"/search?q={query}\">Search</a>";
        let args = [("query", "test&redirect=evil.com")];

        let result = format_html(template, &args);

        assert!(result.contains("&amp;"));
        assert!(!result.contains("test&redirect"));
    }

    #[test]
    fn test_format_html_xss_prevention_angle_brackets() {
        let template = "<span>{text}</span>";
        let args = [("text", "<<SCRIPT>alert('XSS');//<</SCRIPT>")];

        let result = format_html(template, &args);

        assert!(!result.contains("<SCRIPT>"));
        assert!(result.contains("&lt;"));
        assert!(result.contains("&gt;"));
    }

    #[test]
    fn test_format_html_safe_values_unchanged() {
        let template = "<div id=\"{id}\" class=\"{class}\">{content}</div>";
        let args = [
            ("id", "main"),
            ("class", "container"),
            ("content", "Hello World"),
        ];

        let result = format_html(template, &args);

        assert_eq!(
            result,
            "<div id=\"main\" class=\"container\">Hello World</div>"
        );
    }

    #[test]
    fn test_truncate_html_words_exact_count() {
        let html = "<p>One two three</p>";
        let truncated = truncate_html_words(html, 3);
        // The limit is reached where the closing tag starts, so the marker
        // is appended even though no further words follow.
        assert!(truncated.contains("..."));
    }

    #[test]
    fn test_truncate_html_words_empty() {
        let html = "";
        let truncated = truncate_html_words(html, 5);
        assert_eq!(truncated, "");
    }
}
637
/// Property-based tests (via the `proptest` crate) for the HTML helpers.
///
/// The `"\\PC*"` strategy used below generates strings of characters that
/// are not in the Unicode "control" category.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        // Inputs are generated without any of the five escapable characters,
        // so the escaped output must not contain them either.
        #[test]
        fn prop_escape_no_special_chars(s in "[^<>&\"']*") {
            let escaped = escape(&s);
            assert!(!escaped.contains('<'));
            assert!(!escaped.contains('>'));
            assert!(!escaped.contains('&'));
            assert!(!escaped.contains('"'));
            assert!(!escaped.contains('\''));
        }

        // No `<` may survive tag stripping, whatever the input looks like.
        #[test]
        fn prop_strip_tags_no_angle_brackets(s in "\\PC*") {
            let stripped = strip_tags(&s);
            assert!(!stripped.contains('<'));
        }

        // Stripping tags only removes bytes; the output never grows.
        #[test]
        fn prop_strip_tags_length_decrease(s in "\\PC*") {
            let stripped = strip_tags(&s);
            assert!(stripped.len() <= s.len());
        }

        #[test]
        fn prop_truncate_html_words_respects_limit(html in "\\PC*", n in 1usize..20) {
            let truncated = truncate_html_words(&html, n);
            // Rough word count: split on whitespace and angle brackets, then
            // discard empty pieces, pieces starting with `/`, and pieces
            // without any alphanumeric character.
            let word_count = truncated
                .split(|c: char| c.is_whitespace() || c == '<' || c == '>')
                .filter(|w| !w.is_empty() && !w.starts_with('/'))
                .filter(|w| !w.chars().all(|c| !c.is_alphanumeric()))
                .count();

            // The count above is only an approximation of what
            // `truncate_html_words` treats as a word, hence the slack of 5.
            assert!(word_count <= n + 5);
        }

        // Attribute escaping must leave no raw newline, carriage return or tab.
        #[test]
        fn prop_escape_attr_no_newlines(s in "\\PC*") {
            let escaped = escape_attr(&s);
            assert!(!escaped.contains('\n'));
            assert!(!escaped.contains('\r'));
            assert!(!escaped.contains('\t'));
        }

        // With autoescape on, the result equals a direct call to `escape`.
        #[test]
        fn prop_conditional_escape_when_true(s in "\\PC*") {
            let escaped_cond = conditional_escape(&s, true);
            let escaped_direct = escape(&s);
            assert_eq!(escaped_cond, escaped_direct);
        }

        // With autoescape off, the input comes back unchanged.
        #[test]
        fn prop_conditional_escape_when_false(s in "\\PC*") {
            let escaped = conditional_escape(&s, false);
            assert_eq!(escaped, s);
        }

        // Wrapping a string in `SafeString` must not alter its contents.
        #[test]
        fn prop_safe_string_roundtrip(s in "\\PC*") {
            let safe = SafeString::from(s.clone());
            assert_eq!(safe.as_str(), &s);
        }

        // Without any arguments the template is returned as-is.
        #[test]
        fn prop_format_html_preserves_non_placeholders(template in "\\PC*") {
            let args: [(&str, &str); 0] = [];
            let result = format_html(&template, &args);
            assert_eq!(result, template);
        }

        // Deliberately loose bound: the output may not be more than 100
        // bytes longer than the input.
        #[test]
        fn prop_strip_spaces_reduces_whitespace(s in "\\PC*") {
            let stripped = strip_spaces_between_tags(&s);
            assert!(stripped.len() <= s.len() + 100);
        }
    }
}