Skip to main content

graphify_security/
label_validator.rs

//! Label sanitization to prevent injection in HTML/GraphML output.

/// Maximum number of characters kept from an input label.
///
/// The limit is applied after control characters are removed but *before*
/// HTML escaping, so the sanitized string may be longer than this once
/// special characters have been expanded into entities.
const MAX_LABEL_LEN: usize = 256;

/// Sanitize a label for safe use in HTML/GraphML output.
///
/// The label is processed in a single pass, in this order:
///
/// 1. Control characters (per [`char::is_control`]) are dropped.
/// 2. At most `MAX_LABEL_LEN` (256) of the remaining characters are kept.
///    The limit counts Unicode scalar values, not bytes.
/// 3. `&`, `<`, `>`, `"`, and `'` are replaced with `&amp;`, `&lt;`, `&gt;`,
///    `&quot;`, and `&#x27;` respectively.
///
/// Input that already contains entities is escaped again (`&lt;` becomes
/// `&amp;lt;`), so the function must be applied exactly once to raw text.
///
/// Returns a newly allocated `String`. Because escaping happens after
/// truncation, the result can be longer than 256 characters.
#[must_use]
pub fn sanitize_label(label: &str) -> String {
    // Capacity is only a lower-bound hint: escaping and multi-byte characters
    // can both make the final string larger.
    let mut escaped = String::with_capacity(label.len().min(MAX_LABEL_LEN));

    let kept = label
        .chars()
        .filter(|c| !c.is_control())
        .take(MAX_LABEL_LEN);

    for c in kept {
        match c {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#x27;"),
            other => escaped.push(other),
        }
    }

    escaped
}
25
/// Unit tests for `sanitize_label`: escaping, control-character stripping,
/// and truncation behavior.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_plain_text_unchanged() {
        assert_eq!(sanitize_label("Hello World"), "Hello World");
    }

    #[test]
    fn test_html_entities_escaped() {
        assert_eq!(
            sanitize_label("<script>alert(\"xss\")</script>"),
            "&lt;script&gt;alert(&quot;xss&quot;)&lt;/script&gt;"
        );
    }

    #[test]
    fn test_ampersand_escaped() {
        assert_eq!(sanitize_label("A & B"), "A &amp; B");
    }

    #[test]
    fn test_single_quote_escaped() {
        assert_eq!(sanitize_label("it's"), "it&#x27;s");
    }

    #[test]
    fn test_existing_entities_are_escaped_again() {
        // The sanitizer treats its input as raw text, so an `&` that is
        // already part of an entity is still escaped.
        assert_eq!(sanitize_label("&lt;"), "&amp;lt;");
    }

    #[test]
    fn test_control_chars_stripped() {
        assert_eq!(sanitize_label("hello\x00world\x07"), "helloworld");
    }

    #[test]
    fn test_newlines_stripped() {
        assert_eq!(sanitize_label("line1\nline2\r\n"), "line1line2");
    }

    #[test]
    fn test_tabs_stripped() {
        assert_eq!(sanitize_label("col1\tcol2"), "col1col2");
    }

    #[test]
    fn test_truncation() {
        let long = "a".repeat(300);
        let result = sanitize_label(&long);
        // ASCII input with nothing to escape: byte length equals the
        // 256-character limit.
        assert_eq!(result.len(), 256);
    }

    #[test]
    fn test_truncation_with_entities() {
        // 300 '<' characters in; only the first 256 are kept, and each of
        // those expands to "&lt;" (4 bytes), so truncation happens before
        // escaping.
        let input = "<".repeat(300);
        let result = sanitize_label(&input);
        assert_eq!(result.len(), 256 * 4);
    }

    #[test]
    fn test_truncation_counts_chars_not_bytes() {
        // Each '你' is 3 bytes in UTF-8; the limit applies to characters.
        let input = "你".repeat(300);
        let result = sanitize_label(&input);
        assert_eq!(result.chars().count(), 256);
        assert_eq!(result, "你".repeat(256));
    }

    #[test]
    fn test_control_chars_do_not_count_toward_limit() {
        // Stripped characters are removed before the limit is applied.
        let input = format!("{}{}", "\x00".repeat(50), "a".repeat(300));
        assert_eq!(sanitize_label(&input), "a".repeat(256));
    }

    #[test]
    fn test_empty_string() {
        assert_eq!(sanitize_label(""), "");
    }

    #[test]
    fn test_unicode_preserved() {
        assert_eq!(sanitize_label("你好世界"), "你好世界");
    }

    #[test]
    fn test_mixed_content() {
        assert_eq!(
            sanitize_label("Node <A> & \"B\""),
            "Node &lt;A&gt; &amp; &quot;B&quot;"
        );
    }

    #[test]
    fn test_backtick_and_braces() {
        // Backticks and braces pass through (not HTML-special)
        assert_eq!(sanitize_label("`{code}`"), "`{code}`");
    }
}