output_sanitize_rs/
lib.rs1#![deny(missing_docs)]
17
/// Destination the sanitized text is going to be rendered into.
///
/// Passed to [`sanitize`] to select the post-processing applied after redaction.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum Sink {
    /// Markdown output (the default): the redacted text is returned without further
    /// post-processing.
    #[default]
    Markdown,
    /// HTML output: after redaction, [`sanitize`] runs an extra replacement pass over the
    /// `&`, `<` and `>` characters of the text.
    Html,
}
27
/// One span that [`sanitize`] removed from its input.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Finding {
    /// Rule family that matched: `"html"`, `"sql"` or `"shell"`.
    pub kind: &'static str,
    /// The removed span, copied verbatim (original casing) from the input text.
    pub matched: String,
}
36
37#[derive(Debug, Clone)]
39pub struct SanitizeResult {
40 pub safe: bool,
42 pub text: String,
44 pub findings: Vec<Finding>,
46}
47
48pub fn sanitize(text: &str, sink: Sink) -> SanitizeResult {
50 let mut findings = Vec::new();
51 let mut out = String::with_capacity(text.len());
52 let lower = text.to_ascii_lowercase();
53 let lower_bytes = lower.as_bytes();
54 let original_bytes = text.as_bytes();
55
56 let mut i = 0;
57 while i < original_bytes.len() {
58 let tags = ["script", "iframe", "object", "embed", "form", "meta", "link"];
60 let mut matched = false;
61 if original_bytes[i] == b'<' {
62 for tag in tags {
63 let open = format!("<{}", tag);
64 let close = format!("</{}", tag);
65 if lower[i..].starts_with(&open) || lower[i..].starts_with(&close) {
66 if let Some(end_rel) = lower_bytes[i..].iter().position(|&c| c == b'>') {
68 let end = i + end_rel + 1;
69 findings.push(Finding {
70 kind: "html",
71 matched: text[i..end].to_string(),
72 });
73 out.push_str("[removed:html]");
74 i = end;
75 matched = true;
76 break;
77 }
78 }
79 }
80 }
81 if matched {
82 continue;
83 }
84
85 let sql_kws: &[&str] = &["drop ", "truncate ", "alter ", "delete from ", "insert into "];
87 let mut sql_matched = None;
88 for kw in sql_kws {
89 if lower[i..].starts_with(kw) && at_word_boundary(original_bytes, i) {
90 sql_matched = Some(*kw);
91 break;
92 }
93 }
94 if let Some(kw) = sql_matched {
95 let mut end = i + kw.len();
97 while end < original_bytes.len() && !original_bytes[end].is_ascii_whitespace() {
98 end += 1;
99 }
100 findings.push(Finding {
101 kind: "sql",
102 matched: text[i..end].to_string(),
103 });
104 out.push_str("[removed:sql]");
105 i = end;
106 continue;
107 }
108
109 let shell_signals: &[&str] = &["rm -rf", "chmod 777", "sudo ", "curl ", "wget "];
111 let mut shell_match = None;
112 for sig in shell_signals {
113 if lower[i..].starts_with(sig) && at_word_boundary(original_bytes, i) {
114 shell_match = Some(*sig);
115 break;
116 }
117 }
118 if let Some(sig) = shell_match {
119 let mut end = i + sig.len();
120 while end < original_bytes.len()
121 && original_bytes[end] != b'\n'
122 && original_bytes[end] != b';'
123 {
124 end += 1;
125 }
126 findings.push(Finding {
127 kind: "shell",
128 matched: text[i..end].to_string(),
129 });
130 out.push_str("[removed:shell]");
131 i = end;
132 continue;
133 }
134
135 let c = text[i..].chars().next().unwrap();
138 out.push(c);
139 i += c.len_utf8();
140 }
141
142 if sink == Sink::Html {
143 out = out
144 .replace('&', "&")
145 .replace('<', "<")
146 .replace('>', ">");
147 }
148
149 SanitizeResult {
150 safe: findings.is_empty(),
151 text: out,
152 findings,
153 }
154}
155
/// Reports whether a word could start at byte offset `i` of `bytes`.
///
/// A position is a boundary when the byte before it is not an ASCII letter, digit or `_`.
/// Treating `_` as a word character keeps keywords embedded in snake_case identifiers
/// (`soft_delete from`, `no_sudo `) from being matched. Positions that have no preceding
/// byte, including `i == 0`, count as a boundary.
fn at_word_boundary(bytes: &[u8], i: usize) -> bool {
    match i.checked_sub(1).and_then(|prev| bytes.get(prev)) {
        Some(&before) => !(before.is_ascii_alphanumeric() || before == b'_'),
        None => true,
    }
}