snarkvm_console_network_environment/helpers/
sanitizer.rs1use crate::{ParserResult, string_parser::is_char_supported};
17
18use nom::{
19 branch::alt,
20 bytes::complete::tag,
21 character::complete::{anychar, char, line_ending, multispace1},
22 combinator::{cut, map, recognize, value, verify},
23 error::{ErrorKind, VerboseError, VerboseErrorKind},
24 multi::fold_many0,
25 sequence::{preceded, terminated},
26};
27
/// Zero-sized namespace type for the sanitizing parsers in this file.
///
/// All functionality lives in associated functions (`Sanitizer::parse`, ...);
/// the type carries no data. The common traits are derived so the public
/// type can be formatted for diagnostics and freely copied.
#[derive(Debug, Clone, Copy)]
pub struct Sanitizer;
29
30impl Sanitizer {
31 pub fn parse(string: &str) -> ParserResult<&str> {
33 preceded(Self::parse_whitespaces, Self::parse_comments)(string)
34 }
35
36 pub fn parse_whitespaces(string: &str) -> ParserResult<&str> {
38 recognize(Self::many0_(alt((multispace1, tag("\\\n")))))(string)
39 }
40
41 pub fn parse_comments(string: &str) -> ParserResult<&str> {
43 recognize(Self::many0_(terminated(Self::parse_comment, Self::parse_whitespaces)))(string)
44 }
45
46 pub fn parse_comment(string: &str) -> ParserResult<&str> {
48 preceded(
49 char('/'),
50 alt((preceded(char('/'), cut(Self::str_till_eol)), preceded(char('*'), cut(Self::str_till_star_slash)))),
51 )(string)
52 }
53
54 pub fn parse_safe_char(string: &str) -> ParserResult<char> {
67 fn is_safe(ch: &char) -> bool {
68 is_char_supported(*ch)
69 }
70 verify(anychar, is_safe)(string)
71 }
72}
73
74impl Sanitizer {
75 fn eoi(string: &str) -> ParserResult<()> {
79 match string.is_empty() {
80 true => Ok((string, ())),
81 false => {
82 Err(nom::Err::Error(VerboseError { errors: vec![(string, VerboseErrorKind::Nom(ErrorKind::Eof))] }))
83 }
84 }
85 }
86
87 fn eol(string: &str) -> ParserResult<()> {
91 alt((
92 Self::eoi, value((), line_ending),
94 ))(string)
95 }
96
97 fn till<'a, A, B, F, G>(mut f: F, mut g: G) -> impl FnMut(&'a str) -> ParserResult<'a, ()>
99 where
100 F: FnMut(&'a str) -> ParserResult<'a, A>,
101 G: FnMut(&'a str) -> ParserResult<'a, B>,
102 {
103 move |mut i| loop {
104 if let Ok((i2, _)) = g(i) {
105 break Ok((i2, ()));
106 }
107
108 let (i2, _) = f(i)?;
109 i = i2;
110 }
111 }
112
113 fn str_till_eol(string: &str) -> ParserResult<&str> {
123 if let Some((before, after)) = string.split_once('\n') {
127 let is_multiline = before.ends_with('\\'); if !is_multiline {
130 let contains_unsafe_chars = !before.chars().all(is_char_supported);
131
132 if !contains_unsafe_chars {
133 Ok((after, before))
134 } else {
135 recognize(Self::till(value((), Sanitizer::parse_safe_char), Self::eoi))(before)
139 }
140 } else {
141 map(
142 recognize(Self::till(
143 alt((value((), tag("\\\n")), value((), Sanitizer::parse_safe_char))),
144 Self::eol,
145 )),
146 |i| {
147 if i.as_bytes().last() == Some(&b'\n') { &i[0..i.len() - 1] } else { i }
149 },
150 )(string)
151 }
152 } else if string.chars().all(is_char_supported) {
153 Ok(("", string))
155 } else {
156 recognize(Self::till(value((), Sanitizer::parse_safe_char), Self::eoi))(string)
160 }
161 }
162
163 fn str_till_star_slash(string: &str) -> ParserResult<&str> {
169 map(recognize(Self::till(value((), Sanitizer::parse_safe_char), tag("*/"))), |i| {
170 &i[0..i.len() - 2] })(string)
172 }
173
174 fn many0_<'a, A, F>(mut f: F) -> impl FnMut(&'a str) -> ParserResult<'a, ()>
176 where
177 F: FnMut(&'a str) -> ParserResult<'a, A>,
178 {
179 move |string| fold_many0(&mut f, || (), |_, _| ())(string)
180 }
181}
182
#[cfg(test)]
mod tests {
    use super::*;

    /// `parse_safe_char` returns supported characters and rejects control and
    /// bidirectional-override characters.
    #[test]
    fn test_parse_safe_char() {
        // (input, expected remainder, expected character)
        let accepted = [
            ("A", "", 'A'),
            ("A and more", " and more", 'A'),
            ("\u{4141}", "", '\u{4141}'),
            ("\u{4141} and more", " and more", '\u{4141}'),
        ];
        for &(input, remainder, parsed) in accepted.iter() {
            assert_eq!((remainder, parsed), Sanitizer::parse_safe_char(input).unwrap());
        }

        // Among 0x00..=0x1f and 0x7f, only tab, line feed and carriage return pass.
        for byte in (0x00u8..=0x1f).chain(Some(0x7f)) {
            let text = (byte as char).to_string();
            let expect_ok = matches!(byte, 0x09 | 0x0a | 0x0d);
            assert_eq!(
                expect_ok,
                Sanitizer::parse_safe_char(&text).is_ok(),
                "unexpected result for byte {:#04x}",
                byte
            );
        }

        // (input, whether the character must be accepted)
        let bidi_cases = [
            ("\u{2029}", true),
            ("\u{202a}", false),
            ("\u{202b}", false),
            ("\u{202c}", false),
            ("\u{202d}", false),
            ("\u{202e}", false),
            ("\u{202f}", true),
            ("\u{2065}", true),
            ("\u{2066}", false),
            ("\u{2067}", false),
            ("\u{2068}", false),
            ("\u{2069}", false),
            ("\u{206a}", true),
        ];
        for &(input, expect_ok) in bidi_cases.iter() {
            assert_eq!(expect_ok, Sanitizer::parse_safe_char(input).is_ok(), "unexpected result for {:?}", input);
        }
    }

    /// `parse` drops leading whitespace and returns the recognized comments.
    #[test]
    fn test_sanitize() {
        // (input, expected remainder, expected recognized text)
        let cases = [
            // Whitespace only.
            ("hello world", "hello world", ""),
            (" hello world", "hello world", ""),
            (" hello world", "hello world", ""),
            ("\nhello world", "hello world", ""),
            (" \nhello world", "hello world", ""),
            ("hello world ", "hello world ", ""),
            // Comments only.
            ("// hello\nhello world", "hello world", "// hello\n"),
            ("/* hello */hello world", "hello world", "/* hello */"),
            ("/* hello */\nhello world", "hello world", "/* hello */\n"),
            ("/** hello */hello world", "hello world", "/** hello */"),
            ("/** hello */\nhello world", "hello world", "/** hello */\n"),
            ("/\nhello world", "/\nhello world", ""),
            // Whitespace followed by comments.
            (" \n// hello\nhello world", "hello world", "// hello\n"),
            (" \n /* hello */\nhello world", "hello world", "/* hello */\n"),
            (" \n\t /** hello */\nhello world", "hello world", "/** hello */\n"),
            (" /\nhello world", "/\nhello world", ""),
        ];
        for &(input, remainder, recognized) in cases.iter() {
            assert_eq!((remainder, recognized), Sanitizer::parse(input).unwrap());
        }
    }

    /// `parse_whitespaces` recognizes only the leading whitespace.
    #[test]
    fn test_whitespaces() {
        // (input, expected remainder, expected recognized whitespace)
        let cases = [
            ("hello world", "hello world", ""),
            (" hello world", "hello world", " "),
            (" hello world", "hello world", " "),
            ("\nhello world", "hello world", "\n"),
            (" \nhello world", "hello world", " \n"),
            ("\thello world", "hello world", "\t"),
            (" \thello world", "hello world", " \t"),
            (" \n\thello world", "hello world", " \n\t"),
            ("hello world ", "hello world ", ""),
        ];
        for &(input, remainder, recognized) in cases.iter() {
            assert_eq!((remainder, recognized), Sanitizer::parse_whitespaces(input).unwrap());
        }
    }

    /// `parse_comments` recognizes runs of comments and fails on comments that
    /// contain unsupported characters.
    #[test]
    fn test_comments() {
        // (input, expected remainder, expected recognized text)
        let accepted = [
            ("// hello\nhello world", "hello world", "// hello\n"),
            ("/* hello */\nhello world", "hello world", "/* hello */\n"),
            ("/** hello */\nhello world", "hello world", "/** hello */\n"),
            ("/\nhello world", "/\nhello world", ""),
            ("// hel\u{4141}lo\nhello world", "hello world", "// hel\u{4141}lo\n"),
            ("/* multi\n line comment\n*/\nhello world", "hello world", "/* multi\n line comment\n*/\n"),
            (
                "// multiple\n// line\n// comments\nhello world",
                "hello world",
                "// multiple\n// line\n// comments\n",
            ),
            (
                "/* multi\n line comment\n*/\n/* and\n another\n one\n*/\nhello world",
                "hello world",
                "/* multi\n line comment\n*/\n/* and\n another\n one\n*/\n",
            ),
            (
                "/* multi\n line comment\n*/\n// two single\n// line comments\n/* and\n another\n multi-liner\n*/\nhello world",
                "hello world",
                "/* multi\n line comment\n*/\n// two single\n// line comments\n/* and\n another\n multi-liner\n*/\n",
            ),
        ];
        for &(input, remainder, recognized) in accepted.iter() {
            assert_eq!((remainder, recognized), Sanitizer::parse_comments(input).unwrap());
        }

        // Comments containing unsupported characters must be rejected.
        let rejected = [
            "// hel\x08lo\nhello world",
            "// hel\u{2066}lo\nhello world",
            "/* hel\x7flo */\nhello world",
            "/* hel\u{202d}lo */\nhello world",
            "/** hel\x00lo */\nhello world",
            "/** hel\u{202a}lo */\nhello world",
            "// unsafe \u{202a} no newline",
        ];
        for &input in rejected.iter() {
            assert!(Sanitizer::parse_comments(input).is_err(), "expected an error for {:?}", input);
        }
    }
}