netsblox_ast/
util.rs

1use core::fmt::{self, Debug, Display};
2use alloc::string::ToString;
3
4#[cfg(test)]
5use proptest::prelude::*;
6
7use crate::*;
8
/// Formats the items of an iterator one after another, writing a separator between neighbours.
///
/// Field `0` is the iterator to render; it is cloned on every formatting call, which is why it must be `Clone`.
/// Field `1` is the separator. Nothing is written before the first item or after the last one,
/// and an empty iterator renders as the empty string.
pub struct Punctuated<'a, T: Iterator + Clone>(pub T, pub &'a str);

// Implements each listed formatting trait for `Punctuated`, rendering every item with the paired format string.
macro_rules! impl_punctuated {
    ($($req:ident => $fmt:literal),*$(,)?) => {$(
        impl<'a, T: Iterator + Clone> $req for Punctuated<'a, T> where <T as Iterator>::Item: $req {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                for (idx, item) in self.0.clone().enumerate() {
                    // every item except the first is preceded by the separator
                    if idx != 0 {
                        f.write_str(self.1)?;
                    }
                    write!(f, $fmt, item)?;
                }
                Ok(())
            }
        }
    )*}
}
impl_punctuated! { Debug => "{:?}", Display => "{}" }
27
28/// Returns a new string which is indented by 4 spaces.
29pub fn indent(code: &str) -> CompactString {
30    Punctuated(code.lines().map(|s| format!("    {}", s)), "\n").to_string().into()
31}
#[test]
fn test_indent() {
    // (input, expected) pairs: empty input, a single line, and several lines
    let cases = [
        ("", ""),
        ("hello", "    hello"),
        ("hello\nworld", "    hello\n    world"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(indent(input), expected);
    }
}
38
39/// Returns a new string which encodes special characters as the typical backslash escape sequences.
40/// Notably, this includes single and double quotes, so you can safely translate a string literal by wrapping the result in quotes.
41pub fn escape(raw: &str) -> CompactString {
42    let mut res = alloc::string::String::with_capacity(raw.len());
43    for c in raw.chars() {
44        match c {
45            '\"' => res += "\\\"",
46            '\\' => res += "\\\\",
47            '\'' => res += "\\'",
48            '\n' => res += "\\n",
49            '\r' => res += "\\r",
50            '\t' => res += "\\t",
51            _ => res.push(c),
52        }
53    }
54    res.into()
55}
#[test]
fn test_escape() {
    // text without special characters passes through unchanged
    let plain = "hello world";
    assert_eq!(escape(plain), plain);

    // every escaped character, back to back
    let special = "hello\n\r\t\\'\"world";
    let expected = "hello\\n\\r\\t\\\\\\'\\\"world";
    assert_eq!(escape(special), expected);
}
61
62pub fn normalize_space(raw: &str) -> CompactString {
63    let mut res = CompactString::default();
64    let mut chars = raw.trim().chars();
65    while let Some(c) = chars.next() {
66        if c.is_whitespace() {
67            res.push(' ');
68            for cc in chars.by_ref() {
69                if !cc.is_whitespace() {
70                    res.push(cc);
71                    break
72                }
73            }
74        }
75        else { res.push(c) }
76    }
77    res
78}
#[test]
fn test_normalize_space() {
    // mixes spaces, tabs, and both newline styles at the edges and in the middle
    let messy = " \t  hello \r\n \r\n\n \t\t   \t world \t\t  ";
    assert_eq!(normalize_space(messy), "hello world");
}
83
84/// Converts a Snap! identifier into a valid C-like identifier.
85pub fn c_ident(raw: &str) -> Result<CompactString, ()> {
86    let cleaned: CompactString = raw.chars().map(|ch| match ch {
87        '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' => ch,
88        _ => ' ',
89    }).collect();
90    let res: CompactString = Punctuated(cleaned.split_ascii_whitespace(), "_").to_string().into();
91    match res.chars().next() {
92        None => Err(()),
93        Some(v) => Ok(if ('0'..='9').contains(&v) { format!("var_{}", res).into() } else { res })
94    }
95}
#[test]
fn test_c_ident() {
    // (raw, expected) pairs for inputs that contain at least one identifier character
    let valid = [
        ("foo", "foo"),
        ("foo!", "foo"),
        ("foo[]", "foo"),
        ("(foo)", "foo"),
        (" (foo) ", "foo"),
        (" (foo-bar) ", "foo_bar"),
        (" (foo    bar 27) ", "foo_bar_27"),
        (" (foo bar*} 27)[]{} ", "foo_bar_27"),
        (" ( foo bar*} 27)[]{} ", "foo_bar_27"),
        (" ( foo ba*[]}r*} 27)[]{} ", "foo_ba_r_27"),
        ("foo's parent", "foo_s_parent"),
        ("6foo", "var_6foo"),
        ("[6foo", "var_6foo"),
        ("[ 6foo", "var_6foo"),
        // underscores are identifier characters and are kept verbatim
        ("_", "_"),
        ("__init__", "__init__"),
    ];
    for &(raw, expected) in valid.iter() {
        assert_eq!(c_ident(raw).unwrap(), expected, "input: {:?}", raw);
    }

    // inputs without any identifier character cannot be converted
    let invalid = ["", " ", "()", "[]{}*!", "\t-\n"];
    for &raw in invalid.iter() {
        assert!(c_ident(raw).is_err(), "input: {:?}", raw);
    }
}
113
114// source: https://docs.babelmonkeys.de/RustyXML/src/xml/lib.rs.html#41-55
115#[inline(never)]
116pub fn xml_escape(input: &str) -> CompactString {
117    let mut result = alloc::string::String::with_capacity(input.len());
118    for c in input.chars() {
119        match c {
120            '&' => result.push_str("&amp;"),
121            '<' => result.push_str("&lt;"),
122            '>' => result.push_str("&gt;"),
123            '\'' => result.push_str("&apos;"),
124            '"' => result.push_str("&quot;"),
125            o => result.push(o),
126        }
127    }
128    result.into()
129}
130
131#[inline(never)]
132pub fn xml_unescape(input: &str) -> Result<CompactString, XmlError> {
133    let mut result = alloc::string::String::with_capacity(input.len());
134
135    let mut chars = input.char_indices().fuse();
136    while let Some((start, start_ch)) = chars.next() {
137        match start_ch {
138            '&' => match chars.clone().skip_while(|(_, x)| x.is_ascii_digit() || x.is_ascii_alphabetic() || *x == '#').next() {
139                Some((stop, stop_ch)) if stop_ch == ';' => {
140                    match &input[start + 1..stop] {
141                        "quot" => result.push('"'),
142                        "apos" => result.push('\''),
143                        "gt" => result.push('>'),
144                        "lt" => result.push('<'),
145                        "amp" => result.push('&'),
146                        ent => {
147                            let val = if ent.starts_with("#x") {
148                                u32::from_str_radix(&ent[2..], 16).ok()
149                            } else if ent.starts_with('#') {
150                                u32::from_str_radix(&ent[1..], 10).ok()
151                            } else {
152                                None
153                            };
154                            match val.and_then(char::from_u32) {
155                                Some(c) => result.push(c),
156                                None => return Err(XmlError::IllegalSequence { sequence: format_compact!("&{};", ent) }),
157                            }
158                        }
159                    }
160                    while let Some((pos, _)) = chars.next() {
161                        if pos == stop { break }
162                    }
163                }
164                _ => result.push(start_ch),
165            }
166            _ => result.push(start_ch),
167        }
168    }
169
170    Ok(result.into())
171}
172
#[cfg(test)]
proptest! {
    // Escaping arbitrary generated text and then unescaping it must give back the original text.
    #[test]
    fn test_xml_enc_dec(raw in r".*") {
        let roundtrip = xml_unescape(&xml_escape(&raw)).unwrap();
        prop_assert_eq!(raw, roundtrip);
    }
}
182
#[test]
fn test_xml_dec() {
    // (encoded, decoded) pairs that must unescape successfully
    let valid = [
        ("hello world", "hello world"),
        // named entities
        ("hello &quot; world", "hello \" world"),
        ("hello &apos; world", "hello ' world"),
        ("hello &gt; world", "hello > world"),
        ("hello &lt; world", "hello < world"),
        ("hello &amp; world", "hello & world"),
        // decimal and hexadecimal character references
        ("hello &#63; world", "hello ? world"),
        ("hello &#x3f; world", "hello ? world"),
        ("hello &#x3F; world", "hello ? world"),
        ("hello &#126; world", "hello ~ world"),
        ("hello &#126; world&#126", "hello ~ world&#126"),
        ("hello &#126; world&#126;", "hello ~ world~"),
        ("hello &#x7e; world", "hello ~ world"),
        ("hello &#x7e; world&#x7e", "hello ~ world&#x7e"),
        ("hello &#x7e; world&#x7e;", "hello ~ world~"),
        // ampersands that do not start a `;`-terminated entity are kept literally
        ("hello & ;world", "hello & ;world"),
        ("hello & world", "hello & world"),
        ("hello && world", "hello && world"),
        ("hello &&& world", "hello &&& world"),
        ("he&llo &&& world", "he&llo &&& world"),
        ("he&llo &&& world&", "he&llo &&& world&"),
        ("&he&llo &&& world&", "&he&llo &&& world&"),
        ("&&he&llo &&& world&", "&&he&llo &&& world&"),
        ("&&he&llo &&& world&&", "&&he&llo &&& world&&"),
    ];
    for &(encoded, decoded) in valid.iter() {
        assert_eq!(xml_unescape(encoded).unwrap(), decoded, "input: {:?}", encoded);
    }

    // `;`-terminated entities that are unknown, malformed, or not a valid char are rejected
    let invalid = [
        "&foo;",
        "&;",
        "&#;",
        "&#x;",
        "&#xZZ;",
        "&#12a;",
        "&#xD800;",
        "&#1114112;",
    ];
    for &encoded in invalid.iter() {
        assert!(
            matches!(xml_unescape(encoded), Err(XmlError::IllegalSequence { .. })),
            "input: {:?}",
            encoded
        );
    }
}