streamdal_gjson/
util.rs

1// Copyright 2021 Joshua J Baker. All rights reserved.
2// Use of this source code is governed by an MIT-style
3// license that can be found in the LICENSE file.
4
5// Bit flags passed to the "info" parameter of the iter function which
6// provides additional information about the data
7
8use std::char;
9use std::mem;
10
/// Reinterprets a byte slice as a string slice without validating it.
///
/// This function must only be used on path components and json data that
/// originated from the `super::get()` function. `super::get()` only accepts
/// `&str` references, so its inputs are expected to be utf8 validated
/// already.
pub fn tostr(v: &[u8]) -> &str {
    // SAFETY: All slices of the json and path data taken during the get()
    // operation are cut at ascii codepoints, which ensures that the
    // conversion is safe.
    let text = unsafe { std::str::from_utf8_unchecked(v) };
    text
}
22
/// Returns `bin` without its leading and trailing bytes that are `<= b' '`
/// (the space character and every ASCII control byte).
///
/// The result is always a sub-slice of the input; when nothing remains it
/// is the empty slice positioned at the end of `bin`.
pub fn trim(bin: &[u8]) -> &[u8] {
    let keep = |b: &u8| *b > b' ';
    match bin.iter().position(keep) {
        // Every byte was trimmed away: hand back the empty tail.
        None => &bin[bin.len()..],
        Some(first) => {
            // A kept byte exists, so the reverse search finds one as well.
            let last = bin.iter().rposition(keep).unwrap_or(first);
            &bin[first..=last]
        }
    }
}
32
33// unescape a json string.
34pub fn unescape(json: &str) -> String {
35    let json = json.as_bytes();
36    if json.len() < 2 || json[0] != b'"' || json[json.len() - 1] != b'"' {
37        return String::new();
38    }
39    let json = &json[1..json.len() - 1];
40    let mut out = Vec::with_capacity(json.len());
41    let mut i = 0;
42    loop {
43        if i == json.len() || json[i] < b' ' {
44            break;
45        } else if json[i] == b'\\' {
46            i += 1;
47            if i == json.len() {
48                break;
49            }
50            match json[i] {
51                b'"' => out.push(b'"'),
52                b'\\' => out.push(b'\\'),
53                b'/' => out.push(b'/'),
54                b'b' => out.push(8),
55                b'f' => out.push(12),
56                b'n' => out.push(b'\n'),
57                b'r' => out.push(b'\r'),
58                b't' => out.push(b'\t'),
59                b'u' => {
60                    if i + 5 > json.len() {
61                        break;
62                    }
63                    let mut r =
64                        u32::from_str_radix(tostr(&json[i + 1..i + 5]), 16).unwrap_or(0xFFFD);
65                    i += 5;
66                    if utf16_is_surrogate(r) {
67                        // need another code
68                        if (&json[i..]).len() >= 6 && json[i] == b'\\' && json[i + 1] == b'u' {
69                            if let Ok(r2) = u32::from_str_radix(tostr(&json[i + 2..i + 6]), 16) {
70                                r = utf16_decode(r, r2);
71                            } else {
72                                r = 0xFFFD;
73                            }
74                            i += 6
75                        }
76                    }
77                    let ch = char::from_u32(r).unwrap_or(char::REPLACEMENT_CHARACTER);
78                    let mark = out.len();
79                    for _ in 0..10 {
80                        out.push(0);
81                    }
82                    let n = ch.encode_utf8(&mut out[mark..]).len();
83                    out.truncate(mark + n);
84                    continue;
85                }
86                _ => break,
87            }
88        } else {
89            out.push(json[i]);
90        }
91        i += 1;
92    }
93    unsafe { mem::transmute::<Vec<u8>, String>(out) }
94}
95
/// Reports whether `r` falls in the UTF-16 surrogate range
/// `0xD800..=0xDFFF` (high and low surrogates alike).
fn utf16_is_surrogate(r: u32) -> bool {
    matches!(r, 0xd800..=0xdfff)
}
99
/// Combines a UTF-16 surrogate pair into the code point it encodes.
///
/// `r1` must be a high surrogate (`0xD800..0xDC00`) and `r2` a low surrogate
/// (`0xDC00..0xE000`); any other combination yields U+FFFD.
fn utf16_decode(r1: u32, r2: u32) -> u32 {
    if (0xd800..0xdc00).contains(&r1) && (0xdc00..0xe000).contains(&r2) {
        // The parentheses matter: `+` binds tighter than `<<` and `|`, so
        // without them the 0x10000 offset is OR-ed into the high bits and
        // lost whenever bit 16 is already set (planes 2, 4, 6, ...).
        (((r1 - 0xd800) << 10) | (r2 - 0xdc00)) + 0x10000
    } else {
        0xFFFD
    }
}
107
108// fn next_json_encoded_rune(iter: &mut std::str::Chars) -> Option<u16> {
109//     (iter.next()?.to_digit(16)? << 16)
110//         | (iter.next()?.to_digit(16)? << 8)
111//         | (iter.next()?.to_digit(16)? << 4)
112//         | (iter.next()?.to_digit(16)? << 0);
113//     None
114// }
115
116// pub fn need_escaping(s: &str) -> bool {
117//     let s = s.as_bytes();
118//     for i in 0..s.len() {
119//         if s[i] < b' ' || s[i] == b'\n' || s[i] == b'\\' || s[i] == b'"' {
120//             return true;
121//         }
122//     }
123//     return false;
124// }
125
/// Appends `s` to `out` as a double-quoted json string literal.
///
/// `"` and `\` are backslash-escaped, the control bytes with a short form
/// use it (`\b`, `\f`, `\n`, `\r`, `\t`), and every other byte below `0x20`
/// is written as `\u00XX` with upper-case hex digits. All remaining bytes,
/// including `/` and non-ascii bytes, are copied through unchanged.
pub fn extend_json_string(out: &mut Vec<u8>, s: &[u8]) {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    out.push(b'"');
    for &byte in s {
        match byte {
            b'"' => out.extend_from_slice(b"\\\""),
            b'\\' => out.extend_from_slice(b"\\\\"),
            0x08 => out.extend_from_slice(b"\\b"),
            0x0C => out.extend_from_slice(b"\\f"),
            b'\n' => out.extend_from_slice(b"\\n"),
            b'\r' => out.extend_from_slice(b"\\r"),
            b'\t' => out.extend_from_slice(b"\\t"),
            // Remaining control bytes have no short escape.
            0x00..=0x1F => {
                out.extend_from_slice(b"\\u00");
                out.push(HEX[usize::from(byte >> 4)]);
                out.push(HEX[usize::from(byte & 0xF)]);
            }
            _ => out.push(byte),
        }
    }
    out.push(b'"');
}
155
156// escape a json string. includes the
157pub fn escape(s: &str) -> String {
158    let mut out = Vec::with_capacity(s.len());
159    extend_json_string(&mut out, s.as_bytes());
160    unsafe { std::mem::transmute::<Vec<u8>, String>(out) }
161}
162
/// pmatch returns true if `string` matches `pattern`. This is a very
/// simple wildcard match that works on bytes.
///
/// pattern:
///   { term }
/// term:
/// 	 '*'         matches any sequence of bytes, including none
/// 	 '?'         matches any single byte
/// 	 c           matches byte c (c != '*', '?', '\\')
/// 	'\\' c       matches byte c literally, including '*', '?' and '\\'
///
/// The whole string has to be consumed for a match. A pattern that ends in
/// a lone, unescaped backslash never matches anything.
///
/// Matching is iterative: only the position of the most recent '*' is
/// remembered for backtracking, so the running time is bounded by
/// `pattern.len() * string.len()` steps and no recursion is involved.
pub fn pmatch<S, P>(pattern: P, string: S) -> bool
where
    S: AsRef<[u8]>,
    P: AsRef<[u8]>,
{
    let text = string.as_ref();
    let pat = pattern.as_ref();
    let (mut pi, mut ti) = (0usize, 0usize);
    // After the latest '*': (pattern index following it, text index at which
    // the star's current match ends). Bumping the text index makes the star
    // swallow one more byte.
    let mut resume: Option<(usize, usize)> = None;

    loop {
        let advanced = match pat.get(pi) {
            None => {
                if ti == text.len() {
                    return true;
                }
                false
            }
            Some(b'*') => {
                pi += 1;
                // A trailing star matches whatever is left.
                if pi == pat.len() {
                    return true;
                }
                resume = Some((pi, ti));
                true
            }
            Some(b'?') => {
                if ti < text.len() {
                    pi += 1;
                    ti += 1;
                    true
                } else {
                    false
                }
            }
            Some(b'\\') => match pat.get(pi + 1) {
                // Dangling escape: the pattern can never be completed.
                None => return false,
                // The escaped byte is always compared literally, so `\?`
                // and `\*` do not act as wildcards.
                Some(literal) => {
                    if text.get(ti) == Some(literal) {
                        pi += 2;
                        ti += 1;
                        true
                    } else {
                        false
                    }
                }
            },
            Some(literal) => {
                if text.get(ti) == Some(literal) {
                    pi += 1;
                    ti += 1;
                    true
                } else {
                    false
                }
            }
        };
        if advanced {
            continue;
        }
        // Mismatch: let the most recent star absorb one more byte and retry
        // the rest of the pattern from there.
        match resume {
            Some((after_star, star_end)) if star_end < text.len() => {
                resume = Some((after_star, star_end + 1));
                pi = after_star;
                ti = star_end + 1;
            }
            _ => return false,
        }
    }
}
215
#[cfg(test)]
mod test {

    // Wildcard matching: literals, '*', '?' and backslash escapes.
    #[test]
    fn basic() {
        assert_eq!(true, super::pmatch("*", "",));
        assert_eq!(true, super::pmatch("", "",));
        assert_eq!(false, super::pmatch("", "hello world",));
        assert_eq!(false, super::pmatch("jello world", "hello world",));
        assert_eq!(true, super::pmatch("*", "hello world",));
        assert_eq!(true, super::pmatch("*world*", "hello world",));
        assert_eq!(true, super::pmatch("*world", "hello world",));
        assert_eq!(true, super::pmatch("hello*", "hello world",));
        assert_eq!(false, super::pmatch("jello*", "hello world",));
        assert_eq!(true, super::pmatch("hello?world", "hello world",));
        assert_eq!(false, super::pmatch("jello?world", "hello world",));
        assert_eq!(true, super::pmatch("he*o?world", "hello world",));
        assert_eq!(true, super::pmatch("he*o?wor*", "hello world",));
        assert_eq!(true, super::pmatch("he*o?*r*", "hello world",));
        assert_eq!(true, super::pmatch("h\\*ello", "h*ello",));
        assert_eq!(false, super::pmatch("hello\\", "hello\\",));
        assert_eq!(true, super::pmatch("hello\\?", "hello?",));
        assert_eq!(true, super::pmatch("hello\\\\", "hello\\",));

        // test for fast repeating stars: the pattern needs a literal '{'
        // that the string does not contain, so this must finish quickly
        // and report a mismatch.
        let string = ",**,,**,**,**,**,**,**,";
        let pattern = ",**********************************************{**\",**,,**,**,**,**,\"\",**,**,**,**,**,**,**,**,**,**]";
        assert_eq!(false, super::pmatch(pattern, string));
    }

    // Round trip between plain text, its minimally escaped form (raw1) and
    // its fully \u-escaped form (raw2). The full-width '!' and '?' in
    // `text` and `raw1` correspond to \uFF01 and \uFF1F in `raw2`.
    #[test]
    fn escape() {
        let text = r#"
第一印象:なんか怖っ!
今の印象:とりあえずキモい。噛み合わない
好きなところ:ぶすでキモいとこ😋✨✨
思い出:んーーー、ありすぎ😊❤️
LINE交換できる?:あぁ……ごめん✋
トプ画をみて:照れますがな😘✨
一言:お前は一生もんのダチ💖"#;

        let raw1 = r#""\n第一印象:なんか怖っ!\n今の印象:とりあえずキモい。噛み合わない\n好きなところ:ぶすでキモいとこ😋✨✨\n思い出:んーーー、ありすぎ😊❤️\nLINE交換できる?:あぁ……ごめん✋\nトプ画をみて:照れますがな😘✨\n一言:お前は一生もんのダチ💖""#;
        let raw2 = r#""\n\u7B2C\u4E00\u5370\u8C61:\u306A\u3093\u304B\u6016\u3063\uFF01\n\u4ECA\u306E\u5370\u8C61:\u3068\u308A\u3042\u3048\u305A\u30AD\u30E2\u3044\u3002\u565B\u307F\u5408\u308F\u306A\u3044\n\u597D\u304D\u306A\u3068\u3053\u308D:\u3076\u3059\u3067\u30AD\u30E2\u3044\u3068\u3053\uD83D\uDE0B\u2728\u2728\n\u601D\u3044\u51FA:\u3093\u30FC\u30FC\u30FC\u3001\u3042\u308A\u3059\u304E\uD83D\uDE0A\u2764\uFE0F\nLINE\u4EA4\u63DB\u3067\u304D\u308B\uFF1F:\u3042\u3041\u2026\u2026\u3054\u3081\u3093\u270B\n\u30C8\u30D7\u753B\u3092\u307F\u3066:\u7167\u308C\u307E\u3059\u304C\u306A\uD83D\uDE18\u2728\n\u4E00\u8A00:\u304A\u524D\u306F\u4E00\u751F\u3082\u3093\u306E\u30C0\u30C1\uD83D\uDC96""#;
        assert_eq!(text, super::unescape(raw1));
        assert_eq!(text, super::unescape(raw2));
        assert_eq!(super::escape(text), raw1);

        assert_eq!(
            super::escape("ad\"\\/\u{08}\u{0C}\n\r\t\u{00}sf"),
            r#""ad\"\\/\b\f\n\r\t\u0000sf""#
        );
    }

    // Lenient decoding of malformed input.
    #[test]
    fn unescape() {
        // Missing closing quote.
        assert_eq!(super::unescape(r#""adsf"#), "");
        // Unknown escape stops decoding.
        assert_eq!(super::unescape(r#""ad\sf""#), "ad");
        assert_eq!(
            super::unescape(r#""ad\"\\\/\b\f\n\r\tsf""#),
            "ad\"\\/\u{08}\u{0C}\n\r\tsf"
        );
        // Unpaired or badly paired surrogates become U+FFFD.
        assert_eq!(super::unescape(r#""ad\uD83Dsf""#), "ad\u{FFFD}sf");
        assert_eq!(super::unescape(r#""ad\uD83D\usf""#), "ad\u{FFFD}");
        assert_eq!(super::unescape(r#""ad\uD83D\uxxxxsf""#), "ad\u{FFFD}sf");
        assert_eq!(super::unescape(r#""ad\uD83D\u00FFsf""#), "ad\u{FFFD}sf");
    }
}