gjson 0.8.1

Get JSON values quickly
Documentation
// Copyright 2021 Joshua J Baker. All rights reserved.
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.

// Bit flags passed to the "info" parameter of the iter function which
// provides additional information about the data

use std::char;
use std::mem;

/// tostr transmutes a byte slice to a string reference. This function must
/// only be used on path components and json data which originated from the
/// super::get() function. The super::get() function only accepts &str
/// references and expects that the inputs are utf8 validated. All slices to
/// the json and path data during the get().
pub fn tostr<'a>(v: &'a [u8]) -> &'a str {
    // SAFETY: All slices to the json and path data during the get()
    // operation are done at ascii codepoints which ensuring that the
    // conversion is safe.
    unsafe { std::str::from_utf8_unchecked(v) }
}

pub fn trim<'a>(mut bin: &'a [u8]) -> &'a [u8] {
    while bin.len() > 0 && bin[0] <= b' ' {
        bin = &bin[1..];
    }
    while bin.len() > 0 && bin[bin.len() - 1] <= b' ' {
        bin = &bin[..bin.len() - 1];
    }
    bin
}

// unescape a json string.
pub fn unescape(json: &str) -> String {
    let json = json.as_bytes();
    if json.len() < 2 || json[0] != b'"' || json[json.len() - 1] != b'"' {
        return String::new();
    }
    let json = &json[1..json.len() - 1];
    let mut out = Vec::with_capacity(json.len());
    let mut i = 0;
    loop {
        if i == json.len() || json[i] < b' ' {
            break;
        } else if json[i] == b'\\' {
            i += 1;
            if i == json.len() {
                break;
            }
            match json[i] {
                b'"' => out.push(b'"'),
                b'\\' => out.push(b'\\'),
                b'/' => out.push(b'/'),
                b'b' => out.push(8),
                b'f' => out.push(12),
                b'n' => out.push(b'\n'),
                b'r' => out.push(b'\r'),
                b't' => out.push(b'\t'),
                b'u' => {
                    if i + 5 > json.len() {
                        break;
                    }
                    let mut r =
                        u32::from_str_radix(tostr(&json[i + 1..i + 5]), 16).unwrap_or(0xFFFD);
                    i += 5;
                    if utf16_is_surrogate(r) {
                        // need another code
                        if (&json[i..]).len() >= 6 && json[i] == b'\\' && json[i + 1] == b'u' {
                            if let Ok(r2) = u32::from_str_radix(tostr(&json[i + 2..i + 6]), 16) {
                                r = utf16_decode(r, r2);
                            } else {
                                r = 0xFFFD;
                            }
                            i += 6
                        }
                    }
                    let ch = char::from_u32(r).unwrap_or(char::REPLACEMENT_CHARACTER);
                    let mark = out.len();
                    for _ in 0..10 {
                        out.push(0);
                    }
                    let n = ch.encode_utf8(&mut out[mark..]).len();
                    out.truncate(mark + n);
                    continue;
                }
                _ => break,
            }
        } else {
            out.push(json[i]);
        }
        i += 1;
    }
    unsafe { mem::transmute::<Vec<u8>, String>(out) }
}

fn utf16_is_surrogate(r: u32) -> bool {
    0xd800 <= r && r < 0xe000
}

fn utf16_decode(r1: u32, r2: u32) -> u32 {
    if 0xd800 <= r1 && r1 < 0xdc00 && 0xdc00 <= r2 && r2 < 0xe000 {
        (r1 - 0xd800) << 10 | (r2 - 0xdc00) + 0x10000
    } else {
        0xFFFD
    }
}

// fn next_json_encoded_rune(iter: &mut std::str::Chars) -> Option<u16> {
//     (iter.next()?.to_digit(16)? << 16)
//         | (iter.next()?.to_digit(16)? << 8)
//         | (iter.next()?.to_digit(16)? << 4)
//         | (iter.next()?.to_digit(16)? << 0);
//     None
// }

// pub fn need_escaping(s: &str) -> bool {
//     let s = s.as_bytes();
//     for i in 0..s.len() {
//         if s[i] < b' ' || s[i] == b'\n' || s[i] == b'\\' || s[i] == b'"' {
//             return true;
//         }
//     }
//     return false;
// }

pub fn extend_json_string(out: &mut Vec<u8>, s: &[u8]) {
    out.push(b'"');
    for i in 0..s.len() {
        if s[i] < b' ' || s[i] == b'\n' || s[i] == b'\\' || s[i] == b'"' {
            out.push(b'\\');
            match s[i] {
                b'"' => out.push(b'"'),
                b'\\' => out.push(b'\\'),
                8 => out.push(b'b'),
                12 => out.push(b'f'),
                b'\n' => out.push(b'n'),
                b'\r' => out.push(b'r'),
                b'\t' => out.push(b't'),
                _ => {
                    out.push(b'u');
                    out.push(b'0');
                    out.push(b'0');
                    let h = s[i] >> 4;
                    out.push(if h < 10 { h + b'0' } else { (h - 10) + b'A' });
                    let l = s[i] & 0xF;
                    out.push(if l < 10 { l + b'0' } else { (l - 10) + b'A' });
                }
            }
        } else {
            out.push(s[i]);
        }
    }
    out.push(b'"');
}

// escape a json string. includes the
pub fn escape(s: &str) -> String {
    let mut out = Vec::with_capacity(s.len());
    extend_json_string(&mut out, s.as_bytes());
    unsafe { std::mem::transmute::<Vec<u8>, String>(out) }
}

/// pmatch returns true if str matches pattern. This is a very
/// simple wildcard match where '*' matches on any number characters
/// and '?' matches on any one character.
///
/// pattern:
///   { term }
/// term:
/// 	 '*'         matches any sequence of non-Separator characters
/// 	 '?'         matches any single non-Separator character
/// 	 c           matches character c (c != '*', '?')
/// 	'\\' c       matches character c
pub fn pmatch<S, P>(pattern: P, string: S) -> bool
where
    S: AsRef<[u8]>,
    P: AsRef<[u8]>,
{
    let mut string = string.as_ref();
    let mut pattern = pattern.as_ref();
    while pattern.len() > 0 {
        if pattern[0] == b'\\' {
            if pattern.len() == 1 {
                return false;
            }
            pattern = &pattern[1..];
        } else if pattern[0] == b'*' {
            if pattern.len() == 1 {
                return true;
            }
            if pattern[1] == b'*' {
                pattern = &pattern[1..];
                continue;
            }
            if pmatch(&pattern[1..], string) {
                return true;
            }
            if string.len() == 0 {
                return false;
            }
            string = &string[1..];
            continue;
        }
        if string.len() == 0 {
            return false;
        }
        if pattern[0] != b'?' && string[0] != pattern[0] {
            return false;
        }
        pattern = &pattern[1..];
        string = &string[1..];
    }
    return string.len() == 0 && pattern.len() == 0;
}

#[cfg(test)]
mod test {

    #[test]
    fn basic() {
        assert_eq!(true, super::pmatch("*", "",));
        assert_eq!(true, super::pmatch("", "",));
        assert_eq!(false, super::pmatch("", "hello world",));
        assert_eq!(false, super::pmatch("jello world", "hello world",));
        assert_eq!(true, super::pmatch("*", "hello world",));
        assert_eq!(true, super::pmatch("*world*", "hello world",));
        assert_eq!(true, super::pmatch("*world", "hello world",));
        assert_eq!(true, super::pmatch("hello*", "hello world",));
        assert_eq!(false, super::pmatch("jello*", "hello world",));
        assert_eq!(true, super::pmatch("hello?world", "hello world",));
        assert_eq!(false, super::pmatch("jello?world", "hello world",));
        assert_eq!(true, super::pmatch("he*o?world", "hello world",));
        assert_eq!(true, super::pmatch("he*o?wor*", "hello world",));
        assert_eq!(true, super::pmatch("he*o?*r*", "hello world",));
        assert_eq!(true, super::pmatch("h\\*ello", "h*ello",));
        assert_eq!(false, super::pmatch("hello\\", "hello\\",));
        assert_eq!(true, super::pmatch("hello\\?", "hello?",));
        assert_eq!(true, super::pmatch("hello\\\\", "hello\\",));

        // test for fast repeating stars
        let string = ",**,,**,**,**,**,**,**,";
        let pattern = ",**********************************************{**\",**,,**,**,**,**,\"\",**,**,**,**,**,**,**,**,**,**]";
        super::pmatch(pattern, string);
    }
    #[test]
    fn escape() {
        let text = r#"
第一印象:なんか怖っ!
今の印象:とりあえずキモい。噛み合わない
好きなところ:ぶすでキモいとこ😋✨✨
思い出:んーーー、ありすぎ😊❤️
LINE交換できる?:あぁ……ごめん✋
トプ画をみて:照れますがな😘✨
一言:お前は一生もんのダチ💖"#;

        let raw1 = r#""\n第一印象:なんか怖っ!\n今の印象:とりあえずキモい。噛み合わない\n好きなところ:ぶすでキモいとこ😋✨✨\n思い出:んーーー、ありすぎ😊❤️\nLINE交換できる?:あぁ……ごめん✋\nトプ画をみて:照れますがな😘✨\n一言:お前は一生もんのダチ💖""#;
        let raw2 = r#""\n\u7B2C\u4E00\u5370\u8C61:\u306A\u3093\u304B\u6016\u3063\uFF01\n\u4ECA\u306E\u5370\u8C61:\u3068\u308A\u3042\u3048\u305A\u30AD\u30E2\u3044\u3002\u565B\u307F\u5408\u308F\u306A\u3044\n\u597D\u304D\u306A\u3068\u3053\u308D:\u3076\u3059\u3067\u30AD\u30E2\u3044\u3068\u3053\uD83D\uDE0B\u2728\u2728\n\u601D\u3044\u51FA:\u3093\u30FC\u30FC\u30FC\u3001\u3042\u308A\u3059\u304E\uD83D\uDE0A\u2764\uFE0F\nLINE\u4EA4\u63DB\u3067\u304D\u308B\uFF1F:\u3042\u3041\u2026\u2026\u3054\u3081\u3093\u270B\n\u30C8\u30D7\u753B\u3092\u307F\u3066:\u7167\u308C\u307E\u3059\u304C\u306A\uD83D\uDE18\u2728\n\u4E00\u8A00:\u304A\u524D\u306F\u4E00\u751F\u3082\u3093\u306E\u30C0\u30C1\uD83D\uDC96""#;
        assert_eq!(text, super::unescape(raw1));
        assert_eq!(text, super::unescape(raw2));
        assert_eq!(super::escape(&text), raw1);

        assert_eq!(
            super::escape("ad\"\\/\u{08}\u{0C}\n\r\t\u{00}sf"),
            r#""ad\"\\/\b\f\n\r\t\u0000sf""#
        );
    }

    #[test]
    fn unescape() {
        assert_eq!(super::unescape(r#""adsf"#), "");
        assert_eq!(super::unescape(r#""ad\sf""#), "ad");
        assert_eq!(
            super::unescape(r#""ad\"\\\/\b\f\n\r\tsf""#),
            "ad\"\\/\u{08}\u{0C}\n\r\tsf"
        );
        assert_eq!(super::unescape(r#""ad\uD83Dsf""#), "ad�sf");
        assert_eq!(super::unescape(r#""ad\uD83D\usf""#), "ad�");
        assert_eq!(super::unescape(r#""ad\uD83D\uxxxxsf""#), "ad�sf");
        assert_eq!(super::unescape(r#""ad\uD83D\u00FFsf""#), "ad�sf");
    }
}