use crate::{
tokenizer::{self, PosRange, Tokenizer},
value::Value,
};
use regex::Regex;
use std::collections::HashMap;
pub fn parse_with_regex<'a>(
s: &'a str,
keyword: &Regex,
regex: &Regex,
ret: &mut Vec<&'a str>,
) -> Value {
if s.is_empty() {
return Value::default();
}
let mut toker: Tokenizer<'a> = tokenizer::Tokenizer::<'a>::new(s);
toker.get_char();
process(&mut toker, false, false, keyword, regex, ret)
}
/// Dispatch on the first significant character to the handler for the
/// next JSON value. Panics on a character that cannot start a value.
fn process<'a, 'input>(
    toker: &'a mut Tokenizer<'input>,
    is_key: bool,
    is_key_matched: bool,
    keyword: &Regex,
    regex: &Regex,
    ret: &'a mut Vec<&'input str>,
) -> Value {
    // End of input: nothing left to parse.
    if toker.ch.is_none() {
        return Value::default();
    }
    toker.skip_bc();
    // Arm order mirrors the original if-chain: letters are tried before
    // the digit/'-' check.
    match toker.ch.as_ref().unwrap().ch {
        '{' => proc_object(toker, is_key, is_key_matched, keyword, regex, ret),
        '"' => proc_string(toker, is_key, is_key_matched, keyword, regex, ret),
        '[' => proc_array(toker, is_key, is_key_matched, keyword, regex, ret),
        _ if toker.is_letter() => {
            proc_bool_null(toker, is_key, is_key_matched, keyword, regex, ret)
        }
        c if toker.is_digit() || c == '-' => {
            proc_number(toker, is_key, is_key_matched, keyword, regex, ret)
        }
        _ => panic!(
            "grammar error: should be Object/Array/String/bool/null/Number, current ch is:{:?}",
            toker.ch
        ),
    }
}
/// Consume a `true`/`false`/`null` literal. If the enclosing key matched
/// the keyword, run `regex` over the literal text and collect matches.
fn proc_bool_null<'a, 'b>(
    toker: &'a mut Tokenizer<'b>,
    _is_key: bool,
    is_key_matched: bool,
    _keyword: &Regex,
    regex: &Regex,
    ret: &'a mut Vec<&'b str>,
) -> Value {
    // Literal length: "true"/"null" are 4 chars, "false" is 5.
    let step_n = match toker.ch.as_ref().unwrap().ch {
        't' | 'n' => 4,
        'f' => 5,
        _ => panic!("grammar error: should be true/false or null"),
    };
    for _ in 0..step_n {
        toker.concat();
        toker.get_char();
    }
    let pos_range = toker.get_str_token();
    // Bug fix: previously any 4/5-char run starting with 't'/'n'/'f'
    // (e.g. "txyz") was silently accepted as a literal. Validate the
    // collected token. Token chars are ASCII, so `end + 1` is always a
    // char boundary.
    let token = &toker.s[pos_range.start..pos_range.end + 1];
    if token != "true" && token != "false" && token != "null" {
        panic!("grammar error: should be true/false or null")
    }
    if is_key_matched {
        for m in regex.find_iter(token) {
            ret.push(m.as_str().trim());
        }
    }
    Value::string(pos_range)
}
/// Consume a JSON number (optional sign, integer, fraction, exponent).
/// If the enclosing key matched the keyword, run `regex` over the number
/// text and collect matches.
fn proc_number<'a, 'b>(
    toker: &'a mut Tokenizer<'b>,
    _is_key: bool,
    is_key_matched: bool,
    _keyword: &Regex,
    regex: &Regex,
    ret: &'a mut Vec<&'b str>,
) -> Value {
    // Consume the current character into the token buffer and advance.
    fn take(t: &mut Tokenizer<'_>) {
        t.concat();
        t.get_char();
    }
    // Consume a (possibly empty) run of digits.
    fn take_digits(t: &mut Tokenizer<'_>) {
        while t.is_digit() {
            take(t);
        }
    }
    toker.skip_bc();
    // Optional leading minus; it must be followed by at least one digit.
    if toker.ch.as_ref().unwrap().ch == '-' {
        take(toker);
        if !toker.is_digit() {
            panic!("grammar error: -x x should be digit")
        }
    }
    // Integer part: a lone '0', or a run of digits.
    if toker.ch.as_ref().unwrap().ch == '0' {
        take(toker);
    } else {
        take_digits(toker);
    }
    // Optional fraction part.
    if matches!(toker.ch.as_ref(), Some(c) if c.ch == '.') {
        take(toker);
        take_digits(toker);
    }
    // Optional exponent part with an optional sign.
    if matches!(toker.ch.as_ref(), Some(c) if c.ch == 'E' || c.ch == 'e') {
        take(toker);
        let sign = toker.ch.as_ref().unwrap().ch;
        if sign == '-' || sign == '+' {
            take(toker);
        }
        take_digits(toker);
    }
    let pos_range = toker.get_str_token();
    if is_key_matched {
        // Number tokens are ASCII, so `end + 1` stays on a char boundary.
        for m in regex.find_iter(&toker.s[pos_range.start..pos_range.end + 1]) {
            ret.push(m.as_str().trim());
        }
    }
    Value::string(pos_range)
}
/// Consume a JSON array, recursively parsing each element. The matched-key
/// flag is forwarded unchanged to every element.
fn proc_array<'a, 'b>(
    toker: &'a mut Tokenizer<'b>,
    is_key: bool,
    is_key_matched: bool,
    keyword: &'a Regex,
    regex: &'a Regex,
    ret: &'a mut Vec<&'b str>,
) -> Value {
    // Skip the opening '[' and any blanks/comments before the first element.
    toker.get_char();
    toker.skip_bc();
    let mut elements = Vec::new();
    // Collect elements until the closing ']'; an immediate ']' yields an
    // empty array without entering the loop.
    while toker.ch.as_ref().unwrap().ch != ']' {
        elements.push(process(toker, is_key, is_key_matched, keyword, regex, ret));
        toker.skip_bc();
        // A ',' separator is consumed; otherwise the loop re-tests for ']'.
        if toker.ch.as_ref().unwrap().ch == ',' {
            toker.get_char();
        }
    }
    // Skip the closing ']'.
    toker.get_char();
    Value::Array(elements)
}
/// Consume a quoted string. When the string is a VALUE whose key matched
/// the keyword, run `regex` over its contents and collect matches.
fn proc_string<'a, 'b>(
    toker: &'a mut Tokenizer<'b>,
    is_key: bool,
    is_key_matched: bool,
    _keyword: &Regex,
    regex: &Regex,
    ret: &'a mut Vec<&'b str>,
) -> Value {
    // Skip the opening quote.
    toker.get_char();
    // Empty string: the returned range points at the closing quote itself.
    if toker.ch.as_ref().unwrap().ch == '"' {
        let offset = toker.ch.as_ref().unwrap().offset;
        let pos_range = PosRange::new(offset, offset);
        toker.get_char();
        return Value::String(pos_range);
    }
    // Collect the content; escape sequences are kept verbatim (both the
    // backslash and the escaped character are consumed into the token).
    while toker.ch.as_ref().unwrap().ch != '"' {
        if toker.ch.as_ref().unwrap().ch == '\\' {
            toker.concat();
            toker.get_char();
        }
        toker.concat();
        toker.get_char();
    }
    // Skip the closing quote.
    toker.get_char();
    let pos_range = toker.get_str_token();
    // Bug fix: the condition was inverted (`!is_key_matched && !is_key`),
    // which extracted from every string whose key did NOT match the
    // keyword — contradicting proc_number/proc_bool_null, which push only
    // when `is_key_matched`. A string value participates only when its key
    // matched and it is not itself a key.
    if is_key_matched && !is_key {
        // The last token char may be multi-byte: advance `end` to the next
        // char boundary before slicing.
        let mut end = pos_range.end + 1;
        while !toker.s.is_char_boundary(end) {
            end += 1;
        }
        for m in regex.find_iter(&toker.s[pos_range.start..end]) {
            ret.push(m.as_str().trim());
        }
    }
    Value::string(pos_range)
}
/// Consume a JSON object. Each key is tested against `keyword`; the value
/// under a matching key is parsed with `is_key_matched = true` so the leaf
/// handlers apply `regex` to it. Keys of an already-matched object are not
/// re-tested (see the `!is_key_matched` guard below).
fn proc_object<'a, 'b>(
    toker: &'a mut Tokenizer<'b>,
    _is_key: bool,
    is_key_matched: bool,
    keyword: &Regex,
    regex: &Regex,
    ret: &'a mut Vec<&'b str>,
) -> Value {
    // Skip the opening '{' and any blanks/comments.
    toker.get_char();
    toker.skip_bc();
    // Empty object: consume '}' and return an empty map.
    if toker.ch.as_ref().unwrap().ch == '}' {
        toker.get_char();
        let map = HashMap::new();
        return Value::object(map);
    }
    let mut map = HashMap::with_capacity(32);
    while toker.ch.as_ref().unwrap().ch != '}' {
        toker.skip_bc();
        // Keys must parse as strings; their PosRange becomes the map key.
        let key = match process(toker, true, is_key_matched, keyword, regex, ret) {
            Value::String(key) => key,
            _ => panic!("grammar error: key type error!"),
        };
        let mut is_match = false;
        // NOTE(review): `key.start != key.end` is meant to skip empty keys
        // (whose range points at the closing quote, see proc_string), but it
        // also suppresses keyword matching for single-character keys, whose
        // range is likewise a single position — confirm this is intended.
        if !is_key_matched && key.start != key.end {
            // The key may end on a multi-byte char: advance to a boundary.
            let mut end = key.end + 1;
            while !toker.s.is_char_boundary(end) {
                end += 1;
            }
            if keyword.is_match(&toker.s[key.start..end]) {
                is_match = true;
            }
        }
        toker.skip_bc();
        if toker.ch.as_ref().unwrap().ch == ':' {
            toker.get_char();
            // The value is parsed with the per-key match result, so only the
            // direct value of a matched key is flagged for extraction.
            let value: Value = process(toker, false, is_match, keyword, regex, ret);
            map.insert(key, value);
        } else {
            panic!("grammar error: need colon symbol : , not: {:?}", toker.ch)
        }
        toker.skip_bc();
        // A ',' separator is consumed; otherwise the loop re-tests for '}'.
        if toker.ch.as_ref().unwrap().ch == ',' {
            toker.get_char();
        }
    }
    // Skip the closing '}'.
    toker.get_char();
    Value::object(map)
}