use dcbor_parse::parse_dcbor_item_partial;
use crate::{DCBORPattern, Error, Pattern, Result};
/// Advances `*pos` past any run of whitespace in `src` beginning at `*pos`.
///
/// The characters treated as whitespace are space, tab, line feed, carriage
/// return and form feed (`\u{0c}`). `*pos` is left unchanged when the
/// character at that offset is anything else, or when `*pos` is already at the
/// end of `src`.
///
/// # Panics
///
/// Panics if `*pos` is past the end of `src` or does not lie on a UTF-8
/// character boundary, because `src` is sliced at that offset.
pub(crate) fn skip_ws(src: &str, pos: &mut usize) {
    let remaining = &src[*pos..];
    // Sum the encoded widths of the leading whitespace characters rather than
    // stepping the cursor one character at a time.
    let skipped: usize = remaining
        .chars()
        .take_while(|c| matches!(c, ' ' | '\t' | '\n' | '\r' | '\u{0c}'))
        .map(char::len_utf8)
        .sum();
    *pos += skipped;
}
pub(crate) fn parse_text_regex(src: &str) -> Result<(regex::Regex, usize)> {
let mut pos = 0;
skip_ws(src, &mut pos);
if pos >= src.len() || src.as_bytes()[pos] != b'/' {
return Err(Error::UnterminatedRegex(pos..pos));
}
pos += 1;
let start = pos;
let mut escape = false;
while pos < src.len() {
let b = src.as_bytes()[pos];
pos += 1;
if escape {
escape = false;
continue;
}
if b == b'\\' {
escape = true;
continue;
}
if b == b'/' {
let inner = &src[start..pos - 1];
let regex = regex::Regex::new(inner)
.map_err(|_| Error::InvalidRegex(pos..pos))?;
skip_ws(src, &mut pos);
return Ok((regex, pos));
}
}
Err(Error::UnterminatedRegex(pos..pos))
}
/// Parses the inner text of a CBOR pattern from the start of `src`.
///
/// After optional leading whitespace, two forms are handled:
///
/// - `/…/` — the text between the slashes is parsed with
///   `DCBORPattern::parse` and wrapped via `Pattern::cbor_pattern`. A
///   backslash escapes the byte that follows it, so `\/` does not end the
///   pattern. Whitespace after the closing `/` is consumed.
/// - anything else (including input beginning with `ur:`) — the text is handed
///   to `parse_dcbor_item_partial` and the resulting value is wrapped via
///   `Pattern::cbor`.
///
/// Returns the pattern and the byte offset in `src` at which parsing stopped.
///
/// # Errors
///
/// - `Error::InvalidPattern` spanning the text between the slashes if
///   `DCBORPattern::parse` rejects it.
/// - `Error::UnterminatedRegex` spanning from the opening `/` to the end of
///   `src` if no closing `/` is found.
/// - `Error::Unknown` if the CBOR item cannot be parsed or re-decoded.
pub(crate) fn parse_cbor_inner(src: &str) -> Result<(Pattern, usize)> {
    let mut pos = 0;
    skip_ws(src, &mut pos);

    if src[pos..].starts_with('/') {
        pos += 1;
        let start = pos;
        let mut escape = false;
        while pos < src.len() {
            let b = src.as_bytes()[pos];
            pos += 1;
            if escape {
                // The byte after a backslash is taken literally.
                escape = false;
            } else if b == b'\\' {
                escape = true;
            } else if b == b'/' {
                // `pos` is already one past the closing slash.
                let pattern_str = &src[start..pos - 1];
                let dcbor_pattern = DCBORPattern::parse(pattern_str)
                    .map_err(|_| Error::InvalidPattern(start..pos - 1))?;
                skip_ws(src, &mut pos);
                return Ok((Pattern::cbor_pattern(dcbor_pattern), pos));
            }
        }
        return Err(Error::UnterminatedRegex(start - 1..pos));
    }

    // Every non-slash input, `ur:` strings included, takes this one path.
    let (cbor_v20, consumed) =
        parse_dcbor_item_partial(&src[pos..]).map_err(|_| Error::Unknown)?;
    // Re-decode from the encoded bytes to obtain a `dcbor::CBOR`.
    // NOTE(review): presumably the parser's CBOR type differs from this
    // crate's `dcbor::CBOR` — confirm.
    let bytes = cbor_v20.to_cbor_data();
    let cbor = dcbor::CBOR::try_from_data(bytes).map_err(|_| Error::Unknown)?;
    Ok((Pattern::cbor(cbor), pos + consumed))
}
pub(crate) fn parse_array_inner(src: &str) -> Result<(Pattern, usize)> {
let mut pos = 0;
skip_ws(src, &mut pos);
if src[pos..].starts_with('*') {
pos += 1;
skip_ws(src, &mut pos);
return Ok((Pattern::any_array(), pos));
}
if src[pos..].starts_with('{') {
pos += 1;
skip_ws(src, &mut pos);
let start_pos = pos;
while pos < src.len()
&& src[pos..].chars().next().unwrap().is_ascii_digit()
{
pos += 1;
}
if pos == start_pos {
return Err(Error::InvalidRange(pos..pos));
}
let first_num: usize = src[start_pos..pos]
.parse()
.map_err(|_| Error::InvalidNumberFormat(start_pos..pos))?;
skip_ws(src, &mut pos);
if pos >= src.len() {
return Err(Error::UnexpectedEndOfInput);
}
let ch = src[pos..].chars().next().unwrap();
match ch {
'}' => {
pos += 1;
skip_ws(src, &mut pos);
return Ok((Pattern::array_with_count(first_num), pos));
}
',' => {
pos += 1;
skip_ws(src, &mut pos);
if pos >= src.len() {
return Err(Error::UnexpectedEndOfInput);
}
let ch = src[pos..].chars().next().unwrap();
if ch == '}' {
pos += 1;
skip_ws(src, &mut pos);
return Ok((Pattern::array_with_range(first_num..), pos));
} else if ch.is_ascii_digit() {
let start_pos = pos;
while pos < src.len()
&& src[pos..].chars().next().unwrap().is_ascii_digit()
{
pos += 1;
}
let second_num: usize =
src[start_pos..pos].parse().map_err(|_| {
Error::InvalidNumberFormat(start_pos..pos)
})?;
skip_ws(src, &mut pos);
if pos >= src.len() || !src[pos..].starts_with('}') {
return Err(Error::UnexpectedEndOfInput);
}
pos += 1;
skip_ws(src, &mut pos);
return Ok((
Pattern::array_with_range(first_num..=second_num),
pos,
));
} else {
return Err(Error::InvalidRange(pos..pos));
}
}
_ => return Err(Error::InvalidRange(pos..pos)),
}
}
let pattern_str = format!("[{}]", &src[pos..]);
match DCBORPattern::parse(&pattern_str) {
Ok(dcbor_pattern) => {
let consumed = src.len() - pos; Ok((Pattern::array_from_dcbor_pattern(dcbor_pattern), consumed))
}
Err(_) => Err(Error::InvalidPattern(pos..src.len())),
}
}
/// Reads one bare word from the start of `src`.
///
/// Leading whitespace is skipped; the word then extends up to (but not
/// including) the next space, tab, line feed, carriage return, form feed or
/// `)`, or to the end of `src` if none of those occurs.
///
/// Returns the word as an owned `String` together with the byte offset in
/// `src` just past the word and any whitespace that follows it.
///
/// # Errors
///
/// Returns `Error::UnexpectedEndOfInput` when the word would be empty, i.e.
/// when nothing but whitespace remains or the first non-whitespace character
/// is `)`.
pub(crate) fn parse_bare_word(src: &str) -> Result<(String, usize)> {
    let mut word_start = 0;
    skip_ws(src, &mut word_start);

    let tail = &src[word_start..];
    let word_len = tail
        .find(|c: char| matches!(c, ' ' | '\t' | '\n' | '\r' | '\u{0c}' | ')'))
        .unwrap_or(tail.len());
    if word_len == 0 {
        return Err(Error::UnexpectedEndOfInput);
    }

    let word = tail[..word_len].to_string();
    let mut end = word_start + word_len;
    skip_ws(src, &mut end);
    Ok((word, end))
}