use super::parser;
use super::{Error, Result};
use crate::range::RangeArgument;
use crate::set::Set;
use bstr::decode_utf8;
use std::fmt::Debug;
use std::ops::{Add, BitOr, Mul, Neg, Not, Shr, Sub};
use std::str;
/// Newtype around the byte-level [`parser::Parser`] (input symbol type `u8`).
///
/// [`Parser::collect`] reinterprets the matched bytes as `&str` without
/// validating them, so every parser wrapped in this type is expected to match
/// only valid UTF-8.
pub struct Parser<'a, O>(parser::Parser<'a, u8, O>);
impl<'a, O> Parser<'a, O> {
    /// Build a parser from a raw parse function.
    ///
    /// `parse` receives the whole input and a starting byte index and returns
    /// the output together with a `usize` position (the combinators in this
    /// module return the byte index just past the match).
    ///
    /// NOTE(review): nothing here checks that `parse` only matches valid
    /// UTF-8, yet [`Parser::collect`] relies on that — callers must uphold it.
    pub fn new<P>(parse: P) -> Self
    where
        P: Fn(&'a [u8], usize) -> Result<(O, usize)> + 'a,
    {
        let inner = parser::Parser::new(parse);
        Self(inner)
    }

    /// Replace the output with the matched input, exposed as `&str`.
    ///
    /// Delegates to the inner parser's `collect` and converts the resulting
    /// byte slice without validation.
    pub fn collect(self) -> Parser<'a, &'a str>
    where
        O: 'a,
    {
        let Self(inner) = self;
        let as_text = inner.collect().map(|bytes| {
            // SAFETY: relies on the invariant that this wrapper only ever
            // matches valid UTF-8, so the collected bytes are valid UTF-8.
            unsafe { str::from_utf8_unchecked(bytes) }
        });
        Parser(as_text)
    }

    /// Run the parser over `input` via the inner parser's `parse`.
    pub fn parse(&self, input: &'a [u8]) -> Result<O> {
        let Self(inner) = self;
        inner.parse(input)
    }

    /// Run the parser over `input` beginning at byte index `start`, via the
    /// inner parser's `parse_at`; returns the output and a `usize` position.
    pub fn parse_at(&self, input: &'a [u8], start: usize) -> Result<(O, usize)> {
        let Self(inner) = self;
        inner.parse_at(input, start)
    }

    /// Convenience form of [`Parser::parse`] that accepts a `&str`.
    pub fn parse_str(&self, input: &'a str) -> Result<O> {
        self.parse(input.as_bytes())
    }

    /// Transform the output with `f` (delegates to the inner parser's `map`).
    pub fn map<U, F>(self, f: F) -> Parser<'a, U>
    where
        F: Fn(O) -> U + 'a,
        O: 'a,
        U: 'a,
    {
        let Self(inner) = self;
        Parser(inner.map(f))
    }

    /// Transform the output with the fallible function `f` (delegates to the
    /// inner parser's `convert`, which decides how an `Err` is reported).
    pub fn convert<U, E, F>(self, f: F) -> Parser<'a, U>
    where
        F: Fn(O) -> ::std::result::Result<U, E> + 'a,
        E: Debug,
        O: 'a,
        U: 'a,
    {
        let Self(inner) = self;
        Parser(inner.convert(f))
    }

    /// Wrap the inner parser with its `cache` adaptor; requires `O: Clone`.
    pub fn cache(self) -> Self
    where
        O: Clone + 'a,
    {
        let Self(inner) = self;
        Self(inner.cache())
    }

    /// Wrap the inner parser with its `pos` adaptor, which outputs a `usize`.
    pub fn pos(self) -> Parser<'a, usize>
    where
        O: 'a,
    {
        let Self(inner) = self;
        Parser(inner.pos())
    }

    /// Wrap the inner parser with its `discard` adaptor, which outputs `()`.
    pub fn discard(self) -> Parser<'a, ()>
    where
        O: 'a,
    {
        let Self(inner) = self;
        Parser(inner.discard())
    }

    /// Wrap the inner parser with its `opt` adaptor, which outputs
    /// `Option<O>`.
    pub fn opt(self) -> Parser<'a, Option<O>>
    where
        O: 'a,
    {
        let Self(inner) = self;
        Parser(inner.opt())
    }

    /// Wrap the inner parser with its `repeat` adaptor for the given `range`,
    /// which outputs a `Vec<O>`.
    pub fn repeat<R>(self, range: R) -> Parser<'a, Vec<O>>
    where
        R: RangeArgument<usize> + Debug + 'a,
        O: 'a,
    {
        let Self(inner) = self;
        Parser(inner.repeat(range))
    }

    /// Attach `name` to the parser through the inner parser's `name`.
    pub fn name(self, name: &'a str) -> Self
    where
        O: 'a,
    {
        let Self(inner) = self;
        Self(inner.name(name))
    }

    /// Attach `name` to the parser through the inner parser's `expect`.
    pub fn expect(self, name: &'a str) -> Self
    where
        O: 'a,
    {
        let Self(inner) = self;
        Self(inner.expect(name))
    }
}
impl<'a, O> From<Parser<'a, O>> for parser::Parser<'a, u8, O> {
fn from(parser: Parser<'a, O>) -> Self {
parser.0 }
}
/// Decode the single UTF-8 character that begins at byte index `start`.
///
/// On success returns the character and the size reported by `decode_utf8`
/// (a size, not an end position). When no character can be decoded the error
/// is built by `no_utf8` at position `start`.
///
/// # Panics
///
/// Panics if `start` is greater than `slice.len()` (slice indexing).
pub fn decode(slice: &[u8], start: usize) -> Result<(char, usize)> {
    match decode_utf8(&slice[start..]) {
        (Some(ch), size) => Ok((ch, size)),
        (None, size) => no_utf8(start, size),
    }
}
/// Build the `Mismatch` error for a failed decode at byte index `start`.
///
/// A `size` of zero is reported as "end of input reached"; any other size is
/// reported as "not UTF-8".
fn no_utf8<T>(start: usize, size: usize) -> Result<T> {
    let reason = match size {
        0 => "end of input reached",
        _ => "not UTF-8",
    };
    Err(Error::Mismatch {
        message: reason.to_owned(),
        position: start,
    })
}
/// Match any single character and output it.
///
/// Fails with the error from [`decode`] when no character can be decoded at
/// the current position.
pub fn any<'a>() -> Parser<'a, char> {
    Parser::new(|input: &[u8], start: usize| {
        decode(input, start).map(|(ch, size)| (ch, start + size))
    })
}
pub fn sym<'a>(tag: char) -> Parser<'a, char> {
Parser::new(move |input: &[u8], start: usize| {
let (ch, size) = decode(input, start)?;
if ch != tag {
return Err(Error::Mismatch {
message: format!("expect: {}, found: {}", tag, ch),
position: start,
});
}
let pos = start + size;
Ok((ch, pos))
})
}
pub fn seq<'a, 'b: 'a>(tag_str: &'b str) -> Parser<'a, &'a str> {
let tag = tag_str.as_bytes();
Parser::new(move |input: &'a [u8], start: usize| {
let mut index = 0;
loop {
let pos = start + index;
if index == tag.len() {
let result = &input[start..pos];
let result_str = unsafe { str::from_utf8_unchecked(result) };
return Ok((result_str, pos));
}
let Some(s) = input.get(pos) else {
return Err(Error::Incomplete);
};
if tag[index] != *s {
return Err(Error::Mismatch {
message: format!("seq {:?} at byte index: {}", tag, pos),
position: pos,
});
}
index += 1;
}
})
}
pub fn one_of<'a, S>(set: &'a S) -> Parser<'a, char>
where
S: Set<char> + ?Sized,
{
Parser::new(move |input: &'a [u8], start: usize| {
let (ch, size) = decode(input, start)?;
if !set.contains(&ch) {
return Err(Error::Mismatch {
message: format!("expect one of: {}, found: {}", set.to_str(), ch),
position: start,
});
}
let pos = start + size;
Ok((ch, pos))
})
}
/// Match a single character that is NOT a member of `set` and output it.
///
/// Decode failures are propagated from [`decode`]; a character that belongs
/// to the set yields a `Mismatch` positioned at the start of that character.
pub fn none_of<'a, S>(set: &'a S) -> Parser<'a, char>
where
    S: Set<char> + ?Sized,
{
    Parser::new(move |input: &'a [u8], start: usize| {
        let (ch, size) = decode(input, start)?;
        if set.contains(&ch) {
            return Err(Error::Mismatch {
                // The character was rejected because it IS in the set, so the
                // message must say "none of" (it previously said "one of").
                message: format!("expect none of: {}, found: {}", set.to_str(), ch),
                position: start,
            });
        }
        let pos = start + size;
        Ok((ch, pos))
    })
}
pub fn is_a<'a, F>(predicate: F) -> Parser<'a, char>
where
F: Fn(char) -> bool + 'a,
{
Parser::new(move |input: &'a [u8], start: usize| {
let (ch, size) = decode(input, start)?;
if !predicate(ch) {
return Err(Error::Mismatch {
message: format!("is_a predicate failed on: {}", ch),
position: start,
});
}
let pos = start + size;
Ok((ch, pos))
})
}
/// Match a single character for which `predicate` returns `false` and output
/// it.
///
/// Decode failures are propagated from [`decode`]; a character accepted by
/// the predicate yields a `Mismatch` positioned at the start of that
/// character.
pub fn not_a<'a, F>(predicate: F) -> Parser<'a, char>
where
    F: Fn(char) -> bool + 'a,
{
    Parser::new(move |input: &'a [u8], start: usize| {
        let (ch, size) = decode(input, start)?;
        if predicate(ch) {
            return Err(Error::Mismatch {
                // Name the combinator that actually failed; the message
                // previously blamed `is_a`.
                message: format!("not_a predicate failed on: {}", ch),
                position: start,
            });
        }
        let pos = start + size;
        Ok((ch, pos))
    })
}
pub fn take<'a>(n: usize) -> Parser<'a, &'a str> {
Parser::new(move |input: &'a [u8], start: usize| {
let mut byte_pos = start;
for _ in 0..n {
let (ch, size) = decode_utf8(&input[start..]);
if ch.is_none() {
return no_utf8(byte_pos, size);
}
byte_pos += size;
}
let result = &input[start..byte_pos];
let result_str = unsafe { str::from_utf8_unchecked(result) };
Ok((result_str, byte_pos))
})
}
pub fn skip<'a>(n: usize) -> Parser<'a, ()> {
Parser::new(move |input: &'a [u8], start: usize| {
let mut byte_pos = start;
for _ in 0..n {
let (ch, size) = decode_utf8(&input[start..]);
if ch.is_none() {
return no_utf8(byte_pos, size);
}
byte_pos += size;
}
Ok(((), byte_pos))
})
}
pub fn take_bytes<'a>(n: usize) -> Parser<'a, &'a str> {
Parser::new(move |input: &'a [u8], start: usize| {
let mut byte_pos = start;
loop {
let (ch, size) = decode_utf8(&input[start..]);
if ch.is_none() {
return no_utf8(byte_pos, size);
}
byte_pos += size;
if byte_pos > n {
return Err(Error::Mismatch {
message: "range splits a UTF-8 character".to_owned(),
position: start,
});
}
if byte_pos == n {
let result = &input[start..byte_pos];
let result_str = unsafe { str::from_utf8_unchecked(result) };
return Ok((result_str, byte_pos));
}
}
})
}
pub fn skip_bytes<'a>(n: usize) -> Parser<'a, ()> {
Parser::new(move |input: &'a [u8], start: usize| {
let mut byte_pos = start;
loop {
let (ch, size) = decode_utf8(&input[start..]);
if ch.is_none() {
return no_utf8(byte_pos, size);
}
byte_pos += size;
if byte_pos > n {
return Err(Error::Mismatch {
message: "range splits a UTF-8 character".to_owned(),
position: start,
});
}
if byte_pos == n {
return Ok(((), byte_pos));
}
}
})
}
impl<'a, O: 'a, U: 'a, F: Fn(O) -> Parser<'a, U> + 'a> Shr<F> for Parser<'a, O> {
type Output = Parser<'a, U>;
fn shr(self, other: F) -> Self::Output {
Parser::new(move |input: &'a [u8], start: usize| {
(self.0.method)(input, start).and_then(|(out, pos)| (other(out).0.method)(input, pos))
})
}
}
/// UTF-8 wrapper around the byte-level `parser::empty` combinator; outputs
/// `()`.
pub fn empty<'a>() -> Parser<'a, ()> {
    let inner = parser::empty();
    Parser(inner)
}
pub fn list<'a, O, U>(item: Parser<'a, O>, separator: Parser<'a, U>) -> Parser<'a, Vec<O>>
where
O: 'a,
U: 'a,
{
Parser(parser::list(item.0, separator.0))
}
pub fn call<'a, O, F>(parser_factory: F) -> Parser<'a, O>
where
O: 'a,
F: Fn() -> Parser<'a, O> + 'a,
{
Parser(parser::call(move || parser_factory().0))
}
/// UTF-8 wrapper around the byte-level `parser::end` combinator; outputs
/// `()`.
pub fn end<'a>() -> Parser<'a, ()> {
    let inner = parser::end();
    Parser(inner)
}
// Generates a binary-operator impl where BOTH operands are UTF-8 parsers.
// The operator `$op` is applied to the two wrapped byte-level parsers and the
// result is wrapped again, so the output stays a UTF-8 `Parser`.
//
// Arguments: the operator trait (`$impl_name`), its method (`$fn_name`), the
// operator token (`$op`), the output value type (`$return_type`, may mention
// `Left`/`Right`) and the doc string attached to the impl (`$doc`).
macro_rules! utf_op {
( $impl_name:ident, $fn_name:ident, $op:tt, $return_type:ty, $doc:expr ) => {
#[doc=$doc]
impl<'a, Left: 'a, Right: 'a> $impl_name<Parser<'a, Right>> for Parser<'a, Left> {
type Output = Parser<'a, $return_type>;
fn $fn_name (self, other: Parser<'a, Right>) -> Self::Output {
Parser(self.0 $op other.0)
}
}
};
}
// Generates a binary-operator impl with a UTF-8 parser on the LEFT and a
// byte-level parser on the right. The result is NOT re-wrapped: the output is
// a plain byte-level `parser::Parser`, as the generated doc string notes
// ("degrade to non-utf8 parser").
//
// Takes the same arguments as `utf_op!`.
macro_rules! utf_u8_op {
( $impl_name:ident, $fn_name:ident, $op:tt, $return_type:ty, $doc:expr ) => {
#[doc=concat!($doc, " (but degrade to non-utf8 parser)")]
impl<'a, Left: 'a, Right: 'a> $impl_name<parser::Parser<'a, u8, Right>> for Parser<'a, Left> {
type Output = parser::Parser<'a, u8, $return_type>;
fn $fn_name (self, other: parser::Parser<'a, u8, Right>) -> Self::Output {
self.0 $op other
}
}
};
}
// Generates a binary-operator impl with a byte-level parser on the LEFT and a
// UTF-8 parser on the right. The result is a plain byte-level
// `parser::Parser`, as the generated doc string notes ("degrade to non-utf8
// parser").
//
// Takes the same arguments as `utf_op!`.
macro_rules! u8_utf_op {
( $impl_name:ident, $fn_name:ident, $op:tt, $return_type:ty, $doc:expr ) => {
#[doc=concat!($doc, " (but degrade to non-utf8 parser)")]
impl<'a, Left: 'a, Right: 'a> $impl_name<Parser<'a, Right>> for parser::Parser<'a, u8, Left> {
type Output = parser::Parser<'a, u8, $return_type>;
fn $fn_name (self, other: Parser<'a, Right>) -> Self::Output {
self $op other.0
}
}
};
}
// Expands to all three operand combinations for one operator:
// utf8 ∘ utf8 (`utf_op!`), utf8 ∘ bytes (`utf_u8_op!`) and bytes ∘ utf8
// (`u8_utf_op!`), forwarding the arguments unchanged.
macro_rules! all_op {
( $impl_name:ident, $fn_name:ident, $op:tt, $return_type:ty, $doc:expr ) => {
utf_op!($impl_name, $fn_name, $op, $return_type, $doc);
utf_u8_op!($impl_name, $fn_name, $op, $return_type, $doc);
u8_utf_op!($impl_name, $fn_name, $op, $return_type, $doc);
};
}
// Sequencing operators, each generated for all three operand combinations:
//   `a + b` outputs `(Left, Right)`,
//   `a - b` outputs `Left`,
//   `a * b` outputs `Right`.
all_op!(Add, add, +, (Left, Right), "Sequence reserve value");
all_op!(Sub, sub, -, Left, "Sequence discard second value");
all_op!(Mul, mul, *, Right, "Sequence discard first value");
impl<'a, O: 'a> BitOr for Parser<'a, O> {
type Output = Self;
fn bitor(self, other: Self) -> Self {
Self(self.0 | other.0)
}
}
impl<'a, O: 'a> BitOr<parser::Parser<'a, u8, O>> for Parser<'a, O> {
type Output = parser::Parser<'a, u8, O>;
fn bitor(self, other: parser::Parser<'a, u8, O>) -> Self::Output {
self.0 | other
}
}
impl<'a, O: 'a> BitOr<Parser<'a, O>> for parser::Parser<'a, u8, O> {
type Output = parser::Parser<'a, u8, O>;
fn bitor(self, other: Parser<'a, O>) -> Self::Output {
self | other.0
}
}
impl<'a, O: 'a> Neg for Parser<'a, O> {
type Output = Parser<'a, bool>;
fn neg(self) -> Self::Output {
Parser(-self.0)
}
}
impl<'a, O: 'a> Not for Parser<'a, O> {
type Output = Parser<'a, bool>;
fn not(self) -> Self::Output {
Parser(!self.0)
}
}