use crate::combinators::{Ignore, Maybe, Repeat, RepeatSpec, Sequence};
use crate::parser::{execerr, ParseError, ParseResult, Parser};
use crate::state::ParseState;
use std::collections::HashSet;
use std::error::Error;
use std::iter::FromIterator;
use std::str::{self, FromStr};
/// Parser for one fixed piece of text.
///
/// The wrapped `String` is the literal that `parse` compares the input against.
#[derive(Clone, Debug, PartialEq)]
pub struct StringParser(String);

impl StringParser {
    /// Builds a parser for the literal `s`; an owned copy of the text is stored.
    pub fn new<S: AsRef<str>>(s: S) -> StringParser {
        let literal: &str = s.as_ref();
        StringParser(String::from(literal))
    }
}
impl Parser for StringParser {
    type Result = String;

    /// Tries to consume the stored literal from the front of `st`.
    ///
    /// On success the hold taken on entry is released and a copy of the literal is returned.
    /// On a mismatch (or when the input ends early) the state is reset to that hold and
    /// `ParseError::Fail` carries the index that had been reached before the reset.
    fn parse(
        &mut self,
        st: &mut ParseState<impl Iterator<Item = char>>,
    ) -> ParseResult<Self::Result> {
        // Progress is measured in UTF-8 bytes so it can be compared with `String::len`.
        let wanted_bytes = self.0.len();
        let mut matched_bytes = 0;
        let mut literal = self.0.chars();
        let hold = st.hold();

        loop {
            // Advance the literal and look at the input once per round; stop as soon as
            // either side runs out or the two characters differ.
            match (literal.next(), st.peek()) {
                (Some(want), Some(got)) if want == got => {
                    let consumed = st.next().unwrap();
                    matched_bytes += consumed.len_utf8();
                }
                _ => break,
            }
        }

        if matched_bytes != wanted_bytes {
            // Remember how far we got before rewinding, for the error report.
            let ix = st.index();
            st.reset(hold);
            return Err(ParseError::Fail("string not matched", ix));
        }

        st.release(hold);
        Ok(self.0.clone())
    }
}
/// Integer parser, generic over the integer type it produces.
///
/// The wrapped value is only a type carrier: in this file it is set to `IType::default()`
/// by `new` and not read afterwards.
pub struct Int<IType>(IType)
where
    IType: Default + str::FromStr;

// Signed flavours.
pub type Int128 = Int<i128>;
pub type Int64 = Int<i64>;
pub type Int32 = Int<i32>;
pub type Int16 = Int<i16>;
pub type Int8 = Int<i8>;

// Unsigned flavours.
pub type Uint128 = Int<u128>;
pub type Uint64 = Int<u64>;
pub type Uint32 = Int<u32>;
pub type Uint16 = Int<u16>;
pub type Uint8 = Int<u8>;

impl<IType> Int<IType>
where
    IType: Default + str::FromStr,
{
    /// Creates a parser for `IType`, holding the type's default value.
    pub fn new() -> Int<IType> {
        Int(Default::default())
    }
}
impl<IType: Default + str::FromStr<Err = std::num::ParseIntError> + std::convert::TryFrom<i8>>
    Parser for Int<IType>
{
    type Result = IType;

    /// Reads a decimal integer of type `IType` from the front of `st`.
    ///
    /// For signed targets an optional leading `-` is accepted. The collected text is handed
    /// to `IType::from_str`, which performs the actual conversion and range check.
    ///
    /// # Errors
    ///
    /// * `ParseError::EOF` - signed target and no input left.
    /// * `ParseError::Fail("not start of integer", ..)` - signed target and the next character
    ///   is neither `-` nor a digit.
    /// * `ParseError::Fail("no appropriate integer found", ..)` - no character was collected.
    /// * `ParseError::ExecFail(..)` - `from_str` rejected the text (e.g. a lone `-` or a value
    ///   out of range); the state is reset to the hold first.
    fn parse(
        &mut self,
        st: &mut ParseState<impl Iterator<Item = char>>,
    ) -> ParseResult<Self::Result> {
        // Enough for most integers without regrowing; longer inputs simply grow the string.
        const EXPECTED_LEN: usize = 16;
        let mut text = String::with_capacity(EXPECTED_LEN);

        // Only signed integer types can represent -1, so this distinguishes signed targets.
        if IType::try_from(-1i8).is_ok() {
            match st.peek() {
                None => return Err(ParseError::EOF),
                Some(c) if c == '-' || c.is_digit(10) => text.push(c),
                Some(_) => return Err(ParseError::Fail("not start of integer", st.index())),
            }
        }

        let hold = st.hold();
        if !text.is_empty() {
            // The first character was only peeked above; consume it now.
            st.next();
        }

        loop {
            match st.next() {
                Some(c) if c.is_digit(10) => text.push(c),
                Some(_) => {
                    // Went one character too far; hand the non-digit back.
                    st.undo_next();
                    break;
                }
                None => break,
            }
        }

        if text.is_empty() {
            st.reset(hold);
            return Err(ParseError::Fail("no appropriate integer found", st.index()));
        }

        match IType::from_str(&text) {
            Ok(value) => {
                st.release(hold);
                Ok(value)
            }
            Err(e) => {
                st.reset(hold);
                // `Display` for `ParseIntError` yields the same text the deprecated
                // `Error::description` returned.
                Err(ParseError::ExecFail(e.to_string()))
            }
        }
    }
}
/// Combines the pieces recognised by `float()` into an `f64`.
///
/// * `s` - `Some(())` when a leading minus sign was present.
/// * `big` - the digits before the decimal point.
/// * `dot` - `Some(_)` when a decimal point was present.
/// * `little` - the digits after the decimal point, if any.
/// * `exp` - the decimal exponent, if any.
///
/// The pieces are joined back into a single literal and converted with one call to
/// `f64::from_str`. Converting the whole literal at once avoids the extra rounding steps of
/// adding the parts and scaling by a power of ten, and avoids the NaN that `0 * inf` produced
/// for inputs such as `0e400`.
///
/// # Errors
///
/// Returns an execution error if fraction digits are supplied without a decimal point, or if
/// the assembled literal is rejected by `f64::from_str`.
fn assemble_float(
    s: Option<()>,
    big: String,
    dot: Option<String>,
    little: Option<String>,
    exp: Option<((), i32)>,
) -> ParseResult<f64> {
    let fraction = match (dot, little) {
        (Some(_), Some(digits)) => Some(digits),
        // "1." is read as "1.0".
        (Some(_), None) => Some("0".to_string()),
        (None, None) => None,
        // Inconsistent input: report it instead of aborting the whole program.
        (None, Some(_)) => return Err(execerr("fraction digits without decimal point")),
    };

    let mut literal = String::with_capacity(big.len() + 16);
    if s.is_some() {
        literal.push('-');
    }
    literal.push_str(&big);
    if let Some(digits) = fraction {
        literal.push('.');
        literal.push_str(&digits);
    }
    if let Some((_, e)) = exp {
        literal.push('e');
        literal.push_str(&e.to_string());
    }

    f64::from_str(&literal).map_err(|e| execerr(e.to_string().as_str()))
}
/// Builds a parser for decimal floating point numbers.
///
/// The accepted shape, in order: an optional `-`, one or more digits, an optional `.`,
/// optional further digits, and an optional exponent written as `e` followed by an `Int32`.
/// The recognised pieces are turned into an `f64` by `assemble_float`.
pub fn float() -> impl Parser<Result = f64> {
    const DIGITS: &str = "0123456789";

    let sign = Maybe::new(Ignore::new(StringParser::new("-")));
    let integral = string_of(DIGITS, RepeatSpec::Min(1));
    let separator = Maybe::new(StringParser::new("."));
    let fractional = Maybe::new(string_of(DIGITS, RepeatSpec::Min(1)));
    let exponent = Maybe::new(Sequence::new((
        Ignore::new(StringParser::new("e")),
        Int32::new(),
    )));

    Sequence::new((sign, integral, separator, fractional, exponent)).apply(
        |(sign, integral, separator, fractional, exponent)| {
            assemble_float(sign, integral, separator, fractional, exponent)
        },
    )
}
/// Parser that never looks at its input and always succeeds with `()`.
pub struct Nothing;

impl Parser for Nothing {
    type Result = ();

    /// Returns `Ok(())` without touching the parse state.
    fn parse(
        &mut self,
        _state: &mut ParseState<impl Iterator<Item = char>>,
    ) -> ParseResult<Self::Result> {
        let unit = ();
        Ok(unit)
    }
}
/// A set of characters to test membership against, stored in a shape chosen by its size.
enum MatchSpec {
    /// Exactly one character; compared directly.
    One(char),
    /// Zero, or two up to `MATCHSPEC_MANY_THRESHOLD` characters; scanned linearly.
    Some(Vec<char>),
    /// More than `MATCHSPEC_MANY_THRESHOLD` characters; looked up by hash.
    Many(HashSet<char>),
}

/// Largest number of characters kept in a `Vec` before switching to a `HashSet`.
const MATCHSPEC_MANY_THRESHOLD: usize = 20;

impl MatchSpec {
    /// Builds the set from the characters of `chars`.
    ///
    /// The representation is picked by the number of characters, not by the UTF-8 byte
    /// length, so a single non-ASCII character is still stored as `One` and the threshold
    /// applies uniformly to ASCII and non-ASCII input.
    fn new<S: AsRef<str>>(chars: S) -> MatchSpec {
        let chars = chars.as_ref();

        // Exactly one character: the first exists and there is no second.
        let mut probe = chars.chars();
        if let (Some(only), None) = (probe.next(), probe.next()) {
            return MatchSpec::One(only);
        }

        if chars.chars().count() <= MATCHSPEC_MANY_THRESHOLD {
            MatchSpec::Some(chars.chars().collect())
        } else {
            MatchSpec::Many(chars.chars().collect())
        }
    }

    /// Returns `true` if `c` is a member of the set.
    fn matches(&self, c: char) -> bool {
        match self {
            MatchSpec::One(cc) => c == *cc,
            MatchSpec::Some(cs) => cs.contains(&c),
            MatchSpec::Many(cs) => cs.contains(&c),
        }
    }
}
/// Single-character parser driven by a character set.
///
/// The first field is the set; the second is `true` when the test is inverted, i.e. the
/// parser accepts characters that are *not* in the set.
pub struct OneOf(MatchSpec, bool);

impl OneOf {
    /// Accepts any one character contained in `chars`.
    pub fn new<S: AsRef<str>>(chars: S) -> OneOf {
        let inverted = false;
        OneOf(MatchSpec::new(chars), inverted)
    }

    /// Accepts any one character that is not contained in `chars`.
    pub fn new_none_of<S: AsRef<str>>(chars: S) -> OneOf {
        let inverted = true;
        OneOf(MatchSpec::new(chars), inverted)
    }
}
impl Parser for OneOf {
    type Result = char;

    /// Consumes and returns the next character if it passes the set test.
    ///
    /// Returns `ParseError::EOF` when no input is left, and `ParseError::Fail` (without
    /// consuming anything) when the next character is rejected.
    fn parse(
        &mut self,
        st: &mut ParseState<impl Iterator<Item = char>>,
    ) -> ParseResult<Self::Result> {
        let candidate = match st.peek() {
            Some(c) => c,
            None => return Err(ParseError::EOF),
        };

        // Membership and the inversion flag must disagree for the character to be accepted:
        // in-set for a normal parser, not-in-set for an inverted one.
        let accepted = self.0.matches(candidate) != self.1;
        if !accepted {
            return Err(ParseError::Fail("char not matched", st.index()));
        }

        st.next();
        Ok(candidate)
    }
}
/// Builds a parser that collects a run of characters, each contained in `chars`, into a
/// `String`. How many characters make up the run is governed by `rp`.
pub fn string_of<S: AsRef<str>>(chars: S, rp: RepeatSpec) -> impl Parser<Result = String> {
    Repeat::new(OneOf::new(chars), rp)
        .apply(|matched: Vec<char>| Ok(String::from_iter(matched)))
}
/// Builds a parser that collects a run of characters, none of which is contained in `chars`,
/// into a `String`. How many characters make up the run is governed by `rp`.
pub fn string_none_of<S: AsRef<str>>(chars: S, rp: RepeatSpec) -> impl Parser<Result = String> {
    Repeat::new(OneOf::new_none_of(chars), rp)
        .apply(|matched: Vec<char>| Ok(String::from_iter(matched)))
}
/// Builds a parser that matches a run of spaces, newlines, carriage returns and tabs and
/// discards what it matched.
///
/// The run length is governed by `RepeatSpec::Any` (presumably "zero or more" - confirm
/// against `RepeatSpec`).
pub fn whitespace() -> impl Parser<Result = ()> {
    let blank = OneOf::new(" \n\r\t");
    let run = Repeat::new(blank, RepeatSpec::Any);
    Ignore::new(run)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::combinators::Sequence;
    use std::iter;

    // A literal is consumed from the front of the input and the index advances past it.
    #[test]
    fn test_parse_string() {
        let mut s = ParseState::new("abc def");
        let mut p = StringParser::new("abc ".to_owned());
        assert_eq!(Ok("abc ".to_owned()), p.parse(&mut s));
        assert_eq!(4, s.index());
    }

    // Signed and unsigned parsers interleaved on one input; the unsigned parser must refuse
    // a leading minus sign and leave the input for the signed parser.
    #[test]
    fn test_parse_int() {
        let mut s = ParseState::new("-1252 353 354 -1253 422345");
        let mut ip = Int64::new();
        let mut up = Uint64::new();
        let mut sp = StringParser::new(" ".to_string());
        assert_eq!(Ok(-1252), ip.parse(&mut s));
        assert_eq!(Ok(" ".to_string()), sp.parse(&mut s));
        assert_eq!(Ok(353), ip.parse(&mut s));
        assert_eq!(Ok(" ".to_string()), sp.parse(&mut s));
        assert_eq!(Ok(354), up.parse(&mut s));
        assert_eq!(Ok(" ".to_string()), sp.parse(&mut s));
        assert!(up.parse(&mut s).is_err());
        assert_eq!(Ok(-1253), ip.parse(&mut s));
        assert_eq!(Ok(" ".to_string()), sp.parse(&mut s));
        assert_eq!(Ok(422345), up.parse(&mut s));
    }

    // Covers short input as well as inputs longer than 16 characters, for both an unsigned
    // and a signed 128-bit target.
    #[test]
    fn test_parse_long_int() {
        let mut s = ParseState::new("123456789");
        let mut up = Uint128::new();
        assert_eq!(Ok(123456789 as u128), up.parse(&mut s));

        let mut long_unsigned = ParseState::new("123456789012345678901234567890");
        assert_eq!(
            Ok(123456789012345678901234567890u128),
            up.parse(&mut long_unsigned)
        );

        let mut long_signed = ParseState::new("-12345678901234567890123");
        let mut ip = Int128::new();
        assert_eq!(
            Ok(-12345678901234567890123i128),
            ip.parse(&mut long_signed)
        );
    }

    // Each expected value is followed by a single space that is skipped between numbers.
    #[test]
    fn test_parse_floats() {
        let mut ps = ParseState::new("1 1. 1.5 -1.5 -1.75 2.5e-4 -2e-2");
        let mut p = float();
        let want = vec![1., 1., 1.5, -1.5, -1.75, 2.5e-4, -0.02];
        for &f in want.iter() {
            assert_eq!(Ok(f), p.parse(&mut ps));
            let _ = StringParser::new(" ").parse(&mut ps);
        }
    }

    #[test]
    fn test_string_of() {
        let mut st = ParseState::new("aaabcxxzy");
        let mut p = string_of("abcd", RepeatSpec::Min(1));
        assert_eq!(Ok("aaabc".to_string()), p.parse(&mut st));
    }

    #[test]
    fn test_string_none_of() {
        let mut st = ParseState::new("aaabcxxzy");
        let mut p = string_none_of("xyz", RepeatSpec::Min(1));
        assert_eq!(Ok("aaabc".to_string()), p.parse(&mut st));
    }

    // Timing run: 16-character pieces first, then 17-character pieces.
    #[test]
    fn bench_integer_medium() {
        let piece = "-422345812310928 ";
        let repeats = 1000;
        let mut input = String::with_capacity(piece.len() * repeats);
        input.extend(iter::repeat(piece).take(repeats));
        let mut ps = ParseState::new(&input);
        let mut p = Sequence::new((Int64::new(), StringParser::new(" ")));
        {
            time_test!("parse-int with static buffer");
            for _ in 0..1000 {
                let _ = p.parse(&mut ps);
            }
        }

        let piece = "-4223458123109289 ";
        let mut input = String::with_capacity(piece.len() * repeats);
        input.extend(iter::repeat(piece).take(repeats));
        let mut ps = ParseState::new(&input);
        {
            time_test!("parse-int with dynamic buffer");
            for _ in 0..1000 {
                let _ = p.parse(&mut ps);
            }
        }
    }

    // Timing run for the combinator-based float parser.
    #[test]
    fn bench_float() {
        let piece = "-32.334 ";
        let repeats = 1000;
        let mut input = String::with_capacity(piece.len() * repeats);
        input.extend(iter::repeat(piece).take(repeats));
        let mut ps = ParseState::new(&input);
        let mut p = Sequence::new((float(), StringParser::new(" ")));
        {
            time_test!("parse-float with combinators");
            for _ in 0..1000 {
                let _ = p.parse(&mut ps);
            }
        }
    }
}