use std::{
io::{Error, ErrorKind, Read},
marker::PhantomData,
};
use crate::decoder::ByteStreamCharDecoder;
/// Character-level parsing helper built on top of a character decoder.
///
/// `D` is the decoder type that supplies characters and `R` is the reader
/// type that decoder is parameterised over. The parser itself stores no
/// value of type `R`.
#[derive(Debug)]
pub struct Parser<D, R> {
// Source of decoded characters.
decoder: D,
// Single-character lookahead: a character that has already been taken
// from `decoder` but has not been consumed yet, if any.
peek: Option<char>,
// Zero-sized marker so that the type parameter `R` is used by the struct.
read_phantom: PhantomData<R>,
}
impl<D, R> Parser<D, R>
where
    D: ByteStreamCharDecoder<R>,
    R: Read,
{
    /// Creates a parser that pulls its characters from `decoder`.
    ///
    /// The lookahead buffer starts out empty; nothing is decoded until the
    /// first call that needs a character.
    pub fn wrap(decoder: D) -> Parser<D, R> {
        Parser {
            peek: None,
            read_phantom: PhantomData,
            decoder,
        }
    }

    /// Reports whether at least one more character is available.
    ///
    /// This may decode one character into the lookahead buffer; it never
    /// consumes anything.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the decoder.
    pub fn has_more(&mut self) -> Result<bool, Error> {
        let upcoming = self.peek()?;
        Ok(upcoming.is_some())
    }

    /// Returns the next character without consuming it, or `None` when the
    /// decoder reports no further character.
    ///
    /// A decoded character is kept in the lookahead buffer until it is
    /// consumed. An end-of-input answer is not cached: while the buffer is
    /// empty, every call asks the decoder again.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the decoder; the buffer is left
    /// untouched in that case.
    pub fn peek(&mut self) -> Result<Option<char>, Error> {
        if self.peek.is_none() {
            // Storing `None` back is a no-op, so only a real character ends
            // up buffered.
            self.peek = self.decoder.decode_char()?;
        }
        Ok(self.peek)
    }

    /// Consumes and returns the next character, preferring the buffered
    /// lookahead character over a fresh decode.
    fn read_next(&mut self) -> Result<Option<char>, Error> {
        if let Some(buffered) = self.peek.take() {
            return Ok(Some(buffered));
        }
        self.decoder.decode_char()
    }

    /// Discards the next character: the buffered one if present, otherwise
    /// one freshly decoded character (if any).
    fn skip_next(&mut self) -> Result<(), Error> {
        if self.peek.take().is_none() {
            self.decoder.decode_char()?;
        }
        Ok(())
    }

    /// Consumes and returns the next character.
    ///
    /// # Errors
    ///
    /// Returns an [`ErrorKind::InvalidData`] error when the input has ended,
    /// and propagates any error reported by the decoder.
    pub fn read(&mut self) -> Result<char, Error> {
        self.read_next()?.ok_or_else(|| {
            Error::new(
                ErrorKind::InvalidData,
                "Expected some character but found end of input.",
            )
        })
    }

    /// Tests whether the next character equals `c`, consuming nothing.
    ///
    /// Yields `false` at end of input.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the decoder.
    pub fn peek_sees(&mut self, c: char) -> Result<bool, Error> {
        let upcoming = self.peek()?;
        Ok(upcoming == Some(c))
    }

    /// Consumes the next character only if it equals `c`.
    ///
    /// Returns `true` when the character was consumed, `false` when the next
    /// character differs or the input has ended (nothing is consumed then).
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the decoder.
    pub fn accept(&mut self, c: char) -> Result<bool, Error> {
        let matched = self.peek()? == Some(c);
        if matched {
            self.skip_next()?;
        }
        Ok(matched)
    }

    /// Consumes the next character and insists that it equals `c`.
    ///
    /// The character is consumed even when it does not match.
    ///
    /// # Errors
    ///
    /// Returns an [`ErrorKind::InvalidData`] error when the character differs
    /// from `c` or the input has ended, and propagates any decoder error.
    pub fn require(&mut self, c: char) -> Result<(), Error> {
        match self.read_next()? {
            Some(found) if found == c => Ok(()),
            Some(found) => Err(Error::new(
                ErrorKind::InvalidData,
                format!("Expected character '{}' but found '{}'!", c, found),
            )),
            None => Err(Error::new(
                ErrorKind::InvalidData,
                format!("Input ends without expected character '{}'!", c),
            )),
        }
    }

    /// Consumes characters one by one and insists that they spell out `s`.
    ///
    /// Characters are consumed as they are compared, so on failure everything
    /// up to and including the offending character is gone. An empty `s`
    /// succeeds without touching the input.
    ///
    /// # Errors
    ///
    /// Returns an [`ErrorKind::InvalidData`] error on the first mismatch or
    /// when the input ends early, and propagates any decoder error.
    pub fn require_str(&mut self, s: &str) -> Result<(), Error> {
        for expected in s.chars() {
            let found = self.read_next()?.ok_or_else(|| {
                Error::new(
                    ErrorKind::InvalidData,
                    format!(
                        "Input ends before completion of required sequence \"{}\"!",
                        s
                    ),
                )
            })?;
            if found != expected {
                return Err(Error::new(
                    ErrorKind::InvalidData,
                    format!(
                        "Found character '{}' but expected '{}', to satisfy required sequence \"{}\"!",
                        found, expected, s
                    ),
                ));
            }
        }
        Ok(())
    }

    /// Collects characters up to, but not including, `stop_before`.
    ///
    /// Returns `None` when nothing could be collected, i.e. the input has
    /// ended or the very next character already is `stop_before`. Otherwise
    /// returns the (non-empty) collected text. The stop character itself is
    /// left unconsumed; if it never appears, the rest of the input is
    /// collected.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the decoder.
    pub fn read_up_to(&mut self, stop_before: char) -> Result<Option<String>, Error> {
        // Bail out before allocating when there is nothing to collect.
        let first = match self.peek()? {
            Some(ch) if ch != stop_before => ch,
            _ => return Ok(None),
        };
        self.skip_next()?;

        let mut collected = String::with_capacity(16);
        collected.push(first);
        while let Some(ch) = self.peek()?.filter(|&ch| ch != stop_before) {
            self.skip_next()?;
            collected.push(ch);
        }
        collected.shrink_to_fit();
        Ok(Some(collected))
    }

    /// Consumes characters for as long as `acceptable` approves of them.
    ///
    /// Stops at the first rejected character (left unconsumed) or at end of
    /// input. Returns `true` when at least one character was consumed.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the decoder.
    pub fn skip_while<P>(&mut self, acceptable: P) -> Result<bool, Error>
    where
        P: Fn(char) -> bool,
    {
        let mut skipped_any = false;
        loop {
            match self.peek()? {
                Some(ch) if acceptable(ch) => {
                    self.skip_next()?;
                    skipped_any = true;
                }
                _ => return Ok(skipped_any),
            }
        }
    }
}
#[cfg(test)]
mod tests {
    // Unit tests for `Parser`, driven through `Utf8Decoder` over in-memory
    // byte slices. Every test returns `TestOut` so that `?` can be used on
    // parser calls that are expected to succeed.
    use crate::decoder::Utf8Decoder;

    use super::*;

    type TestOut = Result<(), Error>;

    // --- read -------------------------------------------------------------

    #[test]
    fn read_empty() -> TestOut {
        let input = "";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        let outcome = parser.read();
        assert!(outcome.is_err());
        Ok(())
    }

    #[test]
    fn read_first_character() -> TestOut {
        let input = "a";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        let result = parser.read()?;
        assert_eq!(result, 'a');
        Ok(())
    }

    #[test]
    fn read_all_characters() -> TestOut {
        let input = "abc123<*>";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert_eq!(parser.read()?, 'a');
        assert_eq!(parser.read()?, 'b');
        assert_eq!(parser.read()?, 'c');
        assert_eq!(parser.read()?, '1');
        assert_eq!(parser.read()?, '2');
        assert_eq!(parser.read()?, '3');
        assert_eq!(parser.read()?, '<');
        assert_eq!(parser.read()?, '*');
        assert_eq!(parser.read()?, '>');
        // Reading past the last character must fail.
        let outcome = parser.read();
        assert!(outcome.is_err());
        Ok(())
    }

    // --- require ----------------------------------------------------------

    #[test]
    fn require_empty() -> TestOut {
        let input = "";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(parser.require('i').is_err());
        Ok(())
    }

    #[test]
    fn require_first_character_of_input() -> TestOut {
        let input = "i";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(parser.require('i').is_ok());
        Ok(())
    }

    #[test]
    fn require_last_character_of_input() -> TestOut {
        let input = "abci";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert_eq!(parser.read()?, 'a');
        assert_eq!(parser.read()?, 'b');
        assert_eq!(parser.read()?, 'c');
        assert!(parser.require('i').is_ok());
        Ok(())
    }

    #[test]
    fn require_end_of_input() -> TestOut {
        let input = "i";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert_eq!(parser.read()?, 'i');
        assert!(parser.require('D').is_err());
        Ok(())
    }

    // --- peek -------------------------------------------------------------

    #[test]
    fn peek_empty() -> TestOut {
        let input = "";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert_eq!(parser.peek()?, None);
        Ok(())
    }

    #[test]
    fn peek_exists_first_character() -> TestOut {
        let input = "a";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert_eq!(parser.peek()?, Some('a'));
        Ok(())
    }

    #[test]
    fn peek_exists() -> TestOut {
        let input = "input:a";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        parser.require_str("input:")?;
        assert_eq!(parser.peek()?, Some('a'));
        Ok(())
    }

    #[test]
    fn peek_no_more_input() -> TestOut {
        let input = "input:";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        parser.require_str("input:")?;
        assert_eq!(parser.peek()?, None);
        Ok(())
    }

    // --- require_str ------------------------------------------------------

    #[test]
    fn require_exact_empty() -> TestOut {
        let input = "";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(parser.require_str("i").is_err());
        assert!(parser.require_str("sequence").is_err());
        Ok(())
    }

    #[test]
    fn require_exact_last_character_of_input() -> TestOut {
        let input = "i";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(parser.require_str("i").is_ok());
        assert!(parser.require_str("string").is_err());
        assert_eq!(parser.peek()?, None);
        Ok(())
    }

    #[test]
    fn require_exact_end_of_input() -> TestOut {
        let input = "iD";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(parser.require_str("iD").is_ok());
        assert!(parser.require_str("KFA").is_err());
        // A non-ASCII required sequence must fail at end of input as well.
        assert!(parser.require_str("™").is_err());
        assert_eq!(parser.peek()?, None);
        Ok(())
    }

    // --- peek_sees --------------------------------------------------------

    #[test]
    fn peek_finds_empty() -> TestOut {
        let input = "";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(!parser.peek_sees('a')?);
        Ok(())
    }

    #[test]
    fn peek_finds_exists_first_position() -> TestOut {
        let input = "a";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(parser.peek_sees('a')?);
        Ok(())
    }

    #[test]
    fn peek_finds_exists() -> TestOut {
        let input = "000a";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        parser.require_str("000")?;
        assert!(parser.peek_sees('a')?);
        Ok(())
    }

    #[test]
    fn peek_finds_absent() -> TestOut {
        let input = "b";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(!parser.peek_sees('a')?);
        Ok(())
    }

    #[test]
    fn peek_finds_no_more_input() -> TestOut {
        let input = "bash";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        parser.require_str("bash")?;
        assert!(!parser.peek_sees('a')?);
        Ok(())
    }

    // --- accept -----------------------------------------------------------

    #[test]
    fn find_and_take_empty() -> TestOut {
        let input = "";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(!parser.accept('a')?);
        Ok(())
    }

    #[test]
    fn find_and_take_exists_first_character() -> TestOut {
        let input = "a";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(parser.accept('a')?);
        assert_eq!(parser.peek()?, None);
        Ok(())
    }

    #[test]
    fn find_and_take_exists() -> TestOut {
        let input = "prelude:a";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        parser.require_str("prelude:")?;
        assert!(parser.accept('a')?);
        assert_eq!(parser.peek()?, None);
        Ok(())
    }

    #[test]
    fn find_and_take_absent() -> TestOut {
        let input = "b";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(!parser.accept('a')?);
        Ok(())
    }

    #[test]
    fn find_and_take_no_more_input() -> TestOut {
        let input = "b";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        parser.require('b')?;
        assert!(!parser.accept('a')?);
        Ok(())
    }

    // --- skip_while -------------------------------------------------------

    #[test]
    fn skip_while_whitespace() -> TestOut {
        let input = "\r\n\t Some actual text";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(parser.skip_while(|c| c.is_ascii_whitespace())?);
        assert_eq!(parser.peek()?, Some('S'));
        Ok(())
    }

    #[test]
    fn skip_while_everything() -> TestOut {
        let input = "text";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(parser.skip_while(|c| c.is_ascii_alphabetic())?);
        assert_eq!(parser.peek()?, None);
        Ok(())
    }

    #[test]
    fn skip_while_after_entire_content() -> TestOut {
        let input = "start";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        parser.require_str("start")?;
        assert!(!parser.skip_while(|c| c.is_whitespace())?);
        Ok(())
    }

    // --- read_up_to -------------------------------------------------------

    #[test]
    fn read_up_to_empty() -> TestOut {
        let input = "";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(parser.read_up_to('a')?.is_none());
        Ok(())
    }

    #[test]
    fn read_up_to_absent() -> TestOut {
        let input = "abcde";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert_eq!(parser.read_up_to(';')?.unwrap(), input);
        Ok(())
    }

    #[test]
    fn read_up_to_first_character() -> TestOut {
        let input = "a";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        let nothingness = parser.read_up_to('a')?;
        assert!(nothingness.is_none());
        Ok(())
    }

    #[test]
    fn read_up_to_next_character() -> TestOut {
        let input = "prelude:a";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        parser.require_str("prelude:")?;
        let nothingness = parser.read_up_to('a')?;
        assert!(nothingness.is_none());
        Ok(())
    }

    #[test]
    fn read_up_to_exists() -> TestOut {
        let input = "&SomeCharRef;";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        parser.require('&')?;
        let char_ref_name = parser.read_up_to(';')?;
        assert_eq!(&char_ref_name.unwrap(), "SomeCharRef");
        // The stop character itself must still be available afterwards.
        parser.require(';')?;
        Ok(())
    }

    #[test]
    fn read_up_to_no_more_input() -> TestOut {
        let input = "&";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        parser.require_str("&")?;
        assert!(parser.read_up_to(';')?.is_none());
        Ok(())
    }

    // --- has_more ---------------------------------------------------------

    #[test]
    fn has_more_start() -> TestOut {
        let input = "whatever";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(parser.has_more()?);
        Ok(())
    }

    #[test]
    fn has_more_mid() -> TestOut {
        let input = "whatever";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        parser.require_str("what")?;
        assert!(parser.has_more()?);
        Ok(())
    }

    #[test]
    fn has_more_end() -> TestOut {
        let input = "whatever";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        parser.require_str("whatever")?;
        assert!(!parser.has_more()?);
        Ok(())
    }

    #[test]
    fn has_more_empty() -> TestOut {
        let input = "";
        let mut parser = Parser::wrap(Utf8Decoder::wrap(input.as_bytes()));
        assert!(!parser.has_more()?);
        Ok(())
    }
}