pub mod error;
use crate::token::PatternToken;
pub use error::ParserError;
use std::{borrow::Cow, fmt::Debug, marker::PhantomData, str::FromStr};
#[derive(PartialEq)]
/// Scanning mode of [`Parser`]: which kind of token the bytes currently being
/// read belong to.
enum ParserState {
/// Plain text outside of any placeholder or quoted literal.
Default,
/// Inside a `{...}` placeholder; left when the closing `}` is reached.
Placeholder,
/// Inside a `'...'` quoted literal; left when a closing `'` is reached.
QuotedLiteral,
/// The first `'` of a doubled `''` was just consumed. On the next byte the
/// parser emits a single apostrophe as a literal, then returns to
/// `QuotedLiteral` when `quoted` is `true` or to `Default` otherwise.
Apostrophe { quoted: bool },
}
impl Default for ParserState {
fn default() -> Self {
Self::Default
}
}
/// Closes the literal run that ends at the parser's current index and moves
/// the parser into `$target_state`.
///
/// * If the run is non-empty it is returned from the enclosing function as
///   `Ok(Some(PatternToken::Literal { .. }))` with `quoted` set to `$is_quoted`.
/// * If the run is empty, the enclosing loop is `continue`d.
///
/// Because it expands to `return` / `continue`, it may only be used inside the
/// scanning loop of `Parser::try_next`.
macro_rules! handle_literal {
    ($parser:ident, $is_quoted:expr, $target_state:expr) => {{
        let span = $parser.advance_state($parser.idx, $target_state);
        if span.is_empty() {
            // Nothing was accumulated before the delimiter: keep scanning.
            continue;
        }
        // `span` was produced by `advance_state` from indices inside the input.
        #[allow(clippy::indexing_slicing)]
        return Ok(Some(PatternToken::Literal {
            content: Cow::Borrowed(&$parser.input[span]),
            quoted: $is_quoted,
        }));
    }};
}
/// Options passed to [`Parser::new`].
pub struct ParserOptions {
/// When `false`, an ASCII alphabetic byte found outside of a placeholder or
/// quoted literal makes the parser return `ParserError::IllegalCharacter`.
/// When `true`, such letters are kept as part of an unquoted literal.
pub allow_raw_letters: bool,
}
/// Pull-style tokenizer over a pattern string.
///
/// Each call to `try_next` yields the next `PatternToken`; placeholders are
/// parsed into `P` through its `FromStr` implementation.
pub struct Parser<'p, P> {
// The pattern text being tokenized; literal tokens borrow from it.
input: &'p str,
// Byte length of `input`, captured at construction.
len: usize,
// Copied from `ParserOptions::allow_raw_letters`.
allow_raw_letters: bool,
// Byte offset at which the token currently being accumulated starts.
start_idx: usize,
// Byte offset of the next byte to examine.
idx: usize,
// Current scanning mode.
state: ParserState,
// Ties the parser to the placeholder type `P` without storing a value of it.
marker: PhantomData<P>,
}
impl<'p, P> Parser<'p, P> {
pub fn new(input: &'p str, options: ParserOptions) -> Self {
Self {
input,
len: input.len(),
allow_raw_letters: options.allow_raw_letters,
start_idx: 0,
idx: 0,
state: ParserState::default(),
marker: PhantomData,
}
}
pub fn try_next(
&mut self,
) -> Result<Option<PatternToken<'p, P>>, ParserError<<P as FromStr>::Err>>
where
P: FromStr,
P::Err: Debug,
{
while let Some(b) = self.input.as_bytes().get(self.idx) {
match self.state {
ParserState::Placeholder if *b == b'}' => {
let range = self.advance_state(self.idx, ParserState::Default);
#[allow(clippy::indexing_slicing)]
return self.input[range]
.parse()
.map(|ret| Some(PatternToken::Placeholder(ret)))
.map_err(ParserError::InvalidPlaceholder);
}
ParserState::QuotedLiteral if *b == b'\'' => {
if self.input.as_bytes().get(self.idx + 1) == Some(&b'\'') {
handle_literal!(self, true, ParserState::Apostrophe { quoted: true })
} else {
handle_literal!(self, true, ParserState::Default)
}
}
ParserState::Default if *b == b'{' => {
handle_literal!(self, false, ParserState::Placeholder)
}
ParserState::Default if *b == b'\'' => {
if self.input.as_bytes().get(self.idx + 1) == Some(&b'\'') {
handle_literal!(self, false, ParserState::Apostrophe { quoted: false })
} else {
handle_literal!(self, false, ParserState::QuotedLiteral)
}
}
ParserState::Default if !self.allow_raw_letters && b.is_ascii_alphabetic() => {
return Err(ParserError::IllegalCharacter(*b as char));
}
ParserState::Apostrophe { quoted } => {
self.start_idx -= 1;
if quoted {
handle_literal!(self, true, ParserState::QuotedLiteral)
} else {
handle_literal!(self, false, ParserState::Default)
}
}
_ => self.idx += 1,
}
}
match self.state {
ParserState::Placeholder => Err(ParserError::UnclosedPlaceholder),
ParserState::QuotedLiteral => Err(ParserError::UnclosedQuotedLiteral),
ParserState::Apostrophe { .. } => unreachable!(),
ParserState::Default => {
let range = self.start_idx..self.len;
if !range.is_empty() {
self.start_idx = self.len;
#[allow(clippy::indexing_slicing)]
Ok(Some(PatternToken::Literal {
content: Cow::Borrowed(&self.input[range]),
quoted: false,
}))
} else {
Ok(None)
}
}
}
}
fn advance_state(&mut self, idx: usize, next_state: ParserState) -> std::ops::Range<usize> {
let range = self.start_idx..idx;
self.idx = idx + 1;
self.start_idx = self.idx;
self.state = next_state;
range
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pattern::Pattern;
    use std::{convert::TryInto, ops::Deref};

    /// Checks the token streams produced for well-formed patterns and the
    /// errors reported for malformed ones.
    #[test]
    fn pattern_parse_placeholders() {
        // Shorthand for an expected literal token.
        let lit = |content: &'static str, quoted: bool| PatternToken::Literal {
            content: content.into(),
            quoted,
        };

        let samples = vec![
            ("{0}", vec![PatternToken::Placeholder(0)]),
            (
                "{0}{1}",
                vec![PatternToken::Placeholder(0), PatternToken::Placeholder(1)],
            ),
            (
                "{0} 'at' {1}",
                vec![
                    PatternToken::Placeholder(0),
                    lit(" ", false),
                    lit("at", true),
                    lit(" ", false),
                    PatternToken::Placeholder(1),
                ],
            ),
            (
                "{0}'at'{1}",
                vec![
                    PatternToken::Placeholder(0),
                    lit("at", true),
                    PatternToken::Placeholder(1),
                ],
            ),
            (
                "'{0}' 'at' '{1}'",
                vec![
                    lit("{0}", true),
                    lit(" ", false),
                    lit("at", true),
                    lit(" ", false),
                    lit("{1}", true),
                ],
            ),
            (
                "'PRE' {0} 'and' {1} 'POST'",
                vec![
                    lit("PRE", true),
                    lit(" ", false),
                    PatternToken::Placeholder(0),
                    lit(" ", false),
                    lit("and", true),
                    lit(" ", false),
                    PatternToken::Placeholder(1),
                    lit(" ", false),
                    lit("POST", true),
                ],
            ),
            (
                // Doubled apostrophes, both outside and inside a quoted literal.
                "{0} o''clock and 'o''clock'",
                vec![
                    PatternToken::Placeholder(0),
                    lit(" o", false),
                    lit("'", false),
                    lit("clock and ", false),
                    lit("o", true),
                    lit("'", true),
                    lit("clock", true),
                ],
            ),
        ];

        for (source, expected_tokens) in samples {
            let options = ParserOptions {
                allow_raw_letters: true,
            };
            let parsed: Pattern<_> = Parser::new(source, options)
                .try_into()
                .expect("Failed to parse a pattern");
            assert_eq!(parsed.deref(), expected_tokens);
        }

        // `None` means only "parsing fails" is asserted, without comparing the
        // error value (presumably because it would wrap a `ParseIntError`,
        // which cannot be constructed directly).
        let broken: Vec<(_, Option<ParserError<std::num::ParseIntError>>)> = vec![
            ("{", Some(ParserError::UnclosedPlaceholder)),
            ("{0", Some(ParserError::UnclosedPlaceholder)),
            ("{01", Some(ParserError::UnclosedPlaceholder)),
            ("{date}", None),
            ("{date} 'days'", None),
            ("'{00}", Some(ParserError::UnclosedQuotedLiteral)),
            ("d", Some(ParserError::IllegalCharacter('d'))),
        ];

        for (source, expected_error) in broken {
            let options = ParserOptions {
                allow_raw_letters: false,
            };
            let outcome: Result<Pattern<_>, _> = Parser::<usize>::new(source, options).try_into();
            match expected_error {
                Some(err) => assert_eq!(outcome.expect_err("Should have failed."), err),
                None => assert!(outcome.is_err()),
            }
        }
    }
}