use std::error::Error as StdError;
use std::mem;
use std::fmt;
/// A node of the token tree produced by `Parser::parse`.
///
/// Name paths are the tag name split on `.`; the implicit name `.` is stored as an
/// empty vector. Every tag-derived variant also keeps the raw source text of the tag
/// (opening delimiter + tag content + closing delimiter) it was built from.
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
/// Literal template text found outside of any tag.
Text(String),
/// `(name path, raw tag source)` for a tag that starts with none of the
/// recognised sigils (`!`, `&`, `{`, `#`, `^`, `/`, `>`, `=`).
EscapedTag(Vec<String>, String),
/// `(name path, raw tag source)` for a `&name` or `{name}` tag.
UnescapedTag(Vec<String>, String),
/// A closed `#` / `^` section, in field order:
/// name path, `true` when opened with `^` (inverted), child tokens,
/// opening delimiter in effect when the section was closed, raw source of the
/// opening tag, concatenated raw source of the children, raw source of the
/// closing tag, closing delimiter in effect when the section was closed.
Section(Vec<String>, bool, Vec<Token>, String, String, String, String, String),
/// A section that has been opened but not (yet) closed:
/// name path, `true` when opened with `^`, raw source of the opening tag, and
/// whether a line ending following the opening tag was consumed by the parser.
/// Parser-internal; `Parser::parse` fails with `Error::UnclosedSection` if one remains.
IncompleteSection(Vec<String>, bool, String, bool),
/// A `>` tag: `(partial name, indentation that preceded the tag on its line,
/// raw tag source)`.
Partial(String, String, String),
}
/// Reasons the template parser can reject its input.
///
/// NOTE: the `Delimeter` spelling in two variant names is part of the public API
/// and is therefore kept as is.
#[derive(Debug, PartialEq)]
pub enum Error {
/// `(found, expected)`: a character inside a closing delimiter did not match.
BadClosingTag(char, char),
/// The input ended while a tag was still open.
UnclosedTag,
/// A section (dotted name given) was never closed, or a different section was
/// closed while it was still open.
UnclosedSection(String),
/// A tag starting with `{` did not end with `}`.
UnbalancedUnescapeTag,
/// A tag, tag name or partial name was empty or whitespace-only.
EmptyTag,
/// A closing tag (dotted name given) had no matching open section.
EarlySectionClose(String),
/// A set-delimiter tag did not contain a second, whitespace-separated delimiter.
MissingSetDelimeterClosingTag,
/// A tag starting with `=` was too short or did not end with `=`.
InvalidSetDelimeterSyntax,
// Hidden placeholder variant; never constructed in this file. It discourages
// exhaustive matching by downstream code so that variants can be added later.
#[doc(hidden)]
__Nonexhaustive,
}
impl StdError for Error {
fn description(&self) -> &'static str {
match *self {
Error::BadClosingTag(..) => "found a malformed closing tag",
Error::UnclosedTag => "found an unclosed tag",
Error::UnclosedSection(..) => "found an unclosed section",
Error::UnbalancedUnescapeTag => "found an unbalanced unescape tag",
Error::EmptyTag => "found an empty tag",
Error::EarlySectionClose(..) => "found a closing tag for an unopened section",
Error::MissingSetDelimeterClosingTag => "missing the new closing tag in set delimeter tag",
Error::InvalidSetDelimeterSyntax => "invalid set delimeter tag syntax",
Error::__Nonexhaustive => unreachable!(),
}
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::BadClosingTag(actual, expected) => {
write!(f,
"character {:?} was unexpected in the closing tag, expected {:?}",
actual,
expected)
}
Error::UnclosedSection(ref name) => {
write!(f, "found an unclosed section: {:?}", name)
},
Error::EarlySectionClose(ref name) => {
write!(f, "found a closing tag for an unopened section {:?}", name)
},
_ => write!(f, "{}", self.description()),
}
}
}
/// How the tag that was just read sits on its line, as determined by
/// `Parser::classify_token`.
enum TokenClass {
/// The tag shares its line with other content; surrounding whitespace is kept.
Normal,
/// The tag is followed by a line ending (or end of input) and nothing precedes
/// it on its line.
StandAlone,
/// The tag is followed by a line ending (or end of input) and is preceded on its
/// line only by whitespace. Holds a copy of the preceding `Token::Text` and the
/// byte offset within it at which that leading whitespace starts.
WhiteSpace(String, usize),
}
/// Character-driven state machine that turns template source into `Token`s.
pub struct Parser<'a, T: 'a> {
// Source of input characters.
reader: &'a mut T,
// Character currently being examined; `None` once the input is exhausted.
ch: Option<char>,
// Character fetched ahead of time by `peek`; handed out by the next `bump`.
lookahead: Option<char>,
// Position counters maintained by `bump`. They are not read anywhere in this impl.
line: usize,
col: usize,
// Accumulates the literal text, or the body of the tag, currently being read.
content: String,
// Current state of the tokenizer state machine.
state: ParserState,
// Delimiters currently in effect; a set-delimiter tag replaces them.
opening_tag: String,
closing_tag: String,
// The same delimiters as character vectors, for char-by-char matching.
opening_tag_chars: Vec<char>,
closing_tag_chars: Vec<char>,
// Index of the next delimiter character to match while inside a delimiter.
tag_position: usize,
// Tokens produced so far; also used as a stack while sections are open.
tokens: Vec<Token>,
// Names of all partials referenced by the template, in order of appearance.
partials: Vec<String>,
}
/// States of the tokenizer loop in `Parser::parse`.
enum ParserState {
/// Reading literal text outside of any tag.
Text,
/// The first character of the opening delimiter matched; matching the rest.
OpeningTag,
/// Inside a tag, collecting its content.
Tag,
/// The first character of the closing delimiter matched; matching the rest.
ClosingTag,
}
impl<'a, T: Iterator<Item = char>> Parser<'a, T> {
/// Creates a parser reading from `reader` with the given initial delimiters.
///
/// The first character is read from `reader` immediately so that the parser
/// starts out positioned on it.
pub fn new(reader: &'a mut T, opening_tag: &str, closing_tag: &str) -> Parser<'a, T> {
    // Delimiters are kept both as strings (to rebuild raw tag source) and as
    // character vectors (to match input one char at a time).
    let opening_chars: Vec<char> = opening_tag.chars().collect();
    let closing_chars: Vec<char> = closing_tag.chars().collect();

    let mut fresh = Parser {
        reader: reader,
        ch: None,
        lookahead: None,
        line: 1,
        col: 1,
        content: String::new(),
        state: ParserState::Text,
        opening_tag: String::from(opening_tag),
        closing_tag: String::from(closing_tag),
        opening_tag_chars: opening_chars,
        closing_tag_chars: closing_chars,
        tag_position: 0,
        tokens: Vec::new(),
        partials: Vec::new(),
    };

    // Prime `ch` with the first input character.
    fresh.bump();
    fresh
}
/// Advances to the next character, preferring one already fetched by `peek`,
/// and updates the line/column counters for the character now current.
fn bump(&mut self) {
    self.ch = match self.lookahead.take() {
        Some(peeked) => Some(peeked),
        None => self.reader.next(),
    };

    match self.ch {
        Some('\n') => {
            self.line += 1;
            self.col = 1;
        }
        Some(_) => self.col += 1,
        // End of input: counters stay where they are.
        None => {}
    }
}
/// Returns the character after the current one without consuming it.
///
/// The character is fetched from the reader at most once and cached in
/// `lookahead` until the next `bump`.
fn peek(&mut self) -> Option<char> {
    if self.lookahead.is_none() {
        self.lookahead = self.reader.next();
    }
    self.lookahead
}
/// Returns `true` when the current character is exactly `ch`; always `false`
/// at end of input.
fn ch_is(&self, ch: char) -> bool {
    self.ch == Some(ch)
}
pub fn parse(mut self) -> Result<(Vec<Token>, Vec<String>), Error> {
let mut curly_brace_tag = false;
while let Some(ch) = self.ch {
match self.state {
ParserState::Text => {
if ch == self.opening_tag_chars[0] {
if self.opening_tag_chars.len() > 1 {
self.tag_position = 1;
self.state = ParserState::OpeningTag;
} else {
self.add_text();
self.state = ParserState::Tag;
}
} else {
self.content.push(ch);
}
self.bump();
}
ParserState::OpeningTag => {
if ch == self.opening_tag_chars[self.tag_position] {
if self.tag_position == self.opening_tag_chars.len() - 1 {
self.add_text();
curly_brace_tag = false;
self.state = ParserState::Tag;
} else {
self.tag_position = self.tag_position + 1;
}
} else {
self.state = ParserState::Text;
self.not_otag();
self.content.push(ch);
}
self.bump();
}
ParserState::Tag => {
if self.content.is_empty() && ch == '{' {
curly_brace_tag = true;
self.content.push(ch);
self.bump();
} else if curly_brace_tag && ch == '}' {
curly_brace_tag = false;
self.content.push(ch);
self.bump();
} else if ch == self.closing_tag_chars[0] {
if self.closing_tag_chars.len() > 1 {
self.tag_position = 1;
self.state = ParserState::ClosingTag;
self.bump();
} else {
try!(self.add_tag());
self.state = ParserState::Text;
}
} else {
self.content.push(ch);
self.bump();
}
}
ParserState::ClosingTag => {
if ch == self.closing_tag_chars[self.tag_position] {
if self.tag_position == self.closing_tag_chars.len() - 1 {
try!(self.add_tag());
self.state = ParserState::Text;
} else {
self.state = ParserState::Tag;
self.not_ctag();
self.content.push(ch);
self.bump();
}
} else {
let expected = self.closing_tag_chars[self.tag_position];
return Err(Error::BadClosingTag(ch, expected));
}
}
}
}
match self.state {
ParserState::Text => {
self.add_text();
}
ParserState::OpeningTag => {
self.not_otag();
self.add_text();
}
ParserState::ClosingTag => {
self.not_ctag();
self.add_text();
}
ParserState::Tag => return Err(Error::UnclosedTag),
}
for token in self.tokens.iter().rev() {
if let Token::IncompleteSection(ref path, _, _, _) = *token {
return Err(Error::UnclosedSection(path.join(".")))
}
}
let Parser { tokens, partials, .. } = self;
Ok((tokens, partials))
}
/// Flushes the accumulated `content` into a `Token::Text`, leaving `content`
/// empty. Does nothing when there is no accumulated content.
fn add_text(&mut self) {
    if self.content.is_empty() {
        return;
    }
    // Move the buffer out instead of copying it.
    let text = mem::replace(&mut self.content, String::new());
    self.tokens.push(Token::Text(text));
}
/// Decides how the tag that was just read sits on its line.
///
/// The tag can only be stand-alone when it is followed by `\n`, `\r\n` or end of
/// input. What precedes it is then judged from the most recent token: nothing, an
/// opening section tag that consumed its line ending, or text whose last line is
/// empty or whitespace-only.
fn classify_token(&mut self) -> TokenClass {
    if let Some(next) = self.ch {
        // `peek` is only consulted for a carriage return, as a lone `\r` is not
        // treated as a line ending.
        let at_line_end = next == '\n' || (next == '\r' && self.peek() == Some('\n'));
        if !at_line_end {
            return TokenClass::Normal;
        }
    }

    let is_only_token = self.tokens.len() == 1;
    let text = match self.tokens.last() {
        None => return TokenClass::StandAlone,
        Some(&Token::IncompleteSection(_, _, _, true)) => return TokenClass::StandAlone,
        Some(&Token::Text(ref text)) if !text.is_empty() => text,
        Some(_) => return TokenClass::Normal,
    };

    // Find the last character that is a newline or is not whitespace.
    match text.rfind(|c: char| c == '\n' || !c.is_whitespace()) {
        // The text is whitespace without any newline: it is leading indentation
        // only if nothing at all came before it.
        None if is_only_token => TokenClass::WhiteSpace(text.clone(), 0),
        None => TokenClass::Normal,
        // Something other than whitespace precedes the tag on its line.
        Some(pos) if text.as_bytes()[pos] != b'\n' => TokenClass::Normal,
        // The text ends with the newline: the tag starts its line.
        Some(pos) if pos == text.len() - 1 => TokenClass::StandAlone,
        // Whitespace follows the last newline: that is the tag's indentation.
        Some(pos) => TokenClass::WhiteSpace(text.clone(), pos + 1),
    }
}
/// Strips the whitespace around a tag that stands alone on its line.
///
/// When the tag is stand-alone, the line ending after it is consumed and any
/// indentation before it is removed from the preceding text token. Returns
/// `true` when something was stripped, `false` when the tag shares its line.
fn eat_whitespace(&mut self) -> bool {
    // `Some(text)` when the preceding text token must be replaced by its
    // indentation-free prefix.
    let replacement = match self.classify_token() {
        TokenClass::Normal => return false,
        TokenClass::StandAlone => None,
        TokenClass::WhiteSpace(text, indent_start) => Some(text[0..indent_start].to_string()),
    };

    // Skip the line ending (`\n` or `\r\n`) that follows the tag.
    if self.ch_is('\r') {
        self.bump();
    }
    self.bump();

    if let Some(kept) = replacement {
        self.tokens.pop();
        self.tokens.push(Token::Text(kept));
    }
    true
}
/// Turns the tag body accumulated in `content` into a token (or a parser state
/// change), dispatching on the first byte of the body:
///
/// * `!` — comment; nothing is emitted.
/// * `&` / `{` — `Token::UnescapedTag`.
/// * `#` / `^` — pushes a `Token::IncompleteSection` (normal / inverted).
/// * `/` — closes the innermost open section, folding everything pushed since it
///   was opened into a `Token::Section`.
/// * `>` — partial, delegated to `add_partial`.
/// * `=` — set-delimiter tag; replaces the parser's delimiters.
/// * anything else — `Token::EscapedTag`.
///
/// Must be called while the current character is the last character of the
/// closing delimiter; that character is consumed first.
///
/// # Errors
///
/// Returns `EmptyTag` for a blank tag or name, `UnbalancedUnescapeTag`,
/// `EarlySectionClose`, `UnclosedSection`, `MissingSetDelimeterClosingTag` or
/// `InvalidSetDelimeterSyntax` as described on the corresponding branches.
fn add_tag(&mut self) -> Result<(), Error> {
// Step past the final character of the closing delimiter.
self.bump();
// Raw source of the tag, rebuilt from the delimiters currently in effect.
let tag = self.opening_tag.clone() + &self.content + &self.closing_tag;
// Move the content out of the parser to avoid a copy.
let mut content = String::new();
mem::swap(&mut content, &mut self.content);
let len = content.len();
// Guarantees `content` is non-empty, so indexing byte 0 below cannot panic.
try!(deny_blank(&content));
let content = content;
match content.as_bytes()[0] as char {
'!' => {
// Comment: only its surrounding whitespace handling matters.
self.eat_whitespace();
}
'&' => {
let name = &content[1..len];
let name = try!(get_name_or_implicit(name));
self.tokens.push(Token::UnescapedTag(name, tag));
}
'{' => {
// Triple-mustache form: the body must be wrapped in `{` ... `}`.
if content.ends_with('}') {
let name = &content[1..len - 1];
let name = try!(get_name_or_implicit(name));
self.tokens.push(Token::UnescapedTag(name, tag));
} else {
return Err(Error::UnbalancedUnescapeTag)
}
}
'#' => {
// Remember whether the line ending after the tag was consumed; the next
// tag's stand-alone detection depends on it.
let newlined = self.eat_whitespace();
let name = try!(get_name_or_implicit(&content[1..len]));
self.tokens.push(Token::IncompleteSection(name, false, tag, newlined));
}
'^' => {
// Same as `#`, but the section is marked as inverted.
let newlined = self.eat_whitespace();
let name = try!(get_name_or_implicit(&content[1..len]));
self.tokens.push(Token::IncompleteSection(name, true, tag, newlined));
}
'/' => {
self.eat_whitespace();
let name = try!(get_name_or_implicit(&content[1..len]));
// Tokens popped off the stack, collected in reverse order.
let mut children: Vec<Token> = Vec::new();
loop {
// Ran out of tokens without meeting an open section.
if self.tokens.is_empty() {
return Err(Error::EarlySectionClose(name.join(".")))
}
let last = self.tokens.pop();
match last {
Some(Token::IncompleteSection(section_name, inverted, osection, _)) => {
// Restore source order.
children.reverse();
// Collect the raw source of every child so the section's original
// text can be rebuilt.
let mut srcs = Vec::new();
for child in children.iter() {
match *child {
Token::Text(ref s) |
Token::EscapedTag(_, ref s) |
Token::UnescapedTag(_, ref s) |
Token::Partial(_, _, ref s) => srcs.push(s.clone()),
Token::Section(_, _, _, _, ref osection, ref src, ref csection, _) => {
srcs.push(osection.clone());
srcs.push(src.clone());
srcs.push(csection.clone());
}
_ => bug!("Incomplete sections should not be nested"),
}
}
if section_name == name {
let mut src = String::new();
for s in srcs.iter() {
src.push_str(s);
}
self.tokens.push(Token::Section(name,
inverted,
children,
self.opening_tag.clone(),
osection,
src,
tag,
self.closing_tag.clone()));
break;
} else {
// The innermost open section is not the one being closed.
return Err(Error::UnclosedSection(section_name.join(".")))
}
}
Some(last_token) => children.push(last_token),
// Not reachable: emptiness was checked before popping.
None => (),
}
}
}
'>' => {
try!(self.add_partial(&content, tag));
}
'=' => {
self.eat_whitespace();
// Expected shape: `=<open> <close>=`.
if len > 2usize && content.ends_with('=') {
let s = try!(deny_blank(&content[1..len - 1]));
// The new opening delimiter runs up to the first whitespace.
let pos = s.find(char::is_whitespace);
let pos = match pos {
None => return Err(Error::MissingSetDelimeterClosingTag),
Some(pos) => pos,
};
self.opening_tag = s[0..pos].to_string();
self.opening_tag_chars = self.opening_tag.chars().collect();
// The new closing delimiter is everything after that whitespace.
let s2 = &s[pos..];
let pos = s2.find(|c: char| !c.is_whitespace());
let pos = match pos {
None => return Err(Error::MissingSetDelimeterClosingTag),
Some(pos) => pos,
};
self.closing_tag = s2[pos..].to_string();
self.closing_tag_chars = self.closing_tag.chars().collect();
} else {
return Err(Error::InvalidSetDelimeterSyntax)
}
}
_ => {
// No sigil: the whole body is the name.
let name = try!(get_name_or_implicit(&content));
self.tokens.push(Token::EscapedTag(name, tag));
}
};
Ok(())
}
fn add_partial(&mut self, content: &str, tag: String) -> Result<(), Error> {
let indent = match self.classify_token() {
TokenClass::Normal => "".to_string(),
TokenClass::StandAlone => {
if self.ch_is('\r') {
self.bump();
}
self.bump();
"".to_string()
}
TokenClass::WhiteSpace(s, pos) => {
if self.ch_is('\r') {
self.bump();
}
self.bump();
let ws = &s[pos..];
self.tokens.pop();
self.tokens.push(Token::Text(s[0..pos].to_string()));
ws.to_string()
}
};
let name = &content[1..content.len()];
let name = try!(deny_blank(name));
self.tokens.push(Token::Partial(name.into(), indent, tag));
self.partials.push(name.into());
Ok(())
}
/// Appends the already-matched prefix of the opening delimiter (its first
/// `tag_position` characters) to `content`, for when the match turned out not to
/// be a delimiter after all.
fn not_otag(&mut self) {
    let matched = self.opening_tag_chars.iter().take(self.tag_position);
    self.content.extend(matched.cloned());
}
/// Appends the already-matched prefix of the closing delimiter (its first
/// `tag_position` characters) to `content`.
fn not_ctag(&mut self) {
    let matched = self.closing_tag_chars.iter().take(self.tag_position);
    self.content.extend(matched.cloned());
}
}
fn get_name_or_implicit(name: &str) -> Result<Vec<String>, Error> {
let name = try!(deny_blank(&name));
Ok(if name == "." {
Vec::new()
} else {
name.split_terminator('.')
.map(|x| x.to_string())
.collect()
})
}
fn deny_blank(content: &str) -> Result<&str, Error> {
let trimmed = content.trim();
if trimmed.is_empty() {
Err(Error::EmptyTag)
} else {
Ok(trimmed)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the parser over `input` with the default `{{` / `}}` delimiters.
    pub fn parse(input: &str) -> Result<(Vec<Token>, Vec<String>), Error> {
        let mut chars = input.chars();
        let parser = Parser::new(&mut chars, "{{", "}}");
        parser.parse()
    }

    /// Parses `input`, panicking with the offending template when it is rejected.
    pub fn assert_parse(input: &str) -> (Vec<Token>, Vec<String>) {
        match parse(input) {
            Ok(parsed) => parsed,
            Err(err) => panic!("Failed to parse: {}: {:?}", input, err),
        }
    }

    #[test]
    fn empty_input() {
        assert_parse("");
    }

    #[test]
    fn empty_tag() {
        assert_eq!(parse("{{}}").unwrap_err(), Error::EmptyTag);
    }

    #[test]
    fn whitespace_only_tag() {
        assert_eq!(parse("{{ }}").unwrap_err(), Error::EmptyTag);
    }

    #[test]
    fn bad_closing_tag() {
        assert_eq!(parse("{{hello}?").unwrap_err(), Error::BadClosingTag('?', '}'));
    }

    #[test]
    fn unclosed_tag() {
        assert_eq!(parse("{{hi").unwrap_err(), Error::UnclosedTag);
    }

    mod sections {
        use super::*;

        #[test]
        fn sanity() {
            assert_parse("{{#people}} Hi {{name}}! {{/people}}");
        }

        #[test]
        fn unclosed() {
            let failure = parse("{{#world}}hi").unwrap_err();
            assert_eq!(failure, Error::UnclosedSection("world".into()));
        }

        #[test]
        fn unclosed_nested_with_wrong_closing_tag() {
            let failure = parse("{{#universe}} {{#world}} {{/universe}}").unwrap_err();
            assert_eq!(failure, Error::UnclosedSection("world".into()));
        }

        #[test]
        fn unclosed_nested() {
            let failure = parse("{{#universe}} {{#world}}").unwrap_err();
            assert_eq!(failure, Error::UnclosedSection("world".into()));
        }

        #[test]
        fn unclosed_with_path() {
            let failure = parse("{{#universe}} {{#world.and.stuff}} {{/universe}}").unwrap_err();
            assert_eq!(failure, Error::UnclosedSection("world.and.stuff".into()));
        }

        #[test]
        fn early_close() {
            let failure = parse("{{/world}}").unwrap_err();
            assert_eq!(failure, Error::EarlySectionClose("world".into()));
        }
    }

    mod inverted {
        use super::*;

        #[test]
        fn sanity() {
            assert_parse("{{^people}} No people! {{/people}}");
        }

        #[test]
        fn unclosed() {
            let failure = parse("{{^world}}hi").unwrap_err();
            assert_eq!(failure, Error::UnclosedSection("world".into()));
        }

        #[test]
        fn unclosed_nested_with_wrong_closing_tag() {
            // The outer section may be either normal or inverted.
            let in_normal = parse("{{#universe}} {{^world}} {{/universe}}").unwrap_err();
            assert_eq!(in_normal, Error::UnclosedSection("world".into()));

            let in_inverted = parse("{{^universe}} {{^world}} {{/universe}}").unwrap_err();
            assert_eq!(in_inverted, Error::UnclosedSection("world".into()));
        }

        #[test]
        fn unclosed_nested() {
            let failure = parse("{{#universe}} {{^world}}").unwrap_err();
            assert_eq!(failure, Error::UnclosedSection("world".into()));
        }

        #[test]
        fn unclosed_with_path() {
            let failure = parse("{{#universe}} {{^world.and.stuff}} {{/universe}}").unwrap_err();
            assert_eq!(failure, Error::UnclosedSection("world.and.stuff".into()));
        }
    }

    mod set_delimeter {
        use super::*;

        #[test]
        fn sanity() {
            assert_parse("{{=<% %>=}}");
        }

        #[test]
        fn closing_tag_is_whitespace() {
            let failure = parse("{{=<% =}}").unwrap_err();
            assert_eq!(failure, Error::MissingSetDelimeterClosingTag);
        }

        #[test]
        fn missing_closing_tag() {
            let failure = parse("{{=<%=}}").unwrap_err();
            assert_eq!(failure, Error::MissingSetDelimeterClosingTag);
        }

        #[test]
        fn missing_closing_equals() {
            let failure = parse("{{=<% %>}}").unwrap_err();
            assert_eq!(failure, Error::InvalidSetDelimeterSyntax);
        }
    }

    #[test]
    fn unbalanced_unescape() {
        // The unescape tag uses the delimiters installed by the first tag.
        let input = "{{=<% %>=}} <%{ %>";
        assert_eq!(parse(input).unwrap_err(), Error::UnbalancedUnescapeTag);
    }
}