use std::mem;
use std::ops::Deref;
use dupe::Dupe;
/// Pull-style tokenizer for format strings: each call to `next` yields one
/// `FormatToken` (literal text, a `{...}` capture, or a `{{` / `}}` escape).
pub struct FormatParser<'a> {
/// Cursor over the input; tracks the unconsumed remainder and keeps the
/// original string available for error messages.
view: StringView<'a>,
}
/// Conversion requested for a capture via the optional `!c` suffix.
#[derive(Debug, PartialEq, Copy, Clone, Dupe)]
pub enum FormatConv {
/// Produced for `{name!s}` and for captures without a conversion suffix.
Str,
/// Produced for `{name!r}`.
Repr,
}
/// One token produced by `FormatParser::next`.
#[derive(Debug, PartialEq)]
pub enum FormatToken<'a> {
/// A run of literal text containing neither `{` nor `}`.
Text(&'a str),
/// A `{capture}`, `{capture!s}` or `{capture!r}` placeholder.
Capture {
/// The text between the opening `{` and the closing `}` (or the `!`
/// introducing a conversion).
capture: &'a str,
/// Byte offset, in the original format string, of the first byte after
/// the opening `{`.
pos: usize,
/// The requested conversion; `FormatConv::Str` when none was given.
conv: FormatConv,
},
/// A `{{` or `}}` escape sequence.
Escape(EscapeCurlyBrace),
}
/// An escaped curly brace found in a format string.
///
/// `as_str` yields the single brace character, `back_to_escape` yields the
/// doubled form.
#[derive(Debug, PartialEq)]
pub enum EscapeCurlyBrace {
    /// The opening brace.
    Open,
    /// The closing brace.
    Close,
}

impl EscapeCurlyBrace {
    /// Returns the single brace character as a string slice.
    pub fn as_str(&self) -> &'static str {
        match *self {
            EscapeCurlyBrace::Close => "}",
            EscapeCurlyBrace::Open => "{",
        }
    }

    /// Returns the doubled brace sequence.
    pub fn back_to_escape(&self) -> &'static str {
        match *self {
            EscapeCurlyBrace::Close => "}}",
            EscapeCurlyBrace::Open => "{{",
        }
    }
}
impl<'a> FormatParser<'a> {
#[inline]
pub fn new(s: &'a str) -> Self {
Self {
view: StringView::new(s),
}
}
pub fn next(&mut self) -> anyhow::Result<Option<FormatToken<'a>>> {
let mut i = 0;
while i < self.view.len() {
match self.view.as_bytes()[i] {
b'{' | b'}' if i != 0 => {
let text = self.view.eat(i);
return Ok(Some(FormatToken::Text(text)));
}
b'{' => {
assert!(i == 0);
let pos = self.view.pos() + 1;
i = 1;
while i < self.view.len() {
match self.view.as_bytes()[i] {
b'}' => {
let capture = &self.view.eat(i + 1)[1..i];
return Ok(Some(FormatToken::Capture {
capture,
pos,
conv: FormatConv::Str,
}));
}
b'!' => {
let capture = &self.view.eat(i + 1)[1..i];
let conv = if self.view.rem().starts_with('r') {
FormatConv::Repr
} else if self.view.rem().starts_with('s') {
FormatConv::Str
} else if self.view.rem().starts_with('}') {
return Err(anyhow::anyhow!(
"Missing conversion character in format string `{}`",
self.view.original()
));
} else {
return Err(anyhow::anyhow!(
"Invalid conversion in format string `{}`",
self.view.original()
));
};
self.view.eat(1); if !self.view.starts_with('}') {
break;
}
self.view.eat(1); return Ok(Some(FormatToken::Capture { capture, pos, conv }));
}
b'{' => {
if i == 1 {
self.view.eat(2);
return Ok(Some(FormatToken::Escape(EscapeCurlyBrace::Open)));
}
break;
}
_ => i += 1,
}
}
return Err(anyhow::anyhow!(
"Unmatched '{{' in format string `{}`",
self.view.original()
));
}
b'}' => {
assert!(i == 0);
if self.view.starts_with("}}") {
self.view.eat(2);
return Ok(Some(FormatToken::Escape(EscapeCurlyBrace::Close)));
}
return Err(anyhow::anyhow!(
"Standalone '}}' in format string `{}`",
self.view.original()
));
}
_ => i += 1,
}
}
if i == 0 {
Ok(None)
} else {
Ok(Some(FormatToken::Text(mem::take(&mut self.view).rem())))
}
}
}
/// Forward-only cursor over a borrowed string.
///
/// Keeps the whole string together with the byte offset of the first
/// unconsumed byte. Dereferences to the unconsumed remainder.
#[derive(Default)]
struct StringView<'a> {
    /// The complete string passed to `new`.
    s: &'a str,
    /// Byte offset into `s` where the unconsumed remainder begins.
    i: usize,
}

impl<'a> StringView<'a> {
    /// Starts a view at the beginning of `s`.
    fn new(s: &'a str) -> Self {
        StringView { s, i: 0 }
    }

    /// Consumes the next `n` bytes and returns them.
    ///
    /// Panics if the requested range is out of bounds or does not fall on
    /// `char` boundaries.
    fn eat(&mut self, n: usize) -> &'a str {
        let start = self.i;
        let end = start + n;
        let consumed = &self.s[start..end];
        self.i = end;
        consumed
    }

    /// Byte offset of the cursor within the original string.
    fn pos(&self) -> usize {
        self.i
    }

    /// The part of the string that has not been consumed yet.
    fn rem(&self) -> &'a str {
        let StringView { s, i } = *self;
        &s[i..]
    }

    /// The complete string, including the part already consumed.
    fn original(&self) -> &'a str {
        self.s
    }
}

/// Lets `str` methods (`len`, `as_bytes`, `starts_with`, ...) operate directly
/// on the unconsumed remainder.
impl<'a> Deref for StringView<'a> {
    type Target = str;

    fn deref(&self) -> &str {
        &self.s[self.i..]
    }
}
#[cfg(test)]
mod tests {
    use crate::dot_format_parser::FormatConv;
    use crate::dot_format_parser::FormatParser;
    use crate::dot_format_parser::FormatToken;

    /// Shorthand for the expected capture token.
    fn capture(capture: &str, pos: usize, conv: FormatConv) -> FormatToken<'_> {
        FormatToken::Capture { capture, pos, conv }
    }

    /// Text and captures alternate, and each capture reports the byte offset
    /// just past its opening brace.
    #[test]
    fn test_parser_position() {
        let expected = vec![
            FormatToken::Text("foo"),
            capture("x", 4, FormatConv::Str),
            FormatToken::Text("bar"),
            capture("yz", 10, FormatConv::Str),
            FormatToken::Text("baz"),
            capture("w", 17, FormatConv::Str),
            FormatToken::Text("qux"),
            capture("v", 25, FormatConv::Repr),
            FormatToken::Text("quux"),
        ];

        let mut parser = FormatParser::new("foo{x}bar{yz}baz{w!s}qux{v!r}quux");
        for token in expected {
            assert_eq!(parser.next().unwrap(), Some(token));
        }
        assert_eq!(parser.next().unwrap(), None);
    }

    /// A closing brace that is not doubled is rejected.
    #[test]
    fn test_failure() {
        let err = FormatParser::new("}foo").next().unwrap_err();
        assert_eq!(
            err.to_string(),
            "Standalone '}' in format string `}foo`"
        );
    }
}