use super::{EscapedByte, Literal, Part, Template, Variable};
use crate::error::{self, ParseError};
impl Template {
    /// Parse `source` into a template, beginning at byte offset `start`.
    ///
    /// The input is split into three kinds of parts:
    ///
    /// * literal text, stored as a byte range into `source` (no bytes are copied),
    /// * single escaped bytes (a backslash followed by the byte it escapes),
    /// * variables, introduced by a `$` sign.
    ///
    /// # Errors
    ///
    /// Propagates the `ParseError` reported for an invalid escape sequence
    /// or for a malformed variable.
    pub fn parse(source: &[u8], start: usize) -> Result<Self, ParseError> {
        let mut parts = Vec::with_capacity(1);
        let mut cursor = start;

        while cursor < source.len() {
            // Jump to the next byte with a special meaning, or to the end of
            // the input when there is none left.
            let special = memchr::memchr2(b'$', b'\\', &source[cursor..])
                .map_or(source.len(), |offset| cursor + offset);

            // Everything that was skipped over is plain literal text.
            if special != cursor {
                parts.push(Part::Literal(Literal { range: cursor..special }));
            }

            match source.get(special) {
                // Reached the end of the input.
                None => break,
                Some(b'\\') => {
                    let value = unescape_one(source, special)?;
                    parts.push(Part::EscapedByte(EscapedByte { value }));
                    // Step over both the backslash and the escaped byte.
                    cursor = special + 2;
                },
                // The only other byte `memchr2` can stop at is `$`.
                Some(_) => {
                    let (variable, end) = Variable::parse(source, special)?;
                    cursor = end;
                    parts.push(Part::Variable(variable));
                },
            }
        }

        Ok(Self { parts })
    }
}
impl Variable {
fn parse(source: &[u8], finger: usize) -> Result<(Self, usize), ParseError> {
if finger + 1 >= source.len() {
return Err(error::MissingVariableName {
position: finger,
len: 1,
}
.into());
}
if source[finger + 1] == b'{' {
Self::parse_braced(source, finger)
} else {
let name_end = match source[finger + 1..]
.iter()
.position(|&c| !c.is_ascii_alphanumeric() && c != b'_')
{
Some(0) => {
return Err(error::MissingVariableName {
position: finger,
len: 1,
}
.into());
},
Some(x) => finger + 1 + x,
None => source.len(),
};
let variable = Variable {
name: finger + 1..name_end,
default: None,
};
Ok((variable, name_end))
}
}
fn parse_braced(source: &[u8], finger: usize) -> Result<(Self, usize), ParseError> {
let name_start = finger + 2;
if name_start >= source.len() {
return Err(error::MissingVariableName {
position: finger,
len: 2,
}
.into());
}
let name_end = match source[name_start..]
.iter()
.position(|&c| !c.is_ascii_alphanumeric() && c != b'_')
{
Some(0) => {
return Err(error::MissingVariableName {
position: finger,
len: 2,
}
.into());
},
Some(x) => name_start + x,
None => source.len(),
};
if name_end == source.len() {
return Err(error::MissingClosingBrace { position: finger + 1 }.into());
}
if source[name_end] == b'}' {
let variable = Variable {
name: name_start..name_end,
default: None,
};
return Ok((variable, name_end + 1));
} else if source[name_end] != b':' {
return Err(error::UnexpectedCharacter {
position: name_end,
character: get_maybe_char_at(source, name_end),
expected: error::ExpectedCharacter {
message: "a closing brace ('}') or colon (':')",
},
}
.into());
}
let end = finger
+ find_closing_brace(&source[finger..]).ok_or(error::MissingClosingBrace { position: finger + 1 })?;
let variable = Variable {
name: name_start..name_end,
default: Some(Template::parse(&source[..end], name_end + 1)?),
};
Ok((variable, end + 1))
}
}
fn unescape_one(source: &[u8], position: usize) -> Result<u8, ParseError> {
if position == source.len() - 1 {
return Err(error::InvalidEscapeSequence {
position,
character: None,
}
.into());
}
match source[position + 1] {
b'\\' => Ok(b'\\'),
b'$' => Ok(b'$'),
b'{' => Ok(b'{'),
b'}' => Ok(b'}'),
b':' => Ok(b':'),
_ => Err(error::InvalidEscapeSequence {
position,
character: Some(get_maybe_char_at(source, position + 1)),
}
.into()),
}
}
fn get_maybe_char_at(data: &[u8], index: usize) -> error::CharOrByte {
let head = &data[index..];
let head = &head[..head.len().min(4)];
assert!(
!head.is_empty(),
"index out of bounds: data.len() is {} but index is {}",
data.len(),
index,
);
let head = valid_utf8_prefix(head);
if let Some(c) = head.chars().next() {
error::CharOrByte::Char(c)
} else {
error::CharOrByte::Byte(data[index])
}
}
/// Return the longest prefix of `input` that is valid UTF-8.
///
/// The result is empty when `input` does not start with a complete, valid
/// UTF-8 sequence.
fn valid_utf8_prefix(input: &[u8]) -> &str {
    match std::str::from_utf8(input) {
        Ok(text) => text,
        // `valid_up_to()` marks the end of the valid part, so decoding that
        // prefix again cannot fail.
        Err(error) => std::str::from_utf8(&input[..error.valid_up_to()]).unwrap(),
    }
}
/// Find the closing brace that matches the first unescaped opening brace in
/// `haystack`.
///
/// Nested brace pairs are skipped, and a backslash hides the byte that
/// follows it, so escaped braces are never counted. A `}` that appears before
/// any `{` has been opened is ignored.
///
/// Returns the index of the matching `}` within `haystack`, or `None` when
/// the first opening brace is never closed (or there is no opening brace).
fn find_closing_brace(haystack: &[u8]) -> Option<usize> {
    let mut finger = 0;
    // Number of currently open braces. Unsigned on purpose: a stray `}` must
    // not be able to push the depth below zero and shift the match.
    let mut depth: usize = 0;

    while finger < haystack.len() {
        // Absolute index of the next byte of interest.
        let position = finger + memchr::memchr3(b'\\', b'{', b'}', &haystack[finger..])?;
        match haystack[position] {
            b'\\' => {
                // Skip the backslash and the byte it escapes. If the backslash
                // is the last byte, this moves past the end and ends the loop.
                finger = position + 2;
            },
            b'{' => {
                depth += 1;
                finger = position + 1;
            },
            b'}' => {
                if depth == 1 {
                    return Some(position);
                }
                // Closes a nested pair, or is a stray `}` at depth 0.
                depth = depth.saturating_sub(1);
                finger = position + 1;
            },
            _ => unreachable!(),
        }
    }

    None
}
// Unit tests for the private helpers `get_maybe_char_at` and `find_closing_brace`.
#[cfg(test)]
#[rustfmt::skip]
mod test {
use super::*;
use assert2::{assert, check};
// `get_maybe_char_at` must report a `Char` when a complete UTF-8 sequence starts at the
// requested index, and fall back to the raw `Byte` otherwise.
#[test]
fn test_get_maybe_char_at() {
use error::CharOrByte::{Byte, Char};
// Plain ASCII, including the last byte of the input.
assert!(get_maybe_char_at(b"hello", 0) == Char('h'));
assert!(get_maybe_char_at(b"he", 0) == Char('h'));
assert!(get_maybe_char_at(b"hello", 1) == Char('e'));
assert!(get_maybe_char_at(b"he", 1) == Char('e'));
// Invalid bytes after the requested index must not affect the result.
assert!(get_maybe_char_at(b"hello\x80", 1) == Char('e'));
assert!(get_maybe_char_at(b"he\x80llo\x80", 1) == Char('e'));
assert!(get_maybe_char_at(b"h\x79", 1) == Char('\x79'));
// An invalid byte at the requested index is reported as a raw byte.
assert!(get_maybe_char_at(b"h\x80llo", 1) == Byte(0x80));
// Multi-byte character: only the index of its first byte yields a `Char`;
// indices inside the sequence yield the continuation bytes.
assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 0) == Char('h'));
assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 1) == Char('❤'));
assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 2) == Byte(0x9d));
assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 3) == Byte(0xA4));
assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 4) == Char('l'));
assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 5) == Char('l'));
assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 6) == Char('❤'));
assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 7) == Byte(0x9d));
assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 8) == Byte(0xA4));
}
// `find_closing_brace` must return the index of the brace closing the first opening brace.
#[test]
fn test_find_closing_brace() {
// Simple braced variable.
check!(find_closing_brace(b"${foo}") == Some(5));
// An escaped opening brace does not open a nested pair.
check!(find_closing_brace(b"{\\{}foo") == Some(3));
// A nested pair is skipped before the outer brace is matched.
check!(find_closing_brace(b"{{}}foo $bar") == Some(3));
// Text before the opening brace is ignored, and an escaped closing brace is skipped.
check!(find_closing_brace(b"foo{\\}}bar") == Some(6));
}
}