use std::fmt;
use std::str::FromStr;
use eyre::WrapErr;
use nom::{
branch::alt,
bytes::complete::{tag, take_till1},
character::complete::anychar,
combinator::{all_consuming, map, opt, verify},
error::{context, convert_error, ContextError, ParseError, VerboseError},
multi::fold_many1,
sequence::{delimited, pair, preceded, separated_pair, tuple},
Finish, IResult,
};
#[cfg(feature = "full")]
use nom::{
character::complete::{space0, space1},
multi::separated_list1,
};
/// Trait alias bundling the nom error bounds every parser in this module
/// needs: `ParseError` + `ContextError` over `&'a str` input.
trait Err<'a>: 'a + ParseError<&'a str> + ContextError<&'a str> {}
/// Blanket impl: any conforming nom error type automatically satisfies `Err`.
impl<'a, T: 'a + ParseError<&'a str> + ContextError<&'a str>> Err<'a> for T {}
/// A parsed sequence of [`Token`]s: literal text interleaved with macro
/// expansions (and, with the "full" feature, function calls).
#[allow(clippy::module_name_repetitions)]
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct TokenString(Vec<Token>);
impl TokenString {
    /// Creates a token string containing no tokens at all.
    pub const fn empty() -> Self {
        Self(vec![])
    }

    /// Creates a token string holding a single literal-text token.
    pub fn text(text: impl Into<String>) -> Self {
        Self(vec![Token::Text(text.into())])
    }

    /// Creates a token string holding one macro expansion (without a
    /// `:from=to` replacement) whose name is plain text.
    pub fn r#macro(name: impl Into<String>) -> Self {
        Self(vec![Token::MacroExpansion {
            name: Self::text(name),
            replacement: None,
        }])
    }

    /// Wraps a single pre-built token.
    pub fn just(token: Token) -> Self {
        Self(vec![token])
    }

    /// Iterates over the tokens in order.
    pub fn tokens(&self) -> impl Iterator<Item = &Token> {
        self.0.iter()
    }

    /// Mutable reference to the first token.
    ///
    /// # Panics
    /// Panics if the token string holds zero tokens.
    pub fn first_token_mut(&mut self) -> &mut Token {
        &mut self.0[0]
    }

    /// Splits at the first occurrence of `delimiter` inside a text token,
    /// returning the tokens before and after it. Only text tokens are
    /// searched; macro-expansion tokens are copied through whole. Returns
    /// `None` when no text token contains `delimiter`.
    pub fn split_once(&self, delimiter: char) -> Option<(Self, Self)> {
        let mut before = vec![];
        let mut iter = self.0.iter();
        while let Some(token) = iter.next() {
            match token {
                Token::Text(text) if text.contains(delimiter) => {
                    // `contains` above guarantees both halves exist.
                    let mut halves = text.splitn(2, delimiter);
                    let head = halves.next().expect("splitn yields at least one piece");
                    let tail = halves.next().expect("delimiter is present");
                    before.push(Token::Text(head.into()));
                    let mut after = vec![Token::Text(tail.into())];
                    after.extend(iter.cloned());
                    return Some((Self(before), Self(after)));
                }
                _ => before.push(token.clone()),
            }
        }
        None
    }

    /// Whether the leading token is text starting with `pattern`.
    pub fn starts_with(&self, pattern: &str) -> bool {
        match self.0.first() {
            Some(Token::Text(t)) => t.starts_with(pattern),
            _ => false,
        }
    }

    /// Whether the trailing token is text ending with `pattern`.
    pub fn ends_with(&self, pattern: &str) -> bool {
        match self.0.last() {
            Some(Token::Text(t)) => t.ends_with(pattern),
            _ => false,
        }
    }

    /// Removes `prefix` from the front of the leading text token.
    /// No-op when the first token is not text or does not start with
    /// `prefix`. (Parameter was previously misnamed `suffix`.)
    pub fn strip_prefix(&mut self, prefix: &str) {
        if let Some(Token::Text(t)) = self.0.first_mut() {
            if let Some(stripped) = t.strip_prefix(prefix) {
                *t = stripped.into();
            }
        }
    }

    /// Removes `suffix` from the end of the trailing text token.
    /// No-op when the last token is not text or does not end with `suffix`.
    pub fn strip_suffix(&mut self, suffix: &str) {
        if let Some(Token::Text(t)) = self.0.last_mut() {
            if let Some(stripped) = t.strip_suffix(suffix) {
                *t = stripped.into();
            }
        }
    }

    /// Appends `other`, merging adjacent text tokens at the seam so the
    /// representation stays normalized (no two consecutive `Text` tokens).
    pub fn extend(&mut self, other: Self) {
        let mut incoming = other.0.into_iter().peekable();
        if let Some(Token::Text(text)) = self.0.last_mut() {
            // Fold every leading text token of `other` into our last one.
            while let Some(Token::Text(incoming_text)) =
                incoming.next_if(|x| matches!(x, Token::Text(_)))
            {
                text.push_str(&incoming_text);
            }
        }
        self.0.extend(incoming);
    }

    /// Trims leading whitespace from the first token, if it is text.
    pub fn trim_start(&mut self) {
        if let Some(Token::Text(t)) = self.0.first_mut() {
            *t = t.trim_start().into();
        }
    }

    /// Trims trailing whitespace from the last token, if it is text.
    pub fn trim_end(&mut self) {
        if let Some(Token::Text(t)) = self.0.last_mut() {
            *t = t.trim_end().into();
        }
    }

    /// True when there are no tokens, or only a single empty text token
    /// (the form produced by parsing an empty input).
    pub fn is_empty(&self) -> bool {
        match self.0.first() {
            None => true,
            Some(Token::Text(t)) => self.0.len() == 1 && t.is_empty(),
            _ => false,
        }
    }

    /// Whether any text token contains `pattern` as a substring.
    pub fn contains_text(&self, pattern: &str) -> bool {
        self.0
            .iter()
            .any(|t| matches!(t, Token::Text(text) if text.contains(pattern)))
    }
}
impl fmt::Display for TokenString {
    /// Renders every token back-to-back with no separators, reproducing
    /// the original macro syntax.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.0.iter().try_for_each(|token| write!(f, "{}", token))
    }
}
/// One piece of a parsed makefile-style value.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum Token {
/// Literal text containing no macro syntax.
Text(String),
/// A `$N` / `$(NAME)` / `${NAME}` expansion, optionally carrying a
/// `:from=to` replacement as in `$(SRCS:.c=.o)`.
MacroExpansion {
name: TokenString,
replacement: Option<(TokenString, TokenString)>,
},
/// A `$(function arg, arg, ...)` call — only with the "full" feature.
#[cfg(feature = "full")]
FunctionCall {
name: TokenString,
args: Vec<TokenString>,
},
}
impl fmt::Display for Token {
    /// Writes the token back out in source syntax: raw text, `$(name)`,
    /// `$(name:from=to)`, or `$(name arg, arg)` for function calls.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Text(text) => f.write_str(text),
            Self::MacroExpansion { name, replacement } => match replacement {
                None => write!(f, "$({})", name),
                Some((from, to)) => write!(f, "$({}:{}={})", name, from, to),
            },
            #[cfg(feature = "full")]
            Self::FunctionCall { name, args } => {
                let rendered: Vec<String> = args.iter().map(ToString::to_string).collect();
                write!(f, "$({} {})", name, rendered.join(", "))
            }
        }
    }
}
/// Which bracket pair encloses a macro body: `$(...)` or `${...}`.
#[derive(Clone, Copy)]
enum Delimiter {
Parens,
Braces,
}
impl Delimiter {
    /// Opening delimiter as a string slice.
    const fn start(&self) -> &'static str {
        if matches!(self, Self::Parens) {
            "("
        } else {
            "{"
        }
    }

    /// Opening delimiter as a single character.
    const fn start_char(&self) -> char {
        if matches!(self, Self::Parens) {
            '('
        } else {
            '{'
        }
    }

    /// Closing delimiter as a string slice.
    const fn end(&self) -> &'static str {
        if matches!(self, Self::Parens) {
            ")"
        } else {
            "}"
        }
    }
}
/// Parses a macro or function name: any tokens up to a character that ends
/// a name (`:`, `#`, `=`, space, or the closing delimiter `end`).
fn macro_function_name<'a, E: Err<'a>>(
end: char,
context: Delimiter,
) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> + 'a {
tokens_but_not(vec![':', '#', '=', ' ', end], context)
}
/// Parses the interior of a macro expansion: a name optionally followed by
/// a `:from=to` substitution, e.g. the `DATA:.c=.oof` in `$(DATA:.c=.oof)`.
fn macro_expansion_body<'a, E: Err<'a>>(
end: char,
delim: Delimiter,
) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> + 'a {
// `:from=to` suffix: `from` stops at `=`, `to` stops at the closing
// delimiter. Both sides may themselves contain macro expansions.
let subst = preceded(
tag(":"),
separated_pair(
tokens_but_not(vec!['='], delim),
tag("="),
tokens_but_not(vec![end], delim),
),
);
context(
"macro_expansion_body",
map(
pair(macro_function_name(end, delim), opt(subst)),
|(name, replacement)| TokenString::just(Token::MacroExpansion { name, replacement }),
),
)
}
#[cfg(feature = "full")]
/// Parses the interior of a function call: a name, at least one space, then
/// one or more comma-separated arguments, e.g. `$(foo bar, baz)`.
fn function_call_body<'a, E: Err<'a>>(
end: char,
delim: Delimiter,
) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> {
context(
"function_call_body",
map(
separated_pair(
macro_function_name(end, delim),
space1,
// Arguments are separated by a comma plus optional spaces; each
// argument stops at the next comma or the closing delimiter.
separated_list1(
pair(tag(","), space0),
tokens_but_not(vec![',', end], delim),
),
),
|(name, args)| TokenString::just(Token::FunctionCall { name, args }),
),
)
}
#[cfg(feature = "full")]
/// With the "full" feature, a macro body is tried first as a function call
/// and then as a plain macro expansion.
fn macro_body<'a, E: Err<'a>>(
end: char,
context: Delimiter,
) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> {
alt((
function_call_body(end, context),
macro_expansion_body(end, context),
))
}
#[cfg(not(feature = "full"))]
/// Without the "full" feature, a macro body is only ever a macro expansion.
fn macro_body<'a, E: Err<'a>>(
end: char,
context: Delimiter,
) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> {
macro_expansion_body(end, context)
}
/// Parses a parenthesized form: `$( body )`.
fn parens_macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> {
delimited(tag("$("), macro_body(')', Delimiter::Parens), tag(")"))(input)
}
/// Parses a braced form: `${ body }`.
fn braces_macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> {
delimited(tag("${"), macro_body('}', Delimiter::Braces), tag("}"))(input)
}
/// Parses the short `$X` form: `$` followed by one character that is not an
/// opening delimiter. `$$` is the escape for a literal `$`.
fn tiny_macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> {
    let escaped_or_name = preceded(tag("$"), verify(anychar, |&c| c != '(' && c != '{'));
    map(escaped_or_name, |c| match c {
        '$' => TokenString::text("$"),
        name => TokenString::r#macro(name),
    })(input)
}
/// Parses any `$` form: `$X`, `$(...)`, or `${...}`. The tiny form rejects
/// `(`/`{` after `$`, so the three alternatives never overlap.
fn macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> {
context(
"macro_expansion",
alt((
tiny_macro_expansion,
parens_macro_expansion,
braces_macro_expansion,
)),
)(input)
}
/// Parses a run of plain text, stopping before `$` or any character in
/// `ends`. Consumes at least one character or fails.
fn text_but_not<'a, E: Err<'a>>(
    ends: Vec<char>,
) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> {
    let is_stop = move |c: char| ends.contains(&c) || c == '$';
    map(take_till1(is_stop), TokenString::text)
}
/// Parses a balanced `(...)` / `{...}` group *inside* a macro body, keeping
/// the delimiters as literal text so inputs like `$(shell echo (hi))`
/// round-trip through `Display`.
fn nested_delimiters<'a, E: Err<'a>>(
ends: Vec<char>,
context: Delimiter,
) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> {
map(
tuple((
tag(context.start()),
// Closure indirection: `tokens_but_not` consumes its Vec, so `ends`
// must be cloned each time the returned parser runs.
move |x| tokens_but_not(ends.clone(), context)(x),
tag(context.end()),
)),
|(left, center, right)| {
// Reassemble delimiters + inner tokens into one token string;
// `extend` merges adjacent text tokens at the seams.
let mut tokens = TokenString::text(left);
tokens.extend(center);
tokens.extend(TokenString::text(right));
tokens
},
)
}
/// Parses one token inside a delimited context: plain text (which must also
/// stop at the context's opening delimiter so nested groups get a chance),
/// a macro expansion, or a nested balanced delimiter group.
fn single_token_but_not<'a, E: Err<'a>>(
    ends: Vec<char>,
    context: Delimiter,
) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> {
    // Text additionally stops at the opening delimiter of this context.
    let text_ends: Vec<char> = ends
        .iter()
        .copied()
        .chain(std::iter::once(context.start_char()))
        .collect();
    alt((
        text_but_not(text_ends),
        macro_expansion,
        nested_delimiters(ends, context),
    ))
}
/// Parses one top-level token: plain text (stopping only at `$`) or a macro
/// expansion.
fn single_token<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> {
alt((text_but_not(vec![]), macro_expansion))(input)
}
fn empty_tokens<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> {
context(
"empty_tokens",
map(tag(""), |_| TokenString(vec![Token::Text(String::new())])),
)(input)
}
/// Runs `parser` one or more times, concatenating the results with
/// `TokenString::extend` (which merges adjacent text tokens).
fn fold_tokens<'a, E: Err<'a>>(
parser: impl FnMut(&'a str) -> IResult<&'a str, TokenString, E>,
) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> {
// NOTE(review): `fold_many1` taking an initial *value* looks like the
// nom 6.x signature — nom 7 takes an init closure instead. Confirm the
// pinned nom version before upgrading.
fold_many1(parser, TokenString::empty(), |mut acc, x| {
acc.extend(x);
acc
})
}
/// Parses a (possibly empty) token sequence inside a delimited context,
/// stopping at any character in `ends`. Falls back to `empty_tokens` when
/// nothing matches, so this parser always succeeds.
fn tokens_but_not<'a, E: Err<'a>>(
ends: Vec<char>,
context: Delimiter,
) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> {
alt((
fold_tokens(single_token_but_not(ends, context)),
empty_tokens,
))
}
/// Parses a (possibly empty) top-level token sequence.
fn tokens<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> {
alt((fold_tokens(single_token), empty_tokens))(input)
}
/// Like `tokens`, but fails unless the entire input is consumed.
fn full_text_tokens<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> {
all_consuming(tokens)(input)
}
/// Tokenizes an entire input string into a [`TokenString`].
///
/// # Errors
/// Returns an error (with a human-readable trace from nom's
/// `convert_error`) when the input cannot be fully parsed.
pub fn tokenize(input: &str) -> eyre::Result<TokenString> {
    let parsed = full_text_tokens(input).finish();
    let (_rest, tokens) = parsed
        .map_err(|err: VerboseError<&str>| eyre::eyre!(convert_error(input, err)))
        .with_context(|| format!("couldn't parse {:?}", input))?;
    Ok(tokens)
}
/// Allows `"...".parse::<TokenString>()` as a convenience over `tokenize`.
impl FromStr for TokenString {
type Err = eyre::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
tokenize(s)
}
}
// Unit tests: each test builds the expected token tree by hand and compares
// it against `tokenize`'s output.
#[cfg(test)]
mod test {
use super::*;
type R = eyre::Result<()>;
// Test-only convenience so `vec![Token...]` literals coerce to TokenString.
impl From<Vec<Token>> for TokenString {
fn from(x: Vec<Token>) -> Self {
Self(x)
}
}
fn token_text(text: impl Into<String>) -> Token {
Token::Text(text.into())
}
fn token_macro_expansion(name: impl Into<String>) -> Token {
Token::MacroExpansion {
name: TokenString::text(name),
replacement: None,
}
}
fn token_macro_expansion_replacement(
name: impl Into<String>,
subst1: impl Into<TokenString>,
subst2: impl Into<TokenString>,
) -> Token {
Token::MacroExpansion {
name: TokenString::text(name),
replacement: Some((subst1.into(), subst2.into())),
}
}
#[cfg(feature = "full")]
fn token_function_call(name: impl Into<String>, args: Vec<impl Into<TokenString>>) -> Token {
Token::FunctionCall {
name: TokenString::text(name),
args: args.into_iter().map(|x| x.into()).collect(),
}
}
// Plain text with no `$` parses to a single Text token.
#[test]
fn no_macros() -> R {
let text = "This is an example sentence! There aren't macros in it at all!";
let tokens = tokenize(text)?;
assert_eq!(tokens, TokenString(vec![token_text(text)]));
Ok(())
}
// All three expansion syntaxes: `$Q`, `$(BORING)`, `${YEET}`.
#[test]
fn no_replacement() -> R {
let text = "This is a $Q sentence! There are $(BORING) macros in it at ${YEET}!";
let tokens = tokenize(text)?;
assert_eq!(
tokens,
TokenString(vec![
token_text("This is a "),
token_macro_expansion("Q"),
token_text(" sentence! There are "),
token_macro_expansion("BORING"),
token_text(" macros in it at "),
token_macro_expansion("YEET"),
token_text("!"),
])
);
Ok(())
}
// `$$` escapes to a literal `$`, merged into the surrounding text.
#[test]
fn escaped() -> R {
let text = "This costs $$2 to run, which isn't ideal";
let tokens = tokenize(text)?;
assert_eq!(
tokens,
TokenString(vec![token_text("This costs $2 to run, which isn't ideal"),])
);
Ok(())
}
// `:from=to` replacements, including an empty `to` side.
#[test]
fn replacement() -> R {
let text = "Can I get a $(DATA:.c=.oof) in this ${SWAG:.yolo=}";
let tokens = tokenize(text)?;
assert_eq!(
tokens,
TokenString(vec![
token_text("Can I get a "),
token_macro_expansion_replacement(
"DATA",
vec![token_text(".c")],
vec![token_text(".oof")]
),
token_text(" in this "),
token_macro_expansion_replacement(
"SWAG",
vec![token_text(".yolo")],
vec![token_text("")]
),
])
);
Ok(())
}
// Replacements nested inside replacements, mixing both delimiter styles.
#[test]
fn hell() -> R {
let text = "$(OOF:${ouch:hi=hey} there=$(owie:$(my)=${bones})), bro.";
let tokens = tokenize(text)?;
assert_eq!(
tokens,
TokenString(vec![
token_macro_expansion_replacement(
"OOF",
vec![
token_macro_expansion_replacement(
"ouch",
vec![token_text("hi")],
vec![token_text("hey")]
),
token_text(" there"),
],
vec![token_macro_expansion_replacement(
"owie",
vec![token_macro_expansion("my")],
vec![token_macro_expansion("bones")],
),],
),
token_text(", bro."),
])
);
Ok(())
}
// Function call whose argument is itself a macro expansion.
#[cfg(feature = "full")]
#[test]
fn function_hell() -> R {
let text = "$(foo bar, $(baz))";
let tokens = tokenize(text)?;
assert_eq!(
tokens,
TokenString(vec![token_function_call(
"foo",
vec![TokenString::text("bar"), tokenize("$(baz)")?]
)])
);
Ok(())
}
// Macro names may themselves contain macro expansions, arbitrarily deep.
#[test]
fn triple_mega_deluxe_super_hell() -> R {
let text = "$($($(a)b)c)";
let tokens = tokenize(text)?;
assert_eq!(
tokens,
TokenString(vec![Token::MacroExpansion {
name: TokenString(vec![
Token::MacroExpansion {
name: TokenString(vec![
Token::MacroExpansion {
name: TokenString::text("a"),
replacement: None,
},
token_text("b"),
]),
replacement: None,
},
token_text("c")
]),
replacement: None,
}]),
);
Ok(())
}
// Bare parens inside a function argument survive as literal text
// (exercises `nested_delimiters`).
#[cfg(feature = "full")]
#[test]
fn i_will_attack_and_destroy_god() -> R {
let text = "$(shell echo (hi) (bro) yeet)";
let tokens = tokenize(text)?;
assert_eq!(
tokens,
TokenString(vec![Token::FunctionCall {
name: TokenString::text("shell"),
args: vec![TokenString::text("echo (hi) (bro) yeet")],
}])
);
Ok(())
}
}