use std::os::fd::RawFd;
use anyhow::anyhow;
use nom::{
branch::alt,
bytes::complete::{is_not, tag, take_while, take_while1},
character::complete::{char, digit1, multispace0},
combinator::{eof, map, opt, value},
multi::{fold_many0, many_till},
sequence::{delimited, pair, preceded, tuple},
Finish, IResult,
};
/// A pair of file descriptors read from the bracketed `[N=M]` syntax.
///
/// The first field is the descriptor before `=`; the second is the descriptor
/// after `=`, or `None` when nothing follows the `=` (as in `[N=]`).
#[derive(Debug, PartialEq)]
pub struct FdPair(RawFd, Option<RawFd>);
/// Access mode attached to a descriptor redirection; it is chosen from the
/// operator written in front of the bracketed descriptor pair.
#[derive(Debug, PartialEq)]
pub enum Mode {
/// Selected by the `<` operator.
Read,
/// Selected by the `>` operator.
Write,
/// Selected by the `<>` operator.
ReadWrite,
}
/// Kind of file redirection; it is chosen from the operator written in front
/// of the target word.
#[derive(Debug, PartialEq)]
pub enum Redir {
/// Selected by the `<` operator.
Input,
/// Selected by the `>` operator.
Output,
/// Selected by the `<>` operator.
InputOutput,
/// Selected by the `>>` operator.
DoubleOutput,
}
/// A lexical token produced by `tokenize`.
#[derive(Debug, PartialEq)]
pub enum Token {
/// Emitted by `word_token` when it finds no word (for example when only a
/// comment is present).
Eof,
/// The `for` keyword.
For,
/// The `in` keyword.
In,
/// The `while` keyword.
While,
/// The `if` keyword.
If,
/// The `not` keyword.
Not,
/// The `~` character.
Twiddle,
/// The `!` character.
Bang,
/// The `subshell` keyword.
Subshell,
/// The `switch` keyword.
Switch,
/// The `fn` keyword.
Fn,
/// The `redir` keyword.
Redir,
/// The `redirw` keyword.
RedirW,
/// The `dup` keyword.
Dup,
/// The `{` character.
OpenBrace,
/// The `}` character.
CloseBrace,
/// The `(` character.
OpenParen,
/// The `)` character.
CloseParen,
/// The `pcmd` keyword.
Pcmd,
/// The `pipefd` keyword.
PipeFd,
/// The `&&` operator.
AndAnd,
/// The `||` operator.
OrOr,
/// The `^` character.
Caret,
/// The `=` character.
Equal,
/// The `|` operator, optionally followed by a bracketed descriptor pair.
Pipe(Option<FdPair>),
// `HereDoc(text, quoted)`: the word after `<<`; the flag is true when that
// word was written as a single-quoted string.
// `CmdRedir(command, is_input)`: the text between the braces of `<{...}` or
// `>{...}`; the flag is true for `<`.
// `FdRedir(pair, mode)`: `<`, `>` or `<>` followed by a bracketed descriptor
// pair.
HereDoc(String, bool), CmdRedir(String, bool), FdRedir(FdPair, Mode),
/// `<`, `>`, `<>` or `>>` followed by a target word.
FileRedir(String, Redir),
// `Argument(name, subscript)`: `$name`, with the optional text found between
// parentheses right after the name.
// `ArgumentQuote(name)`: `$"name`.
Argument(String, Option<String>), ArgumentQuote(String),
/// `$#name`.
ArgumentSize(String),
/// The text between the braces of a backquote-brace command.
Cmd(String),
/// The text between the braces of a backquote `split {` command.
CmdSplit(String),
/// A bare or single-quoted word.
Word(String),
}
pub fn tokenize(input: &str) -> Result<Vec<Token>, anyhow::Error> {
many_till(
alt((
alt((
argument_token,
caret_token,
command_token,
split_command_token,
pipe_token,
bang_token,
subshell_token,
andand_token,
oror_token,
heredoc_token,
command_redir_token,
fd_redir_token,
file_redir_token,
if_token,
while_token,
for_token,
switch_token,
close_paren_token,
not_token,
open_paren_token,
)),
alt((
in_token,
twiddle_token,
fn_token,
redir_token,
redirw_token,
dup_token,
open_brace_token,
close_brace_token,
pcmd_token,
pipefd_token,
equal_token,
word_token,
)),
)),
eof,
)(input)
.map(|(remaining, (tokens, _))| (remaining, tokens))
.finish()
.map(|(_, tokens)| tokens)
.map_err(|error| anyhow!("! {error:?}"))
}
/// Tells whether `c` may appear in a variable name: any alphanumeric
/// character, `_` or `*`.
fn is_id_char(c: char) -> bool {
    matches!(c, '_' | '*') || c.is_alphanumeric()
}
/// Tells whether `c` may appear in an unquoted word.
///
/// Whitespace is excluded, and so is each of the thirteen characters listed
/// in `RESERVED`.
fn is_word_char(c: char) -> bool {
    const RESERVED: [char; 13] = [
        '&', '|', '^', '$', '=', '`', '\'', '{', '}', '(', ')', '<', '>',
    ];
    !(c.is_whitespace() || RESERVED.contains(&c))
}
fn variable_name(input: &str) -> IResult<&str, &str> {
take_while1(|c: char| is_id_char(c))(input)
}
/// Parses a variable name optionally followed by a parenthesised subscript.
///
/// Returns the name and, when `(` follows it, the text up to the closing `)`.
/// An opening parenthesis without a matching `)` makes the whole parser fail.
fn variable_name_subscript(input: &str) -> IResult<&str, (&str, Option<&str>)> {
    let (after_name, name) = variable_name(input)?;
    match opt(preceded(tag("("), take_while(|c: char| c != ')')))(after_name)? {
        (rest, None) => Ok((rest, (name, None))),
        (rest, Some(index)) => {
            // The subscript text stops right before `)`, which is still pending.
            let (rest, _) = tag(")")(rest)?;
            Ok((rest, (name, Some(index))))
        }
    }
}
fn argument_token(input: &str) -> IResult<&str, Token> {
alt((
map(
tuple((multispace0, tag("$\""), variable_name)),
|(_, _, name)| Token::ArgumentQuote(name.to_string()),
),
map(
tuple((multispace0, tag("$#"), variable_name)),
|(_, _, name)| Token::ArgumentSize(name.to_string()),
),
map(
tuple((multispace0, tag("$"), variable_name_subscript)),
|(_, _, (name, subscript))| {
Token::Argument(name.to_string(), subscript.map(|s| s.to_string()))
},
),
))(input)
}
/// Parses a file redirection: one of `>>`, `<>`, `<`, `>` followed by a word.
///
/// Whitespace is allowed before the operator and before the target word.
fn file_redir_token(input: &str) -> IResult<&str, Token> {
    // Two-character operators are listed first so they are not split.
    let (rest, symbol) =
        preceded(multispace0, alt((tag(">>"), tag("<>"), tag("<"), tag(">"))))(input)?;
    let kind = match symbol {
        ">>" => Redir::DoubleOutput,
        "<>" => Redir::InputOutput,
        "<" => Redir::Input,
        ">" => Redir::Output,
        _ => unreachable!(),
    };
    let (rest, target) = preceded(multispace0, word)(rest)?;
    Ok((rest, Token::FileRedir(target, kind)))
}
/// Parses `|`, optionally followed by a bracketed descriptor pair.
fn pipe_token(input: &str) -> IResult<&str, Token> {
    let (after_bar, _) = preceded(multispace0, tag("|"))(input)?;
    opt(preceded(multispace0, fd_redir))(after_bar).map(|(rest, fds)| (rest, Token::Pipe(fds)))
}
/// Parses `<<` followed by a word.
///
/// The flag in [`Token::HereDoc`] is `true` when that word was written as a
/// single-quoted string and `false` otherwise.
fn heredoc_token(input: &str) -> IResult<&str, Token> {
    // The quoted form is tried first so that the flag records the quoting.
    let marker = alt((
        map(quoted_string, |text| Token::HereDoc(text, true)),
        map(word, |text| Token::HereDoc(text, false)),
    ));
    preceded(
        preceded(multispace0, tag("<<")),
        preceded(multispace0, marker),
    )(input)
}
/// Parses `<{command}` or `>{command}`.
///
/// The command is the non-empty text up to the first `}`. The flag in
/// [`Token::CmdRedir`] is `true` for `<` and `false` for `>`.
fn command_redir_token(input: &str) -> IResult<&str, Token> {
    let (rest, direction) = preceded(multispace0, alt((tag("<"), tag(">"))))(input)?;
    let is_input = direction == "<";
    let (rest, body) = preceded(
        preceded(multispace0, tag("{")),
        take_while1(|c: char| c != '}'),
    )(rest)?;
    let (rest, _) = tag("}")(rest)?;
    Ok((rest, Token::CmdRedir(body.to_string(), is_input)))
}
/// Parses a descriptor redirection: `<>`, `<` or `>` followed by a bracketed
/// descriptor pair.
fn fd_redir_token(input: &str) -> IResult<&str, Token> {
    // `<>` is listed first so it is not read as `<` followed by `>`.
    let (rest, symbol) =
        preceded(multispace0, alt((tag("<>"), tag("<"), tag(">"))))(input)?;
    let (rest, fds) = preceded(multispace0, fd_redir)(rest)?;
    let mode = match symbol {
        "<" => Mode::Read,
        ">" => Mode::Write,
        "<>" => Mode::ReadWrite,
        _ => unreachable!(),
    };
    Ok((rest, Token::FdRedir(fds, mode)))
}
fn fd_redir<'a>(input: &'a str) -> IResult<&'a str, FdPair> {
let (input, _) = preceded(multispace0, tag("["))(input)?;
let (input, fd) = preceded(multispace0, digit1)(input)?;
let (input, _) = preceded(multispace0, tag("="))(input)?;
let (input, operator) = opt(preceded(multispace0, digit1))(input)?;
let (input, _) = preceded(multispace0, tag("]"))(input)?;
Ok((
input,
FdPair(fd.parse().unwrap(), operator.map(|op| op.parse().unwrap())),
))
}
/// Parses a backquote-brace command after optional leading whitespace.
///
/// The command is the non-empty text between the opening marker and the first
/// `}`; it is returned in [`Token::Cmd`].
fn command_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = multispace0(input)?;
    let (rest, _) = tag("`{")(rest)?;
    let (rest, body) = take_while1(|c: char| c != '}')(rest)?;
    tag("}")(rest).map(|(rest, _)| (rest, Token::Cmd(body.to_string())))
}
/// Parses a backquote `split {` command after optional leading whitespace.
///
/// The command is the non-empty text between the opening marker and the first
/// `}`; it is returned in [`Token::CmdSplit`].
fn split_command_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = multispace0(input)?;
    let (rest, _) = tag("`split {")(rest)?;
    let (rest, body) = take_while1(|c: char| c != '}')(rest)?;
    tag("}")(rest).map(|(rest, _)| (rest, Token::CmdSplit(body.to_string())))
}
fn for_token(input: &str) -> IResult<&str, Token> {
let (input, _) = preceded(multispace0, tag("for"))(input)?;
Ok((input, Token::For))
}
fn in_token(input: &str) -> IResult<&str, Token> {
let (input, _) = preceded(multispace0, tag("in"))(input)?;
Ok((input, Token::In))
}
fn while_token(input: &str) -> IResult<&str, Token> {
let (input, _) = preceded(multispace0, tag("while"))(input)?;
Ok((input, Token::While))
}
fn if_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("if")), |_| Token::If)(input)
}
fn not_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("not")), |_| Token::Not)(input)
}
/// Recognises `~` after optional leading whitespace.
fn twiddle_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char('~'))(input)?;
    Ok((rest, Token::Twiddle))
}
/// Recognises `!` after optional leading whitespace.
fn bang_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char('!'))(input)?;
    Ok((rest, Token::Bang))
}
fn subshell_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("subshell")), |_| Token::Subshell)(input)
}
fn switch_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("switch")), |_| Token::Switch)(input)
}
fn fn_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("fn")), |_| Token::Fn)(input)
}
fn redir_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("redir")), |_| Token::Redir)(input)
}
fn redirw_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("redirw")), |_| Token::RedirW)(input)
}
fn dup_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("dup")), |_| Token::Dup)(input)
}
/// Parses an optional word followed by an optional comment.
///
/// Yields [`Token::Word`] when a word was found and [`Token::Eof`] otherwise.
/// The parser itself never fails; when neither a word nor a comment is
/// present it succeeds without consuming any input.
fn word_token(input: &str) -> IResult<&str, Token> {
    let (rest, found) = opt(word)(input)?;
    // A comment, if present, is consumed and dropped.
    let (rest, _) = opt(preceded(multispace0, comment))(rest)?;
    let token = match found {
        Some(text) => Token::Word(text),
        None => Token::Eof,
    };
    Ok((rest, token))
}
/// Parses one word after optional leading whitespace: a single-quoted string
/// is tried first, then a bare word.
fn word(input: &str) -> IResult<&str, String> {
    preceded(multispace0, alt((quoted_string, non_quoted_word)))(input)
}
/// Parses a non-empty bare word: a run of word characters (see
/// `is_word_char`) that stops at `#`.
fn non_quoted_word(input: &str) -> IResult<&str, String> {
    let (rest, text) = take_while1(|c: char| is_word_char(c) && c != '#')(input)?;
    Ok((rest, text.to_string()))
}
/// Parses a single-quoted string and returns its contents.
///
/// Inside the quotes, two consecutive quotes (`''`) stand for one literal
/// quote character; everything else is taken verbatim.
fn quoted_string(input: &str) -> IResult<&str, String> {
    // One piece of the body: either an escaped quote or a run of non-quotes.
    let piece = alt((value("'", tag("''")), is_not("'")));
    let body = fold_many0(piece, String::new, |mut text: String, part: &str| {
        text.push_str(part);
        text
    });
    delimited(char('\''), body, char('\''))(input)
}
/// Parses a comment: `#` and everything up to, but not including, the next
/// line break (`\n` or `\r`) or the end of the input.
///
/// An empty comment (a `#` directly followed by a line break or by the end of
/// the input) is accepted as well.
fn comment(input: &str) -> IResult<&str, ()> {
    let (rest, _) = char('#')(input)?;
    // The comment text may be empty, so the end of the line is located by
    // hand rather than with a parser that requires at least one character.
    let end = rest
        .find(|c: char| c == '\n' || c == '\r')
        .unwrap_or(rest.len());
    Ok((&rest[end..], ()))
}
/// Recognises `{` after optional leading whitespace.
fn open_brace_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char('{'))(input)?;
    Ok((rest, Token::OpenBrace))
}
/// Recognises `}` after optional leading whitespace.
fn close_brace_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char('}'))(input)?;
    Ok((rest, Token::CloseBrace))
}
/// Recognises `(` after optional leading whitespace.
fn open_paren_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char('('))(input)?;
    Ok((rest, Token::OpenParen))
}
/// Recognises `)` after optional leading whitespace.
fn close_paren_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char(')'))(input)?;
    Ok((rest, Token::CloseParen))
}
/// Recognises `^` after optional leading whitespace.
fn caret_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char('^'))(input)?;
    Ok((rest, Token::Caret))
}
/// Recognises `=` after optional leading whitespace.
fn equal_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char('='))(input)?;
    Ok((rest, Token::Equal))
}
fn pcmd_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("pcmd")), |_| Token::Pcmd)(input)
}
fn pipefd_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("pipefd")), |_| Token::PipeFd)(input)
}
/// Recognises `&&` after optional leading whitespace.
fn andand_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, tag("&&"))(input)?;
    Ok((rest, Token::AndAnd))
}
/// Recognises `||` after optional leading whitespace.
fn oror_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, tag("||"))(input)?;
    Ok((rest, Token::OrOr))
}
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::Result;

    /// `<{...}` and `>{...}` become `CmdRedir` tokens carrying the command
    /// text first and a flag that is `true` for the `<` form.
    #[test]
    fn command_redirection() -> Result<()> {
        assert_eq!(
            tokenize("<{cmd}")?,
            vec![Token::CmdRedir("cmd".to_string(), true)]
        );
        assert_eq!(
            tokenize(">{cmd}")?,
            vec![Token::CmdRedir("cmd".to_string(), false)]
        );
        assert_eq!(
            tokenize("cmp <{old} <{new}")?,
            vec![
                Token::Word("cmp".to_string()),
                Token::CmdRedir("old".to_string(), true),
                Token::CmdRedir("new".to_string(), true),
            ]
        );
        Ok(())
    }
}