use std::os::fd::RawFd;
use anyhow::anyhow;
use nom::{
branch::alt,
bytes::complete::{is_not, tag, take_while1},
character::complete::{char, digit1, multispace0},
combinator::{eof, map, opt, value},
multi::{fold_many0, many_till},
sequence::{delimited, preceded},
Finish, IResult,
};
/// A pair of file descriptor numbers written as `[src=dst]` directly after a
/// redirection operator.
#[derive(Debug, PartialEq)]
pub struct FdRedirection {
/// Descriptor number written before the `=`.
src: RawFd,
/// Descriptor number written after the `=`.
dst: RawFd,
}
/// The kind of redirection operator that introduced a redirection token.
#[derive(Debug, PartialEq)]
pub enum Redir {
/// The `|` operator.
Pipe,
/// The `<` operator.
InputRedir,
/// The `<<` operator.
DoubleInputRedir,
/// The `>` operator.
OutputRedir,
/// The `>>` operator.
DoubleOutputRedir,
}
/// A lexical token returned by `tokenize`.
#[derive(Debug, PartialEq)]
pub enum Token {
/// The `for` keyword.
For,
/// The `in` keyword.
In,
/// The `while` keyword.
While,
/// The `if` keyword.
If,
/// The `not` keyword.
Not,
/// The `~` character.
Twiddle,
/// The `!` character.
Bang,
/// A `|` character that was not consumed as part of a redirection token.
Pipe,
/// The `subshell` keyword.
Subshell,
/// The `switch` keyword.
Switch,
/// The `fn` keyword.
Fn,
/// The `redir` keyword.
Redir,
/// The `redirw` keyword.
RedirW,
/// The `dup` keyword.
Dup,
/// A bare word or the contents of a single-quoted string.
Word(String),
/// The `{` character.
OpenBrace,
/// The `}` character.
CloseBrace,
/// The `(` character.
OpenParen,
/// The `)` character.
CloseParen,
/// The `pcmd` keyword.
Pcmd,
/// The `pipefd` keyword.
PipeFd,
/// The `&&` operator.
AndAnd,
/// The `||` operator.
OrOr,
/// A run of one or more `^` / `$` characters, stored verbatim.
Operator(String),
/// A redirection operator immediately followed by a `[n=m]` descriptor pair.
FdRedirection((Redir, FdRedirection)),
/// A redirection operator followed by a word (bare or single-quoted).
FileRedirection((Redir, String)),
}
pub fn tokenize(input: &str) -> Result<Vec<Token>, anyhow::Error> {
many_till(
alt((
alt((
if_token,
while_token,
for_token,
switch_token,
close_paren_token,
not_token,
andand_token,
oror_token,
bang_token,
subshell_token,
redirection_token,
pipe_token,
operator,
open_paren_token,
)),
alt((
in_token,
twiddle_token,
fn_token,
redir_token,
redirw_token,
dup_token,
open_brace_token,
close_brace_token,
pcmd_token,
pipefd_token,
word_token,
)),
)),
eof,
)(input)
.map(|(remaining, (tokens, _))| (remaining, tokens))
.finish()
.map(|(_, tokens)| tokens)
.map_err(|error| anyhow!("! {error:?}"))
}
/// Reports whether `c` may appear in a bare (unquoted) word.
///
/// Whitespace and the shell metacharacters listed in `SPECIAL` end a word;
/// every other character is allowed.
fn is_word_char(c: char) -> bool {
    const SPECIAL: &str = r#"&|^$=`'{}()<>"#;
    !(c.is_whitespace() || SPECIAL.contains(c))
}
/// Maps the text of a redirection operator to its [`Redir`] kind.
///
/// # Panics
///
/// Panics if `redirection` is not one of `|`, `<`, `<<`, `>` or `>>`.
fn redir_from_str(redirection: &str) -> Redir {
    // Every accepted operator is ASCII, so matching on the bytes is exact.
    match redirection.as_bytes() {
        [b'|'] => Redir::Pipe,
        [b'<'] => Redir::InputRedir,
        [b'<', b'<'] => Redir::DoubleInputRedir,
        [b'>'] => Redir::OutputRedir,
        [b'>', b'>'] => Redir::DoubleOutputRedir,
        _ => unreachable!(),
    }
}
fn operator(input: &str) -> IResult<&str, Token> {
let (input, op) = preceded(multispace0, take_while1(|c: char| r#"^$"#.contains(c)))(input)?;
Ok((input, Token::Operator(op.to_string())))
}
/// Recognizes a redirection: one of `<<`, `>>`, `<`, `>` or `|` (after optional
/// leading whitespace) followed by its target.
///
/// When the operator is immediately followed by a `[n=m]` descriptor pair the
/// result is [`Token::FdRedirection`]; otherwise the following word (bare or
/// single-quoted, optionally preceded by whitespace) becomes the target of a
/// [`Token::FileRedirection`]. The parser fails if neither form follows the
/// operator.
fn redirection_token(input: &str) -> IResult<&str, Token> {
    let (input, _) = multispace0(input)?;
    // `alt` commits to the first alternative that matches, so the
    // two-character operators must be listed before their one-character
    // prefixes or they could never be recognized.
    let (input, symbol) =
        alt((tag("<<"), tag(">>"), tag("<"), tag(">"), tag("|")))(input)?;
    let redirection = redir_from_str(symbol);
    match opt(fd_redirection)(input)? {
        (input, Some(fds)) => Ok((input, Token::FdRedirection((redirection, fds)))),
        (input, None) => {
            let (input, target) = word(input)?;
            Ok((input, Token::FileRedirection((redirection, target))))
        }
    }
}
fn fd_redirection<'a>(input: &'a str) -> IResult<&'a str, FdRedirection> {
let (input, _) = tag("[")(input)?;
let (input, fd) = digit1(input)?;
let (input, _) = tag("=")(input)?;
let (input, operator) = digit1(input)?;
let (input, _) = tag("]")(input)?;
Ok((
input,
FdRedirection {
src: fd.parse().unwrap(),
dst: operator.parse().unwrap(),
},
))
}
fn for_token(input: &str) -> IResult<&str, Token> {
let (input, _) = preceded(multispace0, tag("for"))(input)?;
Ok((input, Token::For))
}
fn in_token(input: &str) -> IResult<&str, Token> {
let (input, _) = preceded(multispace0, tag("in"))(input)?;
Ok((input, Token::In))
}
fn while_token(input: &str) -> IResult<&str, Token> {
let (input, _) = preceded(multispace0, tag("while"))(input)?;
Ok((input, Token::While))
}
fn if_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("if")), |_| Token::If)(input)
}
fn not_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("not")), |_| Token::Not)(input)
}
/// Recognizes a `~` character, skipping any leading whitespace.
fn twiddle_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char('~'))(input)?;
    Ok((rest, Token::Twiddle))
}
/// Recognizes a `!` character, skipping any leading whitespace.
fn bang_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char('!'))(input)?;
    Ok((rest, Token::Bang))
}
fn subshell_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("subshell")), |_| Token::Subshell)(input)
}
fn switch_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("switch")), |_| Token::Switch)(input)
}
fn fn_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("fn")), |_| Token::Fn)(input)
}
fn redir_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("redir")), |_| Token::Redir)(input)
}
fn redirw_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("redirw")), |_| Token::RedirW)(input)
}
fn dup_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("dup")), |_| Token::Dup)(input)
}
/// Parses a single-quoted string and returns its contents.
///
/// Inside the quotes, two consecutive quotes (`''`) stand for one literal
/// quote character; everything else up to the closing quote is copied as is.
/// No leading whitespace is skipped.
fn quoted_string(input: &str) -> IResult<&str, String> {
    let escaped_quote = value("'", tag("''"));
    let plain_run = is_not("'");
    let contents = fold_many0(
        alt((escaped_quote, plain_run)),
        String::new,
        |mut text: String, piece: &str| {
            text.push_str(piece);
            text
        },
    );
    delimited(char('\''), contents, char('\''))(input)
}
/// Parses one word after optional leading whitespace.
///
/// A word is either a single-quoted string (whose unquoted contents are
/// returned) or a non-empty run of characters accepted by `is_word_char`.
fn word(input: &str) -> IResult<&str, String> {
    let bare_word = map(take_while1(is_word_char), |run: &str| run.to_string());
    preceded(multispace0, alt((quoted_string, bare_word)))(input)
}
fn word_token(input: &str) -> IResult<&str, Token> {
let (input, words) = word(input)?;
Ok((input, Token::Word(words.to_string())))
}
/// Recognizes a `{` character, skipping any leading whitespace.
fn open_brace_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char('{'))(input)?;
    Ok((rest, Token::OpenBrace))
}
/// Recognizes a `}` character, skipping any leading whitespace.
fn close_brace_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char('}'))(input)?;
    Ok((rest, Token::CloseBrace))
}
/// Recognizes a `(` character, skipping any leading whitespace.
fn open_paren_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char('('))(input)?;
    Ok((rest, Token::OpenParen))
}
/// Recognizes a `)` character, skipping any leading whitespace.
fn close_paren_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char(')'))(input)?;
    Ok((rest, Token::CloseParen))
}
/// Recognizes a `|` character, skipping any leading whitespace.
fn pipe_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, char('|'))(input)?;
    Ok((rest, Token::Pipe))
}
fn pcmd_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("pcmd")), |_| Token::Pcmd)(input)
}
fn pipefd_token(input: &str) -> IResult<&str, Token> {
map(preceded(multispace0, tag("pipefd")), |_| Token::PipeFd)(input)
}
/// Recognizes the `&&` operator, skipping any leading whitespace.
fn andand_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, tag("&&"))(input)?;
    Ok((rest, Token::AndAnd))
}
/// Recognizes the `||` operator, skipping any leading whitespace.
fn oror_token(input: &str) -> IResult<&str, Token> {
    let (rest, _) = preceded(multispace0, tag("||"))(input)?;
    Ok((rest, Token::OrOr))
}