rcshell 0.0.1-alpha.1

plan9 rc in rust
//
// rcshell: Plan9 rc shell in Rust
// src/lexer.rs: Lexer/tokenizer
//
// Copyright (c) 2024 Ali Polatel <alip@exherbo.org>
//
// SPDX-License-Identifier: GPL-3.0-or-later

use std::os::fd::RawFd;

use anyhow::anyhow;
use nom::{
    branch::alt,
    bytes::complete::{is_not, tag, take_while1},
    character::complete::{char, digit1, multispace0},
    combinator::{eof, map, opt, value},
    multi::{fold_many0, many_till},
    sequence::{delimited, preceded},
    Finish, IResult,
};

/// The pair of file descriptor numbers written as `[src=dst]` after a
/// redirection operator.
#[derive(Debug, PartialEq)]
pub struct FdRedirection {
    /// The number written before the `=`.
    src: RawFd,
    /// The number written after the `=`.
    dst: RawFd,
}

/// The kind of redirection operator recognised by the lexer.
#[derive(Debug, PartialEq)]
pub enum Redir {
    /// The `|` operator.
    Pipe,
    /// The `<` operator.
    InputRedir,
    /// The `<<` operator.
    DoubleInputRedir,
    /// The `>` operator.
    OutputRedir,
    /// The `>>` operator.
    DoubleOutputRedir,
}

/// A lexical token produced by [`tokenize`].
#[derive(Debug, PartialEq)]
pub enum Token {
    /// Emitted for the text `for`.
    For,
    /// Emitted for the text `in`.
    In,
    /// Emitted for the text `while`.
    While,
    /// Emitted for the text `if`.
    If,
    /// Emitted for the text `not`.
    Not,
    /// Emitted for the character `~`.
    Twiddle,
    /// Emitted for the character `!`.
    Bang,
    /// Emitted for a `|` that was not consumed as part of a redirection token.
    Pipe,
    /// Emitted for the text `subshell`.
    Subshell,
    /// Emitted for the text `switch`.
    Switch,
    /// Emitted for the text `fn`.
    Fn,
    /// Emitted for the text `redir`.
    Redir,
    /// Emitted for the text `redirw`.
    RedirW,
    /// Emitted for the text `dup`.
    Dup,
    /// A word: either a single-quoted string (quotes removed, `''` collapsed
    /// to `'`) or a run of unquoted word characters.
    Word(String),
    /// Emitted for the character `{`.
    OpenBrace,
    /// Emitted for the character `}`.
    CloseBrace,
    /// Emitted for the character `(`.
    OpenParen,
    /// Emitted for the character `)`.
    CloseParen,
    /// Emitted for the text `pcmd`.
    Pcmd,
    /// Emitted for the text `pipefd`.
    PipeFd,
    /// Emitted for the text `&&`.
    AndAnd,
    /// Emitted for the text `||`.
    OrOr,
    /// A run of one or more `^` / `$` characters, stored verbatim.
    Operator(String),
    /// A redirection operator immediately followed by `[src=dst]`.
    FdRedirection((Redir, FdRedirection)),
    /// A redirection operator followed by a word (stored as the `String`).
    FileRedirection((Redir, String)),
}

pub fn tokenize(input: &str) -> Result<Vec<Token>, anyhow::Error> {
    many_till(
        alt((
            alt((
                if_token,
                while_token,
                for_token,
                switch_token,
                close_paren_token,
                not_token,
                andand_token,
                oror_token,
                bang_token,
                subshell_token,
                redirection_token,
                pipe_token,
                operator,
                open_paren_token,
            )),
            alt((
                in_token,
                twiddle_token,
                fn_token,
                redir_token,
                redirw_token,
                dup_token,
                open_brace_token,
                close_brace_token,
                pcmd_token,
                pipefd_token,
                word_token,
            )),
        )),
        eof,
    )(input)
    .map(|(remaining, (tokens, _))| (remaining, tokens))
    .finish()
    .map(|(_, tokens)| tokens)
    .map_err(|error| anyhow!("! {error:?}"))
}

/*
fn is_id_char(c: char) -> bool {
    c.is_alphanumeric() || c == '_'
}
*/

/// Returns `true` when `c` may appear in an unquoted word, i.e. it is neither
/// whitespace nor one of the shell metacharacters ``& | ^ $ = ` ' { } ( ) < >``.
fn is_word_char(c: char) -> bool {
    let is_meta = matches!(
        c,
        '&' | '|' | '^' | '$' | '=' | '`' | '\'' | '{' | '}' | '(' | ')' | '<' | '>'
    );
    !(is_meta || c.is_whitespace())
}

/// Maps the textual redirection operator to its [`Redir`] kind.
///
/// # Panics
///
/// Panics (via `unreachable!`) when `redirection` is not one of
/// `|`, `<`, `<<`, `>` or `>>`.
fn redir_from_str(redirection: &str) -> Redir {
    // All operators are ASCII, so matching on the raw bytes is equivalent to
    // matching on the string itself.
    match redirection.as_bytes() {
        [b'|'] => Redir::Pipe,
        [b'<'] => Redir::InputRedir,
        [b'<', b'<'] => Redir::DoubleInputRedir,
        [b'>'] => Redir::OutputRedir,
        [b'>', b'>'] => Redir::DoubleOutputRedir,
        _ => unreachable!(),
    }
}

fn operator(input: &str) -> IResult<&str, Token> {
    let (input, op) = preceded(multispace0, take_while1(|c: char| r#"^$"#.contains(c)))(input)?;
    Ok((input, Token::Operator(op.to_string())))
}

fn redirection_token(input: &str) -> IResult<&str, Token> {
    let (input, _) = multispace0(input)?;

    // Parse redirection operator first
    let (input, redirection) = alt((tag("<"), tag("<<"), tag(">"), tag(">>"), tag("|")))(input)?;
    let redirection = redir_from_str(redirection);

    // Try to parse fd redirection next (optional)
    let (mut input, fd_redir) = opt(fd_redirection)(input)?;

    let token = if let Some(fd_redir) = fd_redir {
        Token::FdRedirection((redirection, fd_redir))
    } else {
        // Try to parse file redirection.
        let (file_input, file_redir) = word(input)?;
        input = file_input;
        Token::FileRedirection((redirection, file_redir.to_string()))
    };

    Ok((input, token))
}

fn fd_redirection<'a>(input: &'a str) -> IResult<&'a str, FdRedirection> {
    let (input, _) = tag("[")(input)?;
    let (input, fd) = digit1(input)?;
    let (input, _) = tag("=")(input)?;
    let (input, operator) = digit1(input)?;
    let (input, _) = tag("]")(input)?;

    Ok((
        input,
        FdRedirection {
            src: fd.parse().unwrap(),
            dst: operator.parse().unwrap(),
        },
    ))
}

fn for_token(input: &str) -> IResult<&str, Token> {
    let (input, _) = preceded(multispace0, tag("for"))(input)?;
    Ok((input, Token::For))
}

fn in_token(input: &str) -> IResult<&str, Token> {
    let (input, _) = preceded(multispace0, tag("in"))(input)?;
    Ok((input, Token::In))
}

fn while_token(input: &str) -> IResult<&str, Token> {
    let (input, _) = preceded(multispace0, tag("while"))(input)?;
    Ok((input, Token::While))
}

fn if_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, tag("if")), |_| Token::If)(input)
}

fn not_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, tag("not")), |_| Token::Not)(input)
}

fn twiddle_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, char('~')), |_| Token::Twiddle)(input)
}

fn bang_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, char('!')), |_| Token::Bang)(input)
}

fn subshell_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, tag("subshell")), |_| Token::Subshell)(input)
}

fn switch_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, tag("switch")), |_| Token::Switch)(input)
}

fn fn_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, tag("fn")), |_| Token::Fn)(input)
}

fn redir_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, tag("redir")), |_| Token::Redir)(input)
}

fn redirw_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, tag("redirw")), |_| Token::RedirW)(input)
}

fn dup_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, tag("dup")), |_| Token::Dup)(input)
}

/// Parses a single-quoted string and returns its contents without the
/// surrounding quotes.
///
/// Inside the quotes, two consecutive single quotes (`''`) stand for one
/// literal single quote; every other character is taken verbatim.
fn quoted_string(input: &str) -> IResult<&str, String> {
    // `''` contributes one quote character to the result.
    let escaped_quote = value("'", tag("''"));
    // Any non-empty run of characters other than the quote itself.
    let literal_run = is_not("'");

    let contents = fold_many0(
        alt((escaped_quote, literal_run)),
        String::new,
        |mut text: String, piece: &str| {
            text.push_str(piece);
            text
        },
    );

    delimited(char('\''), contents, char('\''))(input)
}

/// Parses one word after optional leading whitespace.
///
/// A word is either a single-quoted string (tried first) or a non-empty run
/// of characters accepted by `is_word_char`. The word's text is returned as
/// an owned `String`.
fn word(input: &str) -> IResult<&str, String> {
    let bare_word = map(take_while1(is_word_char), str::to_owned);
    preceded(multispace0, alt((quoted_string, bare_word)))(input)
}

fn word_token(input: &str) -> IResult<&str, Token> {
    let (input, words) = word(input)?;
    Ok((input, Token::Word(words.to_string())))
}

fn open_brace_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, char('{')), |_| Token::OpenBrace)(input)
}

fn close_brace_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, char('}')), |_| Token::CloseBrace)(input)
}

fn open_paren_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, char('(')), |_| Token::OpenParen)(input)
}

fn close_paren_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, char(')')), |_| Token::CloseParen)(input)
}

fn pipe_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, char('|')), |_| Token::Pipe)(input)
}

fn pcmd_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, tag("pcmd")), |_| Token::Pcmd)(input)
}

fn pipefd_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, tag("pipefd")), |_| Token::PipeFd)(input)
}

fn andand_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, tag("&&")), |_| Token::AndAnd)(input)
}

fn oror_token(input: &str) -> IResult<&str, Token> {
    map(preceded(multispace0, tag("||")), |_| Token::OrOr)(input)
}

// Add auxiliary functions and parsers as needed