shrs_lang 0.0.6

Parser and lexer for the shrs POSIX shell.
Documentation
use std::str::FromStr;
use crate::{ast, lexer};

grammar<'input>(text: &'input str);

// Binds this grammar to the external (hand-written) lexer in `lexer`.
// Each quoted terminal on the left is the name used in the productions of
// this file; the pattern on the right is the `lexer::Token` variant it matches.
extern {
    // Positions reported with tokens and errors are `usize` values.
    type Location = usize;
    // Errors raised by the lexer surface to the parser as `lexer::Error`.
    type Error = lexer::Error;

    enum lexer::Token<'input> {

	// Single-character operators and quoting characters.
	"\n" => lexer::Token::NEWLINE,
	";" => lexer::Token::SEMI,
	"&" => lexer::Token::AMP,
	"|" => lexer::Token::PIPE,
	"`" => lexer::Token::BACKTICK,
	"=" => lexer::Token::EQUAL,
	"'" => lexer::Token::SINGLEQUOTE,
	"\"" => lexer::Token::DOUBLEQUOTE,
	"<" => lexer::Token::LESS,
	">" => lexer::Token::GREAT,

	// Grouping delimiters and negation.
	"(" => lexer::Token::LPAREN,
	")" => lexer::Token::RPAREN,
	"{" => lexer::Token::LBRACE,
	"}" => lexer::Token::RBRACE,
	"!" => lexer::Token::BANG,

	// Multi-character control operators.
	"&&" => lexer::Token::AND_IF,
	"||" => lexer::Token::OR_IF,
	";;" => lexer::Token::DSEMI,

	// Multi-character redirection operators.
	"<<" => lexer::Token::DLESS,
	">>" => lexer::Token::DGREAT,
	"<&" => lexer::Token::LESSAND,
	">&" => lexer::Token::GREATAND,
	"<>" => lexer::Token::LESSGREAT,
	"<<-" => lexer::Token::DLESSDASH,
	">|" => lexer::Token::CLOBBER,

	// Reserved words for conditionals and loop bodies.
	"if" => lexer::Token::IF,
	"then" => lexer::Token::THEN,
	"else" => lexer::Token::ELSE,
	"elif" => lexer::Token::ELIF,
	"fi" => lexer::Token::FI,
	"do" => lexer::Token::DO,
	"done" => lexer::Token::DONE,

	// Reserved words for case statements and loops.
	"case" => lexer::Token::CASE,
	"esac" => lexer::Token::ESAC,
	"while" => lexer::Token::WHILE,
	"until" => lexer::Token::UNTIL,
	"for" => lexer::Token::FOR,
	"in" => lexer::Token::IN,

	// Tokens that carry a borrowed slice of the input text as their payload.
	"WORD" => lexer::Token::WORD(<&'input str>),
	"ASSIGNMENT_WORD" => lexer::Token::ASSIGNMENT_WORD(<&'input str>),
	"NAME" => lexer::Token::NAME(<&'input str>),
	"FNAME" => lexer::Token::FNAME(<&'input str>),
	"IO_NUMBER" => lexer::Token::IO_NUMBER(<&'input str>),
    
    }
}

// Start symbol: a whole script. Leading and trailing newlines are skipped;
// input containing no commands at all produces `ast::Command::None`.
pub Program: ast::Command = {
    Linebreak <CompleteCommands> Linebreak,
    Linebreak => ast::Command::None,
}

// One or more complete commands separated by newlines, folded left into
// nested `SeqList` nodes.
pub CompleteCommands: ast::Command = {
    // double check seqlist is correct here
    <earlier:CompleteCommands> NewlineList <next:CompleteCommand> =>
        ast::Command::SeqList(Box::new(earlier), Some(Box::new(next))),
    <CompleteCommand>,
}

// A list with an optional trailing `&` or `;`. A trailing separator wraps the
// list in an `AsyncList` / `SeqList` node whose second slot is `None`.
pub CompleteCommand: ast::Command = {
    <list:List> <op:SeparatorOp> => {
        let list = Box::new(list);
        match op {
            ast::SeparatorOp::Amp => ast::Command::AsyncList(list, None),
            ast::SeparatorOp::Semi => ast::Command::SeqList(list, None),
        }
    },
    <List>,
}

// And-or lists joined by `&` or `;`, built left-associatively. The operator
// between two entries decides whether the node is an `AsyncList` or a `SeqList`.
pub List: ast::Command = {
    <head:List> <op:SeparatorOp> <next:AndOr> => {
        let head = Box::new(head);
        let next = Some(Box::new(next));
        match op {
            ast::SeparatorOp::Amp => ast::Command::AsyncList(head, next),
            ast::SeparatorOp::Semi => ast::Command::SeqList(head, next),
        }
    },
    <AndOr>,
}

// Pipelines chained with `&&` / `||` (left-associative). Newlines are allowed
// after the operator.
pub AndOr: ast::Command = {
    <lhs:AndOr> "&&" Linebreak <rhs:Pipeline> =>
        ast::Command::And(Box::new(lhs), Box::new(rhs)),
    <lhs:AndOr> "||" Linebreak <rhs:Pipeline> =>
        ast::Command::Or(Box::new(lhs), Box::new(rhs)),
    <Pipeline>,
}

// A pipe sequence, optionally negated with a leading `!`.
pub Pipeline: ast::Command = {
    "!" <negated:PipeSequence> => ast::Command::Not(Box::new(negated)),
    <PipeSequence>,
}

// Commands connected by `|` (left-associative). Newlines are allowed after
// the pipe symbol.
pub PipeSequence: ast::Command = {
    <upstream:PipeSequence> "|" Linebreak <downstream:Command> =>
        ast::Command::Pipeline(Box::new(upstream), Box::new(downstream)),
    <Command>,
}

// Any single command: simple, compound, or a function definition.
pub Command: ast::Command = {
    <SimpleCommand>,
    <CompoundCommand>,
    <FunctionDefinition>,
}

// Simple command: optional assignments, optional redirections, one or more
// words, then optional redirections. Redirections before and after the words
// are merged into a single list, prefix ones first.
pub SimpleCommand: ast::Command = {
    <assigns: Assign*> <before: Redirect*> <argv: "WORD"+> <after: Redirect*> => {
        let mut redirects = before;
        redirects.extend(after);
        let args = argv.into_iter().map(|word| word.to_string()).collect::<Vec<_>>();
        ast::Command::Simple { assigns, redirects, args }
    }
}

// Compound commands. Every alternative already yields the finished node,
// except the subshell, which is wrapped in `ast::Command::Subshell` here.
pub CompoundCommand: ast::Command = {
    <BraceGroup>,
    <inner:Subshell> => ast::Command::Subshell(Box::new(inner)),
    <IfClause>,
    <WhileClause>,
    <UntilClause>,
    <ForClause>,
    <CaseClause>,
}

// Function definition: `name ( ) <newlines> body`.
// TODO use FNAME token
pub FunctionDefinition: ast::Command = {
    <name:"WORD"> "(" ")" Linebreak <definition:FunctionBody> => ast::Command::Fn {
        fname: name.to_string(),
        body: Box::new(definition),
    },
}

// The body of a function is any compound command.
pub FunctionBody: ast::Command = <CompoundCommand>;

// BRACE GROUP

// `{ compound-list }` yields the enclosed list unchanged.
pub BraceGroup: ast::Command = {
    "{" <CompoundList> "}",
}

// SUBSHELL

// `( compound-list )` yields the enclosed list; the `Subshell` wrapper node is
// added by `CompoundCommand`.
pub Subshell: ast::Command = "(" <CompoundList> ")";

// The command list inside a compound command: optional leading newlines, a
// term, and an optional trailing separator. A trailing `&` / `;` wraps the
// term in `AsyncList` / `SeqList`; a trailing newline leaves it untouched.
pub CompoundList: ast::Command = {
    Linebreak <Term>,
    Linebreak <term:Term> <sep:Separator> => {
        if let Some(op) = sep {
            let term = Box::new(term);
            match op {
                ast::SeparatorOp::Amp => ast::Command::AsyncList(term, None),
                ast::SeparatorOp::Semi => ast::Command::SeqList(term, None),
            }
        } else {
            term
        }
    }
}

// And-or lists inside a compound list, joined by `&`, `;` or newlines and
// built left-associatively.
//
// `s` is `None` when the separator was only a newline list. A newline
// separates commands exactly like `;`, so it produces a `SeqList`; returning
// `t` alone here would silently drop the following command `a`.
pub Term: ast::Command = {
    <t:Term> <s:Separator> <a:AndOr> => {
        match s {
            Some(ast::SeparatorOp::Amp) => {
                ast::Command::AsyncList(Box::new(t), Some(Box::new(a)))
            },
            None | Some(ast::SeparatorOp::Semi) => {
                ast::Command::SeqList(Box::new(t), Some(Box::new(a)))
            },
        }
    },
    <a:AndOr> => a,
}

// IF CLAUSE

// `if ... then ... [elif ... then ...]* [else ...] fi`.
// The `if` branch and all `elif` branches are collected, in source order,
// into `conds`; the optional `else` body is boxed into `else_part`.
pub IfClause: ast::Command = {
    "if" <cond:CompoundList> "then" <body:CompoundList> <elifs:ElifBody*> <else_part:ElseBody?> "fi" => {
        let first_branch = ast::Condition {
            cond: Box::new(cond),
            body: Box::new(body),
        };
        let conds = std::iter::once(first_branch)
            .chain(elifs)
            .collect::<Vec<_>>();
        ast::Command::If {
            conds,
            else_part: else_part.map(Box::new),
        }
    }
}

// One `elif <cond> then <body>` branch of an if-clause.
pub ElifBody: ast::Condition = {
    "elif" <test:CompoundList> "then" <branch:CompoundList> => ast::Condition {
        cond: Box::new(test),
        body: Box::new(branch),
    },
}
// The `else <body>` tail of an if-clause; yields the body unchanged.
pub ElseBody: ast::Command = {
    "else" <CompoundList>,
}

// WHILE/UNTIL CLAUSE

// `while <cond> do <body> done`.
pub WhileClause: ast::Command = {
    "while" <test:CompoundList> <block:DoGroup> => ast::Command::While {
        cond: Box::new(test),
        body: Box::new(block),
    },
}

// `until <cond> do <body> done`.
pub UntilClause: ast::Command = {
    "until" <test:CompoundList> <block:DoGroup> => ast::Command::Until {
        cond: Box::new(test),
        body: Box::new(block),
    },
}

// FOR CLAUSE

// `for` loops in three forms: `for x do ...`, `for x; do ...`, and
// `for x in words...; do ...`. The first two forms produce an empty `wordlist`.
// TODO actually use "NAME" token
pub ForClause: ast::Command = {
    "for" <var: "WORD"> <block:DoGroup> => ast::Command::For {
        name: var.to_string(),
        wordlist: Vec::new(),
        body: Box::new(block),
    },
    "for" <var: "WORD"> SequentialSep <block:DoGroup> => ast::Command::For {
        name: var.to_string(),
        wordlist: Vec::new(),
        body: Box::new(block),
    },
    "for" <var: "WORD"> Linebreak "in" <items: "WORD"*> SequentialSep <block:DoGroup> => ast::Command::For {
        name: var.to_string(),
        wordlist: items.into_iter().map(|item| item.to_string()).collect::<Vec<_>>(),
        body: Box::new(block),
    },
}

// CASE CLAUSE

// `case <word> in <arms> esac`. The arm list may end with or without a
// closing `;;` (CaseList vs. CaseListNs), or be empty altogether.
pub CaseClause: ast::Command = {
    "case" <subject: "WORD"> Linebreak "in" Linebreak <arms:CaseList> "esac" =>
        ast::Command::Case { word: subject.to_string(), arms },
    "case" <subject: "WORD"> Linebreak "in" Linebreak <arms:CaseListNs> "esac" =>
        ast::Command::Case { word: subject.to_string(), arms },
    "case" <subject: "WORD"> Linebreak "in" Linebreak "esac" =>
        ast::Command::Case { word: subject.to_string(), arms: Vec::new() },
}

// Case arms where the final arm has no closing `;;`: zero or more terminated
// arms followed by exactly one unterminated arm, in source order.
pub CaseListNs: Vec<ast::CaseArm> = {
    <mut arms:CaseItem*> <last:CaseItemNs> => {
        arms.push(last);
        arms
    },
}
// One or more case arms, each closed with `;;`.
pub CaseList: Vec<ast::CaseArm> = {
    <CaseItem+>,
}

// A case arm without a closing `;;`. An arm with no commands gets
// `ast::Command::None` as its body.
pub CaseItemNs: ast::CaseArm = {
    "("? <pattern:Pattern> ")" Linebreak => ast::CaseArm {
        pattern,
        body: Box::new(ast::Command::None),
    },
    "("? <pattern:Pattern> ")" <cmds:CompoundList> => ast::CaseArm {
        pattern,
        body: Box::new(cmds),
    },
}
// A case arm closed with `;;`. An arm with no commands gets
// `ast::Command::None` as its body.
pub CaseItem: ast::CaseArm = {
    "("? <pattern:Pattern> ")" Linebreak ";;" Linebreak => ast::CaseArm {
        pattern,
        body: Box::new(ast::Command::None),
    },
    "("? <pattern:Pattern> ")" <cmds:CompoundList> ";;" Linebreak => ast::CaseArm {
        pattern,
        body: Box::new(cmds),
    },
}

// One or more `|`-separated words forming the pattern of a case arm,
// collected in source order.
pub Pattern: Vec<String> = {
    <first: "WORD"> => vec![first.to_string()],
    <mut alternatives:Pattern> "|" <next: "WORD"> => {
        alternatives.push(next.to_string());
        alternatives
    }
}

// `do <body> done`; yields the body unchanged.
pub DoGroup: ast::Command = {
    "do" <CompoundList> "done",
}

// A single redirection: optional IO number, an operator, and a target word.
// `n` is `None` when no IO number is given or when its text does not parse
// as a `usize`.
pub Redirect: ast::Redirect = {
    <fd: "IO_NUMBER"?> "<" <target: "WORD"> => ast::Redirect {
        n: fd.and_then(|digits| digits.parse::<usize>().ok()),
        file: target.to_string(),
        mode: ast::RedirectMode::Read,
    },
    <fd: "IO_NUMBER"?> ">" <target: "WORD"> => ast::Redirect {
        n: fd.and_then(|digits| digits.parse::<usize>().ok()),
        file: target.to_string(),
        mode: ast::RedirectMode::Write,
    },
    <fd: "IO_NUMBER"?> "<<" <target: "WORD"> => ast::Redirect {
        n: fd.and_then(|digits| digits.parse::<usize>().ok()),
        file: target.to_string(),
        mode: ast::RedirectMode::ReadAppend,
    },
    <fd: "IO_NUMBER"?> ">>" <target: "WORD"> => ast::Redirect {
        n: fd.and_then(|digits| digits.parse::<usize>().ok()),
        file: target.to_string(),
        mode: ast::RedirectMode::WriteAppend,
    },
    <fd: "IO_NUMBER"?> "<&" <target: "WORD"> => ast::Redirect {
        n: fd.and_then(|digits| digits.parse::<usize>().ok()),
        file: target.to_string(),
        mode: ast::RedirectMode::ReadDup,
    },
    <fd: "IO_NUMBER"?> ">&" <target: "WORD"> => ast::Redirect {
        n: fd.and_then(|digits| digits.parse::<usize>().ok()),
        file: target.to_string(),
        mode: ast::RedirectMode::WriteDup,
    },
    <fd: "IO_NUMBER"?> "<>" <target: "WORD"> => ast::Redirect {
        n: fd.and_then(|digits| digits.parse::<usize>().ok()),
        file: target.to_string(),
        mode: ast::RedirectMode::ReadWrite,
    },
}

// Variable assignment of the form `WORD = WORD`.
pub Assign: ast::Assign = {
    <name:"WORD"> "=" <value:"WORD"> => ast::Assign {
        var: name.to_string(),
        val: value.to_string(),
    },
}

// Zero or more newlines; carries no value.
pub Linebreak: () = {
    NewlineList? => (),
}
// One or more newlines; carries no value.
pub NewlineList: () = {
    "\n"+ => (),
}
// A command separator: either an explicit `&` / `;` (optionally followed by
// newlines), reported as `Some(op)`, or just newlines, reported as `None`.
pub Separator: Option<ast::SeparatorOp> = {
    <op:SeparatorOp> Linebreak => Some(op),
    NewlineList => None,
}
// The explicit separator operators: `;` maps to `Semi`, `&` maps to `Amp`.
pub SeparatorOp: ast::SeparatorOp = {
    ";" => ast::SeparatorOp::Semi,
    "&" => ast::SeparatorOp::Amp,
}
// A sequential separator: one or more newlines, or `;` optionally followed
// by newlines. Carries no value.
pub SequentialSep: () = {
    NewlineList => (),
    ";" Linebreak => (),
}