conch_parser/parse/
mod.rs

1//! The definition of a parser (and related methods) for the shell language.
2// FIXME: consider parsing [[ exr ]] as keywords? otherwise [[ foo && bar ]] won't get parsed right
3// FIXME: consider parsing out array index syntax? (e.g. ${array[some index]}
4// FIXME: arithmetic substitutions don't currently support param/comand substitutions
5
6use std::convert::From;
7use std::error::Error;
8use std::iter::empty as empty_iter;
9use std::fmt;
10use std::mem;
11use std::str::FromStr;
12
13use ast::{self, DefaultArithmetic, DefaultParameter};
14use ast::builder::{self, Builder, SimpleWordKind};
15use ast::builder::ComplexWordKind::{self, Concat, Single};
16use ast::builder::WordKind::{self, DoubleQuoted, Simple, SingleQuoted};
17use self::iter::{PeekableIterator, PositionIterator, TokenIter, TokenIterator, TokenIterWrapper};
18use token::Token;
19use token::Token::*;
20
21mod iter;
22
23const CASE:     &'static str = "case";
24const DO:       &'static str = "do";
25const DONE:     &'static str = "done";
26const ELIF:     &'static str = "elif";
27const ELSE:     &'static str = "else";
28const ESAC:     &'static str = "esac";
29const FI:       &'static str = "fi";
30const FOR:      &'static str = "for";
31const FUNCTION: &'static str = "function";
32const IF:       &'static str = "if";
33const IN:       &'static str = "in";
34const THEN:     &'static str = "then";
35const UNTIL:    &'static str = "until";
36const WHILE:    &'static str = "while";
37
38/// A parser which will use a default AST builder implementation,
39/// yielding results in terms of types defined in the `ast` module.
40pub type DefaultParser<I> = Parser<I, builder::StringBuilder>;
41
42/// A specialized `Result` type for parsing shell commands.
43pub type ParseResult<T, E> = Result<T, ParseError<E>>;
44
45/// Indicates a character/token position in the original source.
46#[derive(Debug, PartialEq, Eq, Copy, Clone)]
47pub struct SourcePos {
48    /// The byte offset since the start of parsing.
49    pub byte: usize,
50    /// The line offset since the start of parsing, useful for error messages.
51    pub line: usize,
52    /// The column offset since the start of parsing, useful for error messages.
53    pub col: usize,
54}
55
56impl Default for SourcePos {
57    fn default() -> Self {
58        Self::new()
59    }
60}
61
62impl SourcePos {
63    /// Constructs a new, starting, source position
64    pub fn new() -> SourcePos {
65        SourcePos { byte: 0, line: 1, col: 1 }
66    }
67
68    /// Increments self using the length of the provided token.
69    pub fn advance(&mut self, next: &Token) {
70        let newlines = match *next {
71            // Most of these should not have any newlines
72            // embedded within them, but permitting external
73            // tokenizers means we should sanity check anyway.
74            Name(ref s)       |
75            Literal(ref s)    |
76            Whitespace(ref s) => s.chars().filter(|&c| c == '\n').count(),
77
78            Newline => 1,
79            _ => 0,
80        };
81
82        let tok_len = next.len();
83        self.byte += tok_len;
84        self.line += newlines;
85        self.col = if newlines == 0 { self.col + tok_len } else { 1 };
86    }
87
88    /// Increments self by `num_tab` tab characters
89    fn advance_tabs(&mut self, num_tab: usize) {
90        self.byte += num_tab;
91        self.col += num_tab;
92    }
93}
94
95/// The error type which is returned from parsing shell commands.
96#[derive(Debug, Clone, PartialEq, Eq)]
97pub enum ParseError<T> {
98    /// Encountered a word that could not be interpreted as a valid file descriptor.
99    /// Stores the start and end position of the invalid word.
100    BadFd(SourcePos, SourcePos),
101    /// Encountered a `Token::Literal` where expecting a `Token::Name`.
102    BadIdent(String, SourcePos),
103    /// Encountered a bad token inside of `${...}`.
104    BadSubst(Token, SourcePos),
105    /// Encountered EOF while looking for a match for the specified token.
106    /// Stores position of opening token.
107    Unmatched(Token, SourcePos),
108    /// Did not find a reserved keyword within a command. The first String is the
109    /// command being parsed, followed by the position of where it starts. Next
110    /// is the missing keyword followed by the position of where the parse
111    /// expected to have encountered it.
112    IncompleteCmd(&'static str, SourcePos, &'static str, SourcePos),
113    /// Encountered a token not appropriate for the current context.
114    Unexpected(Token, SourcePos),
115    /// Encountered the end of input while expecting additional tokens.
116    UnexpectedEOF,
117    /// A custom error returned by the AST builder.
118    Custom(T),
119}
120
121impl<T: Error> Error for ParseError<T> {
122    fn description(&self) -> &str {
123        match *self {
124            ParseError::BadFd(..)       => "bad file descriptor found",
125            ParseError::BadIdent(..)    => "bad identifier found",
126            ParseError::BadSubst(..)    => "bad substitution found",
127            ParseError::Unmatched(..)   => "unmatched token",
128            ParseError::IncompleteCmd(..)=> "incomplete command",
129            ParseError::Unexpected(..)  => "unexpected token found",
130            ParseError::UnexpectedEOF   => "unexpected end of input",
131            ParseError::Custom(ref e)   => e.description(),
132        }
133    }
134
135    fn cause(&self) -> Option<&Error> {
136        match *self {
137            ParseError::BadFd(..)        |
138            ParseError::BadIdent(..)     |
139            ParseError::BadSubst(..)     |
140            ParseError::Unmatched(..)    |
141            ParseError::IncompleteCmd(..)|
142            ParseError::Unexpected(..)   |
143            ParseError::UnexpectedEOF    => None,
144            ParseError::Custom(ref e) => Some(e),
145        }
146    }
147}
148
149impl<T: fmt::Display> fmt::Display for ParseError<T> {
150    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
151        match *self {
152            ParseError::BadFd(ref start, ref end)  =>
153                write!(fmt, "file descriptor found between lines {} - {} cannot possibly be a valid", start, end),
154            ParseError::BadIdent(ref id, pos) => write!(fmt, "not a valid identifier {}: {}", pos, id),
155            ParseError::BadSubst(ref t, pos)  => write!(fmt, "bad substitution {}: invalid token: {}", pos, t),
156            ParseError::Unmatched(ref t, pos) => write!(fmt, "unmatched `{}` starting on line {}", t, pos),
157
158            ParseError::IncompleteCmd(c, start, kw, kw_pos) => write!(fmt,
159                "did not find `{}` keyword on line {}, in `{}` command which starts on line {}",
160                kw, kw_pos, c, start),
161
162            // When printing unexpected newlines, print \n instead to avoid confusingly formatted messages
163            ParseError::Unexpected(Newline, pos) => write!(fmt, "found unexpected token on line {}: \\n", pos),
164            ParseError::Unexpected(ref t, pos)   => write!(fmt, "found unexpected token on line {}: {}", pos, t),
165
166            ParseError::UnexpectedEOF => fmt.write_str("unexpected end of input"),
167            ParseError::Custom(ref e) => write!(fmt, "{}", e),
168        }
169    }
170}
171
172impl<T> From<T> for ParseError<T> {
173    fn from(err: T) -> Self {
174        ParseError::Custom(err)
175    }
176}
177
178impl fmt::Display for SourcePos {
179    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
180        write!(fmt, "{}:{}", self.line, self.col)
181    }
182}
183
184/// Used to indicate what kind of compound command could be parsed next.
185#[derive(Debug, PartialEq, Eq, Copy, Clone)]
186enum CompoundCmdKeyword {
187    For,
188    Case,
189    If,
190    While,
191    Until,
192    Brace,
193    Subshell,
194}
195
196/// Used to configure when `Parser::command_group` stops parsing commands.
197#[derive(Debug, PartialEq, Eq, Clone)]
198pub struct CommandGroupDelimiters<'a, 'b, 'c> {
199    /// Any token which appears after a complete command separator (e.g. `;`, `&`, or a
200    /// newline) will be considered a delimeter for the command group.
201    pub reserved_tokens: &'a [Token],
202    /// Any `Literal` or `Name` token that matches any of these entries completely
203    /// *and* appear after a complete command will be considered a delimeter.
204    pub reserved_words: &'b [&'static str],
205    /// Any token which matches this provided set will be considered a delimeter.
206    pub exact_tokens: &'c [Token],
207}
208
209impl Default for CommandGroupDelimiters<'static, 'static, 'static> {
210    fn default() -> Self {
211        CommandGroupDelimiters {
212            reserved_tokens: &[],
213            reserved_words: &[],
214            exact_tokens: &[],
215        }
216    }
217}
218
219/// An `Iterator` adapter around a `Parser`.
220///
221/// This iterator is `fused`, that is, if the underlying parser either yields
222/// no command, or an error, no further commands (or errors) will be yielded.
223///
224/// This is because the parser does not do any error handling or backtracking
225/// on errors, thus trying to parse another command after an error is not
226/// well defined, and will either fail as well, or will produce an incorrect
227/// result.
228#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
229#[derive(Debug)]
230pub struct ParserIterator<I, B> {
231    /// The underlying parser to poll for complete commands.
232    /// A `None` value indicates the stream has been exhausted.
233    parser: Option<Parser<I, B>>,
234}
235
236impl<I, B> ParserIterator<I, B> {
237    /// Construct a new adapter with a given parser.
238    fn new(parser: Parser<I, B>) -> Self {
239        ParserIterator {
240            parser: Some(parser),
241        }
242    }
243}
244
245if_nightly! {
246    impl<I, B> ::std::iter::FusedIterator for ParserIterator<I, B>
247        where I: Iterator<Item = Token>,
248              B: Builder,
249    {}
250}
251
252impl<I, B> Iterator for ParserIterator<I, B>
253    where I: Iterator<Item = Token>,
254          B: Builder,
255{
256    type Item = ParseResult<B::Command, B::Error>;
257
258    fn next(&mut self) -> Option<Self::Item> {
259        match self.parser.as_mut().map(Parser::complete_command) {
260            None => None,
261            Some(ret) => match ret {
262                Ok(Some(c)) => Some(Ok(c)),
263                Ok(None) => {
264                    let _ = self.parser.take();
265                    None
266                },
267                Err(e) => {
268                    let _ = self.parser.take();
269                    Some(Err(e))
270                },
271            }
272        }
273    }
274}
275
276impl<I, B> IntoIterator for Parser<I, B>
277    where I: Iterator<Item = Token>,
278          B: Builder,
279{
280    type IntoIter = ParserIterator<I, B>;
281    type Item = <Self::IntoIter as Iterator>::Item;
282
283    fn into_iter(self) -> Self::IntoIter {
284        ParserIterator::new(self)
285    }
286}
287
288/// A parser for the shell language. It will parse shell commands from a
289/// stream of shell `Token`s, and pass them to an AST builder.
290///
291/// The parser implements the `IntoIterator` trait so that it can behave like
292/// a stream of parsed shell commands. Converting the parser into an `Iterator`
293/// and calling `next()` on the result will yield a complete shell command, or
294/// an error should one arise.
295///
296/// # Building
297///
298/// To construct a parser you need a stream of `Token`s and a `Builder`
299/// which will receive data from the parser and assemble an AST. This
300/// library provides both a default `Token` lexer, as well as an AST `Builder`.
301///
302/// ```
303/// use conch_parser::ast::builder::{Builder, RcBuilder};
304/// use conch_parser::lexer::Lexer;
305/// use conch_parser::parse::Parser;
306///
307/// let source = "echo hello world";
308/// let lexer = Lexer::new(source.chars());
309/// let mut parser = Parser::with_builder(lexer, RcBuilder::new());
310/// assert!(parser.complete_command().unwrap().is_some());
311/// ```
312///
313/// If you want to use a parser with the default AST builder implementation
314/// you can also use the `DefaultParser` type alias for a simpler setup.
315///
316/// ```
317/// use conch_parser::lexer::Lexer;
318/// use conch_parser::parse::DefaultParser;
319///
320/// let source = "echo hello world";
321/// let lexer = Lexer::new(source.chars());
322/// let mut parser = DefaultParser::new(lexer);
323/// assert!(parser.complete_command().unwrap().is_some());
324/// ```
325///
326/// # Token lexing
327///
328/// Lexer implementations are free to yield tokens in whatever manner they wish,
329/// however, there are a few considerations the lexer should take.
330///
331/// First, the lexer should consolidate consecutive tokens such as `Token::Name`,
332/// `Token::Literal`, and `Token::Whitespace` as densely as possible, e.g.
333/// `Literal(foobar)` is preferred over `[Literal(foo), Literal(bar)]`. Although
334/// such splitting of tokens will not cause problems while parsing most shell
335/// commands, certain situations require the parser to look-ahead some fixed
336/// number of tokens so it can avoid backtracking. When the tokens are consolidated
337/// the parser can look-ahead deterministically. If a lexer implementation chooses
338/// not to use this strategy, the parser may unsuccessfully parse certain inputs
339/// normally considered valid.
340///
341/// Second, the lexer can influence how token escaping is handled by the parser.
342/// The backslash token, `\` is used to escape, or make literal, any token which
343/// may or may not have a special meaning. Since the parser operates on tokens and
344/// not characters, the escaping of multi-character tokens is affected by how the
345/// lexer yields them. For example, the source `\<<` is normally considered by shells
346/// as `[Literal(<), Less]`. If this behavior is desired, the lexer should yield
347/// the tokens `[Backslash, Less, Less]` for that source. Otherwise if the lexer
348/// yields the tokens `[Backslash, DLess]`, the parser will treat the source as if
349/// it were `[Literal(<<)]`. The lexer's behavior need not be consistent between different
350/// multi-char tokens, as long as it is aware of the implications.
351#[derive(Debug)]
352pub struct Parser<I, B> {
353    iter: TokenIterWrapper<I>,
354    builder: B,
355}
356
357impl<I: Iterator<Item = Token>, B: Builder + Default> Parser<I, B> {
358    /// Creates a new Parser from a Token iterator or collection.
359    pub fn new<T>(iter: T) -> Parser<I, B> where T: IntoIterator<Item = Token, IntoIter = I> {
360        Parser::with_builder(iter.into_iter(), Default::default())
361    }
362}
363
364/// A macro that will consume and return a token that matches a specified pattern
365/// from a parser's token iterator. If no matching token is found, None will be yielded.
366macro_rules! eat_maybe {
367    ($parser:expr, { $($tok:pat => $blk:block),+; _ => $default:block, }) => {
368        eat_maybe!($parser, { $($tok => $blk),+; _ => $default })
369    };
370
371    ($parser:expr, { $($tok:pat => $blk:block),+,}) => {
372        eat_maybe!($parser, { $($tok => $blk),+ })
373    };
374
375    ($parser:expr, { $($tok:pat => $blk:block),+ }) => {
376        eat_maybe!($parser, { $($tok => $blk),+; _ => {} })
377    };
378
379    ($parser:expr, { $($tok:pat => $blk:block),+; _ => $default:block }) => {
380        $(if let Some(&$tok) = $parser.iter.peek() {
381            $parser.iter.next();
382            $blk
383        } else)+ {
384            $default
385        }
386    };
387}
388
389/// A macro that will consume a specified token from the parser's iterator
390/// an run a corresponding action block to do something with the token,
391/// or it will construct and return an appropriate Unexpected(EOF) error.
392macro_rules! eat {
393    ($parser:expr, { $($tok:pat => $blk:block),+, }) => { eat!($parser, {$($tok => $blk),+}) };
394    ($parser:expr, {$($tok:pat => $blk:block),+}) => {
395        eat_maybe!($parser, {$($tok => $blk),+; _ => { return Err($parser.make_unexpected_err()) } })
396    };
397}
398
399/// A macro that defines a function for parsing binary operations in arithmetic
400/// expressions.  It accepts a name for the function, a name for the subsequent
401/// expression (which has a higher precedence) to sub parse on, and a number of
402/// tokens which can be recognized as an operator for the binary operation and
403/// the appropriate AST constructor for said token/operator. All operators within
404/// the definition are considered to have identical precedence and are left-to-right
405/// associative.
406macro_rules! arith_parse {
407    ($fn_name:ident, $next_expr:ident, $($tok:pat => $constructor:path),+) => {
408        #[inline]
409        fn $fn_name(&mut self) -> ParseResult<DefaultArithmetic, B::Error> {
410            let mut expr = try!(self.$next_expr());
411            loop {
412                self.skip_whitespace();
413                eat_maybe!(self, {
414                    $($tok => {
415                        let next = try!(self.$next_expr());
416                        expr = $constructor(Box::new(expr), Box::new(next));
417                    }),+;
418                    _ => { break },
419                });
420            }
421            Ok(expr)
422        }
423    }
424}
425
426impl<I: Iterator<Item = Token>, B: Builder> Parser<I, B> {
427    /// Construct an `Unexpected` error using the given token. If `None` specified, the next
428    /// token in the iterator will be used (or `UnexpectedEOF` if none left).
429    #[inline]
430    fn make_unexpected_err(&mut self) -> ParseError<B::Error> {
431        let pos = self.iter.pos();
432        self.iter.next().map_or(ParseError::UnexpectedEOF, |t| ParseError::Unexpected(t, pos))
433    }
434
435    /// Creates a new Parser from a Token iterator and provided AST builder.
436    pub fn with_builder(iter: I, builder: B) -> Self {
437        Parser {
438            iter: TokenIterWrapper::Regular(TokenIter::new(iter)),
439            builder: builder,
440        }
441    }
442
443    /// Returns the parser's current position in the source.
444    pub fn pos(&self) -> SourcePos { self.iter.pos() }
445
446    /// Parses a single complete command.
447    ///
448    /// For example, `foo && bar; baz` will yield two complete
449    /// commands: `And(foo, bar)`, and `Simple(baz)`.
450    pub fn complete_command(&mut self) -> ParseResult<Option<B::Command>, B::Error> {
451        let pre_cmd_comments = self.linebreak();
452
453        if self.iter.peek().is_some() {
454            Ok(Some(try!(self.complete_command_with_leading_comments(pre_cmd_comments))))
455        } else {
456            if !pre_cmd_comments.is_empty() {
457                try!(self.builder.comments(pre_cmd_comments));
458            }
459            Ok(None)
460        }
461    }
462
463    /// Parses a single complete command, but expects caller to parse any leading comments.
464    ///
465    /// It is considered an error there is not a valid complete command to be parsed, thus
466    /// the caller should perform any EOF checks.
467    fn complete_command_with_leading_comments(&mut self, pre_cmd_comments: Vec<builder::Newline>)
468        -> ParseResult<B::Command, B::Error>
469    {
470        let cmd = try!(self.and_or_list());
471
472        let (sep, cmd_comment) = eat_maybe!(self, {
473            Semi => { (builder::SeparatorKind::Semi, self.newline()) },
474            Amp  => { (builder::SeparatorKind::Amp , self.newline()) };
475            _ => {
476                match self.newline() {
477                    n@Some(_) => (builder::SeparatorKind::Newline, n),
478                    None => (builder::SeparatorKind::Other, None),
479                }
480            }
481        });
482
483        Ok(try!(self.builder.complete_command(pre_cmd_comments, cmd, sep, cmd_comment)))
484    }
485
486    /// Parses compound AND/OR commands.
487    ///
488    /// Commands are left associative. For example `foo || bar && baz`
489    /// parses to `And(Or(foo, bar), baz)`.
490    pub fn and_or_list(&mut self) -> ParseResult<B::CommandList, B::Error> {
491        let first = try!(self.pipeline());
492        let mut rest = Vec::new();
493
494        loop {
495            self.skip_whitespace();
496            let is_and = eat_maybe!(self, {
497                AndIf => { true },
498                OrIf  => { false };
499                _ => { break },
500            });
501
502            let post_sep_comments = self.linebreak();
503            let next = try!(self.pipeline());
504
505            let next = if is_and {
506                ast::AndOr::And(next)
507            } else {
508                ast::AndOr::Or(next)
509            };
510
511            rest.push((post_sep_comments, next));
512        }
513
514        Ok(try!(self.builder.and_or_list(first, rest)))
515    }
516
517    /// Parses either a single command or a pipeline of commands.
518    ///
519    /// For example `[!] foo | bar`.
520    pub fn pipeline(&mut self) -> ParseResult<B::ListableCommand, B::Error> {
521        self.skip_whitespace();
522        let bang = eat_maybe!(self, {
523            Bang => { true };
524            _ => { false },
525        });
526
527        let mut cmds = Vec::new();
528        loop {
529            // We've already passed an apropriate spot for !, so it
530            // is an error if it appears before the start of a command.
531            if let Some(&Bang) = self.iter.peek() {
532                return Err(self.make_unexpected_err());
533            }
534
535            let cmd = try!(self.command());
536
537            eat_maybe!(self, {
538                Pipe => { cmds.push((self.linebreak(), cmd)) };
539                _ => {
540                    cmds.push((Vec::new(), cmd));
541                    break;
542                },
543            });
544        }
545
546        Ok(try!(self.builder.pipeline(bang, cmds)))
547    }
548
549    /// Parses any compound or individual command.
550    pub fn command(&mut self) -> ParseResult<B::PipeableCommand, B::Error> {
551        if let Some(kw) = self.next_compound_command_type() {
552            let compound = try!(self.compound_command_internal(Some(kw)));
553            Ok(try!(self.builder.compound_command_into_pipeable(compound)))
554        } else if let Some(fn_def) = try!(self.maybe_function_declaration()) {
555            Ok(fn_def)
556        } else {
557            self.simple_command()
558        }
559    }
560
561    /// Tries to parse a simple command, e.g. `cmd arg1 arg2 >redirect`.
562    ///
563    /// A valid command is expected to have at least an executable name, or a single
564    /// variable assignment or redirection. Otherwise an error will be returned.
565    pub fn simple_command(&mut self) -> ParseResult<B::PipeableCommand, B::Error> {
566        use ast::{RedirectOrCmdWord, RedirectOrEnvVar};
567
568        let mut vars = Vec::new();
569        let mut cmd_args = Vec::new();
570
571        loop {
572            self.skip_whitespace();
573            let is_name = {
574                let mut peeked = self.iter.multipeek();
575                if let Some(&Name(_)) = peeked.peek_next() {
576                    Some(&Equals) == peeked.peek_next()
577                } else {
578                    false
579                }
580            };
581
582            if is_name {
583                if let Some(Name(var)) = self.iter.next() {
584                    self.iter.next(); // Consume the =
585
586                    let value = if let Some(&Whitespace(_)) = self.iter.peek() {
587                        None
588                    } else {
589                        try!(self.word())
590                    };
591                    vars.push(RedirectOrEnvVar::EnvVar(var, value));
592
593                    // Make sure we continue checking for assignments,
594                    // otherwise it they can be interpreted as literal words.
595                    continue;
596                } else {
597                    unreachable!();
598                }
599            }
600
601            // If we find a redirect we should keep checking for
602            // more redirects or assignments. Otherwise we will either
603            // run into the command name or the end of the simple command.
604            let exec = match try!(self.redirect()) {
605                Some(Ok(redirect)) => {
606                    vars.push(RedirectOrEnvVar::Redirect(redirect));
607                    continue;
608                },
609                Some(Err(w)) => w,
610                None => break,
611            };
612
613            // Since there are no more assignments or redirects present
614            // it must be the first real word, and thus the executable name.
615            cmd_args.push(RedirectOrCmdWord::CmdWord(exec));
616            break;
617        }
618
619        let vars = vars;
620
621        // Now that all assignments are taken care of, any other occurances of `=` will be
622        // treated as literals when we attempt to parse a word out.
623        loop {
624            match try!(self.redirect()) {
625                Some(Ok(redirect)) => cmd_args.push(RedirectOrCmdWord::Redirect(redirect)),
626                Some(Err(w)) => cmd_args.push(RedirectOrCmdWord::CmdWord(w)),
627                None => break,
628            }
629        }
630
631        // "Blank" commands are only allowed if redirection occurs
632        // or if there is some variable assignment
633        if vars.is_empty() && cmd_args.is_empty() {
634            Err(self.make_unexpected_err())
635        } else {
636            Ok(try!(self.builder.simple_command(vars, cmd_args)))
637        }
638    }
639
640    /// Parses a continuous list of redirections and will error if any words
641    /// that are not valid file descriptors are found. Essentially used for
642    /// parsing redirection lists after a compound command like `while` or `if`.
643    pub fn redirect_list(&mut self) -> ParseResult<Vec<B::Redirect>, B::Error> {
644        let mut list = Vec::new();
645        loop {
646            self.skip_whitespace();
647            let start_pos = self.iter.pos();
648            match try!(self.redirect()) {
649                Some(Ok(io)) => list.push(io),
650                Some(Err(_)) => return Err(ParseError::BadFd(start_pos, self.iter.pos())),
651                None => break,
652            }
653        }
654
655        Ok(list)
656    }
657
658    /// Parses a redirection token an any source file descriptor and
659    /// path/destination descriptor as appropriate, e.g. `>out`, `1>& 2`, or `2>&-`.
660    ///
661    /// Since the source descriptor can be any arbitrarily complicated word,
662    /// it makes it difficult to reliably peek forward whether a valid redirection
663    /// exists without consuming anything. Thus this method may return a simple word
664    /// if no redirection is found.
665    ///
666    /// Thus, unless a parse error is occured, the return value will be an optional
667    /// redirect or word if either is found. In other words, `Ok(Some(Ok(redirect)))`
668    /// will result if a redirect is found, `Ok(Some(Err(word)))` if a word is found,
669    /// or `Ok(None)` if neither is found.
670    pub fn redirect(&mut self) -> ParseResult<Option<Result<B::Redirect, B::Word>>, B::Error> {
671        fn could_be_numeric<C>(word: &WordKind<C>) -> bool {
672            let simple_could_be_numeric = |word: &SimpleWordKind<C>| match *word {
673                SimpleWordKind::Star        |
674                SimpleWordKind::Question    |
675                SimpleWordKind::SquareOpen  |
676                SimpleWordKind::SquareClose |
677                SimpleWordKind::Tilde       |
678                SimpleWordKind::Colon       => false,
679
680                // Literals and can be statically checked if they have non-numeric characters
681                SimpleWordKind::Escaped(ref s) |
682                SimpleWordKind::Literal(ref s) => s.chars().all(|c| c.is_digit(10)),
683
684                // These could end up evaluating to a numeric,
685                // but we'll have to see at runtime.
686                SimpleWordKind::Param(_) |
687                SimpleWordKind::Subst(_) |
688                SimpleWordKind::CommandSubst(_) => true,
689            };
690
691            match *word {
692                Simple(ref s) => simple_could_be_numeric(s),
693                SingleQuoted(ref s) => s.chars().all(|c| c.is_digit(10)),
694                DoubleQuoted(ref fragments) => fragments.iter().all(simple_could_be_numeric),
695            }
696        }
697
698        fn as_num<C>(word: &ComplexWordKind<C>) -> Option<u16> {
699            match *word {
700                Single(Simple(SimpleWordKind::Literal(ref s))) => u16::from_str_radix(s, 10).ok(),
701                Single(_) => None,
702                Concat(ref fragments) => {
703                    let mut buf = String::new();
704                    for w in fragments {
705                        if let Simple(SimpleWordKind::Literal(ref s)) = *w {
706                            buf.push_str(s);
707                        } else {
708                            return None;
709                        }
710                    }
711
712                    u16::from_str_radix(&buf, 10).ok()
713                }
714            }
715        }
716
717        let (src_fd, src_fd_as_word) = match try!(self.word_preserve_trailing_whitespace_raw()) {
718            None => (None, None),
719            Some(w) => match as_num(&w) {
720                Some(num) => (Some(num), Some(w)),
721                None => return Ok(Some(Err(try!(self.builder.word(w))))),
722            },
723        };
724
725        let redir_tok = match self.iter.peek() {
726            Some(&Less)      |
727            Some(&Great)     |
728            Some(&DGreat)    |
729            Some(&Clobber)   |
730            Some(&LessAnd)   |
731            Some(&GreatAnd)  |
732            Some(&LessGreat) => self.iter.next().unwrap(),
733
734            Some(&DLess)     |
735            Some(&DLessDash) => return Ok(Some(Ok(try!(self.redirect_heredoc(src_fd))))),
736
737            _ => match src_fd_as_word {
738                Some(w) => return Ok(Some(Err(try!(self.builder.word(w))))),
739                None => return Ok(None),
740            },
741        };
742
743        self.skip_whitespace();
744
745        macro_rules! get_path {
746            ($parser:expr) => {
747                match try!($parser.word_preserve_trailing_whitespace_raw()) {
748                    Some(p) => try!($parser.builder.word(p)),
749                    None => return Err(self.make_unexpected_err()),
750                }
751            }
752        }
753
754        macro_rules! get_dup_path {
755            ($parser:expr) => {{
756                let path = if $parser.peek_reserved_token(&[Dash]).is_some() {
757                    let dash = try!($parser.reserved_token(&[Dash]));
758                    Single(Simple(SimpleWordKind::Literal(dash.to_string())))
759                } else {
760                    let path_start_pos = $parser.iter.pos();
761                    let path = if let Some(p) = try!($parser.word_preserve_trailing_whitespace_raw()) {
762                        p
763                    } else {
764                        return Err($parser.make_unexpected_err())
765                    };
766                    let is_numeric = match path {
767                        Single(ref p) => could_be_numeric(&p),
768                        Concat(ref v) => v.iter().all(could_be_numeric),
769                    };
770                    if is_numeric {
771                        path
772                    } else {
773                        return Err(ParseError::BadFd(path_start_pos, self.iter.pos()));
774                    }
775                };
776                try!($parser.builder.word(path))
777            }}
778        }
779
780        let redirect = match redir_tok {
781            Less      => builder::RedirectKind::Read(src_fd, get_path!(self)),
782            Great     => builder::RedirectKind::Write(src_fd, get_path!(self)),
783            DGreat    => builder::RedirectKind::Append(src_fd, get_path!(self)),
784            Clobber   => builder::RedirectKind::Clobber(src_fd, get_path!(self)),
785            LessGreat => builder::RedirectKind::ReadWrite(src_fd, get_path!(self)),
786
787            LessAnd   => builder::RedirectKind::DupRead(src_fd, get_dup_path!(self)),
788            GreatAnd  => builder::RedirectKind::DupWrite(src_fd, get_dup_path!(self)),
789
790            _ => unreachable!(),
791        };
792
793        Ok(Some(Ok(try!(self.builder.redirect(redirect)))))
794    }
795
796    /// Parses a heredoc redirection and the heredoc's body.
797    ///
798    /// This method will look ahead after the next unquoted/unescaped newline
799    /// to capture the heredoc's body, and will stop consuming tokens until
800    /// the approrpiate delimeter is found on a line by itself. If the
801    /// delimeter is unquoted, the heredoc's body will be expanded for
802    /// parameters and other special words. Otherwise, there heredoc's body
803    /// will be treated as a literal.
804    ///
805    /// The heredoc delimeter need not be a valid word (e.g. parameter subsitution
806    /// rules within ${ } need not apply), although it is expected to be balanced
807    /// like a regular word. In other words, all single/double quotes, backticks,
808    /// `${ }`, `$( )`, and `( )` must be balanced.
809    ///
810    /// Note: if the delimeter is quoted, this method will look for an UNQUOTED
811    /// version in the body. For example `<<"EOF"` will cause the parser to look
812    /// until `\nEOF` is found. This it is possible to create a heredoc that can
813    /// only be delimited by the end of the stream, e.g. if a newline is embedded
814    /// within the delimeter. Any backticks that appear in the delimeter are
815    /// expected to appear at the end of the delimeter of the heredoc body, as
816    /// well as any embedded backslashes (unless the backslashes are followed by
817    /// a \, $, or `).
818    ///
819    /// Note: this method expects that the caller provide a potential file
820    /// descriptor for redirection.
821    pub fn redirect_heredoc(&mut self, src_fd: Option<u16>) -> ParseResult<B::Redirect, B::Error> {
822        use std::iter::FromIterator;
823
824        macro_rules! try_map {
825            ($result:expr) => {
826                try!($result.map_err(|e: iter::UnmatchedError| ParseError::Unmatched(e.0, e.1)))
827            }
828        }
829
830        let strip_tabs = eat!(self, {
831            DLess => { false },
832            DLessDash => { true },
833        });
834
835        self.skip_whitespace();
836
837        // Unfortunately we're going to have to capture the delimeter word "manually"
838        // here and double some code. The problem is that we might need to unquote the
839        // word--something that the parser isn't normally aware as a concept. We can
840        // crawl a parsed WordKind tree, but we would still have to convert the inner
841        // trees as either a token collection or into a string, each of which is more
842        // trouble than its worth (especially since WordKind can hold a parsed and
843        // and assembled Builder::Command object, which may not even be possible to
844        // be represented as a string).
845        //
846        // Also some shells like bash and zsh seem to check for balanced tokens like
847        // ', ", or ` within the heredoc delimiter (though this may just be from them
848        // parsing out a word as usual), so to maintain reasonable expectations, we'll
849        // do the same here.
850        let mut delim_tokens = Vec::new();
851        loop {
852            // Normally parens are never part of words, but many
853            // shells permit them to be part of a heredoc delimeter.
854            if let Some(t) = self.iter.peek() {
855                if t.is_word_delimiter() && t != &ParenOpen { break; }
856            } else {
857                break;
858            }
859
860            for t in self.iter.balanced() {
861                delim_tokens.push(try_map!(t));
862            }
863        }
864
865        let mut iter = TokenIter::new(delim_tokens.into_iter());
866        let mut quoted = false;
867        let mut delim = String::new();
868        loop {
869            let start_pos = iter.pos();
870            match iter.next() {
871                Some(Backslash) => {
872                    quoted = true;
873                    iter.next().map(|t| delim.push_str(t.as_str()));
874                },
875
876                Some(SingleQuote) => {
877                    quoted = true;
878                    for t in iter.single_quoted(start_pos) {
879                        delim.push_str(try_map!(t).as_str());
880                    }
881                },
882
883                Some(DoubleQuote) => {
884                    quoted = true;
885                    let mut iter = iter.double_quoted(start_pos);
886                    while let Some(next) = iter.next() {
887                        match try_map!(next) {
888                            Backslash => {
889                                match iter.next() {
890                                    Some(Ok(tok@Dollar))      |
891                                    Some(Ok(tok@Backtick))    |
892                                    Some(Ok(tok@DoubleQuote)) |
893                                    Some(Ok(tok@Backslash))   |
894                                    Some(Ok(tok@Newline))     => delim.push_str(tok.as_str()),
895
896                                    Some(t) => {
897                                        let t = try_map!(t);
898                                        delim.push_str(Backslash.as_str());
899                                        delim.push_str(t.as_str());
900                                    },
901
902                                    None => delim.push_str(Backslash.as_str()),
903                                }
904                            },
905
906                            t => delim.push_str(t.as_str()),
907                        }
908                    }
909                },
910
911                // Having backticks in a heredoc delimeter is something the major shells all
912                // disagree on. Half of them (bash included) treat the precense of backticks
913                // as indicating that the delimeter is quoted (and the body isn't expanded).
914                // Although the POSIX standard does not indicate backticks are a form of quoting
915                // its not unreasonable for them to be seen as such a way. Moreover, the presense
916                // of backticks in a heredoc delimeter isn't something that is seen often, so there
917                // probably won't be many problems in using this non-portable style, so we will
918                // treat their presense as an indication to NOT expand the body.
919                //
920                // Backslashes inside the double quotes should retain their literal meaning unless
921                // followed by \, $, or `, according to the POSIX standard. bash is the only major
922                // shell which does not follow this rule. Since the majority of the shells seeem to
923                // follow these escaping rules (one way or another), and since the standard
924                // indicates this course of action, we will adopt it as well. Again, most shell
925                // script maintainers probably avoid using escaping in heredoc delimeters to avoid
926                // confusing, and non-portable style so picking any approach shouldn't cause too
927                // many issues that cannot be fixed in a future version or with some compatability
928                // flag.
929                //
930                // TL;DR: balanced backticks are allowed in delimeter, they cause the body to NOT
931                // be expanded, and backslashes are only removed if followed by \, $, or `.
932                Some(Backtick) => {
933                    quoted = true;
934                    delim.push_str(Backtick.as_str());
935                    for t in iter.backticked_remove_backslashes(start_pos) {
936                        delim.push_str(try_map!(t).as_str());
937                    }
938                    delim.push_str(Backtick.as_str());
939                },
940
941                Some(t) => delim.push_str(t.as_str()),
942                None => break,
943            }
944        }
945
946        if delim.is_empty() {
947            return Err(self.make_unexpected_err());
948        }
949
950        delim.shrink_to_fit();
951        let (delim, quoted) = (delim, quoted);
952        let delim_len = delim.len();
953        let delim_r = String::from_iter(vec!(delim.as_str(), "\r"));
954        let delim_r_len = delim_r.len();
955
956        // Here we will fast-forward to the next newline and capture the heredoc's
957        // body that comes after it. Then we'll store these tokens in a safe place
958        // and continue parsing them later. Although it may seem inefficient to do
959        // this instead of parsing input until all pending heredocs are resolved,
960        // we would have to do even more bookkeeping to store pending and resolved
961        // heredocs, especially if we want to keep the builder unaware of our
962        // shenanigans (since it *could* be keeping some internal state of what
963        // we feed it).
964        let saved_pos = self.iter.pos();
965        let mut saved_tokens = Vec::new();
966        while self.iter.peek().is_some() {
967            // Make sure we save all tokens until the next UNQUOTED newilne
968            if let Some(&Newline) = self.iter.peek() {
969                saved_tokens.push(self.iter.next().unwrap());
970                break;
971            }
972
973            for t in self.iter.balanced() {
974                saved_tokens.push(try_map!(t));
975            }
976        }
977
978        let heredoc_start_pos = self.iter.pos();
979        let mut heredoc = Vec::new();
980        'heredoc: loop {
981            let mut line_start_pos = self.iter.pos();
982            let mut line = Vec::new();
983            'line: loop {
984                if strip_tabs {
985                    let skip_next = if let Some(&Whitespace(ref w)) = self.iter.peek() {
986                        let stripped = w.trim_left_matches('\t');
987                        let num_tabs = w.len() - stripped.len();
988                        line_start_pos.advance_tabs(num_tabs);
989
990                        if !stripped.is_empty() {
991                            line.push(Whitespace(stripped.to_owned()));
992                        }
993
994                        true
995                    } else {
996                        false
997                    };
998
999                    if skip_next {
1000                        self.iter.next();
1001                    }
1002                }
1003
1004                let next = self.iter.next();
1005                match next {
1006                    // If we haven't grabbed any input we must have hit EOF
1007                    // which should delimit the heredoc body
1008                    None if line.is_empty() => break 'heredoc,
1009
1010                    // Otherwise, if we have a partial line captured, check
1011                    // whether it happens to be the delimeter, and append it
1012                    // to the body if it isn't
1013                    None | Some(Newline) => {
1014                        // Do a quick length check on the line. Odds are that heredoc lines
1015                        // will be much longer than the delimeter itself, and it could get
1016                        // slow to stringify each and every line (and alloc it in memory)
1017                        // when token length checks are available without converting to strings.
1018                        let mut line_len = 0;
1019                        for t in &line {
1020                            line_len += t.len();
1021                            if line_len > delim_r_len {
1022                                break;
1023                            }
1024                        }
1025
1026                        // NB A delimeter like "\eof" becomes [Name(e), Name(of)], which
1027                        // won't compare to [Name(eof)], forcing us to do a string comparison
1028                        // NB We must also do a check using \r\n line endings. Although we could
1029                        // lex \r\n as a Newline token, doing so would complicate keeping track
1030                        // of positions in the source, as we could have one or two byte Newlines,
1031                        // or two different tokens to deal with.
1032                        if line_len == delim_len || line_len == delim_r_len {
1033                            let line_str = concat_tokens(&line);
1034                            if line_str == delim || line_str == delim_r {
1035                                break 'heredoc;
1036                            }
1037                        }
1038
1039                        if next == Some(Newline) { line.push(Newline); }
1040                        break 'line;
1041                    },
1042
1043                    Some(t) => line.push(t),
1044                }
1045            }
1046
1047            heredoc.push((line, line_start_pos));
1048        }
1049
1050        self.iter.buffer_tokens_to_yield_first(saved_tokens, saved_pos);
1051
1052        let body = if quoted {
1053            let body = heredoc.into_iter().flat_map(|(t, _)| t).collect::<Vec<_>>();
1054            Single(Simple(SimpleWordKind::Literal(concat_tokens(&body))))
1055        } else {
1056            let mut tok_iter = TokenIter::with_position(empty_iter(), heredoc_start_pos);
1057            while let Some((line, pos)) = heredoc.pop() {
1058                tok_iter.buffer_tokens_to_yield_first(line, pos);
1059            }
1060
1061            let mut tok_backup = TokenIterWrapper::Buffered(tok_iter);
1062            mem::swap(&mut self.iter, &mut tok_backup);
1063            let mut body = try!(self.word_interpolated_raw(None, heredoc_start_pos));
1064            let _ = mem::replace(&mut self.iter, tok_backup);
1065
1066            if body.len() > 1 {
1067                Concat(body.into_iter().map(Simple).collect())
1068            } else {
1069                let body = body.pop().unwrap_or_else(|| SimpleWordKind::Literal(String::new()));
1070                Single(Simple(body))
1071            }
1072        };
1073
1074        let word = try!(self.builder.word(body));
1075        Ok(try!(self.builder.redirect(builder::RedirectKind::Heredoc(src_fd, word))))
1076    }
1077
1078    /// Parses a whitespace delimited chunk of text, honoring space quoting rules,
1079    /// and skipping leading and trailing whitespace.
1080    ///
1081    /// Since there are a large number of possible tokens that constitute a word,
1082    /// (such as literals, paramters, variables, etc.) the caller may not know
1083    /// for sure whether to expect a word, thus an optional result is returned
1084    /// in the event that no word exists.
1085    ///
1086    /// Note that an error can still arise if partial tokens are present
1087    /// (e.g. malformed parameter).
1088    pub fn word(&mut self) -> ParseResult<Option<B::Word>, B::Error> {
1089        let ret = try!(self.word_preserve_trailing_whitespace());
1090        self.skip_whitespace();
1091        Ok(ret)
1092    }
1093
1094    /// Identical to `Parser::word()` but preserves trailing whitespace after the word.
1095    pub fn word_preserve_trailing_whitespace(&mut self) -> ParseResult<Option<B::Word>, B::Error> {
1096        let w = match try!(self.word_preserve_trailing_whitespace_raw()) {
1097            Some(w) => Some(try!(self.builder.word(w))),
1098            None => None,
1099        };
1100        Ok(w)
1101    }
1102
1103    /// Identical to `Parser::word_preserve_trailing_whitespace()` but does
1104    /// not pass the result to the AST builder.
1105    fn word_preserve_trailing_whitespace_raw(&mut self)
1106        -> ParseResult<Option<ComplexWordKind<B::Command>>, B::Error>
1107    {
1108        self.word_preserve_trailing_whitespace_raw_with_delim(None)
1109    }
1110
1111    /// Identical to `Parser::word_preserve_trailing_whitespace_raw()` but
1112    /// allows for specifying an arbitrary token as a word delimiter.
1113    fn word_preserve_trailing_whitespace_raw_with_delim(&mut self, delim: Option<Token>)
1114        -> ParseResult<Option<ComplexWordKind<B::Command>>, B::Error>
1115    {
1116        self.skip_whitespace();
1117
1118        // Make sure we don't consume comments,
1119        // e.g. if a # is at the start of a word.
1120        if let Some(&Pound) = self.iter.peek() {
1121            return Ok(None);
1122        }
1123
1124        let mut words = Vec::new();
1125        loop {
1126            if delim.is_some() && self.iter.peek() == delim.as_ref() {
1127                break;
1128            }
1129
1130            match self.iter.peek() {
1131                Some(&CurlyOpen)          |
1132                Some(&CurlyClose)         |
1133                Some(&SquareOpen)         |
1134                Some(&SquareClose)        |
1135                Some(&SingleQuote)        |
1136                Some(&DoubleQuote)        |
1137                Some(&Pound)              |
1138                Some(&Star)               |
1139                Some(&Question)           |
1140                Some(&Tilde)              |
1141                Some(&Bang)               |
1142                Some(&Backslash)          |
1143                Some(&Percent)            |
1144                Some(&Dash)               |
1145                Some(&Equals)             |
1146                Some(&Plus)               |
1147                Some(&Colon)              |
1148                Some(&At)                 |
1149                Some(&Caret)              |
1150                Some(&Slash)              |
1151                Some(&Comma)              |
1152                Some(&Name(_))            |
1153                Some(&Literal(_))         => {},
1154
1155                Some(&Backtick) => {
1156                    words.push(Simple(try!(self.backticked_raw())));
1157                    continue;
1158                },
1159
1160                Some(&Dollar)             |
1161                Some(&ParamPositional(_)) => {
1162                    words.push(Simple(try!(self.parameter_raw())));
1163                    continue;
1164                },
1165
1166                Some(&Newline)       |
1167                Some(&ParenOpen)     |
1168                Some(&ParenClose)    |
1169                Some(&Semi)          |
1170                Some(&Amp)           |
1171                Some(&Pipe)          |
1172                Some(&AndIf)         |
1173                Some(&OrIf)          |
1174                Some(&DSemi)         |
1175                Some(&Less)          |
1176                Some(&Great)         |
1177                Some(&DLess)         |
1178                Some(&DGreat)        |
1179                Some(&GreatAnd)      |
1180                Some(&LessAnd)       |
1181                Some(&DLessDash)     |
1182                Some(&Clobber)       |
1183                Some(&LessGreat)     |
1184                Some(&Whitespace(_)) |
1185                None => break,
1186            }
1187
1188            let start_pos = self.iter.pos();
1189            let w = match self.iter.next().unwrap() {
1190                // Unless we are explicitly parsing a brace group, `{` and `}` should
1191                // be treated as literals.
1192                //
1193                // Also, comments are only recognized where a Newline is valid, thus '#'
1194                // becomes a literal if it occurs in the middle of a word.
1195                tok@Bang       |
1196                tok@Pound      |
1197                tok@Percent    |
1198                tok@Dash       |
1199                tok@Equals     |
1200                tok@Plus       |
1201                tok@At         |
1202                tok@Caret      |
1203                tok@Slash      |
1204                tok@Comma      |
1205                tok@CurlyOpen  |
1206                tok@CurlyClose => Simple(SimpleWordKind::Literal(tok.to_string())),
1207
1208                Name(s)    |
1209                Literal(s) => Simple(SimpleWordKind::Literal(s)),
1210
1211                Star        => Simple(SimpleWordKind::Star),
1212                Question    => Simple(SimpleWordKind::Question),
1213                Tilde       => Simple(SimpleWordKind::Tilde),
1214                SquareOpen  => Simple(SimpleWordKind::SquareOpen),
1215                SquareClose => Simple(SimpleWordKind::SquareClose),
1216                Colon       => Simple(SimpleWordKind::Colon),
1217
1218                Backslash => match self.iter.next() {
1219                    // Escaped newlines become whitespace and a delimiter.
1220                    // Alternatively, can't escape EOF, just ignore the slash
1221                    Some(Newline) | None => break,
1222                    Some(t) => Simple(SimpleWordKind::Escaped(t.to_string())),
1223                },
1224
1225                SingleQuote => {
1226                    let mut buf = String::new();
1227                    for t in self.iter.single_quoted(start_pos) {
1228                        buf.push_str(try!(t.map_err(|e| ParseError::Unmatched(e.0, e.1))).as_str())
1229                    }
1230
1231                    SingleQuoted(buf)
1232                },
1233
1234                DoubleQuote => DoubleQuoted(
1235                    try!(self.word_interpolated_raw(Some((DoubleQuote, DoubleQuote)), start_pos))
1236                ),
1237
1238                // Parameters and backticks should have been
1239                // handled while peeking above.
1240                Backtick           |
1241                Dollar             |
1242                ParamPositional(_) => unreachable!(),
1243
1244                // All word delimiters should have
1245                // broken the loop while peeking above.
1246                Newline       |
1247                ParenOpen     |
1248                ParenClose    |
1249                Semi          |
1250                Amp           |
1251                Pipe          |
1252                AndIf         |
1253                OrIf          |
1254                DSemi         |
1255                Less          |
1256                Great         |
1257                DLess         |
1258                DGreat        |
1259                GreatAnd      |
1260                LessAnd       |
1261                DLessDash     |
1262                Clobber       |
1263                LessGreat     |
1264                Whitespace(_) => unreachable!(),
1265            };
1266
1267            words.push(w);
1268        }
1269
1270        let ret = if words.is_empty() {
1271            None
1272        } else if words.len() == 1 {
1273            Some(Single(words.pop().unwrap()))
1274        } else {
1275            Some(Concat(words))
1276        };
1277
1278        Ok(ret)
1279    }
1280
1281    /// Parses tokens in a way similar to how double quoted strings may be interpreted.
1282    ///
1283    /// Parameters/substitutions are parsed as normal, backslashes keep their literal
1284    /// meaning unless they preceed $, `, ", \, \n, or the specified delimeter, while
1285    /// all other tokens are consumed as literals.
1286    ///
1287    /// Tokens will continue to be consumed until a specified delimeter is reached
1288    /// (which is also consumed). If EOF is reached before a delimeter can be found,
1289    /// an error will result. If a `None` is provided as a delimeter tokens will be
1290    /// consumed until EOF is reached and no error will result.
1291    ///
1292    /// `delim` argument structure is Option<(open token, close token)>. The close
1293    /// token indicates when to stop parsing the word, while the open token will be
1294    /// used to construct a `ParseError::Unmatched` error.
1295    fn word_interpolated_raw(&mut self, delim: Option<(Token, Token)>, start_pos: SourcePos)
1296        -> ParseResult<Vec<SimpleWordKind<B::Command>>, B::Error>
1297    {
1298        let (delim_open, delim_close) = match delim {
1299            Some((o,c)) => (Some(o), Some(c)),
1300            None => (None, None),
1301        };
1302
1303        let mut words = Vec::new();
1304        let mut buf = String::new();
1305        loop {
1306            if self.iter.peek() == delim_close.as_ref() {
1307                self.iter.next();
1308                break;
1309            }
1310
1311            macro_rules! store {
1312                ($word:expr) => {{
1313                    if !buf.is_empty() {
1314                        words.push(SimpleWordKind::Literal(buf));
1315                        buf = String::new();
1316                    }
1317                    words.push($word);
1318                }}
1319            }
1320
1321            // Make sure we don't consume any $ (or any specific parameter token)
1322            // we find since the `parameter` method expects to consume them.
1323            match self.iter.peek() {
1324                Some(&Dollar)             |
1325                Some(&ParamPositional(_)) => {
1326                    store!(try!(self.parameter_raw()));
1327                    continue;
1328                },
1329
1330                Some(&Backtick) => {
1331                    store!(try!(self.backticked_raw()));
1332                    continue;
1333                },
1334
1335                _ => {},
1336            }
1337
1338            match self.iter.next() {
1339                // Backslashes only escape a few tokens when double-quoted-type words
1340                Some(Backslash) => {
1341                    let special = match self.iter.peek() {
1342                        Some(&Dollar)      |
1343                        Some(&Backtick)    |
1344                        Some(&DoubleQuote) |
1345                        Some(&Backslash)   |
1346                        Some(&Newline)     => true,
1347                        _ => false,
1348                    };
1349
1350                    if special || self.iter.peek() == delim_close.as_ref() {
1351                        store!(SimpleWordKind::Escaped(self.iter.next().unwrap().to_string()))
1352                    } else {
1353                        buf.push_str(Backslash.as_str());
1354                    }
1355                },
1356
1357                Some(Dollar) => unreachable!(), // Sanity
1358                Some(Backtick) => unreachable!(), // Sanity
1359
1360                Some(t) => buf.push_str(t.as_str()),
1361                None => match delim_open {
1362                    Some(delim) => return Err(ParseError::Unmatched(delim, start_pos)),
1363                    None => break,
1364                },
1365            }
1366        }
1367
1368        if !buf.is_empty() {
1369            words.push(SimpleWordKind::Literal(buf));
1370        }
1371
1372        Ok(words)
1373    }
1374
1375    /// Parses a command subsitution in the form \`cmd\`.
1376    ///
1377    /// Any backslashes that are immediately followed by \, $, or ` are removed
1378    /// before the contents inside the original backticks are recursively parsed
1379    /// as a command.
1380    pub fn backticked_command_substitution(&mut self) -> ParseResult<B::Word, B::Error> {
1381        let word = try!(self.backticked_raw());
1382        Ok(try!(self.builder.word(Single(Simple(word)))))
1383    }
1384
1385    /// Identical to `Parser::backticked_command_substitution`, except but does not pass the
1386    /// result to the AST builder.
1387    fn backticked_raw(&mut self) -> ParseResult<SimpleWordKind<B::Command>, B::Error> {
1388        let backtick_pos = self.iter.pos();
1389        eat!(self, { Backtick => {} });
1390
1391        // FIXME: it would be great to not have to buffer all tokens between backticks
1392        // and lazily consume them. Unfortunately the BacktickBackslashRemover iterator
1393        // returns a Result<Token, UnmatchedError>, so we can't temporarily substitute it
1394        // for our regular iterator (without forcing us to check the the value of each
1395        // `peek` or `next` operation we make).
1396        let tok_iter = try!(self.iter
1397            .token_iter_from_backticked_with_removed_backslashes(backtick_pos)
1398            .map_err(|e| ParseError::Unmatched(e.0, e.1))
1399        );
1400
1401        let mut tok_backup = TokenIterWrapper::Buffered(tok_iter);
1402
1403        mem::swap(&mut self.iter, &mut tok_backup);
1404        let cmd_subst = self.command_group_internal(CommandGroupDelimiters::default());
1405        let _ = mem::replace(&mut self.iter, tok_backup);
1406
1407        Ok(SimpleWordKind::CommandSubst(try!(cmd_subst)))
1408    }
1409
1410    /// Parses a parameters such as `$$`, `$1`, `$foo`, etc, or
1411    /// parameter substitutions such as `$(cmd)`, `${param-word}`, etc.
1412    ///
1413    /// Since it is possible that a leading `$` is not followed by a valid
1414    /// parameter, the `$` should be treated as a literal. Thus this method
1415    /// returns an `Word`, which will capture both cases where a literal or
1416    /// parameter is parsed.
1417    pub fn parameter(&mut self) -> ParseResult<B::Word, B::Error> {
1418        let param = try!(self.parameter_raw());
1419        Ok(try!(self.builder.word(Single(Simple(param)))))
1420    }
1421
1422    /// Identical to `Parser::parameter()` but does not pass the result to the AST builder.
1423    fn parameter_raw(&mut self) -> ParseResult<SimpleWordKind<B::Command>, B::Error> {
1424        use ast::Parameter;
1425
1426        let start_pos = self.iter.pos();
1427        match self.iter.next() {
1428            Some(ParamPositional(p)) => Ok(SimpleWordKind::Param(Parameter::Positional(p as u32))),
1429
1430            Some(Dollar) => {
1431                match self.iter.peek() {
1432                    Some(&Star)      |
1433                    Some(&Pound)     |
1434                    Some(&Question)  |
1435                    Some(&Dollar)    |
1436                    Some(&Bang)      |
1437                    Some(&Dash)      |
1438                    Some(&At)        |
1439                    Some(&Name(_))   => Ok(SimpleWordKind::Param(try!(self.parameter_inner()))),
1440
1441                    Some(&ParenOpen) |
1442                    Some(&CurlyOpen) => self.parameter_substitution_raw(),
1443
1444                    _ => Ok(SimpleWordKind::Literal(Dollar.to_string())),
1445                }
1446            },
1447
1448            Some(t) => Err(ParseError::Unexpected(t, start_pos)),
1449            None => Err(ParseError::UnexpectedEOF),
1450        }
1451    }
1452
1453    /// Parses the word part of a parameter substitution, up to and including
1454    /// the closing curly brace.
1455    ///
1456    /// All tokens that normally cannot be part of a word will be treated
1457    /// as literals.
1458    fn parameter_substitution_word_raw(&mut self, curly_open_pos: SourcePos)
1459        -> ParseResult<Option<ComplexWordKind<B::Command>>, B::Error>
1460    {
1461        let mut words = Vec::new();
1462        'capture_words: loop {
1463            'capture_literals: loop {
1464                let found_backslash = match self.iter.peek() {
1465                    None |
1466                    Some(&CurlyClose) => break 'capture_words,
1467
1468                    Some(&Backslash) => true,
1469
1470                    // Tokens that are normally skipped or ignored either always or at the
1471                    // start of a word (e.g. #), we want to make sure we capture these ourselves.
1472                    Some(t@&Pound)         |
1473                    Some(t@&ParenOpen)     |
1474                    Some(t@&ParenClose)    |
1475                    Some(t@&Semi)          |
1476                    Some(t@&Amp)           |
1477                    Some(t@&Pipe)          |
1478                    Some(t@&AndIf)         |
1479                    Some(t@&OrIf)          |
1480                    Some(t@&DSemi)         |
1481                    Some(t@&Less)          |
1482                    Some(t@&Great)         |
1483                    Some(t@&DLess)         |
1484                    Some(t@&DGreat)        |
1485                    Some(t@&GreatAnd)      |
1486                    Some(t@&LessAnd)       |
1487                    Some(t@&DLessDash)     |
1488                    Some(t@&Clobber)       |
1489                    Some(t@&LessGreat)     |
1490                    Some(t@&Whitespace(_)) |
1491                    Some(t@&Newline)       => {
1492                        words.push(Simple(SimpleWordKind::Literal(t.as_str().to_owned())));
1493                        false
1494                    },
1495
1496                    // Tokens that are always part of a word,
1497                    // don't have to handle them differently.
1498                    Some(&CurlyOpen)          |
1499                    Some(&SquareOpen)         |
1500                    Some(&SquareClose)        |
1501                    Some(&SingleQuote)        |
1502                    Some(&DoubleQuote)        |
1503                    Some(&Star)               |
1504                    Some(&Question)           |
1505                    Some(&Tilde)              |
1506                    Some(&Bang)               |
1507                    Some(&Percent)            |
1508                    Some(&Dash)               |
1509                    Some(&Equals)             |
1510                    Some(&Plus)               |
1511                    Some(&Colon)              |
1512                    Some(&At)                 |
1513                    Some(&Caret)              |
1514                    Some(&Slash)              |
1515                    Some(&Comma)              |
1516                    Some(&Name(_))            |
1517                    Some(&Literal(_))         |
1518                    Some(&Backtick)           |
1519                    Some(&Dollar)             |
1520                    Some(&ParamPositional(_)) => break 'capture_literals,
1521                };
1522
1523
1524                // Escaped newlines are considered whitespace and usually ignored,
1525                // so we have to manually capture here.
1526                let skip_twice = if found_backslash {
1527                    let mut peek = self.iter.multipeek();
1528                    peek.peek_next(); // Skip past the Backslash
1529                    if let Some(t@&Newline) = peek.peek_next() {
1530                        words.push(Simple(SimpleWordKind::Escaped(t.as_str().to_owned())));
1531                        true
1532                    } else {
1533                        // Backslash is escaping something other than newline,
1534                        // capture it like a regular word.
1535                        break 'capture_literals;
1536                    }
1537                } else {
1538                    false
1539                };
1540
1541                self.iter.next(); // Skip the first token that was peeked above
1542                if skip_twice {
1543                    self.iter.next();
1544                }
1545
1546            }
1547
1548            match try!(self.word_preserve_trailing_whitespace_raw_with_delim(Some(CurlyClose))) {
1549                Some(Single(w)) => words.push(w),
1550                Some(Concat(ws)) => words.extend(ws),
1551                None => break 'capture_words,
1552            }
1553        }
1554
1555        eat_maybe!(self, {
1556            CurlyClose => {};
1557            _ => { return Err(ParseError::Unmatched(CurlyOpen, curly_open_pos)); }
1558        });
1559
1560        if words.is_empty() {
1561            Ok(None)
1562        } else if words.len() == 1 {
1563            Ok(Some(Single(words.pop().unwrap())))
1564        } else {
1565            Ok(Some(Concat(words)))
1566        }
1567    }
1568
1569    /// Parses everything that comes after the parameter in a parameter substitution.
1570    ///
1571    /// For example, given `${<param><colon><operator><word>}`, this function will consume
1572    /// the colon, operator, word, and closing curly.
1573    ///
1574    /// Nothing is passed to the builder.
1575    fn parameter_substitution_body_raw(
1576        &mut self,
1577        param: DefaultParameter,
1578        curly_open_pos: SourcePos)
1579        -> ParseResult<SimpleWordKind<B::Command>, B::Error>
1580    {
1581        use ast::Parameter;
1582        use ast::builder::ParameterSubstitutionKind::*;
1583
1584        let has_colon = eat_maybe!(self, {
1585            Colon => { true };
1586            _ => { false },
1587        });
1588
1589        let op_pos = self.iter.pos();
1590        let op = match self.iter.next() {
1591            Some(tok@Dash)     |
1592            Some(tok@Equals)   |
1593            Some(tok@Question) |
1594            Some(tok@Plus)     => tok,
1595
1596            Some(CurlyClose) => return Ok(SimpleWordKind::Param(param)),
1597
1598            Some(t) => return Err(ParseError::BadSubst(t, op_pos)),
1599            None => return Err(ParseError::Unmatched(CurlyOpen, curly_open_pos)),
1600        };
1601
1602        let word = try!(self.parameter_substitution_word_raw(curly_open_pos));
1603        let maybe_len = param == Parameter::Pound && !has_colon && word.is_none();
1604
1605        // We must carefully check if we get ${#-} or ${#?}, in which case
1606        // we have parsed a Len substitution and not something else
1607        let ret = if maybe_len && op == Dash {
1608            Len(Parameter::Dash)
1609        } else if maybe_len && op == Question {
1610            Len(Parameter::Question)
1611        } else {
1612            match op {
1613                Dash     => Default(has_colon, param, word),
1614                Equals   => Assign(has_colon, param, word),
1615                Question => Error(has_colon, param, word),
1616                Plus     => Alternative(has_colon, param, word),
1617                _ => unreachable!(),
1618            }
1619        };
1620        Ok(SimpleWordKind::Subst(Box::new(ret)))
1621    }
1622
1623    /// Parses a parameter substitution in the form of `${...}`, `$(...)`, or `$((...))`.
1624    /// Nothing is passed to the builder.
1625    fn parameter_substitution_raw(&mut self) -> ParseResult<SimpleWordKind<B::Command>, B::Error> {
1626        use ast::Parameter;
1627        use ast::builder::ParameterSubstitutionKind::*;
1628
1629        let start_pos = self.iter.pos();
1630        match self.iter.peek() {
1631            Some(&ParenOpen) => {
1632                let is_arith = {
1633                    let mut peeked = self.iter.multipeek();
1634                    peeked.peek_next(); // Skip first ParenOpen
1635                    Some(&ParenOpen) == peeked.peek_next()
1636                };
1637
1638                let subst = if is_arith {
1639                    eat!(self, { ParenOpen => {} });
1640                    eat!(self, { ParenOpen => {} });
1641
1642                    // If we hit a paren right off the bat either the body is empty
1643                    // or there is a stray paren which will result in an error either
1644                    // when we look for the closing parens or sometime after.
1645                    self.skip_whitespace();
1646                    let subst = if let Some(&ParenClose) = self.iter.peek() {
1647                        None
1648                    } else {
1649                        Some(try!(self.arithmetic_substitution()))
1650                    };
1651
1652                    // Some shells allow the closing parens to have whitespace in between
1653                    self.skip_whitespace();
1654                    eat!(self, { ParenClose => {} });
1655                    self.skip_whitespace();
1656                    eat!(self, { ParenClose => {} });
1657
1658                    Arith(subst)
1659                } else {
1660                    Command(try!(self.subshell_internal(true)))
1661                };
1662
1663                Ok(SimpleWordKind::Subst(Box::new(subst)))
1664            },
1665
1666            Some(&CurlyOpen) => {
1667                let curly_open_pos = start_pos;
1668                self.iter.next();
1669
1670                let param = try!(self.parameter_inner());
1671                let subst = match self.iter.peek() {
1672                    Some(&Percent) => {
1673                        self.iter.next();
1674                        eat_maybe!(self, {
1675                            Percent => {
1676                                let word = self.parameter_substitution_word_raw(curly_open_pos);
1677                                RemoveLargestSuffix(param, try!(word))
1678                            };
1679                            _ => {
1680                                let word = self.parameter_substitution_word_raw(curly_open_pos);
1681                                RemoveSmallestSuffix(param, try!(word))
1682                            }
1683                        })
1684                    },
1685
1686                    Some(&Pound) => {
1687                        self.iter.next();
1688                        eat_maybe!(self, {
1689                            Pound => {
1690                                let word = self.parameter_substitution_word_raw(curly_open_pos);
1691                                RemoveLargestPrefix(param, try!(word))
1692                            };
1693                            _ => {
1694                                match try!(self.parameter_substitution_word_raw(curly_open_pos)) {
1695                                    // Handle ${##} case
1696                                    None if Parameter::Pound == param => Len(Parameter::Pound),
1697                                    w => RemoveSmallestPrefix(param, w),
1698                                }
1699                            }
1700                        })
1701                    },
1702
1703                    // In this case the found # is the parameter itself
1704                    Some(&Colon)      |
1705                    Some(&Dash)       |
1706                    Some(&Equals)     |
1707                    Some(&Question)   |
1708                    Some(&Plus)       |
1709                    Some(&CurlyClose) if Parameter::Pound == param =>
1710                        return self.parameter_substitution_body_raw(param, curly_open_pos),
1711
1712                    // Otherwise we must have ${#param}
1713                    _ if Parameter::Pound == param => {
1714                        let param = try!(self.parameter_inner());
1715                        eat!(self, { CurlyClose => { Len(param) } })
1716                    },
1717
1718                    _ => return self.parameter_substitution_body_raw(param, curly_open_pos),
1719                };
1720
1721                Ok(SimpleWordKind::Subst(Box::new(subst)))
1722            },
1723
1724            _ => Err(self.make_unexpected_err()),
1725        }
1726    }
1727
1728    /// Parses a valid parameter that can appear inside a set of curly braces.
1729    fn parameter_inner(&mut self) -> ParseResult<DefaultParameter, B::Error> {
1730        use ast::Parameter;
1731
1732        let start_pos = self.iter.pos();
1733        let param = match self.iter.next() {
1734            Some(Star)     => Parameter::Star,
1735            Some(Pound)    => Parameter::Pound,
1736            Some(Question) => Parameter::Question,
1737            Some(Dollar)   => Parameter::Dollar,
1738            Some(Bang)     => Parameter::Bang,
1739            Some(Dash)     => Parameter::Dash,
1740            Some(At)       => Parameter::At,
1741
1742            Some(Name(n)) => Parameter::Var(n),
1743            Some(Literal(s)) => match u32::from_str(&s) {
1744                Ok(n)  => Parameter::Positional(n),
1745                Err(_) => return Err(ParseError::BadSubst(Literal(s), start_pos)),
1746            },
1747
1748            Some(t) => return Err(ParseError::BadSubst(t, start_pos)),
1749            None => return Err(ParseError::UnexpectedEOF),
1750        };
1751
1752        Ok(param)
1753    }
1754
1755    /// Parses any number of sequential commands between the `do` and `done`
1756    /// reserved words. Each of the reserved words must be a literal token, and cannot be
1757    /// quoted or concatenated.
1758    pub fn do_group(&mut self) -> ParseResult<builder::CommandGroup<B::Command>, B::Error> {
1759        let start_pos = self.iter.pos();
1760        try!(self.reserved_word(&[DO]).map_err(|_| self.make_unexpected_err()));
1761        let result = try!(self.command_group(CommandGroupDelimiters {
1762            reserved_words: &[DONE],
1763            .. Default::default()
1764        }));
1765        try!(self.reserved_word(&[DONE])
1766             .or_else(|()| Err(ParseError::IncompleteCmd(DO, start_pos, DONE, self.iter.pos()))));
1767        Ok(result)
1768    }
1769
1770    /// Parses any number of sequential commands between balanced `{` and `}`
1771    /// reserved words. Each of the reserved words must be a literal token, and cannot be quoted.
1772    pub fn brace_group(&mut self) -> ParseResult<builder::CommandGroup<B::Command>, B::Error> {
1773        // CurlyClose must be encountered as a stand alone word,
1774        // even though it is represented as its own token
1775        let start_pos = self.iter.pos();
1776        try!(self.reserved_token(&[CurlyOpen]));
1777        let cmds = try!(self.command_group(CommandGroupDelimiters {
1778            reserved_tokens: &[CurlyClose],
1779            .. Default::default()
1780        }));
1781        try!(self.reserved_token(&[CurlyClose])
1782             .or_else(|_| Err(ParseError::Unmatched(CurlyOpen, start_pos))));
1783        Ok(cmds)
1784    }
1785
1786    /// Parses any number of sequential commands between balanced `(` and `)`.
1787    ///
1788    /// It is considered an error if no commands are present inside the subshell.
1789    pub fn subshell(&mut self) -> ParseResult<builder::CommandGroup<B::Command>, B::Error> {
1790        self.subshell_internal(false)
1791    }
1792
1793    /// Like `Parser::subshell` but allows the caller to specify
1794    /// if an empty body constitutes an error or not.
1795    fn subshell_internal(&mut self, empty_body_ok: bool)
1796        -> ParseResult<builder::CommandGroup<B::Command>, B::Error>
1797    {
1798        let start_pos = self.iter.pos();
1799        eat!(self, { ParenOpen => {} });
1800
1801        // Parens are always special tokens
1802        let body = try!(self.command_group_internal(CommandGroupDelimiters {
1803            exact_tokens: &[ParenClose],
1804            .. Default::default()
1805        }));
1806
1807        match self.iter.peek() {
1808            Some(&ParenClose) if empty_body_ok || !body.commands.is_empty() => {
1809                self.iter.next();
1810                Ok(body)
1811            },
1812            Some(_) => Err(self.make_unexpected_err()),
1813            None => Err(ParseError::Unmatched(ParenOpen, start_pos)),
1814        }
1815    }
1816
1817    /// Peeks at the next token (after skipping whitespace) to determine
1818    /// if (and which) compound command may follow.
1819    fn next_compound_command_type(&mut self) -> Option<CompoundCmdKeyword> {
1820        self.skip_whitespace();
1821        if Some(&ParenOpen) == self.iter.peek() {
1822            Some(CompoundCmdKeyword::Subshell)
1823        } else if self.peek_reserved_token(&[CurlyOpen]).is_some() {
1824            Some(CompoundCmdKeyword::Brace)
1825        } else {
1826            match self.peek_reserved_word(&[FOR, CASE, IF, WHILE, UNTIL]) {
1827                Some(FOR)   => Some(CompoundCmdKeyword::For),
1828                Some(CASE)  => Some(CompoundCmdKeyword::Case),
1829                Some(IF)    => Some(CompoundCmdKeyword::If),
1830                Some(WHILE) => Some(CompoundCmdKeyword::While),
1831                Some(UNTIL) => Some(CompoundCmdKeyword::Until),
1832                _ => None,
1833            }
1834        }
1835    }
1836
1837    /// Parses compound commands like `for`, `case`, `if`, `while`, `until`,
1838    /// brace groups, or subshells, including any redirection lists to be applied to them.
1839    pub fn compound_command(&mut self) -> ParseResult<B::CompoundCommand, B::Error> {
1840        self.compound_command_internal(None)
1841    }
1842
1843    /// Slightly optimized version of `Parse::compound_command` that will not
1844    /// check an upcoming reserved word if the caller already knows the answer.
1845    fn compound_command_internal(&mut self, kw: Option<CompoundCmdKeyword>) -> ParseResult<B::CompoundCommand, B::Error> {
1846        let cmd = match kw.or_else(|| self.next_compound_command_type()) {
1847            Some(CompoundCmdKeyword::If) => {
1848                let fragments = try!(self.if_command());
1849                let io = try!(self.redirect_list());
1850                try!(self.builder.if_command(fragments, io))
1851            },
1852
1853            Some(CompoundCmdKeyword::While) |
1854            Some(CompoundCmdKeyword::Until) => {
1855                let (until, guard_body_pair) = try!(self.loop_command());
1856                let io = try!(self.redirect_list());
1857                try!(self.builder.loop_command(until, guard_body_pair, io))
1858            },
1859
1860            Some(CompoundCmdKeyword::For) => {
1861                let for_fragments = try!(self.for_command());
1862                let io = try!(self.redirect_list());
1863                try!(self.builder.for_command(for_fragments, io))
1864            },
1865
1866            Some(CompoundCmdKeyword::Case) => {
1867                let fragments = try!(self.case_command());
1868                let io = try!(self.redirect_list());
1869                try!(self.builder.case_command(fragments, io))
1870            },
1871
1872            Some(CompoundCmdKeyword::Brace) => {
1873                let cmds = try!(self.brace_group());
1874                let io = try!(self.redirect_list());
1875                try!(self.builder.brace_group(cmds, io))
1876            },
1877
1878            Some(CompoundCmdKeyword::Subshell) => {
1879                let cmds = try!(self.subshell());
1880                let io = try!(self.redirect_list());
1881                try!(self.builder.subshell(cmds, io))
1882            },
1883
1884            None => return Err(self.make_unexpected_err()),
1885        };
1886
1887        Ok(cmd)
1888    }
1889
1890    /// Parses loop commands like `while` and `until` but does not parse any
1891    /// redirections that may follow.
1892    ///
1893    /// Since they are compound commands (and can have redirections applied to
1894    /// the entire loop) this method returns the relevant parts of the loop command,
1895    /// without constructing an AST node, it so that the caller can do so with redirections.
1896    pub fn loop_command(&mut self)
1897        -> ParseResult<(builder::LoopKind, builder::GuardBodyPairGroup<B::Command>), B::Error>
1898    {
1899        let start_pos = self.iter.pos();
1900        let kind = match try!(self.reserved_word(&[WHILE, UNTIL])
1901                              .map_err(|_| self.make_unexpected_err())) {
1902            WHILE => builder::LoopKind::While,
1903            UNTIL => builder::LoopKind::Until,
1904            _ => unreachable!(),
1905        };
1906        let guard = try!(self.command_group(CommandGroupDelimiters {
1907            reserved_words: &[DO],
1908            .. Default::default()
1909        }));
1910        match self.peek_reserved_word(&[DO]) {
1911            Some(_) => Ok((kind, builder::GuardBodyPairGroup {
1912                guard: guard,
1913                body: try!(self.do_group())
1914            })),
1915            None => Err(ParseError::IncompleteCmd(WHILE, start_pos, DO, self.iter.pos())),
1916        }
1917    }
1918
1919    /// Parses a single `if` command but does not parse any redirections that may follow.
1920    ///
1921    /// Since `if` is a compound command (and can have redirections applied to it) this
1922    /// method returns the relevant parts of the `if` command, without constructing an
1923    /// AST node, it so that the caller can do so with redirections.
1924    pub fn if_command(&mut self) -> ParseResult<builder::IfFragments<B::Command>, B::Error> {
1925        let start_pos = self.iter.pos();
1926        try!(self.reserved_word(&[IF]).map_err(|_| self.make_unexpected_err()));
1927
1928        macro_rules! missing_fi {
1929            () => { |_| ParseError::IncompleteCmd(IF, start_pos, FI, self.iter.pos()) }
1930        }
1931
1932        macro_rules! missing_then {
1933            () => { |_| ParseError::IncompleteCmd(IF, start_pos, THEN, self.iter.pos()) }
1934        }
1935
1936        let mut conditionals = Vec::new();
1937        loop {
1938            let guard = try!(self.command_group(CommandGroupDelimiters {
1939                reserved_words: &[THEN],
1940                .. Default::default()
1941            }));
1942            try!(self.reserved_word(&[THEN]).map_err(missing_then!()));
1943
1944            let body = try!(self.command_group(CommandGroupDelimiters {
1945                reserved_words: &[ELIF, ELSE, FI],
1946                .. Default::default()
1947            }));
1948            conditionals.push(builder::GuardBodyPairGroup {
1949                guard: guard,
1950                body: body,
1951            });
1952
1953            let els = match try!(self.reserved_word(&[ELIF, ELSE, FI]).map_err(missing_fi!())) {
1954                ELIF => continue,
1955                ELSE => {
1956                    let els = try!(self.command_group(CommandGroupDelimiters {
1957                        reserved_words: &[FI],
1958                        .. Default::default()
1959                    }));
1960                    try!(self.reserved_word(&[FI]).map_err(missing_fi!()));
1961                    Some(els)
1962                },
1963                FI => None,
1964                _ => unreachable!(),
1965            };
1966
1967            return Ok(builder::IfFragments { conditionals: conditionals, else_branch: els })
1968        }
1969    }
1970
1971    /// Parses a single `for` command but does not parse any redirections that may follow.
1972    ///
1973    /// Since `for` is a compound command (and can have redirections applied to it) this
1974    /// method returns the relevant parts of the `for` command, without constructing an
1975    /// AST node, it so that the caller can do so with redirections.
1976    pub fn for_command(&mut self) -> ParseResult<builder::ForFragments<B::Word, B::Command>, B::Error> {
1977        let start_pos = self.iter.pos();
1978        try!(self.reserved_word(&[FOR]).map_err(|_| self.make_unexpected_err()));
1979
1980        self.skip_whitespace();
1981
1982        match self.iter.peek() {
1983            Some(&Name(_))    |
1984            Some(&Literal(_)) => {},
1985            _ => return Err(self.make_unexpected_err()),
1986        }
1987
1988        let var_pos = self.iter.pos();
1989        let var = match self.iter.next() {
1990            Some(Name(v)) => v,
1991            Some(Literal(s)) => return Err(ParseError::BadIdent(s, var_pos)),
1992            _ => unreachable!(),
1993        };
1994
1995        let var_comment = self.newline();
1996        let post_var_comments = self.linebreak();
1997
1998        // A for command can take one of several different shapes (in pseudo regex syntax):
1999        // `for name [\n*] [in [word*]] [;\n* | \n+] do_group`
2000        // Below we'll disambiguate what situation we have as we move along.
2001        let (words, pre_body_comments) = if self.peek_reserved_word(&[IN]).is_some() {
2002            // Found `in` keyword, therefore we're looking at something like
2003            // `for name \n* in [words*] [;\n* | \n+] do_group`
2004            self.reserved_word(&[IN]).unwrap();
2005
2006            let mut words = Vec::new();
2007            while let Some(w) = try!(self.word()) {
2008                words.push(w);
2009            }
2010
2011            let found_semi = eat_maybe!(self, {
2012                Semi => { true };
2013                _ => { false }
2014            });
2015
2016            // We need either a newline or a ; to separate the words from the body
2017            // Thus if neither is found it is considered an error
2018            let words_comment = self.newline();
2019            if !found_semi && words_comment.is_none() {
2020                return Err(self.make_unexpected_err());
2021            }
2022
2023            (Some((post_var_comments, words, words_comment)), self.linebreak())
2024        } else if Some(&Semi) == self.iter.peek() {
2025            // `for name \n*;\n* do_group`
2026            eat!(self, { Semi => {} });
2027            (None, self.linebreak())
2028        } else if self.peek_reserved_word(&[DO]).is_none() {
2029            // If we didn't find an `in` keyword, and we havent hit the body
2030            // (a `do` keyword), then we can reasonably say the script has
2031            // words without an `in` keyword.
2032            return Err(ParseError::IncompleteCmd(FOR, start_pos, IN, self.iter.pos()));
2033        } else {
2034            // `for name \n* do_group`
2035            (None, post_var_comments)
2036        };
2037
2038        if self.peek_reserved_word(&[DO]).is_none() {
2039            return Err(ParseError::IncompleteCmd(FOR, start_pos , DO, self.iter.pos()));
2040        }
2041
2042        let body = try!(self.do_group());
2043        Ok(builder::ForFragments {
2044            var: var,
2045            var_comment: var_comment,
2046            words: words,
2047            pre_body_comments: pre_body_comments,
2048            body: body,
2049        })
2050    }
2051
2052    /// Parses a single `case` command but does not parse any redirections that may follow.
2053    ///
2054    /// Since `case` is a compound command (and can have redirections applied to it) this
2055    /// method returns the relevant parts of the `case` command, without constructing an
2056    /// AST node, it so that the caller can do so with redirections.
2057    pub fn case_command(&mut self) -> ParseResult<builder::CaseFragments<B::Word, B::Command>, B::Error> {
2058        let start_pos = self.iter.pos();
2059
2060        macro_rules! missing_in {
2061            () => { |_| ParseError::IncompleteCmd(CASE, start_pos, IN, self.iter.pos()); }
2062        }
2063
2064        macro_rules! missing_esac {
2065            () => { |_| ParseError::IncompleteCmd(CASE, start_pos, ESAC, self.iter.pos()); }
2066        }
2067
2068        try!(self.reserved_word(&[CASE]).map_err(|_| self.make_unexpected_err()));
2069
2070        let word = match try!(self.word()) {
2071            Some(w) => w,
2072            None => return Err(self.make_unexpected_err()),
2073        };
2074
2075        let post_word_comments = self.linebreak();
2076        try!(self.reserved_word(&[IN]).map_err(missing_in!()));
2077        let in_comment = self.newline();
2078
2079        let mut pre_esac_comments = None;
2080        let mut arms = Vec::new();
2081        loop {
2082            let pre_pattern_comments = self.linebreak();
2083            if self.peek_reserved_word(&[ESAC]).is_some() {
2084                // Make sure we don't lose the captured comments if there are no body
2085                debug_assert_eq!(pre_esac_comments, None);
2086                pre_esac_comments = Some(pre_pattern_comments);
2087                break;
2088            }
2089
2090            if let Some(&ParenOpen) = self.iter.peek() {
2091                self.iter.next();
2092            }
2093
2094            // Make sure we check for missing `esac` here, otherwise if we have EOF
2095            // trying to parse a word will result in an `UnexpectedEOF` error
2096            if self.iter.peek().is_none() {
2097                return Err(()).map_err(missing_esac!());
2098            }
2099
2100            let mut patterns = Vec::new();
2101            loop {
2102                match try!(self.word()) {
2103                    Some(p) => patterns.push(p),
2104                    None => return Err(self.make_unexpected_err()),
2105                }
2106
2107                match self.iter.peek() {
2108                    Some(&Pipe) => {
2109                        self.iter.next();
2110                        continue;
2111                    },
2112
2113                    Some(&ParenClose) if !patterns.is_empty() => {
2114                        self.iter.next();
2115                        break;
2116                    },
2117
2118                    // Make sure we check for missing `esac` here, otherwise if we have EOF
2119                    // trying to parse a word will result in an `UnexpectedEOF` error
2120                    None => return Err(()).map_err(missing_esac!()),
2121                    _ => return Err(self.make_unexpected_err()),
2122                }
2123            }
2124
2125            let pattern_comment = self.newline();
2126            let body = try!(self.command_group_internal(CommandGroupDelimiters {
2127                reserved_words: &[ESAC],
2128                reserved_tokens: &[],
2129                exact_tokens: &[DSemi]
2130            }));
2131
2132            let (no_more_arms, arm_comment) = if Some(&DSemi) == self.iter.peek() {
2133                self.iter.next();
2134                (false, self.newline())
2135            } else {
2136                (true, None)
2137            };
2138
2139            arms.push(builder::CaseArm {
2140                patterns: builder::CasePatternFragments {
2141                    pre_pattern_comments: pre_pattern_comments,
2142                    pattern_alternatives: patterns,
2143                    pattern_comment: pattern_comment,
2144                },
2145                body: body,
2146                arm_comment: arm_comment,
2147            });
2148
2149            if no_more_arms {
2150                break;
2151            }
2152        }
2153
2154        let remaining_comments = self.linebreak();
2155        let pre_esac_comments = match pre_esac_comments {
2156            Some(mut comments) => {
2157                comments.extend(remaining_comments);
2158                comments
2159            },
2160            None => remaining_comments,
2161        };
2162
2163        try!(self.reserved_word(&[ESAC]).map_err(missing_esac!()));
2164
2165        Ok(builder::CaseFragments {
2166            word: word,
2167            post_word_comments: post_word_comments,
2168            in_comment: in_comment,
2169            arms: arms,
2170            post_arms_comments: pre_esac_comments,
2171        })
2172    }
2173
2174    /// Parses a single function declaration if present. If no function is present,
2175    /// nothing is consumed from the token stream.
2176    pub fn maybe_function_declaration(&mut self) -> ParseResult<Option<B::PipeableCommand>, B::Error> {
2177        if self.peek_reserved_word(&[FUNCTION]).is_some() {
2178            return self.function_declaration().map(Some);
2179        }
2180
2181        let is_fn = {
2182            let mut peeked = self.iter.multipeek();
2183            if let Some(&Name(_)) = peeked.peek_next() {
2184                match peeked.peek_next() {
2185                    Some(&Whitespace(_)) => Some(&ParenOpen) == peeked.peek_next(),
2186                    Some(&ParenOpen) => true,
2187                    _ => false,
2188                }
2189            } else {
2190                false
2191            }
2192        };
2193
2194        if is_fn {
2195            self.function_declaration().map(Some)
2196        } else {
2197            Ok(None)
2198        }
2199    }
2200
2201    /// Parses a single function declaration.
2202    ///
2203    /// A function declaration must either begin with the `function` reserved word, or
2204    /// the name of the function must be followed by `()`. Whitespace is allowed between
2205    /// the name and `(`, and whitespace is allowed between `()`.
2206    pub fn function_declaration(&mut self) -> ParseResult<B::PipeableCommand, B::Error> {
2207        let (name, post_name_comments, body) = try!(self.function_declaration_internal());
2208        Ok(try!(self.builder.function_declaration(name, post_name_comments, body)))
2209    }
2210
2211    /// Like `Parser::function_declaration`, but does not pass the result to the builder
2212    fn function_declaration_internal(&mut self)
2213        -> ParseResult<(String, Vec<builder::Newline>, B::CompoundCommand), B::Error>
2214    {
2215        let found_fn = match self.peek_reserved_word(&[FUNCTION]) {
2216            Some(_) => { self.iter.next(); true },
2217            None => false,
2218        };
2219
2220        self.skip_whitespace();
2221
2222        match self.iter.peek() {
2223            Some(&Name(_))    |
2224            Some(&Literal(_)) => {},
2225            _ => return Err(self.make_unexpected_err()),
2226        }
2227
2228        let ident_pos = self.iter.pos();
2229        let name = match self.iter.next() {
2230            Some(Name(n)) => n,
2231            Some(Literal(s)) => return Err(ParseError::BadIdent(s, ident_pos)),
2232            _ => unreachable!(),
2233        };
2234
2235        // If there is no whitespace after the function name, the only valid
2236        // possibility is for `()` to appear.
2237        let body = if Some(&ParenOpen) == self.iter.peek() {
2238            eat!(self, { ParenOpen => {} });
2239            self.skip_whitespace();
2240            eat!(self, { ParenClose => {} });
2241            None
2242        } else if found_fn && Some(&Newline) == self.iter.peek() {
2243            // Do nothing, function declaration satisfied
2244            None
2245        } else {
2246            // Enforce at least one whitespace between function declaration and body
2247            eat!(self, { Whitespace(_) => {} });
2248            self.skip_whitespace();
2249
2250            // If we didn't find the function keyword, we MUST find `()` at this time
2251            if !found_fn {
2252                eat!(self, { ParenOpen => {} });
2253                self.skip_whitespace();
2254                eat!(self, { ParenClose => {} });
2255                None
2256            } else if Some(&ParenOpen) == self.iter.peek() {
2257                // Otherwise it is possible for there to be a subshell as the body
2258                let subshell = try!(self.subshell_internal(true));
2259                if subshell.commands.is_empty() && subshell.trailing_comments.is_empty() {
2260                    // Case like `function foo () ...`
2261                    None
2262                } else {
2263                    // Case like `function foo (subshell)`
2264                    Some(try!(self.builder.subshell(subshell, Vec::new())))
2265                }
2266            } else {
2267                None
2268            }
2269        };
2270
2271        let (post_name_comments, body) = match body {
2272            Some(subshell) => (Vec::new(), subshell),
2273            None => (self.linebreak(), try!(self.compound_command())),
2274        };
2275
2276        Ok((name, post_name_comments, body))
2277    }
2278
2279    /// Skips over any encountered whitespace but preserves newlines.
2280    #[inline]
2281    pub fn skip_whitespace(&mut self) {
2282        loop {
2283            while let Some(&Whitespace(_)) = self.iter.peek() {
2284                self.iter.next();
2285            }
2286
2287            let found_backslash_newline = {
2288                let mut peeked = self.iter.multipeek();
2289                Some(&Backslash) == peeked.peek_next() && Some(&Newline) == peeked.peek_next()
2290            };
2291
2292            if found_backslash_newline {
2293                self.iter.next();
2294                self.iter.next();
2295            } else {
2296                break;
2297            }
2298        }
2299    }
2300
2301    /// Parses zero or more `Token::Newline`s, skipping whitespace but capturing comments.
2302    #[inline]
2303    pub fn linebreak(&mut self) -> Vec<builder::Newline> {
2304        let mut lines = Vec::new();
2305        while let Some(n) = self.newline() {
2306            lines.push(n);
2307        }
2308        lines
2309    }
2310
2311    /// Tries to parse a `Token::Newline` (or a comment) after skipping whitespace.
2312    pub fn newline(&mut self) -> Option<builder::Newline> {
2313        self.skip_whitespace();
2314
2315        match self.iter.peek() {
2316            Some(&Pound) => {
2317                let comment = self.iter.by_ref().take_while(|t| t != &Newline).collect::<Vec<_>>();
2318                Some(builder::Newline(Some(concat_tokens(&comment))))
2319            },
2320
2321            Some(&Newline) => {
2322                self.iter.next();
2323                Some(builder::Newline(None))
2324            },
2325
2326            _ => None,
2327        }
2328    }
2329
2330    /// Checks that one of the specified tokens appears as a reserved word.
2331    ///
2332    /// The token must be followed by a token which delimits a word when it is
2333    /// unquoted/unescaped.
2334    ///
2335    /// If a reserved word is found, the token which it matches will be
2336    /// returned in case the caller cares which specific reserved word was found.
2337    pub fn peek_reserved_token<'a>(&mut self, tokens: &'a [Token]) -> Option<&'a Token> {
2338        if tokens.is_empty() {
2339            return None;
2340        }
2341
2342        let care_about_whitespace = tokens.iter().any(|tok| {
2343            if let Whitespace(_) = *tok {
2344                true
2345            } else {
2346                false
2347            }
2348        });
2349
2350        // If the caller cares about whitespace as a reserved word we should
2351        // do a reserved word check without skipping any leading whitespace.
2352        // If we don't find anything the first time (or if the caller does
2353        // not care about whitespace tokens) we will skip the whitespace
2354        // and try again.
2355        let num_tries = if care_about_whitespace {
2356            2
2357        } else {
2358            self.skip_whitespace();
2359            1
2360        };
2361
2362        for _ in 0..num_tries {
2363            {
2364                let mut peeked = self.iter.multipeek();
2365
2366                let found_tok = match peeked.peek_next() {
2367                    Some(tok) => tokens.iter().find(|&t| t == tok),
2368                    None => return None,
2369                };
2370
2371                if let ret@Some(_) = found_tok {
2372                    let found_delim = match peeked.peek_next() {
2373                        Some(delim) => delim.is_word_delimiter(),
2374                        None => true, // EOF is also a valid delimeter
2375                    };
2376
2377                    if found_delim {
2378                        return ret;
2379                    }
2380                }
2381            }
2382
2383            self.skip_whitespace();
2384        }
2385
2386        None
2387    }
2388
2389    /// Checks that one of the specified strings appears as a reserved word.
2390    ///
2391    /// The word must appear as a single token, unquoted and unescaped, and
2392    /// must be followed by a token which delimits a word when it is
2393    /// unquoted/unescaped. The reserved word may appear as a `Token::Name`
2394    /// or a `Token::Literal`.
2395    ///
2396    /// If a reserved word is found, the string which it matches will be
2397    /// returned in case the caller cares which specific reserved word was found.
2398    pub fn peek_reserved_word<'a>(&mut self, words: &'a [&str]) -> Option<&'a str> {
2399        if words.is_empty() {
2400            return None;
2401        }
2402
2403        self.skip_whitespace();
2404        let mut peeked = self.iter.multipeek();
2405        let found_tok = match peeked.peek_next() {
2406            Some(&Name(ref kw)) |
2407            Some(&Literal(ref kw)) => words.iter().find(|&w| w == kw).map(|kw| *kw),
2408            _ => None,
2409        };
2410
2411        match peeked.peek_next() {
2412            Some(delim) if delim.is_word_delimiter() => found_tok,
2413            None => found_tok, // EOF is a valid delimeter
2414            _ => None,
2415        }
2416    }
2417
2418    /// Checks that one of the specified tokens appears as a reserved word
2419    /// and consumes it, returning the token it matched in case the caller
2420    /// cares which specific reserved word was found.
2421    pub fn reserved_token(&mut self, tokens: &[Token]) -> ParseResult<Token, B::Error> {
2422        match self.peek_reserved_token(tokens) {
2423            Some(_) => Ok(self.iter.next().unwrap()),
2424            None => {
2425                // If the desired token is next, but we failed to find a reserved
2426                // token (because the token after it isn't a valid delimeter)
2427                // then the following token is the unexpected culprit, so we should
2428                // skip the upcoming one before forming the error.
2429                let skip_one = {
2430                    let peeked = self.iter.peek();
2431                    tokens.iter().any(|t| Some(t) == peeked)
2432                };
2433
2434                if skip_one { self.iter.next(); }
2435                Err(self.make_unexpected_err())
2436            },
2437        }
2438    }
2439
2440    /// Checks that one of the specified strings appears as a reserved word
2441    /// and consumes it, returning the string it matched in case the caller
2442    /// cares which specific reserved word was found.
2443    pub fn reserved_word<'a>(&mut self, words: &'a [&str]) -> Result<&'a str, ()> {
2444        match self.peek_reserved_word(words) {
2445            Some(s) => { self.iter.next(); Ok(s) },
2446            None => Err(()),
2447        }
2448    }
2449
2450    /// Parses commands until a configured delimeter (or EOF)
2451    /// is reached, without consuming the token or reserved word.
2452    ///
2453    /// Any reserved word/token **must** appear after a complete command
2454    /// separator (e.g. `;`, `&`, or a newline), otherwise it will be
2455    /// parsed as part of the command.
2456    ///
2457    /// It is considered an error if no commands are present.
2458    pub fn command_group(&mut self, cfg: CommandGroupDelimiters)
2459        -> ParseResult<builder::CommandGroup<B::Command>, B::Error>
2460    {
2461        let group = try!(self.command_group_internal(cfg));
2462        if group.commands.is_empty() {
2463            Err(self.make_unexpected_err())
2464        } else {
2465            Ok(group)
2466        }
2467    }
2468
2469    /// Like `compound_list`, but allows for the list of commands to be empty.
2470    fn command_group_internal(&mut self, cfg: CommandGroupDelimiters)
2471        -> ParseResult<builder::CommandGroup<B::Command>, B::Error>
2472    {
2473        let found_delim = |slf: &mut Parser<_, _>| {
2474            let found_exact = ! cfg.exact_tokens.is_empty() && slf.iter.peek()
2475                    .map(|peeked| cfg.exact_tokens.iter().any(|tok| tok == peeked))
2476                    .unwrap_or(false);
2477
2478            found_exact
2479                || slf.peek_reserved_word(cfg.reserved_words).is_some()
2480                || slf.peek_reserved_token(cfg.reserved_tokens).is_some()
2481        };
2482
2483        let mut cmds = Vec::new();
2484        let mut trailing_comments = Vec::new();
2485        loop {
2486            if found_delim(self) {
2487                break;
2488            }
2489
2490            let leading_comments = self.linebreak();
2491
2492            if found_delim(self) || self.iter.peek().is_none() {
2493                debug_assert!(trailing_comments.is_empty());
2494                trailing_comments = leading_comments;
2495                break;
2496            }
2497
2498            cmds.push(try!(self.complete_command_with_leading_comments(leading_comments)));
2499        }
2500
2501        Ok(builder::CommandGroup {
2502            commands: cmds,
2503            trailing_comments: trailing_comments,
2504        })
2505    }
2506
2507    /// Parses the body of any arbitrary arithmetic expression, e.g. `x + $y << 5`.
2508    /// The caller is responsible for parsing the external `$(( ))` tokens.
2509    pub fn arithmetic_substitution(&mut self) -> ParseResult<DefaultArithmetic, B::Error> {
2510        let mut exprs = Vec::new();
2511        loop {
2512            self.skip_whitespace();
2513            exprs.push(try!(self.arith_assig()));
2514
2515            eat_maybe!(self, {
2516                Comma => {};
2517                _ => { break },
2518            });
2519        }
2520
2521        if exprs.len() == 1 {
2522            Ok(exprs.pop().unwrap())
2523        } else {
2524            Ok(ast::Arithmetic::Sequence(exprs))
2525        }
2526    }
2527
2528    /// Parses expressions such as `var = expr` or `var op= expr`, where `op` is
2529    /// any of the following operators: *, /, %, +, -, <<, >>, &, |, ^.
2530    fn arith_assig(&mut self) -> ParseResult<DefaultArithmetic, B::Error> {
2531        use ast::Arithmetic::*;
2532
2533        self.skip_whitespace();
2534
2535        let assig = {
2536            let mut assig = false;
2537            let mut peeked = self.iter.multipeek();
2538
2539            'assig_check: loop {
2540                match peeked.peek_next() {
2541                    Some(&Dollar) => continue, // Skip Dollar and peek next
2542                    Some(&Name(_)) => loop {
2543                        match peeked.peek_next() {
2544                            Some(&Whitespace(_)) => continue, // Skip whitespace and peek next
2545                            Some(&Star)    |
2546                            Some(&Slash)   |
2547                            Some(&Percent) |
2548                            Some(&Plus)    |
2549                            Some(&Dash)    |
2550                            Some(&DLess)   |
2551                            Some(&DGreat)  |
2552                            Some(&Amp)     |
2553                            Some(&Pipe)    |
2554                            Some(&Caret)   => assig = Some(&Equals) == peeked.peek_next(),
2555
2556                            // Make sure we only recognize $(( x = ...)) but NOT $(( x == ...))
2557                            Some(&Equals)  => assig = Some(&Equals) != peeked.peek_next(),
2558                            _ => {}
2559                        }
2560
2561                        break 'assig_check;
2562                    },
2563                    _ => break 'assig_check,
2564                }
2565            }
2566
2567            assig
2568        };
2569
2570        if !assig {
2571            return self.arith_ternary();
2572        }
2573
2574        let var = try!(self.arith_var());
2575        self.skip_whitespace();
2576        let op = match self.iter.next() {
2577            Some(op@Star)    |
2578            Some(op@Slash)   |
2579            Some(op@Percent) |
2580            Some(op@Plus)    |
2581            Some(op@Dash)    |
2582            Some(op@DLess)   |
2583            Some(op@DGreat)  |
2584            Some(op@Amp)     |
2585            Some(op@Pipe)    |
2586            Some(op@Caret)   => { eat!(self, { Equals => {} }); op },
2587            Some(op@Equals)  => op,
2588            _ => unreachable!(),
2589        };
2590
2591        let value = Box::new(try!(self.arith_assig()));
2592        let expr = match op {
2593            Star    => Box::new(Mult(Box::new(Var(var.clone())), value)),
2594            Slash   => Box::new(Div(Box::new(Var(var.clone())), value)),
2595            Percent => Box::new(Modulo(Box::new(Var(var.clone())), value)),
2596            Plus    => Box::new(Add(Box::new(Var(var.clone())), value)),
2597            Dash    => Box::new(Sub(Box::new(Var(var.clone())), value)),
2598            DLess   => Box::new(ShiftLeft(Box::new(Var(var.clone())), value)),
2599            DGreat  => Box::new(ShiftRight(Box::new(Var(var.clone())), value)),
2600            Amp     => Box::new(BitwiseAnd(Box::new(Var(var.clone())), value)),
2601            Pipe    => Box::new(BitwiseOr(Box::new(Var(var.clone())), value)),
2602            Caret   => Box::new(BitwiseXor(Box::new(Var(var.clone())), value)),
2603            Equals  => value,
2604            _ => unreachable!(),
2605        };
2606        Ok(Assign(var, expr))
2607    }
2608
2609    /// Parses expressions such as `expr ? expr : expr`.
2610    fn arith_ternary(&mut self) -> ParseResult<DefaultArithmetic, B::Error> {
2611        let guard = try!(self.arith_logical_or());
2612        self.skip_whitespace();
2613        eat_maybe!(self, {
2614            Question => {
2615                let body = try!(self.arith_ternary());
2616                self.skip_whitespace();
2617                eat!(self, { Colon => {} });
2618                let els = try!(self.arith_ternary());
2619                Ok(ast::Arithmetic::Ternary(Box::new(guard), Box::new(body), Box::new(els)))
2620            };
2621            _ => { Ok(guard) },
2622        })
2623    }
2624
2625    /// Parses expressions such as `expr || expr`.
2626    arith_parse!(arith_logical_or,  arith_logical_and, OrIf  => ast::Arithmetic::LogicalOr);
2627    /// Parses expressions such as `expr && expr`.
2628    arith_parse!(arith_logical_and, arith_bitwise_or,  AndIf => ast::Arithmetic::LogicalAnd);
2629    /// Parses expressions such as `expr | expr`.
2630    arith_parse!(arith_bitwise_or,  arith_bitwise_xor, Pipe  => ast::Arithmetic::BitwiseOr);
2631    /// Parses expressions such as `expr ^ expr`.
2632    arith_parse!(arith_bitwise_xor, arith_bitwise_and, Caret => ast::Arithmetic::BitwiseXor);
2633    /// Parses expressions such as `expr & expr`.
2634    arith_parse!(arith_bitwise_and, arith_eq,          Amp   => ast::Arithmetic::BitwiseAnd);
2635
2636    /// Parses expressions such as `expr == expr` or `expr != expr`.
2637    #[inline]
2638    fn arith_eq(&mut self) -> ParseResult<DefaultArithmetic, B::Error> {
2639        let mut expr = try!(self.arith_ineq());
2640        loop {
2641            self.skip_whitespace();
2642            let eq_type = eat_maybe!(self, {
2643                Equals => { true },
2644                Bang => { false };
2645                _ => { break }
2646            });
2647
2648            eat!(self, { Equals => {} });
2649            let next = try!(self.arith_ineq());
2650            expr = if eq_type {
2651                ast::Arithmetic::Eq(Box::new(expr), Box::new(next))
2652            } else {
2653                ast::Arithmetic::NotEq(Box::new(expr), Box::new(next))
2654            };
2655        }
2656        Ok(expr)
2657    }
2658
2659    /// Parses expressions such as `expr < expr`,`expr <= expr`,`expr > expr`,`expr >= expr`.
2660    #[inline]
2661    fn arith_ineq(&mut self) -> ParseResult<DefaultArithmetic, B::Error> {
2662        let mut expr = try!(self.arith_shift());
2663        loop {
2664            self.skip_whitespace();
2665            eat_maybe!(self, {
2666                Less => {
2667                    let eq = eat_maybe!(self, { Equals => { true }; _ => { false } });
2668                    let next = try!(self.arith_shift());
2669
2670                    expr = if eq {
2671                        ast::Arithmetic::LessEq(Box::new(expr), Box::new(next))
2672                    } else {
2673                        ast::Arithmetic::Less(Box::new(expr), Box::new(next))
2674                    };
2675                },
2676                Great => {
2677                    let eq = eat_maybe!(self, { Equals => { true }; _ => { false } });
2678                    let next = try!(self.arith_shift());
2679
2680                    expr = if eq {
2681                        ast::Arithmetic::GreatEq(Box::new(expr), Box::new(next))
2682                    } else {
2683                        ast::Arithmetic::Great(Box::new(expr), Box::new(next))
2684                    };
2685                };
2686                _ => { break },
2687            });
2688        }
2689        Ok(expr)
2690    }
2691
2692    /// Parses expressions such as `expr << expr` or `expr >> expr`.
2693    arith_parse!(arith_shift, arith_add,
2694                 DLess  => ast::Arithmetic::ShiftLeft,
2695                 DGreat => ast::Arithmetic::ShiftRight
2696    );
2697
2698    /// Parses expressions such as `expr + expr` or `expr - expr`.
2699    arith_parse!(arith_add, arith_mult,
2700                 Plus => ast::Arithmetic::Add,
2701                 Dash => ast::Arithmetic::Sub
2702    );
2703
2704    /// Parses expressions such as `expr * expr`, `expr / expr`, or `expr % expr`.
2705    arith_parse!(arith_mult, arith_pow,
2706                 Star    => ast::Arithmetic::Mult,
2707                 Slash   => ast::Arithmetic::Div,
2708                 Percent => ast::Arithmetic::Modulo
2709    );
2710
2711    /// Parses expressions such as `expr ** expr`.
2712    fn arith_pow(&mut self) -> ParseResult<DefaultArithmetic, B::Error> {
2713        let expr = try!(self.arith_unary_misc());
2714        self.skip_whitespace();
2715
2716        // We must be extra careful here because ** has a higher precedence
2717        // than *, meaning power operations will be parsed before multiplication.
2718        // Thus we should be absolutely certain we should parse a ** operator
2719        // and avoid confusing it with a multiplication operation that is yet
2720        // to be parsed.
2721        let double_star = {
2722            let mut peeked = self.iter.multipeek();
2723            peeked.peek_next() == Some(&Star) && peeked.peek_next() == Some(&Star)
2724        };
2725
2726        if double_star {
2727            eat!(self, { Star => {} });
2728            eat!(self, { Star => {} });
2729            Ok(ast::Arithmetic::Pow(Box::new(expr), Box::new(try!(self.arith_pow()))))
2730        } else {
2731            Ok(expr)
2732        }
2733    }
2734
2735    /// Parses expressions such as `!expr`, `~expr`, `+expr`, `-expr`, `++var` and `--var`.
2736    fn arith_unary_misc(&mut self) -> ParseResult<DefaultArithmetic, B::Error> {
2737        self.skip_whitespace();
2738        let expr = eat_maybe!(self, {
2739            Bang  => { ast::Arithmetic::LogicalNot(Box::new(try!(self.arith_unary_misc()))) },
2740            Tilde => { ast::Arithmetic::BitwiseNot(Box::new(try!(self.arith_unary_misc()))) },
2741            Plus  => {
2742                eat_maybe!(self, {
2743                    // Although we can optimize this out, we'll let the AST builder handle
2744                    // optimizations, in case it is interested in such redundant situations.
2745                    Dash => {
2746                        let next = try!(self.arith_unary_misc());
2747                        ast::Arithmetic::UnaryPlus(Box::new(ast::Arithmetic::UnaryMinus(Box::new(next))))
2748                    },
2749                    Plus => { ast::Arithmetic::PreIncr(try!(self.arith_var())) };
2750                    _ => { ast::Arithmetic::UnaryPlus(Box::new(try!(self.arith_unary_misc()))) }
2751                })
2752            },
2753
2754            Dash  => {
2755                eat_maybe!(self, {
2756                    // Although we can optimize this out, we'll let the AST builder handle
2757                    // optimizations, in case it is interested in such redundant situations.
2758                    Plus => {
2759                        let next = try!(self.arith_unary_misc());
2760                        ast::Arithmetic::UnaryMinus(Box::new(ast::Arithmetic::UnaryPlus(Box::new(next))))
2761                    },
2762                    Dash => { ast::Arithmetic::PreDecr(try!(self.arith_var())) };
2763                    _ => { ast::Arithmetic::UnaryMinus(Box::new(try!(self.arith_unary_misc()))) }
2764                })
2765            };
2766
2767            _ => { try!(self.arith_post_incr()) }
2768        });
2769        Ok(expr)
2770    }
2771
2772    /// Parses expressions such as `(expr)`, numeric literals, `var`, `var++`, or `var--`.
2773    /// Numeric literals must appear as a single `Literal` token. `Name` tokens will be
2774    /// treated as variables.
2775    #[inline]
2776    fn arith_post_incr(&mut self) -> ParseResult<DefaultArithmetic, B::Error> {
2777        self.skip_whitespace();
2778        eat_maybe!(self, {
2779            ParenOpen => {
2780                let expr = try!(self.arithmetic_substitution());
2781                self.skip_whitespace();
2782                eat!(self, { ParenClose => {} });
2783                return Ok(expr);
2784            }
2785        });
2786
2787        let num = if let Some(&Literal(ref s)) = self.iter.peek() {
2788            if s.starts_with("0x") || s.starts_with("0X") {
2789                // from_str_radix does not like it when 0x is present
2790                // in the string to parse, thus we should strip it off.
2791                // Also, if the string is empty from_str_radix will return
2792                // an error; shells like bash and zsh treat `0x` as `0x0`
2793                // so we will do the same.
2794                let num = &s[2..];
2795                if num.is_empty() {
2796                    Some(0)
2797                } else {
2798                    isize::from_str_radix(&s[2..], 16).ok()
2799                }
2800            } else if s.starts_with('0') {
2801                isize::from_str_radix(s, 8).ok()
2802            } else {
2803                isize::from_str_radix(s, 10).ok()
2804            }
2805        } else {
2806            None
2807        };
2808
2809        let expr = match num {
2810            Some(num) => {
2811                // Make sure we consume the Token::Literal which holds the number
2812                self.iter.next();
2813                ast::Arithmetic::Literal(num)
2814            },
2815            None => {
2816                let var = try!(self.arith_var());
2817
2818                // We must be extra careful here because post-increment has a higher precedence
2819                // than addition/subtraction meaning post-increment operations will be parsed
2820                // before addition. Thus we should be absolutely certain we should parse a
2821                // post-increment operator and avoid confusing it with an addition operation
2822                // that is yet to be parsed.
2823                let post_incr = {
2824                    self.skip_whitespace();
2825                    let mut peeked = self.iter.multipeek();
2826                    match peeked.peek_next() {
2827                        Some(&Plus) => peeked.peek_next() == Some(&Plus),
2828                        Some(&Dash) => peeked.peek_next() == Some(&Dash),
2829                        _ => false,
2830                    }
2831                };
2832
2833                if post_incr {
2834                    eat!(self, {
2835                        Plus => { eat!(self, { Plus => { ast::Arithmetic::PostIncr(var) } }) },
2836                        Dash => { eat!(self, { Dash => { ast::Arithmetic::PostDecr(var) } }) },
2837                    })
2838                } else {
2839                    ast::Arithmetic::Var(var)
2840                }
2841            }
2842        };
2843        Ok(expr)
2844    }
2845
2846    /// Parses a variable name in the form `name` or `$name`.
2847    #[inline]
2848    fn arith_var(&mut self) -> ParseResult<String, B::Error> {
2849        self.skip_whitespace();
2850        eat_maybe!(self, { Dollar => {} });
2851
2852        if let Some(&Name(_)) = self.iter.peek() {
2853            if let Some(Name(n)) = self.iter.next() { Ok(n) } else { unreachable!() }
2854        } else {
2855            return Err(self.make_unexpected_err())
2856        }
2857    }
2858}
2859
2860fn concat_tokens(tokens: &[Token]) -> String {
2861    let len = tokens.iter().fold(0, |len, t| len + t.len());
2862    let mut s = String::with_capacity(len);
2863    s.extend(tokens.iter().map(Token::as_str));
2864    s
2865}
2866
2867#[cfg(test)]
2868mod tests {
2869    use ast::*;
2870    use ast::builder::Newline;
2871    use ast::Command::*;
2872    use ast::CompoundCommandKind::*;
2873    use lexer::Lexer;
2874    use parse::*;
2875
2876    fn make_parser(src: &str) -> DefaultParser<Lexer<::std::str::Chars>> {
2877        DefaultParser::new(Lexer::new(src.chars()))
2878    }
2879
2880    fn word(s: &str) -> TopLevelWord<String> {
2881        TopLevelWord(ComplexWord::Single(Word::Simple(SimpleWord::Literal(String::from(s)))))
2882    }
2883
2884    fn cmd_args_simple(cmd: &str, args: &[&str]) -> Box<DefaultSimpleCommand> {
2885        let mut cmd_args = Vec::with_capacity(args.len() + 1);
2886        cmd_args.push(RedirectOrCmdWord::CmdWord(word(cmd)));
2887        cmd_args.extend(args.iter().map(|&a| RedirectOrCmdWord::CmdWord(word(a))));
2888
2889        Box::new(SimpleCommand {
2890            redirects_or_env_vars: vec!(),
2891            redirects_or_cmd_words: cmd_args,
2892        })
2893    }
2894
2895    fn cmd(cmd: &str) -> TopLevelCommand<String> {
2896        cmd_args(cmd, &[])
2897    }
2898
2899    fn cmd_args(cmd: &str, args: &[&str]) -> TopLevelCommand<String> {
2900        TopLevelCommand(List(CommandList {
2901            first: ListableCommand::Single(PipeableCommand::Simple(cmd_args_simple(cmd, args))),
2902            rest: vec!(),
2903        }))
2904    }
2905
2906    #[test]
2907    fn test_function_declaration_comments_before_body() {
2908        use std::iter::repeat;
2909
2910        let cases_brace = vec!(
2911            "function foo() #comment1\n\n#comment2\n { echo body; }",
2912            "function foo () #comment1\n\n#comment2\n { echo body; }",
2913            "function foo (  ) #comment1\n\n#comment2\n { echo body; }",
2914            "function foo(  ) #comment1\n\n#comment2\n { echo body; }",
2915            "function foo #comment1\n\n#comment2\n   { echo body; }",
2916            "foo() #comment1\n\n#comment2\n          { echo body; }",
2917            "foo () #comment1\n\n#comment2\n         { echo body; }",
2918            "foo (  ) #comment1\n\n#comment2\n         { echo body; }",
2919            "foo(  ) #comment1\n\n#comment2\n         { echo body; }",
2920        );
2921
2922        let cases_subshell = vec!(
2923            "function foo() #comment1\n\n#comment2\n (echo body)",
2924            "function foo #comment1\n\n#comment2\n   (echo body)",
2925            "foo() #comment1\n\n#comment2\n          (echo body)",
2926            "foo () #comment1\n\n#comment2\n         (echo body)",
2927        );
2928
2929        let comments = vec!(
2930            Newline(Some(String::from("#comment1"))),
2931            Newline(None),
2932            Newline(Some(String::from("#comment2"))),
2933        );
2934
2935        let name = String::from("foo");
2936        let body = vec!(cmd_args("echo", &["body"]));
2937        let body_brace = CompoundCommand {
2938            kind: Brace(body.clone()),
2939            io: vec!(),
2940        };
2941        let body_subshell = CompoundCommand {
2942            kind: Subshell(body),
2943            io: vec!(),
2944        };
2945
2946        let iter = cases_brace.into_iter().zip(repeat(body_brace))
2947            .chain(cases_subshell.into_iter().zip(repeat(body_subshell)))
2948            .map(|(src, body)| (src, (name.clone(), comments.clone(), body)));
2949
2950        for (src, correct) in iter {
2951            assert_eq!(correct, make_parser(src).function_declaration_internal().unwrap());
2952        }
2953    }
2954
2955    #[test]
2956    fn test_word_preserve_trailing_whitespace() {
2957        let mut p = make_parser("test       ");
2958        p.word_preserve_trailing_whitespace().unwrap();
2959        assert!(p.iter.next().is_some());
2960    }
2961
2962    #[test]
2963    fn test_parameter_substitution_command_can_contain_comments() {
2964        let param_subst = builder::SimpleWordKind::Subst(Box::new(
2965            builder::ParameterSubstitutionKind::Command(builder::CommandGroup {
2966                commands: vec!(cmd("foo")),
2967                trailing_comments: vec!(Newline(Some("#comment".into()))),
2968            })
2969        ));
2970        assert_eq!(Ok(param_subst), make_parser("$(foo\n#comment\n)").parameter_raw());
2971    }
2972
2973    #[test]
2974    fn test_backticked_command_can_contain_comments() {
2975        let cmd_subst = builder::SimpleWordKind::CommandSubst(builder::CommandGroup {
2976            commands: vec!(cmd("foo")),
2977            trailing_comments: vec!(Newline(Some("#comment".into()))),
2978        });
2979        assert_eq!(Ok(cmd_subst), make_parser("`foo\n#comment\n`").backticked_raw());
2980    }
2981}