Skip to main content

molt/
parser.rs

1//! # Molt TCL Parser
2//!
3//! This is the Molt TCL Parser.  It parses a TCL script (e.g., the contents of a TCL file,
4//! the body of a `proc`, the body of a loop, an `if` clause) into an internal form for later
5//! evaluation.
6//!
7//! ## The Dodekalogue
8//!
9//! TCL syntax is governed by a set of rules called The Dodekalogue.  See the
10//! [Tcl(n) man page for Tcl 8.7](https://www.tcl-lang.org/man/tcl8.7/TclCmd/Tcl.htm)
//! for details.
12//!
13//! ## The Internal Form
14//!
15//! The internal form is as follows:
16//!
17//! * A `Script` represents a compiled script.
//! * A `Script` consists of a list of `WordVec`'s, each of which represents a single command.
19//! * A `WordVec` is a list of `Words` representing the command name and its arguments.
20//! * A `Word` is an entity that can be evaluated by the interpreter to produce a single
21//!   `Value`.
22//!
23//! ## Evaluation
24//!
25//! Thus, evaluation consists of looping over the commands in the script.  For each command
26//!
27//! *   Convert each `Word` in the command's `WordVec` into a `Value`
28//! *   Look up the Molt command given its name.
29//! *   Pass the list of `Value`'s to the command in the usual way.
30//! *   If the command returns `Err(_)`, script execution terminates early and control is
31//!     returned to the caller.
32//!
33//! ## Scripts and Values
34//!
35//! Script parsing is most usually performed by the `Value::as_script` method as part of
36//! script evaluation by the `Interp`.  In this way, the script's internal form persists and
37//! need not be recomputed for each evaluation.
38//!
39//! ## Other Parsing Functions
40//!
//! The module provides a number of lower-level parsing functions to the rest of the library.
//! For example, the `expr` parser sometimes needs to parse quoted strings and variable names.
43//!
44//! ## Variable Name Literals
45//!
46//! Variable names are parsed in two contexts: as part of "$-substitution", and as simple command
47//! arguments, e.g., as in `set my_var 1`.  In the latter case, the variable name is parsed not by
48//! the parser but by the command that interprets the argument as a variable name.  This module
49//! provides `parse_varname_literal` for this case; it is usually used via `Value::as_var_name`.
50
51use crate::check_args;
52use crate::eval_ptr::EvalPtr;
53use crate::interp::Interp;
54use crate::types::ContextID;
55use crate::types::Exception;
56use crate::types::MoltResult;
57use crate::types::VarName;
58use crate::util::is_varname_char;
59use crate::value::Value;
60
61/// A compiled script, which can be executed in the context of an interpreter.
62#[derive(Debug, PartialEq)]
63pub(crate) struct Script {
64    // A script is a list of one or more commands to execute.
65    commands: Vec<WordVec>,
66}
67
68impl Script {
69    /// Create a new script object, to which commands will be added during parsing.
70    fn new() -> Self {
71        Self {
72            commands: Vec::new(),
73        }
74    }
75
76    /// Return the list of commands for evaluation.
77    pub fn commands(&self) -> &[WordVec] {
78        &self.commands
79    }
80}
81
82/// A single command, consisting of a vector of `Word`'s for evaluation.
83#[derive(Debug, PartialEq)]
84pub(crate) struct WordVec {
85    words: Vec<Word>,
86}
87
88impl WordVec {
89    /// Create a new `WordVec`, to which `Word`'s can be added during parsing.
90    fn new() -> Self {
91        Self { words: Vec::new() }
92    }
93
94    /// Return the list of words for evaluation.
95    pub fn words(&self) -> &[Word] {
96        &self.words
97    }
98}
99
100/// A single `Word` in a command.  A `Word` can be evaluated to produce a `Value`.
101#[derive(Debug, PartialEq)]
102pub(crate) enum Word {
103    /// A `Value`, e.g., the braced word `{a b c}` parses to the value "a b c".
104    Value(Value),
105
106    /// VarRef(name): a scalar variable reference, e.g., `$name`
107    VarRef(String),
108
109    /// ArrayRef(name, index): an array variable reference, e.g., `$a(1)`.  The index is
110    /// represented by a `Word` since it can include various substitutions.
111    ArrayRef(String, Box<Word>),
112
113    /// Script(script): A nested script, e.g., `[foo 1 2 3]`.
114    Script(Script),
115
116    /// Tokens(words...): A list of `Words` that will be concatenated into a single `Value`,
117    /// e.g., `a $x [foo] bar` or `foo.$x`.
118    Tokens(Vec<Word>),
119
120    /// Expand(word): A word preceded by the expansion operator, e.g, `{*}...`.
121    Expand(Box<Word>),
122
123    /// String(string): A string literal.  This usually appears only as an element in
124    /// a `Tokens` list, e.g., the `a` and `b` in `a[myproc]b`.
125    ///
126    String(String),
127}
128
129/// Parses a script, given as a string slice.  Returns a parsed `Script` (or an error).
130pub(crate) fn parse(input: &str) -> Result<Script, Exception> {
131    // FIRST, create an EvalPtr as a parsing aid; then parse the script.
132    let mut ctx = EvalPtr::new(input);
133    parse_script(&mut ctx)
134}
135
136/// Parses a script represented by an `EvalPtr`.  This form is also used by `expr`.
137pub(crate) fn parse_script(ctx: &mut EvalPtr) -> Result<Script, Exception> {
138    let mut script = Script::new();
139
140    // Parse commands from the input until we've reach the end.
141    while !ctx.at_end_of_script() {
142        script.commands.push(parse_command(ctx)?);
143    }
144
145    Ok(script)
146}
147
148/// Parses a single command from the input, returning it as a `WordVec`.
149fn parse_command(ctx: &mut EvalPtr) -> Result<WordVec, Exception> {
150    let mut cmd: WordVec = WordVec::new();
151
152    // FIRST, deal with whitespace and comments between "here" and the next command.
153    while !ctx.at_end_of_script() {
154        ctx.skip_block_white();
155
156        // Either there's a comment, or we're at the beginning of the next command.
157        // If the former, skip the comment; then check for more whitespace and comments.
158        // Otherwise, go on to the command.
159        if !ctx.skip_comment() {
160            break;
161        }
162    }
163
164    // NEXT, Read words until we get to the end of the line or hit an error
165    // NOTE: parse_word() can always assume that it's at the beginning of a word.
166    while !ctx.at_end_of_command() {
167        // FIRST, get the next word; there has to be one, or there's an input error.
168        cmd.words.push(parse_next_word(ctx)?);
169
170        // NEXT, skip any whitespace.
171        ctx.skip_line_white();
172    }
173
174    // NEXT, If we ended at a ";", consume the semi-colon.
175    if ctx.next_is(';') {
176        ctx.next();
177    }
178
179    // NEXT, return the parsed command.
180    Ok(cmd)
181}
182
183/// Parse and return the next word from the input.
184fn parse_next_word(ctx: &mut EvalPtr) -> Result<Word, Exception> {
185    if ctx.next_is('{') {
186        // FIRST, look for "{*}" operator
187        if ctx.tok().as_str().starts_with("{*}") {
188            ctx.skip();
189            ctx.skip();
190            ctx.skip();
191
192            // If the next character is white space, this is just a normal braced
193            // word; return its content.  Otherwise, parse what remains as a word
194            // and box it in Expand.
195            if ctx.at_end() || ctx.next_is_block_white() {
196                return Ok(Word::Value(Value::from("*")));
197            } else {
198                return Ok(Word::Expand(Box::new(parse_next_word(ctx)?)));
199            }
200        }
201
202        // NEXT, just a normal braced word containing an asterisk.
203        parse_braced_word(ctx)
204    } else if ctx.next_is('"') {
205        parse_quoted_word(ctx)
206    } else {
207        parse_bare_word(ctx, false)
208    }
209}
210
211/// Parses a braced word from the input.  It's an error if the there are any non-whitespace
212/// characters following the close brace, or if the close brace is missing.
213pub(crate) fn parse_braced_word(ctx: &mut EvalPtr) -> Result<Word, Exception> {
214    // FIRST, skip the opening brace, and count it; non-escaped braces need to
215    // balance.
216    ctx.skip_char('{');
217    let mut count = 1;
218
219    // NEXT, add tokens to the word until we reach the close quote
220    let mut text = String::new();
221    let mut start = ctx.mark();
222
223    while !ctx.at_end() {
224        // Note: the while condition ensures that there's a character.
225        if ctx.next_is('{') {
226            count += 1;
227            ctx.skip();
228        } else if ctx.next_is('}') {
229            count -= 1;
230
231            if count > 0 {
232                ctx.skip();
233            } else {
234                // We've found and consumed the closing brace.  We should either
235                // see more more whitespace, or we should be at the end of the list
236                // Otherwise, there are incorrect characters following the close-brace.
237                text.push_str(ctx.token(start));
238                let result = Ok(Word::Value(Value::from(text)));
239                ctx.skip(); // Skip the closing brace
240
241                if ctx.at_end_of_command() || ctx.next_is_line_white() {
242                    return result;
243                } else {
244                    return molt_err!("extra characters after close-brace");
245                }
246            }
247        } else if ctx.next_is('\\') {
248            text.push_str(ctx.token(start));
249            ctx.skip();
250
251            // If there's no character it's because we're at the end; and there's
252            // no close brace.
253            if let Some(ch) = ctx.next() {
254                if ch == '\n' {
255                    text.push(' ');
256                } else {
257                    text.push('\\');
258                    text.push(ch);
259                }
260            }
261            start = ctx.mark();
262        } else {
263            ctx.skip();
264        }
265    }
266
267    molt_err!("missing close-brace")
268}
269
270/// Parses a quoted word, handling backslash, variable, and command substitution. It's
271/// an error if the there are any non-whitespace characters following the close quote, or
272/// if the close quote is missing.
273pub(crate) fn parse_quoted_word(ctx: &mut EvalPtr) -> Result<Word, Exception> {
274    // FIRST, consume the the opening quote.
275    ctx.next();
276
277    // NEXT, add tokens to the word until we reach the close quote
278    let mut tokens = Tokens::new();
279    let mut start = ctx.mark();
280
281    while !ctx.at_end() {
282        // Note: the while condition ensures that there's a character.
283        if ctx.next_is('[') {
284            if start != ctx.mark() {
285                tokens.push_str(ctx.token(start));
286            }
287            tokens.push(Word::Script(parse_brackets(ctx)?));
288            start = ctx.mark();
289        } else if ctx.next_is('$') {
290            if start != ctx.mark() {
291                tokens.push_str(ctx.token(start));
292            }
293            parse_dollar(ctx, &mut tokens)?;
294            start = ctx.mark();
295        } else if ctx.next_is('\\') {
296            if start != ctx.mark() {
297                tokens.push_str(ctx.token(start));
298            }
299            tokens.push_char(ctx.backslash_subst());
300            start = ctx.mark();
301        } else if ctx.next_is('"') {
302            if start != ctx.mark() {
303                tokens.push_str(ctx.token(start));
304            }
305            ctx.skip_char('"');
306            if !ctx.at_end_of_command() && !ctx.next_is_line_white() {
307                return molt_err!("extra characters after close-quote");
308            } else {
309                return Ok(tokens.take());
310            }
311        } else {
312            ctx.skip();
313        }
314    }
315
316    molt_err!("missing \"")
317}
318
319/// Parses a bare word, handling backslash, variable, and command substitution.
320fn parse_bare_word(ctx: &mut EvalPtr, index_flag: bool) -> Result<Word, Exception> {
321    let mut tokens = Tokens::new();
322    let mut start = ctx.mark();
323
324    while !ctx.at_end_of_command() && !ctx.next_is_line_white() {
325        // Note: the while condition ensures that there's a character.
326        if index_flag && ctx.next_is(')') {
327            // Parsing an array index, and we're at the end.
328            break;
329        } else if ctx.next_is('[') {
330            if start != ctx.mark() {
331                tokens.push_str(ctx.token(start));
332            }
333            tokens.push(Word::Script(parse_brackets(ctx)?));
334            start = ctx.mark();
335        } else if ctx.next_is('$') {
336            if start != ctx.mark() {
337                tokens.push_str(ctx.token(start));
338            }
339            parse_dollar(ctx, &mut tokens)?;
340            start = ctx.mark();
341        } else if ctx.next_is('\\') {
342            if start != ctx.mark() {
343                tokens.push_str(ctx.token(start));
344            }
345            tokens.push_char(ctx.backslash_subst());
346            start = ctx.mark();
347        } else {
348            ctx.skip();
349        }
350    }
351
352    if start != ctx.mark() {
353        tokens.push_str(ctx.token(start));
354    }
355
356    Ok(tokens.take())
357}
358
359/// Parses an embedded script in a bare or quoted word, returning the result as a
360/// Script.  It's an error if the close-bracket is missing.
361fn parse_brackets(ctx: &mut EvalPtr) -> Result<Script, Exception> {
362    // FIRST, skip the '['
363    ctx.skip_char('[');
364
365    // NEXT, parse the script up to the matching ']'
366    let old_flag = ctx.is_bracket_term();
367    ctx.set_bracket_term(true);
368    let result = parse_script(ctx);
369    ctx.set_bracket_term(old_flag);
370
371    // NEXT, make sure there's a closing bracket
372    if result.is_ok() {
373        if ctx.next_is(']') {
374            ctx.next();
375        } else {
376            return molt_err!("missing close-bracket");
377        }
378    }
379
380    result
381}
382
383/// Parses a "$" in the input, and pushes the result into a list of tokens.  Usually this
384/// will be a variable reference, but it may simply be a bare "$".
385fn parse_dollar(ctx: &mut EvalPtr, tokens: &mut Tokens) -> Result<(), Exception> {
386    // FIRST, skip the '$'
387    ctx.skip_char('$');
388
389    // NEXT, make sure this is really a variable reference.  If it isn't
390    // just return a "$".
391    if !ctx.next_is_varname_char() && !ctx.next_is('{') {
392        tokens.push_char('$');
393    } else {
394        tokens.push(parse_varname(ctx)?);
395    }
396
397    Ok(())
398}
399
400/// Parses a variable name; the "$" has already been consumed.  Handles both braced
401/// and non-braced variable names, including array names.
402///
403/// Also used by expr.rs.
404pub(crate) fn parse_varname(ctx: &mut EvalPtr) -> Result<Word, Exception> {
405    // FIRST, is this a braced variable name?
406    if ctx.next_is('{') {
407        ctx.skip_char('{');
408        let start = ctx.mark();
409        ctx.skip_while(|ch| *ch != '}');
410
411        if ctx.at_end() {
412            return molt_err!("missing close-brace for variable name");
413        }
414
415        let var_name = parse_varname_literal(ctx.token(start));
416        ctx.skip_char('}');
417        match var_name.index() {
418            Some(index) => Ok(Word::ArrayRef(
419                var_name.name().into(),
420                Box::new(Word::String(index.into())),
421            )),
422            None => Ok(Word::VarRef(var_name.name().into())),
423        }
424    } else {
425        let start = ctx.mark();
426        ctx.skip_while(|ch| is_varname_char(*ch));
427        let name = ctx.token(start).to_string();
428
429        if !ctx.next_is('(') {
430            // Scalar; just return it.
431            Ok(Word::VarRef(name))
432        } else {
433            // Array; parse out the word that evaluates to the index.
434            ctx.skip();
435            let index = parse_bare_word(ctx, true)?;
436            ctx.skip_char(')');
437            Ok(Word::ArrayRef(name, Box::new(index)))
438        }
439    }
440}
441
442/// Parses a literal variable name: a string that is known to be a complete variable
443/// name.
444///
445/// If it contains an opening parenthesis and ends with a closing parenthesis, then
446/// it's an array reference; otherwise it's just a scalar name.
447pub(crate) fn parse_varname_literal(literal: &str) -> VarName {
448    let mut ctx = EvalPtr::new(literal);
449
450    // FIRST, find the first open parenthesis.  If there is none, just return the literal
451    // as a scalar.
452    let start = ctx.mark();
453    ctx.skip_while(|ch| *ch != '(');
454
455    if ctx.at_end() {
456        return VarName::scalar(literal.into());
457    }
458
459    // NEXT, pluck out the name.
460    let name = ctx.token(start).to_string();
461    ctx.skip_char('(');
462
463    if ctx.tok().as_str().is_empty() {
464        return VarName::scalar(literal.into());
465    }
466
467    // NEXT, skip to the final character.
468    let start = ctx.mark();
469    let chars_left = ctx.tok().as_str().len() - 1;
470
471    for _ in 0..chars_left {
472        ctx.skip();
473    }
474
475    if ctx.next_is(')') {
476        VarName::array(name, ctx.token(start).to_string())
477    } else {
478        VarName::scalar(literal.into())
479    }
480}
481
482/// The Tokens structure.  This is used when parsing a bare or quoted word; the
483/// intent is to accumulate the relevant words, while merging adjacent string literals.
484struct Tokens {
485    /// The list of words
486    list: Vec<Word>,
487
488    /// If true, we're accumulating a string literal, which will eventually become a `Word`.
489    got_string: bool,
490
491    /// The string literal we're accumulating, if any, or an empty string otherwise.
492    string: String,
493}
494
495impl Tokens {
496    /// Creates a new Tokens structure.
497    fn new() -> Self {
498        Self {
499            list: Vec::new(),
500            got_string: false,
501            string: String::new(),
502        }
503    }
504
505    /// Pushes an entire word into the list of tokens.  If a string literal is being
506    /// accumulated, it is turned into a `Word` and pushed before the input word.
507    fn push(&mut self, word: Word) {
508        if self.got_string {
509            let string = std::mem::replace(&mut self.string, String::new());
510            self.list.push(Word::String(string));
511            self.got_string = false;
512        }
513
514        self.list.push(word);
515    }
516
517    /// Pushes a literal string onto the list of tokens.  It will be merged with any
518    /// string literal that's being accumulated.
519    fn push_str(&mut self, str: &str) {
520        self.string.push_str(str);
521        self.got_string = true;
522    }
523
524    /// Pushes a single character onto the list of tokens.  It will be merged with any
525    /// string literal that's being accumulated.
526    fn push_char(&mut self, ch: char) {
527        self.string.push(ch);
528        self.got_string = true;
529    }
530
531    /// Takes the accumulated tokens as a single `Word`, either `Word::Value` or
532    /// `Word::Tokens`.
533    fn take(mut self) -> Word {
534        if self.got_string {
535            // If there's nothing but the string, turn it into a value.
536            // Otherwise, just add it to the list of tokens.
537            if self.list.is_empty() {
538                return Word::Value(Value::from(self.string));
539            } else {
540                let string = std::mem::replace(&mut self.string, String::new());
541                self.list.push(Word::String(string));
542            }
543        }
544
545        if self.list.is_empty() {
546            Word::Value(Value::empty())
547        } else if self.list.len() == 1 {
548            self.list.pop().unwrap()
549        } else {
550            Word::Tokens(self.list)
551        }
552    }
553}
554
555/// # parse *script*
556///
557/// A command for parsing an arbitrary script and outputting the parsed form.
558/// This is an undocumented debugging aid.  The output can be greatly improved.
559pub fn cmd_parse(_interp: &mut Interp, _: ContextID, argv: &[Value]) -> MoltResult {
560    check_args(1, argv, 2, 2, "script")?;
561
562    let script = &argv[1];
563
564    molt_ok!(format!("{:?}", parse(script.as_str())?))
565}
566
567#[cfg(test)]
568mod tests {
569    use super::*;
570
571    #[test]
572    fn test_tokens() {
573        // No tokens pushed; get empty string.
574        let tokens = Tokens::new();
575        assert_eq!(tokens.take(), Word::Value(Value::empty()));
576
577        // Push normal Word only; get it back.
578        let mut tokens = Tokens::new();
579        tokens.push(Word::Value(Value::from("abc")));
580        assert_eq!(tokens.take(), Word::Value(Value::from("abc")));
581
582        // Push a single str.  Get Value.
583        let mut tokens = Tokens::new();
584        tokens.push_str("xyz");
585        assert_eq!(tokens.take(), Word::Value(Value::from("xyz")));
586
587        // Push two strs.  Get one value.
588        let mut tokens = Tokens::new();
589        tokens.push_str("abc");
590        tokens.push_str("def");
591        assert_eq!(tokens.take(), Word::Value(Value::from("abcdef")));
592
593        // Push strs and chars.  Get one value.
594        let mut tokens = Tokens::new();
595        tokens.push_str("abc");
596        tokens.push_char('/');
597        tokens.push_str("def");
598        assert_eq!(tokens.take(), Word::Value(Value::from("abc/def")));
599
600        // Push multiple normal words
601        let mut tokens = Tokens::new();
602        tokens.push(Word::VarRef("a".into()));
603        tokens.push(Word::String("xyz".into()));
604        assert_eq!(
605            tokens.take(),
606            Word::Tokens(vec![Word::VarRef("a".into()), Word::String("xyz".into())])
607        );
608
609        // Push a string, a word, and another string
610        let mut tokens = Tokens::new();
611        tokens.push_str("a");
612        tokens.push_str("b");
613        tokens.push(Word::VarRef("xyz".into()));
614        tokens.push_str("c");
615        tokens.push_str("d");
616        assert_eq!(
617            tokens.take(),
618            Word::Tokens(vec![
619                Word::String("ab".into()),
620                Word::VarRef("xyz".into()),
621                Word::String("cd".into())
622            ])
623        );
624    }
625
626    #[test]
627    fn test_parse() {
628        assert!(parse("").unwrap().commands.is_empty());
629
630        let cmds = parse("a").unwrap().commands;
631        assert_eq!(cmds.len(), 1);
632        assert_eq!(cmds[0].words, vec![Word::Value(Value::from("a"))]);
633
634        let cmds = parse("a\nb").unwrap().commands;
635        assert_eq!(cmds.len(), 2);
636        assert_eq!(cmds[0].words, vec![Word::Value(Value::from("a"))]);
637        assert_eq!(cmds[1].words, vec![Word::Value(Value::from("b"))]);
638
639        let cmds = parse("a;b").unwrap().commands;
640        assert_eq!(cmds.len(), 2);
641        assert_eq!(cmds[0].words, vec![Word::Value(Value::from("a"))]);
642        assert_eq!(cmds[1].words, vec![Word::Value(Value::from("b"))]);
643
644        let cmds = parse(" a ; b ").unwrap().commands;
645        assert_eq!(cmds.len(), 2);
646        assert_eq!(cmds[0].words, vec![Word::Value(Value::from("a"))]);
647        assert_eq!(cmds[1].words, vec![Word::Value(Value::from("b"))]);
648
649        assert_eq!(parse("a {"), molt_err!("missing close-brace"));
650    }
651
652    #[test]
653    fn test_parse_next_word() {
654        // NOTE: The point of this test is to make sure that parse_next_word is
655        // calling the right functions to complete the job, not to verify what
656        // those functions are doing; they have their own tests.
657
658        // Normal Braced Word
659        assert_eq!(
660            pword("{abc}"),
661            Ok((Word::Value(Value::from("abc")), "".into()))
662        );
663
664        // {*} at end of input
665        assert_eq!(pword("{*}"), Ok((Word::Value(Value::from("*")), "".into())));
666
667        // {*} followed by white-space
668        assert_eq!(
669            pword("{*} "),
670            Ok((Word::Value(Value::from("*")), " ".into()))
671        );
672
673        // {*} followed by word
674        assert_eq!(
675            pword("{*}abc "),
676            Ok((
677                Word::Expand(Box::new(Word::Value(Value::from("abc")))),
678                " ".into()
679            ))
680        );
681
682        // Quoted Word
683        assert_eq!(
684            pword("\"abc\""),
685            Ok((Word::Value(Value::from("abc")), "".into()))
686        );
687
688        // Bare word
689        assert_eq!(
690            pword("abc"),
691            Ok((Word::Value(Value::from("abc")), "".into()))
692        );
693    }
694
695    fn pword(input: &str) -> Result<(Word, String), Exception> {
696        let mut ctx = EvalPtr::new(input);
697        let word = parse_next_word(&mut ctx)?;
698        Ok((word, ctx.tok().as_str().to_string()))
699    }
700
701    #[test]
702    fn test_parse_braced_word() {
703        // Simple string
704        assert_eq!(
705            pbrace("{abc}"),
706            Ok((Word::Value(Value::from("abc")), "".into()))
707        );
708
709        // Simple string with following space
710        assert_eq!(
711            pbrace("{abc} "),
712            Ok((Word::Value(Value::from("abc")), " ".into()))
713        );
714
715        // String with white space
716        assert_eq!(
717            pbrace("{a b c} "),
718            Ok((Word::Value(Value::from("a b c")), " ".into()))
719        );
720
721        // String with $ and []space
722        assert_eq!(
723            pbrace("{a $b [c]} "),
724            Ok((Word::Value(Value::from("a $b [c]")), " ".into()))
725        );
726
727        // String with balanced braces
728        assert_eq!(
729            pbrace("{a{b}c} "),
730            Ok((Word::Value(Value::from("a{b}c")), " ".into()))
731        );
732
733        // String with escaped braces
734        assert_eq!(
735            pbrace("{a\\{bc} "),
736            Ok((Word::Value(Value::from("a\\{bc")), " ".into()))
737        );
738
739        assert_eq!(
740            pbrace("{ab\\}c} "),
741            Ok((Word::Value(Value::from("ab\\}c")), " ".into()))
742        );
743
744        // String with escaped newline (a real newline with a \ in front)
745        assert_eq!(
746            pbrace("{ab\\\nc} "),
747            Ok((Word::Value(Value::from("ab c")), " ".into()))
748        );
749
750        // Strings with missing close-brace
751        assert_eq!(pbrace("{abc"), molt_err!("missing close-brace"));
752
753        assert_eq!(pbrace("{a{b}c"), molt_err!("missing close-brace"));
754    }
755
756    fn pbrace(input: &str) -> Result<(Word, String), Exception> {
757        let mut ctx = EvalPtr::new(input);
758        let word = parse_braced_word(&mut ctx)?;
759        Ok((word, ctx.tok().as_str().to_string()))
760    }
761
762    #[test]
763    fn test_parse_quoted_word() {
764        // Simple string
765        assert_eq!(
766            pqw("\"abc\""),
767            Ok((Word::Value(Value::from("abc")), "".into()))
768        );
769
770        // Simple string with text following
771        assert_eq!(
772            pqw("\"abc\" "),
773            Ok((Word::Value(Value::from("abc")), " ".into()))
774        );
775
776        // Backslash substitution at beginning, middle, and end
777        assert_eq!(
778            pqw("\"\\x77-\" "),
779            Ok((Word::Value(Value::from("w-")), " ".into()))
780        );
781
782        assert_eq!(
783            pqw("\"-\\x77-\" "),
784            Ok((Word::Value(Value::from("-w-")), " ".into()))
785        );
786
787        assert_eq!(
788            pqw("\"-\\x77\" "),
789            Ok((Word::Value(Value::from("-w")), " ".into()))
790        );
791
792        // Variable reference
793        assert_eq!(
794            pqw("\"a$x.b\" "),
795            Ok((
796                Word::Tokens(vec![
797                    Word::String("a".into()),
798                    Word::VarRef("x".into()),
799                    Word::String(".b".into()),
800                ]),
801                " ".into()
802            ))
803        );
804
805        assert_eq!(
806            pqw("\"a${x}b\" "),
807            Ok((
808                Word::Tokens(vec![
809                    Word::String("a".into()),
810                    Word::VarRef("x".into()),
811                    Word::String("b".into()),
812                ]),
813                " ".into()
814            ))
815        );
816
817        // Not actually a variable reference
818        assert_eq!(
819            pqw("\"a$.b\" "),
820            Ok((Word::Value(Value::from("a$.b")), " ".into()))
821        );
822
823        // Brackets
824        assert_eq!(
825            pqw("\"a[list b]c\" "),
826            Ok((
827                Word::Tokens(vec![
828                    Word::String("a".into()),
829                    Word::Script(pbrack("[list b]").unwrap()),
830                    Word::String("c".into()),
831                ]),
832                " ".into()
833            ))
834        );
835
836        // Missing close quote
837        assert_eq!(pqw("\"abc"), molt_err!("missing \""));
838
839        // Extra characters after close-quote
840        assert_eq!(
841            pqw("\"abc\"x "),
842            molt_err!("extra characters after close-quote")
843        );
844    }
845
846    fn pqw(input: &str) -> Result<(Word, String), Exception> {
847        let mut ctx = EvalPtr::new(input);
848        let word = parse_quoted_word(&mut ctx)?;
849        Ok((word, ctx.tok().as_str().to_string()))
850    }
851
852    #[test]
853    fn test_parse_bare_word() {
854        // Simple string
855        assert_eq!(
856            pbare("abc", false),
857            Ok((Word::Value(Value::from("abc")), "".into()))
858        );
859
860        // Simple string with text following
861        assert_eq!(
862            pbare("abc ", false),
863            Ok((Word::Value(Value::from("abc")), " ".into()))
864        );
865
866        // Backslash substitution at beginning, middle, and end
867        assert_eq!(
868            pbare("\\x77- ", false),
869            Ok((Word::Value(Value::from("w-")), " ".into()))
870        );
871
872        assert_eq!(
873            pbare("-\\x77- ", false),
874            Ok((Word::Value(Value::from("-w-")), " ".into()))
875        );
876
877        assert_eq!(
878            pbare("-\\x77 ", false),
879            Ok((Word::Value(Value::from("-w")), " ".into()))
880        );
881
882        // Variable reference
883        assert_eq!(
884            pbare("a$x.b ", false),
885            Ok((
886                Word::Tokens(vec![
887                    Word::String("a".into()),
888                    Word::VarRef("x".into()),
889                    Word::String(".b".into()),
890                ]),
891                " ".into()
892            ))
893        );
894
895        assert_eq!(
896            pbare("a${x}b ", false),
897            Ok((
898                Word::Tokens(vec![
899                    Word::String("a".into()),
900                    Word::VarRef("x".into()),
901                    Word::String("b".into()),
902                ]),
903                " ".into()
904            ))
905        );
906
907        // Not actually a variable reference
908        assert_eq!(
909            pbare("a$.b ", false),
910            Ok((Word::Value(Value::from("a$.b")), " ".into()))
911        );
912
913        // Brackets
914        assert_eq!(
915            pbare("a[list b]c ", false),
916            Ok((
917                Word::Tokens(vec![
918                    Word::String("a".into()),
919                    Word::Script(pbrack("[list b]").unwrap()),
920                    Word::String("c".into()),
921                ]),
922                " ".into()
923            ))
924        );
925
926        // Array index
927        assert_eq!(
928            // Parse up to but not including the ")".
929            pbare("a)b", true),
930            Ok((Word::Value(Value::from("a")), ")b".into()))
931        );
932    }
933
934    fn pbare(input: &str, index_flag: bool) -> Result<(Word, String), Exception> {
935        let mut ctx = EvalPtr::new(input);
936        let word = parse_bare_word(&mut ctx, index_flag)?;
937        Ok((word, ctx.tok().as_str().to_string()))
938    }
939
940    #[test]
941    fn test_parse_brackets() {
942        let script = pbrack("[set a 5]").unwrap();
943        assert_eq!(script.commands.len(), 1);
944        let cmd = &script.commands[0];
945        assert_eq!(
946            cmd.words,
947            vec![
948                Word::Value(Value::from("set")),
949                Word::Value(Value::from("a")),
950                Word::Value(Value::from("5")),
951            ]
952        );
953
954        assert_eq!(pbrack("[incomplete"), molt_err!("missing close-bracket"));
955    }
956
957    fn pbrack(input: &str) -> Result<Script, Exception> {
958        let mut ctx = EvalPtr::new(input);
959        parse_brackets(&mut ctx)
960    }
961
962    #[test]
963    fn test_parse_dollar() {
964        // Normal var names
965        assert_eq!(pvar("$a"), Ok((Word::VarRef("a".into()), "".into())));
966        assert_eq!(pvar("$abc"), Ok((Word::VarRef("abc".into()), "".into())));
967        assert_eq!(pvar("$abc."), Ok((Word::VarRef("abc".into()), ".".into())));
968        assert_eq!(pvar("$a.bc"), Ok((Word::VarRef("a".into()), ".bc".into())));
969        assert_eq!(
970            pvar("$a1_.bc"),
971            Ok((Word::VarRef("a1_".into()), ".bc".into()))
972        );
973
974        // Array names
975        assert_eq!(
976            pvar("$a(1)"),
977            Ok((
978                Word::ArrayRef("a".into(), Box::new(Word::Value(Value::from("1")))),
979                "".into()
980            ))
981        );
982
983        // Braced var names
984        assert_eq!(pvar("${a}b"), Ok((Word::VarRef("a".into()), "b".into())));
985        assert_eq!(
986            pvar("${ab"),
987            molt_err!("missing close-brace for variable name")
988        );
989
990        // Braced var names with arrays
991        assert_eq!(
992            pvar("${a(1)}"),
993            Ok((
994                Word::ArrayRef("a".into(), Box::new(Word::String("1".into()))),
995                "".into()
996            ))
997        );
998
999        // Just a bare "$"
1000        assert_eq!(pvar("$"), Ok((Word::Value(Value::from("$")), "".into())));
1001        assert_eq!(pvar("$."), Ok((Word::Value(Value::from("$")), ".".into())));
1002    }
1003
1004    fn pvar(input: &str) -> Result<(Word, String), Exception> {
1005        let mut ctx = EvalPtr::new(input);
1006        let mut tokens = Tokens::new();
1007        parse_dollar(&mut ctx, &mut tokens)?;
1008        Ok((tokens.take(), ctx.tok().as_str().to_string()))
1009    }
1010
1011    #[test]
1012    fn test_parse_varname_literal() {
1013        // Scalars
1014        assert_eq!(parse_varname_literal(""), scalar(""));
1015        assert_eq!(parse_varname_literal("a"), scalar("a"));
1016        assert_eq!(parse_varname_literal("a(b"), scalar("a(b"));
1017        assert_eq!(parse_varname_literal("("), scalar("("));
1018        assert_eq!(parse_varname_literal(")"), scalar(")"));
1019        assert_eq!(parse_varname_literal("a(b)c"), scalar("a(b)c"));
1020        assert_eq!(parse_varname_literal("(b)c"), scalar("(b)c"));
1021
1022        // Arrays
1023        assert_eq!(parse_varname_literal("a(b)"), array("a", "b"));
1024        assert_eq!(parse_varname_literal("a({)"), array("a", "{"));
1025        assert_eq!(parse_varname_literal("()"), array("", ""));
1026        assert_eq!(parse_varname_literal("(b)"), array("", "b"));
1027        assert_eq!(parse_varname_literal("a()"), array("a", ""));
1028        assert_eq!(parse_varname_literal("%(()"), array("%", "("));
1029        assert_eq!(parse_varname_literal("%())"), array("%", ")"));
1030    }
1031
1032    fn scalar(name: &str) -> VarName {
1033        VarName::scalar(name.into())
1034    }
1035
1036    fn array(name: &str, index: &str) -> VarName {
1037        VarName::array(name.into(), index.into())
1038    }
1039}