shellish_parse/
lib.rs

1//! This is a Rust crate to do "command line parsing". No, I'm not talking
2//! about parsing command line *arguments* that were passed to your program;
3//! for that purpose, I recommend the excellent [Clap][1] crate (with features
4//! `wrap_help` and `derive` enabled). What this crate does is take a *line of
5//! text* and parse it like a command line. In other words, it parses shellish.
6//!
7//! This is useful if you're implementing any kind of interactive system where
8//! a user needs to be able to input commands.
9//!
10//! [1]: https://crates.io/crates/clap
11//!
12//! # Usage
13//!
14//! Add `shellish_parse` to your `Cargo.toml`:
15//!
16//! ```toml
17//! shellish_parse = "2.2"
18//! ```
19//!
20//! Use `shellish_parse::parse` to parse some shellish:
21//!
22//! ```rust
23//! let line = "Hello World";
24//! assert_eq!(shellish_parse::parse(line, false).unwrap(), &[
25//!     "Hello", "World"
26//! ]);
27//! ```
28//!
29//! The first parameter, a `&str`, is the line to parse. The second parameter,
//! can be a `bool`, indicating whether an unrecognized escape sequence
31//! should be an error:
32//!
33//! ```rust
34//! let line = r#"In\mvalid"#; // note: raw string
35//! assert_eq!(shellish_parse::parse(line, false).unwrap(), &[
36//!     "In�valid"
37//! ]);
38//! assert_eq!(shellish_parse::parse(line, true).unwrap_err(),
39//!     shellish_parse::ParseError::UnrecognizedEscape("\\m".to_string()));
40//! ```
41//! 
42//! Or a [`ParseOptions`](struct.ParseOptions.html), giving you more control
43//! (see that struct's documentation for more details):
44//! 
45//! ```rust
46//! # use shellish_parse::ParseOptions;
47//! let line = r#"In\mvalid"#; // note: raw string
48//! let options = ParseOptions::new().no_strict_escapes();
49//! assert_eq!(shellish_parse::parse(line, options).unwrap(), &[
50//!     "In�valid"
51//! ]);
52//! let options = ParseOptions::new();
53//! assert_eq!(shellish_parse::parse(line, options).unwrap_err(),
54//!     shellish_parse::ParseError::UnrecognizedEscape("\\m".to_string()));
55//! ```
56//!
57//! You may want to use an alias to make calling this function more convenient
58//! if you're using it in a lot of places:
59//!
60//! ```rust
61//! use shellish_parse::parse as parse_shellish;
62//! let line = "Hello World";
63//! assert_eq!(parse_shellish(line, false).unwrap(), &[
64//!     "Hello", "World"
65//! ]);
66//! ```
67//! 
68//! And putting your preferred `ParseOptions` into a `const` can save you some
69//! typing:
70//! 
71//! ```rust
72//! # use shellish_parse::ParseOptions;
73//! const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new()
74//!         .allow_comments_within_elements();
75//! use shellish_parse::parse as parse_shellish;
76//! let line = "This line contains a com#ment";
77//! assert_eq!(parse_shellish(line, SHELLISH_OPTIONS).unwrap(), &[
78//!     "This", "line", "contains", "a", "com"
79//! ]);
80//! ```
81//!
82//! Regular parse is great and everything, but sometimes you want to be able
83//! to chain multiple commands on the same line. That's where `multiparse`
84//! comes in:
85//!
86//! ```rust
87//! # use shellish_parse::ParseOptions;
88//! # const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new();
89//! let line = "Hello World; How are you?";
90//! assert_eq!(shellish_parse::multiparse(line, SHELLISH_OPTIONS, &[";"])
91//!            .unwrap(), &[
92//!     (vec!["Hello".to_string(), "World".to_string()], Some(0)),
93//!     (vec!["How".to_string(), "are".to_string(), "you?".to_string()], None),
94//! ]);
95//! ```
96//!
97//! (Since it returns a vec of tuples, it's rather awkward to phrase in tests.)
98//!
99//! You pass the separators you want to use. A single semicolon is probably
100//! all you want. If you want to get really fancy, you can add arbitrarily many
101//! different separators. Each command returned comes with the index of the
102//! separator that terminated it:
103//!
104//! ```rust
105//! # use shellish_parse::ParseOptions;
106//! # const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new();
107//! let line = "test -f foo && pv foo | bar || echo no foo & echo wat";
108//! assert_eq!(shellish_parse::multiparse(line, SHELLISH_OPTIONS,
109//!                                       &["&&", "||", "&", "|", ";"])
110//!            .unwrap(), &[
111//!     (vec!["test".to_string(), "-f".to_string(), "foo".to_string()], Some(0)),
112//!     (vec!["pv".to_string(), "foo".to_string()], Some(3)),
113//!     (vec!["bar".to_string()], Some(1)),
114//!     (vec!["echo".to_string(), "no".to_string(), "foo".to_string()], Some(2)),
115//!     (vec!["echo".to_string(), "wat".to_string()], None),
116//! ]);
117//! ```
118//!
119//! Since the separators are checked in the order passed, put longer
120//! separators before shorter ones. If `"&"` preceded `"&&"` in the above call,
121//! `"&"` would always be recognized first, and `"&&"` would never be
122//! recognized.
123//!
124//! Extremely shellish things, like redirection or using parentheses to group
125//! commands, are out of scope of this crate. If you want those things, you
126//! might be writing an actual shell, and not just something shellish.
127//!
128//! # Syntax
129//!
130//! The syntax is heavily inspired by the UNIX Bourne shell. Quotation works
131//! exactly like in said shell. Backslashes can also be used for escaping (and
132//! more advanced usage, more like Rust strings than shellish). Unlike the real
//! Bourne shell, `shellish_parse` performs no form of variable substitution.
134//!
135//! ## Whitespace
136//!
137//! Elements are separated by one or more whitespace characters.
138//!
139//! ```rust
140//! # use shellish_parse::ParseOptions;
141//! # const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new();
142//! let line = "Hello there!";
143//! assert_eq!(shellish_parse::parse(line, SHELLISH_OPTIONS).unwrap(), &[
144//!     "Hello", "there!",
145//! ])
146//! ```
147//!
148//! Whitespace consists of spaces, tabs, or newlines. Whitespace before and
149//! after the command line is ignored. Any combination and quantity of
150//! whitespace between elements acts the same as a single space.
151//!
152//! ```rust
153//! # use shellish_parse::ParseOptions;
154//! # const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new();
155//! let line = "\tHello\n\t  there!    \n\n";
156//! assert_eq!(shellish_parse::parse(line, SHELLISH_OPTIONS).unwrap(), &[
157//!     "Hello", "there!",
158//! ])
159//! ```
160//!
161//! ## Backslash escapes
162//!
163//! (All example input strings in this section are given as raw strings. The
164//! backslashes and quotation marks you see in them are literal.)
165//!
166//! You may escape any character with backslash.
167//!
168//! Backslash followed by an ASCII letter (26 letters `'A'` through `'Z'` and
169//! `'a'` through `'z'`) or digit (`'0'` through `'9'`) has a special meaning.
170//!
171//! - `'n'`: Newline (U+000A LINE FEED)
172//! - `'t'`: Tab (U+0009 CHARACTER TABULATION)
173//! - Any other letter (and any digit) will either insert a � (U+FFFD
174//!   REPLACEMENT CHARACTER) or cause a parse error, depending on the value you
175//!   pass as the second parameter to `parse`.
176//!
177//! ```rust
178//! # use shellish_parse::ParseOptions;
179//! # const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new();
180//! let line = r#"General\t Kenobi\n"#;
181//! assert_eq!(shellish_parse::parse(line, SHELLISH_OPTIONS).unwrap(), &[
182//!     "General\t", "Kenobi\n",
183//! ])
184//! ```
185//!
186//! Backslash followed by a newline followed by any number of unescaped tabs or
187//! spaces will give nothing, just like in Rust strings. (i.e. you may continue
188//! a command line onto another line by preceding the linebreak with a
189//! backslash)
190//!
191//! ```rust
192//! # use shellish_parse::ParseOptions;
193//! # const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new();
194//! let line = r#"You will die br\
195//!               aver than most."#;
196//! assert_eq!(shellish_parse::parse(line, SHELLISH_OPTIONS).unwrap(), &[
197//!     "You", "will", "die", "braver", "than", "most."
198//! ])
199//! ```
200//!
201//! Backslash followed by anything else will give that character, ignoring any
202//! special meaning it might otherwise have had.
203//!
204//! ```rust
205//! # use shellish_parse::ParseOptions;
206//! # const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new();
207//! let line = r#"Four\-score\ and\ seven \"years\" ago"#;
208//! assert_eq!(shellish_parse::parse(line, SHELLISH_OPTIONS).unwrap(), &[
209//!     "Four-score and seven", "\"years\"", "ago"
210//! ])
211//! ```
212//!
213//! Future versions may add more special characters. These will only be denoted
214//! by letter(s) or digit(s). For all other characters, the handling of
215//! backslash is guaranteed not to change.
216//!
217//! ## Quoting
218//!
219//! (All example input strings in this section are given as raw strings. The
220//! backslashes and quotation marks you see in them are literal.)
221//!
222//! You may quote parts of the command line. The quoted text will all go into
223//! the same element.
224//!
225//! ```rust
226//! # use shellish_parse::ParseOptions;
227//! # const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new();
228//! let line = r#"cp "Quotation Mark Test" "Quotation Mark Test Backup""#;
229//! assert_eq!(shellish_parse::parse(line, SHELLISH_OPTIONS).unwrap(), &[
230//!     "cp", "Quotation Mark Test", "Quotation Mark Test Backup"
231//! ])
232//! ```
233//!
234//! Quoting will *not* create a new element on its own.
235//!
236//! ```rust
237//! # use shellish_parse::ParseOptions;
238//! # const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new();
239//! let line = r#"I Probably Should Have"Added A Space!""#;
240//! assert_eq!(shellish_parse::parse(line, SHELLISH_OPTIONS).unwrap(), &[
241//!     "I", "Probably", "Should", "HaveAdded A Space!"
242//! ])
243//! ```
244//!
245//! There are two kinds of quotation. A double-quoted string will interpret
246//! backslash escapes, including `\"`.
247//!
248//! ```rust
249//! # use shellish_parse::ParseOptions;
250//! # const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new();
251//! let line = r#"movie recommend "\"Swing it\" magistern""#;
252//! assert_eq!(shellish_parse::parse(line, SHELLISH_OPTIONS).unwrap(), &[
253//!     "movie", "recommend", "\"Swing it\" magistern"
254//! ])
255//! ```
256//!
257//! A single-quoted string **will not** interpret backslash escapes, not even
258//! `\'`!
259//!
260//! ```rust
261//! # use shellish_parse::ParseOptions;
262//! # const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new();
263//! let line = r#"addendum 'and then he said "But I haven'\''t seen it, I \
264//! just searched for '\''movies with quotes in their titles'\'' on IMDB and \
265//! saw that it was popular"'"#;
266//! assert_eq!(shellish_parse::parse(line, SHELLISH_OPTIONS).unwrap(), &[
267//!     "addendum", "and then he said \"But I haven't seen it, I just \
268//! searched for 'movies with quotes in their titles' on IMDB and saw that it \
269//! was popular\""
270//! ])
271//! ```
272//!
273//! ## Continuation
274//!
//! `parse` returns `Err(ParseError::...)` on failure. There are three ways
276//! parsing can fail:
277//!
278//! 1. Dangling backslash: `like this\`
279//! 2. Unterminated string: `like "this`
280//! 3. Unrecognized escape sequence: `like this\m`
281//!
282//! In the first two cases, parsing could succeed if there were only more input
283//! to read. So you can handle these errors by prompting for more input, adding
284//! it onto the end of the string, and trying again. The `needs_continuation`
//! method of `ParseError` is here to help:
286//!
287//! ```rust
288//! # use shellish_parse::ParseOptions;
289//! # const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new();
290//! // note: raw strings
291//! let input_lines = [r#"This is not a very \"#,
292//!                    r#"long line, so why did \"#,
293//!                    r#"we choose to 'force "#,
294//!                    r#"continuation'?"#];
295//! let mut input_iter = input_lines.into_iter();
296//! let mut buf = input_iter.next().unwrap().to_string();
297//! let result = loop {
298//!     match shellish_parse::parse(&buf, SHELLISH_OPTIONS) {
299//!         Err(x) if x.needs_continuation() => {
300//!             buf.push('\n'); // don't forget this part!
301//!             buf.push_str(input_iter.next().unwrap())
302//!         },
303//!         x => break x,
304//!     }
305//! };
306//! assert_eq!(result.unwrap(), &[
307//!     "This", "is", "not", "a", "very", "long", "line,", "so", "why", "did",
308//!     "we", "choose", "to", "force \ncontinuation?"
309//! ]);
310//! ```
311//! 
312//! ## Comments
313//! 
314//! By default, comments are delimited by a `#` character.
315//! 
316//! ```rust
317//! # use shellish_parse::ParseOptions;
318//! # const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new();
319//! let line = "Comment test. #comments #sayinghashtagoutloud";
320//! assert_eq!(shellish_parse::parse(line, SHELLISH_OPTIONS).unwrap(), &[
321//!     "Comment", "test."
322//! ])
323//! ```
324//! 
325//! You can change this to any other character using
326//! [`ParseOptions`](struct.ParseOptions.html):
327//! 
328//! ```rust
329//! # use shellish_parse::ParseOptions;
330//! const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new()
331//!         .comment_char(Some('%'));
332//! let line = "bind lmbutton Interact % make left mouse button interact";
333//! assert_eq!(shellish_parse::parse(line, SHELLISH_OPTIONS).unwrap(), &[
334//!     "bind", "lmbutton", "Interact"
335//! ])
336//! ```
337//! 
338//! You can also disable comment parsing entirely:
339//! 
340//! ```rust
341//! # use shellish_parse::ParseOptions;
342//! const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new()
343//!         .comment_char(None);
344//! let line = "Comment test. #comments #sayinghashtagoutloud";
345//! assert_eq!(shellish_parse::parse(line, SHELLISH_OPTIONS).unwrap(), &[
346//!     "Comment", "test.", "#comments", "#sayinghashtagoutloud"
347//! ])
348//! ```
349//! 
350//! By default, comments are not allowed in the middle of an element. This
351//! behavior matches the Bourne shell. You can make it so that any comment
352//! character, found outside a string, will be accepted as the beginning of a
353//! comment:
354//! 
355//! ```rust
356//! # use shellish_parse::ParseOptions;
357//! let line = "Comment that breaks an el#ement.";
358//! const SHELLISH_OPTIONS: ParseOptions = ParseOptions::new();
359//! assert_eq!(shellish_parse::parse(line, SHELLISH_OPTIONS).unwrap(), &[
360//!     "Comment", "that", "breaks", "an", "el#ement."
361//! ]);
362//! const SHELLISH_OPTIONS_2: ParseOptions = ParseOptions::new()
363//!         .allow_comments_within_elements();
364//! assert_eq!(shellish_parse::parse(line, SHELLISH_OPTIONS_2).unwrap(), &[
365//!     "Comment", "that", "breaks", "an", "el"
366//! ]);
367//! ```
368//!
369//! # Legalese
370//!
371//! `shellish_parse` is copyright 2022-2023, Solra Bizna, and licensed under
372//! either of:
373//!
374//! - Apache License, Version 2.0
375//!   ([LICENSE-APACHE](LICENSE-APACHE) or
376//!   <http://www.apache.org/licenses/LICENSE-2.0>)
377//! - MIT license
378//!   ([LICENSE-MIT](LICENSE-MIT) or <http://opensource.org/licenses/MIT>)
379//!
380//! at your option.
381//!
382//! Unless you explicitly state otherwise, any contribution intentionally
383//! submitted for inclusion in the `shellish_parse` crate by you, as defined
384//! in the Apache-2.0 license, shall be dual licensed as above, without any
385//! additional terms or conditions.
386
387use std::{
388    fmt::Display,
389    error::Error,
390};
391
/// The knobs that control how a command line is parsed.
///
/// Start from `ParseOptions::new()` (or `ParseOptions::default()`) and chain
/// the builder methods to adjust individual settings.
///
/// For backwards compatibility with 2.1, a `bool` can be converted into this
/// type: `true` gives the defaults, and `false` gives the defaults with
/// `no_strict_escapes()` applied.
#[derive(Copy, Clone, Debug)]
pub struct ParseOptions {
    /// When `true`, an unrecognized backslash escape is a parse error; when
    /// `false`, it is replaced with U+FFFD instead.
    strict_escapes: bool,
    /// When `true`, the comment character starts a comment even in the
    /// middle of an element, not only at the start of one.
    allow_comments_within_elements: bool,
    /// The character that begins a comment, or `None` if comments are
    /// disabled.
    comment_char: Option<char>,
}
402
403impl ParseOptions {
404    /// Create a new `ParseOptions` starting at the defaults.
405    /// 
406    /// Equivalent to `ParseOptions::default()`, except that it's a `const fn`
407    /// so you can put it into a `const` variable if you like.
408    pub const fn new() -> ParseOptions {
409        ParseOptions {
410            strict_escapes: true,
411            allow_comments_within_elements: false,
412            comment_char: Some('#'),
413        }
414    }
415    /// The default is for bad escape sequences to result in a `ParseError`.
416    /// If `no_strict_escapes()` is used, then bad escape sequences will result
417    /// in '�' instead.
418    pub const fn no_strict_escapes(mut self) -> Self {
419        self.strict_escapes = false;
420        self
421    }
422    /// The default is for comments to be delimited by a `#` character. You can
423    /// specify another comment character, or disable comment delimiting,
424    /// using `comment_char()`.
425    pub const fn comment_char(mut self, comment_char: Option<char>) -> Self {
426        self.comment_char = comment_char;
427        self
428    }
429    /// The default is that comments will only count if they are preceded by
430    /// whitespace. Thus, by default, `foo bar#baz # bang` will parse as
431    /// `["foo", "bar#baz"]`. This matches the behavior of the Bourne shell.
432    /// You can override this behavior by calling
433    /// `allow_comments_within_elements()`, which would make that line parse
434    /// as `["foo", "bar"]` instead.
435    pub const fn allow_comments_within_elements(mut self) -> Self {
436        self.allow_comments_within_elements = true;
437        self
438    }
439}
440
441impl Default for ParseOptions {
442    fn default() -> ParseOptions {
443        ParseOptions::new()
444    }
445}
446
447impl From<bool> for ParseOptions {
448    fn from(i: bool) -> Self {
449        if i { ParseOptions::new() }
450        else { ParseOptions::new().no_strict_escapes() }
451    }
452}
453
/// The reason a command line failed to parse.
///
/// Most of these errors can be resolved with additional user input; use the
/// `needs_continuation` method to find out whether that is the case. See
/// [the module-level documentation](index.html) for more information.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ParseError {
    /// An unescaped backslash was the last thing on the command line.
    DanglingBackslash,
    /// The command line ended while a quoted string was still open.
    DanglingString,
    /// A backslash escape sequence was not recognized. The payload is the
    /// offending sequence, backslash included.
    UnrecognizedEscape(String),
}
468
469impl ParseError {
470    /// Returns `true` if this kind of ParseError will be resolved if the user
471    /// provides more input.
472    pub fn needs_continuation(&self) -> bool {
473        match self {
474            &ParseError::DanglingBackslash | &ParseError::DanglingString
475                => true,
476            _ => false,
477        }
478    }
479}
480
481impl Display for ParseError {
482    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
483        match self {
484            &ParseError::DanglingBackslash
485                => write!(fmt, "dangling backslash"),
486            &ParseError::DanglingString
487                => write!(fmt, "dangling string"),
488            &ParseError::UnrecognizedEscape(ref seq)
489                => write!(fmt, "unrecognized escape sequence: {:?}", seq)
490        }
491    }
492}
493
494impl Error for ParseError {
495}
496
497/// Parse a shellish string into elements. This function will parse a single
498/// command. See [the module-level documentation](index.html) for more
499/// information.
500///
501/// - `input`: The string to parse.
502/// - `options`: A [`ParseOptions`](struct.ParseOptions.html) instance,
503///    describing the options in effect for this parse. For compatibility,
504///    may also be `true` as shorthand for `ParseOptions::new()`, and `false`
505///    as shorthand for `ParseOptions::new().no_strict_escapes()`.
506///
507/// When parsing is successful, returns a vector containing each individual
508/// element of the parsed command line.
509pub fn parse<T: Into<ParseOptions>>(input: &str, options: T)
510-> Result<Vec<String>, ParseError> {
511    match inner_parse(input, &options.into(), &[]) {
512        Ok(mut x) => {
513            assert!(x.len() <= 1);
514            if x.len() == 0 {
515                Ok(vec![])
516            }
517            else {
518                let (command, sep) = x.swap_remove(0);
519                assert_eq!(sep, None);
520                Ok(command)
521            }
522        },
523        Err(x) => Err(x),
524    }
525}
526
527/// Parse a shellish string into elements. This function can parse multiple
528/// commands on a single line, separated by any of the given list of
529/// separators. See [the module-level documentation](index.html) for more
530/// information.
531///
532/// - `input`: The string to parse.
533/// - `options`: A [`ParseOptions`](struct.ParseOptions.html) instance,
534///    describing the options in effect for this parse. For compatibility,
535///    may also be `true` as shorthand for `ParseOptions::new()`, and `false`
536///    as shorthand for `ParseOptions::new().no_strict_escapes()`.
537///
538/// When parsing is successful, returns a vector containing tuples of
539/// individual commands, along with the index of the separator that ended that
540/// command. The last command may not have a separator, in which case it was
541/// ended by the end of the string, rather than a separator.
542pub fn multiparse<T: Into<ParseOptions>>(
543    input: &str, options: T, separators: &[&str]
544) -> Result<Vec<(Vec<String>, Option<usize>)>, ParseError> {
545    inner_parse(input, &options.into(), separators)
546}
547
548fn inner_parse(input: &str, options: &ParseOptions, separators: &[&str])
549    -> Result<Vec<(Vec<String>, Option<usize>)>, ParseError> {
550    let mut utf8_buffer = [0u8; 5];
551    let comment_bytes = options.comment_char.map(|x| x.encode_utf8(&mut utf8_buffer));
552    let inbytes = input.as_bytes();
553    let mut ret = Vec::new();
554    let mut cur_line = Vec::new();
555    let mut pos = 0;
556    let mut cur_arg: Option<Vec<u8>> = None;
557    let mut cur_string = None;
558    'outer: while pos < input.len() {
559        if cur_string.is_none() {
560            // Check separators iff we're not in a string
561            let rem = &inbytes[pos..];
562            for (index, separator) in separators.iter().enumerate() {
563                if separator.len() > rem.len() { continue }
564                if separator.as_bytes() == &rem[..separator.len()] {
565                    // Hit a separator. Skip it and ship it.
566                    pos += separator.len();
567                    if let Some(cur_arg) = cur_arg.take() {
568                        cur_line.push(unsafe {String::from_utf8_unchecked(cur_arg) });
569                    }
570                    ret.push((cur_line, Some(index)));
571                    cur_line = Vec::new();
572                    continue 'outer;
573                }
574            }
575            // Also check comments, iff we're not in a string (and comments are
576            // a thing)
577            if let Some(comment_bytes) = comment_bytes.as_ref() {
578                if cur_arg.is_none() || options.allow_comments_within_elements {
579                    if comment_bytes.as_bytes() == &rem[..comment_bytes.len()] {
580                        break 'outer;
581                    }
582                }
583            }
584        }
585        let nextb = inbytes[pos];
586        if cur_string.is_none() && (nextb == b'\n' || nextb == b' ' || nextb == b'\t') {
587            if let Some(cur_arg) = cur_arg.take() {
588                cur_line.push(unsafe {String::from_utf8_unchecked(cur_arg) });
589            }
590            pos += 1;
591        }
592        else if Some(nextb) == cur_string {
593            debug_assert!(cur_arg.is_some());
594            cur_string = None;
595            pos += 1;
596        }
597        else if nextb == b'\\' {
598            cur_arg.get_or_insert_with(Vec::new);
599            let cur_arg = cur_arg.as_mut().unwrap();
600            pos += 1;
601            if pos >= input.len() {
602                return Err(ParseError::DanglingBackslash);
603            }
604            let escb = inbytes[pos];
605            if escb.is_ascii_alphabetic() {
606                match escb {
607                    b't' => cur_arg.push(b'\t'),
608                    b'n' => cur_arg.push(b'\n'),
609                    _ => {
610                        if options.strict_escapes {
611                            return Err(ParseError::UnrecognizedEscape(input[pos-1..=pos].to_string()))
612                        }
613                        else {
614                            cur_arg.extend_from_slice("\u{FFFD}".as_bytes());
615                        }
616                    },
617                }
618            }
619            else if escb == b'\n' {
620                // eat any subsequent non-newline whitespace we find
621                while pos + 1 < inbytes.len()
622                      && (inbytes[pos+1] == b' ' || inbytes[pos+1] == b'\t') {
623                    pos += 1;
624                }
625            }
626            else {
627                cur_arg.push(escb);
628            }
629            pos += 1;
630        }
631        else if cur_string.is_none() && nextb == b'"' {
632            cur_arg.get_or_insert_with(Vec::new);
633            cur_string = Some(b'"');
634            pos += 1;
635        }
636        else if cur_string.is_none() && nextb == b'\'' {
637            cur_arg.get_or_insert_with(Vec::new);
638            cur_string = Some(b'\'');
639            pos += 1;
640        }
641        else {
642            cur_arg.get_or_insert_with(Vec::new).push(nextb);
643            pos += 1;
644        }
645    }
646    if cur_string.is_some() {
647        return Err(ParseError::DanglingString);
648    }
649    if let Some(cur_arg) = cur_arg.take() {
650        cur_line.push(unsafe {String::from_utf8_unchecked(cur_arg) });
651    }
652    if !cur_line.is_empty() {
653        ret.push((cur_line, None));
654    }
655    Ok(ret)
656}
shellish_parse/lib.rs

shellish_parse/
lib.rs