fclones/
arg.rs

1//! Command line argument parsing and quoting utilities.
2//!
3//! Provides lossless OsString conversions to and from String by shell-like escaping and quoting.
4
5use std::error::Error;
6use std::ffi::{OsStr, OsString};
7use std::fmt::{Debug, Display, Formatter};
8use std::mem;
9
10use itertools::Itertools;
11use serde::de::Visitor;
12use serde::{Deserialize, Deserializer, Serialize, Serializer};
13use stfu8::DecodeError;
14
/// A single argument passed to the application.
///
/// The value is kept as a raw [`OsString`], so it does not have to be valid Unicode.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Arg(OsString);
18
19impl Arg {
20    pub fn from_escaped_string(s: &str) -> Result<Self, DecodeError> {
21        Ok(Arg(from_stfu8(s)?))
22    }
23
24    pub fn to_escaped_string(&self) -> String {
25        to_stfu8(self.0.clone())
26    }
27
28    pub fn quote(&self) -> String {
29        quote(self.0.to_os_string())
30    }
31
32    pub fn as_os_str(&self) -> &OsStr {
33        self.0.as_ref()
34    }
35}
36
37impl AsRef<OsStr> for Arg {
38    fn as_ref(&self) -> &OsStr {
39        self.0.as_os_str()
40    }
41}
42
43impl From<OsString> for Arg {
44    fn from(s: OsString) -> Self {
45        Arg(s)
46    }
47}
48
49impl From<&OsStr> for Arg {
50    fn from(s: &OsStr) -> Self {
51        Arg(OsString::from(s))
52    }
53}
54
55impl From<&str> for Arg {
56    fn from(s: &str) -> Self {
57        Arg(OsString::from(s))
58    }
59}
60
/// Serde visitor handed to the deserializer by the `Deserialize` impl of `Arg`; its `visit_str`
/// decodes an escaped string into an `Arg`.
struct ArgVisitor;
62
63impl Visitor<'_> for ArgVisitor {
64    type Value = Arg;
65
66    fn expecting(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
67        formatter.write_str("an STFU encoded string")
68    }
69
70    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
71    where
72        E: serde::de::Error,
73    {
74        let arg = Arg::from_escaped_string(v).map_err(|e| E::custom(e.to_string()))?;
75        Ok(arg)
76    }
77}
78
79impl Serialize for Arg {
80    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
81    where
82        S: Serializer,
83    {
84        serializer.serialize_str(self.to_escaped_string().as_str())
85    }
86}
87
88impl<'de> Deserialize<'de> for Arg {
89    fn deserialize<D>(deserializer: D) -> Result<Arg, D::Error>
90    where
91        D: Deserializer<'de>,
92    {
93        deserializer.deserialize_str(ArgVisitor)
94    }
95}
96
97/// Returns a lossless string representation in [STFU8 format](https://crates.io/crates/stfu8).
98#[cfg(unix)]
99pub fn to_stfu8(s: OsString) -> String {
100    use std::os::unix::ffi::OsStringExt;
101    let raw_path_bytes = s.into_vec();
102    stfu8::encode_u8(&raw_path_bytes)
103}
104
/// Encodes the UTF-16 code units of `s` as a lossless string in
/// [STFU8 format](https://crates.io/crates/stfu8).
#[cfg(windows)]
pub fn to_stfu8(s: OsString) -> String {
    use std::os::windows::ffi::OsStrExt;
    let wide_units = s.encode_wide().collect::<Vec<u16>>();
    stfu8::encode_u16(&wide_units)
}
112
113/// Decodes the path from the string encoded with [`to_stfu8`](OsString::to_stfu8).
114#[cfg(unix)]
115pub fn from_stfu8(encoded: &str) -> Result<OsString, DecodeError> {
116    use std::os::unix::ffi::OsStringExt;
117    let raw_bytes = stfu8::decode_u8(encoded)?;
118    Ok(OsString::from_vec(raw_bytes))
119}
120
/// Decodes a string previously encoded with [`to_stfu8`] back into an `OsString`.
///
/// # Errors
///
/// Returns the `DecodeError` reported by `stfu8::decode_u16` when `encoded` cannot be decoded.
#[cfg(windows)]
pub fn from_stfu8(encoded: &str) -> Result<OsString, DecodeError> {
    use std::os::windows::ffi::OsStringExt;
    stfu8::decode_u16(encoded).map(|wide_units| OsString::from_wide(&wide_units))
}
128
/// Characters whose presence makes `quote` wrap a string in single quotes.
///
/// The tilde entry is the ASCII `~` (U+007E), the character a shell treats specially for tilde
/// expansion, not the look-alike U+02DC SMALL TILDE.
const SPECIAL_CHARS: [char; 25] = [
    '|', '&', ';', '<', '>', '(', ')', '{', '}', '$', '`', '\\', '\'', '"', ' ', '\t', '*', '?',
    '+', '[', ']', '#', '~', '=', '%',
];
133
134/// Escapes special characters in a string, so that it will retain its literal meaning when used as
135/// a part of command in Unix shell.
136///
137/// It tries to avoid introducing any unnecessary quotes or escape characters, but specifics
138/// regarding quoting style are left unspecified.
139pub fn quote(s: OsString) -> String {
140    let lossy = s.to_string_lossy();
141    if lossy
142        .chars()
143        .any(|c| c < '\u{20}' || c == '\u{7f}' || c == '\u{fffd}' || c == '\'')
144    {
145        format!("$'{}'", to_stfu8(s).replace('\'', "\\'"))
146    } else if lossy.chars().any(|c| SPECIAL_CHARS.contains(&c)) {
147        format!("'{lossy}'")
148    } else {
149        lossy.to_string()
150    }
151}
152
/// Error returned by `split` when the input cannot be tokenized.
#[derive(Debug)]
pub struct ParseError {
    /// Human-readable description of the problem; this is what `Display` prints.
    pub msg: String,
}
157
158impl ParseError {
159    pub fn new(msg: &str) -> ParseError {
160        ParseError {
161            msg: msg.to_string(),
162        }
163    }
164}
165
166impl Display for ParseError {
167    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
168        write!(f, "{}", self.msg)
169    }
170}
171
// Marks `ParseError` as a standard error type; the trait's default methods are used as-is.
impl Error for ParseError {}
173
/// States of the tokenizer state machine driven by `split`.
///
/// `split` starts in `Delimiter` and moves between states once per input character.
enum State {
    /// Within a delimiter.
    Delimiter,
    /// After backslash, but before starting word.
    Backslash,
    /// Within an unquoted word.
    Unquoted,
    /// After backslash in an unquoted word.
    UnquotedBackslash,
    /// Within a single quoted word.
    SingleQuoted,
    /// Within a double quoted word.
    DoubleQuoted,
    /// After backslash inside a double quoted word.
    DoubleQuotedBackslash,
    /// After dollar in an unquoted word.
    Dollar,
    /// Within a quoted word preceded by a dollar sign.
    DollarQuoted,
    /// After backslash in a dollar-quoted word.
    DollarQuotedBackslash,
    /// Inside a comment.
    Comment,
}
198
/// Appends the UTF-8 encoding of `c` to the end of `s`.
fn append(s: &mut OsString, c: char) {
    // Four bytes is the longest UTF-8 encoding of a single `char`.
    let mut utf8 = [0u8; 4];
    let encoded: &str = c.encode_utf8(&mut utf8);
    s.push(encoded);
}
205
206/// Splits command line into separate arguments, in much the same way Unix shell would, but without
207/// many of expansion the shell would perform.
208///
209/// The split functionality is compatible with behaviour of Unix shell, but with word expansions
210/// limited to quote removal, and without special token recognition rules for operators.
211///
212/// The result is exactly the same as one obtained from Unix shell as long as those unsupported
213/// features are not present in input: no operators, no variable assignments, no tilde expansion,
214/// no parameter expansion, no command substitution, no arithmetic expansion, no pathname
215/// expansion.
216///
217/// In case those unsupported shell features are present, the syntax that introduce them is
218/// interpreted literally.
219///
220/// # Errors
221///
222/// When input contains unmatched quote, an error is returned.
223///
224/// # Compatibility with other implementations
225///
226/// It should be fully compatible with g_shell_parse_argv from GLib, except that in GLib
227/// it is an error not to have any words after tokenization.
228///
229/// It is also very close to shlex.split available in Python standard library, when used in POSIX
230/// mode with support for comments. Though, shlex implementation diverges from POSIX, and from
231/// implementation contained herein in three aspects. First, it doesn't support line continuations.
232/// Second, inside double quotes, the backslash characters retains its special meaning as an escape
233/// character only when followed by \\ or \", whereas POSIX specifies that it should retain its
234/// special meaning when followed by: $, \`, \", \\, or a newline. Third, it treats carriage return
235/// as one of delimiters.
236/// ```
237pub fn split(s: &str) -> Result<Vec<Arg>, ParseError> {
238    // Based on shell-words crate by Tomasz Miąsko
239    // Handling of dollar quotes added by Piotr Kołaczkowski
240
241    use State::*;
242
243    let mut words = Vec::new();
244    let mut word = OsString::new();
245
246    let mut pos = 0;
247    let mut dollar_quote_start = 0;
248
249    let mut chars = s.chars();
250    let mut state = Delimiter;
251
252    loop {
253        let c = chars.next();
254        state = match state {
255            Delimiter => match c {
256                None => break,
257                Some('\'') => SingleQuoted,
258                Some('\"') => DoubleQuoted,
259                Some('\\') => Backslash,
260                Some('\t') | Some(' ') | Some('\n') => Delimiter,
261                Some('$') => Dollar,
262                Some('#') => Comment,
263                Some(c) => {
264                    append(&mut word, c);
265                    Unquoted
266                }
267            },
268            Backslash => match c {
269                None => {
270                    append(&mut word, '\\');
271                    words.push(Arg(mem::replace(&mut word, OsString::new())));
272                    break;
273                }
274                Some('\n') => Delimiter,
275                Some(c) => {
276                    append(&mut word, c);
277                    Unquoted
278                }
279            },
280            Unquoted => match c {
281                None => {
282                    words.push(Arg(mem::replace(&mut word, OsString::new())));
283                    break;
284                }
285                Some('\'') => SingleQuoted,
286                Some('\"') => DoubleQuoted,
287                Some('\\') => UnquotedBackslash,
288                Some('$') => Dollar,
289                Some('\t') | Some(' ') | Some('\n') => {
290                    words.push(Arg(mem::replace(&mut word, OsString::new())));
291                    Delimiter
292                }
293                Some(c) => {
294                    append(&mut word, c);
295                    Unquoted
296                }
297            },
298            UnquotedBackslash => match c {
299                None => {
300                    append(&mut word, '\\');
301                    words.push(Arg(mem::replace(&mut word, OsString::new())));
302                    break;
303                }
304                Some('\n') => Unquoted,
305                Some(c) => {
306                    append(&mut word, c);
307                    Unquoted
308                }
309            },
310            SingleQuoted => match c {
311                None => return Err(ParseError::new("Unclosed single quote")),
312                Some('\'') => Unquoted,
313                Some(c) => {
314                    append(&mut word, c);
315                    SingleQuoted
316                }
317            },
318            DoubleQuoted => match c {
319                None => return Err(ParseError::new("Unclosed double quote")),
320                Some('\"') => Unquoted,
321                Some('\\') => DoubleQuotedBackslash,
322                Some(c) => {
323                    append(&mut word, c);
324                    DoubleQuoted
325                }
326            },
327            DoubleQuotedBackslash => match c {
328                None => return Err(ParseError::new("Unexpected end of input")),
329                Some('\n') => DoubleQuoted,
330                Some(c @ '$') | Some(c @ '`') | Some(c @ '"') | Some(c @ '\\') => {
331                    append(&mut word, c);
332                    DoubleQuoted
333                }
334                Some(c) => {
335                    append(&mut word, '\\');
336                    append(&mut word, c);
337                    DoubleQuoted
338                }
339            },
340            Dollar => match c {
341                None => return Err(ParseError::new("Unexpected end of input")),
342                Some('\'') => {
343                    dollar_quote_start = pos + 1;
344                    DollarQuoted
345                }
346                Some(_) => return Err(ParseError::new("Expected single quote")),
347            },
348            DollarQuoted => match c {
349                None => return Err(ParseError::new("Unclosed single quote")),
350                Some('\\') => DollarQuotedBackslash,
351                Some('\'') => {
352                    let quoted_slice = &s[dollar_quote_start..pos].replace("\\'", "'");
353                    let decoded = from_stfu8(quoted_slice).map_err(|e| {
354                        ParseError::new(format!("Failed to decode STFU-8 chunk: {e}").as_str())
355                    })?;
356                    word.push(decoded.as_os_str());
357                    Unquoted
358                }
359                Some(_) => DollarQuoted,
360            },
361            DollarQuotedBackslash => match c {
362                None => return Err(ParseError::new("Unexpected end of input")),
363                Some(_) => DollarQuoted,
364            },
365            Comment => match c {
366                None => break,
367                Some('\n') => Delimiter,
368                Some(_) => Comment,
369            },
370        };
371        pos += 1;
372    }
373
374    Ok(words)
375}
376
377/// Joins multiple command line args into a single-line escaped representation
378pub fn join(args: &[Arg]) -> String {
379    args.iter().map(|arg| arg.quote()).join(" ")
380}
381
// Unit tests for `quote` and `split`.
#[cfg(test)]
mod test {
    use std::ffi::OsString;

    use crate::arg::{quote, split, Arg};

    // Strings without special characters are returned unchanged.
    #[test]
    fn quote_no_special_chars() {
        assert_eq!(quote(OsString::from("abc/def_123.txt")), "abc/def_123.txt");
    }

    // Control characters force the `$'...'` form with escape sequences.
    #[test]
    fn quote_path_with_control_chars() {
        assert_eq!(quote(OsString::from("a\nb")), "$'a\\nb'");
        assert_eq!(quote(OsString::from("a\tb")), "$'a\\tb'");
    }

    // Shell-special characters are protected by plain single quotes.
    #[test]
    fn quote_path_with_special_chars() {
        assert_eq!(quote(OsString::from("a b")), "'a b'");
        assert_eq!(quote(OsString::from("a*b")), "'a*b'");
        assert_eq!(quote(OsString::from("a?b")), "'a?b'");
        assert_eq!(quote(OsString::from("$ab")), "'$ab'");
        assert_eq!(quote(OsString::from("a(b)")), "'a(b)'");
        assert_eq!(quote(OsString::from("a\\b")), "'a\\b'");
    }

    // A single quote in the input forces the `$'...'` form, with the quote written as `\'`.
    #[test]
    fn quote_path_with_single_quotes() {
        assert_eq!(quote(OsString::from("a'b")), "$'a\\'b'");
        assert_eq!(quote(OsString::from("a'b'")), "$'a\\'b\\''");
    }

    // Unquoted words are separated on spaces.
    #[test]
    fn split_unquoted_args() {
        assert_eq!(
            split("arg1 arg2").unwrap(),
            vec![Arg::from("arg1"), Arg::from("arg2")]
        )
    }

    // Spaces inside single quotes do not separate words.
    #[test]
    fn split_single_quoted_args() {
        assert_eq!(
            split("'arg1 with spaces' arg2").unwrap(),
            vec![Arg::from("arg1 with spaces"), Arg::from("arg2")]
        )
    }

    // Spaces inside double quotes do not separate words.
    #[test]
    fn split_doubly_quoted_args() {
        assert_eq!(
            split("\"arg1 with spaces\" arg2").unwrap(),
            vec![Arg::from("arg1 with spaces"), Arg::from("arg2")]
        )
    }

    // A backslash-escaped double quote inside double quotes is kept literally.
    #[test]
    fn split_quotes_escaping() {
        assert_eq!(
            split("\"escaped \\\" quotes\"").unwrap(),
            vec![Arg::from("escaped \" quotes")]
        )
    }

    // `\'` inside a `$'...'` section yields a literal single quote.
    #[test]
    fn split_escaped_single_quote() {
        assert_eq!(
            split("$'single\\'quote'").unwrap(),
            vec![Arg::from("single'quote")]
        );
    }

    // A backslash-escaped space does not separate words.
    #[test]
    fn split_spaces_escaping() {
        assert_eq!(
            split("escaped\\ space").unwrap(),
            vec![Arg::from("escaped space")]
        )
    }

    // `$'...'` sections are decoded, both as a whole word and as part of a larger word.
    #[test]
    fn dollar_quoting() {
        assert_eq!(
            split("arg1 $'arg2-\\n\\t\\\\' arg3-$'\\x7f'").unwrap(),
            vec![
                Arg::from("arg1"),
                Arg::from("arg2-\n\t\\"),
                Arg::from("arg3-\x7f")
            ]
        )
    }
}