gix_command/lib.rs
1//! Launch commands very similarly to `Command`, but with `git` specific capabilities and adjustments.
2#![deny(rust_2018_idioms, missing_docs)]
3#![forbid(unsafe_code)]
4
5use std::{
6    ffi::OsString,
7    io::Read,
8    path::{Path, PathBuf},
9};
10
11use bstr::{BString, ByteSlice};
12
13/// A structure to keep settings to use when invoking a command via [`spawn()`][Prepare::spawn()],
14/// after creating it with [`prepare()`].
15pub struct Prepare {
16    /// The command to invoke, either directly or with a shell depending on `use_shell`.
17    pub command: OsString,
18    /// Additional information to be passed to the spawned command.
19    pub context: Option<Context>,
20    /// The way standard input is configured.
21    pub stdin: std::process::Stdio,
22    /// The way standard output is configured.
23    pub stdout: std::process::Stdio,
24    /// The way standard error is configured.
25    pub stderr: std::process::Stdio,
26    /// The arguments to pass to the process being spawned.
27    pub args: Vec<OsString>,
28    /// Environment variables to set for the spawned process.
29    pub env: Vec<(OsString, OsString)>,
30    /// If `true`, we will use `shell_program` or `sh` to execute the `command`.
31    pub use_shell: bool,
32    /// If `true`, `command` is assumed to be a command or path to the program to execute, and it
33    /// will be shell-quoted to assure it will be executed as is and without splitting across
34    /// whitespace.
35    pub quote_command: bool,
36    /// The name or path to the shell program to use instead of `sh`.
37    pub shell_program: Option<OsString>,
38    /// If `true` (default `true` on Windows and `false` everywhere else) we will see if it's safe
39    /// to manually invoke `command` after splitting its arguments as a shell would do.
40    ///
41    /// Note that outside of Windows, it's generally not advisable as this removes support for
42    /// literal shell scripts with shell-builtins.
43    ///
44    /// This mimics the behaviour we see with `git` on Windows, which also won't invoke the shell
45    /// there at all.
46    ///
47    /// Only effective if `use_shell` is `true` as well, as the shell will be used as a fallback if
48    /// it's not possible to split arguments as the command-line contains 'scripting'.
49    pub allow_manual_arg_splitting: bool,
50}
51
52/// Additional information that is relevant to spawned processes, which typically receive
53/// a wealth of contextual information when spawned from `git`.
54///
55/// See [the git source code](https://github.com/git/git/blob/cfb8a6e9a93adbe81efca66e6110c9b4d2e57169/git.c#L191)
56/// for details.
57#[derive(Debug, Default, Clone)]
58pub struct Context {
59    /// The `.git` directory that contains the repository.
60    ///
61    /// If set, it will be used to set the `GIT_DIR` environment variable.
62    pub git_dir: Option<PathBuf>,
63    /// Set the `GIT_WORK_TREE` environment variable with the given path.
64    pub worktree_dir: Option<PathBuf>,
65    /// If `true`, set `GIT_NO_REPLACE_OBJECTS` to `1`, which turns off object replacements, or `0` otherwise.
66    /// If `None`, the variable won't be set.
67    pub no_replace_objects: Option<bool>,
68    /// Set the `GIT_NAMESPACE` variable with the given value, effectively namespacing all
69    /// operations on references.
70    pub ref_namespace: Option<BString>,
71    /// If `true`, set `GIT_LITERAL_PATHSPECS` to `1`, which makes globs literal and prefixes as well, or `0` otherwise.
72    /// If `None`, the variable won't be set.
73    pub literal_pathspecs: Option<bool>,
74    /// If `true`, set `GIT_GLOB_PATHSPECS` to `1`, which lets wildcards not match the `/` character, and equals the `:(glob)` prefix.
75    /// If `false`, set `GIT_NOGLOB_PATHSPECS` to `1` which lets globs match only themselves.
76    /// If `None`, the variable won't be set.
77    pub glob_pathspecs: Option<bool>,
78    /// If `true`, set `GIT_ICASE_PATHSPECS` to `1`, to let patterns match case-insensitively, or `0` otherwise.
79    /// If `None`, the variable won't be set.
80    pub icase_pathspecs: Option<bool>,
81    /// If `true`, inherit `stderr` just like it's the default when spawning processes.
82    /// If `false`, suppress all stderr output.
83    /// If not `None`, this will override any value set with [`Prepare::stderr()`].
84    pub stderr: Option<bool>,
85}
86
87mod prepare {
88    use std::{
89        borrow::Cow,
90        ffi::OsString,
91        process::{Command, Stdio},
92    };
93
94    use bstr::ByteSlice;
95
96    use crate::{extract_interpreter, win_path_lookup, Context, Prepare};
97
98    /// Builder
99    impl Prepare {
100        /// If called, the command will be checked for characters that are typical for shell
101        /// scripts, and if found will use `sh` to execute it or whatever is set as
102        /// [`with_shell_program()`](Self::with_shell_program()).
103        ///
104        /// If the command isn't valid UTF-8, a shell will always be used.
105        ///
106        /// If a shell is used, then arguments given here with [arg()](Self::arg) or
107        /// [args()](Self::args) will be substituted via `"$@"` if it's not already present in the
108        /// command.
109        ///
110        ///
111        /// The [`command_may_be_shell_script_allow_manual_argument_splitting()`](Self::command_may_be_shell_script_allow_manual_argument_splitting())
112        /// and [`command_may_be_shell_script_disallow_manual_argument_splitting()`](Self::command_may_be_shell_script_disallow_manual_argument_splitting())
113        /// methods also call this method.
114        ///
115        /// If neither this method nor [`with_shell()`](Self::with_shell()) is called, commands are
116        /// always executed verbatim and directly, without the use of a shell.
117        pub fn command_may_be_shell_script(mut self) -> Self {
118            self.use_shell = self
119                .command
120                .to_str()
121                .is_none_or(|cmd| cmd.as_bytes().find_byteset(b"|&;<>()$`\\\"' \t\n*?[#~=%").is_some());
122            self
123        }
124
125        /// If called, unconditionally use a shell to execute the command and its arguments.
126        ///
127        /// This uses `sh` to execute it, or whatever is set as
128        /// [`with_shell_program()`](Self::with_shell_program()).
129        ///
130        /// Arguments given here with [arg()](Self::arg) or [args()](Self::args) will be
131        /// substituted via `"$@"` if it's not already present in the command.
132        ///
133        /// If neither this method nor
134        /// [`command_may_be_shell_script()`](Self::command_may_be_shell_script()) is called,
135        /// commands are always executed verbatim and directly, without the use of a shell. (But
136        /// see [`command_may_be_shell_script()`](Self::command_may_be_shell_script()) on other
137        /// methods that call that method.)
138        ///
139        /// We also disallow manual argument splitting
140        /// (see [`command_may_be_shell_script_disallow_manual_argument_splitting`](Self::command_may_be_shell_script_disallow_manual_argument_splitting()))
141        /// to assure a shell is indeed used, no matter what.
142        pub fn with_shell(mut self) -> Self {
143            self.use_shell = true;
144            self.allow_manual_arg_splitting = false;
145            self
146        }
147
148        /// Quote the command if it is run in a shell, so its path is left intact.
149        ///
150        /// This is only meaningful if the command has been arranged to run in a shell, either
151        /// unconditionally with [`with_shell()`](Self::with_shell()), or conditionally with
152        /// [`command_may_be_shell_script()`](Self::command_may_be_shell_script()).
153        ///
154        /// Note that this should not be used if the command is a script - quoting is only the
155        /// right choice if it's known to be a program path.
156        ///
157        /// Note also that this does not affect arguments passed with [arg()](Self::arg) or
158        /// [args()](Self::args), which do not have to be quoted by the *caller* because they are
159        /// passed as `"$@"` positional parameters (`"$1"`, `"$2"`, and so on).
160        pub fn with_quoted_command(mut self) -> Self {
161            self.quote_command = true;
162            self
163        }
164
165        /// Set the name or path to the shell `program` to use if a shell is to be used, to avoid
166        /// using the default shell which is `sh`.
167        ///
168        /// Note that shells that are not Bourne-style cannot be expected to work correctly,
169        /// because POSIX shell syntax is assumed when searching for and conditionally adding
170        /// `"$@"` to receive arguments, where applicable (and in the behaviour of
171        /// [`with_quoted_command()`](Self::with_quoted_command()), if called).
172        pub fn with_shell_program(mut self, program: impl Into<OsString>) -> Self {
173            self.shell_program = Some(program.into());
174            self
175        }
176
177        /// Unconditionally turn off using the shell when spawning the command.
178        ///
179        /// Note that not using the shell is the default. So an effective use of this method
180        /// is some time after [`command_may_be_shell_script()`](Self::command_may_be_shell_script())
181        /// or [`with_shell()`](Self::with_shell()) was called.
182        pub fn without_shell(mut self) -> Self {
183            self.use_shell = false;
184            self
185        }
186
187        /// Set additional `ctx` to be used when spawning the process.
188        ///
189        /// Note that this is a must for most kind of commands that `git` usually spawns, as at
190        /// least they need to know the correct Git repository to function.
191        pub fn with_context(mut self, ctx: Context) -> Self {
192            self.context = Some(ctx);
193            self
194        }
195
196        /// Like [`command_may_be_shell_script()`](Self::command_may_be_shell_script()), but try to
197        /// split arguments by hand if this can be safely done without a shell.
198        ///
199        /// This is useful on platforms where spawning processes is slow, or where many processes
200        /// have to be spawned in a row which should be sped up. Manual argument splitting is
201        /// enabled by default on Windows only.
202        ///
203        /// Note that this does *not* check for the use of possible shell builtins. Commands may
204        /// fail or behave differently if they are available as shell builtins and no corresponding
205        /// external command exists, or the external command behaves differently.
206        pub fn command_may_be_shell_script_allow_manual_argument_splitting(mut self) -> Self {
207            self.allow_manual_arg_splitting = true;
208            self.command_may_be_shell_script()
209        }
210
211        /// Like [`command_may_be_shell_script()`](Self::command_may_be_shell_script()), but don't
212        /// allow to bypass the shell even if manual argument splitting can be performed safely.
213        pub fn command_may_be_shell_script_disallow_manual_argument_splitting(mut self) -> Self {
214            self.allow_manual_arg_splitting = false;
215            self.command_may_be_shell_script()
216        }
217
218        /// Configure the process to use `stdio` for _stdin_.
219        pub fn stdin(mut self, stdio: Stdio) -> Self {
220            self.stdin = stdio;
221            self
222        }
223        /// Configure the process to use `stdio` for _stdout_.
224        pub fn stdout(mut self, stdio: Stdio) -> Self {
225            self.stdout = stdio;
226            self
227        }
228        /// Configure the process to use `stdio` for _stderr_.
229        pub fn stderr(mut self, stdio: Stdio) -> Self {
230            self.stderr = stdio;
231            self
232        }
233
234        /// Add `arg` to the list of arguments to call the command with.
235        pub fn arg(mut self, arg: impl Into<OsString>) -> Self {
236            self.args.push(arg.into());
237            self
238        }
239
240        /// Add `args` to the list of arguments to call the command with.
241        pub fn args(mut self, args: impl IntoIterator<Item = impl Into<OsString>>) -> Self {
242            self.args
243                .append(&mut args.into_iter().map(Into::into).collect::<Vec<_>>());
244            self
245        }
246
247        /// Add `key` with `value` to the environment of the spawned command.
248        pub fn env(mut self, key: impl Into<OsString>, value: impl Into<OsString>) -> Self {
249            self.env.push((key.into(), value.into()));
250            self
251        }
252    }
253
254    /// Finalization
255    impl Prepare {
256        /// Spawn the command as configured.
257        pub fn spawn(self) -> std::io::Result<std::process::Child> {
258            let mut cmd = Command::from(self);
259            gix_trace::debug!(cmd = ?cmd);
260            cmd.spawn()
261        }
262    }
263
264    impl From<Prepare> for Command {
265        fn from(mut prep: Prepare) -> Command {
266            let mut cmd = if prep.use_shell {
267                let split_args = prep
268                    .allow_manual_arg_splitting
269                    .then(|| {
270                        if gix_path::into_bstr(std::borrow::Cow::Borrowed(prep.command.as_ref()))
271                            .find_byteset(b"\\|&;<>()$`\n*?[#~%")
272                            .is_none()
273                        {
274                            prep.command
275                                .to_str()
276                                .and_then(|args| shell_words::split(args).ok().map(Vec::into_iter))
277                        } else {
278                            None
279                        }
280                    })
281                    .flatten();
282                match split_args {
283                    Some(mut args) => {
284                        let mut cmd = Command::new(args.next().expect("non-empty input"));
285                        cmd.args(args);
286                        cmd
287                    }
288                    None => {
289                        let shell = prep.shell_program.unwrap_or_else(|| gix_path::env::shell().into());
290                        let mut cmd = Command::new(shell);
291                        cmd.arg("-c");
292                        if !prep.args.is_empty() {
293                            if prep.command.to_str().is_none_or(|cmd| !cmd.contains("$@")) {
294                                if prep.quote_command {
295                                    if let Ok(command) = gix_path::os_str_into_bstr(&prep.command) {
296                                        prep.command = gix_path::from_bstring(gix_quote::single(command)).into();
297                                    }
298                                }
299                                prep.command.push(r#" "$@""#);
300                            } else {
301                                gix_trace::debug!(
302                                    r#"Will not add '"$@"' to '{:?}' as it seems to contain '$@' already"#,
303                                    prep.command
304                                );
305                            }
306                        }
307                        cmd.arg(prep.command);
308                        cmd.arg("--");
309                        cmd
310                    }
311                }
312            } else if cfg!(windows) {
313                let program: Cow<'_, std::path::Path> = std::env::var_os("PATH")
314                    .and_then(|path| win_path_lookup(prep.command.as_ref(), &path))
315                    .map(Cow::Owned)
316                    .unwrap_or(Cow::Borrowed(prep.command.as_ref()));
317                if let Some(shebang) = extract_interpreter(program.as_ref()) {
318                    let mut cmd = Command::new(shebang.interpreter);
319                    // For relative paths, we may have picked up a file in the current repository
320                    // for which an attacker could control everything. Hence, strip options just like Git.
321                    // If the file was found in the PATH though, it should be trustworthy.
322                    if program.is_absolute() {
323                        cmd.args(shebang.args);
324                    }
325                    cmd.arg(prep.command);
326                    cmd
327                } else {
328                    Command::new(prep.command)
329                }
330            } else {
331                Command::new(prep.command)
332            };
333            // We never want to have terminals pop-up on Windows if this runs from a GUI application.
334            #[cfg(windows)]
335            {
336                use std::os::windows::process::CommandExt;
337                const CREATE_NO_WINDOW: u32 = 0x08000000;
338                cmd.creation_flags(CREATE_NO_WINDOW);
339            }
340            cmd.stdin(prep.stdin)
341                .stdout(prep.stdout)
342                .stderr(prep.stderr)
343                .envs(prep.env)
344                .args(prep.args);
345            if let Some(ctx) = prep.context {
346                if let Some(git_dir) = ctx.git_dir {
347                    cmd.env("GIT_DIR", &git_dir);
348                }
349                if let Some(worktree_dir) = ctx.worktree_dir {
350                    cmd.env("GIT_WORK_TREE", worktree_dir);
351                }
352                if let Some(value) = ctx.no_replace_objects {
353                    cmd.env("GIT_NO_REPLACE_OBJECTS", usize::from(value).to_string());
354                }
355                if let Some(namespace) = ctx.ref_namespace {
356                    cmd.env("GIT_NAMESPACE", gix_path::from_bstring(namespace));
357                }
358                if let Some(value) = ctx.literal_pathspecs {
359                    cmd.env("GIT_LITERAL_PATHSPECS", usize::from(value).to_string());
360                }
361                if let Some(value) = ctx.glob_pathspecs {
362                    cmd.env(
363                        if value {
364                            "GIT_GLOB_PATHSPECS"
365                        } else {
366                            "GIT_NOGLOB_PATHSPECS"
367                        },
368                        "1",
369                    );
370                }
371                if let Some(value) = ctx.icase_pathspecs {
372                    cmd.env("GIT_ICASE_PATHSPECS", usize::from(value).to_string());
373                }
374                if let Some(stderr) = ctx.stderr {
375                    cmd.stderr(if stderr { Stdio::inherit() } else { Stdio::null() });
376                }
377            }
378            cmd
379        }
380    }
381}
382
/// Return `true` if `executable` has the `exe` extension, compared without regard to ASCII case
/// so that `TOOL.EXE` is recognized just like `tool.exe`.
fn is_exe(executable: &Path) -> bool {
    executable
        .extension()
        .is_some_and(|ext| ext.eq_ignore_ascii_case("exe"))
}
386
387/// Try to find `command` in the `path_value` (the value of `PATH`) as separated by `;`, or return `None`.
388/// Has special handling for `.exe` extensions, as these will be appended automatically if needed.
389/// Note that just like Git, no lookup is performed if a slash or backslash is in `command`.
390fn win_path_lookup(command: &Path, path_value: &std::ffi::OsStr) -> Option<PathBuf> {
391    fn lookup(root: &bstr::BStr, command: &Path, is_exe: bool) -> Option<PathBuf> {
392        let mut path = gix_path::try_from_bstr(root).ok()?.join(command);
393        if !is_exe {
394            path.set_extension("exe");
395        }
396        if path.is_file() {
397            return Some(path);
398        }
399        if is_exe {
400            return None;
401        }
402        path.set_extension("");
403        path.is_file().then_some(path)
404    }
405    if command.components().take(2).count() == 2 {
406        return None;
407    }
408    let path = gix_path::os_str_into_bstr(path_value).ok()?;
409    let is_exe = is_exe(command);
410
411    for root in path.split(|b| *b == b';') {
412        if let Some(executable) = lookup(root.as_bstr(), command, is_exe) {
413            return Some(executable);
414        }
415    }
416    None
417}
418
419/// Parse the shebang (`#!<path>`) from the first line of `executable`, and return the shebang
420/// data when available.
421pub fn extract_interpreter(executable: &Path) -> Option<shebang::Data> {
422    #[cfg(windows)]
423    if is_exe(executable) {
424        return None;
425    }
426    let mut buf = [0; 100]; // Note: just like Git
427    let mut file = std::fs::File::open(executable).ok()?;
428    let n = file.read(&mut buf).ok()?;
429    shebang::parse(buf[..n].as_bstr())
430}
431
432///
433pub mod shebang {
434    use std::{ffi::OsString, path::PathBuf};
435
436    use bstr::{BStr, ByteSlice};
437
438    /// Parse `buf` to extract all shebang information.
439    pub fn parse(buf: &BStr) -> Option<Data> {
440        let mut line = buf.lines().next()?;
441        line = line.strip_prefix(b"#!")?;
442
443        let slash_idx = line.rfind_byteset(br"/\")?;
444        Some(match line[slash_idx..].find_byte(b' ') {
445            Some(space_idx) => {
446                let space = slash_idx + space_idx;
447                Data {
448                    interpreter: gix_path::from_byte_slice(line[..space].trim()).to_owned(),
449                    args: line
450                        .get(space + 1..)
451                        .and_then(|mut r| {
452                            r = r.trim();
453                            if r.is_empty() {
454                                return None;
455                            }
456
457                            match r.as_bstr().to_str() {
458                                Ok(args) => shell_words::split(args)
459                                    .ok()
460                                    .map(|args| args.into_iter().map(Into::into).collect()),
461                                Err(_) => Some(vec![gix_path::from_byte_slice(r).to_owned().into()]),
462                            }
463                        })
464                        .unwrap_or_default(),
465                }
466            }
467            None => Data {
468                interpreter: gix_path::from_byte_slice(line.trim()).to_owned(),
469                args: Vec::new(),
470            },
471        })
472    }
473
474    /// Shebang information as [parsed](parse()) from a buffer that should contain at least one line.
475    ///
476    /// ### Deviation
477    ///
478    /// According to the [shebang documentation](https://en.wikipedia.org/wiki/Shebang_(Unix)), it will only consider
479    /// the path of the executable, along with the arguments as the consecutive portion after the space that separates
480    /// them. Argument splitting would then have to be done elsewhere, probably in the kernel.
481    ///
482    /// To make that work without the kernel, we perform the splitting while Git just ignores options.
483    /// For now it seems more compatible to not ignore options, but if it is important this could be changed.
484    #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
485    pub struct Data {
486        /// The interpreter to run.
487        pub interpreter: PathBuf,
488        /// The remainder of the line past the space after `interpreter`, without leading or trailing whitespace,
489        /// as pre-split arguments just like a shell would do it.
490        /// Note that we accept that illformed UTF-8 will prevent argument splitting.
491        pub args: Vec<OsString>,
492    }
493}
494
495/// Prepare `cmd` for [spawning][std::process::Command::spawn()] by configuring it with various builder methods.
496///
497/// Note that the default IO is configured for typical API usage, that is
498///
499/// - `stdin` is null to prevent blocking unexpectedly on consumption of stdin
500/// - `stdout` is captured for consumption by the caller
501/// - `stderr` is inherited to allow the command to provide context to the user
502///
503/// On Windows, terminal Windows will be suppressed automatically.
504///
505/// ### Warning
506///
507/// When using this method, be sure that the invoked program doesn't rely on the current working dir and/or
508/// environment variables to know its context. If so, call instead [`Prepare::with_context()`] to provide
509/// additional information.
510pub fn prepare(cmd: impl Into<OsString>) -> Prepare {
511    Prepare {
512        command: cmd.into(),
513        shell_program: None,
514        context: None,
515        stdin: std::process::Stdio::null(),
516        stdout: std::process::Stdio::piped(),
517        stderr: std::process::Stdio::inherit(),
518        args: Vec::new(),
519        env: Vec::new(),
520        use_shell: false,
521        quote_command: false,
522        allow_manual_arg_splitting: cfg!(windows),
523    }
524}
525
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercise `win_path_lookup()` against a fixture whose directories, in sorted order,
    /// form a `;`-separated search path.
    #[test]
    fn internal_win_path_lookup() -> gix_testtools::Result {
        let root = gix_testtools::scripted_fixture_read_only("win_path_lookup.sh")?;
        let mut paths = Vec::new();
        for entry in std::fs::read_dir(&root)? {
            let Ok(entry) = entry else { continue };
            paths.push(entry.path().to_str().expect("no illformed UTF8").to_owned());
        }
        paths.sort();
        let lookup_path = OsString::from(paths.join(";"));
        let lookup = |command: &str| win_path_lookup(command.as_ref(), &lookup_path);

        assert_eq!(
            lookup("a/b"),
            None,
            "any path with separator is considered ready to use"
        );
        assert_eq!(
            lookup("x"),
            Some(root.join("a").join("x.exe")),
            "exe will be preferred, and it searches left to right thus doesn't find c/x.exe"
        );
        assert_eq!(
            lookup("x.exe"),
            Some(root.join("a").join("x.exe")),
            "no matter what, a/x won't be found as it's shadowed by an exe file"
        );
        assert_eq!(
            lookup("exe"),
            Some(root.join("b").join("exe")),
            "it finds files further down the path as well"
        );
        Ok(())
    }
}
562}