Skip to main content

gix_command/
lib.rs

1//! Launch commands very similarly to `Command`, but with `git` specific capabilities and adjustments.
2//!
3//! ## Examples
4//!
5//! ```
6//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
7//! let output = gix_command::prepare("git")
8//!     .arg("--version")
9//!     .spawn()?
10//!     .wait_with_output()?;
11//!
12//! assert!(output.status.success());
13//! assert!(String::from_utf8(output.stdout)?.starts_with("git version "));
14//! # Ok(()) }
15//! ```
16#![deny(missing_docs)]
17#![forbid(unsafe_code)]
18
19use std::{
20    ffi::OsString,
21    io::Read,
22    path::{Path, PathBuf},
23};
24
25use bstr::{BString, ByteSlice};
26
27/// A structure to keep settings to use when invoking a command via [`spawn()`][Prepare::spawn()],
28/// after creating it with [`prepare()`].
29pub struct Prepare {
30    /// The command to invoke, either directly or with a shell depending on `use_shell`.
31    pub command: OsString,
32    /// Additional information to be passed to the spawned command.
33    pub context: Option<Context>,
34    /// The way standard input is configured.
35    pub stdin: std::process::Stdio,
36    /// The way standard output is configured.
37    pub stdout: std::process::Stdio,
38    /// The way standard error is configured.
39    pub stderr: std::process::Stdio,
40    /// The arguments to pass to the process being spawned.
41    pub args: Vec<OsString>,
42    /// Environment variables to set for the spawned process.
43    pub env: Vec<(OsString, OsString)>,
44    /// If `true`, we will use `shell_program` or `sh` to execute the `command`.
45    pub use_shell: bool,
46    /// If `true`, `command` is assumed to be a command or path to the program to execute, and it
47    /// will be shell-quoted to assure it will be executed as is and without splitting across
48    /// whitespace.
49    pub quote_command: bool,
50    /// The name or path to the shell program to use instead of `sh`.
51    pub shell_program: Option<OsString>,
52    /// If `true` (default `true` on Windows and `false` everywhere else) we will see if it's safe
53    /// to manually invoke `command` after splitting its arguments as a shell would do.
54    ///
55    /// Note that outside of Windows, it's generally not advisable as this removes support for
56    /// literal shell scripts with shell-builtins.
57    ///
58    /// This mimics the behaviour we see with `git` on Windows, which also won't invoke the shell
59    /// there at all.
60    ///
61    /// Only effective if `use_shell` is `true` as well, as the shell will be used as a fallback if
62    /// it's not possible to split arguments as the command-line contains 'scripting'.
63    pub allow_manual_arg_splitting: bool,
64}
65
/// Additional information that is relevant to spawned processes, which typically receive
/// a wealth of contextual information when spawned from `git`.
///
/// Each field that is set is translated into an environment variable (or, for `stderr`, a stream
/// configuration) of the spawned process. Fields that are `None` leave the process untouched,
/// which is also what the `Default` implementation does for every field.
///
/// See [the git source code](https://github.com/git/git/blob/cfb8a6e9a93adbe81efca66e6110c9b4d2e57169/git.c#L191)
/// for details.
#[derive(Debug, Default, Clone)]
pub struct Context {
    /// The `.git` directory that contains the repository.
    ///
    /// If set, it will be used to set the `GIT_DIR` environment variable.
    pub git_dir: Option<PathBuf>,
    /// If set, set the `GIT_WORK_TREE` environment variable to the given path.
    pub worktree_dir: Option<PathBuf>,
    /// If `true`, set `GIT_NO_REPLACE_OBJECTS` to `1`, which turns off object replacements, or `0` otherwise.
    /// If `None`, the variable won't be set.
    pub no_replace_objects: Option<bool>,
    /// If set, set the `GIT_NAMESPACE` variable to the given value, effectively namespacing all
    /// operations on references.
    pub ref_namespace: Option<BString>,
    /// If `true`, set `GIT_LITERAL_PATHSPECS` to `1`, which makes globs literal and prefixes as well, or `0` otherwise.
    /// If `None`, the variable won't be set.
    pub literal_pathspecs: Option<bool>,
    /// If `true`, set `GIT_GLOB_PATHSPECS` to `1`, which lets wildcards not match the `/` character, and equals the `:(glob)` prefix.
    /// If `false`, set `GIT_NOGLOB_PATHSPECS` to `1` which lets globs match only themselves.
    /// If `None`, neither variable will be set.
    pub glob_pathspecs: Option<bool>,
    /// If `true`, set `GIT_ICASE_PATHSPECS` to `1`, to let patterns match case-insensitively, or `0` otherwise.
    /// If `None`, the variable won't be set.
    pub icase_pathspecs: Option<bool>,
    /// If `true`, inherit `stderr` just like it's the default when spawning processes.
    /// If `false`, suppress all stderr output.
    /// If not `None`, this will override any value set with [`Prepare::stderr()`].
    pub stderr: Option<bool>,
}
100
101mod prepare {
102    use std::{
103        borrow::Cow,
104        ffi::OsString,
105        process::{Command, Stdio},
106    };
107
108    use bstr::ByteSlice;
109
110    use crate::{Context, Prepare, extract_interpreter, win_path_lookup};
111
112    /// Builder
113    impl Prepare {
114        /// If called, the command will be checked for characters that are typical for shell
115        /// scripts, and if found will use `sh` to execute it or whatever is set as
116        /// [`with_shell_program()`](Self::with_shell_program()).
117        ///
118        /// If the command isn't valid UTF-8, a shell will always be used.
119        ///
120        /// If a shell is used, then arguments given here with [arg()](Self::arg) or
121        /// [args()](Self::args) will be substituted via `"$@"` if it's not already present in the
122        /// command.
123        ///
124        ///
125        /// The [`command_may_be_shell_script_allow_manual_argument_splitting()`](Self::command_may_be_shell_script_allow_manual_argument_splitting())
126        /// and [`command_may_be_shell_script_disallow_manual_argument_splitting()`](Self::command_may_be_shell_script_disallow_manual_argument_splitting())
127        /// methods also call this method.
128        ///
129        /// If neither this method nor [`with_shell()`](Self::with_shell()) is called, commands are
130        /// always executed verbatim and directly, without the use of a shell.
131        pub fn command_may_be_shell_script(mut self) -> Self {
132            self.use_shell = self
133                .command
134                .to_str()
135                .is_none_or(|cmd| cmd.as_bytes().find_byteset(b"|&;<>()$`\\\"' \t\n*?[#~=%").is_some());
136            self
137        }
138
139        /// If called, unconditionally use a shell to execute the command and its arguments.
140        ///
141        /// This uses `sh` to execute it, or whatever is set as
142        /// [`with_shell_program()`](Self::with_shell_program()).
143        ///
144        /// Arguments given here with [arg()](Self::arg) or [args()](Self::args) will be
145        /// substituted via `"$@"` if it's not already present in the command.
146        ///
147        /// If neither this method nor
148        /// [`command_may_be_shell_script()`](Self::command_may_be_shell_script()) is called,
149        /// commands are always executed verbatim and directly, without the use of a shell. (But
150        /// see [`command_may_be_shell_script()`](Self::command_may_be_shell_script()) on other
151        /// methods that call that method.)
152        ///
153        /// We also disallow manual argument splitting
154        /// (see [`command_may_be_shell_script_disallow_manual_argument_splitting`](Self::command_may_be_shell_script_disallow_manual_argument_splitting()))
155        /// to assure a shell is indeed used, no matter what.
156        pub fn with_shell(mut self) -> Self {
157            self.use_shell = true;
158            self.allow_manual_arg_splitting = false;
159            self
160        }
161
162        /// Quote the command if it is run in a shell, so its path is left intact.
163        ///
164        /// This is only meaningful if the command has been arranged to run in a shell, either
165        /// unconditionally with [`with_shell()`](Self::with_shell()), or conditionally with
166        /// [`command_may_be_shell_script()`](Self::command_may_be_shell_script()).
167        ///
168        /// Note that this should not be used if the command is a script - quoting is only the
169        /// right choice if it's known to be a program path.
170        ///
171        /// Note also that this does not affect arguments passed with [arg()](Self::arg) or
172        /// [args()](Self::args), which do not have to be quoted by the *caller* because they are
173        /// passed as `"$@"` positional parameters (`"$1"`, `"$2"`, and so on).
174        pub fn with_quoted_command(mut self) -> Self {
175            self.quote_command = true;
176            self
177        }
178
179        /// Set the name or path to the shell `program` to use if a shell is to be used, to avoid
180        /// using the default shell which is `sh`.
181        ///
182        /// Note that shells that are not Bourne-style cannot be expected to work correctly,
183        /// because POSIX shell syntax is assumed when searching for and conditionally adding
184        /// `"$@"` to receive arguments, where applicable (and in the behaviour of
185        /// [`with_quoted_command()`](Self::with_quoted_command()), if called).
186        pub fn with_shell_program(mut self, program: impl Into<OsString>) -> Self {
187            self.shell_program = Some(program.into());
188            self
189        }
190
191        /// Unconditionally turn off using the shell when spawning the command.
192        ///
193        /// Note that not using the shell is the default. So an effective use of this method
194        /// is some time after [`command_may_be_shell_script()`](Self::command_may_be_shell_script())
195        /// or [`with_shell()`](Self::with_shell()) was called.
196        pub fn without_shell(mut self) -> Self {
197            self.use_shell = false;
198            self
199        }
200
201        /// Set additional `ctx` to be used when spawning the process.
202        ///
203        /// Note that this is a must for most kind of commands that `git` usually spawns, as at
204        /// least they need to know the correct Git repository to function.
205        pub fn with_context(mut self, ctx: Context) -> Self {
206            self.context = Some(ctx);
207            self
208        }
209
210        /// Like [`command_may_be_shell_script()`](Self::command_may_be_shell_script()), but try to
211        /// split arguments by hand if this can be safely done without a shell.
212        ///
213        /// This is useful on platforms where spawning processes is slow, or where many processes
214        /// have to be spawned in a row which should be sped up. Manual argument splitting is
215        /// enabled by default on Windows only.
216        ///
217        /// Note that this does *not* check for the use of possible shell builtins. Commands may
218        /// fail or behave differently if they are available as shell builtins and no corresponding
219        /// external command exists, or the external command behaves differently.
220        pub fn command_may_be_shell_script_allow_manual_argument_splitting(mut self) -> Self {
221            self.allow_manual_arg_splitting = true;
222            self.command_may_be_shell_script()
223        }
224
225        /// Like [`command_may_be_shell_script()`](Self::command_may_be_shell_script()), but don't
226        /// allow to bypass the shell even if manual argument splitting can be performed safely.
227        pub fn command_may_be_shell_script_disallow_manual_argument_splitting(mut self) -> Self {
228            self.allow_manual_arg_splitting = false;
229            self.command_may_be_shell_script()
230        }
231
232        /// Configure the process to use `stdio` for _stdin_.
233        pub fn stdin(mut self, stdio: Stdio) -> Self {
234            self.stdin = stdio;
235            self
236        }
237        /// Configure the process to use `stdio` for _stdout_.
238        pub fn stdout(mut self, stdio: Stdio) -> Self {
239            self.stdout = stdio;
240            self
241        }
242        /// Configure the process to use `stdio` for _stderr_.
243        pub fn stderr(mut self, stdio: Stdio) -> Self {
244            self.stderr = stdio;
245            self
246        }
247
248        /// Add `arg` to the list of arguments to call the command with.
249        pub fn arg(mut self, arg: impl Into<OsString>) -> Self {
250            self.args.push(arg.into());
251            self
252        }
253
254        /// Add `args` to the list of arguments to call the command with.
255        pub fn args(mut self, args: impl IntoIterator<Item = impl Into<OsString>>) -> Self {
256            self.args
257                .append(&mut args.into_iter().map(Into::into).collect::<Vec<_>>());
258            self
259        }
260
261        /// Add `key` with `value` to the environment of the spawned command.
262        pub fn env(mut self, key: impl Into<OsString>, value: impl Into<OsString>) -> Self {
263            self.env.push((key.into(), value.into()));
264            self
265        }
266    }
267
268    /// Finalization
269    impl Prepare {
270        /// Spawn the command as configured.
271        pub fn spawn(self) -> std::io::Result<std::process::Child> {
272            let mut cmd = Command::from(self);
273            gix_trace::debug!(cmd = ?cmd);
274            cmd.spawn()
275        }
276    }
277
    /// Turn a fully configured [`Prepare`] into a [`Command`] that is ready to be spawned.
    ///
    /// The program to launch is determined in one of three ways:
    ///
    /// * If `use_shell` is set, `command` is either split into program and arguments by hand
    ///   (only if `allow_manual_arg_splitting` is set and no shell-specific characters are found),
    ///   or it is handed to the shell as `<shell> -c <command> <arg0>`.
    /// * Otherwise, when compiled for Windows, `command` is looked up in `PATH` and a shebang
    ///   line in the file found, if any, selects the interpreter to run it with.
    /// * Otherwise `command` is invoked directly.
    ///
    /// Standard streams, environment variables, the arguments and finally the [`Context`] are
    /// applied to the resulting command afterwards, in that order.
    impl From<Prepare> for Command {
        fn from(mut prep: Prepare) -> Command {
            let mut cmd = if prep.use_shell {
                // Try to avoid the shell: this is only done if allowed, if the command is free of
                // characters with a special meaning to the shell, if it is valid UTF-8 and if
                // splitting it yields at least one word.
                let split_args = prep
                    .allow_manual_arg_splitting
                    .then(|| {
                        if gix_path::into_bstr(std::borrow::Cow::Borrowed(prep.command.as_ref()))
                            .find_byteset(b"\\|&;<>()$`\n*?[#~%")
                            .is_none()
                        {
                            prep.command.to_str().and_then(|args| {
                                shell_words::split(args)
                                    .ok()
                                    .filter(|args| !args.is_empty())
                                    .map(Vec::into_iter)
                            })
                        } else {
                            None
                        }
                    })
                    .flatten();
                match split_args {
                    Some(mut args) => {
                        // The first word is the program, all remaining ones are its leading arguments.
                        let mut cmd = Command::new(args.next().expect("non-empty input"));
                        cmd.args(args);
                        cmd
                    }
                    None => {
                        let shell = prep.shell_program.unwrap_or_else(|| gix_path::env::shell().into());
                        // Passed as `command_name` after `-c <script>`; the shell uses it
                        // as `$0`, which prefixes its own diagnostic messages. If the
                        // shell path has no extractable basename — reachable only via
                        // degenerate input like `""` or `/` — fall back to `_`, the
                        // conventional placeholder for an unused `$0`, rather than
                        // making a false claim about which shell is running.
                        let arg0 = std::path::Path::new(&shell)
                            .file_name()
                            .unwrap_or(std::ffi::OsStr::new("_"))
                            .to_os_string();
                        let mut cmd = Command::new(shell);
                        cmd.arg("-c");
                        // The script only needs `"$@"` if there are arguments to forward.
                        // NOTE(review): quoting due to `quote_command` is nested in this condition,
                        // hence the command is passed unquoted if there are no arguments or if it
                        // contains `$@` already - confirm that this is intended.
                        if !prep.args.is_empty() {
                            if prep.command.to_str().is_none_or(|cmd| !cmd.contains("$@")) {
                                if prep.quote_command {
                                    if let Ok(command) = gix_path::os_str_into_bstr(&prep.command) {
                                        prep.command = gix_path::from_bstring(gix_quote::single(command)).into();
                                    }
                                }
                                prep.command.push(r#" "$@""#);
                            } else {
                                gix_trace::debug!(
                                    r#"Will not add '"$@"' to '{:?}' as it seems to contain '$@' already"#,
                                    prep.command
                                );
                            }
                        }
                        // Order matters: the script comes first, followed by the value for `$0`. The
                        // arguments in `prep.args` are added further below and thus end up as `$1`, `$2`, ….
                        cmd.arg(prep.command);
                        cmd.arg(arg0);
                        cmd
                    }
                }
            } else if cfg!(windows) {
                // `cfg!()` evaluates to a constant, so this branch is compiled on all platforms
                // but only ever taken on Windows.
                // Use the file found in `PATH` if there is one, or the command as is otherwise.
                let program: Cow<'_, std::path::Path> = std::env::var_os("PATH")
                    .and_then(|path| win_path_lookup(prep.command.as_ref(), &path))
                    .map(Cow::Owned)
                    .unwrap_or(Cow::Borrowed(prep.command.as_ref()));
                if let Some(shebang) = extract_interpreter(program.as_ref()) {
                    let mut cmd = Command::new(shebang.interpreter);
                    // For relative paths, we may have picked up a file in the current repository
                    // for which an attacker could control everything. Hence, strip options just like Git.
                    // If the file was found in the PATH though, it should be trustworthy.
                    if program.is_absolute() {
                        cmd.args(shebang.args);
                    }
                    // The interpreter receives the command as originally given, not the path found by the lookup.
                    cmd.arg(prep.command);
                    cmd
                } else {
                    Command::new(prep.command)
                }
            } else {
                Command::new(prep.command)
            };
            // We never want to have terminals pop-up on Windows if this runs from a GUI application.
            #[cfg(windows)]
            {
                use std::os::windows::process::CommandExt;
                const CREATE_NO_WINDOW: u32 = 0x08000000;
                cmd.creation_flags(CREATE_NO_WINDOW);
            }
            // User-provided arguments always come last, after everything that was added above.
            cmd.stdin(prep.stdin)
                .stdout(prep.stdout)
                .stderr(prep.stderr)
                .envs(prep.env)
                .args(prep.args);
            // The context is applied after `prep.env` and `prep.stderr`, so its values are set last.
            if let Some(ctx) = prep.context {
                if let Some(git_dir) = ctx.git_dir {
                    cmd.env("GIT_DIR", &git_dir);
                }
                if let Some(worktree_dir) = ctx.worktree_dir {
                    cmd.env("GIT_WORK_TREE", worktree_dir);
                }
                // Boolean settings are passed as `1` for `true` and `0` for `false`.
                if let Some(value) = ctx.no_replace_objects {
                    cmd.env("GIT_NO_REPLACE_OBJECTS", usize::from(value).to_string());
                }
                if let Some(namespace) = ctx.ref_namespace {
                    cmd.env("GIT_NAMESPACE", gix_path::from_bstring(namespace));
                }
                if let Some(value) = ctx.literal_pathspecs {
                    cmd.env("GIT_LITERAL_PATHSPECS", usize::from(value).to_string());
                }
                // Unlike the other booleans, this one selects which of two variables is set to `1`.
                if let Some(value) = ctx.glob_pathspecs {
                    cmd.env(
                        if value {
                            "GIT_GLOB_PATHSPECS"
                        } else {
                            "GIT_NOGLOB_PATHSPECS"
                        },
                        "1",
                    );
                }
                if let Some(value) = ctx.icase_pathspecs {
                    cmd.env("GIT_ICASE_PATHSPECS", usize::from(value).to_string());
                }
                if let Some(stderr) = ctx.stderr {
                    cmd.stderr(if stderr { Stdio::inherit() } else { Stdio::null() });
                }
            }
            cmd
        }
    }
408}
409
/// Return `true` if `executable` has the `exe` extension.
///
/// The comparison ignores ASCII case so that `TOOL.EXE` is recognized just like `tool.exe`,
/// which is what Git does and what is expected on the case-insensitive filesystems typical
/// for Windows.
fn is_exe(executable: &Path) -> bool {
    executable
        .extension()
        .is_some_and(|extension| extension.eq_ignore_ascii_case("exe"))
}
413
414/// Try to find `command` in the `path_value` (the value of `PATH`) as separated by `;`, or return `None`.
415/// Has special handling for `.exe` extensions, as these will be appended automatically if needed.
416/// Note that just like Git, no lookup is performed if a slash or backslash is in `command`.
417fn win_path_lookup(command: &Path, path_value: &std::ffi::OsStr) -> Option<PathBuf> {
418    fn lookup(root: &bstr::BStr, command: &Path, is_exe: bool) -> Option<PathBuf> {
419        let mut path = gix_path::try_from_bstr(root).ok()?.join(command);
420        if !is_exe {
421            path.set_extension("exe");
422        }
423        if path.is_file() {
424            return Some(path);
425        }
426        if is_exe {
427            return None;
428        }
429        path.set_extension("");
430        path.is_file().then_some(path)
431    }
432    if command.components().take(2).count() == 2 {
433        return None;
434    }
435    let path = gix_path::os_str_into_bstr(path_value).ok()?;
436    let is_exe = is_exe(command);
437
438    for root in path.split(|b| *b == b';') {
439        if let Some(executable) = lookup(root.as_bstr(), command, is_exe) {
440            return Some(executable);
441        }
442    }
443    None
444}
445
446/// Parse the shebang (`#!<path>`) from the first line of `executable`, and return the shebang
447/// data when available.
448pub fn extract_interpreter(executable: &Path) -> Option<shebang::Data> {
449    #[cfg(windows)]
450    if is_exe(executable) {
451        return None;
452    }
453    let mut buf = [0; 100]; // Note: just like Git
454    let mut file = std::fs::File::open(executable).ok()?;
455    let n = file.read(&mut buf).ok()?;
456    shebang::parse(buf[..n].as_bstr())
457}
458
459///
460pub mod shebang {
461    use std::{ffi::OsString, path::PathBuf};
462
463    use bstr::{BStr, ByteSlice};
464
465    /// Parse `buf` to extract all shebang information.
466    pub fn parse(buf: &BStr) -> Option<Data> {
467        let mut line = buf.lines().next()?;
468        line = line.strip_prefix(b"#!")?;
469
470        let slash_idx = line.rfind_byteset(br"/\")?;
471        Some(match line[slash_idx..].find_byte(b' ') {
472            Some(space_idx) => {
473                let space = slash_idx + space_idx;
474                Data {
475                    interpreter: gix_path::from_byte_slice(line[..space].trim()).to_owned(),
476                    args: line
477                        .get(space + 1..)
478                        .and_then(|mut r| {
479                            r = r.trim();
480                            if r.is_empty() {
481                                return None;
482                            }
483
484                            match r.as_bstr().to_str() {
485                                Ok(args) => shell_words::split(args)
486                                    .ok()
487                                    .map(|args| args.into_iter().map(Into::into).collect()),
488                                Err(_) => Some(vec![gix_path::from_byte_slice(r).to_owned().into()]),
489                            }
490                        })
491                        .unwrap_or_default(),
492                }
493            }
494            None => Data {
495                interpreter: gix_path::from_byte_slice(line.trim()).to_owned(),
496                args: Vec::new(),
497            },
498        })
499    }
500
501    /// Shebang information as [parsed](parse()) from a buffer that should contain at least one line.
502    ///
503    /// ### Deviation
504    ///
505    /// According to the [shebang documentation](https://en.wikipedia.org/wiki/Shebang_(Unix)), it will only consider
506    /// the path of the executable, along with the arguments as the consecutive portion after the space that separates
507    /// them. Argument splitting would then have to be done elsewhere, probably in the kernel.
508    ///
509    /// To make that work without the kernel, we perform the splitting while Git just ignores options.
510    /// For now it seems more compatible to not ignore options, but if it is important this could be changed.
511    #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
512    pub struct Data {
513        /// The interpreter to run.
514        pub interpreter: PathBuf,
515        /// The remainder of the line past the space after `interpreter`, without leading or trailing whitespace,
516        /// as pre-split arguments just like a shell would do it.
517        /// Note that we accept that illformed UTF-8 will prevent argument splitting.
518        pub args: Vec<OsString>,
519    }
520}
521
522/// Prepare `cmd` for [spawning][std::process::Command::spawn()] by configuring it with various builder methods.
523///
524/// Note that the default IO is configured for typical API usage, that is
525///
526/// - `stdin` is null to prevent blocking unexpectedly on consumption of stdin
527/// - `stdout` is captured for consumption by the caller
528/// - `stderr` is inherited to allow the command to provide context to the user
529///
530/// On Windows, terminal Windows will be suppressed automatically.
531///
532/// ### Warning
533///
534/// When using this method, be sure that the invoked program doesn't rely on the current working dir and/or
535/// environment variables to know its context. If so, call instead [`Prepare::with_context()`] to provide
536/// additional information.
537pub fn prepare(cmd: impl Into<OsString>) -> Prepare {
538    Prepare {
539        command: cmd.into(),
540        shell_program: None,
541        context: None,
542        stdin: std::process::Stdio::null(),
543        stdout: std::process::Stdio::piped(),
544        stderr: std::process::Stdio::inherit(),
545        args: Vec::new(),
546        env: Vec::new(),
547        use_shell: false,
548        quote_command: false,
549        allow_manual_arg_splitting: cfg!(windows),
550    }
551}
552
#[cfg(test)]
mod tests {
    use super::*;

    /// Validate the lookup against a fixture whose sub-directories serve as `PATH` entries,
    /// sorted by name so the search order is deterministic.
    #[test]
    fn internal_win_path_lookup() -> gix_testtools::Result {
        let root = gix_testtools::scripted_fixture_read_only("win_path_lookup.sh")?;
        let mut dirs = Vec::new();
        for entry in std::fs::read_dir(&root)?.flatten() {
            dirs.push(entry.path().to_str().expect("no illformed UTF8").to_owned());
        }
        dirs.sort();
        let lookup_path = OsString::from(dirs.join(";"));

        // Each case is the command to look up, the expected result and the reason for it.
        let cases = [
            ("a/b", None, "any path with separator is considered ready to use"),
            (
                "x",
                Some(root.join("a").join("x.exe")),
                "exe will be preferred, and it searches left to right thus doesn't find c/x.exe",
            ),
            (
                "x.exe",
                Some(root.join("a").join("x.exe")),
                "no matter what, a/x won't be found as it's shadowed by an exe file",
            ),
            (
                "exe",
                Some(root.join("b").join("exe")),
                "it finds files further down the path as well",
            ),
        ];
        for (command, expected, reason) in cases {
            assert_eq!(win_path_lookup(command.as_ref(), &lookup_path), expected, "{reason}");
        }
        Ok(())
    }
}
589}