gix_command/lib.rs
1//! Launch commands very similarly to `Command`, but with `git` specific capabilities and adjustments.
2//!
3//! ## Examples
4//!
5//! ```
6//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
7//! let output = gix_command::prepare("git")
8//! .arg("--version")
9//! .spawn()?
10//! .wait_with_output()?;
11//!
12//! assert!(output.status.success());
13//! assert!(String::from_utf8(output.stdout)?.starts_with("git version "));
14//! # Ok(()) }
15//! ```
16#![deny(rust_2018_idioms, missing_docs)]
17#![forbid(unsafe_code)]
18
19use std::{
20 ffi::OsString,
21 io::Read,
22 path::{Path, PathBuf},
23};
24
25use bstr::{BString, ByteSlice};
26
/// A structure to keep settings to use when invoking a command via [`spawn()`][Prepare::spawn()],
/// after creating it with [`prepare()`].
///
/// All fields are public and can be set directly, or by using the builder methods on this type.
pub struct Prepare {
    /// The command to invoke, either directly or with a shell depending on `use_shell`.
    pub command: OsString,
    /// Additional information to be passed to the spawned command.
    pub context: Option<Context>,
    /// The way standard input is configured.
    pub stdin: std::process::Stdio,
    /// The way standard output is configured.
    pub stdout: std::process::Stdio,
    /// The way standard error is configured.
    ///
    /// Note that [`Context::stderr`], if set, takes precedence over this value.
    pub stderr: std::process::Stdio,
    /// The arguments to pass to the process being spawned.
    pub args: Vec<OsString>,
    /// Environment variables to set for the spawned process, as key-value pairs.
    pub env: Vec<(OsString, OsString)>,
    /// If `true`, we will use `shell_program`, or the default shell as returned by
    /// `gix_path::env::shell()` if it is unset, to execute the `command`.
    pub use_shell: bool,
    /// If `true`, `command` is assumed to be a command or path to the program to execute, and it
    /// will be shell-quoted to assure it will be executed as is and without splitting across
    /// whitespace.
    pub quote_command: bool,
    /// The name or path to the shell program to use instead of the default shell.
    pub shell_program: Option<OsString>,
    /// If `true` (default `true` on Windows and `false` everywhere else) we will see if it's safe
    /// to manually invoke `command` after splitting its arguments as a shell would do.
    ///
    /// Note that outside of Windows, it's generally not advisable as this removes support for
    /// literal shell scripts with shell-builtins.
    ///
    /// This mimics the behaviour we see with `git` on Windows, which also won't invoke the shell
    /// there at all.
    ///
    /// Only effective if `use_shell` is `true` as well, as the shell will be used as a fallback if
    /// it's not possible to split arguments as the command-line contains 'scripting'.
    pub allow_manual_arg_splitting: bool,
}
65
/// Additional information that is relevant to spawned processes, which typically receive
/// a wealth of contextual information when spawned from `git`.
///
/// Each field is optional, and a field that is `None` does not cause the respective
/// environment variable or setting to be applied to the spawned command.
///
/// See [the git source code](https://github.com/git/git/blob/cfb8a6e9a93adbe81efca66e6110c9b4d2e57169/git.c#L191)
/// for details.
#[derive(Debug, Default, Clone)]
pub struct Context {
    /// The `.git` directory that contains the repository.
    ///
    /// If set, it will be used to set the `GIT_DIR` environment variable.
    pub git_dir: Option<PathBuf>,
    /// If set, the `GIT_WORK_TREE` environment variable will be set to the given path.
    pub worktree_dir: Option<PathBuf>,
    /// If `true`, set `GIT_NO_REPLACE_OBJECTS` to `1`, which turns off object replacements, or `0` otherwise.
    /// If `None`, the variable won't be set.
    pub no_replace_objects: Option<bool>,
    /// If set, the `GIT_NAMESPACE` variable will be set to the given value, effectively
    /// namespacing all operations on references.
    pub ref_namespace: Option<BString>,
    /// If `true`, set `GIT_LITERAL_PATHSPECS` to `1`, which makes globs literal and prefixes as well, or `0` otherwise.
    /// If `None`, the variable won't be set.
    pub literal_pathspecs: Option<bool>,
    /// If `true`, set `GIT_GLOB_PATHSPECS` to `1`, which lets wildcards not match the `/` character, and equals the `:(glob)` prefix.
    /// If `false`, set `GIT_NOGLOB_PATHSPECS` to `1` which lets globs match only themselves.
    /// If `None`, neither variable will be set.
    pub glob_pathspecs: Option<bool>,
    /// If `true`, set `GIT_ICASE_PATHSPECS` to `1`, to let patterns match case-insensitively, or `0` otherwise.
    /// If `None`, the variable won't be set.
    pub icase_pathspecs: Option<bool>,
    /// If `true`, inherit `stderr` just like it's the default when spawning processes.
    /// If `false`, suppress all stderr output.
    /// If not `None`, this will override any value set with [`Prepare::stderr()`].
    pub stderr: Option<bool>,
}
100
101mod prepare {
102 use std::{
103 borrow::Cow,
104 ffi::OsString,
105 process::{Command, Stdio},
106 };
107
108 use bstr::ByteSlice;
109
110 use crate::{extract_interpreter, win_path_lookup, Context, Prepare};
111
112 /// Builder
113 impl Prepare {
114 /// If called, the command will be checked for characters that are typical for shell
115 /// scripts, and if found will use `sh` to execute it or whatever is set as
116 /// [`with_shell_program()`](Self::with_shell_program()).
117 ///
118 /// If the command isn't valid UTF-8, a shell will always be used.
119 ///
120 /// If a shell is used, then arguments given here with [arg()](Self::arg) or
121 /// [args()](Self::args) will be substituted via `"$@"` if it's not already present in the
122 /// command.
123 ///
124 ///
125 /// The [`command_may_be_shell_script_allow_manual_argument_splitting()`](Self::command_may_be_shell_script_allow_manual_argument_splitting())
126 /// and [`command_may_be_shell_script_disallow_manual_argument_splitting()`](Self::command_may_be_shell_script_disallow_manual_argument_splitting())
127 /// methods also call this method.
128 ///
129 /// If neither this method nor [`with_shell()`](Self::with_shell()) is called, commands are
130 /// always executed verbatim and directly, without the use of a shell.
131 pub fn command_may_be_shell_script(mut self) -> Self {
132 self.use_shell = self
133 .command
134 .to_str()
135 .is_none_or(|cmd| cmd.as_bytes().find_byteset(b"|&;<>()$`\\\"' \t\n*?[#~=%").is_some());
136 self
137 }
138
139 /// If called, unconditionally use a shell to execute the command and its arguments.
140 ///
141 /// This uses `sh` to execute it, or whatever is set as
142 /// [`with_shell_program()`](Self::with_shell_program()).
143 ///
144 /// Arguments given here with [arg()](Self::arg) or [args()](Self::args) will be
145 /// substituted via `"$@"` if it's not already present in the command.
146 ///
147 /// If neither this method nor
148 /// [`command_may_be_shell_script()`](Self::command_may_be_shell_script()) is called,
149 /// commands are always executed verbatim and directly, without the use of a shell. (But
150 /// see [`command_may_be_shell_script()`](Self::command_may_be_shell_script()) on other
151 /// methods that call that method.)
152 ///
153 /// We also disallow manual argument splitting
154 /// (see [`command_may_be_shell_script_disallow_manual_argument_splitting`](Self::command_may_be_shell_script_disallow_manual_argument_splitting()))
155 /// to assure a shell is indeed used, no matter what.
156 pub fn with_shell(mut self) -> Self {
157 self.use_shell = true;
158 self.allow_manual_arg_splitting = false;
159 self
160 }
161
162 /// Quote the command if it is run in a shell, so its path is left intact.
163 ///
164 /// This is only meaningful if the command has been arranged to run in a shell, either
165 /// unconditionally with [`with_shell()`](Self::with_shell()), or conditionally with
166 /// [`command_may_be_shell_script()`](Self::command_may_be_shell_script()).
167 ///
168 /// Note that this should not be used if the command is a script - quoting is only the
169 /// right choice if it's known to be a program path.
170 ///
171 /// Note also that this does not affect arguments passed with [arg()](Self::arg) or
172 /// [args()](Self::args), which do not have to be quoted by the *caller* because they are
173 /// passed as `"$@"` positional parameters (`"$1"`, `"$2"`, and so on).
174 pub fn with_quoted_command(mut self) -> Self {
175 self.quote_command = true;
176 self
177 }
178
179 /// Set the name or path to the shell `program` to use if a shell is to be used, to avoid
180 /// using the default shell which is `sh`.
181 ///
182 /// Note that shells that are not Bourne-style cannot be expected to work correctly,
183 /// because POSIX shell syntax is assumed when searching for and conditionally adding
184 /// `"$@"` to receive arguments, where applicable (and in the behaviour of
185 /// [`with_quoted_command()`](Self::with_quoted_command()), if called).
186 pub fn with_shell_program(mut self, program: impl Into<OsString>) -> Self {
187 self.shell_program = Some(program.into());
188 self
189 }
190
191 /// Unconditionally turn off using the shell when spawning the command.
192 ///
193 /// Note that not using the shell is the default. So an effective use of this method
194 /// is some time after [`command_may_be_shell_script()`](Self::command_may_be_shell_script())
195 /// or [`with_shell()`](Self::with_shell()) was called.
196 pub fn without_shell(mut self) -> Self {
197 self.use_shell = false;
198 self
199 }
200
201 /// Set additional `ctx` to be used when spawning the process.
202 ///
203 /// Note that this is a must for most kind of commands that `git` usually spawns, as at
204 /// least they need to know the correct Git repository to function.
205 pub fn with_context(mut self, ctx: Context) -> Self {
206 self.context = Some(ctx);
207 self
208 }
209
210 /// Like [`command_may_be_shell_script()`](Self::command_may_be_shell_script()), but try to
211 /// split arguments by hand if this can be safely done without a shell.
212 ///
213 /// This is useful on platforms where spawning processes is slow, or where many processes
214 /// have to be spawned in a row which should be sped up. Manual argument splitting is
215 /// enabled by default on Windows only.
216 ///
217 /// Note that this does *not* check for the use of possible shell builtins. Commands may
218 /// fail or behave differently if they are available as shell builtins and no corresponding
219 /// external command exists, or the external command behaves differently.
220 pub fn command_may_be_shell_script_allow_manual_argument_splitting(mut self) -> Self {
221 self.allow_manual_arg_splitting = true;
222 self.command_may_be_shell_script()
223 }
224
225 /// Like [`command_may_be_shell_script()`](Self::command_may_be_shell_script()), but don't
226 /// allow to bypass the shell even if manual argument splitting can be performed safely.
227 pub fn command_may_be_shell_script_disallow_manual_argument_splitting(mut self) -> Self {
228 self.allow_manual_arg_splitting = false;
229 self.command_may_be_shell_script()
230 }
231
232 /// Configure the process to use `stdio` for _stdin_.
233 pub fn stdin(mut self, stdio: Stdio) -> Self {
234 self.stdin = stdio;
235 self
236 }
237 /// Configure the process to use `stdio` for _stdout_.
238 pub fn stdout(mut self, stdio: Stdio) -> Self {
239 self.stdout = stdio;
240 self
241 }
242 /// Configure the process to use `stdio` for _stderr_.
243 pub fn stderr(mut self, stdio: Stdio) -> Self {
244 self.stderr = stdio;
245 self
246 }
247
248 /// Add `arg` to the list of arguments to call the command with.
249 pub fn arg(mut self, arg: impl Into<OsString>) -> Self {
250 self.args.push(arg.into());
251 self
252 }
253
254 /// Add `args` to the list of arguments to call the command with.
255 pub fn args(mut self, args: impl IntoIterator<Item = impl Into<OsString>>) -> Self {
256 self.args
257 .append(&mut args.into_iter().map(Into::into).collect::<Vec<_>>());
258 self
259 }
260
261 /// Add `key` with `value` to the environment of the spawned command.
262 pub fn env(mut self, key: impl Into<OsString>, value: impl Into<OsString>) -> Self {
263 self.env.push((key.into(), value.into()));
264 self
265 }
266 }
267
268 /// Finalization
269 impl Prepare {
270 /// Spawn the command as configured.
271 pub fn spawn(self) -> std::io::Result<std::process::Child> {
272 let mut cmd = Command::from(self);
273 gix_trace::debug!(cmd = ?cmd);
274 cmd.spawn()
275 }
276 }
277
    /// Convert the settings in `Prepare` into a [`Command`] that is ready to be spawned.
    ///
    /// There are three ways the program to launch is determined:
    ///
    /// * with `use_shell`, the command is either split by hand into program and arguments
    ///   (if `allow_manual_arg_splitting` is set and no shell syntax is detected), or it is
    ///   passed to the shell via `-c`.
    /// * without a shell on Windows, the program is looked up in `PATH` and checked for a
    ///   shebang line, whose interpreter is then launched instead.
    /// * otherwise the command is executed directly.
    ///
    /// Afterwards stdio, environment variables, arguments and the [`Context`] are applied.
    impl From<Prepare> for Command {
        fn from(mut prep: Prepare) -> Command {
            let mut cmd = if prep.use_shell {
                // Try to split the command into its arguments ourselves, but only if it is allowed
                // and the command contains none of the bytes that require an actual shell.
                let split_args = prep
                    .allow_manual_arg_splitting
                    .then(|| {
                        if gix_path::into_bstr(std::borrow::Cow::Borrowed(prep.command.as_ref()))
                            .find_byteset(b"\\|&;<>()$`\n*?[#~%")
                            .is_none()
                        {
                            // Commands that aren't valid UTF-8, fail to split, or split into
                            // nothing will fall back to using the shell.
                            prep.command.to_str().and_then(|args| {
                                shell_words::split(args)
                                    .ok()
                                    .filter(|args| !args.is_empty())
                                    .map(Vec::into_iter)
                            })
                        } else {
                            None
                        }
                    })
                    .flatten();
                match split_args {
                    Some(mut args) => {
                        // The first word is the program, the remaining ones are its arguments.
                        let mut cmd = Command::new(args.next().expect("non-empty input"));
                        cmd.args(args);
                        cmd
                    }
                    None => {
                        let shell = prep.shell_program.unwrap_or_else(|| gix_path::env::shell().into());
                        let mut cmd = Command::new(shell);
                        cmd.arg("-c");
                        // The script is only altered if there are arguments to pass on.
                        // Note that this means `quote_command` has no effect without arguments.
                        if !prep.args.is_empty() {
                            if prep.command.to_str().is_none_or(|cmd| !cmd.contains("$@")) {
                                if prep.quote_command {
                                    if let Ok(command) = gix_path::os_str_into_bstr(&prep.command) {
                                        prep.command = gix_path::from_bstring(gix_quote::single(command)).into();
                                    }
                                }
                                // Let the shell expand all arguments that are added further below.
                                prep.command.push(r#" "$@""#);
                            } else {
                                gix_trace::debug!(
                                    r#"Will not add '"$@"' to '{:?}' as it seems to contain '$@' already"#,
                                    prep.command
                                );
                            }
                        }
                        cmd.arg(prep.command);
                        // This is the first argument after the script, with `prep.args` following it.
                        cmd.arg("--");
                        cmd
                    }
                }
            } else if cfg!(windows) {
                // Resolve the command using `PATH`, or use it as is if it can't be found there.
                let program: Cow<'_, std::path::Path> = std::env::var_os("PATH")
                    .and_then(|path| win_path_lookup(prep.command.as_ref(), &path))
                    .map(Cow::Owned)
                    .unwrap_or(Cow::Borrowed(prep.command.as_ref()));
                if let Some(shebang) = extract_interpreter(program.as_ref()) {
                    let mut cmd = Command::new(shebang.interpreter);
                    // For relative paths, we may have picked up a file in the current repository
                    // for which an attacker could control everything. Hence, strip options just like Git.
                    // If the file was found in the PATH though, it should be trustworthy.
                    if program.is_absolute() {
                        cmd.args(shebang.args);
                    }
                    // NOTE(review): the interpreter receives the original command, not the resolved
                    // `program` path - confirm that this is intended.
                    cmd.arg(prep.command);
                    cmd
                } else {
                    Command::new(prep.command)
                }
            } else {
                Command::new(prep.command)
            };
            // We never want to have terminals pop-up on Windows if this runs from a GUI application.
            #[cfg(windows)]
            {
                use std::os::windows::process::CommandExt;
                const CREATE_NO_WINDOW: u32 = 0x08000000;
                cmd.creation_flags(CREATE_NO_WINDOW);
            }
            // These arguments are added after those that were set while choosing the program above.
            cmd.stdin(prep.stdin)
                .stdout(prep.stdout)
                .stderr(prep.stderr)
                .envs(prep.env)
                .args(prep.args);
            // The context is applied last, so its variables and its `stderr` setting are set
            // after the ones configured on `prep` itself.
            if let Some(ctx) = prep.context {
                if let Some(git_dir) = ctx.git_dir {
                    cmd.env("GIT_DIR", &git_dir);
                }
                if let Some(worktree_dir) = ctx.worktree_dir {
                    cmd.env("GIT_WORK_TREE", worktree_dir);
                }
                if let Some(value) = ctx.no_replace_objects {
                    // Booleans are passed as `1` or `0`.
                    cmd.env("GIT_NO_REPLACE_OBJECTS", usize::from(value).to_string());
                }
                if let Some(namespace) = ctx.ref_namespace {
                    cmd.env("GIT_NAMESPACE", gix_path::from_bstring(namespace));
                }
                if let Some(value) = ctx.literal_pathspecs {
                    cmd.env("GIT_LITERAL_PATHSPECS", usize::from(value).to_string());
                }
                if let Some(value) = ctx.glob_pathspecs {
                    // Here the boolean selects the variable to set, whose value is always `1`.
                    cmd.env(
                        if value {
                            "GIT_GLOB_PATHSPECS"
                        } else {
                            "GIT_NOGLOB_PATHSPECS"
                        },
                        "1",
                    );
                }
                if let Some(value) = ctx.icase_pathspecs {
                    cmd.env("GIT_ICASE_PATHSPECS", usize::from(value).to_string());
                }
                if let Some(stderr) = ctx.stderr {
                    cmd.stderr(if stderr { Stdio::inherit() } else { Stdio::null() });
                }
            }
            cmd
        }
    }
398}
399
/// Return `true` if `executable` has the `exe` extension, compared without regard to ASCII case
/// so that names like `GIT.EXE` are recognized as well.
fn is_exe(executable: &Path) -> bool {
    executable
        .extension()
        .is_some_and(|ext| ext.eq_ignore_ascii_case("exe"))
}
403
404/// Try to find `command` in the `path_value` (the value of `PATH`) as separated by `;`, or return `None`.
405/// Has special handling for `.exe` extensions, as these will be appended automatically if needed.
406/// Note that just like Git, no lookup is performed if a slash or backslash is in `command`.
407fn win_path_lookup(command: &Path, path_value: &std::ffi::OsStr) -> Option<PathBuf> {
408 fn lookup(root: &bstr::BStr, command: &Path, is_exe: bool) -> Option<PathBuf> {
409 let mut path = gix_path::try_from_bstr(root).ok()?.join(command);
410 if !is_exe {
411 path.set_extension("exe");
412 }
413 if path.is_file() {
414 return Some(path);
415 }
416 if is_exe {
417 return None;
418 }
419 path.set_extension("");
420 path.is_file().then_some(path)
421 }
422 if command.components().take(2).count() == 2 {
423 return None;
424 }
425 let path = gix_path::os_str_into_bstr(path_value).ok()?;
426 let is_exe = is_exe(command);
427
428 for root in path.split(|b| *b == b';') {
429 if let Some(executable) = lookup(root.as_bstr(), command, is_exe) {
430 return Some(executable);
431 }
432 }
433 None
434}
435
436/// Parse the shebang (`#!<path>`) from the first line of `executable`, and return the shebang
437/// data when available.
438pub fn extract_interpreter(executable: &Path) -> Option<shebang::Data> {
439 #[cfg(windows)]
440 if is_exe(executable) {
441 return None;
442 }
443 let mut buf = [0; 100]; // Note: just like Git
444 let mut file = std::fs::File::open(executable).ok()?;
445 let n = file.read(&mut buf).ok()?;
446 shebang::parse(buf[..n].as_bstr())
447}
448
449///
450pub mod shebang {
451 use std::{ffi::OsString, path::PathBuf};
452
453 use bstr::{BStr, ByteSlice};
454
455 /// Parse `buf` to extract all shebang information.
456 pub fn parse(buf: &BStr) -> Option<Data> {
457 let mut line = buf.lines().next()?;
458 line = line.strip_prefix(b"#!")?;
459
460 let slash_idx = line.rfind_byteset(br"/\")?;
461 Some(match line[slash_idx..].find_byte(b' ') {
462 Some(space_idx) => {
463 let space = slash_idx + space_idx;
464 Data {
465 interpreter: gix_path::from_byte_slice(line[..space].trim()).to_owned(),
466 args: line
467 .get(space + 1..)
468 .and_then(|mut r| {
469 r = r.trim();
470 if r.is_empty() {
471 return None;
472 }
473
474 match r.as_bstr().to_str() {
475 Ok(args) => shell_words::split(args)
476 .ok()
477 .map(|args| args.into_iter().map(Into::into).collect()),
478 Err(_) => Some(vec![gix_path::from_byte_slice(r).to_owned().into()]),
479 }
480 })
481 .unwrap_or_default(),
482 }
483 }
484 None => Data {
485 interpreter: gix_path::from_byte_slice(line.trim()).to_owned(),
486 args: Vec::new(),
487 },
488 })
489 }
490
491 /// Shebang information as [parsed](parse()) from a buffer that should contain at least one line.
492 ///
493 /// ### Deviation
494 ///
495 /// According to the [shebang documentation](https://en.wikipedia.org/wiki/Shebang_(Unix)), it will only consider
496 /// the path of the executable, along with the arguments as the consecutive portion after the space that separates
497 /// them. Argument splitting would then have to be done elsewhere, probably in the kernel.
498 ///
499 /// To make that work without the kernel, we perform the splitting while Git just ignores options.
500 /// For now it seems more compatible to not ignore options, but if it is important this could be changed.
501 #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
502 pub struct Data {
503 /// The interpreter to run.
504 pub interpreter: PathBuf,
505 /// The remainder of the line past the space after `interpreter`, without leading or trailing whitespace,
506 /// as pre-split arguments just like a shell would do it.
507 /// Note that we accept that illformed UTF-8 will prevent argument splitting.
508 pub args: Vec<OsString>,
509 }
510}
511
512/// Prepare `cmd` for [spawning][std::process::Command::spawn()] by configuring it with various builder methods.
513///
514/// Note that the default IO is configured for typical API usage, that is
515///
516/// - `stdin` is null to prevent blocking unexpectedly on consumption of stdin
517/// - `stdout` is captured for consumption by the caller
518/// - `stderr` is inherited to allow the command to provide context to the user
519///
520/// On Windows, terminal Windows will be suppressed automatically.
521///
522/// ### Warning
523///
524/// When using this method, be sure that the invoked program doesn't rely on the current working dir and/or
525/// environment variables to know its context. If so, call instead [`Prepare::with_context()`] to provide
526/// additional information.
527pub fn prepare(cmd: impl Into<OsString>) -> Prepare {
528 Prepare {
529 command: cmd.into(),
530 shell_program: None,
531 context: None,
532 stdin: std::process::Stdio::null(),
533 stdout: std::process::Stdio::piped(),
534 stderr: std::process::Stdio::inherit(),
535 args: Vec::new(),
536 env: Vec::new(),
537 use_shell: false,
538 quote_command: false,
539 allow_manual_arg_splitting: cfg!(windows),
540 }
541}
542
#[cfg(test)]
mod tests {
    use super::*;

    /// Use each directory of the fixture as one entry of a `;` separated search path,
    /// and validate which file is found for a couple of commands.
    #[test]
    fn internal_win_path_lookup() -> gix_testtools::Result {
        let root = gix_testtools::scripted_fixture_read_only("win_path_lookup.sh")?;
        let mut search_dirs = Vec::new();
        for entry in std::fs::read_dir(&root)?.filter_map(Result::ok) {
            search_dirs.push(entry.path().to_str().expect("no illformed UTF8").to_owned());
        }
        search_dirs.sort();
        let lookup_path = OsString::from(search_dirs.join(";"));

        // Each case is the command to look up, the expected result, and the reason for it.
        let expectations: [(&str, Option<PathBuf>, &str); 4] = [
            ("a/b", None, "any path with separator is considered ready to use"),
            (
                "x",
                Some(root.join("a").join("x.exe")),
                "exe will be preferred, and it searches left to right thus doesn't find c/x.exe",
            ),
            (
                "x.exe",
                Some(root.join("a").join("x.exe")),
                "no matter what, a/x won't be found as it's shadowed by an exe file",
            ),
            (
                "exe",
                Some(root.join("b").join("exe")),
                "it finds files further down the path as well",
            ),
        ];
        for (command, expected, message) in expectations {
            assert_eq!(win_path_lookup(Path::new(command), &lookup_path), expected, "{message}");
        }
        Ok(())
    }
}