x_core/
git.rs

1// Copyright (c) Aptos
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::errors::*;
5use camino::{Utf8Path, Utf8PathBuf};
6use determinator::Utf8Paths0;
7use guppy::{graph::PackageGraph, MetadataCommand};
8use indoc::formatdoc;
9use log::{debug, info};
10use once_cell::sync::OnceCell;
11use std::{
12    borrow::Cow,
13    ffi::{OsStr, OsString},
14    fmt,
15    process::{Command, Stdio},
16};
17
/// Support for source control operations through running Git commands.
///
/// This assumes that the underlying Git repository doesn't change in the middle of an operation,
/// and caches data as a result. If mutation operations are added, the caches would need to be
/// invalidated.
#[derive(Clone, Debug)]
pub struct GitCli {
    /// Default working directory for Git commands; `new` validates that it is the Git root.
    root: &'static Utf8Path,
    // Caches.
    /// Output of `git ls-files -z`, computed on the first call to `tracked_files` and reused
    /// afterwards.
    tracked_files: OnceCell<Utf8Paths0>,
}
29
30impl GitCli {
31    /// Creates a new instance of the Git CLI.
32    pub fn new(root: &'static Utf8Path) -> Result<Self> {
33        let git_cli = Self {
34            root,
35            tracked_files: OnceCell::new(),
36        };
37        git_cli.validate()?;
38        Ok(git_cli)
39    }
40
41    /// Returns the files tracked by Git in this working copy.
42    ///
43    /// The return value can be iterated on to get a list of paths.
44    pub fn tracked_files(&self) -> Result<&Utf8Paths0> {
45        self.tracked_files.get_or_try_init(|| {
46            // TODO: abstract out SCM and command-running functionality.
47            let output = self
48                .git_command()
49                // The -z causes files to not be quoted, and to be separated by \0.
50                .args(&["ls-files", "-z"])
51                .output()
52                .map_err(|err| SystemError::io("running git ls-files", err))?;
53            if !output.status.success() {
54                return Err(SystemError::Exec {
55                    cmd: "git ls-files",
56                    status: output.status,
57                });
58            }
59
60            Utf8Paths0::from_bytes(output.stdout)
61                .map_err(|(path, err)| SystemError::NonUtf8Path { path, err })
62        })
63    }
64
65    /// Returns the merge base of the current commit (`HEAD`) with the specified commit.
66    pub fn merge_base(&self, commit_ref: &str) -> Result<GitHash> {
67        let output = self
68            .git_command()
69            .args(&["merge-base", "HEAD", commit_ref])
70            .output()
71            .map_err(|err| {
72                SystemError::io(format!("running git merge-base HEAD {}", commit_ref), err)
73            })?;
74        if !output.status.success() {
75            return Err(SystemError::Exec {
76                cmd: "git merge-base",
77                status: output.status,
78            });
79        }
80
81        // The output is a hex-encoded hash followed by a newline.
82        let stdout = &output.stdout[..(output.stdout.len() - 1)];
83        GitHash::from_hex(stdout)
84    }
85
86    /// Returns the files changed between the given commits, or the current directory if the new
87    /// commit isn't specified.
88    ///
89    /// For more about the diff filter, see `man git-diff`'s help for `--diff-filter`.
90    pub fn files_changed_between<'a>(
91        &self,
92        old: impl Into<Cow<'a, OsStr>>,
93        new: impl Into<Option<Cow<'a, OsStr>>>,
94        // TODO: make this more well-typed/express more of the diff model in Rust
95        diff_filter: Option<&str>,
96    ) -> Result<Utf8Paths0> {
97        let mut command = self.git_command();
98        command.args(&["diff", "-z", "--name-only"]);
99        if let Some(diff_filter) = diff_filter {
100            command.arg(format!("--diff-filter={}", diff_filter));
101        }
102        command.arg(old.into());
103        if let Some(new) = new.into() {
104            command.arg(new);
105        }
106
107        let output = command
108            .output()
109            .map_err(|err| SystemError::io("running git diff", err))?;
110        if !output.status.success() {
111            return Err(SystemError::Exec {
112                cmd: "git diff",
113                status: output.status,
114            });
115        }
116
117        Utf8Paths0::from_bytes(output.stdout)
118            .map_err(|(path, err)| SystemError::NonUtf8Path { path, err })
119    }
120
121    /// Returns a package graph for the given commit, using a scratch repo if necessary.
122    pub fn package_graph_at(&self, commit_ref: &GitHash) -> Result<PackageGraph> {
123        // Create or initialize the scratch worktree.
124        let scratch = self.get_or_init_scratch(commit_ref)?;
125
126        // Compute the package graph for the scratch worktree.
127        MetadataCommand::new()
128            .current_dir(scratch)
129            .build_graph()
130            .map_err(|err| SystemError::guppy("building package graph", err))
131    }
132
133    // ---
134    // Helper methods
135    // ---
136
137    fn validate(&self) -> Result<()> {
138        // Check that the project root and the Git root match.
139        let output = self
140            .git_command()
141            .args(&["rev-parse", "--show-toplevel"])
142            .stderr(Stdio::inherit())
143            .output()
144            .map_err(|err| SystemError::io("running git rev-parse --show-toplevel", err))?;
145        if !output.status.success() {
146            let msg = formatdoc!(
147                "unable to find a git repo at {}
148                (hint: did you download an archive from GitHub? x requires a git clone)",
149                self.root
150            );
151            return Err(SystemError::git_root(msg));
152        }
153
154        let mut git_root_bytes = output.stdout;
155        // Pop the newline off the git root bytes.
156        git_root_bytes.pop();
157        let git_root = match String::from_utf8(git_root_bytes) {
158            Ok(git_root) => git_root,
159            Err(_) => {
160                return Err(SystemError::git_root(
161                    "git rev-parse --show-toplevel returned a non-Unicode path",
162                ));
163            }
164        };
165        if self.root != git_root {
166            let msg = formatdoc!(
167                "git root expected to be at {}, but actually found at {}
168                (hint: did you download an archive from GitHub? x requires a git clone)",
169                self.root,
170                git_root,
171            );
172            return Err(SystemError::git_root(msg));
173        }
174        Ok(())
175    }
176
177    // TODO: abstract out command running and error handling
178    fn git_command(&self) -> Command {
179        // TODO: add support for the GIT environment variable?
180        let mut command = Command::new("git");
181        command.current_dir(self.root).stderr(Stdio::inherit());
182        command
183    }
184
185    /// Gets the scratch worktree if it exists, or initializes it if it doesn't.
186    ///
187    /// The scratch worktree is meant to be persistent across invocations of `x`. This is done for
188    /// performance reasons.
189    fn get_or_init_scratch(&self, hash: &GitHash) -> Result<Utf8PathBuf> {
190        let mut scratch_dir = self.root.join("target");
191        scratch_dir.extend(&["x-scratch", "tree"]);
192
193        if scratch_dir.is_dir() && self.is_git_repo(&scratch_dir)? {
194            debug!("Using existing scratch worktree at {}", scratch_dir,);
195
196            // Check out the given hash in the scratch worktree.
197            let output = self
198                .git_command()
199                .current_dir(&scratch_dir)
200                // TODO: also git clean?
201                .args(&["reset", &format!("{:x}", hash), "--hard"])
202                .output()
203                .map_err(|err| SystemError::io("running git checkout in scratch tree", err))?;
204            if !output.status.success() {
205                return Err(SystemError::Exec {
206                    cmd: "git checkout",
207                    status: output.status,
208                });
209            }
210        } else {
211            if scratch_dir.is_dir() {
212                std::fs::remove_dir_all(&scratch_dir)
213                    .map_err(|err| SystemError::io("cleaning old scratch_dir", err))?;
214            }
215
216            // Try creating a scratch worktree at that location.
217            info!("Setting up scratch worktree in {}", scratch_dir);
218            let output = self
219                .git_command()
220                .args(&["worktree", "add"])
221                .arg(&scratch_dir)
222                .args(&[&format!("{:x}", hash), "--detach"])
223                .output()
224                .map_err(|err| SystemError::io("running git worktree add", err))?;
225            if !output.status.success() {
226                return Err(SystemError::Exec {
227                    cmd: "git worktree add",
228                    status: output.status,
229                });
230            }
231        }
232
233        // TODO: some sort of cross-process locking may be necessary in the future. Don't worry
234        // about it for now.
235        Ok(scratch_dir)
236    }
237
238    pub fn is_git_repo(&self, dir: &Utf8Path) -> Result<bool> {
239        let output = self
240            .git_command()
241            .current_dir(dir)
242            .args(&["rev-parse", "--git-dir"])
243            .output()
244            .map_err(|err| SystemError::io("checking if a directory is a git repo", err))?;
245
246        Ok(output.status.success())
247    }
248}
249
/// A Git hash.
///
/// Stored as 20 raw bytes. `GitHash::from_hex` builds one from hex-encoded text, and the
/// `LowerHex` implementation renders it back as hex.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct GitHash([u8; 20]);
253
254impl GitHash {
255    /// Creates a new Git hash from a hex-encoded string.
256    pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self> {
257        let hex = hex.as_ref();
258        Ok(GitHash(hex::FromHex::from_hex(hex).map_err(|err| {
259            SystemError::from_hex(format!("parsing a Git hash: {:?}", hex), err)
260        })?))
261    }
262}
263
264impl<'a, 'b> From<&'a GitHash> for Cow<'b, OsStr> {
265    fn from(git_hash: &'a GitHash) -> Cow<'b, OsStr> {
266        OsString::from(format!("{:x}", git_hash)).into()
267    }
268}
269
270impl fmt::LowerHex for GitHash {
271    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
272        write!(f, "{}", hex::encode(&self.0))
273    }
274}