branchless/git/
snapshot.rs

1//! Implementation of working copy snapshots. The ideas are based off of those
2//! in Jujutsu: <https://github.com/martinvonz/jj/blob/main/docs/working-copy.md>
3//!
4//! Normally, Git only tracks committed changes via commits, and a subset of
5//! information about uncommitted changes via the index. This module implements
6//! "working copy snapshots", which are enough to reproduce the entire tracked
7//! contents of the working copy, including staged changes and files with merge
8//! conflicts.
9//!
10//! Untracked changes are not handled by this module. The changes might contain
11//! sensitive data which we don't want to accidentally store in Git, or might be
12//! very large and cause performance issues if committed.
13//!
14//! There are two main reasons to implement working copy snapshots:
15//!
16//!  1. To support enhanced undo features. For example, you should be able to
17//!     jump back into merge conflict resolution which was happening at some
18//!     past time.
19//!  2. To unify the implementations of operations across commits and the
20//!     working copy. For example, a `git split` command which splits one commit
21//!     into multiple could also be used to split the working copy into multiple
22//!     commits.
23
24use itertools::Itertools;
25use std::collections::HashMap;
26use std::str::FromStr;
27
28use tracing::instrument;
29
30use crate::core::formatting::Pluralize;
31use crate::git::FileStatus;
32
33use super::index::{Index, IndexEntry, Stage};
34use super::repo::Signature;
35use super::status::FileMode;
36use super::tree::{hydrate_tree, make_empty_tree};
37use super::{
38    Commit, MaybeZeroOid, NonZeroOid, ReferenceName, Repo, ResolvedReferenceInfo, StatusEntry,
39};
40
41const BRANCHLESS_HEAD_TRAILER: &str = "Branchless-head";
42const BRANCHLESS_HEAD_REF_TRAILER: &str = "Branchless-head-ref";
43const BRANCHLESS_UNSTAGED_TRAILER: &str = "Branchless-unstaged";
44
/// A special `Commit` which represents the status of the working copy at a
/// given point in time. This means that it can include changes in any stage.
///
/// A snapshot consists of one base commit plus five data commits: one for the
/// unstaged changes and one for each of the four index stages. The base
/// commit's message records the object IDs of the other commits as trailers,
/// which is how [`WorkingCopySnapshot::try_from_base_commit`] locates them.
#[derive(Clone, Debug)]
pub struct WorkingCopySnapshot<'repo> {
    /// The commit which contains the metadata about the `HEAD` commit and all
    /// the "stage commits" included in this snapshot.
    ///
    /// The stage commits each correspond to one of the possible stages in the
    /// index. If a file is not present in that stage, it's assumed that it's
    /// unchanged from the `HEAD` commit at the time at which the snapshot was
    /// taken.
    ///
    /// The metadata is stored in the commit message, as one trailer per
    /// referenced commit.
    pub base_commit: Commit<'repo>,

    /// The commit that was checked out at the time of this snapshot. It's
    /// possible that *no* commit was checked out (called an "unborn HEAD").
    /// This could happen when the repository has been freshly initialized, but
    /// no commits have yet been made. In that case this field is `None`.
    pub head_commit: Option<Commit<'repo>>,

    /// The branch that was checked out at the time of this snapshot, if any.
    /// This includes the `refs/heads/` prefix.
    pub head_reference_name: Option<ReferenceName>,

    /// The unstaged changes in the working copy.
    pub commit_unstaged: Commit<'repo>,

    /// The index contents at stage 0 (normal staged changes).
    pub commit_stage0: Commit<'repo>,

    /// The index contents at stage 1. For a merge conflict, this corresponds to
    /// the contents of the file at the common ancestor of the merged commits.
    pub commit_stage1: Commit<'repo>,

    /// The index contents at stage 2 ("ours").
    pub commit_stage2: Commit<'repo>,

    /// The index contents at stage 3 ("theirs", i.e. the commit being merged
    /// in).
    pub commit_stage3: Commit<'repo>,
}
87
/// The type of changes in the working copy, if any.
///
/// When several kinds of changes are present at once,
/// [`WorkingCopySnapshot::get_working_copy_changes_type`] reports only one of
/// them, checking for conflicts first, then staged changes, then unstaged
/// changes.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum WorkingCopyChangesType {
    /// There are no changes to tracked files in the working copy.
    None,

    /// There are unstaged changes to tracked files in the working copy, and no
    /// staged changes or conflicts.
    Unstaged,

    /// There are staged changes to tracked files in the working copy. (There may also be unstaged
    /// changes.)
    Staged,

    /// The working copy has unresolved merge conflicts. (There may also be
    /// staged or unstaged changes.)
    Conflicts,
}
104
105impl<'repo> WorkingCopySnapshot<'repo> {
106    #[instrument]
107    pub(super) fn create(
108        repo: &'repo Repo,
109        index: &Index,
110        head_info: &ResolvedReferenceInfo,
111        status_entries: &[StatusEntry],
112    ) -> eyre::Result<Self> {
113        let head_commit = match head_info.oid {
114            Some(oid) => Some(repo.find_commit_or_fail(oid)?),
115            None => None,
116        };
117        let head_commit_oid: MaybeZeroOid = match &head_commit {
118            Some(head_commit) => MaybeZeroOid::NonZero(head_commit.get_oid()),
119            None => MaybeZeroOid::Zero,
120        };
121        let head_reference_name: Option<ReferenceName> = head_info.reference_name.clone();
122
123        let commit_unstaged_oid: NonZeroOid = {
124            Self::create_commit_for_unstaged_changes(repo, head_commit.as_ref(), status_entries)?
125        };
126
127        let commit_stage0 = Self::create_commit_for_stage(
128            repo,
129            index,
130            head_commit.as_ref(),
131            status_entries,
132            Stage::Stage0,
133        )?;
134        let commit_stage1 = Self::create_commit_for_stage(
135            repo,
136            index,
137            head_commit.as_ref(),
138            status_entries,
139            Stage::Stage1,
140        )?;
141        let commit_stage2 = Self::create_commit_for_stage(
142            repo,
143            index,
144            head_commit.as_ref(),
145            status_entries,
146            Stage::Stage2,
147        )?;
148        let commit_stage3 = Self::create_commit_for_stage(
149            repo,
150            index,
151            head_commit.as_ref(),
152            status_entries,
153            Stage::Stage3,
154        )?;
155
156        let trailers = {
157            let mut result = vec![(BRANCHLESS_HEAD_TRAILER, head_commit_oid.to_string())];
158            if let Some(head_reference_name) = &head_reference_name {
159                result.push((
160                    BRANCHLESS_HEAD_REF_TRAILER,
161                    head_reference_name.as_str().to_owned(),
162                ));
163            }
164            result.extend([
165                (BRANCHLESS_UNSTAGED_TRAILER, commit_unstaged_oid.to_string()),
166                (Stage::Stage0.get_trailer(), commit_stage0.to_string()),
167                (Stage::Stage1.get_trailer(), commit_stage1.to_string()),
168                (Stage::Stage2.get_trailer(), commit_stage2.to_string()),
169                (Stage::Stage3.get_trailer(), commit_stage3.to_string()),
170            ]);
171            result
172        };
173        let signature = Signature::automated()?;
174        let message = format!(
175            "\
176branchless: automated working copy snapshot
177
178{}
179",
180            trailers
181                .into_iter()
182                .map(|(name, value)| format!("{name}: {value}"))
183                .collect_vec()
184                .join("\n"),
185        );
186
187        // Use the current HEAD as the tree for parent commit, so that we can
188        // look at any of the stage commits and compare them to their immediate
189        // parent to find their logical contents.
190        let tree = match &head_commit {
191            Some(head_commit) => head_commit.get_tree()?,
192            None => make_empty_tree(repo)?,
193        };
194
195        let commit_stage0 = repo.find_commit_or_fail(commit_stage0)?;
196        let commit_stage1 = repo.find_commit_or_fail(commit_stage1)?;
197        let commit_stage2 = repo.find_commit_or_fail(commit_stage2)?;
198        let commit_stage3 = repo.find_commit_or_fail(commit_stage3)?;
199        let parents = {
200            // Add these commits as parents to ensure that they're kept live for
201            // as long as the snapshot commit itself is live.
202            let mut parents = vec![
203                &commit_stage0,
204                &commit_stage1,
205                &commit_stage2,
206                &commit_stage3,
207            ];
208            if let Some(head_commit) = &head_commit {
209                // Make the head commit the first parent, since that's
210                // conventionally the mainline parent.
211                parents.insert(0, head_commit);
212            }
213            parents
214        };
215        let commit_oid =
216            repo.create_commit(None, &signature, &signature, &message, &tree, parents)?;
217
218        Ok(WorkingCopySnapshot {
219            base_commit: repo.find_commit_or_fail(commit_oid)?,
220            head_commit: head_commit.clone(),
221            head_reference_name,
222            commit_unstaged: repo.find_commit_or_fail(commit_unstaged_oid)?,
223            commit_stage0,
224            commit_stage1,
225            commit_stage2,
226            commit_stage3,
227        })
228    }
229
230    /// Attempt to load the provided commit as if it were the base commit for a
231    /// [`WorkingCopySnapshot`]. Returns `None` if it was not.
232    #[instrument]
233    pub fn try_from_base_commit<'a>(
234        repo: &'repo Repo,
235        base_commit: &'a Commit<'repo>,
236    ) -> eyre::Result<Option<WorkingCopySnapshot<'repo>>> {
237        let trailers = base_commit.get_trailers()?;
238        let find_commit = |trailer: &str| -> eyre::Result<Option<Commit>> {
239            for (k, v) in trailers.iter() {
240                if k != trailer {
241                    continue;
242                }
243
244                let oid = MaybeZeroOid::from_str(v);
245                let oid = match oid {
246                    Ok(MaybeZeroOid::NonZero(oid)) => oid,
247                    Ok(MaybeZeroOid::Zero) => return Ok(None),
248                    Err(_) => continue,
249                };
250
251                let result = repo.find_commit_or_fail(oid)?;
252                return Ok(Some(result));
253            }
254            Ok(None)
255        };
256
257        let head_commit = find_commit(BRANCHLESS_HEAD_TRAILER)?;
258        let commit_unstaged = match find_commit(BRANCHLESS_UNSTAGED_TRAILER)? {
259            Some(commit) => commit,
260            None => return Ok(None),
261        };
262        let head_reference_name = trailers.iter().find_map(|(k, v)| {
263            if k == BRANCHLESS_HEAD_REF_TRAILER {
264                Some(ReferenceName::from(v.as_str()))
265            } else {
266                None
267            }
268        });
269
270        let commit_stage0 = match find_commit(Stage::Stage0.get_trailer())? {
271            Some(commit) => commit,
272            None => return Ok(None),
273        };
274        let commit_stage1 = match find_commit(Stage::Stage1.get_trailer())? {
275            Some(commit) => commit,
276            None => return Ok(None),
277        };
278        let commit_stage2 = match find_commit(Stage::Stage2.get_trailer())? {
279            Some(commit) => commit,
280            None => return Ok(None),
281        };
282        let commit_stage3 = match find_commit(Stage::Stage3.get_trailer())? {
283            Some(commit) => commit,
284            None => return Ok(None),
285        };
286
287        Ok(Some(WorkingCopySnapshot {
288            base_commit: base_commit.to_owned(),
289            head_commit,
290            head_reference_name,
291            commit_unstaged,
292            commit_stage0,
293            commit_stage1,
294            commit_stage2,
295            commit_stage3,
296        }))
297    }
298
299    #[instrument]
300    fn create_commit_for_unstaged_changes(
301        repo: &Repo,
302        head_commit: Option<&Commit>,
303        status_entries: &[StatusEntry],
304    ) -> eyre::Result<NonZeroOid> {
305        let changed_paths: Vec<_> = status_entries
306            .iter()
307            .filter(|entry| {
308                // The working copy status is reported with respect to the
309                // staged changes, not to the `HEAD` commit. That means that if
310                // the working copy status is reported as modified and the
311                // staged status is reported as unmodified, there actually *was*
312                // a change on disk that we need to detect.
313                //
314                // On the other hand, if both are reported as modified, it's
315                // possible that there's *only* a staged change.
316                //
317                // Thus, we simply take all status entries that might refer to a
318                // file which has changed since `HEAD`. Later, we'll recompute
319                // the blobs for those files and hydrate the tree object. If it
320                // wasn't actually changed, then no harm will be done and that
321                // entry in the tree will also be unchanged.
322                entry.working_copy_status.is_changed() || entry.index_status.is_changed()
323            })
324            .flat_map(|entry| {
325                entry
326                    .paths()
327                    .into_iter()
328                    .map(|path| (path, entry.working_copy_file_mode))
329            })
330            .collect();
331        let num_changes = changed_paths.len();
332
333        let head_tree = head_commit.map(|commit| commit.get_tree()).transpose()?;
334        let hydrate_entries = {
335            let mut result = HashMap::new();
336            for (path, file_mode) in changed_paths {
337                let entry = if file_mode == FileMode::Unreadable {
338                    // If the file was deleted from the index, it's possible
339                    // that it might still exist on disk. However, if the mode
340                    // is `Unreadable`, that means that we should ignore its
341                    // existence on disk because it's no longer being tracked by
342                    // the index.
343                    None
344                } else {
345                    repo.create_blob_from_path(&path)?
346                        .map(|blob_oid| (blob_oid, file_mode))
347                };
348                result.insert(path, entry);
349            }
350            result
351        };
352        let tree_unstaged = {
353            let tree_oid = hydrate_tree(repo, head_tree.as_ref(), hydrate_entries)?;
354            repo.find_tree_or_fail(tree_oid)?
355        };
356
357        let signature = Signature::automated()?;
358        let message = format!(
359            "branchless: working copy snapshot data: {}",
360            Pluralize {
361                determiner: None,
362                amount: num_changes,
363                unit: ("unstaged change", "unstaged changes"),
364            }
365        );
366        let commit = repo.create_commit(
367            None,
368            &signature,
369            &signature,
370            &message,
371            &tree_unstaged,
372            Vec::from_iter(head_commit),
373        )?;
374        Ok(commit)
375    }
376
377    #[instrument]
378    fn create_commit_for_stage(
379        repo: &Repo,
380        index: &Index,
381        head_commit: Option<&Commit>,
382        status_entries: &[StatusEntry],
383        stage: Stage,
384    ) -> eyre::Result<NonZeroOid> {
385        let mut updated_entries = HashMap::new();
386        for StatusEntry {
387            path, index_status, ..
388        } in status_entries
389        {
390            let index_entry = index.get_entry_in_stage(path, stage);
391
392            let entry = match index_entry {
393                None => match (stage, index_status) {
394                    // Stage 0 should have a copy of every file in the working
395                    // tree, so the absence of that file now means that it was
396                    // staged as deleted.
397                    (Stage::Stage0, _) => None,
398
399                    // If this file was in a state of conflict, then having
400                    // failed to find it in the index means that it was deleted
401                    // in this stage.
402                    (Stage::Stage1 | Stage::Stage2 | Stage::Stage3, FileStatus::Unmerged) => None,
403
404                    // If this file wasn't in a state of conflict, then we
405                    // should use the HEAD entry for this stage.
406                    (
407                        Stage::Stage1 | Stage::Stage2 | Stage::Stage3,
408                        FileStatus::Added
409                        | FileStatus::Copied
410                        | FileStatus::Deleted
411                        | FileStatus::Ignored
412                        | FileStatus::Modified
413                        | FileStatus::Renamed
414                        | FileStatus::Unmodified
415                        | FileStatus::Untracked,
416                    ) => continue,
417                },
418
419                Some(IndexEntry {
420                    oid: MaybeZeroOid::Zero,
421                    file_mode: _,
422                }) => None,
423
424                Some(IndexEntry {
425                    oid: MaybeZeroOid::NonZero(oid),
426                    file_mode,
427                }) => Some((oid, file_mode)),
428            };
429
430            updated_entries.insert(path.clone(), entry);
431        }
432
433        let num_stage_changes = updated_entries.len();
434        let head_tree = match head_commit {
435            Some(head_commit) => Some(head_commit.get_tree()?),
436            None => None,
437        };
438        let tree_oid = hydrate_tree(repo, head_tree.as_ref(), updated_entries)?;
439        let tree = repo.find_tree_or_fail(tree_oid)?;
440
441        let signature = Signature::automated()?;
442        let message = format!(
443            "branchless: working copy snapshot data: {}",
444            Pluralize {
445                determiner: None,
446                amount: num_stage_changes,
447                unit: (
448                    &format!("change in stage {}", i32::from(stage)),
449                    &format!("changes in stage {}", i32::from(stage)),
450                ),
451            }
452        );
453        let commit_oid = repo.create_commit(
454            None,
455            &signature,
456            &signature,
457            &message,
458            &tree,
459            match head_commit {
460                Some(parent_commit) => vec![parent_commit],
461                None => vec![],
462            },
463        )?;
464        Ok(commit_oid)
465    }
466
467    /// Determine what kind of changes to the working copy the user made in this snapshot.
468    #[instrument]
469    pub fn get_working_copy_changes_type(&self) -> eyre::Result<WorkingCopyChangesType> {
470        let base_tree_oid = self.base_commit.get_tree_oid();
471        let unstaged_tree_oid = self.commit_unstaged.get_tree_oid();
472        let stage0_tree_oid = self.commit_stage0.get_tree_oid();
473        let stage1_tree_oid = self.commit_stage1.get_tree_oid();
474        let stage2_tree_oid = self.commit_stage2.get_tree_oid();
475        let stage3_tree_oid = self.commit_stage3.get_tree_oid();
476
477        if base_tree_oid != stage1_tree_oid
478            || base_tree_oid != stage2_tree_oid
479            || base_tree_oid != stage3_tree_oid
480        {
481            Ok(WorkingCopyChangesType::Conflicts)
482        } else if base_tree_oid != stage0_tree_oid {
483            Ok(WorkingCopyChangesType::Staged)
484        } else if base_tree_oid != unstaged_tree_oid {
485            Ok(WorkingCopyChangesType::Unstaged)
486        } else {
487            Ok(WorkingCopyChangesType::None)
488        }
489    }
490}