Skip to main content

branchless/git/
snapshot.rs

//! Implementation of working copy snapshots. The ideas are based on those
//! in Jujutsu: <https://github.com/martinvonz/jj/blob/main/docs/working-copy.md>
//!
//! Normally, Git only tracks committed changes via commits, and a subset of
//! information about uncommitted changes via the index. This module implements
//! "working copy snapshots", which are enough to reproduce the entire tracked
//! contents of the working copy, including staged changes and files with merge
//! conflicts.
//!
//! Untracked changes are not handled by this module. The changes might contain
//! sensitive data which we don't want to accidentally store in Git, or might be
//! very large and cause performance issues if committed.
//!
//! There are two main reasons to implement working copy snapshots:
//!
//!  1. To support enhanced undo features. For example, you should be able to
//!     jump back into merge conflict resolution which was happening at some
//!     past time.
//!  2. To unify the implementations of operations across commits and the
//!     working copy. For example, a `git split` command which splits one commit
//!     into multiple could also be used to split the working copy into multiple
//!     commits.
23
24use itertools::Itertools;
25use std::collections::HashMap;
26use std::str::FromStr;
27
28use tracing::instrument;
29
30use crate::core::formatting::Pluralize;
31use crate::git::FileStatus;
32
33use super::index::{Index, IndexEntry, Stage};
34use super::repo::Signature;
35use super::status::FileMode;
36use super::tree::{hydrate_tree, make_empty_tree};
37use super::{
38    Commit, MaybeZeroOid, NonZeroOid, ReferenceName, Repo, ResolvedReferenceInfo, StatusEntry,
39};
40
/// Trailer key, written into the snapshot's base commit message, whose value
/// is the OID of the `HEAD` commit at the time of the snapshot.
const BRANCHLESS_HEAD_TRAILER: &str = "Branchless-head";
/// Trailer key whose value is the name of the reference that was checked out
/// at the time of the snapshot. Only written when there was such a reference.
const BRANCHLESS_HEAD_REF_TRAILER: &str = "Branchless-head-ref";
/// Trailer key whose value is the OID of the commit holding the unstaged
/// changes of the snapshot.
const BRANCHLESS_UNSTAGED_TRAILER: &str = "Branchless-unstaged";
44
45/// A special `Commit` which represents the status of the working copy at a
46/// given point in time. This means that it can include changes in any stage.
47#[derive(Clone, Debug)]
48pub struct WorkingCopySnapshot<'repo> {
49    /// The commit which contains the metadata about the `HEAD` commit and all
50    /// the "stage commits" included in this snapshot.
51    ///
52    /// The stage commits each correspond to one of the possible stages in the
53    /// index. If a file is not present in that stage, it's assumed that it's
54    /// unchanged from the `HEAD` commit at the time which the snapshot was
55    /// taken.
56    ///
57    /// The metadata is stored in the commit message.
58    pub base_commit: Commit<'repo>,
59
60    /// The commit that was checked out at the time of this snapshot. It's
61    /// possible that *no* commit was checked out (called an "unborn HEAD").
62    /// This could happen when the repository has been freshly initialized, but
63    /// no commits have yet been made.
64    pub head_commit: Option<Commit<'repo>>,
65
66    /// The branch that was checked out at the time of this snapshot, if any.
67    /// This includes the `refs/heads/` prefix.
68    pub head_reference_name: Option<ReferenceName>,
69
70    /// The unstaged changes in the working copy.
71    pub commit_unstaged: Commit<'repo>,
72
73    /// The index contents at stage 0 (normal staged changes).
74    pub commit_stage0: Commit<'repo>,
75
76    /// The index contents at stage 1. For a merge conflict, this corresponds to
77    /// the contents of the file at the common ancestor of the merged commits.
78    pub commit_stage1: Commit<'repo>,
79
80    /// The index contents at stage 2 ("ours").
81    pub commit_stage2: Commit<'repo>,
82
83    /// The index contents at stage 3 ("theirs", i.e. the commit being merged
84    /// in).
85    pub commit_stage3: Commit<'repo>,
86}
87
/// Classifies which kind of changes, if any, are present in the working copy.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum WorkingCopyChangesType {
    /// No tracked file in the working copy has been changed.
    None,

    /// Tracked files have changes which have not been staged.
    Unstaged,

    /// Tracked files have changes which have been staged. Unstaged changes
    /// may be present as well.
    Staged,

    /// There are merge conflicts in the working copy which have not been
    /// resolved yet.
    Conflicts,
}
104
105impl<'repo> WorkingCopySnapshot<'repo> {
106    #[instrument]
107    pub(super) fn create(
108        repo: &'repo Repo,
109        index: &Index,
110        head_info: &ResolvedReferenceInfo,
111        status_entries: &[StatusEntry],
112    ) -> eyre::Result<Self> {
113        let head_commit = match head_info.oid {
114            Some(oid) => Some(repo.find_commit_or_fail(oid)?),
115            None => None,
116        };
117        let head_commit_oid: MaybeZeroOid = match &head_commit {
118            Some(head_commit) => MaybeZeroOid::NonZero(head_commit.get_oid()),
119            None => MaybeZeroOid::Zero,
120        };
121        let head_reference_name: Option<ReferenceName> = head_info.reference_name.clone();
122
123        let commit_unstaged_oid: NonZeroOid = {
124            Self::create_commit_for_unstaged_changes(
125                repo,
126                index,
127                head_commit.as_ref(),
128                status_entries,
129            )?
130        };
131
132        let commit_stage0 = Self::create_commit_for_stage(
133            repo,
134            index,
135            head_commit.as_ref(),
136            status_entries,
137            Stage::Stage0,
138        )?;
139        let commit_stage1 = Self::create_commit_for_stage(
140            repo,
141            index,
142            head_commit.as_ref(),
143            status_entries,
144            Stage::Stage1,
145        )?;
146        let commit_stage2 = Self::create_commit_for_stage(
147            repo,
148            index,
149            head_commit.as_ref(),
150            status_entries,
151            Stage::Stage2,
152        )?;
153        let commit_stage3 = Self::create_commit_for_stage(
154            repo,
155            index,
156            head_commit.as_ref(),
157            status_entries,
158            Stage::Stage3,
159        )?;
160
161        let trailers = {
162            let mut result = vec![(BRANCHLESS_HEAD_TRAILER, head_commit_oid.to_string())];
163            if let Some(head_reference_name) = &head_reference_name {
164                result.push((
165                    BRANCHLESS_HEAD_REF_TRAILER,
166                    head_reference_name.as_str().to_owned(),
167                ));
168            }
169            result.extend([
170                (BRANCHLESS_UNSTAGED_TRAILER, commit_unstaged_oid.to_string()),
171                (Stage::Stage0.get_trailer(), commit_stage0.to_string()),
172                (Stage::Stage1.get_trailer(), commit_stage1.to_string()),
173                (Stage::Stage2.get_trailer(), commit_stage2.to_string()),
174                (Stage::Stage3.get_trailer(), commit_stage3.to_string()),
175            ]);
176            result
177        };
178        let signature = Signature::automated()?;
179        let message = format!(
180            "\
181branchless: automated working copy snapshot
182
183{}
184",
185            trailers
186                .into_iter()
187                .map(|(name, value)| format!("{name}: {value}"))
188                .collect_vec()
189                .join("\n"),
190        );
191
192        // Use the current HEAD as the tree for parent commit, so that we can
193        // look at any of the stage commits and compare them to their immediate
194        // parent to find their logical contents.
195        let tree = match &head_commit {
196            Some(head_commit) => head_commit.get_tree()?,
197            None => make_empty_tree(repo)?,
198        };
199
200        let commit_stage0 = repo.find_commit_or_fail(commit_stage0)?;
201        let commit_stage1 = repo.find_commit_or_fail(commit_stage1)?;
202        let commit_stage2 = repo.find_commit_or_fail(commit_stage2)?;
203        let commit_stage3 = repo.find_commit_or_fail(commit_stage3)?;
204        let parents = {
205            // Add these commits as parents to ensure that they're kept live for
206            // as long as the snapshot commit itself is live.
207            let mut parents = vec![
208                &commit_stage0,
209                &commit_stage1,
210                &commit_stage2,
211                &commit_stage3,
212            ];
213            if let Some(head_commit) = &head_commit {
214                // Make the head commit the first parent, since that's
215                // conventionally the mainline parent.
216                parents.insert(0, head_commit);
217            }
218            parents
219        };
220        let commit_oid =
221            repo.create_commit(None, &signature, &signature, &message, &tree, parents)?;
222
223        Ok(WorkingCopySnapshot {
224            base_commit: repo.find_commit_or_fail(commit_oid)?,
225            head_commit: head_commit.clone(),
226            head_reference_name,
227            commit_unstaged: repo.find_commit_or_fail(commit_unstaged_oid)?,
228            commit_stage0,
229            commit_stage1,
230            commit_stage2,
231            commit_stage3,
232        })
233    }
234
235    /// Attempt to load the provided commit as if it were the base commit for a
236    /// [`WorkingCopySnapshot`]. Returns `None` if it was not.
237    #[instrument]
238    pub fn try_from_base_commit<'a>(
239        repo: &'repo Repo,
240        base_commit: &'a Commit<'repo>,
241    ) -> eyre::Result<Option<WorkingCopySnapshot<'repo>>> {
242        let trailers = base_commit.get_trailers()?;
243        let find_commit = |trailer: &str| -> eyre::Result<Option<Commit>> {
244            for (k, v) in trailers.iter() {
245                if k != trailer {
246                    continue;
247                }
248
249                let oid = MaybeZeroOid::from_str(v);
250                let oid = match oid {
251                    Ok(MaybeZeroOid::NonZero(oid)) => oid,
252                    Ok(MaybeZeroOid::Zero) => return Ok(None),
253                    Err(_) => continue,
254                };
255
256                let result = repo.find_commit_or_fail(oid)?;
257                return Ok(Some(result));
258            }
259            Ok(None)
260        };
261
262        let head_commit = find_commit(BRANCHLESS_HEAD_TRAILER)?;
263        let commit_unstaged = match find_commit(BRANCHLESS_UNSTAGED_TRAILER)? {
264            Some(commit) => commit,
265            None => return Ok(None),
266        };
267        let head_reference_name = trailers.iter().find_map(|(k, v)| {
268            if k == BRANCHLESS_HEAD_REF_TRAILER {
269                Some(ReferenceName::from(v.as_str()))
270            } else {
271                None
272            }
273        });
274
275        let commit_stage0 = match find_commit(Stage::Stage0.get_trailer())? {
276            Some(commit) => commit,
277            None => return Ok(None),
278        };
279        let commit_stage1 = match find_commit(Stage::Stage1.get_trailer())? {
280            Some(commit) => commit,
281            None => return Ok(None),
282        };
283        let commit_stage2 = match find_commit(Stage::Stage2.get_trailer())? {
284            Some(commit) => commit,
285            None => return Ok(None),
286        };
287        let commit_stage3 = match find_commit(Stage::Stage3.get_trailer())? {
288            Some(commit) => commit,
289            None => return Ok(None),
290        };
291
292        Ok(Some(WorkingCopySnapshot {
293            base_commit: base_commit.to_owned(),
294            head_commit,
295            head_reference_name,
296            commit_unstaged,
297            commit_stage0,
298            commit_stage1,
299            commit_stage2,
300            commit_stage3,
301        }))
302    }
303
304    #[instrument]
305    fn create_commit_for_unstaged_changes(
306        repo: &Repo,
307        index: &Index,
308        head_commit: Option<&Commit>,
309        status_entries: &[StatusEntry],
310    ) -> eyre::Result<NonZeroOid> {
311        let changed_paths: Vec<_> = status_entries
312            .iter()
313            .filter(|entry| {
314                // The working copy status is reported with respect to the
315                // staged changes, not to the `HEAD` commit. That means that if
316                // the working copy status is reported as modified and the
317                // staged status is reported as unmodified, there actually *was*
318                // a change on disk that we need to detect.
319                //
320                // On the other hand, if both are reported as modified, it's
321                // possible that there's *only* a staged change.
322                //
323                // Thus, we simply take all status entries that might refer to a
324                // file which has changed since `HEAD`. Later, we'll recompute
325                // the blobs for those files and hydrate the tree object. If it
326                // wasn't actually changed, then no harm will be done and that
327                // entry in the tree will also be unchanged.
328                entry.working_copy_status.is_changed() || entry.index_status.is_changed()
329            })
330            .flat_map(|entry| {
331                entry
332                    .paths()
333                    .into_iter()
334                    .map(|path| (path, entry.working_copy_file_mode))
335            })
336            .collect();
337        let num_changes = changed_paths.len();
338
339        let head_tree = head_commit.map(|commit| commit.get_tree()).transpose()?;
340        let hydrate_entries = {
341            let mut result = HashMap::new();
342            for (path, file_mode) in changed_paths {
343                let entry = if file_mode == FileMode::Unreadable {
344                    // If the file was deleted from the index, it's possible
345                    // that it might still exist on disk. However, if the mode
346                    // is `Unreadable`, that means that we should ignore its
347                    // existence on disk because it's no longer being tracked by
348                    // the index.
349                    None
350                } else {
351                    repo.create_blob_from_path_for_mode(&path, file_mode, index)?
352                        .map(|blob_oid| (blob_oid, file_mode))
353                };
354                result.insert(path, entry);
355            }
356            result
357        };
358        let tree_unstaged = {
359            let tree_oid = hydrate_tree(repo, head_tree.as_ref(), hydrate_entries)?;
360            repo.find_tree_or_fail(tree_oid)?
361        };
362
363        let signature = Signature::automated()?;
364        let message = format!(
365            "branchless: working copy snapshot data: {}",
366            Pluralize {
367                determiner: None,
368                amount: num_changes,
369                unit: ("unstaged change", "unstaged changes"),
370            }
371        );
372        let commit = repo.create_commit(
373            None,
374            &signature,
375            &signature,
376            &message,
377            &tree_unstaged,
378            Vec::from_iter(head_commit),
379        )?;
380        Ok(commit)
381    }
382
383    #[instrument]
384    fn create_commit_for_stage(
385        repo: &Repo,
386        index: &Index,
387        head_commit: Option<&Commit>,
388        status_entries: &[StatusEntry],
389        stage: Stage,
390    ) -> eyre::Result<NonZeroOid> {
391        let mut updated_entries = HashMap::new();
392        for StatusEntry {
393            path, index_status, ..
394        } in status_entries
395        {
396            let index_entry = index.get_entry_in_stage(path, stage);
397
398            let entry = match index_entry {
399                None => match (stage, index_status) {
400                    // Stage 0 should have a copy of every file in the working
401                    // tree, so the absence of that file now means that it was
402                    // staged as deleted.
403                    (Stage::Stage0, _) => None,
404
405                    // If this file was in a state of conflict, then having
406                    // failed to find it in the index means that it was deleted
407                    // in this stage.
408                    (Stage::Stage1 | Stage::Stage2 | Stage::Stage3, FileStatus::Unmerged) => None,
409
410                    // If this file wasn't in a state of conflict, then we
411                    // should use the HEAD entry for this stage.
412                    (
413                        Stage::Stage1 | Stage::Stage2 | Stage::Stage3,
414                        FileStatus::Added
415                        | FileStatus::Copied
416                        | FileStatus::Deleted
417                        | FileStatus::Ignored
418                        | FileStatus::Modified
419                        | FileStatus::Renamed
420                        | FileStatus::Unmodified
421                        | FileStatus::Untracked,
422                    ) => continue,
423                },
424
425                Some(IndexEntry {
426                    oid: MaybeZeroOid::Zero,
427                    file_mode: _,
428                }) => None,
429
430                Some(IndexEntry {
431                    oid: MaybeZeroOid::NonZero(oid),
432                    file_mode,
433                }) => Some((oid, file_mode)),
434            };
435
436            updated_entries.insert(path.clone(), entry);
437        }
438
439        let num_stage_changes = updated_entries.len();
440        let head_tree = match head_commit {
441            Some(head_commit) => Some(head_commit.get_tree()?),
442            None => None,
443        };
444        let tree_oid = hydrate_tree(repo, head_tree.as_ref(), updated_entries)?;
445        let tree = repo.find_tree_or_fail(tree_oid)?;
446
447        let signature = Signature::automated()?;
448        let message = format!(
449            "branchless: working copy snapshot data: {}",
450            Pluralize {
451                determiner: None,
452                amount: num_stage_changes,
453                unit: (
454                    &format!("change in stage {}", i32::from(stage)),
455                    &format!("changes in stage {}", i32::from(stage)),
456                ),
457            }
458        );
459        let commit_oid = repo.create_commit(
460            None,
461            &signature,
462            &signature,
463            &message,
464            &tree,
465            match head_commit {
466                Some(parent_commit) => vec![parent_commit],
467                None => vec![],
468            },
469        )?;
470        Ok(commit_oid)
471    }
472
473    /// Determine what kind of changes to the working copy the user made in this snapshot.
474    #[instrument]
475    pub fn get_working_copy_changes_type(&self) -> eyre::Result<WorkingCopyChangesType> {
476        let base_tree_oid = self.base_commit.get_tree_oid();
477        let unstaged_tree_oid = self.commit_unstaged.get_tree_oid();
478        let stage0_tree_oid = self.commit_stage0.get_tree_oid();
479        let stage1_tree_oid = self.commit_stage1.get_tree_oid();
480        let stage2_tree_oid = self.commit_stage2.get_tree_oid();
481        let stage3_tree_oid = self.commit_stage3.get_tree_oid();
482
483        if base_tree_oid != stage1_tree_oid
484            || base_tree_oid != stage2_tree_oid
485            || base_tree_oid != stage3_tree_oid
486        {
487            Ok(WorkingCopyChangesType::Conflicts)
488        } else if base_tree_oid != stage0_tree_oid {
489            Ok(WorkingCopyChangesType::Staged)
490        } else if base_tree_oid != unstaged_tree_oid {
491            Ok(WorkingCopyChangesType::Unstaged)
492        } else {
493            Ok(WorkingCopyChangesType::None)
494        }
495    }
496}