jj_lib/
git_backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::any::Any;
18use std::collections::HashSet;
19use std::fmt::Debug;
20use std::fmt::Error;
21use std::fmt::Formatter;
22use std::fs;
23use std::io;
24use std::io::Cursor;
25use std::io::Read;
26use std::path::Path;
27use std::path::PathBuf;
28use std::process::Command;
29use std::process::ExitStatus;
30use std::str;
31use std::sync::Arc;
32use std::sync::Mutex;
33use std::sync::MutexGuard;
34use std::time::SystemTime;
35
36use async_trait::async_trait;
37use futures::stream::BoxStream;
38use gix::bstr::BString;
39use gix::objs::CommitRef;
40use gix::objs::CommitRefIter;
41use gix::objs::WriteTo;
42use itertools::Itertools;
43use pollster::FutureExt;
44use prost::Message;
45use smallvec::SmallVec;
46use thiserror::Error;
47
48use crate::backend::make_root_commit;
49use crate::backend::Backend;
50use crate::backend::BackendError;
51use crate::backend::BackendInitError;
52use crate::backend::BackendLoadError;
53use crate::backend::BackendResult;
54use crate::backend::ChangeId;
55use crate::backend::Commit;
56use crate::backend::CommitId;
57use crate::backend::Conflict;
58use crate::backend::ConflictId;
59use crate::backend::ConflictTerm;
60use crate::backend::CopyRecord;
61use crate::backend::FileId;
62use crate::backend::MergedTreeId;
63use crate::backend::MillisSinceEpoch;
64use crate::backend::SecureSig;
65use crate::backend::Signature;
66use crate::backend::SigningFn;
67use crate::backend::SymlinkId;
68use crate::backend::Timestamp;
69use crate::backend::Tree;
70use crate::backend::TreeId;
71use crate::backend::TreeValue;
72use crate::file_util::IoResultExt as _;
73use crate::file_util::PathError;
74use crate::index::Index;
75use crate::lock::FileLock;
76use crate::merge::Merge;
77use crate::merge::MergeBuilder;
78use crate::object_id::ObjectId;
79use crate::repo_path::RepoPath;
80use crate::repo_path::RepoPathBuf;
81use crate::repo_path::RepoPathComponentBuf;
82use crate::settings::UserSettings;
83use crate::stacked_table::MutableTable;
84use crate::stacked_table::ReadonlyTable;
85use crate::stacked_table::TableSegment;
86use crate::stacked_table::TableStore;
87use crate::stacked_table::TableStoreError;
88
/// Length in bytes of the object ids handled by this backend. Ids of any other
/// length are rejected by `validate_git_object_id`.
const HASH_LENGTH: usize = 20;
/// Length in bytes of the (all-zero) root change id.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";
// NOTE(review): not referenced in the visible part of this file; presumably
// the name suffix used for conflicted entries -- confirm against its users.
const CONFLICT_SUFFIX: &str = ".jjconflict";

/// Key of the extra commit header parsed by `root_tree_from_header`. Its value
/// is a space-separated list of hex tree ids.
const JJ_TREES_COMMIT_HEADER: &[u8] = b"jj:trees";
96
/// Error that may occur while initializing a `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendInitError {
    /// Creating a new Git repository failed.
    #[error("Failed to initialize git repository")]
    InitRepository(#[source] gix::init::Error),
    /// Opening an existing Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// A filesystem operation on a path failed (canonicalizing a path,
    /// creating the `extra` directory or writing the `git_target` file).
    #[error(transparent)]
    Path(PathError),
}
106
107impl From<Box<GitBackendInitError>> for BackendInitError {
108    fn from(err: Box<GitBackendInitError>) -> Self {
109        BackendInitError(err)
110    }
111}
112
/// Error that may occur while loading an existing `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendLoadError {
    /// Opening the Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// A filesystem operation on a path failed (reading the `git_target` file
    /// or canonicalizing the Git repository path).
    #[error(transparent)]
    Path(PathError),
}
120
121impl From<Box<GitBackendLoadError>> for BackendLoadError {
122    fn from(err: Box<GitBackendLoadError>) -> Self {
123        BackendLoadError(err)
124    }
125}
126
/// `GitBackend`-specific error that may occur after the backend is loaded.
#[derive(Debug, Error)]
pub enum GitBackendError {
    /// Reading the head of the extra metadata table store failed.
    #[error("Failed to read non-git metadata")]
    ReadMetadata(#[source] TableStoreError),
    /// Saving a table to the extra metadata table store failed.
    #[error("Failed to write non-git metadata")]
    WriteMetadata(#[source] TableStoreError),
}
135
136impl From<GitBackendError> for BackendError {
137    fn from(err: GitBackendError) -> Self {
138        BackendError::Other(err.into())
139    }
140}
141
/// Error that may occur while running `git gc` (see `run_git_gc`).
#[derive(Debug, Error)]
pub enum GitGcError {
    /// The `git` process could not be run to completion.
    #[error("Failed to run git gc command")]
    GcCommand(#[source] std::io::Error),
    /// The `git` process ran but reported a non-success exit status.
    #[error("git gc command exited with an error: {0}")]
    GcCommandErrorStatus(ExitStatus),
}
149
/// Backend whose objects live in a Git repository, with additional metadata
/// kept in a separate table store.
pub struct GitBackend {
    // While gix::Repository can be created from gix::ThreadSafeRepository, it's
    // cheaper to cache the thread-local instance behind a mutex than creating
    // one for each backend method call. Our GitBackend is most likely to be
    // used in a single-threaded context.
    /// Thread-safe handle from which new thread-local instances are created.
    base_repo: gix::ThreadSafeRepository,
    /// Cached thread-local instance; access it through `lock_git_repo()`.
    repo: Mutex<gix::Repository>,
    /// All-zero id of `HASH_LENGTH` bytes.
    root_commit_id: CommitId,
    /// All-zero id of `CHANGE_ID_LENGTH` bytes.
    root_change_id: ChangeId,
    /// Tree id initialized from a fixed hex constant in `new()`.
    empty_tree_id: TreeId,
    /// Table store holding the metadata that isn't stored in Git.
    extra_metadata_store: TableStore,
    /// Most recently read or saved head table of `extra_metadata_store`, if
    /// any.
    cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
}
163
164impl GitBackend {
165    pub fn name() -> &'static str {
166        "git"
167    }
168
169    fn new(base_repo: gix::ThreadSafeRepository, extra_metadata_store: TableStore) -> Self {
170        let repo = Mutex::new(base_repo.to_thread_local());
171        let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
172        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
173        let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
174        GitBackend {
175            base_repo,
176            repo,
177            root_commit_id,
178            root_change_id,
179            empty_tree_id,
180            extra_metadata_store,
181            cached_extra_metadata: Mutex::new(None),
182        }
183    }
184
185    pub fn init_internal(
186        settings: &UserSettings,
187        store_path: &Path,
188    ) -> Result<Self, Box<GitBackendInitError>> {
189        let git_repo_path = Path::new("git");
190        let git_repo = gix::ThreadSafeRepository::init_opts(
191            store_path.join(git_repo_path),
192            gix::create::Kind::Bare,
193            gix::create::Options::default(),
194            gix_open_opts_from_settings(settings),
195        )
196        .map_err(GitBackendInitError::InitRepository)?;
197        Self::init_with_repo(store_path, git_repo_path, git_repo)
198    }
199
200    /// Initializes backend by creating a new Git repo at the specified
201    /// workspace path. The workspace directory must exist.
202    pub fn init_colocated(
203        settings: &UserSettings,
204        store_path: &Path,
205        workspace_root: &Path,
206    ) -> Result<Self, Box<GitBackendInitError>> {
207        let canonical_workspace_root = {
208            let path = store_path.join(workspace_root);
209            dunce::canonicalize(&path)
210                .context(&path)
211                .map_err(GitBackendInitError::Path)?
212        };
213        let git_repo = gix::ThreadSafeRepository::init_opts(
214            canonical_workspace_root,
215            gix::create::Kind::WithWorktree,
216            gix::create::Options::default(),
217            gix_open_opts_from_settings(settings),
218        )
219        .map_err(GitBackendInitError::InitRepository)?;
220        let git_repo_path = workspace_root.join(".git");
221        Self::init_with_repo(store_path, &git_repo_path, git_repo)
222    }
223
224    /// Initializes backend with an existing Git repo at the specified path.
225    pub fn init_external(
226        settings: &UserSettings,
227        store_path: &Path,
228        git_repo_path: &Path,
229    ) -> Result<Self, Box<GitBackendInitError>> {
230        let canonical_git_repo_path = {
231            let path = store_path.join(git_repo_path);
232            canonicalize_git_repo_path(&path)
233                .context(&path)
234                .map_err(GitBackendInitError::Path)?
235        };
236        let git_repo = gix::ThreadSafeRepository::open_opts(
237            canonical_git_repo_path,
238            gix_open_opts_from_settings(settings),
239        )
240        .map_err(GitBackendInitError::OpenRepository)?;
241        Self::init_with_repo(store_path, git_repo_path, git_repo)
242    }
243
244    fn init_with_repo(
245        store_path: &Path,
246        git_repo_path: &Path,
247        git_repo: gix::ThreadSafeRepository,
248    ) -> Result<Self, Box<GitBackendInitError>> {
249        let extra_path = store_path.join("extra");
250        fs::create_dir(&extra_path)
251            .context(&extra_path)
252            .map_err(GitBackendInitError::Path)?;
253        let target_path = store_path.join("git_target");
254        if cfg!(windows) && git_repo_path.is_relative() {
255            // When a repository is created in Windows, format the path with *forward
256            // slashes* and not backwards slashes. This makes it possible to use the same
257            // repository under Windows Subsystem for Linux.
258            //
259            // This only works for relative paths. If the path is absolute, there's not much
260            // we can do, and it simply won't work inside and outside WSL at the same time.
261            let git_repo_path_string = git_repo_path
262                .components()
263                .map(|component| component.as_os_str().to_str().unwrap().to_owned())
264                .join("/");
265            fs::write(&target_path, git_repo_path_string.as_bytes())
266                .context(&target_path)
267                .map_err(GitBackendInitError::Path)?;
268        } else {
269            fs::write(&target_path, git_repo_path.to_str().unwrap().as_bytes())
270                .context(&target_path)
271                .map_err(GitBackendInitError::Path)?;
272        };
273        let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
274        Ok(GitBackend::new(git_repo, extra_metadata_store))
275    }
276
277    pub fn load(
278        settings: &UserSettings,
279        store_path: &Path,
280    ) -> Result<Self, Box<GitBackendLoadError>> {
281        let git_repo_path = {
282            let target_path = store_path.join("git_target");
283            let git_repo_path_str = fs::read_to_string(&target_path)
284                .context(&target_path)
285                .map_err(GitBackendLoadError::Path)?;
286            let git_repo_path = store_path.join(git_repo_path_str);
287            canonicalize_git_repo_path(&git_repo_path)
288                .context(&git_repo_path)
289                .map_err(GitBackendLoadError::Path)?
290        };
291        let repo = gix::ThreadSafeRepository::open_opts(
292            git_repo_path,
293            gix_open_opts_from_settings(settings),
294        )
295        .map_err(GitBackendLoadError::OpenRepository)?;
296        let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
297        Ok(GitBackend::new(repo, extra_metadata_store))
298    }
299
300    fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
301        self.repo.lock().unwrap()
302    }
303
304    /// Returns new thread-local instance to access to the underlying Git repo.
305    pub fn git_repo(&self) -> gix::Repository {
306        self.base_repo.to_thread_local()
307    }
308
309    /// Creates new owned git repository instance.
310    pub fn open_git_repo(&self) -> Result<git2::Repository, git2::Error> {
311        git2::Repository::open(self.git_repo_path())
312    }
313
314    /// Path to the `.git` directory or the repository itself if it's bare.
315    pub fn git_repo_path(&self) -> &Path {
316        self.base_repo.path()
317    }
318
319    /// Path to the working directory if the repository isn't bare.
320    pub fn git_workdir(&self) -> Option<&Path> {
321        self.base_repo.work_dir()
322    }
323
324    fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
325        let mut locked_head = self.cached_extra_metadata.lock().unwrap();
326        match locked_head.as_ref() {
327            Some(head) => Ok(head.clone()),
328            None => {
329                let table = self
330                    .extra_metadata_store
331                    .get_head()
332                    .map_err(GitBackendError::ReadMetadata)?;
333                *locked_head = Some(table.clone());
334                Ok(table)
335            }
336        }
337    }
338
339    fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> {
340        let table = self
341            .extra_metadata_store
342            .get_head_locked()
343            .map_err(GitBackendError::ReadMetadata)?;
344        Ok(table)
345    }
346
347    fn save_extra_metadata_table(
348        &self,
349        mut_table: MutableTable,
350        _table_lock: &FileLock,
351    ) -> BackendResult<()> {
352        let table = self
353            .extra_metadata_store
354            .save_table(mut_table)
355            .map_err(GitBackendError::WriteMetadata)?;
356        // Since the parent table was the head, saved table are likely to be new head.
357        // If it's not, cache will be reloaded when entry can't be found.
358        *self.cached_extra_metadata.lock().unwrap() = Some(table);
359        Ok(())
360    }
361
362    /// Imports the given commits and ancestors from the backing Git repo.
363    ///
364    /// The `head_ids` may contain commits that have already been imported, but
365    /// the caller should filter them out to eliminate redundant I/O processing.
366    #[tracing::instrument(skip(self, head_ids))]
367    pub fn import_head_commits<'a>(
368        &self,
369        head_ids: impl IntoIterator<Item = &'a CommitId>,
370    ) -> BackendResult<()> {
371        self.import_head_commits_with_tree_conflicts(head_ids, true)
372    }
373
374    fn import_head_commits_with_tree_conflicts<'a>(
375        &self,
376        head_ids: impl IntoIterator<Item = &'a CommitId>,
377        uses_tree_conflict_format: bool,
378    ) -> BackendResult<()> {
379        let head_ids: HashSet<&CommitId> = head_ids
380            .into_iter()
381            .filter(|&id| *id != self.root_commit_id)
382            .collect();
383        if head_ids.is_empty() {
384            return Ok(());
385        }
386
387        // Create no-gc ref even if known to the extras table. Concurrent GC
388        // process might have deleted the no-gc ref.
389        let locked_repo = self.lock_git_repo();
390        locked_repo
391            .edit_references(head_ids.iter().copied().map(to_no_gc_ref_update))
392            .map_err(|err| BackendError::Other(Box::new(err)))?;
393
394        // These commits are imported from Git. Make our change ids persist (otherwise
395        // future write_commit() could reassign new change id.)
396        tracing::debug!(
397            heads_count = head_ids.len(),
398            "import extra metadata entries"
399        );
400        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
401        let mut mut_table = table.start_mutation();
402        import_extra_metadata_entries_from_heads(
403            &locked_repo,
404            &mut mut_table,
405            &table_lock,
406            &head_ids,
407            uses_tree_conflict_format,
408        )?;
409        self.save_extra_metadata_table(mut_table, &table_lock)
410    }
411
412    fn read_file_sync(&self, id: &FileId) -> BackendResult<Box<dyn Read>> {
413        let git_blob_id = validate_git_object_id(id)?;
414        let locked_repo = self.lock_git_repo();
415        let mut blob = locked_repo
416            .find_object(git_blob_id)
417            .map_err(|err| map_not_found_err(err, id))?
418            .try_into_blob()
419            .map_err(|err| to_read_object_err(err, id))?;
420        Ok(Box::new(Cursor::new(blob.take_data())))
421    }
422
423    fn new_diff_platform(&self) -> BackendResult<gix::diff::blob::Platform> {
424        let attributes = gix::worktree::Stack::new(
425            Path::new(""),
426            gix::worktree::stack::State::AttributesStack(Default::default()),
427            gix::worktree::glob::pattern::Case::Sensitive,
428            Vec::new(),
429            Vec::new(),
430        );
431        let filter = gix::diff::blob::Pipeline::new(
432            Default::default(),
433            gix::filter::plumbing::Pipeline::new(
434                self.git_repo()
435                    .command_context()
436                    .map_err(|err| BackendError::Other(Box::new(err)))?,
437                Default::default(),
438            ),
439            Vec::new(),
440            Default::default(),
441        );
442        Ok(gix::diff::blob::Platform::new(
443            Default::default(),
444            filter,
445            gix::diff::blob::pipeline::Mode::ToGit,
446            attributes,
447        ))
448    }
449
450    fn read_tree_for_commit<'repo>(
451        &self,
452        repo: &'repo gix::Repository,
453        id: &CommitId,
454    ) -> BackendResult<gix::Tree<'repo>> {
455        let tree = self.read_commit(id).block_on()?.root_tree.to_merge();
456        // TODO(kfm): probably want to do something here if it is a merge
457        let tree_id = tree.first().clone();
458        let gix_id = validate_git_object_id(&tree_id)?;
459        repo.find_object(gix_id)
460            .map_err(|err| map_not_found_err(err, &tree_id))?
461            .try_into_tree()
462            .map_err(|err| to_read_object_err(err, &tree_id))
463    }
464}
465
466/// Canonicalizes the given `path` except for the last `".git"` component.
467///
468/// The last path component matters when opening a Git repo without `core.bare`
469/// config. This config is usually set, but the "repo" tool will set up such
470/// repositories and symlinks. Opening such repo with fully-canonicalized path
471/// would turn a colocated Git repo into a bare repo.
472pub fn canonicalize_git_repo_path(path: &Path) -> io::Result<PathBuf> {
473    if path.ends_with(".git") {
474        let workdir = path.parent().unwrap();
475        dunce::canonicalize(workdir).map(|dir| dir.join(".git"))
476    } else {
477        dunce::canonicalize(path)
478    }
479}
480
481fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options {
482    let user_name = settings.user_name();
483    let user_email = settings.user_email();
484    gix::open::Options::default()
485        .config_overrides([
486            // Committer has to be configured to record reflog. Author isn't
487            // needed, but let's copy the same values.
488            format!("author.name={user_name}"),
489            format!("author.email={user_email}"),
490            format!("committer.name={user_name}"),
491            format!("committer.email={user_email}"),
492        ])
493        // The git_target path should point the repository, not the working directory.
494        .open_path_as_is(true)
495}
496
497/// Reads the `jj:trees` header from the commit.
498fn root_tree_from_header(git_commit: &CommitRef) -> Result<Option<MergedTreeId>, ()> {
499    for (key, value) in &git_commit.extra_headers {
500        if *key == JJ_TREES_COMMIT_HEADER {
501            let mut tree_ids = SmallVec::new();
502            for hex in str::from_utf8(value.as_ref()).or(Err(()))?.split(' ') {
503                let tree_id = TreeId::try_from_hex(hex).or(Err(()))?;
504                if tree_id.as_bytes().len() != HASH_LENGTH {
505                    return Err(());
506                }
507                tree_ids.push(tree_id);
508            }
509            if tree_ids.len() % 2 == 0 {
510                return Err(());
511            }
512            return Ok(Some(MergedTreeId::Merge(Merge::from_vec(tree_ids))));
513        }
514    }
515    Ok(None)
516}
517
518fn commit_from_git_without_root_parent(
519    id: &CommitId,
520    git_object: &gix::Object,
521    uses_tree_conflict_format: bool,
522    is_shallow: bool,
523) -> BackendResult<Commit> {
524    let commit = git_object
525        .try_to_commit_ref()
526        .map_err(|err| to_read_object_err(err, id))?;
527
528    // We reverse the bits of the commit id to create the change id. We don't want
529    // to use the first bytes unmodified because then it would be ambiguous
530    // if a given hash prefix refers to the commit id or the change id. It
531    // would have been enough to pick the last 16 bytes instead of the
532    // leading 16 bytes to address that. We also reverse the bits to make it less
533    // likely that users depend on any relationship between the two ids.
534    let change_id = ChangeId::new(
535        id.as_bytes()[4..HASH_LENGTH]
536            .iter()
537            .rev()
538            .map(|b| b.reverse_bits())
539            .collect(),
540    );
541    // shallow commits don't have parents their parents actually fetched, so we
542    // discard them here
543    // TODO: This causes issues when a shallow repository is deepened/unshallowed
544    let parents = if is_shallow {
545        vec![]
546    } else {
547        commit
548            .parents()
549            .map(|oid| CommitId::from_bytes(oid.as_bytes()))
550            .collect_vec()
551    };
552    let tree_id = TreeId::from_bytes(commit.tree().as_bytes());
553    // If this commit is a conflict, we'll update the root tree later, when we read
554    // the extra metadata.
555    let root_tree = root_tree_from_header(&commit)
556        .map_err(|()| to_read_object_err("Invalid jj:trees header", id))?;
557    let root_tree = root_tree.unwrap_or_else(|| {
558        if uses_tree_conflict_format {
559            MergedTreeId::resolved(tree_id)
560        } else {
561            MergedTreeId::Legacy(tree_id)
562        }
563    });
564    // Use lossy conversion as commit message with "mojibake" is still better than
565    // nothing.
566    // TODO: what should we do with commit.encoding?
567    let description = String::from_utf8_lossy(commit.message).into_owned();
568    let author = signature_from_git(commit.author());
569    let committer = signature_from_git(commit.committer());
570
571    // If the commit is signed, extract both the signature and the signed data
572    // (which is the commit buffer with the gpgsig header omitted).
573    // We have to re-parse the raw commit data because gix CommitRef does not give
574    // us the sogned data, only the signature.
575    // Ideally, we could use try_to_commit_ref_iter at the beginning of this
576    // function and extract everything from that. For now, this works
577    let secure_sig = commit
578        .extra_headers
579        .iter()
580        // gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too
581        .any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256")
582        .then(|| CommitRefIter::signature(&git_object.data))
583        .transpose()
584        .map_err(|err| to_read_object_err(err, id))?
585        .flatten()
586        .map(|(sig, data)| SecureSig {
587            data: data.to_bstring().into(),
588            sig: sig.into_owned().into(),
589        });
590
591    Ok(Commit {
592        parents,
593        predecessors: vec![],
594        // If this commit has associated extra metadata, we may reset this later.
595        root_tree,
596        change_id,
597        description,
598        author,
599        committer,
600        secure_sig,
601    })
602}
603
/// Value written in place of an empty name or email in Git signatures (see
/// `signature_to_git`), and mapped back to an empty string when reading (see
/// `signature_from_git`).
const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING";
605
606fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature {
607    let name = signature.name;
608    let name = if name != EMPTY_STRING_PLACEHOLDER {
609        String::from_utf8_lossy(name).into_owned()
610    } else {
611        "".to_string()
612    };
613    let email = signature.email;
614    let email = if email != EMPTY_STRING_PLACEHOLDER {
615        String::from_utf8_lossy(email).into_owned()
616    } else {
617        "".to_string()
618    };
619    let timestamp = MillisSinceEpoch(signature.time.seconds * 1000);
620    let tz_offset = signature.time.offset.div_euclid(60); // in minutes
621    Signature {
622        name,
623        email,
624        timestamp: Timestamp {
625            timestamp,
626            tz_offset,
627        },
628    }
629}
630
631fn signature_to_git(signature: &Signature) -> gix::actor::SignatureRef<'_> {
632    // git does not support empty names or emails
633    let name = if !signature.name.is_empty() {
634        &signature.name
635    } else {
636        EMPTY_STRING_PLACEHOLDER
637    };
638    let email = if !signature.email.is_empty() {
639        &signature.email
640    } else {
641        EMPTY_STRING_PLACEHOLDER
642    };
643    let time = gix::date::Time::new(
644        signature.timestamp.timestamp.0.div_euclid(1000),
645        signature.timestamp.tz_offset * 60, // in seconds
646    );
647    gix::actor::SignatureRef {
648        name: name.into(),
649        email: email.into(),
650        time,
651    }
652}
653
654fn serialize_extras(commit: &Commit) -> Vec<u8> {
655    let mut proto = crate::protos::git_store::Commit {
656        change_id: commit.change_id.to_bytes(),
657        ..Default::default()
658    };
659    if let MergedTreeId::Merge(tree_ids) = &commit.root_tree {
660        proto.uses_tree_conflict_format = true;
661        if !tree_ids.is_resolved() {
662            proto.root_tree = tree_ids.iter().map(|r| r.to_bytes()).collect();
663        }
664    }
665    for predecessor in &commit.predecessors {
666        proto.predecessors.push(predecessor.to_bytes());
667    }
668    proto.encode_to_vec()
669}
670
671fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
672    let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
673    commit.change_id = ChangeId::new(proto.change_id);
674    if proto.uses_tree_conflict_format {
675        if !proto.root_tree.is_empty() {
676            let merge_builder: MergeBuilder<_> = proto
677                .root_tree
678                .iter()
679                .map(|id_bytes| TreeId::from_bytes(id_bytes))
680                .collect();
681            let merge = merge_builder.build();
682            // Check that the trees from the extras match the one we found in the jj:trees
683            // header
684            if let MergedTreeId::Merge(existing_merge) = &commit.root_tree {
685                assert!(existing_merge.is_resolved() || *existing_merge == merge);
686            }
687            commit.root_tree = MergedTreeId::Merge(merge);
688        } else {
689            // uses_tree_conflict_format was set but there was no root_tree override in the
690            // proto, which means we should just promote the tree id from the
691            // git commit to be a known-conflict-free tree
692            let MergedTreeId::Legacy(legacy_tree_id) = &commit.root_tree else {
693                panic!("root tree should have been initialized to a legacy id");
694            };
695            commit.root_tree = MergedTreeId::resolved(legacy_tree_id.clone());
696        }
697    }
698    for predecessor in &proto.predecessors {
699        commit.predecessors.push(CommitId::from_bytes(predecessor));
700    }
701}
702
703/// Returns `RefEdit` that will create a ref in `refs/jj/keep` if not exist.
704/// Used for preventing GC of commits we create.
705fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit {
706    let name = format!("{NO_GC_REF_NAMESPACE}{id}");
707    let new = gix::refs::Target::Object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()));
708    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(new.clone());
709    gix::refs::transaction::RefEdit {
710        change: gix::refs::transaction::Change::Update {
711            log: gix::refs::transaction::LogChange {
712                message: "used by jj".into(),
713                ..Default::default()
714            },
715            expected,
716            new,
717        },
718        name: name.try_into().unwrap(),
719        deref: false,
720    }
721}
722
723fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit {
724    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(git_ref.target);
725    gix::refs::transaction::RefEdit {
726        change: gix::refs::transaction::Change::Delete {
727            expected,
728            log: gix::refs::transaction::RefLog::AndReference,
729        },
730        name: git_ref.name,
731        deref: false,
732    }
733}
734
735/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other
736/// unreachable and non-head refs.
737fn recreate_no_gc_refs(
738    git_repo: &gix::Repository,
739    new_heads: impl IntoIterator<Item = CommitId>,
740    keep_newer: SystemTime,
741) -> BackendResult<()> {
742    // Calculate diff between existing no-gc refs and new heads.
743    let new_heads: HashSet<CommitId> = new_heads.into_iter().collect();
744    let mut no_gc_refs_to_keep_count: usize = 0;
745    let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new();
746    let git_references = git_repo
747        .references()
748        .map_err(|err| BackendError::Other(err.into()))?;
749    let no_gc_refs_iter = git_references
750        .prefixed(NO_GC_REF_NAMESPACE)
751        .map_err(|err| BackendError::Other(err.into()))?;
752    for git_ref in no_gc_refs_iter {
753        let git_ref = git_ref.map_err(BackendError::Other)?.detach();
754        let oid = git_ref.target.try_id().ok_or_else(|| {
755            let name = git_ref.name.as_bstr();
756            BackendError::Other(format!("Symbolic no-gc ref found: {name}").into())
757        })?;
758        let id = CommitId::from_bytes(oid.as_bytes());
759        let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex();
760        if new_heads.contains(&id) && name_good {
761            no_gc_refs_to_keep_count += 1;
762            continue;
763        }
764        // Check timestamp of loose ref, but this is still racy on re-import
765        // because:
766        // - existing packed ref won't be demoted to loose ref
767        // - existing loose ref won't be touched
768        //
769        // TODO: might be better to switch to a dummy merge, where new no-gc ref
770        // will always have a unique name. Doing that with the current
771        // ref-per-head strategy would increase the number of the no-gc refs.
772        // https://github.com/jj-vcs/jj/pull/2659#issuecomment-1837057782
773        let loose_ref_path = git_repo.path().join(git_ref.name.to_path());
774        if let Ok(metadata) = loose_ref_path.metadata() {
775            let mtime = metadata.modified().expect("unsupported platform?");
776            if mtime > keep_newer {
777                tracing::trace!(?git_ref, "not deleting new");
778                no_gc_refs_to_keep_count += 1;
779                continue;
780            }
781        }
782        // Also deletes no-gc ref of random name created by old jj.
783        tracing::trace!(?git_ref, ?name_good, "will delete");
784        no_gc_refs_to_delete.push(git_ref);
785    }
786    tracing::info!(
787        new_heads_count = new_heads.len(),
788        no_gc_refs_to_keep_count,
789        no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(),
790        "collected reachable refs"
791    );
792
793    // It's slow to delete packed refs one by one, so update refs all at once.
794    let ref_edits = itertools::chain(
795        no_gc_refs_to_delete.into_iter().map(to_ref_deletion),
796        new_heads.iter().map(to_no_gc_ref_update),
797    );
798    git_repo
799        .edit_references(ref_edits)
800        .map_err(|err| BackendError::Other(err.into()))?;
801
802    Ok(())
803}
804
805fn run_git_gc(git_dir: &Path) -> Result<(), GitGcError> {
806    let mut git = Command::new("git");
807    git.arg("--git-dir=."); // turn off discovery
808    git.arg("gc");
809    // Don't specify it by GIT_DIR/--git-dir. On Windows, the path could be
810    // canonicalized as UNC path, which wouldn't be supported by git.
811    git.current_dir(git_dir);
812    // TODO: pass output to UI layer instead of printing directly here
813    let status = git.status().map_err(GitGcError::GcCommand)?;
814    if !status.success() {
815        return Err(GitGcError::GcCommandErrorStatus(status));
816    }
817    Ok(())
818}
819
820fn validate_git_object_id(id: &impl ObjectId) -> BackendResult<gix::ObjectId> {
821    if id.as_bytes().len() != HASH_LENGTH {
822        return Err(BackendError::InvalidHashLength {
823            expected: HASH_LENGTH,
824            actual: id.as_bytes().len(),
825            object_type: id.object_type(),
826            hash: id.hex(),
827        });
828    }
829    Ok(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
830}
831
832fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError {
833    if matches!(err, gix::object::find::existing::Error::NotFound { .. }) {
834        BackendError::ObjectNotFound {
835            object_type: id.object_type(),
836            hash: id.hex(),
837            source: Box::new(err),
838        }
839    } else {
840        to_read_object_err(err, id)
841    }
842}
843
844fn to_read_object_err(
845    err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
846    id: &impl ObjectId,
847) -> BackendError {
848    BackendError::ReadObject {
849        object_type: id.object_type(),
850        hash: id.hex(),
851        source: err.into(),
852    }
853}
854
855fn to_invalid_utf8_err(source: str::Utf8Error, id: &impl ObjectId) -> BackendError {
856    BackendError::InvalidUtf8 {
857        object_type: id.object_type(),
858        hash: id.hex(),
859        source,
860    }
861}
862
863fn import_extra_metadata_entries_from_heads(
864    git_repo: &gix::Repository,
865    mut_table: &mut MutableTable,
866    _table_lock: &FileLock,
867    head_ids: &HashSet<&CommitId>,
868    uses_tree_conflict_format: bool,
869) -> BackendResult<()> {
870    let shallow_commits = git_repo
871        .shallow_commits()
872        .map_err(|e| BackendError::Other(Box::new(e)))?;
873
874    let mut work_ids = head_ids
875        .iter()
876        .filter(|&id| mut_table.get_value(id.as_bytes()).is_none())
877        .map(|&id| id.clone())
878        .collect_vec();
879    while let Some(id) = work_ids.pop() {
880        let git_object = git_repo
881            .find_object(validate_git_object_id(&id)?)
882            .map_err(|err| map_not_found_err(err, &id))?;
883        let is_shallow = shallow_commits
884            .as_ref()
885            .is_some_and(|shallow| shallow.contains(&git_object.id));
886        // TODO(#1624): Should we read the root tree here and check if it has a
887        // `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
888        // change the description of a commit with tree-level conflicts.
889        let commit = commit_from_git_without_root_parent(
890            &id,
891            &git_object,
892            uses_tree_conflict_format,
893            is_shallow,
894        )?;
895        mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
896        work_ids.extend(
897            commit
898                .parents
899                .into_iter()
900                .filter(|id| mut_table.get_value(id.as_bytes()).is_none()),
901        );
902    }
903    Ok(())
904}
905
906impl Debug for GitBackend {
907    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
908        f.debug_struct("GitBackend")
909            .field("path", &self.git_repo_path())
910            .finish()
911    }
912}
913
914#[async_trait]
915impl Backend for GitBackend {
    /// Exposes this backend as `&dyn Any`.
    fn as_any(&self) -> &dyn Any {
        self
    }
919
    /// Returns the backend name by delegating to the associated
    /// `Self::name()` function.
    fn name(&self) -> &str {
        Self::name()
    }
923
    /// Returns the commit id length, which is `HASH_LENGTH` for this backend.
    fn commit_id_length(&self) -> usize {
        HASH_LENGTH
    }
927
    /// Returns the change id length, which is `CHANGE_ID_LENGTH` for this
    /// backend.
    fn change_id_length(&self) -> usize {
        CHANGE_ID_LENGTH
    }
931
    /// Returns the id of the root commit stored in this backend instance.
    fn root_commit_id(&self) -> &CommitId {
        &self.root_commit_id
    }
935
    /// Returns the change id of the root commit stored in this backend
    /// instance.
    fn root_change_id(&self) -> &ChangeId {
        &self.root_change_id
    }
939
    /// Returns the id of the empty tree stored in this backend instance.
    fn empty_tree_id(&self) -> &TreeId {
        &self.empty_tree_id
    }
943
    /// Always reports a concurrency of 1.
    // NOTE(review): presumably a hint for how many concurrent requests callers
    // should issue against this backend; confirm against the `Backend` trait
    // documentation.
    fn concurrency(&self) -> usize {
        1
    }
947
    /// Returns a reader for the file identified by `id` by delegating to
    /// `read_file_sync`. The path is not used.
    async fn read_file(&self, _path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>> {
        self.read_file_sync(id)
    }
951
952    async fn write_file(
953        &self,
954        _path: &RepoPath,
955        contents: &mut (dyn Read + Send),
956    ) -> BackendResult<FileId> {
957        let mut bytes = Vec::new();
958        contents.read_to_end(&mut bytes).unwrap();
959        let locked_repo = self.lock_git_repo();
960        let oid = locked_repo
961            .write_blob(bytes)
962            .map_err(|err| BackendError::WriteObject {
963                object_type: "file",
964                source: Box::new(err),
965            })?;
966        Ok(FileId::new(oid.as_bytes().to_vec()))
967    }
968
969    async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
970        let git_blob_id = validate_git_object_id(id)?;
971        let locked_repo = self.lock_git_repo();
972        let mut blob = locked_repo
973            .find_object(git_blob_id)
974            .map_err(|err| map_not_found_err(err, id))?
975            .try_into_blob()
976            .map_err(|err| to_read_object_err(err, id))?;
977        let target = String::from_utf8(blob.take_data())
978            .map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?;
979        Ok(target)
980    }
981
982    async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
983        let locked_repo = self.lock_git_repo();
984        let oid =
985            locked_repo
986                .write_blob(target.as_bytes())
987                .map_err(|err| BackendError::WriteObject {
988                    object_type: "symlink",
989                    source: Box::new(err),
990                })?;
991        Ok(SymlinkId::new(oid.as_bytes().to_vec()))
992    }
993
994    async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
995        if id == &self.empty_tree_id {
996            return Ok(Tree::default());
997        }
998        let git_tree_id = validate_git_object_id(id)?;
999
1000        let locked_repo = self.lock_git_repo();
1001        let git_tree = locked_repo
1002            .find_object(git_tree_id)
1003            .map_err(|err| map_not_found_err(err, id))?
1004            .try_into_tree()
1005            .map_err(|err| to_read_object_err(err, id))?;
1006        let mut tree = Tree::default();
1007        for entry in git_tree.iter() {
1008            let entry = entry.map_err(|err| to_read_object_err(err, id))?;
1009            let name =
1010                str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?;
1011            let (name, value) = match entry.mode().kind() {
1012                gix::object::tree::EntryKind::Tree => {
1013                    let id = TreeId::from_bytes(entry.oid().as_bytes());
1014                    (name, TreeValue::Tree(id))
1015                }
1016                gix::object::tree::EntryKind::Blob => {
1017                    let id = FileId::from_bytes(entry.oid().as_bytes());
1018                    if let Some(basename) = name.strip_suffix(CONFLICT_SUFFIX) {
1019                        (
1020                            basename,
1021                            TreeValue::Conflict(ConflictId::from_bytes(entry.oid().as_bytes())),
1022                        )
1023                    } else {
1024                        (
1025                            name,
1026                            TreeValue::File {
1027                                id,
1028                                executable: false,
1029                            },
1030                        )
1031                    }
1032                }
1033                gix::object::tree::EntryKind::BlobExecutable => {
1034                    let id = FileId::from_bytes(entry.oid().as_bytes());
1035                    (
1036                        name,
1037                        TreeValue::File {
1038                            id,
1039                            executable: true,
1040                        },
1041                    )
1042                }
1043                gix::object::tree::EntryKind::Link => {
1044                    let id = SymlinkId::from_bytes(entry.oid().as_bytes());
1045                    (name, TreeValue::Symlink(id))
1046                }
1047                gix::object::tree::EntryKind::Commit => {
1048                    let id = CommitId::from_bytes(entry.oid().as_bytes());
1049                    (name, TreeValue::GitSubmodule(id))
1050                }
1051            };
1052            tree.set(RepoPathComponentBuf::from(name), value);
1053        }
1054        Ok(tree)
1055    }
1056
1057    async fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
1058        // Tree entries to be written must be sorted by Entry::filename(), which
1059        // is slightly different from the order of our backend::Tree.
1060        let entries = contents
1061            .entries()
1062            .map(|entry| {
1063                let name = entry.name().as_internal_str();
1064                match entry.value() {
1065                    TreeValue::File {
1066                        id,
1067                        executable: false,
1068                    } => gix::objs::tree::Entry {
1069                        mode: gix::object::tree::EntryKind::Blob.into(),
1070                        filename: name.into(),
1071                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1072                    },
1073                    TreeValue::File {
1074                        id,
1075                        executable: true,
1076                    } => gix::objs::tree::Entry {
1077                        mode: gix::object::tree::EntryKind::BlobExecutable.into(),
1078                        filename: name.into(),
1079                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1080                    },
1081                    TreeValue::Symlink(id) => gix::objs::tree::Entry {
1082                        mode: gix::object::tree::EntryKind::Link.into(),
1083                        filename: name.into(),
1084                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1085                    },
1086                    TreeValue::Tree(id) => gix::objs::tree::Entry {
1087                        mode: gix::object::tree::EntryKind::Tree.into(),
1088                        filename: name.into(),
1089                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1090                    },
1091                    TreeValue::GitSubmodule(id) => gix::objs::tree::Entry {
1092                        mode: gix::object::tree::EntryKind::Commit.into(),
1093                        filename: name.into(),
1094                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1095                    },
1096                    TreeValue::Conflict(id) => gix::objs::tree::Entry {
1097                        mode: gix::object::tree::EntryKind::Blob.into(),
1098                        filename: (name.to_owned() + CONFLICT_SUFFIX).into(),
1099                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1100                    },
1101                }
1102            })
1103            .sorted_unstable()
1104            .collect();
1105        let locked_repo = self.lock_git_repo();
1106        let oid = locked_repo
1107            .write_object(gix::objs::Tree { entries })
1108            .map_err(|err| BackendError::WriteObject {
1109                object_type: "tree",
1110                source: Box::new(err),
1111            })?;
1112        Ok(TreeId::from_bytes(oid.as_bytes()))
1113    }
1114
1115    fn read_conflict(&self, _path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict> {
1116        let mut file = self.read_file_sync(&FileId::new(id.to_bytes()))?;
1117        let mut data = String::new();
1118        file.read_to_string(&mut data)
1119            .map_err(|err| BackendError::ReadObject {
1120                object_type: "conflict".to_owned(),
1121                hash: id.hex(),
1122                source: err.into(),
1123            })?;
1124        let json: serde_json::Value = serde_json::from_str(&data).unwrap();
1125        Ok(Conflict {
1126            removes: conflict_term_list_from_json(json.get("removes").unwrap()),
1127            adds: conflict_term_list_from_json(json.get("adds").unwrap()),
1128        })
1129    }
1130
1131    fn write_conflict(&self, _path: &RepoPath, conflict: &Conflict) -> BackendResult<ConflictId> {
1132        let json = serde_json::json!({
1133            "removes": conflict_term_list_to_json(&conflict.removes),
1134            "adds": conflict_term_list_to_json(&conflict.adds),
1135        });
1136        let json_string = json.to_string();
1137        let bytes = json_string.as_bytes();
1138        let locked_repo = self.lock_git_repo();
1139        let oid = locked_repo
1140            .write_blob(bytes)
1141            .map_err(|err| BackendError::WriteObject {
1142                object_type: "conflict",
1143                source: Box::new(err),
1144            })?;
1145        Ok(ConflictId::from_bytes(oid.as_bytes()))
1146    }
1147
1148    #[tracing::instrument(skip(self))]
1149    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
1150        if *id == self.root_commit_id {
1151            return Ok(make_root_commit(
1152                self.root_change_id().clone(),
1153                self.empty_tree_id.clone(),
1154            ));
1155        }
1156        let git_commit_id = validate_git_object_id(id)?;
1157
1158        let mut commit = {
1159            let locked_repo = self.lock_git_repo();
1160            let git_object = locked_repo
1161                .find_object(git_commit_id)
1162                .map_err(|err| map_not_found_err(err, id))?;
1163            let is_shallow = locked_repo
1164                .shallow_commits()
1165                .ok()
1166                .flatten()
1167                .is_some_and(|shallow| shallow.contains(&git_object.id));
1168            commit_from_git_without_root_parent(id, &git_object, false, is_shallow)?
1169        };
1170        if commit.parents.is_empty() {
1171            commit.parents.push(self.root_commit_id.clone());
1172        };
1173
1174        let table = self.cached_extra_metadata_table()?;
1175        if let Some(extras) = table.get_value(id.as_bytes()) {
1176            deserialize_extras(&mut commit, extras);
1177        } else {
1178            // TODO: Remove this hack and map to ObjectNotFound error if we're sure that
1179            // there are no reachable ancestor commits without extras metadata. Git commits
1180            // imported by jj < 0.8.0 might not have extras (#924).
1181            // https://github.com/jj-vcs/jj/issues/2343
1182            tracing::info!("unimported Git commit found");
1183            self.import_head_commits([id])?;
1184            let table = self.cached_extra_metadata_table()?;
1185            let extras = table.get_value(id.as_bytes()).unwrap();
1186            deserialize_extras(&mut commit, extras);
1187        }
1188        Ok(commit)
1189    }
1190
    /// Writes `contents` as a Git commit and records its extras in the extra
    /// metadata table.
    ///
    /// If `sign_with` is given, the serialized commit is passed to it and the
    /// resulting signature is stored in a `gpgsig` header and in
    /// `contents.secure_sig`.
    ///
    /// Returns the id of the written commit together with `contents`, whose
    /// committer timestamp is updated to the value that was actually written.
    ///
    /// # Errors
    ///
    /// Fails if the commit has no parents, if it is a merge with the root
    /// commit as one of its parents, if a tree or parent id has an invalid
    /// length, if signing fails, or if writing to the Git repository or the
    /// metadata table fails.
    ///
    /// # Panics
    ///
    /// Panics if `contents.secure_sig` is already set.
    async fn write_commit(
        &self,
        mut contents: Commit,
        mut sign_with: Option<&mut SigningFn>,
    ) -> BackendResult<(CommitId, Commit)> {
        assert!(contents.secure_sig.is_none(), "commit.secure_sig was set");

        let locked_repo = self.lock_git_repo();
        // An unresolved merge of trees is written as a special conflict tree.
        let git_tree_id = match &contents.root_tree {
            MergedTreeId::Legacy(tree_id) => validate_git_object_id(tree_id)?,
            MergedTreeId::Merge(tree_ids) => match tree_ids.as_resolved() {
                Some(tree_id) => validate_git_object_id(tree_id)?,
                None => write_tree_conflict(&locked_repo, tree_ids)?,
            },
        };
        let author = signature_to_git(&contents.author);
        let mut committer = signature_to_git(&contents.committer);
        let message = &contents.description;
        if contents.parents.is_empty() {
            return Err(BackendError::Other(
                "Cannot write a commit with no parents".into(),
            ));
        }
        let mut parents = SmallVec::new();
        for parent_id in &contents.parents {
            if *parent_id == self.root_commit_id {
                // Git doesn't have a root commit, so if the parent is the root commit, we don't
                // add it to the list of parents to write in the Git commit. We also check that
                // there are no other parents since Git cannot represent a merge between a root
                // commit and another commit.
                if contents.parents.len() > 1 {
                    return Err(BackendError::Unsupported(
                        "The Git backend does not support creating merge commits with the root \
                         commit as one of the parents."
                            .to_owned(),
                    ));
                }
            } else {
                parents.push(validate_git_object_id(parent_id)?);
            }
        }
        // For an unresolved merge, the ids of all its trees are recorded in an
        // extra commit header as space-separated hex strings.
        let mut extra_headers = vec![];
        if let MergedTreeId::Merge(tree_ids) = &contents.root_tree {
            if !tree_ids.is_resolved() {
                let value = tree_ids.iter().map(|id| id.hex()).join(" ").into_bytes();
                extra_headers.push((
                    BString::new(JJ_TREES_COMMIT_HEADER.to_vec()),
                    BString::new(value),
                ));
            }
        }
        let extras = serialize_extras(&contents);

        // If two writers write commits of the same id with different metadata, they
        // will both succeed and the metadata entries will be "merged" later. Since
        // metadata entry is keyed by the commit id, one of the entries would be lost.
        // To prevent such race condition locally, we extend the scope covered by the
        // table lock. This is still racy if multiple machines are involved and the
        // repository is rsync-ed.
        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
        // Each iteration builds, optionally signs, and writes the commit object.
        // The loop repeats with an earlier committer timestamp while the
        // resulting id is already recorded with different extras.
        let id = loop {
            let mut commit = gix::objs::Commit {
                message: message.to_owned().into(),
                tree: git_tree_id,
                author: author.into(),
                committer: committer.into(),
                encoding: None,
                parents: parents.clone(),
                extra_headers: extra_headers.clone(),
            };

            if let Some(sign) = &mut sign_with {
                // we don't use gix pool, but at least use their heuristic
                let mut data = Vec::with_capacity(512);
                commit.write_to(&mut data).unwrap();

                let sig = sign(&data).map_err(|err| BackendError::WriteObject {
                    object_type: "commit",
                    source: Box::new(err),
                })?;
                commit
                    .extra_headers
                    .push(("gpgsig".into(), sig.clone().into()));
                contents.secure_sig = Some(SecureSig { data, sig });
            }

            let git_id =
                locked_repo
                    .write_object(&commit)
                    .map_err(|err| BackendError::WriteObject {
                        object_type: "commit",
                        source: Box::new(err),
                    })?;

            match table.get_value(git_id.as_bytes()) {
                Some(existing_extras) if existing_extras != extras => {
                    // It's possible a commit already exists with the same commit id but different
                    // change id. Adjust the timestamp until this is no longer the case.
                    committer.time.seconds -= 1;
                }
                _ => break CommitId::from_bytes(git_id.as_bytes()),
            }
        };

        // Everything up to this point had no permanent effect on the repo except
        // GC-able objects
        locked_repo
            .edit_reference(to_no_gc_ref_update(&id))
            .map_err(|err| BackendError::Other(Box::new(err)))?;

        // Update the signature to match the one that was actually written to the object
        // store
        contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000);
        let mut mut_table = table.start_mutation();
        mut_table.add_entry(id.to_bytes(), extras);
        self.save_extra_metadata_table(mut_table, &table_lock)?;
        Ok((id, contents))
    }
1309
    /// Computes copy records between the trees of `root_id` and `head_id`.
    ///
    /// The two trees are diffed with rewrite tracking enabled. Every
    /// `Rewrite` change is turned into a `CopyRecord` whose source belongs to
    /// `root_id` and whose target belongs to `head_id`; all other changes are
    /// ignored. If `paths` is given, only records whose target path is in
    /// `paths` are kept.
    ///
    /// The whole diff is computed before this function returns; the returned
    /// stream iterates over the collected records. A failure to convert a
    /// single change (e.g. a path that is not valid UTF-8) is placed in the
    /// stream as an `Err` item, while a failure to read the trees or to run
    /// the diff is returned directly.
    fn get_copy_records(
        &self,
        paths: Option<&[RepoPathBuf]>,
        root_id: &CommitId,
        head_id: &CommitId,
    ) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
        let repo = self.git_repo();
        let root_tree = self.read_tree_for_commit(&repo, root_id)?;
        let head_tree = self.read_tree_for_commit(&repo, head_id)?;

        // Converts one diff change to a copy record. `Ok(None)` means the
        // change is not a rewrite or is excluded by `paths`.
        let change_to_copy_record =
            |change: gix::object::tree::diff::Change| -> BackendResult<Option<CopyRecord>> {
                let gix::object::tree::diff::Change::Rewrite {
                    source_location,
                    source_id,
                    location: dest_location,
                    ..
                } = change
                else {
                    return Ok(None);
                };

                let source = str::from_utf8(source_location)
                    .map_err(|err| to_invalid_utf8_err(err, root_id))?;
                let dest = str::from_utf8(dest_location)
                    .map_err(|err| to_invalid_utf8_err(err, head_id))?;

                let target = RepoPathBuf::from_internal_string(dest);
                if !paths.map_or(true, |paths| paths.contains(&target)) {
                    return Ok(None);
                }

                Ok(Some(CopyRecord {
                    target,
                    target_commit: head_id.clone(),
                    source: RepoPathBuf::from_internal_string(source),
                    source_file: FileId::from_bytes(source_id.as_bytes()),
                    source_commit: root_id.clone(),
                }))
            };

        let mut records: Vec<BackendResult<CopyRecord>> = Vec::new();
        root_tree
            .changes()
            .map_err(|err| BackendError::Other(err.into()))?
            .options(|opts| {
                // Rewrite and copy detection both use a 0.5 similarity
                // percentage; copy sources are taken from the set of modified
                // files.
                opts.track_path().track_rewrites(Some(gix::diff::Rewrites {
                    copies: Some(gix::diff::rewrites::Copies {
                        source: gix::diff::rewrites::CopySource::FromSetOfModifiedFiles,
                        percentage: Some(0.5),
                    }),
                    percentage: Some(0.5),
                    limit: 1000,
                    track_empty: false,
                }));
            })
            .for_each_to_obtain_tree_with_cache(
                &head_tree,
                &mut self.new_diff_platform()?,
                |change| -> BackendResult<_> {
                    // Conversion errors are recorded as stream items so that
                    // the diff keeps going.
                    match change_to_copy_record(change) {
                        Ok(None) => {}
                        Ok(Some(change)) => records.push(Ok(change)),
                        Err(err) => records.push(Err(err)),
                    }
                    Ok(gix::object::tree::diff::Action::Continue)
                },
            )
            .map_err(|err| BackendError::Other(err.into()))?;
        Ok(Box::pin(futures::stream::iter(records)))
    }
1381
1382    #[tracing::instrument(skip(self, index))]
1383    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
1384        let git_repo = self.lock_git_repo();
1385        let new_heads = index
1386            .all_heads_for_gc()
1387            .map_err(|err| BackendError::Other(err.into()))?
1388            .filter(|id| *id != self.root_commit_id);
1389        recreate_no_gc_refs(&git_repo, new_heads, keep_newer)?;
1390        // TODO: remove unreachable entries from extras table if segment file
1391        // mtime <= keep_newer? (it won't be consistent with no-gc refs
1392        // preserved by the keep_newer timestamp though)
1393        // TODO: remove unreachable extras table segments
1394        // TODO: pass in keep_newer to "git gc" command
1395        run_git_gc(self.git_repo_path()).map_err(|err| BackendError::Other(err.into()))?;
1396        // Since "git gc" will move loose refs into packed refs, in-memory
1397        // packed-refs cache should be invalidated without relying on mtime.
1398        git_repo.refs.force_refresh_packed_buffer().ok();
1399        Ok(())
1400    }
1401}
1402
1403/// Write a tree conflict as a special tree with `.jjconflict-base-N` and
1404/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd.
1405fn write_tree_conflict(
1406    repo: &gix::Repository,
1407    conflict: &Merge<TreeId>,
1408) -> BackendResult<gix::ObjectId> {
1409    // Tree entries to be written must be sorted by Entry::filename().
1410    let mut entries = itertools::chain(
1411        conflict
1412            .removes()
1413            .enumerate()
1414            .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)),
1415        conflict
1416            .adds()
1417            .enumerate()
1418            .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)),
1419    )
1420    .map(|(name, tree_id)| gix::objs::tree::Entry {
1421        mode: gix::object::tree::EntryKind::Tree.into(),
1422        filename: name.into(),
1423        oid: gix::ObjectId::from_bytes_or_panic(tree_id.as_bytes()),
1424    })
1425    .collect_vec();
1426    let readme_id = repo
1427        .write_blob(
1428            r#"This commit was made by jj, https://github.com/jj-vcs/jj.
1429The commit contains file conflicts, and therefore looks wrong when used with plain
1430Git or other tools that are unfamiliar with jj.
1431
1432The .jjconflict-* directories represent the different inputs to the conflict.
1433For details, see
1434https://jj-vcs.github.io/jj/prerelease/git-compatibility/#format-mapping-details
1435
1436If you see this file in your working copy, it probably means that you used a
1437regular `git` command to check out a conflicted commit. Use `jj abandon` to
1438recover.
1439"#,
1440        )
1441        .map_err(|err| {
1442            BackendError::Other(format!("Failed to write README for conflict tree: {err}").into())
1443        })?
1444        .detach();
1445    entries.push(gix::objs::tree::Entry {
1446        mode: gix::object::tree::EntryKind::Blob.into(),
1447        filename: "README".into(),
1448        oid: readme_id,
1449    });
1450    entries.sort_unstable();
1451    let id = repo
1452        .write_object(gix::objs::Tree { entries })
1453        .map_err(|err| BackendError::WriteObject {
1454            object_type: "tree",
1455            source: Box::new(err),
1456        })?;
1457    Ok(id.detach())
1458}
1459
1460fn conflict_term_list_to_json(parts: &[ConflictTerm]) -> serde_json::Value {
1461    serde_json::Value::Array(parts.iter().map(conflict_term_to_json).collect())
1462}
1463
1464fn conflict_term_list_from_json(json: &serde_json::Value) -> Vec<ConflictTerm> {
1465    json.as_array()
1466        .unwrap()
1467        .iter()
1468        .map(conflict_term_from_json)
1469        .collect()
1470}
1471
1472fn conflict_term_to_json(part: &ConflictTerm) -> serde_json::Value {
1473    serde_json::json!({
1474        "value": tree_value_to_json(&part.value),
1475    })
1476}
1477
1478fn conflict_term_from_json(json: &serde_json::Value) -> ConflictTerm {
1479    let json_value = json.get("value").unwrap();
1480    ConflictTerm {
1481        value: tree_value_from_json(json_value),
1482    }
1483}
1484
1485fn tree_value_to_json(value: &TreeValue) -> serde_json::Value {
1486    match value {
1487        TreeValue::File { id, executable } => serde_json::json!({
1488             "file": {
1489                 "id": id.hex(),
1490                 "executable": executable,
1491             },
1492        }),
1493        TreeValue::Symlink(id) => serde_json::json!({
1494             "symlink_id": id.hex(),
1495        }),
1496        TreeValue::Tree(id) => serde_json::json!({
1497             "tree_id": id.hex(),
1498        }),
1499        TreeValue::GitSubmodule(id) => serde_json::json!({
1500             "submodule_id": id.hex(),
1501        }),
1502        TreeValue::Conflict(id) => serde_json::json!({
1503             "conflict_id": id.hex(),
1504        }),
1505    }
1506}
1507
1508fn tree_value_from_json(json: &serde_json::Value) -> TreeValue {
1509    if let Some(json_file) = json.get("file") {
1510        TreeValue::File {
1511            id: FileId::new(bytes_vec_from_json(json_file.get("id").unwrap())),
1512            executable: json_file.get("executable").unwrap().as_bool().unwrap(),
1513        }
1514    } else if let Some(json_id) = json.get("symlink_id") {
1515        TreeValue::Symlink(SymlinkId::new(bytes_vec_from_json(json_id)))
1516    } else if let Some(json_id) = json.get("tree_id") {
1517        TreeValue::Tree(TreeId::new(bytes_vec_from_json(json_id)))
1518    } else if let Some(json_id) = json.get("submodule_id") {
1519        TreeValue::GitSubmodule(CommitId::new(bytes_vec_from_json(json_id)))
1520    } else if let Some(json_id) = json.get("conflict_id") {
1521        TreeValue::Conflict(ConflictId::new(bytes_vec_from_json(json_id)))
1522    } else {
1523        panic!("unexpected json value in conflict: {json:#?}");
1524    }
1525}
1526
1527fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> {
1528    hex::decode(value.as_str().unwrap()).unwrap()
1529}
1530
1531#[cfg(test)]
1532mod tests {
1533    use assert_matches::assert_matches;
1534    use hex::ToHex;
1535    use pollster::FutureExt;
1536    use test_case::test_case;
1537
1538    use super::*;
1539    use crate::config::StackedConfig;
1540    use crate::content_hash::blake2b_hash;
1541    use crate::tests::new_temp_dir;
1542
1543    const GIT_USER: &str = "Someone";
1544    const GIT_EMAIL: &str = "someone@example.com";
1545
1546    fn git_config() -> Vec<bstr::BString> {
1547        vec![
1548            format!("user.name = {GIT_USER}").into(),
1549            format!("user.email = {GIT_EMAIL}").into(),
1550            "init.defaultBranch = master".into(),
1551        ]
1552    }
1553
    /// Isolated gix open options with the entries from `git_config()` applied
    /// as configuration overrides.
    fn open_options() -> gix::open::Options {
        gix::open::Options::isolated().config_overrides(git_config())
    }
1557
1558    fn git_init(directory: impl AsRef<Path>) -> gix::Repository {
1559        gix::ThreadSafeRepository::init_opts(
1560            directory,
1561            gix::create::Kind::WithWorktree,
1562            gix::create::Options::default(),
1563            open_options(),
1564        )
1565        .unwrap()
1566        .to_thread_local()
1567    }
1568
1569    #[test_case(false; "legacy tree format")]
1570    #[test_case(true; "tree-level conflict format")]
1571    fn read_plain_git_commit(uses_tree_conflict_format: bool) {
1572        let settings = user_settings();
1573        let temp_dir = new_temp_dir();
1574        let store_path = temp_dir.path();
1575        let git_repo_path = temp_dir.path().join("git");
1576        let git_repo = git_init(git_repo_path);
1577
1578        // Add a commit with some files in
1579        let blob1 = git_repo.write_blob(b"content1").unwrap().detach();
1580        let blob2 = git_repo.write_blob(b"normal").unwrap().detach();
1581        let mut dir_tree_editor = git_repo.empty_tree().edit().unwrap();
1582        dir_tree_editor
1583            .upsert("normal", gix::object::tree::EntryKind::Blob, blob1)
1584            .unwrap();
1585        dir_tree_editor
1586            .upsert("symlink", gix::object::tree::EntryKind::Link, blob2)
1587            .unwrap();
1588        let dir_tree_id = dir_tree_editor.write().unwrap().detach();
1589        let mut root_tree_builder = git_repo.empty_tree().edit().unwrap();
1590        root_tree_builder
1591            .upsert("dir", gix::object::tree::EntryKind::Tree, dir_tree_id)
1592            .unwrap();
1593        let root_tree_id = root_tree_builder.write().unwrap().detach();
1594        let git_author = gix::actor::Signature {
1595            name: "git author".into(),
1596            email: "git.author@example.com".into(),
1597            time: gix::date::Time::new(1000, 60 * 60),
1598        };
1599        let git_committer = gix::actor::Signature {
1600            name: "git committer".into(),
1601            email: "git.committer@example.com".into(),
1602            time: gix::date::Time::new(2000, -480 * 60),
1603        };
1604        let git_commit_id = git_repo
1605            .commit_as(
1606                &git_committer,
1607                &git_author,
1608                "refs/heads/dummy",
1609                "git commit message",
1610                root_tree_id,
1611                [] as [gix::ObjectId; 0],
1612            )
1613            .unwrap()
1614            .detach();
1615        git_repo
1616            .find_reference("refs/heads/dummy")
1617            .unwrap()
1618            .delete()
1619            .unwrap();
1620        let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263");
1621        // The change id is the leading reverse bits of the commit id
1622        let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25");
1623        // Check that the git commit above got the hash we expect
1624        assert_eq!(
1625            git_commit_id.as_bytes(),
1626            commit_id.as_bytes(),
1627            "{git_commit_id:?} vs {commit_id:?}"
1628        );
1629
1630        // Add an empty commit on top
1631        let git_commit_id2 = git_repo
1632            .commit_as(
1633                &git_committer,
1634                &git_author,
1635                "refs/heads/dummy2",
1636                "git commit message 2",
1637                root_tree_id,
1638                [git_commit_id],
1639            )
1640            .unwrap()
1641            .detach();
1642        git_repo
1643            .find_reference("refs/heads/dummy2")
1644            .unwrap()
1645            .delete()
1646            .unwrap();
1647        let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes());
1648
1649        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1650
1651        // Import the head commit and its ancestors
1652        backend
1653            .import_head_commits_with_tree_conflicts([&commit_id2], uses_tree_conflict_format)
1654            .unwrap();
1655        // Ref should be created only for the head commit
1656        let git_refs = backend
1657            .git_repo()
1658            .references()
1659            .unwrap()
1660            .prefixed("refs/jj/keep/")
1661            .unwrap()
1662            .map(|git_ref| git_ref.unwrap().id().detach())
1663            .collect_vec();
1664        assert_eq!(git_refs, vec![git_commit_id2]);
1665
1666        let commit = backend.read_commit(&commit_id).block_on().unwrap();
1667        assert_eq!(&commit.change_id, &change_id);
1668        assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
1669        assert_eq!(commit.predecessors, vec![]);
1670        assert_eq!(
1671            commit.root_tree.to_merge(),
1672            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1673        );
1674        if uses_tree_conflict_format {
1675            assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1676        } else {
1677            assert_matches!(commit.root_tree, MergedTreeId::Legacy(_));
1678        }
1679        assert_eq!(commit.description, "git commit message");
1680        assert_eq!(commit.author.name, "git author");
1681        assert_eq!(commit.author.email, "git.author@example.com");
1682        assert_eq!(
1683            commit.author.timestamp.timestamp,
1684            MillisSinceEpoch(1000 * 1000)
1685        );
1686        assert_eq!(commit.author.timestamp.tz_offset, 60);
1687        assert_eq!(commit.committer.name, "git committer");
1688        assert_eq!(commit.committer.email, "git.committer@example.com");
1689        assert_eq!(
1690            commit.committer.timestamp.timestamp,
1691            MillisSinceEpoch(2000 * 1000)
1692        );
1693        assert_eq!(commit.committer.timestamp.tz_offset, -480);
1694
1695        let root_tree = backend
1696            .read_tree(
1697                RepoPath::root(),
1698                &TreeId::from_bytes(root_tree_id.as_bytes()),
1699            )
1700            .block_on()
1701            .unwrap();
1702        let mut root_entries = root_tree.entries();
1703        let dir = root_entries.next().unwrap();
1704        assert_eq!(root_entries.next(), None);
1705        assert_eq!(dir.name().as_internal_str(), "dir");
1706        assert_eq!(
1707            dir.value(),
1708            &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes()))
1709        );
1710
1711        let dir_tree = backend
1712            .read_tree(
1713                RepoPath::from_internal_string("dir"),
1714                &TreeId::from_bytes(dir_tree_id.as_bytes()),
1715            )
1716            .block_on()
1717            .unwrap();
1718        let mut entries = dir_tree.entries();
1719        let file = entries.next().unwrap();
1720        let symlink = entries.next().unwrap();
1721        assert_eq!(entries.next(), None);
1722        assert_eq!(file.name().as_internal_str(), "normal");
1723        assert_eq!(
1724            file.value(),
1725            &TreeValue::File {
1726                id: FileId::from_bytes(blob1.as_bytes()),
1727                executable: false
1728            }
1729        );
1730        assert_eq!(symlink.name().as_internal_str(), "symlink");
1731        assert_eq!(
1732            symlink.value(),
1733            &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
1734        );
1735
1736        let commit2 = backend.read_commit(&commit_id2).block_on().unwrap();
1737        assert_eq!(commit2.parents, vec![commit_id.clone()]);
1738        assert_eq!(commit.predecessors, vec![]);
1739        assert_eq!(
1740            commit.root_tree.to_merge(),
1741            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1742        );
1743        if uses_tree_conflict_format {
1744            assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1745        } else {
1746            assert_matches!(commit.root_tree, MergedTreeId::Legacy(_));
1747        }
1748    }
1749
1750    #[test]
1751    fn read_git_commit_without_importing() {
1752        let settings = user_settings();
1753        let temp_dir = new_temp_dir();
1754        let store_path = temp_dir.path();
1755        let git_repo_path = temp_dir.path().join("git");
1756        let git_repo = git_init(&git_repo_path);
1757
1758        let signature = gix::actor::Signature {
1759            name: GIT_USER.into(),
1760            email: GIT_EMAIL.into(),
1761            time: gix::date::Time::now_utc(),
1762        };
1763        let empty_tree_id =
1764            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1765        let git_commit_id = git_repo
1766            .commit_as(
1767                &signature,
1768                &signature,
1769                "refs/heads/main",
1770                "git commit message",
1771                empty_tree_id,
1772                [] as [gix::ObjectId; 0],
1773            )
1774            .unwrap();
1775
1776        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1777
1778        // read_commit() without import_head_commits() works as of now. This might be
1779        // changed later.
1780        assert!(backend
1781            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1782            .block_on()
1783            .is_ok());
1784        assert!(
1785            backend
1786                .cached_extra_metadata_table()
1787                .unwrap()
1788                .get_value(git_commit_id.as_bytes())
1789                .is_some(),
1790            "extra metadata should have been be created"
1791        );
1792    }
1793
1794    #[test]
1795    fn read_signed_git_commit() {
1796        let settings = user_settings();
1797        let temp_dir = new_temp_dir();
1798        let store_path = temp_dir.path();
1799        let git_repo_path = temp_dir.path().join("git");
1800        let git_repo = git_init(git_repo_path);
1801
1802        let signature = gix::actor::Signature {
1803            name: GIT_USER.into(),
1804            email: GIT_EMAIL.into(),
1805            time: gix::date::Time::now_utc(),
1806        };
1807        let empty_tree_id =
1808            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1809
1810        let secure_sig =
1811            "here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP\n";
1812
1813        let mut commit = gix::objs::Commit {
1814            tree: empty_tree_id,
1815            parents: smallvec::SmallVec::new(),
1816            author: signature.clone(),
1817            committer: signature.clone(),
1818            encoding: None,
1819            message: "git commit message".into(),
1820            extra_headers: Vec::new(),
1821        };
1822
1823        let mut commit_buf = Vec::new();
1824        commit.write_to(&mut commit_buf).unwrap();
1825        let commit_str = std::str::from_utf8(&commit_buf).unwrap();
1826
1827        commit
1828            .extra_headers
1829            .push(("gpgsig".into(), secure_sig.into()));
1830
1831        let git_commit_id = git_repo.write_object(&commit).unwrap();
1832
1833        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1834
1835        let commit = backend
1836            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1837            .block_on()
1838            .unwrap();
1839
1840        let sig = commit.secure_sig.expect("failed to read the signature");
1841
1842        // converting to string for nicer assert diff
1843        assert_eq!(std::str::from_utf8(&sig.sig).unwrap(), secure_sig);
1844        assert_eq!(std::str::from_utf8(&sig.data).unwrap(), commit_str);
1845    }
1846
1847    #[test]
1848    fn read_empty_string_placeholder() {
1849        let git_signature1 = gix::actor::SignatureRef {
1850            name: EMPTY_STRING_PLACEHOLDER.into(),
1851            email: "git.author@example.com".into(),
1852            time: gix::date::Time::new(1000, 60 * 60),
1853        };
1854        let signature1 = signature_from_git(git_signature1);
1855        assert!(signature1.name.is_empty());
1856        assert_eq!(signature1.email, "git.author@example.com");
1857        let git_signature2 = gix::actor::SignatureRef {
1858            name: "git committer".into(),
1859            email: EMPTY_STRING_PLACEHOLDER.into(),
1860            time: gix::date::Time::new(2000, -480 * 60),
1861        };
1862        let signature2 = signature_from_git(git_signature2);
1863        assert_eq!(signature2.name, "git committer");
1864        assert!(signature2.email.is_empty());
1865    }
1866
1867    #[test]
1868    fn write_empty_string_placeholder() {
1869        let signature1 = Signature {
1870            name: "".to_string(),
1871            email: "someone@example.com".to_string(),
1872            timestamp: Timestamp {
1873                timestamp: MillisSinceEpoch(0),
1874                tz_offset: 0,
1875            },
1876        };
1877        let git_signature1 = signature_to_git(&signature1);
1878        assert_eq!(git_signature1.name, EMPTY_STRING_PLACEHOLDER);
1879        assert_eq!(git_signature1.email, "someone@example.com");
1880        let signature2 = Signature {
1881            name: "Someone".to_string(),
1882            email: "".to_string(),
1883            timestamp: Timestamp {
1884                timestamp: MillisSinceEpoch(0),
1885                tz_offset: 0,
1886            },
1887        };
1888        let git_signature2 = signature_to_git(&signature2);
1889        assert_eq!(git_signature2.name, "Someone");
1890        assert_eq!(git_signature2.email, EMPTY_STRING_PLACEHOLDER);
1891    }
1892
1893    /// Test that parents get written correctly
1894    #[test]
1895    fn git_commit_parents() {
1896        let settings = user_settings();
1897        let temp_dir = new_temp_dir();
1898        let store_path = temp_dir.path();
1899        let git_repo_path = temp_dir.path().join("git");
1900        let git_repo = git_init(&git_repo_path);
1901
1902        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1903        let mut commit = Commit {
1904            parents: vec![],
1905            predecessors: vec![],
1906            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
1907            change_id: ChangeId::from_hex("abc123"),
1908            description: "".to_string(),
1909            author: create_signature(),
1910            committer: create_signature(),
1911            secure_sig: None,
1912        };
1913
1914        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
1915            backend.write_commit(commit, None).block_on()
1916        };
1917
1918        // No parents
1919        commit.parents = vec![];
1920        assert_matches!(
1921            write_commit(commit.clone()),
1922            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
1923        );
1924
1925        // Only root commit as parent
1926        commit.parents = vec![backend.root_commit_id().clone()];
1927        let first_id = write_commit(commit.clone()).unwrap().0;
1928        let first_commit = backend.read_commit(&first_id).block_on().unwrap();
1929        assert_eq!(first_commit, commit);
1930        let first_git_commit = git_repo.find_commit(git_id(&first_id)).unwrap();
1931        assert!(first_git_commit.parent_ids().collect_vec().is_empty());
1932
1933        // Only non-root commit as parent
1934        commit.parents = vec![first_id.clone()];
1935        let second_id = write_commit(commit.clone()).unwrap().0;
1936        let second_commit = backend.read_commit(&second_id).block_on().unwrap();
1937        assert_eq!(second_commit, commit);
1938        let second_git_commit = git_repo.find_commit(git_id(&second_id)).unwrap();
1939        assert_eq!(
1940            second_git_commit.parent_ids().collect_vec(),
1941            vec![git_id(&first_id)]
1942        );
1943
1944        // Merge commit
1945        commit.parents = vec![first_id.clone(), second_id.clone()];
1946        let merge_id = write_commit(commit.clone()).unwrap().0;
1947        let merge_commit = backend.read_commit(&merge_id).block_on().unwrap();
1948        assert_eq!(merge_commit, commit);
1949        let merge_git_commit = git_repo.find_commit(git_id(&merge_id)).unwrap();
1950        assert_eq!(
1951            merge_git_commit.parent_ids().collect_vec(),
1952            vec![git_id(&first_id), git_id(&second_id)]
1953        );
1954
1955        // Merge commit with root as one parent
1956        commit.parents = vec![first_id, backend.root_commit_id().clone()];
1957        assert_matches!(
1958            write_commit(commit),
1959            Err(BackendError::Unsupported(message)) if message.contains("root commit")
1960        );
1961    }
1962
1963    #[test]
1964    fn write_tree_conflicts() {
1965        let settings = user_settings();
1966        let temp_dir = new_temp_dir();
1967        let store_path = temp_dir.path();
1968        let git_repo_path = temp_dir.path().join("git");
1969        let git_repo = git_init(&git_repo_path);
1970
1971        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1972        let create_tree = |i| {
1973            let blob_id = git_repo.write_blob(format!("content {i}")).unwrap();
1974            let mut tree_builder = git_repo.empty_tree().edit().unwrap();
1975            tree_builder
1976                .upsert(
1977                    format!("file{i}"),
1978                    gix::object::tree::EntryKind::Blob,
1979                    blob_id,
1980                )
1981                .unwrap();
1982            TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
1983        };
1984
1985        let root_tree = Merge::from_removes_adds(
1986            vec![create_tree(0), create_tree(1)],
1987            vec![create_tree(2), create_tree(3), create_tree(4)],
1988        );
1989        let mut commit = Commit {
1990            parents: vec![backend.root_commit_id().clone()],
1991            predecessors: vec![],
1992            root_tree: MergedTreeId::Merge(root_tree.clone()),
1993            change_id: ChangeId::from_hex("abc123"),
1994            description: "".to_string(),
1995            author: create_signature(),
1996            committer: create_signature(),
1997            secure_sig: None,
1998        };
1999
2000        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2001            backend.write_commit(commit, None).block_on()
2002        };
2003
2004        // When writing a tree-level conflict, the root tree on the git side has the
2005        // individual trees as subtrees.
2006        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2007        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2008        assert_eq!(read_commit, commit);
2009        let git_commit = git_repo
2010            .find_commit(gix::ObjectId::from_bytes_or_panic(
2011                read_commit_id.as_bytes(),
2012            ))
2013            .unwrap();
2014        let git_tree = git_repo.find_tree(git_commit.tree_id().unwrap()).unwrap();
2015        assert!(git_tree
2016            .iter()
2017            .map(Result::unwrap)
2018            .filter(|entry| entry.filename() != b"README")
2019            .all(|entry| entry.mode().0 == 0o040000));
2020        let mut iter = git_tree.iter().map(Result::unwrap);
2021        let entry = iter.next().unwrap();
2022        assert_eq!(entry.filename(), b".jjconflict-base-0");
2023        assert_eq!(
2024            entry.id().as_bytes(),
2025            root_tree.get_remove(0).unwrap().as_bytes()
2026        );
2027        let entry = iter.next().unwrap();
2028        assert_eq!(entry.filename(), b".jjconflict-base-1");
2029        assert_eq!(
2030            entry.id().as_bytes(),
2031            root_tree.get_remove(1).unwrap().as_bytes()
2032        );
2033        let entry = iter.next().unwrap();
2034        assert_eq!(entry.filename(), b".jjconflict-side-0");
2035        assert_eq!(
2036            entry.id().as_bytes(),
2037            root_tree.get_add(0).unwrap().as_bytes()
2038        );
2039        let entry = iter.next().unwrap();
2040        assert_eq!(entry.filename(), b".jjconflict-side-1");
2041        assert_eq!(
2042            entry.id().as_bytes(),
2043            root_tree.get_add(1).unwrap().as_bytes()
2044        );
2045        let entry = iter.next().unwrap();
2046        assert_eq!(entry.filename(), b".jjconflict-side-2");
2047        assert_eq!(
2048            entry.id().as_bytes(),
2049            root_tree.get_add(2).unwrap().as_bytes()
2050        );
2051        let entry = iter.next().unwrap();
2052        assert_eq!(entry.filename(), b"README");
2053        assert_eq!(entry.mode().0, 0o100644);
2054        assert!(iter.next().is_none());
2055
2056        // When writing a single tree using the new format, it's represented by a
2057        // regular git tree.
2058        commit.root_tree = MergedTreeId::resolved(create_tree(5));
2059        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2060        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2061        assert_eq!(read_commit, commit);
2062        let git_commit = git_repo
2063            .find_commit(gix::ObjectId::from_bytes_or_panic(
2064                read_commit_id.as_bytes(),
2065            ))
2066            .unwrap();
2067        assert_eq!(
2068            MergedTreeId::resolved(TreeId::from_bytes(git_commit.tree_id().unwrap().as_bytes())),
2069            commit.root_tree
2070        );
2071    }
2072
2073    #[test]
2074    fn commit_has_ref() {
2075        let settings = user_settings();
2076        let temp_dir = new_temp_dir();
2077        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2078        let git_repo = backend.git_repo();
2079        let signature = Signature {
2080            name: "Someone".to_string(),
2081            email: "someone@example.com".to_string(),
2082            timestamp: Timestamp {
2083                timestamp: MillisSinceEpoch(0),
2084                tz_offset: 0,
2085            },
2086        };
2087        let commit = Commit {
2088            parents: vec![backend.root_commit_id().clone()],
2089            predecessors: vec![],
2090            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2091            change_id: ChangeId::new(vec![]),
2092            description: "initial".to_string(),
2093            author: signature.clone(),
2094            committer: signature,
2095            secure_sig: None,
2096        };
2097        let commit_id = backend.write_commit(commit, None).block_on().unwrap().0;
2098        let git_refs = git_repo.references().unwrap();
2099        let git_ref_ids: Vec<_> = git_refs
2100            .prefixed("refs/jj/keep/")
2101            .unwrap()
2102            .map(|x| x.unwrap().id().detach())
2103            .collect();
2104        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2105
2106        // Concurrently-running GC deletes the ref, leaving the extra metadata.
2107        for git_ref in git_refs.prefixed("refs/jj/keep/").unwrap() {
2108            git_ref.unwrap().delete().unwrap();
2109        }
2110        // Re-imported commit should have new ref.
2111        backend.import_head_commits([&commit_id]).unwrap();
2112        let git_refs = git_repo.references().unwrap();
2113        let git_ref_ids: Vec<_> = git_refs
2114            .prefixed("refs/jj/keep/")
2115            .unwrap()
2116            .map(|x| x.unwrap().id().detach())
2117            .collect();
2118        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2119    }
2120
2121    #[test]
2122    fn import_head_commits_duplicates() {
2123        let settings = user_settings();
2124        let temp_dir = new_temp_dir();
2125        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2126        let git_repo = backend.git_repo();
2127
2128        let signature = gix::actor::Signature {
2129            name: GIT_USER.into(),
2130            email: GIT_EMAIL.into(),
2131            time: gix::date::Time::now_utc(),
2132        };
2133        let empty_tree_id =
2134            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
2135        let git_commit_id = git_repo
2136            .commit_as(
2137                &signature,
2138                &signature,
2139                "refs/heads/main",
2140                "git commit message",
2141                empty_tree_id,
2142                [] as [gix::ObjectId; 0],
2143            )
2144            .unwrap()
2145            .detach();
2146        let commit_id = CommitId::from_bytes(git_commit_id.as_bytes());
2147
2148        // Ref creation shouldn't fail because of duplicated head ids.
2149        backend
2150            .import_head_commits([&commit_id, &commit_id])
2151            .unwrap();
2152        assert!(git_repo
2153            .references()
2154            .unwrap()
2155            .prefixed("refs/jj/keep/")
2156            .unwrap()
2157            .any(|git_ref| git_ref.unwrap().id().detach() == git_commit_id));
2158    }
2159
2160    #[test]
2161    fn overlapping_git_commit_id() {
2162        let settings = user_settings();
2163        let temp_dir = new_temp_dir();
2164        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2165        let mut commit1 = Commit {
2166            parents: vec![backend.root_commit_id().clone()],
2167            predecessors: vec![],
2168            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2169            change_id: ChangeId::new(vec![]),
2170            description: "initial".to_string(),
2171            author: create_signature(),
2172            committer: create_signature(),
2173            secure_sig: None,
2174        };
2175
2176        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2177            backend.write_commit(commit, None).block_on()
2178        };
2179
2180        // libgit2 doesn't seem to preserve negative timestamps, so set it to at least 1
2181        // second after the epoch, so the timestamp adjustment can remove 1
2182        // second and it will still be nonnegative
2183        commit1.committer.timestamp.timestamp = MillisSinceEpoch(1000);
2184        let (commit_id1, mut commit2) = write_commit(commit1).unwrap();
2185        commit2.predecessors.push(commit_id1.clone());
2186        // `write_commit` should prevent the ids from being the same by changing the
2187        // committer timestamp of the commit it actually writes.
2188        let (commit_id2, mut actual_commit2) = write_commit(commit2.clone()).unwrap();
2189        // The returned matches the ID
2190        assert_eq!(
2191            backend.read_commit(&commit_id2).block_on().unwrap(),
2192            actual_commit2
2193        );
2194        assert_ne!(commit_id2, commit_id1);
2195        // The committer timestamp should differ
2196        assert_ne!(
2197            actual_commit2.committer.timestamp.timestamp,
2198            commit2.committer.timestamp.timestamp
2199        );
2200        // The rest of the commit should be the same
2201        actual_commit2.committer.timestamp.timestamp = commit2.committer.timestamp.timestamp;
2202        assert_eq!(actual_commit2, commit2);
2203    }
2204
2205    #[test]
2206    fn write_signed_commit() {
2207        let settings = user_settings();
2208        let temp_dir = new_temp_dir();
2209        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2210
2211        let commit = Commit {
2212            parents: vec![backend.root_commit_id().clone()],
2213            predecessors: vec![],
2214            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2215            change_id: ChangeId::new(vec![]),
2216            description: "initial".to_string(),
2217            author: create_signature(),
2218            committer: create_signature(),
2219            secure_sig: None,
2220        };
2221
2222        let mut signer = |data: &_| {
2223            let hash: String = blake2b_hash(data).encode_hex();
2224            Ok(format!("test sig\nhash={hash}\n").into_bytes())
2225        };
2226
2227        let (id, commit) = backend
2228            .write_commit(commit, Some(&mut signer as &mut SigningFn))
2229            .block_on()
2230            .unwrap();
2231
2232        let git_repo = backend.git_repo();
2233        let obj = git_repo
2234            .find_object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
2235            .unwrap();
2236        insta::assert_snapshot!(std::str::from_utf8(&obj.data).unwrap(), @r"
2237        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2238        author Someone <someone@example.com> 0 +0000
2239        committer Someone <someone@example.com> 0 +0000
2240        gpgsig test sig
2241         hash=9ad9526c3b2103c41a229f2f3c82d107a0ecd902f476a855f0e1dd5f7bef1430663de12749b73e293a877113895a8a2a0f29da4bbc5a5f9a19c3523fb0e53518
2242
2243        initial
2244        ");
2245
2246        let returned_sig = commit.secure_sig.expect("failed to return the signature");
2247
2248        let commit = backend.read_commit(&id).block_on().unwrap();
2249
2250        let sig = commit.secure_sig.expect("failed to read the signature");
2251        assert_eq!(&sig, &returned_sig);
2252
2253        insta::assert_snapshot!(std::str::from_utf8(&sig.sig).unwrap(), @r###"
2254        test sig
2255        hash=9ad9526c3b2103c41a229f2f3c82d107a0ecd902f476a855f0e1dd5f7bef1430663de12749b73e293a877113895a8a2a0f29da4bbc5a5f9a19c3523fb0e53518
2256        "###);
2257        insta::assert_snapshot!(std::str::from_utf8(&sig.data).unwrap(), @r###"
2258        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2259        author Someone <someone@example.com> 0 +0000
2260        committer Someone <someone@example.com> 0 +0000
2261
2262        initial
2263        "###);
2264    }
2265
2266    fn git_id(commit_id: &CommitId) -> gix::ObjectId {
2267        gix::ObjectId::from_bytes_or_panic(commit_id.as_bytes())
2268    }
2269
2270    fn create_signature() -> Signature {
2271        Signature {
2272            name: GIT_USER.to_string(),
2273            email: GIT_EMAIL.to_string(),
2274            timestamp: Timestamp {
2275                timestamp: MillisSinceEpoch(0),
2276                tz_offset: 0,
2277            },
2278        }
2279    }
2280
2281    // Not using testutils::user_settings() because there is a dependency cycle
2282    // 'jj_lib (1) -> testutils -> jj_lib (2)' which creates another distinct
2283    // UserSettings type. testutils returns jj_lib (2)'s UserSettings, whereas
2284    // our UserSettings type comes from jj_lib (1).
2285    fn user_settings() -> UserSettings {
2286        let config = StackedConfig::with_defaults();
2287        UserSettings::from_config(config).unwrap()
2288    }
2289}