jj_lib/
git_backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::any::Any;
18use std::collections::HashSet;
19use std::fmt::Debug;
20use std::fmt::Error;
21use std::fmt::Formatter;
22use std::fs;
23use std::io;
24use std::io::Cursor;
25use std::io::Read;
26use std::path::Path;
27use std::path::PathBuf;
28use std::process::Command;
29use std::process::ExitStatus;
30use std::str;
31use std::sync::Arc;
32use std::sync::Mutex;
33use std::sync::MutexGuard;
34use std::time::SystemTime;
35
36use async_trait::async_trait;
37use futures::stream::BoxStream;
38use gix::bstr::BString;
39use gix::objs::CommitRef;
40use gix::objs::CommitRefIter;
41use gix::objs::WriteTo as _;
42use itertools::Itertools as _;
43use pollster::FutureExt as _;
44use prost::Message as _;
45use smallvec::SmallVec;
46use thiserror::Error;
47
48use crate::backend::make_root_commit;
49use crate::backend::Backend;
50use crate::backend::BackendError;
51use crate::backend::BackendInitError;
52use crate::backend::BackendLoadError;
53use crate::backend::BackendResult;
54use crate::backend::ChangeId;
55use crate::backend::Commit;
56use crate::backend::CommitId;
57use crate::backend::Conflict;
58use crate::backend::ConflictId;
59use crate::backend::ConflictTerm;
60use crate::backend::CopyRecord;
61use crate::backend::FileId;
62use crate::backend::MergedTreeId;
63use crate::backend::MillisSinceEpoch;
64use crate::backend::SecureSig;
65use crate::backend::Signature;
66use crate::backend::SigningFn;
67use crate::backend::SymlinkId;
68use crate::backend::Timestamp;
69use crate::backend::Tree;
70use crate::backend::TreeId;
71use crate::backend::TreeValue;
72use crate::file_util::IoResultExt as _;
73use crate::file_util::PathError;
74use crate::index::Index;
75use crate::lock::FileLock;
76use crate::merge::Merge;
77use crate::merge::MergeBuilder;
78use crate::object_id::ObjectId;
79use crate::repo_path::RepoPath;
80use crate::repo_path::RepoPathBuf;
81use crate::repo_path::RepoPathComponentBuf;
82use crate::settings::UserSettings;
83use crate::stacked_table::MutableTable;
84use crate::stacked_table::ReadonlyTable;
85use crate::stacked_table::TableSegment as _;
86use crate::stacked_table::TableStore;
87use crate::stacked_table::TableStoreError;
88
/// Length in bytes of the Git object ids handled by this backend. Ids of any
/// other length are rejected by `validate_git_object_id()`.
const HASH_LENGTH: usize = 20;
/// Length in bytes of the root change id.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";
// NOTE(review): not referenced in the visible part of this file; presumably a
// suffix attached to names of conflicted tree entries - confirm at the use site.
const CONFLICT_SUFFIX: &str = ".jjconflict";

/// Name of the extra Git commit header that carries the space-separated hex
/// tree ids of a conflicted root tree (see `root_tree_from_header()`).
pub const JJ_TREES_COMMIT_HEADER: &[u8] = b"jj:trees";
96
/// Error that may occur while initializing a `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendInitError {
    /// Creating a new Git repository failed.
    #[error("Failed to initialize git repository")]
    InitRepository(#[source] gix::init::Error),
    /// Opening an existing Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// A filesystem operation on a backend path failed (canonicalizing the
    /// repo path, creating the `extra` directory, or writing `git_target`).
    #[error(transparent)]
    Path(PathError),
}
106
107impl From<Box<GitBackendInitError>> for BackendInitError {
108    fn from(err: Box<GitBackendInitError>) -> Self {
109        BackendInitError(err)
110    }
111}
112
/// Error that may occur while loading an existing `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendLoadError {
    /// Opening the Git repository recorded in `git_target` failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// Reading `git_target` or canonicalizing the recorded path failed.
    #[error(transparent)]
    Path(PathError),
}
120
121impl From<Box<GitBackendLoadError>> for BackendLoadError {
122    fn from(err: Box<GitBackendLoadError>) -> Self {
123        BackendLoadError(err)
124    }
125}
126
/// `GitBackend`-specific error that may occur after the backend is loaded.
#[derive(Debug, Error)]
pub enum GitBackendError {
    /// Reading the head of the extra metadata table store failed.
    #[error("Failed to read non-git metadata")]
    ReadMetadata(#[source] TableStoreError),
    /// Saving a new extra metadata table failed.
    #[error("Failed to write non-git metadata")]
    WriteMetadata(#[source] TableStoreError),
}
135
136impl From<GitBackendError> for BackendError {
137    fn from(err: GitBackendError) -> Self {
138        BackendError::Other(err.into())
139    }
140}
141
/// Error that may occur while running `git gc` (see `run_git_gc()`).
#[derive(Debug, Error)]
pub enum GitGcError {
    /// The `git` process could not be run.
    #[error("Failed to run git gc command")]
    GcCommand(#[source] std::io::Error),
    /// The `git` process ran but exited with a non-success status.
    #[error("git gc command exited with an error: {0}")]
    GcCommandErrorStatus(ExitStatus),
}
149
/// Backend that stores its objects in a Git repository, plus a table store
/// for the metadata that isn't stored in Git.
pub struct GitBackend {
    // While gix::Repository can be created from gix::ThreadSafeRepository, it's
    // cheaper to cache the thread-local instance behind a mutex than creating
    // one for each backend method call. Our GitBackend is most likely to be
    // used in a single-threaded context.
    // Thread-safe handle; `git_repo()` creates new thread-local instances from it.
    base_repo: gix::ThreadSafeRepository,
    // Cached thread-local instance; access it through `lock_git_repo()`.
    repo: Mutex<gix::Repository>,
    // All-zero id of `HASH_LENGTH` bytes (set in `new()`).
    root_commit_id: CommitId,
    // All-zero id of `CHANGE_ID_LENGTH` bytes (set in `new()`).
    root_change_id: ChangeId,
    // Fixed id assigned in `new()`.
    empty_tree_id: TreeId,
    // Store of the non-git metadata tables, located in the `extra` directory.
    extra_metadata_store: TableStore,
    // Lazily populated cache of the head table of `extra_metadata_store`.
    cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
}
163
164impl GitBackend {
    /// Returns the name identifying this backend type: `"git"`.
    pub fn name() -> &'static str {
        "git"
    }
168
169    fn new(base_repo: gix::ThreadSafeRepository, extra_metadata_store: TableStore) -> Self {
170        let repo = Mutex::new(base_repo.to_thread_local());
171        let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
172        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
173        let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
174        GitBackend {
175            base_repo,
176            repo,
177            root_commit_id,
178            root_change_id,
179            empty_tree_id,
180            extra_metadata_store,
181            cached_extra_metadata: Mutex::new(None),
182        }
183    }
184
185    pub fn init_internal(
186        settings: &UserSettings,
187        store_path: &Path,
188    ) -> Result<Self, Box<GitBackendInitError>> {
189        let git_repo_path = Path::new("git");
190        let git_repo = gix::ThreadSafeRepository::init_opts(
191            store_path.join(git_repo_path),
192            gix::create::Kind::Bare,
193            gix::create::Options::default(),
194            gix_open_opts_from_settings(settings),
195        )
196        .map_err(GitBackendInitError::InitRepository)?;
197        Self::init_with_repo(store_path, git_repo_path, git_repo)
198    }
199
200    /// Initializes backend by creating a new Git repo at the specified
201    /// workspace path. The workspace directory must exist.
202    pub fn init_colocated(
203        settings: &UserSettings,
204        store_path: &Path,
205        workspace_root: &Path,
206    ) -> Result<Self, Box<GitBackendInitError>> {
207        let canonical_workspace_root = {
208            let path = store_path.join(workspace_root);
209            dunce::canonicalize(&path)
210                .context(&path)
211                .map_err(GitBackendInitError::Path)?
212        };
213        let git_repo = gix::ThreadSafeRepository::init_opts(
214            canonical_workspace_root,
215            gix::create::Kind::WithWorktree,
216            gix::create::Options::default(),
217            gix_open_opts_from_settings(settings),
218        )
219        .map_err(GitBackendInitError::InitRepository)?;
220        let git_repo_path = workspace_root.join(".git");
221        Self::init_with_repo(store_path, &git_repo_path, git_repo)
222    }
223
224    /// Initializes backend with an existing Git repo at the specified path.
225    pub fn init_external(
226        settings: &UserSettings,
227        store_path: &Path,
228        git_repo_path: &Path,
229    ) -> Result<Self, Box<GitBackendInitError>> {
230        let canonical_git_repo_path = {
231            let path = store_path.join(git_repo_path);
232            canonicalize_git_repo_path(&path)
233                .context(&path)
234                .map_err(GitBackendInitError::Path)?
235        };
236        let git_repo = gix::ThreadSafeRepository::open_opts(
237            canonical_git_repo_path,
238            gix_open_opts_from_settings(settings),
239        )
240        .map_err(GitBackendInitError::OpenRepository)?;
241        Self::init_with_repo(store_path, git_repo_path, git_repo)
242    }
243
244    fn init_with_repo(
245        store_path: &Path,
246        git_repo_path: &Path,
247        git_repo: gix::ThreadSafeRepository,
248    ) -> Result<Self, Box<GitBackendInitError>> {
249        let extra_path = store_path.join("extra");
250        fs::create_dir(&extra_path)
251            .context(&extra_path)
252            .map_err(GitBackendInitError::Path)?;
253        let target_path = store_path.join("git_target");
254        if cfg!(windows) && git_repo_path.is_relative() {
255            // When a repository is created in Windows, format the path with *forward
256            // slashes* and not backwards slashes. This makes it possible to use the same
257            // repository under Windows Subsystem for Linux.
258            //
259            // This only works for relative paths. If the path is absolute, there's not much
260            // we can do, and it simply won't work inside and outside WSL at the same time.
261            let git_repo_path_string = git_repo_path
262                .components()
263                .map(|component| component.as_os_str().to_str().unwrap().to_owned())
264                .join("/");
265            fs::write(&target_path, git_repo_path_string.as_bytes())
266                .context(&target_path)
267                .map_err(GitBackendInitError::Path)?;
268        } else {
269            fs::write(&target_path, git_repo_path.to_str().unwrap().as_bytes())
270                .context(&target_path)
271                .map_err(GitBackendInitError::Path)?;
272        };
273        let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
274        Ok(GitBackend::new(git_repo, extra_metadata_store))
275    }
276
277    pub fn load(
278        settings: &UserSettings,
279        store_path: &Path,
280    ) -> Result<Self, Box<GitBackendLoadError>> {
281        let git_repo_path = {
282            let target_path = store_path.join("git_target");
283            let git_repo_path_str = fs::read_to_string(&target_path)
284                .context(&target_path)
285                .map_err(GitBackendLoadError::Path)?;
286            let git_repo_path = store_path.join(git_repo_path_str);
287            canonicalize_git_repo_path(&git_repo_path)
288                .context(&git_repo_path)
289                .map_err(GitBackendLoadError::Path)?
290        };
291        let repo = gix::ThreadSafeRepository::open_opts(
292            git_repo_path,
293            gix_open_opts_from_settings(settings),
294        )
295        .map_err(GitBackendLoadError::OpenRepository)?;
296        let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
297        Ok(GitBackend::new(repo, extra_metadata_store))
298    }
299
    /// Locks and returns the cached thread-local Git repo instance.
    ///
    /// Panics if the mutex is poisoned.
    fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
        self.repo.lock().unwrap()
    }
303
    /// Returns a new thread-local instance for accessing the underlying Git
    /// repo. This doesn't take the lock on the cached instance.
    pub fn git_repo(&self) -> gix::Repository {
        self.base_repo.to_thread_local()
    }
308
    /// Returns the path to the `.git` directory, or to the repository itself
    /// if it's bare.
    pub fn git_repo_path(&self) -> &Path {
        self.base_repo.path()
    }
313
    /// Returns the path to the working directory, or `None` if the repository
    /// is bare.
    pub fn git_workdir(&self) -> Option<&Path> {
        self.base_repo.work_dir()
    }
318
319    fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
320        let mut locked_head = self.cached_extra_metadata.lock().unwrap();
321        match locked_head.as_ref() {
322            Some(head) => Ok(head.clone()),
323            None => {
324                let table = self
325                    .extra_metadata_store
326                    .get_head()
327                    .map_err(GitBackendError::ReadMetadata)?;
328                *locked_head = Some(table.clone());
329                Ok(table)
330            }
331        }
332    }
333
334    fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> {
335        let table = self
336            .extra_metadata_store
337            .get_head_locked()
338            .map_err(GitBackendError::ReadMetadata)?;
339        Ok(table)
340    }
341
342    fn save_extra_metadata_table(
343        &self,
344        mut_table: MutableTable,
345        _table_lock: &FileLock,
346    ) -> BackendResult<()> {
347        let table = self
348            .extra_metadata_store
349            .save_table(mut_table)
350            .map_err(GitBackendError::WriteMetadata)?;
351        // Since the parent table was the head, saved table are likely to be new head.
352        // If it's not, cache will be reloaded when entry can't be found.
353        *self.cached_extra_metadata.lock().unwrap() = Some(table);
354        Ok(())
355    }
356
357    /// Imports the given commits and ancestors from the backing Git repo.
358    ///
359    /// The `head_ids` may contain commits that have already been imported, but
360    /// the caller should filter them out to eliminate redundant I/O processing.
361    #[tracing::instrument(skip(self, head_ids))]
362    pub fn import_head_commits<'a>(
363        &self,
364        head_ids: impl IntoIterator<Item = &'a CommitId>,
365    ) -> BackendResult<()> {
366        let head_ids: HashSet<&CommitId> = head_ids
367            .into_iter()
368            .filter(|&id| *id != self.root_commit_id)
369            .collect();
370        if head_ids.is_empty() {
371            return Ok(());
372        }
373
374        // Create no-gc ref even if known to the extras table. Concurrent GC
375        // process might have deleted the no-gc ref.
376        let locked_repo = self.lock_git_repo();
377        locked_repo
378            .edit_references(head_ids.iter().copied().map(to_no_gc_ref_update))
379            .map_err(|err| BackendError::Other(Box::new(err)))?;
380
381        // These commits are imported from Git. Make our change ids persist (otherwise
382        // future write_commit() could reassign new change id.)
383        tracing::debug!(
384            heads_count = head_ids.len(),
385            "import extra metadata entries"
386        );
387        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
388        let mut mut_table = table.start_mutation();
389        import_extra_metadata_entries_from_heads(
390            &locked_repo,
391            &mut mut_table,
392            &table_lock,
393            &head_ids,
394        )?;
395        self.save_extra_metadata_table(mut_table, &table_lock)
396    }
397
398    fn read_file_sync(&self, id: &FileId) -> BackendResult<Box<dyn Read>> {
399        let git_blob_id = validate_git_object_id(id)?;
400        let locked_repo = self.lock_git_repo();
401        let mut blob = locked_repo
402            .find_object(git_blob_id)
403            .map_err(|err| map_not_found_err(err, id))?
404            .try_into_blob()
405            .map_err(|err| to_read_object_err(err, id))?;
406        Ok(Box::new(Cursor::new(blob.take_data())))
407    }
408
409    fn new_diff_platform(&self) -> BackendResult<gix::diff::blob::Platform> {
410        let attributes = gix::worktree::Stack::new(
411            Path::new(""),
412            gix::worktree::stack::State::AttributesStack(Default::default()),
413            gix::worktree::glob::pattern::Case::Sensitive,
414            Vec::new(),
415            Vec::new(),
416        );
417        let filter = gix::diff::blob::Pipeline::new(
418            Default::default(),
419            gix::filter::plumbing::Pipeline::new(
420                self.git_repo()
421                    .command_context()
422                    .map_err(|err| BackendError::Other(Box::new(err)))?,
423                Default::default(),
424            ),
425            Vec::new(),
426            Default::default(),
427        );
428        Ok(gix::diff::blob::Platform::new(
429            Default::default(),
430            filter,
431            gix::diff::blob::pipeline::Mode::ToGit,
432            attributes,
433        ))
434    }
435
436    fn read_tree_for_commit<'repo>(
437        &self,
438        repo: &'repo gix::Repository,
439        id: &CommitId,
440    ) -> BackendResult<gix::Tree<'repo>> {
441        let tree = self.read_commit(id).block_on()?.root_tree.to_merge();
442        // TODO(kfm): probably want to do something here if it is a merge
443        let tree_id = tree.first().clone();
444        let gix_id = validate_git_object_id(&tree_id)?;
445        repo.find_object(gix_id)
446            .map_err(|err| map_not_found_err(err, &tree_id))?
447            .try_into_tree()
448            .map_err(|err| to_read_object_err(err, &tree_id))
449    }
450}
451
452/// Canonicalizes the given `path` except for the last `".git"` component.
453///
454/// The last path component matters when opening a Git repo without `core.bare`
455/// config. This config is usually set, but the "repo" tool will set up such
456/// repositories and symlinks. Opening such repo with fully-canonicalized path
457/// would turn a colocated Git repo into a bare repo.
458pub fn canonicalize_git_repo_path(path: &Path) -> io::Result<PathBuf> {
459    if path.ends_with(".git") {
460        let workdir = path.parent().unwrap();
461        dunce::canonicalize(workdir).map(|dir| dir.join(".git"))
462    } else {
463        dunce::canonicalize(path)
464    }
465}
466
467fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options {
468    let user_name = settings.user_name();
469    let user_email = settings.user_email();
470    gix::open::Options::default()
471        .config_overrides([
472            // Committer has to be configured to record reflog. Author isn't
473            // needed, but let's copy the same values.
474            format!("author.name={user_name}"),
475            format!("author.email={user_email}"),
476            format!("committer.name={user_name}"),
477            format!("committer.email={user_email}"),
478        ])
479        // The git_target path should point the repository, not the working directory.
480        .open_path_as_is(true)
481        // Gitoxide recommends this when correctness is preferred
482        .strict_config(true)
483        // This breaks tests and generally seems undesirable
484        .lossy_config(false)
485}
486
487/// Reads the `jj:trees` header from the commit.
488fn root_tree_from_header(git_commit: &CommitRef) -> Result<Option<MergedTreeId>, ()> {
489    for (key, value) in &git_commit.extra_headers {
490        if *key == JJ_TREES_COMMIT_HEADER {
491            let mut tree_ids = SmallVec::new();
492            for hex in str::from_utf8(value.as_ref()).or(Err(()))?.split(' ') {
493                let tree_id = TreeId::try_from_hex(hex).or(Err(()))?;
494                if tree_id.as_bytes().len() != HASH_LENGTH {
495                    return Err(());
496                }
497                tree_ids.push(tree_id);
498            }
499            // It is invalid to use `jj:trees` with a non-conflicted tree. If this were
500            // allowed, it would be possible to construct a commit which appears to have
501            // different contents depending on whether it is viewed using `jj` or `git`.
502            if tree_ids.len() == 1 || tree_ids.len() % 2 == 0 {
503                return Err(());
504            }
505            return Ok(Some(MergedTreeId::Merge(Merge::from_vec(tree_ids))));
506        }
507    }
508    Ok(None)
509}
510
511fn commit_from_git_without_root_parent(
512    id: &CommitId,
513    git_object: &gix::Object,
514    uses_tree_conflict_format: bool,
515    is_shallow: bool,
516) -> BackendResult<Commit> {
517    let commit = git_object
518        .try_to_commit_ref()
519        .map_err(|err| to_read_object_err(err, id))?;
520
521    // We reverse the bits of the commit id to create the change id. We don't want
522    // to use the first bytes unmodified because then it would be ambiguous
523    // if a given hash prefix refers to the commit id or the change id. It
524    // would have been enough to pick the last 16 bytes instead of the
525    // leading 16 bytes to address that. We also reverse the bits to make it less
526    // likely that users depend on any relationship between the two ids.
527    let change_id = ChangeId::new(
528        id.as_bytes()[4..HASH_LENGTH]
529            .iter()
530            .rev()
531            .map(|b| b.reverse_bits())
532            .collect(),
533    );
534    // shallow commits don't have parents their parents actually fetched, so we
535    // discard them here
536    // TODO: This causes issues when a shallow repository is deepened/unshallowed
537    let parents = if is_shallow {
538        vec![]
539    } else {
540        commit
541            .parents()
542            .map(|oid| CommitId::from_bytes(oid.as_bytes()))
543            .collect_vec()
544    };
545    let tree_id = TreeId::from_bytes(commit.tree().as_bytes());
546    // If this commit is a conflict, we'll update the root tree later, when we read
547    // the extra metadata.
548    let root_tree = root_tree_from_header(&commit)
549        .map_err(|()| to_read_object_err("Invalid jj:trees header", id))?;
550    let root_tree = root_tree.unwrap_or_else(|| {
551        if uses_tree_conflict_format {
552            MergedTreeId::resolved(tree_id)
553        } else {
554            MergedTreeId::Legacy(tree_id)
555        }
556    });
557    // Use lossy conversion as commit message with "mojibake" is still better than
558    // nothing.
559    // TODO: what should we do with commit.encoding?
560    let description = String::from_utf8_lossy(commit.message).into_owned();
561    let author = signature_from_git(commit.author());
562    let committer = signature_from_git(commit.committer());
563
564    // If the commit is signed, extract both the signature and the signed data
565    // (which is the commit buffer with the gpgsig header omitted).
566    // We have to re-parse the raw commit data because gix CommitRef does not give
567    // us the sogned data, only the signature.
568    // Ideally, we could use try_to_commit_ref_iter at the beginning of this
569    // function and extract everything from that. For now, this works
570    let secure_sig = commit
571        .extra_headers
572        .iter()
573        // gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too
574        .any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256")
575        .then(|| CommitRefIter::signature(&git_object.data))
576        .transpose()
577        .map_err(|err| to_read_object_err(err, id))?
578        .flatten()
579        .map(|(sig, data)| SecureSig {
580            data: data.to_bstring().into(),
581            sig: sig.into_owned().into(),
582        });
583
584    Ok(Commit {
585        parents,
586        predecessors: vec![],
587        // If this commit has associated extra metadata, we may reset this later.
588        root_tree,
589        change_id,
590        description,
591        author,
592        committer,
593        secure_sig,
594    })
595}
596
/// Stand-in stored in Git for an empty name or email, since Git doesn't
/// support empty ones. It is mapped back to an empty string when reading.
const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING";
598
599fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature {
600    let name = signature.name;
601    let name = if name != EMPTY_STRING_PLACEHOLDER {
602        String::from_utf8_lossy(name).into_owned()
603    } else {
604        "".to_string()
605    };
606    let email = signature.email;
607    let email = if email != EMPTY_STRING_PLACEHOLDER {
608        String::from_utf8_lossy(email).into_owned()
609    } else {
610        "".to_string()
611    };
612    let timestamp = MillisSinceEpoch(signature.time.seconds * 1000);
613    let tz_offset = signature.time.offset.div_euclid(60); // in minutes
614    Signature {
615        name,
616        email,
617        timestamp: Timestamp {
618            timestamp,
619            tz_offset,
620        },
621    }
622}
623
624fn signature_to_git(signature: &Signature) -> gix::actor::SignatureRef<'_> {
625    // git does not support empty names or emails
626    let name = if !signature.name.is_empty() {
627        &signature.name
628    } else {
629        EMPTY_STRING_PLACEHOLDER
630    };
631    let email = if !signature.email.is_empty() {
632        &signature.email
633    } else {
634        EMPTY_STRING_PLACEHOLDER
635    };
636    let time = gix::date::Time::new(
637        signature.timestamp.timestamp.0.div_euclid(1000),
638        signature.timestamp.tz_offset * 60, // in seconds
639    );
640    gix::actor::SignatureRef {
641        name: name.into(),
642        email: email.into(),
643        time,
644    }
645}
646
647fn serialize_extras(commit: &Commit) -> Vec<u8> {
648    let mut proto = crate::protos::git_store::Commit {
649        change_id: commit.change_id.to_bytes(),
650        ..Default::default()
651    };
652    if let MergedTreeId::Merge(tree_ids) = &commit.root_tree {
653        proto.uses_tree_conflict_format = true;
654        if !tree_ids.is_resolved() {
655            // This is done for the sake of jj versions <0.28 (before commit
656            // f7b14be) being able to read the repo. At some point in the
657            // future, we can stop doing it.
658            proto.root_tree = tree_ids.iter().map(|r| r.to_bytes()).collect();
659        }
660    }
661    for predecessor in &commit.predecessors {
662        proto.predecessors.push(predecessor.to_bytes());
663    }
664    proto.encode_to_vec()
665}
666
667fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
668    let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
669    commit.change_id = ChangeId::new(proto.change_id);
670    if let MergedTreeId::Legacy(legacy_tree_id) = &commit.root_tree {
671        if proto.uses_tree_conflict_format {
672            if !proto.root_tree.is_empty() {
673                let merge_builder: MergeBuilder<_> = proto
674                    .root_tree
675                    .iter()
676                    .map(|id_bytes| TreeId::from_bytes(id_bytes))
677                    .collect();
678                commit.root_tree = MergedTreeId::Merge(merge_builder.build());
679            } else {
680                // uses_tree_conflict_format was set but there was no root_tree override in the
681                // proto, which means we should just promote the tree id from the
682                // git commit to be a known-conflict-free tree
683                commit.root_tree = MergedTreeId::resolved(legacy_tree_id.clone());
684            }
685        }
686    }
687    for predecessor in &proto.predecessors {
688        commit.predecessors.push(CommitId::from_bytes(predecessor));
689    }
690}
691
692/// Returns `RefEdit` that will create a ref in `refs/jj/keep` if not exist.
693/// Used for preventing GC of commits we create.
694fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit {
695    let name = format!("{NO_GC_REF_NAMESPACE}{id}");
696    let new = gix::refs::Target::Object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()));
697    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(new.clone());
698    gix::refs::transaction::RefEdit {
699        change: gix::refs::transaction::Change::Update {
700            log: gix::refs::transaction::LogChange {
701                message: "used by jj".into(),
702                ..Default::default()
703            },
704            expected,
705            new,
706        },
707        name: name.try_into().unwrap(),
708        deref: false,
709    }
710}
711
712fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit {
713    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(git_ref.target);
714    gix::refs::transaction::RefEdit {
715        change: gix::refs::transaction::Change::Delete {
716            expected,
717            log: gix::refs::transaction::RefLog::AndReference,
718        },
719        name: git_ref.name,
720        deref: false,
721    }
722}
723
/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other
/// unreachable and non-head refs.
///
/// An existing ref that isn't needed for `new_heads` is still kept if its
/// loose ref file was modified after `keep_newer`. All deletions and updates
/// are applied with a single `edit_references()` call.
///
/// Fails if the refs can't be listed or edited, or if a symbolic ref is found
/// in the no-gc namespace.
fn recreate_no_gc_refs(
    git_repo: &gix::Repository,
    new_heads: impl IntoIterator<Item = CommitId>,
    keep_newer: SystemTime,
) -> BackendResult<()> {
    // Calculate diff between existing no-gc refs and new heads.
    let new_heads: HashSet<CommitId> = new_heads.into_iter().collect();
    let mut no_gc_refs_to_keep_count: usize = 0;
    let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new();
    let git_references = git_repo
        .references()
        .map_err(|err| BackendError::Other(err.into()))?;
    let no_gc_refs_iter = git_references
        .prefixed(NO_GC_REF_NAMESPACE)
        .map_err(|err| BackendError::Other(err.into()))?;
    for git_ref in no_gc_refs_iter {
        let git_ref = git_ref.map_err(BackendError::Other)?.detach();
        let oid = git_ref.target.try_id().ok_or_else(|| {
            let name = git_ref.name.as_bstr();
            BackendError::Other(format!("Symbolic no-gc ref found: {name}").into())
        })?;
        let id = CommitId::from_bytes(oid.as_bytes());
        // A well-formed no-gc ref is named after the hex id of its target.
        let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex();
        if new_heads.contains(&id) && name_good {
            no_gc_refs_to_keep_count += 1;
            continue;
        }
        // Check timestamp of loose ref, but this is still racy on re-import
        // because:
        // - existing packed ref won't be demoted to loose ref
        // - existing loose ref won't be touched
        //
        // TODO: might be better to switch to a dummy merge, where new no-gc ref
        // will always have a unique name. Doing that with the current
        // ref-per-head strategy would increase the number of the no-gc refs.
        // https://github.com/jj-vcs/jj/pull/2659#issuecomment-1837057782
        let loose_ref_path = git_repo.path().join(git_ref.name.to_path());
        // If the metadata can't be read (presumably because the ref exists
        // only as a packed ref), the ref falls through to deletion.
        if let Ok(metadata) = loose_ref_path.metadata() {
            let mtime = metadata.modified().expect("unsupported platform?");
            if mtime > keep_newer {
                tracing::trace!(?git_ref, "not deleting new");
                no_gc_refs_to_keep_count += 1;
                continue;
            }
        }
        // Also deletes no-gc ref of random name created by old jj.
        tracing::trace!(?git_ref, ?name_good, "will delete");
        no_gc_refs_to_delete.push(git_ref);
    }
    tracing::info!(
        new_heads_count = new_heads.len(),
        no_gc_refs_to_keep_count,
        no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(),
        "collected reachable refs"
    );

    // It's slow to delete packed refs one by one, so update refs all at once.
    // The deletions come first, followed by an update for every new head
    // (including the heads whose ref was counted as kept above).
    let ref_edits = itertools::chain(
        no_gc_refs_to_delete.into_iter().map(to_ref_deletion),
        new_heads.iter().map(to_no_gc_ref_update),
    );
    git_repo
        .edit_references(ref_edits)
        .map_err(|err| BackendError::Other(err.into()))?;

    Ok(())
}
793
794fn run_git_gc(git_dir: &Path) -> Result<(), GitGcError> {
795    let mut git = Command::new("git");
796    git.arg("--git-dir=."); // turn off discovery
797    git.arg("gc");
798    // Don't specify it by GIT_DIR/--git-dir. On Windows, the path could be
799    // canonicalized as UNC path, which wouldn't be supported by git.
800    git.current_dir(git_dir);
801    // TODO: pass output to UI layer instead of printing directly here
802    let status = git.status().map_err(GitGcError::GcCommand)?;
803    if !status.success() {
804        return Err(GitGcError::GcCommandErrorStatus(status));
805    }
806    Ok(())
807}
808
809fn validate_git_object_id(id: &impl ObjectId) -> BackendResult<gix::ObjectId> {
810    if id.as_bytes().len() != HASH_LENGTH {
811        return Err(BackendError::InvalidHashLength {
812            expected: HASH_LENGTH,
813            actual: id.as_bytes().len(),
814            object_type: id.object_type(),
815            hash: id.hex(),
816        });
817    }
818    Ok(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
819}
820
821fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError {
822    if matches!(err, gix::object::find::existing::Error::NotFound { .. }) {
823        BackendError::ObjectNotFound {
824            object_type: id.object_type(),
825            hash: id.hex(),
826            source: Box::new(err),
827        }
828    } else {
829        to_read_object_err(err, id)
830    }
831}
832
833fn to_read_object_err(
834    err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
835    id: &impl ObjectId,
836) -> BackendError {
837    BackendError::ReadObject {
838        object_type: id.object_type(),
839        hash: id.hex(),
840        source: err.into(),
841    }
842}
843
844fn to_invalid_utf8_err(source: str::Utf8Error, id: &impl ObjectId) -> BackendError {
845    BackendError::InvalidUtf8 {
846        object_type: id.object_type(),
847        hash: id.hex(),
848        source,
849    }
850}
851
852fn import_extra_metadata_entries_from_heads(
853    git_repo: &gix::Repository,
854    mut_table: &mut MutableTable,
855    _table_lock: &FileLock,
856    head_ids: &HashSet<&CommitId>,
857) -> BackendResult<()> {
858    let shallow_commits = git_repo
859        .shallow_commits()
860        .map_err(|e| BackendError::Other(Box::new(e)))?;
861
862    let mut work_ids = head_ids
863        .iter()
864        .filter(|&id| mut_table.get_value(id.as_bytes()).is_none())
865        .map(|&id| id.clone())
866        .collect_vec();
867    while let Some(id) = work_ids.pop() {
868        let git_object = git_repo
869            .find_object(validate_git_object_id(&id)?)
870            .map_err(|err| map_not_found_err(err, &id))?;
871        let is_shallow = shallow_commits
872            .as_ref()
873            .is_some_and(|shallow| shallow.contains(&git_object.id));
874        // TODO(#1624): Should we read the root tree here and check if it has a
875        // `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
876        // change the description of a commit with tree-level conflicts.
877        let commit = commit_from_git_without_root_parent(&id, &git_object, true, is_shallow)?;
878        mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
879        work_ids.extend(
880            commit
881                .parents
882                .into_iter()
883                .filter(|id| mut_table.get_value(id.as_bytes()).is_none()),
884        );
885    }
886    Ok(())
887}
888
889impl Debug for GitBackend {
890    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
891        f.debug_struct("GitBackend")
892            .field("path", &self.git_repo_path())
893            .finish()
894    }
895}
896
897#[async_trait]
898impl Backend for GitBackend {
899    fn as_any(&self) -> &dyn Any {
900        self
901    }
902
903    fn name(&self) -> &str {
904        Self::name()
905    }
906
907    fn commit_id_length(&self) -> usize {
908        HASH_LENGTH
909    }
910
911    fn change_id_length(&self) -> usize {
912        CHANGE_ID_LENGTH
913    }
914
915    fn root_commit_id(&self) -> &CommitId {
916        &self.root_commit_id
917    }
918
919    fn root_change_id(&self) -> &ChangeId {
920        &self.root_change_id
921    }
922
923    fn empty_tree_id(&self) -> &TreeId {
924        &self.empty_tree_id
925    }
926
927    fn concurrency(&self) -> usize {
928        1
929    }
930
931    async fn read_file(&self, _path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>> {
932        self.read_file_sync(id)
933    }
934
935    async fn write_file(
936        &self,
937        _path: &RepoPath,
938        contents: &mut (dyn Read + Send),
939    ) -> BackendResult<FileId> {
940        let mut bytes = Vec::new();
941        contents.read_to_end(&mut bytes).unwrap();
942        let locked_repo = self.lock_git_repo();
943        let oid = locked_repo
944            .write_blob(bytes)
945            .map_err(|err| BackendError::WriteObject {
946                object_type: "file",
947                source: Box::new(err),
948            })?;
949        Ok(FileId::new(oid.as_bytes().to_vec()))
950    }
951
952    async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
953        let git_blob_id = validate_git_object_id(id)?;
954        let locked_repo = self.lock_git_repo();
955        let mut blob = locked_repo
956            .find_object(git_blob_id)
957            .map_err(|err| map_not_found_err(err, id))?
958            .try_into_blob()
959            .map_err(|err| to_read_object_err(err, id))?;
960        let target = String::from_utf8(blob.take_data())
961            .map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?;
962        Ok(target)
963    }
964
965    async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
966        let locked_repo = self.lock_git_repo();
967        let oid =
968            locked_repo
969                .write_blob(target.as_bytes())
970                .map_err(|err| BackendError::WriteObject {
971                    object_type: "symlink",
972                    source: Box::new(err),
973                })?;
974        Ok(SymlinkId::new(oid.as_bytes().to_vec()))
975    }
976
977    async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
978        if id == &self.empty_tree_id {
979            return Ok(Tree::default());
980        }
981        let git_tree_id = validate_git_object_id(id)?;
982
983        let locked_repo = self.lock_git_repo();
984        let git_tree = locked_repo
985            .find_object(git_tree_id)
986            .map_err(|err| map_not_found_err(err, id))?
987            .try_into_tree()
988            .map_err(|err| to_read_object_err(err, id))?;
989        let mut tree = Tree::default();
990        for entry in git_tree.iter() {
991            let entry = entry.map_err(|err| to_read_object_err(err, id))?;
992            let name =
993                str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?;
994            let (name, value) = match entry.mode().kind() {
995                gix::object::tree::EntryKind::Tree => {
996                    let id = TreeId::from_bytes(entry.oid().as_bytes());
997                    (name, TreeValue::Tree(id))
998                }
999                gix::object::tree::EntryKind::Blob => {
1000                    let id = FileId::from_bytes(entry.oid().as_bytes());
1001                    if let Some(basename) = name.strip_suffix(CONFLICT_SUFFIX) {
1002                        (
1003                            basename,
1004                            TreeValue::Conflict(ConflictId::from_bytes(entry.oid().as_bytes())),
1005                        )
1006                    } else {
1007                        (
1008                            name,
1009                            TreeValue::File {
1010                                id,
1011                                executable: false,
1012                            },
1013                        )
1014                    }
1015                }
1016                gix::object::tree::EntryKind::BlobExecutable => {
1017                    let id = FileId::from_bytes(entry.oid().as_bytes());
1018                    (
1019                        name,
1020                        TreeValue::File {
1021                            id,
1022                            executable: true,
1023                        },
1024                    )
1025                }
1026                gix::object::tree::EntryKind::Link => {
1027                    let id = SymlinkId::from_bytes(entry.oid().as_bytes());
1028                    (name, TreeValue::Symlink(id))
1029                }
1030                gix::object::tree::EntryKind::Commit => {
1031                    let id = CommitId::from_bytes(entry.oid().as_bytes());
1032                    (name, TreeValue::GitSubmodule(id))
1033                }
1034            };
1035            tree.set(RepoPathComponentBuf::from(name), value);
1036        }
1037        Ok(tree)
1038    }
1039
1040    async fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
1041        // Tree entries to be written must be sorted by Entry::filename(), which
1042        // is slightly different from the order of our backend::Tree.
1043        let entries = contents
1044            .entries()
1045            .map(|entry| {
1046                let name = entry.name().as_internal_str();
1047                match entry.value() {
1048                    TreeValue::File {
1049                        id,
1050                        executable: false,
1051                    } => gix::objs::tree::Entry {
1052                        mode: gix::object::tree::EntryKind::Blob.into(),
1053                        filename: name.into(),
1054                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1055                    },
1056                    TreeValue::File {
1057                        id,
1058                        executable: true,
1059                    } => gix::objs::tree::Entry {
1060                        mode: gix::object::tree::EntryKind::BlobExecutable.into(),
1061                        filename: name.into(),
1062                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1063                    },
1064                    TreeValue::Symlink(id) => gix::objs::tree::Entry {
1065                        mode: gix::object::tree::EntryKind::Link.into(),
1066                        filename: name.into(),
1067                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1068                    },
1069                    TreeValue::Tree(id) => gix::objs::tree::Entry {
1070                        mode: gix::object::tree::EntryKind::Tree.into(),
1071                        filename: name.into(),
1072                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1073                    },
1074                    TreeValue::GitSubmodule(id) => gix::objs::tree::Entry {
1075                        mode: gix::object::tree::EntryKind::Commit.into(),
1076                        filename: name.into(),
1077                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1078                    },
1079                    TreeValue::Conflict(id) => gix::objs::tree::Entry {
1080                        mode: gix::object::tree::EntryKind::Blob.into(),
1081                        filename: (name.to_owned() + CONFLICT_SUFFIX).into(),
1082                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1083                    },
1084                }
1085            })
1086            .sorted_unstable()
1087            .collect();
1088        let locked_repo = self.lock_git_repo();
1089        let oid = locked_repo
1090            .write_object(gix::objs::Tree { entries })
1091            .map_err(|err| BackendError::WriteObject {
1092                object_type: "tree",
1093                source: Box::new(err),
1094            })?;
1095        Ok(TreeId::from_bytes(oid.as_bytes()))
1096    }
1097
1098    fn read_conflict(&self, _path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict> {
1099        let mut file = self.read_file_sync(&FileId::new(id.to_bytes()))?;
1100        let mut data = String::new();
1101        file.read_to_string(&mut data)
1102            .map_err(|err| BackendError::ReadObject {
1103                object_type: "conflict".to_owned(),
1104                hash: id.hex(),
1105                source: err.into(),
1106            })?;
1107        let json: serde_json::Value = serde_json::from_str(&data).unwrap();
1108        Ok(Conflict {
1109            removes: conflict_term_list_from_json(json.get("removes").unwrap()),
1110            adds: conflict_term_list_from_json(json.get("adds").unwrap()),
1111        })
1112    }
1113
1114    fn write_conflict(&self, _path: &RepoPath, conflict: &Conflict) -> BackendResult<ConflictId> {
1115        let json = serde_json::json!({
1116            "removes": conflict_term_list_to_json(&conflict.removes),
1117            "adds": conflict_term_list_to_json(&conflict.adds),
1118        });
1119        let json_string = json.to_string();
1120        let bytes = json_string.as_bytes();
1121        let locked_repo = self.lock_git_repo();
1122        let oid = locked_repo
1123            .write_blob(bytes)
1124            .map_err(|err| BackendError::WriteObject {
1125                object_type: "conflict",
1126                source: Box::new(err),
1127            })?;
1128        Ok(ConflictId::from_bytes(oid.as_bytes()))
1129    }
1130
1131    #[tracing::instrument(skip(self))]
1132    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
1133        if *id == self.root_commit_id {
1134            return Ok(make_root_commit(
1135                self.root_change_id().clone(),
1136                self.empty_tree_id.clone(),
1137            ));
1138        }
1139        let git_commit_id = validate_git_object_id(id)?;
1140
1141        let mut commit = {
1142            let locked_repo = self.lock_git_repo();
1143            let git_object = locked_repo
1144                .find_object(git_commit_id)
1145                .map_err(|err| map_not_found_err(err, id))?;
1146            let is_shallow = locked_repo
1147                .shallow_commits()
1148                .ok()
1149                .flatten()
1150                .is_some_and(|shallow| shallow.contains(&git_object.id));
1151            commit_from_git_without_root_parent(id, &git_object, false, is_shallow)?
1152        };
1153        if commit.parents.is_empty() {
1154            commit.parents.push(self.root_commit_id.clone());
1155        };
1156
1157        let table = self.cached_extra_metadata_table()?;
1158        if let Some(extras) = table.get_value(id.as_bytes()) {
1159            deserialize_extras(&mut commit, extras);
1160        } else {
1161            // TODO: Remove this hack and map to ObjectNotFound error if we're sure that
1162            // there are no reachable ancestor commits without extras metadata. Git commits
1163            // imported by jj < 0.8.0 might not have extras (#924).
1164            // https://github.com/jj-vcs/jj/issues/2343
1165            tracing::info!("unimported Git commit found");
1166            self.import_head_commits([id])?;
1167            let table = self.cached_extra_metadata_table()?;
1168            let extras = table.get_value(id.as_bytes()).unwrap();
1169            deserialize_extras(&mut commit, extras);
1170        }
1171        Ok(commit)
1172    }
1173
1174    async fn write_commit(
1175        &self,
1176        mut contents: Commit,
1177        mut sign_with: Option<&mut SigningFn>,
1178    ) -> BackendResult<(CommitId, Commit)> {
1179        assert!(contents.secure_sig.is_none(), "commit.secure_sig was set");
1180
1181        let locked_repo = self.lock_git_repo();
1182        let git_tree_id = match &contents.root_tree {
1183            MergedTreeId::Legacy(tree_id) => validate_git_object_id(tree_id)?,
1184            MergedTreeId::Merge(tree_ids) => match tree_ids.as_resolved() {
1185                Some(tree_id) => validate_git_object_id(tree_id)?,
1186                None => write_tree_conflict(&locked_repo, tree_ids)?,
1187            },
1188        };
1189        let author = signature_to_git(&contents.author);
1190        let mut committer = signature_to_git(&contents.committer);
1191        let message = &contents.description;
1192        if contents.parents.is_empty() {
1193            return Err(BackendError::Other(
1194                "Cannot write a commit with no parents".into(),
1195            ));
1196        }
1197        let mut parents = SmallVec::new();
1198        for parent_id in &contents.parents {
1199            if *parent_id == self.root_commit_id {
1200                // Git doesn't have a root commit, so if the parent is the root commit, we don't
1201                // add it to the list of parents to write in the Git commit. We also check that
1202                // there are no other parents since Git cannot represent a merge between a root
1203                // commit and another commit.
1204                if contents.parents.len() > 1 {
1205                    return Err(BackendError::Unsupported(
1206                        "The Git backend does not support creating merge commits with the root \
1207                         commit as one of the parents."
1208                            .to_owned(),
1209                    ));
1210                }
1211            } else {
1212                parents.push(validate_git_object_id(parent_id)?);
1213            }
1214        }
1215        let mut extra_headers = vec![];
1216        if let MergedTreeId::Merge(tree_ids) = &contents.root_tree {
1217            if !tree_ids.is_resolved() {
1218                let value = tree_ids.iter().map(|id| id.hex()).join(" ").into_bytes();
1219                extra_headers.push((
1220                    BString::new(JJ_TREES_COMMIT_HEADER.to_vec()),
1221                    BString::new(value),
1222                ));
1223            }
1224        }
1225        let extras = serialize_extras(&contents);
1226
1227        // If two writers write commits of the same id with different metadata, they
1228        // will both succeed and the metadata entries will be "merged" later. Since
1229        // metadata entry is keyed by the commit id, one of the entries would be lost.
1230        // To prevent such race condition locally, we extend the scope covered by the
1231        // table lock. This is still racy if multiple machines are involved and the
1232        // repository is rsync-ed.
1233        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
1234        let id = loop {
1235            let mut commit = gix::objs::Commit {
1236                message: message.to_owned().into(),
1237                tree: git_tree_id,
1238                author: author.into(),
1239                committer: committer.into(),
1240                encoding: None,
1241                parents: parents.clone(),
1242                extra_headers: extra_headers.clone(),
1243            };
1244
1245            if let Some(sign) = &mut sign_with {
1246                // we don't use gix pool, but at least use their heuristic
1247                let mut data = Vec::with_capacity(512);
1248                commit.write_to(&mut data).unwrap();
1249
1250                let sig = sign(&data).map_err(|err| BackendError::WriteObject {
1251                    object_type: "commit",
1252                    source: Box::new(err),
1253                })?;
1254                commit
1255                    .extra_headers
1256                    .push(("gpgsig".into(), sig.clone().into()));
1257                contents.secure_sig = Some(SecureSig { data, sig });
1258            }
1259
1260            let git_id =
1261                locked_repo
1262                    .write_object(&commit)
1263                    .map_err(|err| BackendError::WriteObject {
1264                        object_type: "commit",
1265                        source: Box::new(err),
1266                    })?;
1267
1268            match table.get_value(git_id.as_bytes()) {
1269                Some(existing_extras) if existing_extras != extras => {
1270                    // It's possible a commit already exists with the same commit id but different
1271                    // change id. Adjust the timestamp until this is no longer the case.
1272                    committer.time.seconds -= 1;
1273                }
1274                _ => break CommitId::from_bytes(git_id.as_bytes()),
1275            }
1276        };
1277
1278        // Everything up to this point had no permanent effect on the repo except
1279        // GC-able objects
1280        locked_repo
1281            .edit_reference(to_no_gc_ref_update(&id))
1282            .map_err(|err| BackendError::Other(Box::new(err)))?;
1283
1284        // Update the signature to match the one that was actually written to the object
1285        // store
1286        contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000);
1287        let mut mut_table = table.start_mutation();
1288        mut_table.add_entry(id.to_bytes(), extras);
1289        self.save_extra_metadata_table(mut_table, &table_lock)?;
1290        Ok((id, contents))
1291    }
1292
1293    fn get_copy_records(
1294        &self,
1295        paths: Option<&[RepoPathBuf]>,
1296        root_id: &CommitId,
1297        head_id: &CommitId,
1298    ) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
1299        let repo = self.git_repo();
1300        let root_tree = self.read_tree_for_commit(&repo, root_id)?;
1301        let head_tree = self.read_tree_for_commit(&repo, head_id)?;
1302
1303        let change_to_copy_record =
1304            |change: gix::object::tree::diff::Change| -> BackendResult<Option<CopyRecord>> {
1305                let gix::object::tree::diff::Change::Rewrite {
1306                    source_location,
1307                    source_id,
1308                    location: dest_location,
1309                    ..
1310                } = change
1311                else {
1312                    return Ok(None);
1313                };
1314
1315                let source = str::from_utf8(source_location)
1316                    .map_err(|err| to_invalid_utf8_err(err, root_id))?;
1317                let dest = str::from_utf8(dest_location)
1318                    .map_err(|err| to_invalid_utf8_err(err, head_id))?;
1319
1320                let target = RepoPathBuf::from_internal_string(dest);
1321                if !paths.is_none_or(|paths| paths.contains(&target)) {
1322                    return Ok(None);
1323                }
1324
1325                Ok(Some(CopyRecord {
1326                    target,
1327                    target_commit: head_id.clone(),
1328                    source: RepoPathBuf::from_internal_string(source),
1329                    source_file: FileId::from_bytes(source_id.as_bytes()),
1330                    source_commit: root_id.clone(),
1331                }))
1332            };
1333
1334        let mut records: Vec<BackendResult<CopyRecord>> = Vec::new();
1335        root_tree
1336            .changes()
1337            .map_err(|err| BackendError::Other(err.into()))?
1338            .options(|opts| {
1339                opts.track_path().track_rewrites(Some(gix::diff::Rewrites {
1340                    copies: Some(gix::diff::rewrites::Copies {
1341                        source: gix::diff::rewrites::CopySource::FromSetOfModifiedFiles,
1342                        percentage: Some(0.5),
1343                    }),
1344                    percentage: Some(0.5),
1345                    limit: 1000,
1346                    track_empty: false,
1347                }));
1348            })
1349            .for_each_to_obtain_tree_with_cache(
1350                &head_tree,
1351                &mut self.new_diff_platform()?,
1352                |change| -> BackendResult<_> {
1353                    match change_to_copy_record(change) {
1354                        Ok(None) => {}
1355                        Ok(Some(change)) => records.push(Ok(change)),
1356                        Err(err) => records.push(Err(err)),
1357                    }
1358                    Ok(gix::object::tree::diff::Action::Continue)
1359                },
1360            )
1361            .map_err(|err| BackendError::Other(err.into()))?;
1362        Ok(Box::pin(futures::stream::iter(records)))
1363    }
1364
1365    #[tracing::instrument(skip(self, index))]
1366    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
1367        let git_repo = self.lock_git_repo();
1368        let new_heads = index
1369            .all_heads_for_gc()
1370            .map_err(|err| BackendError::Other(err.into()))?
1371            .filter(|id| *id != self.root_commit_id);
1372        recreate_no_gc_refs(&git_repo, new_heads, keep_newer)?;
1373        // TODO: remove unreachable entries from extras table if segment file
1374        // mtime <= keep_newer? (it won't be consistent with no-gc refs
1375        // preserved by the keep_newer timestamp though)
1376        // TODO: remove unreachable extras table segments
1377        // TODO: pass in keep_newer to "git gc" command
1378        run_git_gc(self.git_repo_path()).map_err(|err| BackendError::Other(err.into()))?;
1379        // Since "git gc" will move loose refs into packed refs, in-memory
1380        // packed-refs cache should be invalidated without relying on mtime.
1381        git_repo.refs.force_refresh_packed_buffer().ok();
1382        Ok(())
1383    }
1384}
1385
1386/// Write a tree conflict as a special tree with `.jjconflict-base-N` and
1387/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd.
1388fn write_tree_conflict(
1389    repo: &gix::Repository,
1390    conflict: &Merge<TreeId>,
1391) -> BackendResult<gix::ObjectId> {
1392    // Tree entries to be written must be sorted by Entry::filename().
1393    let mut entries = itertools::chain(
1394        conflict
1395            .removes()
1396            .enumerate()
1397            .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)),
1398        conflict
1399            .adds()
1400            .enumerate()
1401            .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)),
1402    )
1403    .map(|(name, tree_id)| gix::objs::tree::Entry {
1404        mode: gix::object::tree::EntryKind::Tree.into(),
1405        filename: name.into(),
1406        oid: gix::ObjectId::from_bytes_or_panic(tree_id.as_bytes()),
1407    })
1408    .collect_vec();
1409    let readme_id = repo
1410        .write_blob(
1411            r#"This commit was made by jj, https://github.com/jj-vcs/jj.
1412The commit contains file conflicts, and therefore looks wrong when used with plain
1413Git or other tools that are unfamiliar with jj.
1414
1415The .jjconflict-* directories represent the different inputs to the conflict.
1416For details, see
1417https://jj-vcs.github.io/jj/prerelease/git-compatibility/#format-mapping-details
1418
1419If you see this file in your working copy, it probably means that you used a
1420regular `git` command to check out a conflicted commit. Use `jj abandon` to
1421recover.
1422"#,
1423        )
1424        .map_err(|err| {
1425            BackendError::Other(format!("Failed to write README for conflict tree: {err}").into())
1426        })?
1427        .detach();
1428    entries.push(gix::objs::tree::Entry {
1429        mode: gix::object::tree::EntryKind::Blob.into(),
1430        filename: "README".into(),
1431        oid: readme_id,
1432    });
1433    entries.sort_unstable();
1434    let id = repo
1435        .write_object(gix::objs::Tree { entries })
1436        .map_err(|err| BackendError::WriteObject {
1437            object_type: "tree",
1438            source: Box::new(err),
1439        })?;
1440    Ok(id.detach())
1441}
1442
1443fn conflict_term_list_to_json(parts: &[ConflictTerm]) -> serde_json::Value {
1444    serde_json::Value::Array(parts.iter().map(conflict_term_to_json).collect())
1445}
1446
1447fn conflict_term_list_from_json(json: &serde_json::Value) -> Vec<ConflictTerm> {
1448    json.as_array()
1449        .unwrap()
1450        .iter()
1451        .map(conflict_term_from_json)
1452        .collect()
1453}
1454
1455fn conflict_term_to_json(part: &ConflictTerm) -> serde_json::Value {
1456    serde_json::json!({
1457        "value": tree_value_to_json(&part.value),
1458    })
1459}
1460
1461fn conflict_term_from_json(json: &serde_json::Value) -> ConflictTerm {
1462    let json_value = json.get("value").unwrap();
1463    ConflictTerm {
1464        value: tree_value_from_json(json_value),
1465    }
1466}
1467
1468fn tree_value_to_json(value: &TreeValue) -> serde_json::Value {
1469    match value {
1470        TreeValue::File { id, executable } => serde_json::json!({
1471             "file": {
1472                 "id": id.hex(),
1473                 "executable": executable,
1474             },
1475        }),
1476        TreeValue::Symlink(id) => serde_json::json!({
1477             "symlink_id": id.hex(),
1478        }),
1479        TreeValue::Tree(id) => serde_json::json!({
1480             "tree_id": id.hex(),
1481        }),
1482        TreeValue::GitSubmodule(id) => serde_json::json!({
1483             "submodule_id": id.hex(),
1484        }),
1485        TreeValue::Conflict(id) => serde_json::json!({
1486             "conflict_id": id.hex(),
1487        }),
1488    }
1489}
1490
1491fn tree_value_from_json(json: &serde_json::Value) -> TreeValue {
1492    if let Some(json_file) = json.get("file") {
1493        TreeValue::File {
1494            id: FileId::new(bytes_vec_from_json(json_file.get("id").unwrap())),
1495            executable: json_file.get("executable").unwrap().as_bool().unwrap(),
1496        }
1497    } else if let Some(json_id) = json.get("symlink_id") {
1498        TreeValue::Symlink(SymlinkId::new(bytes_vec_from_json(json_id)))
1499    } else if let Some(json_id) = json.get("tree_id") {
1500        TreeValue::Tree(TreeId::new(bytes_vec_from_json(json_id)))
1501    } else if let Some(json_id) = json.get("submodule_id") {
1502        TreeValue::GitSubmodule(CommitId::new(bytes_vec_from_json(json_id)))
1503    } else if let Some(json_id) = json.get("conflict_id") {
1504        TreeValue::Conflict(ConflictId::new(bytes_vec_from_json(json_id)))
1505    } else {
1506        panic!("unexpected json value in conflict: {json:#?}");
1507    }
1508}
1509
1510fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> {
1511    hex::decode(value.as_str().unwrap()).unwrap()
1512}
1513
1514#[cfg(test)]
1515mod tests {
1516    use assert_matches::assert_matches;
1517    use hex::ToHex as _;
1518    use pollster::FutureExt as _;
1519
1520    use super::*;
1521    use crate::config::StackedConfig;
1522    use crate::content_hash::blake2b_hash;
1523    use crate::tests::new_temp_dir;
1524
1525    const GIT_USER: &str = "Someone";
1526    const GIT_EMAIL: &str = "someone@example.com";
1527
1528    fn git_config() -> Vec<bstr::BString> {
1529        vec![
1530            format!("user.name = {GIT_USER}").into(),
1531            format!("user.email = {GIT_EMAIL}").into(),
1532            "init.defaultBranch = master".into(),
1533        ]
1534    }
1535
1536    fn open_options() -> gix::open::Options {
1537        gix::open::Options::isolated()
1538            .config_overrides(git_config())
1539            .strict_config(true)
1540            .lossy_config(false)
1541    }
1542
1543    fn git_init(directory: impl AsRef<Path>) -> gix::Repository {
1544        gix::ThreadSafeRepository::init_opts(
1545            directory,
1546            gix::create::Kind::WithWorktree,
1547            gix::create::Options::default(),
1548            open_options(),
1549        )
1550        .unwrap()
1551        .to_thread_local()
1552    }
1553
1554    #[test]
1555    fn read_plain_git_commit() {
1556        let settings = user_settings();
1557        let temp_dir = new_temp_dir();
1558        let store_path = temp_dir.path();
1559        let git_repo_path = temp_dir.path().join("git");
1560        let git_repo = git_init(git_repo_path);
1561
1562        // Add a commit with some files in
1563        let blob1 = git_repo.write_blob(b"content1").unwrap().detach();
1564        let blob2 = git_repo.write_blob(b"normal").unwrap().detach();
1565        let mut dir_tree_editor = git_repo.empty_tree().edit().unwrap();
1566        dir_tree_editor
1567            .upsert("normal", gix::object::tree::EntryKind::Blob, blob1)
1568            .unwrap();
1569        dir_tree_editor
1570            .upsert("symlink", gix::object::tree::EntryKind::Link, blob2)
1571            .unwrap();
1572        let dir_tree_id = dir_tree_editor.write().unwrap().detach();
1573        let mut root_tree_builder = git_repo.empty_tree().edit().unwrap();
1574        root_tree_builder
1575            .upsert("dir", gix::object::tree::EntryKind::Tree, dir_tree_id)
1576            .unwrap();
1577        let root_tree_id = root_tree_builder.write().unwrap().detach();
1578        let git_author = gix::actor::Signature {
1579            name: "git author".into(),
1580            email: "git.author@example.com".into(),
1581            time: gix::date::Time::new(1000, 60 * 60),
1582        };
1583        let git_committer = gix::actor::Signature {
1584            name: "git committer".into(),
1585            email: "git.committer@example.com".into(),
1586            time: gix::date::Time::new(2000, -480 * 60),
1587        };
1588        let git_commit_id = git_repo
1589            .commit_as(
1590                &git_committer,
1591                &git_author,
1592                "refs/heads/dummy",
1593                "git commit message",
1594                root_tree_id,
1595                [] as [gix::ObjectId; 0],
1596            )
1597            .unwrap()
1598            .detach();
1599        git_repo
1600            .find_reference("refs/heads/dummy")
1601            .unwrap()
1602            .delete()
1603            .unwrap();
1604        let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263");
1605        // The change id is the leading reverse bits of the commit id
1606        let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25");
1607        // Check that the git commit above got the hash we expect
1608        assert_eq!(
1609            git_commit_id.as_bytes(),
1610            commit_id.as_bytes(),
1611            "{git_commit_id:?} vs {commit_id:?}"
1612        );
1613
1614        // Add an empty commit on top
1615        let git_commit_id2 = git_repo
1616            .commit_as(
1617                &git_committer,
1618                &git_author,
1619                "refs/heads/dummy2",
1620                "git commit message 2",
1621                root_tree_id,
1622                [git_commit_id],
1623            )
1624            .unwrap()
1625            .detach();
1626        git_repo
1627            .find_reference("refs/heads/dummy2")
1628            .unwrap()
1629            .delete()
1630            .unwrap();
1631        let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes());
1632
1633        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1634
1635        // Import the head commit and its ancestors
1636        backend.import_head_commits([&commit_id2]).unwrap();
1637        // Ref should be created only for the head commit
1638        let git_refs = backend
1639            .git_repo()
1640            .references()
1641            .unwrap()
1642            .prefixed("refs/jj/keep/")
1643            .unwrap()
1644            .map(|git_ref| git_ref.unwrap().id().detach())
1645            .collect_vec();
1646        assert_eq!(git_refs, vec![git_commit_id2]);
1647
1648        let commit = backend.read_commit(&commit_id).block_on().unwrap();
1649        assert_eq!(&commit.change_id, &change_id);
1650        assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
1651        assert_eq!(commit.predecessors, vec![]);
1652        assert_eq!(
1653            commit.root_tree.to_merge(),
1654            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1655        );
1656        assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1657        assert_eq!(commit.description, "git commit message");
1658        assert_eq!(commit.author.name, "git author");
1659        assert_eq!(commit.author.email, "git.author@example.com");
1660        assert_eq!(
1661            commit.author.timestamp.timestamp,
1662            MillisSinceEpoch(1000 * 1000)
1663        );
1664        assert_eq!(commit.author.timestamp.tz_offset, 60);
1665        assert_eq!(commit.committer.name, "git committer");
1666        assert_eq!(commit.committer.email, "git.committer@example.com");
1667        assert_eq!(
1668            commit.committer.timestamp.timestamp,
1669            MillisSinceEpoch(2000 * 1000)
1670        );
1671        assert_eq!(commit.committer.timestamp.tz_offset, -480);
1672
1673        let root_tree = backend
1674            .read_tree(
1675                RepoPath::root(),
1676                &TreeId::from_bytes(root_tree_id.as_bytes()),
1677            )
1678            .block_on()
1679            .unwrap();
1680        let mut root_entries = root_tree.entries();
1681        let dir = root_entries.next().unwrap();
1682        assert_eq!(root_entries.next(), None);
1683        assert_eq!(dir.name().as_internal_str(), "dir");
1684        assert_eq!(
1685            dir.value(),
1686            &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes()))
1687        );
1688
1689        let dir_tree = backend
1690            .read_tree(
1691                RepoPath::from_internal_string("dir"),
1692                &TreeId::from_bytes(dir_tree_id.as_bytes()),
1693            )
1694            .block_on()
1695            .unwrap();
1696        let mut entries = dir_tree.entries();
1697        let file = entries.next().unwrap();
1698        let symlink = entries.next().unwrap();
1699        assert_eq!(entries.next(), None);
1700        assert_eq!(file.name().as_internal_str(), "normal");
1701        assert_eq!(
1702            file.value(),
1703            &TreeValue::File {
1704                id: FileId::from_bytes(blob1.as_bytes()),
1705                executable: false
1706            }
1707        );
1708        assert_eq!(symlink.name().as_internal_str(), "symlink");
1709        assert_eq!(
1710            symlink.value(),
1711            &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
1712        );
1713
1714        let commit2 = backend.read_commit(&commit_id2).block_on().unwrap();
1715        assert_eq!(commit2.parents, vec![commit_id.clone()]);
1716        assert_eq!(commit.predecessors, vec![]);
1717        assert_eq!(
1718            commit.root_tree.to_merge(),
1719            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1720        );
1721        assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1722    }
1723
1724    #[test]
1725    fn read_git_commit_without_importing() {
1726        let settings = user_settings();
1727        let temp_dir = new_temp_dir();
1728        let store_path = temp_dir.path();
1729        let git_repo_path = temp_dir.path().join("git");
1730        let git_repo = git_init(&git_repo_path);
1731
1732        let signature = gix::actor::Signature {
1733            name: GIT_USER.into(),
1734            email: GIT_EMAIL.into(),
1735            time: gix::date::Time::now_utc(),
1736        };
1737        let empty_tree_id =
1738            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1739        let git_commit_id = git_repo
1740            .commit_as(
1741                &signature,
1742                &signature,
1743                "refs/heads/main",
1744                "git commit message",
1745                empty_tree_id,
1746                [] as [gix::ObjectId; 0],
1747            )
1748            .unwrap();
1749
1750        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1751
1752        // read_commit() without import_head_commits() works as of now. This might be
1753        // changed later.
1754        assert!(backend
1755            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1756            .block_on()
1757            .is_ok());
1758        assert!(
1759            backend
1760                .cached_extra_metadata_table()
1761                .unwrap()
1762                .get_value(git_commit_id.as_bytes())
1763                .is_some(),
1764            "extra metadata should have been be created"
1765        );
1766    }
1767
1768    #[test]
1769    fn read_signed_git_commit() {
1770        let settings = user_settings();
1771        let temp_dir = new_temp_dir();
1772        let store_path = temp_dir.path();
1773        let git_repo_path = temp_dir.path().join("git");
1774        let git_repo = git_init(git_repo_path);
1775
1776        let signature = gix::actor::Signature {
1777            name: GIT_USER.into(),
1778            email: GIT_EMAIL.into(),
1779            time: gix::date::Time::now_utc(),
1780        };
1781        let empty_tree_id =
1782            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1783
1784        let secure_sig =
1785            "here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP\n";
1786
1787        let mut commit = gix::objs::Commit {
1788            tree: empty_tree_id,
1789            parents: smallvec::SmallVec::new(),
1790            author: signature.clone(),
1791            committer: signature.clone(),
1792            encoding: None,
1793            message: "git commit message".into(),
1794            extra_headers: Vec::new(),
1795        };
1796
1797        let mut commit_buf = Vec::new();
1798        commit.write_to(&mut commit_buf).unwrap();
1799        let commit_str = std::str::from_utf8(&commit_buf).unwrap();
1800
1801        commit
1802            .extra_headers
1803            .push(("gpgsig".into(), secure_sig.into()));
1804
1805        let git_commit_id = git_repo.write_object(&commit).unwrap();
1806
1807        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1808
1809        let commit = backend
1810            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1811            .block_on()
1812            .unwrap();
1813
1814        let sig = commit.secure_sig.expect("failed to read the signature");
1815
1816        // converting to string for nicer assert diff
1817        assert_eq!(std::str::from_utf8(&sig.sig).unwrap(), secure_sig);
1818        assert_eq!(std::str::from_utf8(&sig.data).unwrap(), commit_str);
1819    }
1820
1821    #[test]
1822    fn read_empty_string_placeholder() {
1823        let git_signature1 = gix::actor::SignatureRef {
1824            name: EMPTY_STRING_PLACEHOLDER.into(),
1825            email: "git.author@example.com".into(),
1826            time: gix::date::Time::new(1000, 60 * 60),
1827        };
1828        let signature1 = signature_from_git(git_signature1);
1829        assert!(signature1.name.is_empty());
1830        assert_eq!(signature1.email, "git.author@example.com");
1831        let git_signature2 = gix::actor::SignatureRef {
1832            name: "git committer".into(),
1833            email: EMPTY_STRING_PLACEHOLDER.into(),
1834            time: gix::date::Time::new(2000, -480 * 60),
1835        };
1836        let signature2 = signature_from_git(git_signature2);
1837        assert_eq!(signature2.name, "git committer");
1838        assert!(signature2.email.is_empty());
1839    }
1840
1841    #[test]
1842    fn write_empty_string_placeholder() {
1843        let signature1 = Signature {
1844            name: "".to_string(),
1845            email: "someone@example.com".to_string(),
1846            timestamp: Timestamp {
1847                timestamp: MillisSinceEpoch(0),
1848                tz_offset: 0,
1849            },
1850        };
1851        let git_signature1 = signature_to_git(&signature1);
1852        assert_eq!(git_signature1.name, EMPTY_STRING_PLACEHOLDER);
1853        assert_eq!(git_signature1.email, "someone@example.com");
1854        let signature2 = Signature {
1855            name: "Someone".to_string(),
1856            email: "".to_string(),
1857            timestamp: Timestamp {
1858                timestamp: MillisSinceEpoch(0),
1859                tz_offset: 0,
1860            },
1861        };
1862        let git_signature2 = signature_to_git(&signature2);
1863        assert_eq!(git_signature2.name, "Someone");
1864        assert_eq!(git_signature2.email, EMPTY_STRING_PLACEHOLDER);
1865    }
1866
1867    /// Test that parents get written correctly
1868    #[test]
1869    fn git_commit_parents() {
1870        let settings = user_settings();
1871        let temp_dir = new_temp_dir();
1872        let store_path = temp_dir.path();
1873        let git_repo_path = temp_dir.path().join("git");
1874        let git_repo = git_init(&git_repo_path);
1875
1876        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1877        let mut commit = Commit {
1878            parents: vec![],
1879            predecessors: vec![],
1880            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
1881            change_id: ChangeId::from_hex("abc123"),
1882            description: "".to_string(),
1883            author: create_signature(),
1884            committer: create_signature(),
1885            secure_sig: None,
1886        };
1887
1888        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
1889            backend.write_commit(commit, None).block_on()
1890        };
1891
1892        // No parents
1893        commit.parents = vec![];
1894        assert_matches!(
1895            write_commit(commit.clone()),
1896            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
1897        );
1898
1899        // Only root commit as parent
1900        commit.parents = vec![backend.root_commit_id().clone()];
1901        let first_id = write_commit(commit.clone()).unwrap().0;
1902        let first_commit = backend.read_commit(&first_id).block_on().unwrap();
1903        assert_eq!(first_commit, commit);
1904        let first_git_commit = git_repo.find_commit(git_id(&first_id)).unwrap();
1905        assert!(first_git_commit.parent_ids().collect_vec().is_empty());
1906
1907        // Only non-root commit as parent
1908        commit.parents = vec![first_id.clone()];
1909        let second_id = write_commit(commit.clone()).unwrap().0;
1910        let second_commit = backend.read_commit(&second_id).block_on().unwrap();
1911        assert_eq!(second_commit, commit);
1912        let second_git_commit = git_repo.find_commit(git_id(&second_id)).unwrap();
1913        assert_eq!(
1914            second_git_commit.parent_ids().collect_vec(),
1915            vec![git_id(&first_id)]
1916        );
1917
1918        // Merge commit
1919        commit.parents = vec![first_id.clone(), second_id.clone()];
1920        let merge_id = write_commit(commit.clone()).unwrap().0;
1921        let merge_commit = backend.read_commit(&merge_id).block_on().unwrap();
1922        assert_eq!(merge_commit, commit);
1923        let merge_git_commit = git_repo.find_commit(git_id(&merge_id)).unwrap();
1924        assert_eq!(
1925            merge_git_commit.parent_ids().collect_vec(),
1926            vec![git_id(&first_id), git_id(&second_id)]
1927        );
1928
1929        // Merge commit with root as one parent
1930        commit.parents = vec![first_id, backend.root_commit_id().clone()];
1931        assert_matches!(
1932            write_commit(commit),
1933            Err(BackendError::Unsupported(message)) if message.contains("root commit")
1934        );
1935    }
1936
1937    #[test]
1938    fn write_tree_conflicts() {
1939        let settings = user_settings();
1940        let temp_dir = new_temp_dir();
1941        let store_path = temp_dir.path();
1942        let git_repo_path = temp_dir.path().join("git");
1943        let git_repo = git_init(&git_repo_path);
1944
1945        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1946        let create_tree = |i| {
1947            let blob_id = git_repo.write_blob(format!("content {i}")).unwrap();
1948            let mut tree_builder = git_repo.empty_tree().edit().unwrap();
1949            tree_builder
1950                .upsert(
1951                    format!("file{i}"),
1952                    gix::object::tree::EntryKind::Blob,
1953                    blob_id,
1954                )
1955                .unwrap();
1956            TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
1957        };
1958
1959        let root_tree = Merge::from_removes_adds(
1960            vec![create_tree(0), create_tree(1)],
1961            vec![create_tree(2), create_tree(3), create_tree(4)],
1962        );
1963        let mut commit = Commit {
1964            parents: vec![backend.root_commit_id().clone()],
1965            predecessors: vec![],
1966            root_tree: MergedTreeId::Merge(root_tree.clone()),
1967            change_id: ChangeId::from_hex("abc123"),
1968            description: "".to_string(),
1969            author: create_signature(),
1970            committer: create_signature(),
1971            secure_sig: None,
1972        };
1973
1974        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
1975            backend.write_commit(commit, None).block_on()
1976        };
1977
1978        // When writing a tree-level conflict, the root tree on the git side has the
1979        // individual trees as subtrees.
1980        let read_commit_id = write_commit(commit.clone()).unwrap().0;
1981        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
1982        assert_eq!(read_commit, commit);
1983        let git_commit = git_repo
1984            .find_commit(gix::ObjectId::from_bytes_or_panic(
1985                read_commit_id.as_bytes(),
1986            ))
1987            .unwrap();
1988        let git_tree = git_repo.find_tree(git_commit.tree_id().unwrap()).unwrap();
1989        assert!(git_tree
1990            .iter()
1991            .map(Result::unwrap)
1992            .filter(|entry| entry.filename() != b"README")
1993            .all(|entry| entry.mode().0 == 0o040000));
1994        let mut iter = git_tree.iter().map(Result::unwrap);
1995        let entry = iter.next().unwrap();
1996        assert_eq!(entry.filename(), b".jjconflict-base-0");
1997        assert_eq!(
1998            entry.id().as_bytes(),
1999            root_tree.get_remove(0).unwrap().as_bytes()
2000        );
2001        let entry = iter.next().unwrap();
2002        assert_eq!(entry.filename(), b".jjconflict-base-1");
2003        assert_eq!(
2004            entry.id().as_bytes(),
2005            root_tree.get_remove(1).unwrap().as_bytes()
2006        );
2007        let entry = iter.next().unwrap();
2008        assert_eq!(entry.filename(), b".jjconflict-side-0");
2009        assert_eq!(
2010            entry.id().as_bytes(),
2011            root_tree.get_add(0).unwrap().as_bytes()
2012        );
2013        let entry = iter.next().unwrap();
2014        assert_eq!(entry.filename(), b".jjconflict-side-1");
2015        assert_eq!(
2016            entry.id().as_bytes(),
2017            root_tree.get_add(1).unwrap().as_bytes()
2018        );
2019        let entry = iter.next().unwrap();
2020        assert_eq!(entry.filename(), b".jjconflict-side-2");
2021        assert_eq!(
2022            entry.id().as_bytes(),
2023            root_tree.get_add(2).unwrap().as_bytes()
2024        );
2025        let entry = iter.next().unwrap();
2026        assert_eq!(entry.filename(), b"README");
2027        assert_eq!(entry.mode().0, 0o100644);
2028        assert!(iter.next().is_none());
2029
2030        // When writing a single tree using the new format, it's represented by a
2031        // regular git tree.
2032        commit.root_tree = MergedTreeId::resolved(create_tree(5));
2033        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2034        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2035        assert_eq!(read_commit, commit);
2036        let git_commit = git_repo
2037            .find_commit(gix::ObjectId::from_bytes_or_panic(
2038                read_commit_id.as_bytes(),
2039            ))
2040            .unwrap();
2041        assert_eq!(
2042            MergedTreeId::resolved(TreeId::from_bytes(git_commit.tree_id().unwrap().as_bytes())),
2043            commit.root_tree
2044        );
2045    }
2046
2047    #[test]
2048    fn commit_has_ref() {
2049        let settings = user_settings();
2050        let temp_dir = new_temp_dir();
2051        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2052        let git_repo = backend.git_repo();
2053        let signature = Signature {
2054            name: "Someone".to_string(),
2055            email: "someone@example.com".to_string(),
2056            timestamp: Timestamp {
2057                timestamp: MillisSinceEpoch(0),
2058                tz_offset: 0,
2059            },
2060        };
2061        let commit = Commit {
2062            parents: vec![backend.root_commit_id().clone()],
2063            predecessors: vec![],
2064            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2065            change_id: ChangeId::new(vec![]),
2066            description: "initial".to_string(),
2067            author: signature.clone(),
2068            committer: signature,
2069            secure_sig: None,
2070        };
2071        let commit_id = backend.write_commit(commit, None).block_on().unwrap().0;
2072        let git_refs = git_repo.references().unwrap();
2073        let git_ref_ids: Vec<_> = git_refs
2074            .prefixed("refs/jj/keep/")
2075            .unwrap()
2076            .map(|x| x.unwrap().id().detach())
2077            .collect();
2078        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2079
2080        // Concurrently-running GC deletes the ref, leaving the extra metadata.
2081        for git_ref in git_refs.prefixed("refs/jj/keep/").unwrap() {
2082            git_ref.unwrap().delete().unwrap();
2083        }
2084        // Re-imported commit should have new ref.
2085        backend.import_head_commits([&commit_id]).unwrap();
2086        let git_refs = git_repo.references().unwrap();
2087        let git_ref_ids: Vec<_> = git_refs
2088            .prefixed("refs/jj/keep/")
2089            .unwrap()
2090            .map(|x| x.unwrap().id().detach())
2091            .collect();
2092        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2093    }
2094
2095    #[test]
2096    fn import_head_commits_duplicates() {
2097        let settings = user_settings();
2098        let temp_dir = new_temp_dir();
2099        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2100        let git_repo = backend.git_repo();
2101
2102        let signature = gix::actor::Signature {
2103            name: GIT_USER.into(),
2104            email: GIT_EMAIL.into(),
2105            time: gix::date::Time::now_utc(),
2106        };
2107        let empty_tree_id =
2108            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
2109        let git_commit_id = git_repo
2110            .commit_as(
2111                &signature,
2112                &signature,
2113                "refs/heads/main",
2114                "git commit message",
2115                empty_tree_id,
2116                [] as [gix::ObjectId; 0],
2117            )
2118            .unwrap()
2119            .detach();
2120        let commit_id = CommitId::from_bytes(git_commit_id.as_bytes());
2121
2122        // Ref creation shouldn't fail because of duplicated head ids.
2123        backend
2124            .import_head_commits([&commit_id, &commit_id])
2125            .unwrap();
2126        assert!(git_repo
2127            .references()
2128            .unwrap()
2129            .prefixed("refs/jj/keep/")
2130            .unwrap()
2131            .any(|git_ref| git_ref.unwrap().id().detach() == git_commit_id));
2132    }
2133
2134    #[test]
2135    fn overlapping_git_commit_id() {
2136        let settings = user_settings();
2137        let temp_dir = new_temp_dir();
2138        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2139        let commit1 = Commit {
2140            parents: vec![backend.root_commit_id().clone()],
2141            predecessors: vec![],
2142            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2143            change_id: ChangeId::new(vec![]),
2144            description: "initial".to_string(),
2145            author: create_signature(),
2146            committer: create_signature(),
2147            secure_sig: None,
2148        };
2149
2150        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2151            backend.write_commit(commit, None).block_on()
2152        };
2153
2154        let (commit_id1, mut commit2) = write_commit(commit1).unwrap();
2155        commit2.predecessors.push(commit_id1.clone());
2156        // `write_commit` should prevent the ids from being the same by changing the
2157        // committer timestamp of the commit it actually writes.
2158        let (commit_id2, mut actual_commit2) = write_commit(commit2.clone()).unwrap();
2159        // The returned matches the ID
2160        assert_eq!(
2161            backend.read_commit(&commit_id2).block_on().unwrap(),
2162            actual_commit2
2163        );
2164        assert_ne!(commit_id2, commit_id1);
2165        // The committer timestamp should differ
2166        assert_ne!(
2167            actual_commit2.committer.timestamp.timestamp,
2168            commit2.committer.timestamp.timestamp
2169        );
2170        // The rest of the commit should be the same
2171        actual_commit2.committer.timestamp.timestamp = commit2.committer.timestamp.timestamp;
2172        assert_eq!(actual_commit2, commit2);
2173    }
2174
2175    #[test]
2176    fn write_signed_commit() {
2177        let settings = user_settings();
2178        let temp_dir = new_temp_dir();
2179        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2180
2181        let commit = Commit {
2182            parents: vec![backend.root_commit_id().clone()],
2183            predecessors: vec![],
2184            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2185            change_id: ChangeId::new(vec![]),
2186            description: "initial".to_string(),
2187            author: create_signature(),
2188            committer: create_signature(),
2189            secure_sig: None,
2190        };
2191
2192        let mut signer = |data: &_| {
2193            let hash: String = blake2b_hash(data).encode_hex();
2194            Ok(format!("test sig\nhash={hash}\n").into_bytes())
2195        };
2196
2197        let (id, commit) = backend
2198            .write_commit(commit, Some(&mut signer as &mut SigningFn))
2199            .block_on()
2200            .unwrap();
2201
2202        let git_repo = backend.git_repo();
2203        let obj = git_repo
2204            .find_object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
2205            .unwrap();
2206        insta::assert_snapshot!(std::str::from_utf8(&obj.data).unwrap(), @r"
2207        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2208        author Someone <someone@example.com> 0 +0000
2209        committer Someone <someone@example.com> 0 +0000
2210        gpgsig test sig
2211         hash=9ad9526c3b2103c41a229f2f3c82d107a0ecd902f476a855f0e1dd5f7bef1430663de12749b73e293a877113895a8a2a0f29da4bbc5a5f9a19c3523fb0e53518
2212
2213        initial
2214        ");
2215
2216        let returned_sig = commit.secure_sig.expect("failed to return the signature");
2217
2218        let commit = backend.read_commit(&id).block_on().unwrap();
2219
2220        let sig = commit.secure_sig.expect("failed to read the signature");
2221        assert_eq!(&sig, &returned_sig);
2222
2223        insta::assert_snapshot!(std::str::from_utf8(&sig.sig).unwrap(), @r"
2224        test sig
2225        hash=9ad9526c3b2103c41a229f2f3c82d107a0ecd902f476a855f0e1dd5f7bef1430663de12749b73e293a877113895a8a2a0f29da4bbc5a5f9a19c3523fb0e53518
2226        ");
2227        insta::assert_snapshot!(std::str::from_utf8(&sig.data).unwrap(), @r"
2228        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2229        author Someone <someone@example.com> 0 +0000
2230        committer Someone <someone@example.com> 0 +0000
2231
2232        initial
2233        ");
2234    }
2235
2236    fn git_id(commit_id: &CommitId) -> gix::ObjectId {
2237        gix::ObjectId::from_bytes_or_panic(commit_id.as_bytes())
2238    }
2239
2240    fn create_signature() -> Signature {
2241        Signature {
2242            name: GIT_USER.to_string(),
2243            email: GIT_EMAIL.to_string(),
2244            timestamp: Timestamp {
2245                timestamp: MillisSinceEpoch(0),
2246                tz_offset: 0,
2247            },
2248        }
2249    }
2250
2251    // Not using testutils::user_settings() because there is a dependency cycle
2252    // 'jj_lib (1) -> testutils -> jj_lib (2)' which creates another distinct
2253    // UserSettings type. testutils returns jj_lib (2)'s UserSettings, whereas
2254    // our UserSettings type comes from jj_lib (1).
2255    fn user_settings() -> UserSettings {
2256        let config = StackedConfig::with_defaults();
2257        UserSettings::from_config(config).unwrap()
2258    }
2259}