jj_lib/
git_backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::any::Any;
18use std::collections::HashSet;
19use std::ffi::OsStr;
20use std::fmt::Debug;
21use std::fmt::Error;
22use std::fmt::Formatter;
23use std::fs;
24use std::io;
25use std::io::Cursor;
26use std::io::Read;
27use std::path::Path;
28use std::path::PathBuf;
29use std::process::Command;
30use std::process::ExitStatus;
31use std::str;
32use std::sync::Arc;
33use std::sync::Mutex;
34use std::sync::MutexGuard;
35use std::time::SystemTime;
36
37use async_trait::async_trait;
38use futures::stream::BoxStream;
39use gix::bstr::BString;
40use gix::objs::CommitRef;
41use gix::objs::CommitRefIter;
42use gix::objs::WriteTo as _;
43use itertools::Itertools as _;
44use pollster::FutureExt as _;
45use prost::Message as _;
46use smallvec::SmallVec;
47use thiserror::Error;
48
49use crate::backend::make_root_commit;
50use crate::backend::Backend;
51use crate::backend::BackendError;
52use crate::backend::BackendInitError;
53use crate::backend::BackendLoadError;
54use crate::backend::BackendResult;
55use crate::backend::ChangeId;
56use crate::backend::Commit;
57use crate::backend::CommitId;
58use crate::backend::Conflict;
59use crate::backend::ConflictId;
60use crate::backend::ConflictTerm;
61use crate::backend::CopyRecord;
62use crate::backend::FileId;
63use crate::backend::MergedTreeId;
64use crate::backend::MillisSinceEpoch;
65use crate::backend::SecureSig;
66use crate::backend::Signature;
67use crate::backend::SigningFn;
68use crate::backend::SymlinkId;
69use crate::backend::Timestamp;
70use crate::backend::Tree;
71use crate::backend::TreeId;
72use crate::backend::TreeValue;
73use crate::config::ConfigGetError;
74use crate::file_util::IoResultExt as _;
75use crate::file_util::PathError;
76use crate::hex_util::to_forward_hex;
77use crate::index::Index;
78use crate::lock::FileLock;
79use crate::merge::Merge;
80use crate::merge::MergeBuilder;
81use crate::object_id::ObjectId;
82use crate::repo_path::RepoPath;
83use crate::repo_path::RepoPathBuf;
84use crate::repo_path::RepoPathComponentBuf;
85use crate::settings::GitSettings;
86use crate::settings::UserSettings;
87use crate::stacked_table::MutableTable;
88use crate::stacked_table::ReadonlyTable;
89use crate::stacked_table::TableSegment as _;
90use crate::stacked_table::TableStore;
91use crate::stacked_table::TableStoreError;
92
/// Byte length of the commit ids handled by this backend. The root commit id
/// is `HASH_LENGTH` zero bytes, and the extra metadata table store is
/// initialized with this value.
const HASH_LENGTH: usize = 20;
/// Byte length of change ids. The root change id is `CHANGE_ID_LENGTH` zero
/// bytes.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";
// NOTE(review): presumably a suffix marking conflicted tree entries; it is not
// used in this part of the file, so confirm against its users.
const CONFLICT_SUFFIX: &str = ".jjconflict";

/// Name of the extra commit header that lists the tree ids of a conflicted
/// root tree as space-separated hex strings.
pub const JJ_TREES_COMMIT_HEADER: &[u8] = b"jj:trees";
/// Name of the extra commit header that carries the change id.
pub const CHANGE_ID_COMMIT_HEADER: &[u8] = b"change-id";
101
/// Errors that can occur while initializing a `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendInitError {
    /// Creating a new Git repository failed.
    #[error("Failed to initialize git repository")]
    InitRepository(#[source] gix::init::Error),
    /// Opening an existing Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// The Git-related settings could not be read from the user settings.
    #[error(transparent)]
    Config(ConfigGetError),
    /// A filesystem operation on a store or repository path failed.
    #[error(transparent)]
    Path(PathError),
}
113
114impl From<Box<GitBackendInitError>> for BackendInitError {
115    fn from(err: Box<GitBackendInitError>) -> Self {
116        BackendInitError(err)
117    }
118}
119
/// Errors that can occur while loading an existing `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendLoadError {
    /// Opening the backing Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// The Git-related settings could not be read from the user settings.
    #[error(transparent)]
    Config(ConfigGetError),
    /// Reading the `git_target` file or canonicalizing the repository path
    /// failed.
    #[error(transparent)]
    Path(PathError),
}
129
130impl From<Box<GitBackendLoadError>> for BackendLoadError {
131    fn from(err: Box<GitBackendLoadError>) -> Self {
132        BackendLoadError(err)
133    }
134}
135
/// `GitBackend`-specific error that may occur after the backend is loaded.
#[derive(Debug, Error)]
pub enum GitBackendError {
    /// Reading the extra (non-Git) metadata table failed.
    #[error("Failed to read non-git metadata")]
    ReadMetadata(#[source] TableStoreError),
    /// Saving the extra (non-Git) metadata table failed.
    #[error("Failed to write non-git metadata")]
    WriteMetadata(#[source] TableStoreError),
}
144
145impl From<GitBackendError> for BackendError {
146    fn from(err: GitBackendError) -> Self {
147        BackendError::Other(err.into())
148    }
149}
150
/// Errors that can occur while running the external `git gc` command.
#[derive(Debug, Error)]
pub enum GitGcError {
    /// The command could not be run (I/O error from the process API).
    #[error("Failed to run git gc command")]
    GcCommand(#[source] std::io::Error),
    /// The command ran but exited with the given error status.
    #[error("git gc command exited with an error: {0}")]
    GcCommandErrorStatus(ExitStatus),
}
158
/// Backend that stores commits and trees in a Git repository, with
/// additional metadata kept in a separate table store.
pub struct GitBackend {
    // While gix::Repository can be created from gix::ThreadSafeRepository, it's
    // cheaper to cache the thread-local instance behind a mutex than creating
    // one for each backend method call. Our GitBackend is most likely to be
    // used in a single-threaded context.
    base_repo: gix::ThreadSafeRepository,
    // Cached thread-local handle derived from `base_repo`.
    repo: Mutex<gix::Repository>,
    // All-zero commit id of length `HASH_LENGTH`.
    root_commit_id: CommitId,
    // All-zero change id of length `CHANGE_ID_LENGTH`.
    root_change_id: ChangeId,
    // Fixed id "4b825dc642cb6eb9a060e54bf8d69288fbee4904" set in `new()`.
    empty_tree_id: TreeId,
    // Store of tables holding the non-Git ("extra") metadata.
    extra_metadata_store: TableStore,
    // Most recently loaded or saved head of `extra_metadata_store`; `None`
    // until the table is first read or written.
    cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
    // Taken from `GitSettings::executable_path`.
    git_executable: PathBuf,
    // Taken from `GitSettings::write_change_id_header`.
    write_change_id_header: bool,
}
174
175impl GitBackend {
176    pub fn name() -> &'static str {
177        "git"
178    }
179
180    fn new(
181        base_repo: gix::ThreadSafeRepository,
182        extra_metadata_store: TableStore,
183        git_settings: GitSettings,
184    ) -> Self {
185        let repo = Mutex::new(base_repo.to_thread_local());
186        let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
187        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
188        let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
189        GitBackend {
190            base_repo,
191            repo,
192            root_commit_id,
193            root_change_id,
194            empty_tree_id,
195            extra_metadata_store,
196            cached_extra_metadata: Mutex::new(None),
197            git_executable: git_settings.executable_path,
198            write_change_id_header: git_settings.write_change_id_header,
199        }
200    }
201
202    pub fn init_internal(
203        settings: &UserSettings,
204        store_path: &Path,
205    ) -> Result<Self, Box<GitBackendInitError>> {
206        let git_repo_path = Path::new("git");
207        let git_repo = gix::ThreadSafeRepository::init_opts(
208            store_path.join(git_repo_path),
209            gix::create::Kind::Bare,
210            gix::create::Options::default(),
211            gix_open_opts_from_settings(settings),
212        )
213        .map_err(GitBackendInitError::InitRepository)?;
214        let git_settings = settings
215            .git_settings()
216            .map_err(GitBackendInitError::Config)?;
217        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
218    }
219
220    /// Initializes backend by creating a new Git repo at the specified
221    /// workspace path. The workspace directory must exist.
222    pub fn init_colocated(
223        settings: &UserSettings,
224        store_path: &Path,
225        workspace_root: &Path,
226    ) -> Result<Self, Box<GitBackendInitError>> {
227        let canonical_workspace_root = {
228            let path = store_path.join(workspace_root);
229            dunce::canonicalize(&path)
230                .context(&path)
231                .map_err(GitBackendInitError::Path)?
232        };
233        let git_repo = gix::ThreadSafeRepository::init_opts(
234            canonical_workspace_root,
235            gix::create::Kind::WithWorktree,
236            gix::create::Options::default(),
237            gix_open_opts_from_settings(settings),
238        )
239        .map_err(GitBackendInitError::InitRepository)?;
240        let git_repo_path = workspace_root.join(".git");
241        let git_settings = settings
242            .git_settings()
243            .map_err(GitBackendInitError::Config)?;
244        Self::init_with_repo(store_path, &git_repo_path, git_repo, git_settings)
245    }
246
247    /// Initializes backend with an existing Git repo at the specified path.
248    pub fn init_external(
249        settings: &UserSettings,
250        store_path: &Path,
251        git_repo_path: &Path,
252    ) -> Result<Self, Box<GitBackendInitError>> {
253        let canonical_git_repo_path = {
254            let path = store_path.join(git_repo_path);
255            canonicalize_git_repo_path(&path)
256                .context(&path)
257                .map_err(GitBackendInitError::Path)?
258        };
259        let git_repo = gix::ThreadSafeRepository::open_opts(
260            canonical_git_repo_path,
261            gix_open_opts_from_settings(settings),
262        )
263        .map_err(GitBackendInitError::OpenRepository)?;
264        let git_settings = settings
265            .git_settings()
266            .map_err(GitBackendInitError::Config)?;
267        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
268    }
269
270    fn init_with_repo(
271        store_path: &Path,
272        git_repo_path: &Path,
273        repo: gix::ThreadSafeRepository,
274        git_settings: GitSettings,
275    ) -> Result<Self, Box<GitBackendInitError>> {
276        let extra_path = store_path.join("extra");
277        fs::create_dir(&extra_path)
278            .context(&extra_path)
279            .map_err(GitBackendInitError::Path)?;
280        let target_path = store_path.join("git_target");
281        if cfg!(windows) && git_repo_path.is_relative() {
282            // When a repository is created in Windows, format the path with *forward
283            // slashes* and not backwards slashes. This makes it possible to use the same
284            // repository under Windows Subsystem for Linux.
285            //
286            // This only works for relative paths. If the path is absolute, there's not much
287            // we can do, and it simply won't work inside and outside WSL at the same time.
288            let git_repo_path_string = git_repo_path
289                .components()
290                .map(|component| component.as_os_str().to_str().unwrap().to_owned())
291                .join("/");
292            fs::write(&target_path, git_repo_path_string.as_bytes())
293                .context(&target_path)
294                .map_err(GitBackendInitError::Path)?;
295        } else {
296            fs::write(&target_path, git_repo_path.to_str().unwrap().as_bytes())
297                .context(&target_path)
298                .map_err(GitBackendInitError::Path)?;
299        };
300        let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
301        Ok(GitBackend::new(repo, extra_metadata_store, git_settings))
302    }
303
304    pub fn load(
305        settings: &UserSettings,
306        store_path: &Path,
307    ) -> Result<Self, Box<GitBackendLoadError>> {
308        let git_repo_path = {
309            let target_path = store_path.join("git_target");
310            let git_repo_path_str = fs::read_to_string(&target_path)
311                .context(&target_path)
312                .map_err(GitBackendLoadError::Path)?;
313            let git_repo_path = store_path.join(git_repo_path_str);
314            canonicalize_git_repo_path(&git_repo_path)
315                .context(&git_repo_path)
316                .map_err(GitBackendLoadError::Path)?
317        };
318        let repo = gix::ThreadSafeRepository::open_opts(
319            git_repo_path,
320            gix_open_opts_from_settings(settings),
321        )
322        .map_err(GitBackendLoadError::OpenRepository)?;
323        let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
324        let git_settings = settings
325            .git_settings()
326            .map_err(GitBackendLoadError::Config)?;
327        Ok(GitBackend::new(repo, extra_metadata_store, git_settings))
328    }
329
    /// Locks and returns the cached thread-local Git repo instance.
    ///
    /// Panics if the mutex is poisoned.
    fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
        self.repo.lock().unwrap()
    }
333
    /// Returns a new thread-local instance for accessing the underlying Git
    /// repo. Unlike `lock_git_repo()`, this does not take the mutex around the
    /// cached instance.
    pub fn git_repo(&self) -> gix::Repository {
        self.base_repo.to_thread_local()
    }
338
    /// Returns the path to the `.git` directory, or to the repository itself
    /// if it's bare.
    pub fn git_repo_path(&self) -> &Path {
        self.base_repo.path()
    }
343
    /// Returns the path to the working directory if the repository isn't
    /// bare, or `None` otherwise.
    pub fn git_workdir(&self) -> Option<&Path> {
        self.base_repo.work_dir()
    }
348
349    fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
350        let mut locked_head = self.cached_extra_metadata.lock().unwrap();
351        match locked_head.as_ref() {
352            Some(head) => Ok(head.clone()),
353            None => {
354                let table = self
355                    .extra_metadata_store
356                    .get_head()
357                    .map_err(GitBackendError::ReadMetadata)?;
358                *locked_head = Some(table.clone());
359                Ok(table)
360            }
361        }
362    }
363
364    fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> {
365        let table = self
366            .extra_metadata_store
367            .get_head_locked()
368            .map_err(GitBackendError::ReadMetadata)?;
369        Ok(table)
370    }
371
372    fn save_extra_metadata_table(
373        &self,
374        mut_table: MutableTable,
375        _table_lock: &FileLock,
376    ) -> BackendResult<()> {
377        let table = self
378            .extra_metadata_store
379            .save_table(mut_table)
380            .map_err(GitBackendError::WriteMetadata)?;
381        // Since the parent table was the head, saved table are likely to be new head.
382        // If it's not, cache will be reloaded when entry can't be found.
383        *self.cached_extra_metadata.lock().unwrap() = Some(table);
384        Ok(())
385    }
386
387    /// Imports the given commits and ancestors from the backing Git repo.
388    ///
389    /// The `head_ids` may contain commits that have already been imported, but
390    /// the caller should filter them out to eliminate redundant I/O processing.
391    #[tracing::instrument(skip(self, head_ids))]
392    pub fn import_head_commits<'a>(
393        &self,
394        head_ids: impl IntoIterator<Item = &'a CommitId>,
395    ) -> BackendResult<()> {
396        let head_ids: HashSet<&CommitId> = head_ids
397            .into_iter()
398            .filter(|&id| *id != self.root_commit_id)
399            .collect();
400        if head_ids.is_empty() {
401            return Ok(());
402        }
403
404        // Create no-gc ref even if known to the extras table. Concurrent GC
405        // process might have deleted the no-gc ref.
406        let locked_repo = self.lock_git_repo();
407        locked_repo
408            .edit_references(head_ids.iter().copied().map(to_no_gc_ref_update))
409            .map_err(|err| BackendError::Other(Box::new(err)))?;
410
411        // These commits are imported from Git. Make our change ids persist (otherwise
412        // future write_commit() could reassign new change id.)
413        tracing::debug!(
414            heads_count = head_ids.len(),
415            "import extra metadata entries"
416        );
417        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
418        let mut mut_table = table.start_mutation();
419        import_extra_metadata_entries_from_heads(
420            &locked_repo,
421            &mut mut_table,
422            &table_lock,
423            &head_ids,
424        )?;
425        self.save_extra_metadata_table(mut_table, &table_lock)
426    }
427
428    fn read_file_sync(&self, id: &FileId) -> BackendResult<Box<dyn Read>> {
429        let git_blob_id = validate_git_object_id(id)?;
430        let locked_repo = self.lock_git_repo();
431        let mut blob = locked_repo
432            .find_object(git_blob_id)
433            .map_err(|err| map_not_found_err(err, id))?
434            .try_into_blob()
435            .map_err(|err| to_read_object_err(err, id))?;
436        Ok(Box::new(Cursor::new(blob.take_data())))
437    }
438
439    fn new_diff_platform(&self) -> BackendResult<gix::diff::blob::Platform> {
440        let attributes = gix::worktree::Stack::new(
441            Path::new(""),
442            gix::worktree::stack::State::AttributesStack(Default::default()),
443            gix::worktree::glob::pattern::Case::Sensitive,
444            Vec::new(),
445            Vec::new(),
446        );
447        let filter = gix::diff::blob::Pipeline::new(
448            Default::default(),
449            gix::filter::plumbing::Pipeline::new(
450                self.git_repo()
451                    .command_context()
452                    .map_err(|err| BackendError::Other(Box::new(err)))?,
453                Default::default(),
454            ),
455            Vec::new(),
456            Default::default(),
457        );
458        Ok(gix::diff::blob::Platform::new(
459            Default::default(),
460            filter,
461            gix::diff::blob::pipeline::Mode::ToGit,
462            attributes,
463        ))
464    }
465
466    fn read_tree_for_commit<'repo>(
467        &self,
468        repo: &'repo gix::Repository,
469        id: &CommitId,
470    ) -> BackendResult<gix::Tree<'repo>> {
471        let tree = self.read_commit(id).block_on()?.root_tree.to_merge();
472        // TODO(kfm): probably want to do something here if it is a merge
473        let tree_id = tree.first().clone();
474        let gix_id = validate_git_object_id(&tree_id)?;
475        repo.find_object(gix_id)
476            .map_err(|err| map_not_found_err(err, &tree_id))?
477            .try_into_tree()
478            .map_err(|err| to_read_object_err(err, &tree_id))
479    }
480}
481
482/// Canonicalizes the given `path` except for the last `".git"` component.
483///
484/// The last path component matters when opening a Git repo without `core.bare`
485/// config. This config is usually set, but the "repo" tool will set up such
486/// repositories and symlinks. Opening such repo with fully-canonicalized path
487/// would turn a colocated Git repo into a bare repo.
488pub fn canonicalize_git_repo_path(path: &Path) -> io::Result<PathBuf> {
489    if path.ends_with(".git") {
490        let workdir = path.parent().unwrap();
491        dunce::canonicalize(workdir).map(|dir| dir.join(".git"))
492    } else {
493        dunce::canonicalize(path)
494    }
495}
496
497fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options {
498    let user_name = settings.user_name();
499    let user_email = settings.user_email();
500    gix::open::Options::default()
501        .config_overrides([
502            // Committer has to be configured to record reflog. Author isn't
503            // needed, but let's copy the same values.
504            format!("author.name={user_name}"),
505            format!("author.email={user_email}"),
506            format!("committer.name={user_name}"),
507            format!("committer.email={user_email}"),
508        ])
509        // The git_target path should point the repository, not the working directory.
510        .open_path_as_is(true)
511        // Gitoxide recommends this when correctness is preferred
512        .strict_config(true)
513}
514
515/// Reads the `jj:trees` header from the commit.
516fn root_tree_from_header(git_commit: &CommitRef) -> Result<Option<MergedTreeId>, ()> {
517    for (key, value) in &git_commit.extra_headers {
518        if *key == JJ_TREES_COMMIT_HEADER {
519            let mut tree_ids = SmallVec::new();
520            for hex in str::from_utf8(value.as_ref()).or(Err(()))?.split(' ') {
521                let tree_id = TreeId::try_from_hex(hex).or(Err(()))?;
522                if tree_id.as_bytes().len() != HASH_LENGTH {
523                    return Err(());
524                }
525                tree_ids.push(tree_id);
526            }
527            // It is invalid to use `jj:trees` with a non-conflicted tree. If this were
528            // allowed, it would be possible to construct a commit which appears to have
529            // different contents depending on whether it is viewed using `jj` or `git`.
530            if tree_ids.len() == 1 || tree_ids.len() % 2 == 0 {
531                return Err(());
532            }
533            return Ok(Some(MergedTreeId::Merge(Merge::from_vec(tree_ids))));
534        }
535    }
536    Ok(None)
537}
538
/// Converts a Git commit object into a backend `Commit`.
///
/// The parents are taken as-is from the Git commit (or dropped entirely when
/// `is_shallow` is set); nothing is substituted for an empty parent list
/// here. `predecessors` is left empty.
///
/// If the commit has no `jj:trees` header, `uses_tree_conflict_format`
/// selects whether the Git tree id is reported as a resolved merged tree or
/// as a legacy tree id.
///
/// Returns an error if the object cannot be parsed as a commit, if the
/// `jj:trees` header is invalid, or if the signature cannot be extracted.
fn commit_from_git_without_root_parent(
    id: &CommitId,
    git_object: &gix::Object,
    uses_tree_conflict_format: bool,
    is_shallow: bool,
) -> BackendResult<Commit> {
    let commit = git_object
        .try_to_commit_ref()
        .map_err(|err| to_read_object_err(err, id))?;

    // If the git header has a change-id field, we attempt to convert that to a
    // valid JJ Change Id
    let change_id = commit
        .extra_headers()
        .find("change-id")
        .and_then(to_forward_hex)
        .and_then(|change_id_hex| ChangeId::try_from_hex(change_id_hex.as_str()).ok())
        .filter(|val| val.as_bytes().len() == CHANGE_ID_LENGTH)
        // Otherwise, we reverse the bits of the commit id to create the change id.
        // We don't want to use the first bytes unmodified because then it would be
        // ambiguous if a given hash prefix refers to the commit id or the change id.
        // It would have been enough to pick the last 16 bytes instead of the
        // leading 16 bytes to address that. We also reverse the bits to make it
        // less likely that users depend on any relationship between the two ids.
        .unwrap_or_else(|| {
            ChangeId::new(
                id.as_bytes()[4..HASH_LENGTH]
                    .iter()
                    .rev()
                    .map(|b| b.reverse_bits())
                    .collect(),
            )
        });

    // Shallow commits don't have their parents actually fetched, so we discard
    // the parents here.
    // TODO: This causes issues when a shallow repository is deepened/unshallowed
    let parents = if is_shallow {
        vec![]
    } else {
        commit
            .parents()
            .map(|oid| CommitId::from_bytes(oid.as_bytes()))
            .collect_vec()
    };
    let tree_id = TreeId::from_bytes(commit.tree().as_bytes());
    // If this commit is a conflict, we'll update the root tree later, when we read
    // the extra metadata.
    let root_tree = root_tree_from_header(&commit)
        .map_err(|()| to_read_object_err("Invalid jj:trees header", id))?;
    let root_tree = root_tree.unwrap_or_else(|| {
        if uses_tree_conflict_format {
            MergedTreeId::resolved(tree_id)
        } else {
            MergedTreeId::Legacy(tree_id)
        }
    });
    // Use lossy conversion as commit message with "mojibake" is still better than
    // nothing.
    // TODO: what should we do with commit.encoding?
    let description = String::from_utf8_lossy(commit.message).into_owned();
    let author = signature_from_git(commit.author());
    let committer = signature_from_git(commit.committer());

    // If the commit is signed, extract both the signature and the signed data
    // (which is the commit buffer with the gpgsig header omitted).
    // We have to re-parse the raw commit data because gix CommitRef does not give
    // us the signed data, only the signature.
    // Ideally, we could use try_to_commit_ref_iter at the beginning of this
    // function and extract everything from that. For now, this works
    let secure_sig = commit
        .extra_headers
        .iter()
        // gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too
        .any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256")
        .then(|| CommitRefIter::signature(&git_object.data))
        .transpose()
        .map_err(|err| to_read_object_err(err, id))?
        .flatten()
        .map(|(sig, data)| SecureSig {
            data: data.to_bstring().into(),
            sig: sig.into_owned().into(),
        });

    Ok(Commit {
        parents,
        predecessors: vec![],
        // If this commit has associated extra metadata, we may reset this later.
        root_tree,
        change_id,
        description,
        author,
        committer,
        secure_sig,
    })
}
635
/// Stand-in written to Git in place of an empty signature name or email
/// (git does not support empty ones); mapped back to `""` when reading.
const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING";
637
638fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature {
639    let name = signature.name;
640    let name = if name != EMPTY_STRING_PLACEHOLDER {
641        String::from_utf8_lossy(name).into_owned()
642    } else {
643        "".to_string()
644    };
645    let email = signature.email;
646    let email = if email != EMPTY_STRING_PLACEHOLDER {
647        String::from_utf8_lossy(email).into_owned()
648    } else {
649        "".to_string()
650    };
651    let timestamp = MillisSinceEpoch(signature.time.seconds * 1000);
652    let tz_offset = signature.time.offset.div_euclid(60); // in minutes
653    Signature {
654        name,
655        email,
656        timestamp: Timestamp {
657            timestamp,
658            tz_offset,
659        },
660    }
661}
662
663fn signature_to_git(signature: &Signature) -> gix::actor::SignatureRef<'_> {
664    // git does not support empty names or emails
665    let name = if !signature.name.is_empty() {
666        &signature.name
667    } else {
668        EMPTY_STRING_PLACEHOLDER
669    };
670    let email = if !signature.email.is_empty() {
671        &signature.email
672    } else {
673        EMPTY_STRING_PLACEHOLDER
674    };
675    let time = gix::date::Time::new(
676        signature.timestamp.timestamp.0.div_euclid(1000),
677        signature.timestamp.tz_offset * 60, // in seconds
678    );
679    gix::actor::SignatureRef {
680        name: name.into(),
681        email: email.into(),
682        time,
683    }
684}
685
686fn serialize_extras(commit: &Commit) -> Vec<u8> {
687    let mut proto = crate::protos::git_store::Commit {
688        change_id: commit.change_id.to_bytes(),
689        ..Default::default()
690    };
691    if let MergedTreeId::Merge(tree_ids) = &commit.root_tree {
692        proto.uses_tree_conflict_format = true;
693        if !tree_ids.is_resolved() {
694            // This is done for the sake of jj versions <0.28 (before commit
695            // f7b14be) being able to read the repo. At some point in the
696            // future, we can stop doing it.
697            proto.root_tree = tree_ids.iter().map(|r| r.to_bytes()).collect();
698        }
699    }
700    for predecessor in &commit.predecessors {
701        proto.predecessors.push(predecessor.to_bytes());
702    }
703    proto.encode_to_vec()
704}
705
706fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
707    let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
708    if !proto.change_id.is_empty() {
709        commit.change_id = ChangeId::new(proto.change_id);
710    }
711    if let MergedTreeId::Legacy(legacy_tree_id) = &commit.root_tree {
712        if proto.uses_tree_conflict_format {
713            if !proto.root_tree.is_empty() {
714                let merge_builder: MergeBuilder<_> = proto
715                    .root_tree
716                    .iter()
717                    .map(|id_bytes| TreeId::from_bytes(id_bytes))
718                    .collect();
719                commit.root_tree = MergedTreeId::Merge(merge_builder.build());
720            } else {
721                // uses_tree_conflict_format was set but there was no root_tree override in the
722                // proto, which means we should just promote the tree id from the
723                // git commit to be a known-conflict-free tree
724                commit.root_tree = MergedTreeId::resolved(legacy_tree_id.clone());
725            }
726        }
727    }
728    for predecessor in &proto.predecessors {
729        commit.predecessors.push(CommitId::from_bytes(predecessor));
730    }
731}
732
733/// Returns `RefEdit` that will create a ref in `refs/jj/keep` if not exist.
734/// Used for preventing GC of commits we create.
735fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit {
736    let name = format!("{NO_GC_REF_NAMESPACE}{id}");
737    let new = gix::refs::Target::Object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()));
738    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(new.clone());
739    gix::refs::transaction::RefEdit {
740        change: gix::refs::transaction::Change::Update {
741            log: gix::refs::transaction::LogChange {
742                message: "used by jj".into(),
743                ..Default::default()
744            },
745            expected,
746            new,
747        },
748        name: name.try_into().unwrap(),
749        deref: false,
750    }
751}
752
753fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit {
754    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(git_ref.target);
755    gix::refs::transaction::RefEdit {
756        change: gix::refs::transaction::Change::Delete {
757            expected,
758            log: gix::refs::transaction::RefLog::AndReference,
759        },
760        name: git_ref.name,
761        deref: false,
762    }
763}
764
/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other
/// unreachable and non-head refs.
///
/// An existing no-gc ref is kept if it points at one of `new_heads` and is
/// named after its target, or if its loose ref file was modified after
/// `keep_newer`. All other no-gc refs are deleted. Deletions and the
/// (re)creation of refs for `new_heads` are submitted as one batch of ref
/// edits.
///
/// Returns an error if the refs cannot be listed, if a symbolic no-gc ref is
/// found, or if the ref edits fail.
fn recreate_no_gc_refs(
    git_repo: &gix::Repository,
    new_heads: impl IntoIterator<Item = CommitId>,
    keep_newer: SystemTime,
) -> BackendResult<()> {
    // Calculate diff between existing no-gc refs and new heads.
    let new_heads: HashSet<CommitId> = new_heads.into_iter().collect();
    let mut no_gc_refs_to_keep_count: usize = 0;
    let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new();
    let git_references = git_repo
        .references()
        .map_err(|err| BackendError::Other(err.into()))?;
    let no_gc_refs_iter = git_references
        .prefixed(NO_GC_REF_NAMESPACE)
        .map_err(|err| BackendError::Other(err.into()))?;
    for git_ref in no_gc_refs_iter {
        let git_ref = git_ref.map_err(BackendError::Other)?.detach();
        let oid = git_ref.target.try_id().ok_or_else(|| {
            let name = git_ref.name.as_bstr();
            BackendError::Other(format!("Symbolic no-gc ref found: {name}").into())
        })?;
        let id = CommitId::from_bytes(oid.as_bytes());
        // A well-formed no-gc ref is named after the hex id of its target.
        let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex();
        if new_heads.contains(&id) && name_good {
            no_gc_refs_to_keep_count += 1;
            continue;
        }
        // Check timestamp of loose ref, but this is still racy on re-import
        // because:
        // - existing packed ref won't be demoted to loose ref
        // - existing loose ref won't be touched
        //
        // TODO: might be better to switch to a dummy merge, where new no-gc ref
        // will always have a unique name. Doing that with the current
        // ref-per-head strategy would increase the number of the no-gc refs.
        // https://github.com/jj-vcs/jj/pull/2659#issuecomment-1837057782
        let loose_ref_path = git_repo.path().join(git_ref.name.to_path());
        // If the metadata can't be read (e.g. there is no loose ref file), the
        // ref falls through to deletion.
        if let Ok(metadata) = loose_ref_path.metadata() {
            let mtime = metadata.modified().expect("unsupported platform?");
            if mtime > keep_newer {
                tracing::trace!(?git_ref, "not deleting new");
                no_gc_refs_to_keep_count += 1;
                continue;
            }
        }
        // Also deletes no-gc ref of random name created by old jj.
        tracing::trace!(?git_ref, ?name_good, "will delete");
        no_gc_refs_to_delete.push(git_ref);
    }
    tracing::info!(
        new_heads_count = new_heads.len(),
        no_gc_refs_to_keep_count,
        no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(),
        "collected reachable refs"
    );

    // It's slow to delete packed refs one by one, so update refs all at once.
    let ref_edits = itertools::chain(
        no_gc_refs_to_delete.into_iter().map(to_ref_deletion),
        new_heads.iter().map(to_no_gc_ref_update),
    );
    git_repo
        .edit_references(ref_edits)
        .map_err(|err| BackendError::Other(err.into()))?;

    Ok(())
}
834
835fn run_git_gc(program: &OsStr, git_dir: &Path) -> Result<(), GitGcError> {
836    let mut git = Command::new(program);
837    git.arg("--git-dir=."); // turn off discovery
838    git.arg("gc");
839    // Don't specify it by GIT_DIR/--git-dir. On Windows, the path could be
840    // canonicalized as UNC path, which wouldn't be supported by git.
841    git.current_dir(git_dir);
842    // TODO: pass output to UI layer instead of printing directly here
843    let status = git.status().map_err(GitGcError::GcCommand)?;
844    if !status.success() {
845        return Err(GitGcError::GcCommandErrorStatus(status));
846    }
847    Ok(())
848}
849
850fn validate_git_object_id(id: &impl ObjectId) -> BackendResult<gix::ObjectId> {
851    if id.as_bytes().len() != HASH_LENGTH {
852        return Err(BackendError::InvalidHashLength {
853            expected: HASH_LENGTH,
854            actual: id.as_bytes().len(),
855            object_type: id.object_type(),
856            hash: id.hex(),
857        });
858    }
859    Ok(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
860}
861
862fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError {
863    if matches!(err, gix::object::find::existing::Error::NotFound { .. }) {
864        BackendError::ObjectNotFound {
865            object_type: id.object_type(),
866            hash: id.hex(),
867            source: Box::new(err),
868        }
869    } else {
870        to_read_object_err(err, id)
871    }
872}
873
874fn to_read_object_err(
875    err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
876    id: &impl ObjectId,
877) -> BackendError {
878    BackendError::ReadObject {
879        object_type: id.object_type(),
880        hash: id.hex(),
881        source: err.into(),
882    }
883}
884
885fn to_invalid_utf8_err(source: str::Utf8Error, id: &impl ObjectId) -> BackendError {
886    BackendError::InvalidUtf8 {
887        object_type: id.object_type(),
888        hash: id.hex(),
889        source,
890    }
891}
892
893fn import_extra_metadata_entries_from_heads(
894    git_repo: &gix::Repository,
895    mut_table: &mut MutableTable,
896    _table_lock: &FileLock,
897    head_ids: &HashSet<&CommitId>,
898) -> BackendResult<()> {
899    let shallow_commits = git_repo
900        .shallow_commits()
901        .map_err(|e| BackendError::Other(Box::new(e)))?;
902
903    let mut work_ids = head_ids
904        .iter()
905        .filter(|&id| mut_table.get_value(id.as_bytes()).is_none())
906        .map(|&id| id.clone())
907        .collect_vec();
908    while let Some(id) = work_ids.pop() {
909        let git_object = git_repo
910            .find_object(validate_git_object_id(&id)?)
911            .map_err(|err| map_not_found_err(err, &id))?;
912        let is_shallow = shallow_commits
913            .as_ref()
914            .is_some_and(|shallow| shallow.contains(&git_object.id));
915        // TODO(#1624): Should we read the root tree here and check if it has a
916        // `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
917        // change the description of a commit with tree-level conflicts.
918        let commit = commit_from_git_without_root_parent(&id, &git_object, true, is_shallow)?;
919        mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
920        work_ids.extend(
921            commit
922                .parents
923                .into_iter()
924                .filter(|id| mut_table.get_value(id.as_bytes()).is_none()),
925        );
926    }
927    Ok(())
928}
929
930impl Debug for GitBackend {
931    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
932        f.debug_struct("GitBackend")
933            .field("path", &self.git_repo_path())
934            .finish()
935    }
936}
937
938#[async_trait]
939impl Backend for GitBackend {
    /// Exposes this backend as a `&dyn Any`.
    fn as_any(&self) -> &dyn Any {
        self
    }
943
    /// Returns the backend name, as reported by the associated function
    /// `Self::name()`.
    fn name(&self) -> &str {
        Self::name()
    }
947
    /// Returns the commit id length, which is `HASH_LENGTH` for this backend.
    fn commit_id_length(&self) -> usize {
        HASH_LENGTH
    }
951
    /// Returns the change id length, which is `CHANGE_ID_LENGTH` for this
    /// backend.
    fn change_id_length(&self) -> usize {
        CHANGE_ID_LENGTH
    }
955
    /// Returns the id of the root commit stored in this backend instance.
    fn root_commit_id(&self) -> &CommitId {
        &self.root_commit_id
    }
959
    /// Returns the change id of the root commit stored in this backend
    /// instance.
    fn root_change_id(&self) -> &ChangeId {
        &self.root_change_id
    }
963
    /// Returns the id of the empty tree stored in this backend instance.
    fn empty_tree_id(&self) -> &TreeId {
        &self.empty_tree_id
    }
967
    /// Reports a concurrency level of 1 for this backend.
    fn concurrency(&self) -> usize {
        1
    }
971
    /// Returns a reader for the file `id` by delegating to `read_file_sync`.
    /// The path argument is not used.
    async fn read_file(&self, _path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>> {
        self.read_file_sync(id)
    }
975
976    async fn write_file(
977        &self,
978        _path: &RepoPath,
979        contents: &mut (dyn Read + Send),
980    ) -> BackendResult<FileId> {
981        let mut bytes = Vec::new();
982        contents.read_to_end(&mut bytes).unwrap();
983        let locked_repo = self.lock_git_repo();
984        let oid = locked_repo
985            .write_blob(bytes)
986            .map_err(|err| BackendError::WriteObject {
987                object_type: "file",
988                source: Box::new(err),
989            })?;
990        Ok(FileId::new(oid.as_bytes().to_vec()))
991    }
992
993    async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
994        let git_blob_id = validate_git_object_id(id)?;
995        let locked_repo = self.lock_git_repo();
996        let mut blob = locked_repo
997            .find_object(git_blob_id)
998            .map_err(|err| map_not_found_err(err, id))?
999            .try_into_blob()
1000            .map_err(|err| to_read_object_err(err, id))?;
1001        let target = String::from_utf8(blob.take_data())
1002            .map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?;
1003        Ok(target)
1004    }
1005
1006    async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
1007        let locked_repo = self.lock_git_repo();
1008        let oid =
1009            locked_repo
1010                .write_blob(target.as_bytes())
1011                .map_err(|err| BackendError::WriteObject {
1012                    object_type: "symlink",
1013                    source: Box::new(err),
1014                })?;
1015        Ok(SymlinkId::new(oid.as_bytes().to_vec()))
1016    }
1017
1018    async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
1019        if id == &self.empty_tree_id {
1020            return Ok(Tree::default());
1021        }
1022        let git_tree_id = validate_git_object_id(id)?;
1023
1024        let locked_repo = self.lock_git_repo();
1025        let git_tree = locked_repo
1026            .find_object(git_tree_id)
1027            .map_err(|err| map_not_found_err(err, id))?
1028            .try_into_tree()
1029            .map_err(|err| to_read_object_err(err, id))?;
1030        let mut tree = Tree::default();
1031        for entry in git_tree.iter() {
1032            let entry = entry.map_err(|err| to_read_object_err(err, id))?;
1033            let name =
1034                str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?;
1035            let (name, value) = match entry.mode().kind() {
1036                gix::object::tree::EntryKind::Tree => {
1037                    let id = TreeId::from_bytes(entry.oid().as_bytes());
1038                    (name, TreeValue::Tree(id))
1039                }
1040                gix::object::tree::EntryKind::Blob => {
1041                    let id = FileId::from_bytes(entry.oid().as_bytes());
1042                    if let Some(basename) = name.strip_suffix(CONFLICT_SUFFIX) {
1043                        (
1044                            basename,
1045                            TreeValue::Conflict(ConflictId::from_bytes(entry.oid().as_bytes())),
1046                        )
1047                    } else {
1048                        (
1049                            name,
1050                            TreeValue::File {
1051                                id,
1052                                executable: false,
1053                            },
1054                        )
1055                    }
1056                }
1057                gix::object::tree::EntryKind::BlobExecutable => {
1058                    let id = FileId::from_bytes(entry.oid().as_bytes());
1059                    (
1060                        name,
1061                        TreeValue::File {
1062                            id,
1063                            executable: true,
1064                        },
1065                    )
1066                }
1067                gix::object::tree::EntryKind::Link => {
1068                    let id = SymlinkId::from_bytes(entry.oid().as_bytes());
1069                    (name, TreeValue::Symlink(id))
1070                }
1071                gix::object::tree::EntryKind::Commit => {
1072                    let id = CommitId::from_bytes(entry.oid().as_bytes());
1073                    (name, TreeValue::GitSubmodule(id))
1074                }
1075            };
1076            tree.set(RepoPathComponentBuf::new(name).unwrap(), value);
1077        }
1078        Ok(tree)
1079    }
1080
1081    async fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
1082        // Tree entries to be written must be sorted by Entry::filename(), which
1083        // is slightly different from the order of our backend::Tree.
1084        let entries = contents
1085            .entries()
1086            .map(|entry| {
1087                let name = entry.name().as_internal_str();
1088                match entry.value() {
1089                    TreeValue::File {
1090                        id,
1091                        executable: false,
1092                    } => gix::objs::tree::Entry {
1093                        mode: gix::object::tree::EntryKind::Blob.into(),
1094                        filename: name.into(),
1095                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1096                    },
1097                    TreeValue::File {
1098                        id,
1099                        executable: true,
1100                    } => gix::objs::tree::Entry {
1101                        mode: gix::object::tree::EntryKind::BlobExecutable.into(),
1102                        filename: name.into(),
1103                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1104                    },
1105                    TreeValue::Symlink(id) => gix::objs::tree::Entry {
1106                        mode: gix::object::tree::EntryKind::Link.into(),
1107                        filename: name.into(),
1108                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1109                    },
1110                    TreeValue::Tree(id) => gix::objs::tree::Entry {
1111                        mode: gix::object::tree::EntryKind::Tree.into(),
1112                        filename: name.into(),
1113                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1114                    },
1115                    TreeValue::GitSubmodule(id) => gix::objs::tree::Entry {
1116                        mode: gix::object::tree::EntryKind::Commit.into(),
1117                        filename: name.into(),
1118                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1119                    },
1120                    TreeValue::Conflict(id) => gix::objs::tree::Entry {
1121                        mode: gix::object::tree::EntryKind::Blob.into(),
1122                        filename: (name.to_owned() + CONFLICT_SUFFIX).into(),
1123                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1124                    },
1125                }
1126            })
1127            .sorted_unstable()
1128            .collect();
1129        let locked_repo = self.lock_git_repo();
1130        let oid = locked_repo
1131            .write_object(gix::objs::Tree { entries })
1132            .map_err(|err| BackendError::WriteObject {
1133                object_type: "tree",
1134                source: Box::new(err),
1135            })?;
1136        Ok(TreeId::from_bytes(oid.as_bytes()))
1137    }
1138
1139    fn read_conflict(&self, _path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict> {
1140        let mut file = self.read_file_sync(&FileId::new(id.to_bytes()))?;
1141        let mut data = String::new();
1142        file.read_to_string(&mut data)
1143            .map_err(|err| BackendError::ReadObject {
1144                object_type: "conflict".to_owned(),
1145                hash: id.hex(),
1146                source: err.into(),
1147            })?;
1148        let json: serde_json::Value = serde_json::from_str(&data).unwrap();
1149        Ok(Conflict {
1150            removes: conflict_term_list_from_json(json.get("removes").unwrap()),
1151            adds: conflict_term_list_from_json(json.get("adds").unwrap()),
1152        })
1153    }
1154
1155    fn write_conflict(&self, _path: &RepoPath, conflict: &Conflict) -> BackendResult<ConflictId> {
1156        let json = serde_json::json!({
1157            "removes": conflict_term_list_to_json(&conflict.removes),
1158            "adds": conflict_term_list_to_json(&conflict.adds),
1159        });
1160        let json_string = json.to_string();
1161        let bytes = json_string.as_bytes();
1162        let locked_repo = self.lock_git_repo();
1163        let oid = locked_repo
1164            .write_blob(bytes)
1165            .map_err(|err| BackendError::WriteObject {
1166                object_type: "conflict",
1167                source: Box::new(err),
1168            })?;
1169        Ok(ConflictId::from_bytes(oid.as_bytes()))
1170    }
1171
1172    #[tracing::instrument(skip(self))]
1173    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
1174        if *id == self.root_commit_id {
1175            return Ok(make_root_commit(
1176                self.root_change_id().clone(),
1177                self.empty_tree_id.clone(),
1178            ));
1179        }
1180        let git_commit_id = validate_git_object_id(id)?;
1181
1182        let mut commit = {
1183            let locked_repo = self.lock_git_repo();
1184            let git_object = locked_repo
1185                .find_object(git_commit_id)
1186                .map_err(|err| map_not_found_err(err, id))?;
1187            let is_shallow = locked_repo
1188                .shallow_commits()
1189                .ok()
1190                .flatten()
1191                .is_some_and(|shallow| shallow.contains(&git_object.id));
1192            commit_from_git_without_root_parent(id, &git_object, false, is_shallow)?
1193        };
1194        if commit.parents.is_empty() {
1195            commit.parents.push(self.root_commit_id.clone());
1196        };
1197
1198        let table = self.cached_extra_metadata_table()?;
1199        if let Some(extras) = table.get_value(id.as_bytes()) {
1200            deserialize_extras(&mut commit, extras);
1201        } else {
1202            // TODO: Remove this hack and map to ObjectNotFound error if we're sure that
1203            // there are no reachable ancestor commits without extras metadata. Git commits
1204            // imported by jj < 0.8.0 might not have extras (#924).
1205            // https://github.com/jj-vcs/jj/issues/2343
1206            tracing::info!("unimported Git commit found");
1207            self.import_head_commits([id])?;
1208            let table = self.cached_extra_metadata_table()?;
1209            let extras = table.get_value(id.as_bytes()).unwrap();
1210            deserialize_extras(&mut commit, extras);
1211        }
1212        Ok(commit)
1213    }
1214
    /// Writes `contents` as a Git commit object, creates a no-gc ref for it
    /// and records its extra metadata in the extras table.
    ///
    /// If `sign_with` is given, the serialized commit is passed to it and the
    /// resulting signature is stored both in a `gpgsig` header and in
    /// `contents.secure_sig`. Returns the new commit id together with
    /// `contents`, whose committer timestamp is set to the one that was
    /// actually written.
    ///
    /// # Errors
    /// Fails if `contents` has no parents, if the root commit is listed
    /// together with other parents, if an id is invalid, or if signing or
    /// writing to the repository fails.
    ///
    /// # Panics
    /// Panics if `contents.secure_sig` is already set.
    async fn write_commit(
        &self,
        mut contents: Commit,
        mut sign_with: Option<&mut SigningFn>,
    ) -> BackendResult<(CommitId, Commit)> {
        assert!(contents.secure_sig.is_none(), "commit.secure_sig was set");

        let locked_repo = self.lock_git_repo();
        // An unresolved merge of trees is written as a special conflict tree.
        let git_tree_id = match &contents.root_tree {
            MergedTreeId::Legacy(tree_id) => validate_git_object_id(tree_id)?,
            MergedTreeId::Merge(tree_ids) => match tree_ids.as_resolved() {
                Some(tree_id) => validate_git_object_id(tree_id)?,
                None => write_tree_conflict(&locked_repo, tree_ids)?,
            },
        };
        let author = signature_to_git(&contents.author);
        let mut committer = signature_to_git(&contents.committer);
        let message = &contents.description;
        if contents.parents.is_empty() {
            return Err(BackendError::Other(
                "Cannot write a commit with no parents".into(),
            ));
        }
        let mut parents = SmallVec::new();
        for parent_id in &contents.parents {
            if *parent_id == self.root_commit_id {
                // Git doesn't have a root commit, so if the parent is the root commit, we don't
                // add it to the list of parents to write in the Git commit. We also check that
                // there are no other parents since Git cannot represent a merge between a root
                // commit and another commit.
                if contents.parents.len() > 1 {
                    return Err(BackendError::Unsupported(
                        "The Git backend does not support creating merge commits with the root \
                         commit as one of the parents."
                            .to_owned(),
                    ));
                }
            } else {
                parents.push(validate_git_object_id(parent_id)?);
            }
        }
        let mut extra_headers = vec![];
        // For an unresolved merge, the ids of all its trees are recorded in a
        // commit header as space-separated hex strings.
        if let MergedTreeId::Merge(tree_ids) = &contents.root_tree {
            if !tree_ids.is_resolved() {
                let value = tree_ids.iter().map(|id| id.hex()).join(" ").into_bytes();
                extra_headers.push((
                    BString::new(JJ_TREES_COMMIT_HEADER.to_vec()),
                    BString::new(value),
                ));
            }
        }
        if self.write_change_id_header {
            extra_headers.push((
                BString::new(CHANGE_ID_COMMIT_HEADER.to_vec()),
                BString::new(contents.change_id.reverse_hex().into()),
            ));
        }

        let extras = serialize_extras(&contents);

        // If two writers write commits of the same id with different metadata, they
        // will both succeed and the metadata entries will be "merged" later. Since
        // metadata entry is keyed by the commit id, one of the entries would be lost.
        // To prevent such race condition locally, we extend the scope covered by the
        // table lock. This is still racy if multiple machines are involved and the
        // repository is rsync-ed.
        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
        let id = loop {
            // Rebuilt on every iteration because `committer` may have been
            // adjusted by the previous one.
            let mut commit = gix::objs::Commit {
                message: message.to_owned().into(),
                tree: git_tree_id,
                author: author.into(),
                committer: committer.into(),
                encoding: None,
                parents: parents.clone(),
                extra_headers: extra_headers.clone(),
            };

            if let Some(sign) = &mut sign_with {
                // we don't use gix pool, but at least use their heuristic
                let mut data = Vec::with_capacity(512);
                commit.write_to(&mut data).unwrap();

                let sig = sign(&data).map_err(|err| BackendError::WriteObject {
                    object_type: "commit",
                    source: Box::new(err),
                })?;
                commit
                    .extra_headers
                    .push(("gpgsig".into(), sig.clone().into()));
                contents.secure_sig = Some(SecureSig { data, sig });
            }

            let git_id =
                locked_repo
                    .write_object(&commit)
                    .map_err(|err| BackendError::WriteObject {
                        object_type: "commit",
                        source: Box::new(err),
                    })?;

            match table.get_value(git_id.as_bytes()) {
                Some(existing_extras) if existing_extras != extras => {
                    // It's possible a commit already exists with the same commit id but different
                    // change id. Adjust the timestamp until this is no longer the case.
                    committer.time.seconds -= 1;
                }
                _ => break CommitId::from_bytes(git_id.as_bytes()),
            }
        };

        // Everything up to this point had no permanent effect on the repo except
        // GC-able objects
        locked_repo
            .edit_reference(to_no_gc_ref_update(&id))
            .map_err(|err| BackendError::Other(Box::new(err)))?;

        // Update the signature to match the one that was actually written to the object
        // store
        contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000);
        let mut mut_table = table.start_mutation();
        mut_table.add_entry(id.to_bytes(), extras);
        self.save_extra_metadata_table(mut_table, &table_lock)?;
        Ok((id, contents))
    }
1340
    /// Collects copy records by diffing the trees of the commits `root_id`
    /// and `head_id` with rewrite tracking enabled.
    ///
    /// Only rewrites whose source and destination are both blobs are
    /// reported. If `paths` is given, only records whose target is one of
    /// those paths are kept. The diff runs to completion inside this call, and
    /// the collected records are returned as a stream over the finished list.
    fn get_copy_records(
        &self,
        paths: Option<&[RepoPathBuf]>,
        root_id: &CommitId,
        head_id: &CommitId,
    ) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
        let repo = self.git_repo();
        let root_tree = self.read_tree_for_commit(&repo, root_id)?;
        let head_tree = self.read_tree_for_commit(&repo, head_id)?;

        // Converts one diff change to a copy record. Changes that are not
        // reportable yield `Ok(None)`.
        let change_to_copy_record =
            |change: gix::object::tree::diff::Change| -> BackendResult<Option<CopyRecord>> {
                let gix::object::tree::diff::Change::Rewrite {
                    source_location,
                    source_entry_mode,
                    source_id,
                    entry_mode: dest_entry_mode,
                    location: dest_location,
                    ..
                } = change
                else {
                    return Ok(None);
                };
                // TODO: Renamed symlinks cannot be returned because CopyRecord
                // expects `source_file: FileId`.
                if !source_entry_mode.is_blob() || !dest_entry_mode.is_blob() {
                    return Ok(None);
                }

                let source = str::from_utf8(source_location)
                    .map_err(|err| to_invalid_utf8_err(err, root_id))?;
                let dest = str::from_utf8(dest_location)
                    .map_err(|err| to_invalid_utf8_err(err, head_id))?;

                let target = RepoPathBuf::from_internal_string(dest).unwrap();
                if !paths.is_none_or(|paths| paths.contains(&target)) {
                    return Ok(None);
                }

                Ok(Some(CopyRecord {
                    target,
                    target_commit: head_id.clone(),
                    source: RepoPathBuf::from_internal_string(source).unwrap(),
                    source_file: FileId::from_bytes(source_id.as_bytes()),
                    source_commit: root_id.clone(),
                }))
            };

        let mut records: Vec<BackendResult<CopyRecord>> = Vec::new();
        root_tree
            .changes()
            .map_err(|err| BackendError::Other(err.into()))?
            .options(|opts| {
                opts.track_path().track_rewrites(Some(gix::diff::Rewrites {
                    copies: Some(gix::diff::rewrites::Copies {
                        source: gix::diff::rewrites::CopySource::FromSetOfModifiedFiles,
                        percentage: Some(0.5),
                    }),
                    percentage: Some(0.5),
                    limit: 1000,
                    track_empty: false,
                }));
            })
            .for_each_to_obtain_tree_with_cache(
                &head_tree,
                &mut self.new_diff_platform()?,
                |change| -> BackendResult<_> {
                    // A conversion error is stored as a record, so the diff
                    // always continues with the next change.
                    match change_to_copy_record(change) {
                        Ok(None) => {}
                        Ok(Some(change)) => records.push(Ok(change)),
                        Err(err) => records.push(Err(err)),
                    }
                    Ok(gix::object::tree::diff::Action::Continue)
                },
            )
            .map_err(|err| BackendError::Other(err.into()))?;
        Ok(Box::pin(futures::stream::iter(records)))
    }
1419
1420    #[tracing::instrument(skip(self, index))]
1421    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
1422        let git_repo = self.lock_git_repo();
1423        let new_heads = index
1424            .all_heads_for_gc()
1425            .map_err(|err| BackendError::Other(err.into()))?
1426            .filter(|id| *id != self.root_commit_id);
1427        recreate_no_gc_refs(&git_repo, new_heads, keep_newer)?;
1428        // TODO: remove unreachable entries from extras table if segment file
1429        // mtime <= keep_newer? (it won't be consistent with no-gc refs
1430        // preserved by the keep_newer timestamp though)
1431        // TODO: remove unreachable extras table segments
1432        // TODO: pass in keep_newer to "git gc" command
1433        run_git_gc(self.git_executable.as_ref(), self.git_repo_path())
1434            .map_err(|err| BackendError::Other(err.into()))?;
1435        // Since "git gc" will move loose refs into packed refs, in-memory
1436        // packed-refs cache should be invalidated without relying on mtime.
1437        git_repo.refs.force_refresh_packed_buffer().ok();
1438        Ok(())
1439    }
1440}
1441
1442/// Write a tree conflict as a special tree with `.jjconflict-base-N` and
1443/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd.
1444fn write_tree_conflict(
1445    repo: &gix::Repository,
1446    conflict: &Merge<TreeId>,
1447) -> BackendResult<gix::ObjectId> {
1448    // Tree entries to be written must be sorted by Entry::filename().
1449    let mut entries = itertools::chain(
1450        conflict
1451            .removes()
1452            .enumerate()
1453            .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)),
1454        conflict
1455            .adds()
1456            .enumerate()
1457            .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)),
1458    )
1459    .map(|(name, tree_id)| gix::objs::tree::Entry {
1460        mode: gix::object::tree::EntryKind::Tree.into(),
1461        filename: name.into(),
1462        oid: gix::ObjectId::from_bytes_or_panic(tree_id.as_bytes()),
1463    })
1464    .collect_vec();
1465    let readme_id = repo
1466        .write_blob(
1467            r#"This commit was made by jj, https://github.com/jj-vcs/jj.
1468The commit contains file conflicts, and therefore looks wrong when used with plain
1469Git or other tools that are unfamiliar with jj.
1470
1471The .jjconflict-* directories represent the different inputs to the conflict.
1472For details, see
1473https://jj-vcs.github.io/jj/prerelease/git-compatibility/#format-mapping-details
1474
1475If you see this file in your working copy, it probably means that you used a
1476regular `git` command to check out a conflicted commit. Use `jj abandon` to
1477recover.
1478"#,
1479        )
1480        .map_err(|err| {
1481            BackendError::Other(format!("Failed to write README for conflict tree: {err}").into())
1482        })?
1483        .detach();
1484    entries.push(gix::objs::tree::Entry {
1485        mode: gix::object::tree::EntryKind::Blob.into(),
1486        filename: "README".into(),
1487        oid: readme_id,
1488    });
1489    entries.sort_unstable();
1490    let id = repo
1491        .write_object(gix::objs::Tree { entries })
1492        .map_err(|err| BackendError::WriteObject {
1493            object_type: "tree",
1494            source: Box::new(err),
1495        })?;
1496    Ok(id.detach())
1497}
1498
1499fn conflict_term_list_to_json(parts: &[ConflictTerm]) -> serde_json::Value {
1500    serde_json::Value::Array(parts.iter().map(conflict_term_to_json).collect())
1501}
1502
1503fn conflict_term_list_from_json(json: &serde_json::Value) -> Vec<ConflictTerm> {
1504    json.as_array()
1505        .unwrap()
1506        .iter()
1507        .map(conflict_term_from_json)
1508        .collect()
1509}
1510
1511fn conflict_term_to_json(part: &ConflictTerm) -> serde_json::Value {
1512    serde_json::json!({
1513        "value": tree_value_to_json(&part.value),
1514    })
1515}
1516
1517fn conflict_term_from_json(json: &serde_json::Value) -> ConflictTerm {
1518    let json_value = json.get("value").unwrap();
1519    ConflictTerm {
1520        value: tree_value_from_json(json_value),
1521    }
1522}
1523
1524fn tree_value_to_json(value: &TreeValue) -> serde_json::Value {
1525    match value {
1526        TreeValue::File { id, executable } => serde_json::json!({
1527             "file": {
1528                 "id": id.hex(),
1529                 "executable": executable,
1530             },
1531        }),
1532        TreeValue::Symlink(id) => serde_json::json!({
1533             "symlink_id": id.hex(),
1534        }),
1535        TreeValue::Tree(id) => serde_json::json!({
1536             "tree_id": id.hex(),
1537        }),
1538        TreeValue::GitSubmodule(id) => serde_json::json!({
1539             "submodule_id": id.hex(),
1540        }),
1541        TreeValue::Conflict(id) => serde_json::json!({
1542             "conflict_id": id.hex(),
1543        }),
1544    }
1545}
1546
1547fn tree_value_from_json(json: &serde_json::Value) -> TreeValue {
1548    if let Some(json_file) = json.get("file") {
1549        TreeValue::File {
1550            id: FileId::new(bytes_vec_from_json(json_file.get("id").unwrap())),
1551            executable: json_file.get("executable").unwrap().as_bool().unwrap(),
1552        }
1553    } else if let Some(json_id) = json.get("symlink_id") {
1554        TreeValue::Symlink(SymlinkId::new(bytes_vec_from_json(json_id)))
1555    } else if let Some(json_id) = json.get("tree_id") {
1556        TreeValue::Tree(TreeId::new(bytes_vec_from_json(json_id)))
1557    } else if let Some(json_id) = json.get("submodule_id") {
1558        TreeValue::GitSubmodule(CommitId::new(bytes_vec_from_json(json_id)))
1559    } else if let Some(json_id) = json.get("conflict_id") {
1560        TreeValue::Conflict(ConflictId::new(bytes_vec_from_json(json_id)))
1561    } else {
1562        panic!("unexpected json value in conflict: {json:#?}");
1563    }
1564}
1565
1566fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> {
1567    hex::decode(value.as_str().unwrap()).unwrap()
1568}
1569
1570#[cfg(test)]
1571mod tests {
1572    use assert_matches::assert_matches;
1573    use hex::ToHex as _;
1574    use pollster::FutureExt as _;
1575
1576    use super::*;
1577    use crate::config::ConfigLayer;
1578    use crate::config::ConfigSource;
1579    use crate::config::StackedConfig;
1580    use crate::content_hash::blake2b_hash;
1581    use crate::tests::new_temp_dir;
1582
1583    const GIT_USER: &str = "Someone";
1584    const GIT_EMAIL: &str = "someone@example.com";
1585
1586    fn git_config() -> Vec<bstr::BString> {
1587        vec![
1588            format!("user.name = {GIT_USER}").into(),
1589            format!("user.email = {GIT_EMAIL}").into(),
1590            "init.defaultBranch = master".into(),
1591        ]
1592    }
1593
1594    fn open_options() -> gix::open::Options {
1595        gix::open::Options::isolated()
1596            .config_overrides(git_config())
1597            .strict_config(true)
1598    }
1599
1600    fn git_init(directory: impl AsRef<Path>) -> gix::Repository {
1601        gix::ThreadSafeRepository::init_opts(
1602            directory,
1603            gix::create::Kind::WithWorktree,
1604            gix::create::Options::default(),
1605            open_options(),
1606        )
1607        .unwrap()
1608        .to_thread_local()
1609    }
1610
1611    #[test]
1612    fn read_plain_git_commit() {
1613        let settings = user_settings();
1614        let temp_dir = new_temp_dir();
1615        let store_path = temp_dir.path();
1616        let git_repo_path = temp_dir.path().join("git");
1617        let git_repo = git_init(git_repo_path);
1618
1619        // Add a commit with some files in
1620        let blob1 = git_repo.write_blob(b"content1").unwrap().detach();
1621        let blob2 = git_repo.write_blob(b"normal").unwrap().detach();
1622        let mut dir_tree_editor = git_repo.empty_tree().edit().unwrap();
1623        dir_tree_editor
1624            .upsert("normal", gix::object::tree::EntryKind::Blob, blob1)
1625            .unwrap();
1626        dir_tree_editor
1627            .upsert("symlink", gix::object::tree::EntryKind::Link, blob2)
1628            .unwrap();
1629        let dir_tree_id = dir_tree_editor.write().unwrap().detach();
1630        let mut root_tree_builder = git_repo.empty_tree().edit().unwrap();
1631        root_tree_builder
1632            .upsert("dir", gix::object::tree::EntryKind::Tree, dir_tree_id)
1633            .unwrap();
1634        let root_tree_id = root_tree_builder.write().unwrap().detach();
1635        let git_author = gix::actor::Signature {
1636            name: "git author".into(),
1637            email: "git.author@example.com".into(),
1638            time: gix::date::Time::new(1000, 60 * 60),
1639        };
1640        let git_committer = gix::actor::Signature {
1641            name: "git committer".into(),
1642            email: "git.committer@example.com".into(),
1643            time: gix::date::Time::new(2000, -480 * 60),
1644        };
1645        let git_commit_id = git_repo
1646            .commit_as(
1647                &git_committer,
1648                &git_author,
1649                "refs/heads/dummy",
1650                "git commit message",
1651                root_tree_id,
1652                [] as [gix::ObjectId; 0],
1653            )
1654            .unwrap()
1655            .detach();
1656        git_repo
1657            .find_reference("refs/heads/dummy")
1658            .unwrap()
1659            .delete()
1660            .unwrap();
1661        let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263");
1662        // The change id is the leading reverse bits of the commit id
1663        let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25");
1664        // Check that the git commit above got the hash we expect
1665        assert_eq!(
1666            git_commit_id.as_bytes(),
1667            commit_id.as_bytes(),
1668            "{git_commit_id:?} vs {commit_id:?}"
1669        );
1670
1671        // Add an empty commit on top
1672        let git_commit_id2 = git_repo
1673            .commit_as(
1674                &git_committer,
1675                &git_author,
1676                "refs/heads/dummy2",
1677                "git commit message 2",
1678                root_tree_id,
1679                [git_commit_id],
1680            )
1681            .unwrap()
1682            .detach();
1683        git_repo
1684            .find_reference("refs/heads/dummy2")
1685            .unwrap()
1686            .delete()
1687            .unwrap();
1688        let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes());
1689
1690        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1691
1692        // Import the head commit and its ancestors
1693        backend.import_head_commits([&commit_id2]).unwrap();
1694        // Ref should be created only for the head commit
1695        let git_refs = backend
1696            .git_repo()
1697            .references()
1698            .unwrap()
1699            .prefixed("refs/jj/keep/")
1700            .unwrap()
1701            .map(|git_ref| git_ref.unwrap().id().detach())
1702            .collect_vec();
1703        assert_eq!(git_refs, vec![git_commit_id2]);
1704
1705        let commit = backend.read_commit(&commit_id).block_on().unwrap();
1706        assert_eq!(&commit.change_id, &change_id);
1707        assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
1708        assert_eq!(commit.predecessors, vec![]);
1709        assert_eq!(
1710            commit.root_tree.to_merge(),
1711            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1712        );
1713        assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1714        assert_eq!(commit.description, "git commit message");
1715        assert_eq!(commit.author.name, "git author");
1716        assert_eq!(commit.author.email, "git.author@example.com");
1717        assert_eq!(
1718            commit.author.timestamp.timestamp,
1719            MillisSinceEpoch(1000 * 1000)
1720        );
1721        assert_eq!(commit.author.timestamp.tz_offset, 60);
1722        assert_eq!(commit.committer.name, "git committer");
1723        assert_eq!(commit.committer.email, "git.committer@example.com");
1724        assert_eq!(
1725            commit.committer.timestamp.timestamp,
1726            MillisSinceEpoch(2000 * 1000)
1727        );
1728        assert_eq!(commit.committer.timestamp.tz_offset, -480);
1729
1730        let root_tree = backend
1731            .read_tree(
1732                RepoPath::root(),
1733                &TreeId::from_bytes(root_tree_id.as_bytes()),
1734            )
1735            .block_on()
1736            .unwrap();
1737        let mut root_entries = root_tree.entries();
1738        let dir = root_entries.next().unwrap();
1739        assert_eq!(root_entries.next(), None);
1740        assert_eq!(dir.name().as_internal_str(), "dir");
1741        assert_eq!(
1742            dir.value(),
1743            &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes()))
1744        );
1745
1746        let dir_tree = backend
1747            .read_tree(
1748                RepoPath::from_internal_string("dir").unwrap(),
1749                &TreeId::from_bytes(dir_tree_id.as_bytes()),
1750            )
1751            .block_on()
1752            .unwrap();
1753        let mut entries = dir_tree.entries();
1754        let file = entries.next().unwrap();
1755        let symlink = entries.next().unwrap();
1756        assert_eq!(entries.next(), None);
1757        assert_eq!(file.name().as_internal_str(), "normal");
1758        assert_eq!(
1759            file.value(),
1760            &TreeValue::File {
1761                id: FileId::from_bytes(blob1.as_bytes()),
1762                executable: false
1763            }
1764        );
1765        assert_eq!(symlink.name().as_internal_str(), "symlink");
1766        assert_eq!(
1767            symlink.value(),
1768            &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
1769        );
1770
1771        let commit2 = backend.read_commit(&commit_id2).block_on().unwrap();
1772        assert_eq!(commit2.parents, vec![commit_id.clone()]);
1773        assert_eq!(commit.predecessors, vec![]);
1774        assert_eq!(
1775            commit.root_tree.to_merge(),
1776            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1777        );
1778        assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1779    }
1780
1781    #[test]
1782    fn read_git_commit_without_importing() {
1783        let settings = user_settings();
1784        let temp_dir = new_temp_dir();
1785        let store_path = temp_dir.path();
1786        let git_repo_path = temp_dir.path().join("git");
1787        let git_repo = git_init(&git_repo_path);
1788
1789        let signature = gix::actor::Signature {
1790            name: GIT_USER.into(),
1791            email: GIT_EMAIL.into(),
1792            time: gix::date::Time::now_utc(),
1793        };
1794        let empty_tree_id =
1795            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1796        let git_commit_id = git_repo
1797            .commit_as(
1798                &signature,
1799                &signature,
1800                "refs/heads/main",
1801                "git commit message",
1802                empty_tree_id,
1803                [] as [gix::ObjectId; 0],
1804            )
1805            .unwrap();
1806
1807        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1808
1809        // read_commit() without import_head_commits() works as of now. This might be
1810        // changed later.
1811        assert!(backend
1812            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1813            .block_on()
1814            .is_ok());
1815        assert!(
1816            backend
1817                .cached_extra_metadata_table()
1818                .unwrap()
1819                .get_value(git_commit_id.as_bytes())
1820                .is_some(),
1821            "extra metadata should have been be created"
1822        );
1823    }
1824
1825    #[test]
1826    fn read_signed_git_commit() {
1827        let settings = user_settings();
1828        let temp_dir = new_temp_dir();
1829        let store_path = temp_dir.path();
1830        let git_repo_path = temp_dir.path().join("git");
1831        let git_repo = git_init(git_repo_path);
1832
1833        let signature = gix::actor::Signature {
1834            name: GIT_USER.into(),
1835            email: GIT_EMAIL.into(),
1836            time: gix::date::Time::now_utc(),
1837        };
1838        let empty_tree_id =
1839            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1840
1841        let secure_sig =
1842            "here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP\n";
1843
1844        let mut commit = gix::objs::Commit {
1845            tree: empty_tree_id,
1846            parents: smallvec::SmallVec::new(),
1847            author: signature.clone(),
1848            committer: signature.clone(),
1849            encoding: None,
1850            message: "git commit message".into(),
1851            extra_headers: Vec::new(),
1852        };
1853
1854        let mut commit_buf = Vec::new();
1855        commit.write_to(&mut commit_buf).unwrap();
1856        let commit_str = std::str::from_utf8(&commit_buf).unwrap();
1857
1858        commit
1859            .extra_headers
1860            .push(("gpgsig".into(), secure_sig.into()));
1861
1862        let git_commit_id = git_repo.write_object(&commit).unwrap();
1863
1864        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1865
1866        let commit = backend
1867            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1868            .block_on()
1869            .unwrap();
1870
1871        let sig = commit.secure_sig.expect("failed to read the signature");
1872
1873        // converting to string for nicer assert diff
1874        assert_eq!(std::str::from_utf8(&sig.sig).unwrap(), secure_sig);
1875        assert_eq!(std::str::from_utf8(&sig.data).unwrap(), commit_str);
1876    }
1877
1878    #[test]
1879    fn round_trip_change_id_via_git_header() {
1880        let settings = user_settings_with_change_id();
1881        let temp_dir = new_temp_dir();
1882
1883        let store_path = temp_dir.path().join("store");
1884        fs::create_dir(&store_path).unwrap();
1885        let empty_store_path = temp_dir.path().join("empty_store");
1886        fs::create_dir(&empty_store_path).unwrap();
1887        let git_repo_path = temp_dir.path().join("git");
1888        let git_repo = git_init(git_repo_path);
1889
1890        let backend = GitBackend::init_external(&settings, &store_path, git_repo.path()).unwrap();
1891        let original_change_id = ChangeId::from_hex("1111eeee1111eeee1111eeee1111eeee");
1892        let commit = Commit {
1893            parents: vec![backend.root_commit_id().clone()],
1894            predecessors: vec![],
1895            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
1896            change_id: original_change_id.clone(),
1897            description: "initial".to_string(),
1898            author: create_signature(),
1899            committer: create_signature(),
1900            secure_sig: None,
1901        };
1902
1903        let (initial_commit_id, _init_commit) =
1904            backend.write_commit(commit, None).block_on().unwrap();
1905        let commit = backend.read_commit(&initial_commit_id).block_on().unwrap();
1906        assert_eq!(
1907            commit.change_id, original_change_id,
1908            "The change-id header did not roundtrip"
1909        );
1910
1911        // Because of how change ids are also persisted in extra proto files,
1912        // initialize a new store without those files, but reuse the same git
1913        // storage. This change-id must be derived from the git commit header.
1914        let no_extra_backend =
1915            GitBackend::init_external(&settings, &empty_store_path, git_repo.path()).unwrap();
1916        let no_extra_commit = no_extra_backend
1917            .read_commit(&initial_commit_id)
1918            .block_on()
1919            .unwrap();
1920
1921        assert_eq!(
1922            no_extra_commit.change_id, original_change_id,
1923            "The change-id header did not roundtrip"
1924        );
1925    }
1926
1927    #[test]
1928    fn read_empty_string_placeholder() {
1929        let git_signature1 = gix::actor::SignatureRef {
1930            name: EMPTY_STRING_PLACEHOLDER.into(),
1931            email: "git.author@example.com".into(),
1932            time: gix::date::Time::new(1000, 60 * 60),
1933        };
1934        let signature1 = signature_from_git(git_signature1);
1935        assert!(signature1.name.is_empty());
1936        assert_eq!(signature1.email, "git.author@example.com");
1937        let git_signature2 = gix::actor::SignatureRef {
1938            name: "git committer".into(),
1939            email: EMPTY_STRING_PLACEHOLDER.into(),
1940            time: gix::date::Time::new(2000, -480 * 60),
1941        };
1942        let signature2 = signature_from_git(git_signature2);
1943        assert_eq!(signature2.name, "git committer");
1944        assert!(signature2.email.is_empty());
1945    }
1946
1947    #[test]
1948    fn write_empty_string_placeholder() {
1949        let signature1 = Signature {
1950            name: "".to_string(),
1951            email: "someone@example.com".to_string(),
1952            timestamp: Timestamp {
1953                timestamp: MillisSinceEpoch(0),
1954                tz_offset: 0,
1955            },
1956        };
1957        let git_signature1 = signature_to_git(&signature1);
1958        assert_eq!(git_signature1.name, EMPTY_STRING_PLACEHOLDER);
1959        assert_eq!(git_signature1.email, "someone@example.com");
1960        let signature2 = Signature {
1961            name: "Someone".to_string(),
1962            email: "".to_string(),
1963            timestamp: Timestamp {
1964                timestamp: MillisSinceEpoch(0),
1965                tz_offset: 0,
1966            },
1967        };
1968        let git_signature2 = signature_to_git(&signature2);
1969        assert_eq!(git_signature2.name, "Someone");
1970        assert_eq!(git_signature2.email, EMPTY_STRING_PLACEHOLDER);
1971    }
1972
1973    /// Test that parents get written correctly
1974    #[test]
1975    fn git_commit_parents() {
1976        let settings = user_settings();
1977        let temp_dir = new_temp_dir();
1978        let store_path = temp_dir.path();
1979        let git_repo_path = temp_dir.path().join("git");
1980        let git_repo = git_init(&git_repo_path);
1981
1982        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1983        let mut commit = Commit {
1984            parents: vec![],
1985            predecessors: vec![],
1986            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
1987            change_id: ChangeId::from_hex("abc123"),
1988            description: "".to_string(),
1989            author: create_signature(),
1990            committer: create_signature(),
1991            secure_sig: None,
1992        };
1993
1994        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
1995            backend.write_commit(commit, None).block_on()
1996        };
1997
1998        // No parents
1999        commit.parents = vec![];
2000        assert_matches!(
2001            write_commit(commit.clone()),
2002            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
2003        );
2004
2005        // Only root commit as parent
2006        commit.parents = vec![backend.root_commit_id().clone()];
2007        let first_id = write_commit(commit.clone()).unwrap().0;
2008        let first_commit = backend.read_commit(&first_id).block_on().unwrap();
2009        assert_eq!(first_commit, commit);
2010        let first_git_commit = git_repo.find_commit(git_id(&first_id)).unwrap();
2011        assert!(first_git_commit.parent_ids().collect_vec().is_empty());
2012
2013        // Only non-root commit as parent
2014        commit.parents = vec![first_id.clone()];
2015        let second_id = write_commit(commit.clone()).unwrap().0;
2016        let second_commit = backend.read_commit(&second_id).block_on().unwrap();
2017        assert_eq!(second_commit, commit);
2018        let second_git_commit = git_repo.find_commit(git_id(&second_id)).unwrap();
2019        assert_eq!(
2020            second_git_commit.parent_ids().collect_vec(),
2021            vec![git_id(&first_id)]
2022        );
2023
2024        // Merge commit
2025        commit.parents = vec![first_id.clone(), second_id.clone()];
2026        let merge_id = write_commit(commit.clone()).unwrap().0;
2027        let merge_commit = backend.read_commit(&merge_id).block_on().unwrap();
2028        assert_eq!(merge_commit, commit);
2029        let merge_git_commit = git_repo.find_commit(git_id(&merge_id)).unwrap();
2030        assert_eq!(
2031            merge_git_commit.parent_ids().collect_vec(),
2032            vec![git_id(&first_id), git_id(&second_id)]
2033        );
2034
2035        // Merge commit with root as one parent
2036        commit.parents = vec![first_id, backend.root_commit_id().clone()];
2037        assert_matches!(
2038            write_commit(commit),
2039            Err(BackendError::Unsupported(message)) if message.contains("root commit")
2040        );
2041    }
2042
2043    #[test]
2044    fn write_tree_conflicts() {
2045        let settings = user_settings();
2046        let temp_dir = new_temp_dir();
2047        let store_path = temp_dir.path();
2048        let git_repo_path = temp_dir.path().join("git");
2049        let git_repo = git_init(&git_repo_path);
2050
2051        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
2052        let create_tree = |i| {
2053            let blob_id = git_repo.write_blob(format!("content {i}")).unwrap();
2054            let mut tree_builder = git_repo.empty_tree().edit().unwrap();
2055            tree_builder
2056                .upsert(
2057                    format!("file{i}"),
2058                    gix::object::tree::EntryKind::Blob,
2059                    blob_id,
2060                )
2061                .unwrap();
2062            TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
2063        };
2064
2065        let root_tree = Merge::from_removes_adds(
2066            vec![create_tree(0), create_tree(1)],
2067            vec![create_tree(2), create_tree(3), create_tree(4)],
2068        );
2069        let mut commit = Commit {
2070            parents: vec![backend.root_commit_id().clone()],
2071            predecessors: vec![],
2072            root_tree: MergedTreeId::Merge(root_tree.clone()),
2073            change_id: ChangeId::from_hex("abc123"),
2074            description: "".to_string(),
2075            author: create_signature(),
2076            committer: create_signature(),
2077            secure_sig: None,
2078        };
2079
2080        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2081            backend.write_commit(commit, None).block_on()
2082        };
2083
2084        // When writing a tree-level conflict, the root tree on the git side has the
2085        // individual trees as subtrees.
2086        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2087        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2088        assert_eq!(read_commit, commit);
2089        let git_commit = git_repo
2090            .find_commit(gix::ObjectId::from_bytes_or_panic(
2091                read_commit_id.as_bytes(),
2092            ))
2093            .unwrap();
2094        let git_tree = git_repo.find_tree(git_commit.tree_id().unwrap()).unwrap();
2095        assert!(git_tree
2096            .iter()
2097            .map(Result::unwrap)
2098            .filter(|entry| entry.filename() != b"README")
2099            .all(|entry| entry.mode().0 == 0o040000));
2100        let mut iter = git_tree.iter().map(Result::unwrap);
2101        let entry = iter.next().unwrap();
2102        assert_eq!(entry.filename(), b".jjconflict-base-0");
2103        assert_eq!(
2104            entry.id().as_bytes(),
2105            root_tree.get_remove(0).unwrap().as_bytes()
2106        );
2107        let entry = iter.next().unwrap();
2108        assert_eq!(entry.filename(), b".jjconflict-base-1");
2109        assert_eq!(
2110            entry.id().as_bytes(),
2111            root_tree.get_remove(1).unwrap().as_bytes()
2112        );
2113        let entry = iter.next().unwrap();
2114        assert_eq!(entry.filename(), b".jjconflict-side-0");
2115        assert_eq!(
2116            entry.id().as_bytes(),
2117            root_tree.get_add(0).unwrap().as_bytes()
2118        );
2119        let entry = iter.next().unwrap();
2120        assert_eq!(entry.filename(), b".jjconflict-side-1");
2121        assert_eq!(
2122            entry.id().as_bytes(),
2123            root_tree.get_add(1).unwrap().as_bytes()
2124        );
2125        let entry = iter.next().unwrap();
2126        assert_eq!(entry.filename(), b".jjconflict-side-2");
2127        assert_eq!(
2128            entry.id().as_bytes(),
2129            root_tree.get_add(2).unwrap().as_bytes()
2130        );
2131        let entry = iter.next().unwrap();
2132        assert_eq!(entry.filename(), b"README");
2133        assert_eq!(entry.mode().0, 0o100644);
2134        assert!(iter.next().is_none());
2135
2136        // When writing a single tree using the new format, it's represented by a
2137        // regular git tree.
2138        commit.root_tree = MergedTreeId::resolved(create_tree(5));
2139        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2140        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2141        assert_eq!(read_commit, commit);
2142        let git_commit = git_repo
2143            .find_commit(gix::ObjectId::from_bytes_or_panic(
2144                read_commit_id.as_bytes(),
2145            ))
2146            .unwrap();
2147        assert_eq!(
2148            MergedTreeId::resolved(TreeId::from_bytes(git_commit.tree_id().unwrap().as_bytes())),
2149            commit.root_tree
2150        );
2151    }
2152
2153    #[test]
2154    fn commit_has_ref() {
2155        let settings = user_settings();
2156        let temp_dir = new_temp_dir();
2157        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2158        let git_repo = backend.git_repo();
2159        let signature = Signature {
2160            name: "Someone".to_string(),
2161            email: "someone@example.com".to_string(),
2162            timestamp: Timestamp {
2163                timestamp: MillisSinceEpoch(0),
2164                tz_offset: 0,
2165            },
2166        };
2167        let commit = Commit {
2168            parents: vec![backend.root_commit_id().clone()],
2169            predecessors: vec![],
2170            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2171            change_id: ChangeId::new(vec![]),
2172            description: "initial".to_string(),
2173            author: signature.clone(),
2174            committer: signature,
2175            secure_sig: None,
2176        };
2177        let commit_id = backend.write_commit(commit, None).block_on().unwrap().0;
2178        let git_refs = git_repo.references().unwrap();
2179        let git_ref_ids: Vec<_> = git_refs
2180            .prefixed("refs/jj/keep/")
2181            .unwrap()
2182            .map(|x| x.unwrap().id().detach())
2183            .collect();
2184        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2185
2186        // Concurrently-running GC deletes the ref, leaving the extra metadata.
2187        for git_ref in git_refs.prefixed("refs/jj/keep/").unwrap() {
2188            git_ref.unwrap().delete().unwrap();
2189        }
2190        // Re-imported commit should have new ref.
2191        backend.import_head_commits([&commit_id]).unwrap();
2192        let git_refs = git_repo.references().unwrap();
2193        let git_ref_ids: Vec<_> = git_refs
2194            .prefixed("refs/jj/keep/")
2195            .unwrap()
2196            .map(|x| x.unwrap().id().detach())
2197            .collect();
2198        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2199    }
2200
2201    #[test]
2202    fn import_head_commits_duplicates() {
2203        let settings = user_settings();
2204        let temp_dir = new_temp_dir();
2205        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2206        let git_repo = backend.git_repo();
2207
2208        let signature = gix::actor::Signature {
2209            name: GIT_USER.into(),
2210            email: GIT_EMAIL.into(),
2211            time: gix::date::Time::now_utc(),
2212        };
2213        let empty_tree_id =
2214            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
2215        let git_commit_id = git_repo
2216            .commit_as(
2217                &signature,
2218                &signature,
2219                "refs/heads/main",
2220                "git commit message",
2221                empty_tree_id,
2222                [] as [gix::ObjectId; 0],
2223            )
2224            .unwrap()
2225            .detach();
2226        let commit_id = CommitId::from_bytes(git_commit_id.as_bytes());
2227
2228        // Ref creation shouldn't fail because of duplicated head ids.
2229        backend
2230            .import_head_commits([&commit_id, &commit_id])
2231            .unwrap();
2232        assert!(git_repo
2233            .references()
2234            .unwrap()
2235            .prefixed("refs/jj/keep/")
2236            .unwrap()
2237            .any(|git_ref| git_ref.unwrap().id().detach() == git_commit_id));
2238    }
2239
2240    #[test]
2241    fn overlapping_git_commit_id() {
2242        let settings = user_settings();
2243        let temp_dir = new_temp_dir();
2244        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2245        let commit1 = Commit {
2246            parents: vec![backend.root_commit_id().clone()],
2247            predecessors: vec![],
2248            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2249            change_id: ChangeId::from_hex("7f0a7ce70354b22efcccf7bf144017c4"),
2250            description: "initial".to_string(),
2251            author: create_signature(),
2252            committer: create_signature(),
2253            secure_sig: None,
2254        };
2255
2256        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2257            backend.write_commit(commit, None).block_on()
2258        };
2259
2260        let (commit_id1, mut commit2) = write_commit(commit1).unwrap();
2261        commit2.predecessors.push(commit_id1.clone());
2262        // `write_commit` should prevent the ids from being the same by changing the
2263        // committer timestamp of the commit it actually writes.
2264        let (commit_id2, mut actual_commit2) = write_commit(commit2.clone()).unwrap();
2265        // The returned matches the ID
2266        assert_eq!(
2267            backend.read_commit(&commit_id2).block_on().unwrap(),
2268            actual_commit2
2269        );
2270        assert_ne!(commit_id2, commit_id1);
2271        // The committer timestamp should differ
2272        assert_ne!(
2273            actual_commit2.committer.timestamp.timestamp,
2274            commit2.committer.timestamp.timestamp
2275        );
2276        // The rest of the commit should be the same
2277        actual_commit2.committer.timestamp.timestamp = commit2.committer.timestamp.timestamp;
2278        assert_eq!(actual_commit2, commit2);
2279    }
2280
2281    #[test]
2282    fn write_signed_commit() {
2283        let settings = user_settings();
2284        let temp_dir = new_temp_dir();
2285        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2286
2287        let commit = Commit {
2288            parents: vec![backend.root_commit_id().clone()],
2289            predecessors: vec![],
2290            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2291            change_id: ChangeId::new(vec![]),
2292            description: "initial".to_string(),
2293            author: create_signature(),
2294            committer: create_signature(),
2295            secure_sig: None,
2296        };
2297
2298        let mut signer = |data: &_| {
2299            let hash: String = blake2b_hash(data).encode_hex();
2300            Ok(format!("test sig\nhash={hash}\n").into_bytes())
2301        };
2302
2303        let (id, commit) = backend
2304            .write_commit(commit, Some(&mut signer as &mut SigningFn))
2305            .block_on()
2306            .unwrap();
2307
2308        let git_repo = backend.git_repo();
2309        let obj = git_repo
2310            .find_object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
2311            .unwrap();
2312        insta::assert_snapshot!(std::str::from_utf8(&obj.data).unwrap(), @r"
2313        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2314        author Someone <someone@example.com> 0 +0000
2315        committer Someone <someone@example.com> 0 +0000
2316        gpgsig test sig
2317         hash=9ad9526c3b2103c41a229f2f3c82d107a0ecd902f476a855f0e1dd5f7bef1430663de12749b73e293a877113895a8a2a0f29da4bbc5a5f9a19c3523fb0e53518
2318
2319        initial
2320        ");
2321
2322        let returned_sig = commit.secure_sig.expect("failed to return the signature");
2323
2324        let commit = backend.read_commit(&id).block_on().unwrap();
2325
2326        let sig = commit.secure_sig.expect("failed to read the signature");
2327        assert_eq!(&sig, &returned_sig);
2328
2329        insta::assert_snapshot!(std::str::from_utf8(&sig.sig).unwrap(), @r"
2330        test sig
2331        hash=9ad9526c3b2103c41a229f2f3c82d107a0ecd902f476a855f0e1dd5f7bef1430663de12749b73e293a877113895a8a2a0f29da4bbc5a5f9a19c3523fb0e53518
2332        ");
2333        insta::assert_snapshot!(std::str::from_utf8(&sig.data).unwrap(), @r"
2334        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2335        author Someone <someone@example.com> 0 +0000
2336        committer Someone <someone@example.com> 0 +0000
2337
2338        initial
2339        ");
2340    }
2341
2342    fn git_id(commit_id: &CommitId) -> gix::ObjectId {
2343        gix::ObjectId::from_bytes_or_panic(commit_id.as_bytes())
2344    }
2345
2346    fn create_signature() -> Signature {
2347        Signature {
2348            name: GIT_USER.to_string(),
2349            email: GIT_EMAIL.to_string(),
2350            timestamp: Timestamp {
2351                timestamp: MillisSinceEpoch(0),
2352                tz_offset: 0,
2353            },
2354        }
2355    }
2356
2357    // Not using testutils::user_settings() because there is a dependency cycle
2358    // 'jj_lib (1) -> testutils -> jj_lib (2)' which creates another distinct
2359    // UserSettings type. testutils returns jj_lib (2)'s UserSettings, whereas
2360    // our UserSettings type comes from jj_lib (1).
2361    fn user_settings() -> UserSettings {
2362        let config = StackedConfig::with_defaults();
2363        UserSettings::from_config(config).unwrap()
2364    }
2365
2366    fn user_settings_with_change_id() -> UserSettings {
2367        let mut config = StackedConfig::with_defaults();
2368        let mut layer = ConfigLayer::empty(ConfigSource::Default);
2369        layer.set_value("git.write-change-id-header", true).unwrap();
2370        config.add_layer(layer);
2371        UserSettings::from_config(config).unwrap()
2372    }
2373}