jj_lib/
git_backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::any::Any;
18use std::collections::HashSet;
19use std::ffi::OsStr;
20use std::fmt::Debug;
21use std::fmt::Error;
22use std::fmt::Formatter;
23use std::fs;
24use std::io;
25use std::io::Cursor;
26use std::path::Path;
27use std::path::PathBuf;
28use std::pin::Pin;
29use std::process::Command;
30use std::process::ExitStatus;
31use std::str;
32use std::sync::Arc;
33use std::sync::Mutex;
34use std::sync::MutexGuard;
35use std::time::SystemTime;
36
37use async_trait::async_trait;
38use bstr::BStr;
39use futures::stream::BoxStream;
40use gix::bstr::BString;
41use gix::objs::CommitRefIter;
42use gix::objs::WriteTo as _;
43use itertools::Itertools as _;
44use pollster::FutureExt as _;
45use prost::Message as _;
46use smallvec::SmallVec;
47use thiserror::Error;
48use tokio::io::AsyncRead;
49use tokio::io::AsyncReadExt as _;
50
51use crate::backend::make_root_commit;
52use crate::backend::Backend;
53use crate::backend::BackendError;
54use crate::backend::BackendInitError;
55use crate::backend::BackendLoadError;
56use crate::backend::BackendResult;
57use crate::backend::ChangeId;
58use crate::backend::Commit;
59use crate::backend::CommitId;
60use crate::backend::Conflict;
61use crate::backend::ConflictId;
62use crate::backend::ConflictTerm;
63use crate::backend::CopyHistory;
64use crate::backend::CopyId;
65use crate::backend::CopyRecord;
66use crate::backend::FileId;
67use crate::backend::MergedTreeId;
68use crate::backend::MillisSinceEpoch;
69use crate::backend::SecureSig;
70use crate::backend::Signature;
71use crate::backend::SigningFn;
72use crate::backend::SymlinkId;
73use crate::backend::Timestamp;
74use crate::backend::Tree;
75use crate::backend::TreeId;
76use crate::backend::TreeValue;
77use crate::config::ConfigGetError;
78use crate::file_util;
79use crate::file_util::BadPathEncoding;
80use crate::file_util::IoResultExt as _;
81use crate::file_util::PathError;
82use crate::hex_util;
83use crate::index::Index;
84use crate::lock::FileLock;
85use crate::merge::Merge;
86use crate::merge::MergeBuilder;
87use crate::object_id::ObjectId;
88use crate::repo_path::RepoPath;
89use crate::repo_path::RepoPathBuf;
90use crate::repo_path::RepoPathComponentBuf;
91use crate::settings::GitSettings;
92use crate::settings::UserSettings;
93use crate::stacked_table::MutableTable;
94use crate::stacked_table::ReadonlyTable;
95use crate::stacked_table::TableSegment as _;
96use crate::stacked_table::TableStore;
97use crate::stacked_table::TableStoreError;
98
/// Size in bytes of the Git object ids handled by this backend (also used as
/// the key size of the extra metadata table).
const HASH_LENGTH: usize = 20;
/// Size in bytes of a change id.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";
// NOTE(review): not referenced in the visible part of this file; presumably
// the suffix used for conflict entries stored in Git trees -- confirm.
const CONFLICT_SUFFIX: &str = ".jjconflict";

/// Name of the extra Git commit header that lists the tree ids of a
/// conflicted root tree.
pub const JJ_TREES_COMMIT_HEADER: &str = "jj:trees";
/// Name of the extra Git commit header that carries the change id (encoded as
/// reverse hex).
pub const CHANGE_ID_COMMIT_HEADER: &str = "change-id";
107
108#[derive(Debug, Error)]
109pub enum GitBackendInitError {
110    #[error("Failed to initialize git repository")]
111    InitRepository(#[source] gix::init::Error),
112    #[error("Failed to open git repository")]
113    OpenRepository(#[source] gix::open::Error),
114    #[error("Failed to encode git repository path")]
115    EncodeRepositoryPath(#[source] BadPathEncoding),
116    #[error(transparent)]
117    Config(ConfigGetError),
118    #[error(transparent)]
119    Path(PathError),
120}
121
122impl From<Box<GitBackendInitError>> for BackendInitError {
123    fn from(err: Box<GitBackendInitError>) -> Self {
124        BackendInitError(err)
125    }
126}
127
128#[derive(Debug, Error)]
129pub enum GitBackendLoadError {
130    #[error("Failed to open git repository")]
131    OpenRepository(#[source] gix::open::Error),
132    #[error("Failed to decode git repository path")]
133    DecodeRepositoryPath(#[source] BadPathEncoding),
134    #[error(transparent)]
135    Config(ConfigGetError),
136    #[error(transparent)]
137    Path(PathError),
138}
139
140impl From<Box<GitBackendLoadError>> for BackendLoadError {
141    fn from(err: Box<GitBackendLoadError>) -> Self {
142        BackendLoadError(err)
143    }
144}
145
146/// `GitBackend`-specific error that may occur after the backend is loaded.
147#[derive(Debug, Error)]
148pub enum GitBackendError {
149    #[error("Failed to read non-git metadata")]
150    ReadMetadata(#[source] TableStoreError),
151    #[error("Failed to write non-git metadata")]
152    WriteMetadata(#[source] TableStoreError),
153}
154
155impl From<GitBackendError> for BackendError {
156    fn from(err: GitBackendError) -> Self {
157        BackendError::Other(err.into())
158    }
159}
160
161#[derive(Debug, Error)]
162pub enum GitGcError {
163    #[error("Failed to run git gc command")]
164    GcCommand(#[source] std::io::Error),
165    #[error("git gc command exited with an error: {0}")]
166    GcCommandErrorStatus(ExitStatus),
167}
168
169pub struct GitBackend {
170    // While gix::Repository can be created from gix::ThreadSafeRepository, it's
171    // cheaper to cache the thread-local instance behind a mutex than creating
172    // one for each backend method call. Our GitBackend is most likely to be
173    // used in a single-threaded context.
174    base_repo: gix::ThreadSafeRepository,
175    repo: Mutex<gix::Repository>,
176    root_commit_id: CommitId,
177    root_change_id: ChangeId,
178    empty_tree_id: TreeId,
179    extra_metadata_store: TableStore,
180    cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
181    git_executable: PathBuf,
182    write_change_id_header: bool,
183}
184
185impl GitBackend {
186    pub fn name() -> &'static str {
187        "git"
188    }
189
190    fn new(
191        base_repo: gix::ThreadSafeRepository,
192        extra_metadata_store: TableStore,
193        git_settings: GitSettings,
194    ) -> Self {
195        let repo = Mutex::new(base_repo.to_thread_local());
196        let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
197        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
198        let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
199        GitBackend {
200            base_repo,
201            repo,
202            root_commit_id,
203            root_change_id,
204            empty_tree_id,
205            extra_metadata_store,
206            cached_extra_metadata: Mutex::new(None),
207            git_executable: git_settings.executable_path,
208            write_change_id_header: git_settings.write_change_id_header,
209        }
210    }
211
212    pub fn init_internal(
213        settings: &UserSettings,
214        store_path: &Path,
215    ) -> Result<Self, Box<GitBackendInitError>> {
216        let git_repo_path = Path::new("git");
217        let git_repo = gix::ThreadSafeRepository::init_opts(
218            store_path.join(git_repo_path),
219            gix::create::Kind::Bare,
220            gix::create::Options::default(),
221            gix_open_opts_from_settings(settings),
222        )
223        .map_err(GitBackendInitError::InitRepository)?;
224        let git_settings = settings
225            .git_settings()
226            .map_err(GitBackendInitError::Config)?;
227        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
228    }
229
230    /// Initializes backend by creating a new Git repo at the specified
231    /// workspace path. The workspace directory must exist.
232    pub fn init_colocated(
233        settings: &UserSettings,
234        store_path: &Path,
235        workspace_root: &Path,
236    ) -> Result<Self, Box<GitBackendInitError>> {
237        let canonical_workspace_root = {
238            let path = store_path.join(workspace_root);
239            dunce::canonicalize(&path)
240                .context(&path)
241                .map_err(GitBackendInitError::Path)?
242        };
243        let git_repo = gix::ThreadSafeRepository::init_opts(
244            canonical_workspace_root,
245            gix::create::Kind::WithWorktree,
246            gix::create::Options::default(),
247            gix_open_opts_from_settings(settings),
248        )
249        .map_err(GitBackendInitError::InitRepository)?;
250        let git_repo_path = workspace_root.join(".git");
251        let git_settings = settings
252            .git_settings()
253            .map_err(GitBackendInitError::Config)?;
254        Self::init_with_repo(store_path, &git_repo_path, git_repo, git_settings)
255    }
256
257    /// Initializes backend with an existing Git repo at the specified path.
258    pub fn init_external(
259        settings: &UserSettings,
260        store_path: &Path,
261        git_repo_path: &Path,
262    ) -> Result<Self, Box<GitBackendInitError>> {
263        let canonical_git_repo_path = {
264            let path = store_path.join(git_repo_path);
265            canonicalize_git_repo_path(&path)
266                .context(&path)
267                .map_err(GitBackendInitError::Path)?
268        };
269        let git_repo = gix::ThreadSafeRepository::open_opts(
270            canonical_git_repo_path,
271            gix_open_opts_from_settings(settings),
272        )
273        .map_err(GitBackendInitError::OpenRepository)?;
274        let git_settings = settings
275            .git_settings()
276            .map_err(GitBackendInitError::Config)?;
277        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
278    }
279
280    fn init_with_repo(
281        store_path: &Path,
282        git_repo_path: &Path,
283        repo: gix::ThreadSafeRepository,
284        git_settings: GitSettings,
285    ) -> Result<Self, Box<GitBackendInitError>> {
286        let extra_path = store_path.join("extra");
287        fs::create_dir(&extra_path)
288            .context(&extra_path)
289            .map_err(GitBackendInitError::Path)?;
290        let target_path = store_path.join("git_target");
291        let git_repo_path = if cfg!(windows) && git_repo_path.is_relative() {
292            // When a repository is created in Windows, format the path with *forward
293            // slashes* and not backwards slashes. This makes it possible to use the same
294            // repository under Windows Subsystem for Linux.
295            //
296            // This only works for relative paths. If the path is absolute, there's not much
297            // we can do, and it simply won't work inside and outside WSL at the same time.
298            file_util::slash_path(git_repo_path)
299        } else {
300            git_repo_path.into()
301        };
302        let git_repo_path_bytes = file_util::path_to_bytes(&git_repo_path)
303            .map_err(GitBackendInitError::EncodeRepositoryPath)?;
304        fs::write(&target_path, git_repo_path_bytes)
305            .context(&target_path)
306            .map_err(GitBackendInitError::Path)?;
307        let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
308        Ok(GitBackend::new(repo, extra_metadata_store, git_settings))
309    }
310
311    pub fn load(
312        settings: &UserSettings,
313        store_path: &Path,
314    ) -> Result<Self, Box<GitBackendLoadError>> {
315        let git_repo_path = {
316            let target_path = store_path.join("git_target");
317            let git_repo_path_bytes = fs::read(&target_path)
318                .context(&target_path)
319                .map_err(GitBackendLoadError::Path)?;
320            let git_repo_path = file_util::path_from_bytes(&git_repo_path_bytes)
321                .map_err(GitBackendLoadError::DecodeRepositoryPath)?;
322            let git_repo_path = store_path.join(git_repo_path);
323            canonicalize_git_repo_path(&git_repo_path)
324                .context(&git_repo_path)
325                .map_err(GitBackendLoadError::Path)?
326        };
327        let repo = gix::ThreadSafeRepository::open_opts(
328            git_repo_path,
329            gix_open_opts_from_settings(settings),
330        )
331        .map_err(GitBackendLoadError::OpenRepository)?;
332        let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
333        let git_settings = settings
334            .git_settings()
335            .map_err(GitBackendLoadError::Config)?;
336        Ok(GitBackend::new(repo, extra_metadata_store, git_settings))
337    }
338
339    fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
340        self.repo.lock().unwrap()
341    }
342
343    /// Returns new thread-local instance to access to the underlying Git repo.
344    pub fn git_repo(&self) -> gix::Repository {
345        self.base_repo.to_thread_local()
346    }
347
348    /// Path to the `.git` directory or the repository itself if it's bare.
349    pub fn git_repo_path(&self) -> &Path {
350        self.base_repo.path()
351    }
352
353    /// Path to the working directory if the repository isn't bare.
354    pub fn git_workdir(&self) -> Option<&Path> {
355        self.base_repo.work_dir()
356    }
357
358    fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
359        let mut locked_head = self.cached_extra_metadata.lock().unwrap();
360        match locked_head.as_ref() {
361            Some(head) => Ok(head.clone()),
362            None => {
363                let table = self
364                    .extra_metadata_store
365                    .get_head()
366                    .map_err(GitBackendError::ReadMetadata)?;
367                *locked_head = Some(table.clone());
368                Ok(table)
369            }
370        }
371    }
372
373    fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> {
374        let table = self
375            .extra_metadata_store
376            .get_head_locked()
377            .map_err(GitBackendError::ReadMetadata)?;
378        Ok(table)
379    }
380
381    fn save_extra_metadata_table(
382        &self,
383        mut_table: MutableTable,
384        _table_lock: &FileLock,
385    ) -> BackendResult<()> {
386        let table = self
387            .extra_metadata_store
388            .save_table(mut_table)
389            .map_err(GitBackendError::WriteMetadata)?;
390        // Since the parent table was the head, saved table are likely to be new head.
391        // If it's not, cache will be reloaded when entry can't be found.
392        *self.cached_extra_metadata.lock().unwrap() = Some(table);
393        Ok(())
394    }
395
396    /// Imports the given commits and ancestors from the backing Git repo.
397    ///
398    /// The `head_ids` may contain commits that have already been imported, but
399    /// the caller should filter them out to eliminate redundant I/O processing.
400    #[tracing::instrument(skip(self, head_ids))]
401    pub fn import_head_commits<'a>(
402        &self,
403        head_ids: impl IntoIterator<Item = &'a CommitId>,
404    ) -> BackendResult<()> {
405        let head_ids: HashSet<&CommitId> = head_ids
406            .into_iter()
407            .filter(|&id| *id != self.root_commit_id)
408            .collect();
409        if head_ids.is_empty() {
410            return Ok(());
411        }
412
413        // Create no-gc ref even if known to the extras table. Concurrent GC
414        // process might have deleted the no-gc ref.
415        let locked_repo = self.lock_git_repo();
416        locked_repo
417            .edit_references(head_ids.iter().copied().map(to_no_gc_ref_update))
418            .map_err(|err| BackendError::Other(Box::new(err)))?;
419
420        // These commits are imported from Git. Make our change ids persist (otherwise
421        // future write_commit() could reassign new change id.)
422        tracing::debug!(
423            heads_count = head_ids.len(),
424            "import extra metadata entries"
425        );
426        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
427        let mut mut_table = table.start_mutation();
428        import_extra_metadata_entries_from_heads(
429            &locked_repo,
430            &mut mut_table,
431            &table_lock,
432            &head_ids,
433        )?;
434        self.save_extra_metadata_table(mut_table, &table_lock)
435    }
436
437    fn read_file_sync(&self, id: &FileId) -> BackendResult<Vec<u8>> {
438        let git_blob_id = validate_git_object_id(id)?;
439        let locked_repo = self.lock_git_repo();
440        let mut blob = locked_repo
441            .find_object(git_blob_id)
442            .map_err(|err| map_not_found_err(err, id))?
443            .try_into_blob()
444            .map_err(|err| to_read_object_err(err, id))?;
445        Ok(blob.take_data())
446    }
447
448    fn new_diff_platform(&self) -> BackendResult<gix::diff::blob::Platform> {
449        let attributes = gix::worktree::Stack::new(
450            Path::new(""),
451            gix::worktree::stack::State::AttributesStack(Default::default()),
452            gix::worktree::glob::pattern::Case::Sensitive,
453            Vec::new(),
454            Vec::new(),
455        );
456        let filter = gix::diff::blob::Pipeline::new(
457            Default::default(),
458            gix::filter::plumbing::Pipeline::new(
459                self.git_repo()
460                    .command_context()
461                    .map_err(|err| BackendError::Other(Box::new(err)))?,
462                Default::default(),
463            ),
464            Vec::new(),
465            Default::default(),
466        );
467        Ok(gix::diff::blob::Platform::new(
468            Default::default(),
469            filter,
470            gix::diff::blob::pipeline::Mode::ToGit,
471            attributes,
472        ))
473    }
474
475    fn read_tree_for_commit<'repo>(
476        &self,
477        repo: &'repo gix::Repository,
478        id: &CommitId,
479    ) -> BackendResult<gix::Tree<'repo>> {
480        let tree = self.read_commit(id).block_on()?.root_tree.to_merge();
481        // TODO(kfm): probably want to do something here if it is a merge
482        let tree_id = tree.first().clone();
483        let gix_id = validate_git_object_id(&tree_id)?;
484        repo.find_object(gix_id)
485            .map_err(|err| map_not_found_err(err, &tree_id))?
486            .try_into_tree()
487            .map_err(|err| to_read_object_err(err, &tree_id))
488    }
489}
490
491/// Canonicalizes the given `path` except for the last `".git"` component.
492///
493/// The last path component matters when opening a Git repo without `core.bare`
494/// config. This config is usually set, but the "repo" tool will set up such
495/// repositories and symlinks. Opening such repo with fully-canonicalized path
496/// would turn a colocated Git repo into a bare repo.
497pub fn canonicalize_git_repo_path(path: &Path) -> io::Result<PathBuf> {
498    if path.ends_with(".git") {
499        let workdir = path.parent().unwrap();
500        dunce::canonicalize(workdir).map(|dir| dir.join(".git"))
501    } else {
502        dunce::canonicalize(path)
503    }
504}
505
506fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options {
507    let user_name = settings.user_name();
508    let user_email = settings.user_email();
509    gix::open::Options::default()
510        .config_overrides([
511            // Committer has to be configured to record reflog. Author isn't
512            // needed, but let's copy the same values.
513            format!("author.name={user_name}"),
514            format!("author.email={user_email}"),
515            format!("committer.name={user_name}"),
516            format!("committer.email={user_email}"),
517        ])
518        // The git_target path should point the repository, not the working directory.
519        .open_path_as_is(true)
520        // Gitoxide recommends this when correctness is preferred
521        .strict_config(true)
522}
523
524/// Parses the `jj:trees` header value.
525fn root_tree_from_git_extra_header(value: &BStr) -> Result<MergedTreeId, ()> {
526    let mut tree_ids = SmallVec::new();
527    for hex in str::from_utf8(value.as_ref()).or(Err(()))?.split(' ') {
528        let tree_id = TreeId::try_from_hex(hex).ok_or(())?;
529        if tree_id.as_bytes().len() != HASH_LENGTH {
530            return Err(());
531        }
532        tree_ids.push(tree_id);
533    }
534    // It is invalid to use `jj:trees` with a non-conflicted tree. If this were
535    // allowed, it would be possible to construct a commit which appears to have
536    // different contents depending on whether it is viewed using `jj` or `git`.
537    if tree_ids.len() == 1 || tree_ids.len() % 2 == 0 {
538        return Err(());
539    }
540    Ok(MergedTreeId::Merge(Merge::from_vec(tree_ids)))
541}
542
543fn commit_from_git_without_root_parent(
544    id: &CommitId,
545    git_object: &gix::Object,
546    uses_tree_conflict_format: bool,
547    is_shallow: bool,
548) -> BackendResult<Commit> {
549    let commit = git_object
550        .try_to_commit_ref()
551        .map_err(|err| to_read_object_err(err, id))?;
552
553    // If the git header has a change-id field, we attempt to convert that to a
554    // valid JJ Change Id
555    let change_id = commit
556        .extra_headers()
557        .find(CHANGE_ID_COMMIT_HEADER)
558        .and_then(ChangeId::try_from_reverse_hex)
559        .filter(|val| val.as_bytes().len() == CHANGE_ID_LENGTH)
560        .unwrap_or_else(|| change_id_from_git_commit_id(id));
561
562    // shallow commits don't have parents their parents actually fetched, so we
563    // discard them here
564    // TODO: This causes issues when a shallow repository is deepened/unshallowed
565    let parents = if is_shallow {
566        vec![]
567    } else {
568        commit
569            .parents()
570            .map(|oid| CommitId::from_bytes(oid.as_bytes()))
571            .collect_vec()
572    };
573    // If this commit is a conflict, we'll update the root tree later, when we read
574    // the extra metadata.
575    let root_tree = commit
576        .extra_headers()
577        .find(JJ_TREES_COMMIT_HEADER)
578        .map(root_tree_from_git_extra_header)
579        .transpose()
580        .map_err(|()| to_read_object_err("Invalid jj:trees header", id))?
581        .unwrap_or_else(|| {
582            let tree_id = TreeId::from_bytes(commit.tree().as_bytes());
583            if uses_tree_conflict_format {
584                MergedTreeId::resolved(tree_id)
585            } else {
586                MergedTreeId::Legacy(tree_id)
587            }
588        });
589    // Use lossy conversion as commit message with "mojibake" is still better than
590    // nothing.
591    // TODO: what should we do with commit.encoding?
592    let description = String::from_utf8_lossy(commit.message).into_owned();
593    let author = signature_from_git(commit.author());
594    let committer = signature_from_git(commit.committer());
595
596    // If the commit is signed, extract both the signature and the signed data
597    // (which is the commit buffer with the gpgsig header omitted).
598    // We have to re-parse the raw commit data because gix CommitRef does not give
599    // us the sogned data, only the signature.
600    // Ideally, we could use try_to_commit_ref_iter at the beginning of this
601    // function and extract everything from that. For now, this works
602    let secure_sig = commit
603        .extra_headers
604        .iter()
605        // gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too
606        .any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256")
607        .then(|| CommitRefIter::signature(&git_object.data))
608        .transpose()
609        .map_err(|err| to_read_object_err(err, id))?
610        .flatten()
611        .map(|(sig, data)| SecureSig {
612            data: data.to_bstring().into(),
613            sig: sig.into_owned().into(),
614        });
615
616    Ok(Commit {
617        parents,
618        predecessors: vec![],
619        // If this commit has associated extra metadata, we may reset this later.
620        root_tree,
621        change_id,
622        description,
623        author,
624        committer,
625        secure_sig,
626    })
627}
628
629fn change_id_from_git_commit_id(id: &CommitId) -> ChangeId {
630    // We reverse the bits of the commit id to create the change id. We don't
631    // want to use the first bytes unmodified because then it would be ambiguous
632    // if a given hash prefix refers to the commit id or the change id. It would
633    // have been enough to pick the last 16 bytes instead of the leading 16
634    // bytes to address that. We also reverse the bits to make it less likely
635    // that users depend on any relationship between the two ids.
636    let bytes = id.as_bytes()[4..HASH_LENGTH]
637        .iter()
638        .rev()
639        .map(|b| b.reverse_bits())
640        .collect();
641    ChangeId::new(bytes)
642}
643
/// Stand-in written to Git in place of an empty signature name or email, and
/// mapped back to the empty string when reading.
const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING";
645
646fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature {
647    let name = signature.name;
648    let name = if name != EMPTY_STRING_PLACEHOLDER {
649        String::from_utf8_lossy(name).into_owned()
650    } else {
651        "".to_string()
652    };
653    let email = signature.email;
654    let email = if email != EMPTY_STRING_PLACEHOLDER {
655        String::from_utf8_lossy(email).into_owned()
656    } else {
657        "".to_string()
658    };
659    let time = signature.time().unwrap_or_default();
660    let timestamp = MillisSinceEpoch(time.seconds * 1000);
661    let tz_offset = time.offset.div_euclid(60); // in minutes
662    Signature {
663        name,
664        email,
665        timestamp: Timestamp {
666            timestamp,
667            tz_offset,
668        },
669    }
670}
671
672fn signature_to_git(signature: &Signature) -> gix::actor::Signature {
673    // git does not support empty names or emails
674    let name = if !signature.name.is_empty() {
675        &signature.name
676    } else {
677        EMPTY_STRING_PLACEHOLDER
678    };
679    let email = if !signature.email.is_empty() {
680        &signature.email
681    } else {
682        EMPTY_STRING_PLACEHOLDER
683    };
684    let time = gix::date::Time::new(
685        signature.timestamp.timestamp.0.div_euclid(1000),
686        signature.timestamp.tz_offset * 60, // in seconds
687    );
688    gix::actor::Signature {
689        name: name.into(),
690        email: email.into(),
691        time,
692    }
693}
694
695fn serialize_extras(commit: &Commit) -> Vec<u8> {
696    let mut proto = crate::protos::git_store::Commit {
697        change_id: commit.change_id.to_bytes(),
698        ..Default::default()
699    };
700    if let MergedTreeId::Merge(tree_ids) = &commit.root_tree {
701        proto.uses_tree_conflict_format = true;
702        if !tree_ids.is_resolved() {
703            // This is done for the sake of jj versions <0.28 (before commit
704            // f7b14be) being able to read the repo. At some point in the
705            // future, we can stop doing it.
706            proto.root_tree = tree_ids.iter().map(|r| r.to_bytes()).collect();
707        }
708    }
709    for predecessor in &commit.predecessors {
710        proto.predecessors.push(predecessor.to_bytes());
711    }
712    proto.encode_to_vec()
713}
714
715fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
716    let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
717    if !proto.change_id.is_empty() {
718        commit.change_id = ChangeId::new(proto.change_id);
719    }
720    if let MergedTreeId::Legacy(legacy_tree_id) = &commit.root_tree {
721        if proto.uses_tree_conflict_format {
722            if !proto.root_tree.is_empty() {
723                let merge_builder: MergeBuilder<_> = proto
724                    .root_tree
725                    .iter()
726                    .map(|id_bytes| TreeId::from_bytes(id_bytes))
727                    .collect();
728                commit.root_tree = MergedTreeId::Merge(merge_builder.build());
729            } else {
730                // uses_tree_conflict_format was set but there was no root_tree override in the
731                // proto, which means we should just promote the tree id from the
732                // git commit to be a known-conflict-free tree
733                commit.root_tree = MergedTreeId::resolved(legacy_tree_id.clone());
734            }
735        }
736    }
737    for predecessor in &proto.predecessors {
738        commit.predecessors.push(CommitId::from_bytes(predecessor));
739    }
740}
741
742/// Returns `RefEdit` that will create a ref in `refs/jj/keep` if not exist.
743/// Used for preventing GC of commits we create.
744fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit {
745    let name = format!("{NO_GC_REF_NAMESPACE}{id}");
746    let new = gix::refs::Target::Object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()));
747    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(new.clone());
748    gix::refs::transaction::RefEdit {
749        change: gix::refs::transaction::Change::Update {
750            log: gix::refs::transaction::LogChange {
751                message: "used by jj".into(),
752                ..Default::default()
753            },
754            expected,
755            new,
756        },
757        name: name.try_into().unwrap(),
758        deref: false,
759    }
760}
761
762fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit {
763    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(git_ref.target);
764    gix::refs::transaction::RefEdit {
765        change: gix::refs::transaction::Change::Delete {
766            expected,
767            log: gix::refs::transaction::RefLog::AndReference,
768        },
769        name: git_ref.name,
770        deref: false,
771    }
772}
773
774/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other
775/// unreachable and non-head refs.
776fn recreate_no_gc_refs(
777    git_repo: &gix::Repository,
778    new_heads: impl IntoIterator<Item = CommitId>,
779    keep_newer: SystemTime,
780) -> BackendResult<()> {
781    // Calculate diff between existing no-gc refs and new heads.
782    let new_heads: HashSet<CommitId> = new_heads.into_iter().collect();
783    let mut no_gc_refs_to_keep_count: usize = 0;
784    let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new();
785    let git_references = git_repo
786        .references()
787        .map_err(|err| BackendError::Other(err.into()))?;
788    let no_gc_refs_iter = git_references
789        .prefixed(NO_GC_REF_NAMESPACE)
790        .map_err(|err| BackendError::Other(err.into()))?;
791    for git_ref in no_gc_refs_iter {
792        let git_ref = git_ref.map_err(BackendError::Other)?.detach();
793        let oid = git_ref.target.try_id().ok_or_else(|| {
794            let name = git_ref.name.as_bstr();
795            BackendError::Other(format!("Symbolic no-gc ref found: {name}").into())
796        })?;
797        let id = CommitId::from_bytes(oid.as_bytes());
798        let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex();
799        if new_heads.contains(&id) && name_good {
800            no_gc_refs_to_keep_count += 1;
801            continue;
802        }
803        // Check timestamp of loose ref, but this is still racy on re-import
804        // because:
805        // - existing packed ref won't be demoted to loose ref
806        // - existing loose ref won't be touched
807        //
808        // TODO: might be better to switch to a dummy merge, where new no-gc ref
809        // will always have a unique name. Doing that with the current
810        // ref-per-head strategy would increase the number of the no-gc refs.
811        // https://github.com/jj-vcs/jj/pull/2659#issuecomment-1837057782
812        let loose_ref_path = git_repo.path().join(git_ref.name.to_path());
813        if let Ok(metadata) = loose_ref_path.metadata() {
814            let mtime = metadata.modified().expect("unsupported platform?");
815            if mtime > keep_newer {
816                tracing::trace!(?git_ref, "not deleting new");
817                no_gc_refs_to_keep_count += 1;
818                continue;
819            }
820        }
821        // Also deletes no-gc ref of random name created by old jj.
822        tracing::trace!(?git_ref, ?name_good, "will delete");
823        no_gc_refs_to_delete.push(git_ref);
824    }
825    tracing::info!(
826        new_heads_count = new_heads.len(),
827        no_gc_refs_to_keep_count,
828        no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(),
829        "collected reachable refs"
830    );
831
832    // It's slow to delete packed refs one by one, so update refs all at once.
833    let ref_edits = itertools::chain(
834        no_gc_refs_to_delete.into_iter().map(to_ref_deletion),
835        new_heads.iter().map(to_no_gc_ref_update),
836    );
837    git_repo
838        .edit_references(ref_edits)
839        .map_err(|err| BackendError::Other(err.into()))?;
840
841    Ok(())
842}
843
844fn run_git_gc(program: &OsStr, git_dir: &Path, keep_newer: SystemTime) -> Result<(), GitGcError> {
845    let keep_newer = keep_newer
846        .duration_since(SystemTime::UNIX_EPOCH)
847        .unwrap_or_default(); // underflow
848    let mut git = Command::new(program);
849    git.arg("--git-dir=.") // turn off discovery
850        .arg("gc")
851        .arg(format!("--prune=@{} +0000", keep_newer.as_secs()));
852    // Don't specify it by GIT_DIR/--git-dir. On Windows, the path could be
853    // canonicalized as UNC path, which wouldn't be supported by git.
854    git.current_dir(git_dir);
855    // TODO: pass output to UI layer instead of printing directly here
856    tracing::info!(?git, "running git gc");
857    let status = git.status().map_err(GitGcError::GcCommand)?;
858    tracing::info!(?status, "git gc exited");
859    if !status.success() {
860        return Err(GitGcError::GcCommandErrorStatus(status));
861    }
862    Ok(())
863}
864
865fn validate_git_object_id(id: &impl ObjectId) -> BackendResult<gix::ObjectId> {
866    if id.as_bytes().len() != HASH_LENGTH {
867        return Err(BackendError::InvalidHashLength {
868            expected: HASH_LENGTH,
869            actual: id.as_bytes().len(),
870            object_type: id.object_type(),
871            hash: id.hex(),
872        });
873    }
874    Ok(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
875}
876
877fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError {
878    if matches!(err, gix::object::find::existing::Error::NotFound { .. }) {
879        BackendError::ObjectNotFound {
880            object_type: id.object_type(),
881            hash: id.hex(),
882            source: Box::new(err),
883        }
884    } else {
885        to_read_object_err(err, id)
886    }
887}
888
889fn to_read_object_err(
890    err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
891    id: &impl ObjectId,
892) -> BackendError {
893    BackendError::ReadObject {
894        object_type: id.object_type(),
895        hash: id.hex(),
896        source: err.into(),
897    }
898}
899
900fn to_invalid_utf8_err(source: str::Utf8Error, id: &impl ObjectId) -> BackendError {
901    BackendError::InvalidUtf8 {
902        object_type: id.object_type(),
903        hash: id.hex(),
904        source,
905    }
906}
907
908fn import_extra_metadata_entries_from_heads(
909    git_repo: &gix::Repository,
910    mut_table: &mut MutableTable,
911    _table_lock: &FileLock,
912    head_ids: &HashSet<&CommitId>,
913) -> BackendResult<()> {
914    let shallow_commits = git_repo
915        .shallow_commits()
916        .map_err(|e| BackendError::Other(Box::new(e)))?;
917
918    let mut work_ids = head_ids
919        .iter()
920        .filter(|&id| mut_table.get_value(id.as_bytes()).is_none())
921        .map(|&id| id.clone())
922        .collect_vec();
923    while let Some(id) = work_ids.pop() {
924        let git_object = git_repo
925            .find_object(validate_git_object_id(&id)?)
926            .map_err(|err| map_not_found_err(err, &id))?;
927        let is_shallow = shallow_commits
928            .as_ref()
929            .is_some_and(|shallow| shallow.contains(&git_object.id));
930        // TODO(#1624): Should we read the root tree here and check if it has a
931        // `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
932        // change the description of a commit with tree-level conflicts.
933        let commit = commit_from_git_without_root_parent(&id, &git_object, true, is_shallow)?;
934        mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
935        work_ids.extend(
936            commit
937                .parents
938                .into_iter()
939                .filter(|id| mut_table.get_value(id.as_bytes()).is_none()),
940        );
941    }
942    Ok(())
943}
944
945impl Debug for GitBackend {
946    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
947        f.debug_struct("GitBackend")
948            .field("path", &self.git_repo_path())
949            .finish()
950    }
951}
952
953#[async_trait]
954impl Backend for GitBackend {
    /// Returns `self` as `&dyn Any`.
    fn as_any(&self) -> &dyn Any {
        self
    }
958
    /// Returns the backend name by delegating to the argument-less
    /// associated function `GitBackend::name()`.
    fn name(&self) -> &str {
        Self::name()
    }
962
    /// Returns `HASH_LENGTH` as the length of a commit id.
    fn commit_id_length(&self) -> usize {
        HASH_LENGTH
    }
966
    /// Returns `CHANGE_ID_LENGTH` as the length of a change id.
    fn change_id_length(&self) -> usize {
        CHANGE_ID_LENGTH
    }
970
    /// Returns the id stored in this backend's `root_commit_id` field.
    fn root_commit_id(&self) -> &CommitId {
        &self.root_commit_id
    }
974
    /// Returns the id stored in this backend's `root_change_id` field.
    fn root_change_id(&self) -> &ChangeId {
        &self.root_change_id
    }
978
    /// Returns the id stored in this backend's `empty_tree_id` field.
    fn empty_tree_id(&self) -> &TreeId {
        &self.empty_tree_id
    }
982
    /// Reports a concurrency of 1 for this backend.
    fn concurrency(&self) -> usize {
        1
    }
986
987    async fn read_file(
988        &self,
989        _path: &RepoPath,
990        id: &FileId,
991    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>> {
992        let data = self.read_file_sync(id)?;
993        Ok(Box::pin(Cursor::new(data)))
994    }
995
996    async fn write_file(
997        &self,
998        _path: &RepoPath,
999        contents: &mut (dyn AsyncRead + Send + Unpin),
1000    ) -> BackendResult<FileId> {
1001        let mut bytes = Vec::new();
1002        contents.read_to_end(&mut bytes).await.unwrap();
1003        let locked_repo = self.lock_git_repo();
1004        let oid = locked_repo
1005            .write_blob(bytes)
1006            .map_err(|err| BackendError::WriteObject {
1007                object_type: "file",
1008                source: Box::new(err),
1009            })?;
1010        Ok(FileId::new(oid.as_bytes().to_vec()))
1011    }
1012
1013    async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
1014        let git_blob_id = validate_git_object_id(id)?;
1015        let locked_repo = self.lock_git_repo();
1016        let mut blob = locked_repo
1017            .find_object(git_blob_id)
1018            .map_err(|err| map_not_found_err(err, id))?
1019            .try_into_blob()
1020            .map_err(|err| to_read_object_err(err, id))?;
1021        let target = String::from_utf8(blob.take_data())
1022            .map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?;
1023        Ok(target)
1024    }
1025
1026    async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
1027        let locked_repo = self.lock_git_repo();
1028        let oid =
1029            locked_repo
1030                .write_blob(target.as_bytes())
1031                .map_err(|err| BackendError::WriteObject {
1032                    object_type: "symlink",
1033                    source: Box::new(err),
1034                })?;
1035        Ok(SymlinkId::new(oid.as_bytes().to_vec()))
1036    }
1037
1038    async fn read_copy(&self, _id: &CopyId) -> BackendResult<CopyHistory> {
1039        Err(BackendError::Unsupported(
1040            "The Git backend doesn't support tracked copies yet".to_string(),
1041        ))
1042    }
1043
1044    async fn write_copy(&self, _contents: &CopyHistory) -> BackendResult<CopyId> {
1045        Err(BackendError::Unsupported(
1046            "The Git backend doesn't support tracked copies yet".to_string(),
1047        ))
1048    }
1049
1050    async fn get_related_copies(&self, _copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>> {
1051        Err(BackendError::Unsupported(
1052            "The Git backend doesn't support tracked copies yet".to_string(),
1053        ))
1054    }
1055
1056    async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
1057        if id == &self.empty_tree_id {
1058            return Ok(Tree::default());
1059        }
1060        let git_tree_id = validate_git_object_id(id)?;
1061
1062        let locked_repo = self.lock_git_repo();
1063        let git_tree = locked_repo
1064            .find_object(git_tree_id)
1065            .map_err(|err| map_not_found_err(err, id))?
1066            .try_into_tree()
1067            .map_err(|err| to_read_object_err(err, id))?;
1068        let mut tree = Tree::default();
1069        for entry in git_tree.iter() {
1070            let entry = entry.map_err(|err| to_read_object_err(err, id))?;
1071            let name =
1072                str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?;
1073            let (name, value) = match entry.mode().kind() {
1074                gix::object::tree::EntryKind::Tree => {
1075                    let id = TreeId::from_bytes(entry.oid().as_bytes());
1076                    (name, TreeValue::Tree(id))
1077                }
1078                gix::object::tree::EntryKind::Blob => {
1079                    let id = FileId::from_bytes(entry.oid().as_bytes());
1080                    if let Some(basename) = name.strip_suffix(CONFLICT_SUFFIX) {
1081                        (
1082                            basename,
1083                            TreeValue::Conflict(ConflictId::from_bytes(entry.oid().as_bytes())),
1084                        )
1085                    } else {
1086                        (
1087                            name,
1088                            TreeValue::File {
1089                                id,
1090                                executable: false,
1091                                copy_id: CopyId::placeholder(),
1092                            },
1093                        )
1094                    }
1095                }
1096                gix::object::tree::EntryKind::BlobExecutable => {
1097                    let id = FileId::from_bytes(entry.oid().as_bytes());
1098                    (
1099                        name,
1100                        TreeValue::File {
1101                            id,
1102                            executable: true,
1103                            copy_id: CopyId::placeholder(),
1104                        },
1105                    )
1106                }
1107                gix::object::tree::EntryKind::Link => {
1108                    let id = SymlinkId::from_bytes(entry.oid().as_bytes());
1109                    (name, TreeValue::Symlink(id))
1110                }
1111                gix::object::tree::EntryKind::Commit => {
1112                    let id = CommitId::from_bytes(entry.oid().as_bytes());
1113                    (name, TreeValue::GitSubmodule(id))
1114                }
1115            };
1116            tree.set(RepoPathComponentBuf::new(name).unwrap(), value);
1117        }
1118        Ok(tree)
1119    }
1120
1121    async fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
1122        // Tree entries to be written must be sorted by Entry::filename(), which
1123        // is slightly different from the order of our backend::Tree.
1124        let entries = contents
1125            .entries()
1126            .map(|entry| {
1127                let name = entry.name().as_internal_str();
1128                match entry.value() {
1129                    TreeValue::File {
1130                        id,
1131                        executable: false,
1132                        copy_id: _, // TODO: Use the value
1133                    } => gix::objs::tree::Entry {
1134                        mode: gix::object::tree::EntryKind::Blob.into(),
1135                        filename: name.into(),
1136                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1137                    },
1138                    TreeValue::File {
1139                        id,
1140                        executable: true,
1141                        copy_id: _, // TODO: Use the value
1142                    } => gix::objs::tree::Entry {
1143                        mode: gix::object::tree::EntryKind::BlobExecutable.into(),
1144                        filename: name.into(),
1145                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1146                    },
1147                    TreeValue::Symlink(id) => gix::objs::tree::Entry {
1148                        mode: gix::object::tree::EntryKind::Link.into(),
1149                        filename: name.into(),
1150                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1151                    },
1152                    TreeValue::Tree(id) => gix::objs::tree::Entry {
1153                        mode: gix::object::tree::EntryKind::Tree.into(),
1154                        filename: name.into(),
1155                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1156                    },
1157                    TreeValue::GitSubmodule(id) => gix::objs::tree::Entry {
1158                        mode: gix::object::tree::EntryKind::Commit.into(),
1159                        filename: name.into(),
1160                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1161                    },
1162                    TreeValue::Conflict(id) => gix::objs::tree::Entry {
1163                        mode: gix::object::tree::EntryKind::Blob.into(),
1164                        filename: (name.to_owned() + CONFLICT_SUFFIX).into(),
1165                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1166                    },
1167                }
1168            })
1169            .sorted_unstable()
1170            .collect();
1171        let locked_repo = self.lock_git_repo();
1172        let oid = locked_repo
1173            .write_object(gix::objs::Tree { entries })
1174            .map_err(|err| BackendError::WriteObject {
1175                object_type: "tree",
1176                source: Box::new(err),
1177            })?;
1178        Ok(TreeId::from_bytes(oid.as_bytes()))
1179    }
1180
1181    fn read_conflict(&self, _path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict> {
1182        let data = self.read_file_sync(&FileId::new(id.to_bytes()))?;
1183        let json: serde_json::Value = serde_json::from_slice(&data).unwrap();
1184        Ok(Conflict {
1185            removes: conflict_term_list_from_json(json.get("removes").unwrap()),
1186            adds: conflict_term_list_from_json(json.get("adds").unwrap()),
1187        })
1188    }
1189
1190    fn write_conflict(&self, _path: &RepoPath, conflict: &Conflict) -> BackendResult<ConflictId> {
1191        let json = serde_json::json!({
1192            "removes": conflict_term_list_to_json(&conflict.removes),
1193            "adds": conflict_term_list_to_json(&conflict.adds),
1194        });
1195        let json_string = json.to_string();
1196        let bytes = json_string.as_bytes();
1197        let locked_repo = self.lock_git_repo();
1198        let oid = locked_repo
1199            .write_blob(bytes)
1200            .map_err(|err| BackendError::WriteObject {
1201                object_type: "conflict",
1202                source: Box::new(err),
1203            })?;
1204        Ok(ConflictId::from_bytes(oid.as_bytes()))
1205    }
1206
1207    #[tracing::instrument(skip(self))]
1208    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
1209        if *id == self.root_commit_id {
1210            return Ok(make_root_commit(
1211                self.root_change_id().clone(),
1212                self.empty_tree_id.clone(),
1213            ));
1214        }
1215        let git_commit_id = validate_git_object_id(id)?;
1216
1217        let mut commit = {
1218            let locked_repo = self.lock_git_repo();
1219            let git_object = locked_repo
1220                .find_object(git_commit_id)
1221                .map_err(|err| map_not_found_err(err, id))?;
1222            let is_shallow = locked_repo
1223                .shallow_commits()
1224                .ok()
1225                .flatten()
1226                .is_some_and(|shallow| shallow.contains(&git_object.id));
1227            commit_from_git_without_root_parent(id, &git_object, false, is_shallow)?
1228        };
1229        if commit.parents.is_empty() {
1230            commit.parents.push(self.root_commit_id.clone());
1231        };
1232
1233        let table = self.cached_extra_metadata_table()?;
1234        if let Some(extras) = table.get_value(id.as_bytes()) {
1235            deserialize_extras(&mut commit, extras);
1236        } else {
1237            // TODO: Remove this hack and map to ObjectNotFound error if we're sure that
1238            // there are no reachable ancestor commits without extras metadata. Git commits
1239            // imported by jj < 0.8.0 might not have extras (#924).
1240            // https://github.com/jj-vcs/jj/issues/2343
1241            tracing::info!("unimported Git commit found");
1242            self.import_head_commits([id])?;
1243            let table = self.cached_extra_metadata_table()?;
1244            let extras = table.get_value(id.as_bytes()).unwrap();
1245            deserialize_extras(&mut commit, extras);
1246        }
1247        Ok(commit)
1248    }
1249
    /// Writes `contents` as a Git commit object, records its extra metadata
    /// in the extras table, and returns the new commit id together with the
    /// (possibly adjusted) contents.
    ///
    /// If `sign_with` is given, the serialized commit is passed to it and the
    /// resulting signature is stored in a `gpgsig` header and in
    /// `contents.secure_sig`.
    ///
    /// If the extras table already holds different extras for the resulting
    /// commit id, the committer timestamp is decremented by one second and
    /// the commit is written again until that is no longer the case. The
    /// returned contents carry the committer timestamp that was actually
    /// written.
    ///
    /// # Errors
    ///
    /// Fails if `contents` has no parents, if the root commit is listed
    /// together with other parents, if an id has the wrong length, or if
    /// signing, writing the object, updating the no-gc ref, or saving the
    /// extras table fails.
    ///
    /// # Panics
    ///
    /// Panics if `contents.secure_sig` is already set.
    async fn write_commit(
        &self,
        mut contents: Commit,
        mut sign_with: Option<&mut SigningFn>,
    ) -> BackendResult<(CommitId, Commit)> {
        assert!(contents.secure_sig.is_none(), "commit.secure_sig was set");

        let locked_repo = self.lock_git_repo();
        // An unresolved tree merge is materialized as a special conflict tree.
        let git_tree_id = match &contents.root_tree {
            MergedTreeId::Legacy(tree_id) => validate_git_object_id(tree_id)?,
            MergedTreeId::Merge(tree_ids) => match tree_ids.as_resolved() {
                Some(tree_id) => validate_git_object_id(tree_id)?,
                None => write_tree_conflict(&locked_repo, tree_ids)?,
            },
        };
        let author = signature_to_git(&contents.author);
        let mut committer = signature_to_git(&contents.committer);
        let message = &contents.description;
        if contents.parents.is_empty() {
            return Err(BackendError::Other(
                "Cannot write a commit with no parents".into(),
            ));
        }
        let mut parents = SmallVec::new();
        for parent_id in &contents.parents {
            if *parent_id == self.root_commit_id {
                // Git doesn't have a root commit, so if the parent is the root commit, we don't
                // add it to the list of parents to write in the Git commit. We also check that
                // there are no other parents since Git cannot represent a merge between a root
                // commit and another commit.
                if contents.parents.len() > 1 {
                    return Err(BackendError::Unsupported(
                        "The Git backend does not support creating merge commits with the root \
                         commit as one of the parents."
                            .to_owned(),
                    ));
                }
            } else {
                parents.push(validate_git_object_id(parent_id)?);
            }
        }
        let mut extra_headers: Vec<(BString, BString)> = vec![];
        // For an unresolved tree merge, the hex ids of all terms are recorded
        // in a dedicated commit header.
        if let MergedTreeId::Merge(tree_ids) = &contents.root_tree {
            if !tree_ids.is_resolved() {
                let value = tree_ids.iter().map(|id| id.hex()).join(" ");
                extra_headers.push((JJ_TREES_COMMIT_HEADER.into(), value.into()));
            }
        }
        if self.write_change_id_header {
            extra_headers.push((
                CHANGE_ID_COMMIT_HEADER.into(),
                contents.change_id.reverse_hex().into(),
            ));
        }

        let extras = serialize_extras(&contents);

        // If two writers write commits of the same id with different metadata, they
        // will both succeed and the metadata entries will be "merged" later. Since
        // metadata entry is keyed by the commit id, one of the entries would be lost.
        // To prevent such race condition locally, we extend the scope covered by the
        // table lock. This is still racy if multiple machines are involved and the
        // repository is rsync-ed.
        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
        // Each iteration writes a commit object with the current committer
        // timestamp; the loop ends once the resulting id doesn't clash with
        // an existing extras entry.
        let id = loop {
            let mut commit = gix::objs::Commit {
                message: message.to_owned().into(),
                tree: git_tree_id,
                author: author.clone(),
                committer: committer.clone(),
                encoding: None,
                parents: parents.clone(),
                extra_headers: extra_headers.clone(),
            };

            if let Some(sign) = &mut sign_with {
                // we don't use gix pool, but at least use their heuristic
                let mut data = Vec::with_capacity(512);
                // The signature is computed over the commit as serialized
                // before the `gpgsig` header is added.
                commit.write_to(&mut data).unwrap();

                let sig = sign(&data).map_err(|err| BackendError::WriteObject {
                    object_type: "commit",
                    source: Box::new(err),
                })?;
                commit
                    .extra_headers
                    .push(("gpgsig".into(), sig.clone().into()));
                contents.secure_sig = Some(SecureSig { data, sig });
            }

            let git_id =
                locked_repo
                    .write_object(&commit)
                    .map_err(|err| BackendError::WriteObject {
                        object_type: "commit",
                        source: Box::new(err),
                    })?;

            match table.get_value(git_id.as_bytes()) {
                Some(existing_extras) if existing_extras != extras => {
                    // It's possible a commit already exists with the same
                    // commit id but different change id. Adjust the timestamp
                    // until this is no longer the case.
                    //
                    // For example, this can happen when rebasing duplicate
                    // commits, https://github.com/jj-vcs/jj/issues/694.
                    //
                    // `jj` resets the committer timestamp to the current
                    // timestamp whenever it rewrites a commit. So, it's
                    // unlikely for the timestamp to be 0 even if the original
                    // commit had its timestamp set to 0. Moreover, we test that
                    // a commit with a negative timestamp can still be written
                    // and read back by `jj`.
                    committer.time.seconds -= 1;
                }
                _ => break CommitId::from_bytes(git_id.as_bytes()),
            }
        };

        // Everything up to this point had no permanent effect on the repo except
        // GC-able objects
        locked_repo
            .edit_reference(to_no_gc_ref_update(&id))
            .map_err(|err| BackendError::Other(Box::new(err)))?;

        // Update the signature to match the one that was actually written to the object
        // store
        contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000);
        let mut mut_table = table.start_mutation();
        mut_table.add_entry(id.to_bytes(), extras);
        self.save_extra_metadata_table(mut_table, &table_lock)?;
        Ok((id, contents))
    }
1383
1384    fn get_copy_records(
1385        &self,
1386        paths: Option<&[RepoPathBuf]>,
1387        root_id: &CommitId,
1388        head_id: &CommitId,
1389    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>> {
1390        let repo = self.git_repo();
1391        let root_tree = self.read_tree_for_commit(&repo, root_id)?;
1392        let head_tree = self.read_tree_for_commit(&repo, head_id)?;
1393
1394        let change_to_copy_record =
1395            |change: gix::object::tree::diff::Change| -> BackendResult<Option<CopyRecord>> {
1396                let gix::object::tree::diff::Change::Rewrite {
1397                    source_location,
1398                    source_entry_mode,
1399                    source_id,
1400                    entry_mode: dest_entry_mode,
1401                    location: dest_location,
1402                    ..
1403                } = change
1404                else {
1405                    return Ok(None);
1406                };
1407                // TODO: Renamed symlinks cannot be returned because CopyRecord
1408                // expects `source_file: FileId`.
1409                if !source_entry_mode.is_blob() || !dest_entry_mode.is_blob() {
1410                    return Ok(None);
1411                }
1412
1413                let source = str::from_utf8(source_location)
1414                    .map_err(|err| to_invalid_utf8_err(err, root_id))?;
1415                let dest = str::from_utf8(dest_location)
1416                    .map_err(|err| to_invalid_utf8_err(err, head_id))?;
1417
1418                let target = RepoPathBuf::from_internal_string(dest).unwrap();
1419                if !paths.is_none_or(|paths| paths.contains(&target)) {
1420                    return Ok(None);
1421                }
1422
1423                Ok(Some(CopyRecord {
1424                    target,
1425                    target_commit: head_id.clone(),
1426                    source: RepoPathBuf::from_internal_string(source).unwrap(),
1427                    source_file: FileId::from_bytes(source_id.as_bytes()),
1428                    source_commit: root_id.clone(),
1429                }))
1430            };
1431
1432        let mut records: Vec<BackendResult<CopyRecord>> = Vec::new();
1433        root_tree
1434            .changes()
1435            .map_err(|err| BackendError::Other(err.into()))?
1436            .options(|opts| {
1437                opts.track_path().track_rewrites(Some(gix::diff::Rewrites {
1438                    copies: Some(gix::diff::rewrites::Copies {
1439                        source: gix::diff::rewrites::CopySource::FromSetOfModifiedFiles,
1440                        percentage: Some(0.5),
1441                    }),
1442                    percentage: Some(0.5),
1443                    limit: 1000,
1444                    track_empty: false,
1445                }));
1446            })
1447            .for_each_to_obtain_tree_with_cache(
1448                &head_tree,
1449                &mut self.new_diff_platform()?,
1450                |change| -> BackendResult<_> {
1451                    match change_to_copy_record(change) {
1452                        Ok(None) => {}
1453                        Ok(Some(change)) => records.push(Ok(change)),
1454                        Err(err) => records.push(Err(err)),
1455                    }
1456                    Ok(gix::object::tree::diff::Action::Continue)
1457                },
1458            )
1459            .map_err(|err| BackendError::Other(err.into()))?;
1460        Ok(Box::pin(futures::stream::iter(records)))
1461    }
1462
1463    #[tracing::instrument(skip(self, index))]
1464    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
1465        let git_repo = self.lock_git_repo();
1466        let new_heads = index
1467            .all_heads_for_gc()
1468            .map_err(|err| BackendError::Other(err.into()))?
1469            .filter(|id| *id != self.root_commit_id);
1470        recreate_no_gc_refs(&git_repo, new_heads, keep_newer)?;
1471        // TODO: remove unreachable entries from extras table if segment file
1472        // mtime <= keep_newer? (it won't be consistent with no-gc refs
1473        // preserved by the keep_newer timestamp though)
1474        // TODO: remove unreachable extras table segments
1475        run_git_gc(
1476            self.git_executable.as_ref(),
1477            self.git_repo_path(),
1478            keep_newer,
1479        )
1480        .map_err(|err| BackendError::Other(err.into()))?;
1481        // Since "git gc" will move loose refs into packed refs, in-memory
1482        // packed-refs cache should be invalidated without relying on mtime.
1483        git_repo.refs.force_refresh_packed_buffer().ok();
1484        Ok(())
1485    }
1486}
1487
/// Writes a tree conflict as a special tree with `.jjconflict-base-N` and
/// `.jjconflict-side-N` subtrees, plus a `README` blob explaining the layout.
/// This ensures that the parts are not GC'd.
///
/// The `.jjconflict-base-N` entries are the removes of `conflict` and the
/// `.jjconflict-side-N` entries are its adds, each numbered from 0.
///
/// Returns the id of the written tree, or an error if the README blob or the
/// tree cannot be written.
fn write_tree_conflict(
    repo: &gix::Repository,
    conflict: &Merge<TreeId>,
) -> BackendResult<gix::ObjectId> {
    // Tree entries to be written must be sorted by Entry::filename().
    let mut entries = itertools::chain(
        conflict
            .removes()
            .enumerate()
            .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)),
        conflict
            .adds()
            .enumerate()
            .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)),
    )
    .map(|(name, tree_id)| gix::objs::tree::Entry {
        mode: gix::object::tree::EntryKind::Tree.into(),
        filename: name.into(),
        oid: gix::ObjectId::from_bytes_or_panic(tree_id.as_bytes()),
    })
    .collect_vec();
    let readme_id = repo
        .write_blob(
            r#"This commit was made by jj, https://github.com/jj-vcs/jj.
The commit contains file conflicts, and therefore looks wrong when used with plain
Git or other tools that are unfamiliar with jj.

The .jjconflict-* directories represent the different inputs to the conflict.
For details, see
https://jj-vcs.github.io/jj/prerelease/git-compatibility/#format-mapping-details

If you see this file in your working copy, it probably means that you used a
regular `git` command to check out a conflicted commit. Use `jj abandon` to
recover.
"#,
        )
        .map_err(|err| {
            BackendError::Other(format!("Failed to write README for conflict tree: {err}").into())
        })?
        .detach();
    entries.push(gix::objs::tree::Entry {
        mode: gix::object::tree::EntryKind::Blob.into(),
        filename: "README".into(),
        oid: readme_id,
    });
    // Sort only after the README has been added so the whole list is ordered.
    entries.sort_unstable();
    let id = repo
        .write_object(gix::objs::Tree { entries })
        .map_err(|err| BackendError::WriteObject {
            object_type: "tree",
            source: Box::new(err),
        })?;
    Ok(id.detach())
}
1544
1545fn conflict_term_list_to_json(parts: &[ConflictTerm]) -> serde_json::Value {
1546    serde_json::Value::Array(parts.iter().map(conflict_term_to_json).collect())
1547}
1548
1549fn conflict_term_list_from_json(json: &serde_json::Value) -> Vec<ConflictTerm> {
1550    json.as_array()
1551        .unwrap()
1552        .iter()
1553        .map(conflict_term_from_json)
1554        .collect()
1555}
1556
1557fn conflict_term_to_json(part: &ConflictTerm) -> serde_json::Value {
1558    serde_json::json!({
1559        "value": tree_value_to_json(&part.value),
1560    })
1561}
1562
1563fn conflict_term_from_json(json: &serde_json::Value) -> ConflictTerm {
1564    let json_value = json.get("value").unwrap();
1565    ConflictTerm {
1566        value: tree_value_from_json(json_value),
1567    }
1568}
1569
1570fn tree_value_to_json(value: &TreeValue) -> serde_json::Value {
1571    match value {
1572        TreeValue::File {
1573            id,
1574            executable,
1575            copy_id: _,
1576        } => serde_json::json!({
1577             "file": {
1578                 "id": id.hex(),
1579                 "executable": executable,
1580             },
1581        }),
1582        TreeValue::Symlink(id) => serde_json::json!({
1583             "symlink_id": id.hex(),
1584        }),
1585        TreeValue::Tree(id) => serde_json::json!({
1586             "tree_id": id.hex(),
1587        }),
1588        TreeValue::GitSubmodule(id) => serde_json::json!({
1589             "submodule_id": id.hex(),
1590        }),
1591        TreeValue::Conflict(id) => serde_json::json!({
1592             "conflict_id": id.hex(),
1593        }),
1594    }
1595}
1596
1597fn tree_value_from_json(json: &serde_json::Value) -> TreeValue {
1598    if let Some(json_file) = json.get("file") {
1599        TreeValue::File {
1600            id: FileId::new(bytes_vec_from_json(json_file.get("id").unwrap())),
1601            executable: json_file.get("executable").unwrap().as_bool().unwrap(),
1602            copy_id: CopyId::placeholder(),
1603        }
1604    } else if let Some(json_id) = json.get("symlink_id") {
1605        TreeValue::Symlink(SymlinkId::new(bytes_vec_from_json(json_id)))
1606    } else if let Some(json_id) = json.get("tree_id") {
1607        TreeValue::Tree(TreeId::new(bytes_vec_from_json(json_id)))
1608    } else if let Some(json_id) = json.get("submodule_id") {
1609        TreeValue::GitSubmodule(CommitId::new(bytes_vec_from_json(json_id)))
1610    } else if let Some(json_id) = json.get("conflict_id") {
1611        TreeValue::Conflict(ConflictId::new(bytes_vec_from_json(json_id)))
1612    } else {
1613        panic!("unexpected json value in conflict: {json:#?}");
1614    }
1615}
1616
1617fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> {
1618    hex_util::decode_hex(value.as_str().unwrap()).unwrap()
1619}
1620
1621#[cfg(test)]
1622mod tests {
1623    use assert_matches::assert_matches;
1624    use gix::date::parse::TimeBuf;
1625    use pollster::FutureExt as _;
1626
1627    use super::*;
1628    use crate::config::StackedConfig;
1629    use crate::content_hash::blake2b_hash;
1630    use crate::tests::new_temp_dir;
1631
1632    const GIT_USER: &str = "Someone";
1633    const GIT_EMAIL: &str = "someone@example.com";
1634
1635    fn git_config() -> Vec<bstr::BString> {
1636        vec![
1637            format!("user.name = {GIT_USER}").into(),
1638            format!("user.email = {GIT_EMAIL}").into(),
1639            "init.defaultBranch = master".into(),
1640        ]
1641    }
1642
1643    fn open_options() -> gix::open::Options {
1644        gix::open::Options::isolated()
1645            .config_overrides(git_config())
1646            .strict_config(true)
1647    }
1648
1649    fn git_init(directory: impl AsRef<Path>) -> gix::Repository {
1650        gix::ThreadSafeRepository::init_opts(
1651            directory,
1652            gix::create::Kind::WithWorktree,
1653            gix::create::Options::default(),
1654            open_options(),
1655        )
1656        .unwrap()
1657        .to_thread_local()
1658    }
1659
1660    #[test]
1661    fn read_plain_git_commit() {
1662        let settings = user_settings();
1663        let temp_dir = new_temp_dir();
1664        let store_path = temp_dir.path();
1665        let git_repo_path = temp_dir.path().join("git");
1666        let git_repo = git_init(git_repo_path);
1667
1668        // Add a commit with some files in
1669        let blob1 = git_repo.write_blob(b"content1").unwrap().detach();
1670        let blob2 = git_repo.write_blob(b"normal").unwrap().detach();
1671        let mut dir_tree_editor = git_repo.empty_tree().edit().unwrap();
1672        dir_tree_editor
1673            .upsert("normal", gix::object::tree::EntryKind::Blob, blob1)
1674            .unwrap();
1675        dir_tree_editor
1676            .upsert("symlink", gix::object::tree::EntryKind::Link, blob2)
1677            .unwrap();
1678        let dir_tree_id = dir_tree_editor.write().unwrap().detach();
1679        let mut root_tree_builder = git_repo.empty_tree().edit().unwrap();
1680        root_tree_builder
1681            .upsert("dir", gix::object::tree::EntryKind::Tree, dir_tree_id)
1682            .unwrap();
1683        let root_tree_id = root_tree_builder.write().unwrap().detach();
1684        let git_author = gix::actor::Signature {
1685            name: "git author".into(),
1686            email: "git.author@example.com".into(),
1687            time: gix::date::Time::new(1000, 60 * 60),
1688        };
1689        let git_committer = gix::actor::Signature {
1690            name: "git committer".into(),
1691            email: "git.committer@example.com".into(),
1692            time: gix::date::Time::new(2000, -480 * 60),
1693        };
1694        let git_commit_id = git_repo
1695            .commit_as(
1696                git_committer.to_ref(&mut TimeBuf::default()),
1697                git_author.to_ref(&mut TimeBuf::default()),
1698                "refs/heads/dummy",
1699                "git commit message",
1700                root_tree_id,
1701                [] as [gix::ObjectId; 0],
1702            )
1703            .unwrap()
1704            .detach();
1705        git_repo
1706            .find_reference("refs/heads/dummy")
1707            .unwrap()
1708            .delete()
1709            .unwrap();
1710        let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263");
1711        // The change id is the leading reverse bits of the commit id
1712        let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25");
1713        // Check that the git commit above got the hash we expect
1714        assert_eq!(
1715            git_commit_id.as_bytes(),
1716            commit_id.as_bytes(),
1717            "{git_commit_id:?} vs {commit_id:?}"
1718        );
1719
1720        // Add an empty commit on top
1721        let git_commit_id2 = git_repo
1722            .commit_as(
1723                git_committer.to_ref(&mut TimeBuf::default()),
1724                git_author.to_ref(&mut TimeBuf::default()),
1725                "refs/heads/dummy2",
1726                "git commit message 2",
1727                root_tree_id,
1728                [git_commit_id],
1729            )
1730            .unwrap()
1731            .detach();
1732        git_repo
1733            .find_reference("refs/heads/dummy2")
1734            .unwrap()
1735            .delete()
1736            .unwrap();
1737        let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes());
1738
1739        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1740
1741        // Import the head commit and its ancestors
1742        backend.import_head_commits([&commit_id2]).unwrap();
1743        // Ref should be created only for the head commit
1744        let git_refs = backend
1745            .git_repo()
1746            .references()
1747            .unwrap()
1748            .prefixed("refs/jj/keep/")
1749            .unwrap()
1750            .map(|git_ref| git_ref.unwrap().id().detach())
1751            .collect_vec();
1752        assert_eq!(git_refs, vec![git_commit_id2]);
1753
1754        let commit = backend.read_commit(&commit_id).block_on().unwrap();
1755        assert_eq!(&commit.change_id, &change_id);
1756        assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
1757        assert_eq!(commit.predecessors, vec![]);
1758        assert_eq!(
1759            commit.root_tree.to_merge(),
1760            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1761        );
1762        assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1763        assert_eq!(commit.description, "git commit message");
1764        assert_eq!(commit.author.name, "git author");
1765        assert_eq!(commit.author.email, "git.author@example.com");
1766        assert_eq!(
1767            commit.author.timestamp.timestamp,
1768            MillisSinceEpoch(1000 * 1000)
1769        );
1770        assert_eq!(commit.author.timestamp.tz_offset, 60);
1771        assert_eq!(commit.committer.name, "git committer");
1772        assert_eq!(commit.committer.email, "git.committer@example.com");
1773        assert_eq!(
1774            commit.committer.timestamp.timestamp,
1775            MillisSinceEpoch(2000 * 1000)
1776        );
1777        assert_eq!(commit.committer.timestamp.tz_offset, -480);
1778
1779        let root_tree = backend
1780            .read_tree(
1781                RepoPath::root(),
1782                &TreeId::from_bytes(root_tree_id.as_bytes()),
1783            )
1784            .block_on()
1785            .unwrap();
1786        let mut root_entries = root_tree.entries();
1787        let dir = root_entries.next().unwrap();
1788        assert_eq!(root_entries.next(), None);
1789        assert_eq!(dir.name().as_internal_str(), "dir");
1790        assert_eq!(
1791            dir.value(),
1792            &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes()))
1793        );
1794
1795        let dir_tree = backend
1796            .read_tree(
1797                RepoPath::from_internal_string("dir").unwrap(),
1798                &TreeId::from_bytes(dir_tree_id.as_bytes()),
1799            )
1800            .block_on()
1801            .unwrap();
1802        let mut entries = dir_tree.entries();
1803        let file = entries.next().unwrap();
1804        let symlink = entries.next().unwrap();
1805        assert_eq!(entries.next(), None);
1806        assert_eq!(file.name().as_internal_str(), "normal");
1807        assert_eq!(
1808            file.value(),
1809            &TreeValue::File {
1810                id: FileId::from_bytes(blob1.as_bytes()),
1811                executable: false,
1812                copy_id: CopyId::placeholder(),
1813            }
1814        );
1815        assert_eq!(symlink.name().as_internal_str(), "symlink");
1816        assert_eq!(
1817            symlink.value(),
1818            &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
1819        );
1820
1821        let commit2 = backend.read_commit(&commit_id2).block_on().unwrap();
1822        assert_eq!(commit2.parents, vec![commit_id.clone()]);
1823        assert_eq!(commit.predecessors, vec![]);
1824        assert_eq!(
1825            commit.root_tree.to_merge(),
1826            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1827        );
1828        assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1829    }
1830
1831    #[test]
1832    fn read_git_commit_without_importing() {
1833        let settings = user_settings();
1834        let temp_dir = new_temp_dir();
1835        let store_path = temp_dir.path();
1836        let git_repo_path = temp_dir.path().join("git");
1837        let git_repo = git_init(&git_repo_path);
1838
1839        let signature = gix::actor::Signature {
1840            name: GIT_USER.into(),
1841            email: GIT_EMAIL.into(),
1842            time: gix::date::Time::now_utc(),
1843        };
1844        let empty_tree_id =
1845            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1846        let git_commit_id = git_repo
1847            .commit_as(
1848                signature.to_ref(&mut TimeBuf::default()),
1849                signature.to_ref(&mut TimeBuf::default()),
1850                "refs/heads/main",
1851                "git commit message",
1852                empty_tree_id,
1853                [] as [gix::ObjectId; 0],
1854            )
1855            .unwrap();
1856
1857        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1858
1859        // read_commit() without import_head_commits() works as of now. This might be
1860        // changed later.
1861        assert!(backend
1862            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1863            .block_on()
1864            .is_ok());
1865        assert!(
1866            backend
1867                .cached_extra_metadata_table()
1868                .unwrap()
1869                .get_value(git_commit_id.as_bytes())
1870                .is_some(),
1871            "extra metadata should have been be created"
1872        );
1873    }
1874
1875    #[test]
1876    fn read_signed_git_commit() {
1877        let settings = user_settings();
1878        let temp_dir = new_temp_dir();
1879        let store_path = temp_dir.path();
1880        let git_repo_path = temp_dir.path().join("git");
1881        let git_repo = git_init(git_repo_path);
1882
1883        let signature = gix::actor::Signature {
1884            name: GIT_USER.into(),
1885            email: GIT_EMAIL.into(),
1886            time: gix::date::Time::now_utc(),
1887        };
1888        let empty_tree_id =
1889            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1890
1891        let secure_sig =
1892            "here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP\n";
1893
1894        let mut commit = gix::objs::Commit {
1895            tree: empty_tree_id,
1896            parents: smallvec::SmallVec::new(),
1897            author: signature.clone(),
1898            committer: signature.clone(),
1899            encoding: None,
1900            message: "git commit message".into(),
1901            extra_headers: Vec::new(),
1902        };
1903
1904        let mut commit_buf = Vec::new();
1905        commit.write_to(&mut commit_buf).unwrap();
1906        let commit_str = std::str::from_utf8(&commit_buf).unwrap();
1907
1908        commit
1909            .extra_headers
1910            .push(("gpgsig".into(), secure_sig.into()));
1911
1912        let git_commit_id = git_repo.write_object(&commit).unwrap();
1913
1914        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1915
1916        let commit = backend
1917            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1918            .block_on()
1919            .unwrap();
1920
1921        let sig = commit.secure_sig.expect("failed to read the signature");
1922
1923        // converting to string for nicer assert diff
1924        assert_eq!(std::str::from_utf8(&sig.sig).unwrap(), secure_sig);
1925        assert_eq!(std::str::from_utf8(&sig.data).unwrap(), commit_str);
1926    }
1927
1928    #[test]
1929    fn round_trip_change_id_via_git_header() {
1930        let settings = user_settings();
1931        let temp_dir = new_temp_dir();
1932
1933        let store_path = temp_dir.path().join("store");
1934        fs::create_dir(&store_path).unwrap();
1935        let empty_store_path = temp_dir.path().join("empty_store");
1936        fs::create_dir(&empty_store_path).unwrap();
1937        let git_repo_path = temp_dir.path().join("git");
1938        let git_repo = git_init(git_repo_path);
1939
1940        let backend = GitBackend::init_external(&settings, &store_path, git_repo.path()).unwrap();
1941        let original_change_id = ChangeId::from_hex("1111eeee1111eeee1111eeee1111eeee");
1942        let commit = Commit {
1943            parents: vec![backend.root_commit_id().clone()],
1944            predecessors: vec![],
1945            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
1946            change_id: original_change_id.clone(),
1947            description: "initial".to_string(),
1948            author: create_signature(),
1949            committer: create_signature(),
1950            secure_sig: None,
1951        };
1952
1953        let (initial_commit_id, _init_commit) =
1954            backend.write_commit(commit, None).block_on().unwrap();
1955        let commit = backend.read_commit(&initial_commit_id).block_on().unwrap();
1956        assert_eq!(
1957            commit.change_id, original_change_id,
1958            "The change-id header did not roundtrip"
1959        );
1960
1961        // Because of how change ids are also persisted in extra proto files,
1962        // initialize a new store without those files, but reuse the same git
1963        // storage. This change-id must be derived from the git commit header.
1964        let no_extra_backend =
1965            GitBackend::init_external(&settings, &empty_store_path, git_repo.path()).unwrap();
1966        let no_extra_commit = no_extra_backend
1967            .read_commit(&initial_commit_id)
1968            .block_on()
1969            .unwrap();
1970
1971        assert_eq!(
1972            no_extra_commit.change_id, original_change_id,
1973            "The change-id header did not roundtrip"
1974        );
1975    }
1976
1977    #[test]
1978    fn read_empty_string_placeholder() {
1979        let git_signature1 = gix::actor::Signature {
1980            name: EMPTY_STRING_PLACEHOLDER.into(),
1981            email: "git.author@example.com".into(),
1982            time: gix::date::Time::new(1000, 60 * 60),
1983        };
1984        let signature1 = signature_from_git(git_signature1.to_ref(&mut TimeBuf::default()));
1985        assert!(signature1.name.is_empty());
1986        assert_eq!(signature1.email, "git.author@example.com");
1987        let git_signature2 = gix::actor::Signature {
1988            name: "git committer".into(),
1989            email: EMPTY_STRING_PLACEHOLDER.into(),
1990            time: gix::date::Time::new(2000, -480 * 60),
1991        };
1992        let signature2 = signature_from_git(git_signature2.to_ref(&mut TimeBuf::default()));
1993        assert_eq!(signature2.name, "git committer");
1994        assert!(signature2.email.is_empty());
1995    }
1996
1997    #[test]
1998    fn write_empty_string_placeholder() {
1999        let signature1 = Signature {
2000            name: "".to_string(),
2001            email: "someone@example.com".to_string(),
2002            timestamp: Timestamp {
2003                timestamp: MillisSinceEpoch(0),
2004                tz_offset: 0,
2005            },
2006        };
2007        let git_signature1 = signature_to_git(&signature1);
2008        assert_eq!(git_signature1.name, EMPTY_STRING_PLACEHOLDER);
2009        assert_eq!(git_signature1.email, "someone@example.com");
2010        let signature2 = Signature {
2011            name: "Someone".to_string(),
2012            email: "".to_string(),
2013            timestamp: Timestamp {
2014                timestamp: MillisSinceEpoch(0),
2015                tz_offset: 0,
2016            },
2017        };
2018        let git_signature2 = signature_to_git(&signature2);
2019        assert_eq!(git_signature2.name, "Someone");
2020        assert_eq!(git_signature2.email, EMPTY_STRING_PLACEHOLDER);
2021    }
2022
2023    /// Test that parents get written correctly
2024    #[test]
2025    fn git_commit_parents() {
2026        let settings = user_settings();
2027        let temp_dir = new_temp_dir();
2028        let store_path = temp_dir.path();
2029        let git_repo_path = temp_dir.path().join("git");
2030        let git_repo = git_init(&git_repo_path);
2031
2032        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
2033        let mut commit = Commit {
2034            parents: vec![],
2035            predecessors: vec![],
2036            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2037            change_id: ChangeId::from_hex("abc123"),
2038            description: "".to_string(),
2039            author: create_signature(),
2040            committer: create_signature(),
2041            secure_sig: None,
2042        };
2043
2044        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2045            backend.write_commit(commit, None).block_on()
2046        };
2047
2048        // No parents
2049        commit.parents = vec![];
2050        assert_matches!(
2051            write_commit(commit.clone()),
2052            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
2053        );
2054
2055        // Only root commit as parent
2056        commit.parents = vec![backend.root_commit_id().clone()];
2057        let first_id = write_commit(commit.clone()).unwrap().0;
2058        let first_commit = backend.read_commit(&first_id).block_on().unwrap();
2059        assert_eq!(first_commit, commit);
2060        let first_git_commit = git_repo.find_commit(git_id(&first_id)).unwrap();
2061        assert!(first_git_commit.parent_ids().collect_vec().is_empty());
2062
2063        // Only non-root commit as parent
2064        commit.parents = vec![first_id.clone()];
2065        let second_id = write_commit(commit.clone()).unwrap().0;
2066        let second_commit = backend.read_commit(&second_id).block_on().unwrap();
2067        assert_eq!(second_commit, commit);
2068        let second_git_commit = git_repo.find_commit(git_id(&second_id)).unwrap();
2069        assert_eq!(
2070            second_git_commit.parent_ids().collect_vec(),
2071            vec![git_id(&first_id)]
2072        );
2073
2074        // Merge commit
2075        commit.parents = vec![first_id.clone(), second_id.clone()];
2076        let merge_id = write_commit(commit.clone()).unwrap().0;
2077        let merge_commit = backend.read_commit(&merge_id).block_on().unwrap();
2078        assert_eq!(merge_commit, commit);
2079        let merge_git_commit = git_repo.find_commit(git_id(&merge_id)).unwrap();
2080        assert_eq!(
2081            merge_git_commit.parent_ids().collect_vec(),
2082            vec![git_id(&first_id), git_id(&second_id)]
2083        );
2084
2085        // Merge commit with root as one parent
2086        commit.parents = vec![first_id, backend.root_commit_id().clone()];
2087        assert_matches!(
2088            write_commit(commit),
2089            Err(BackendError::Unsupported(message)) if message.contains("root commit")
2090        );
2091    }
2092
2093    #[test]
2094    fn write_tree_conflicts() {
2095        let settings = user_settings();
2096        let temp_dir = new_temp_dir();
2097        let store_path = temp_dir.path();
2098        let git_repo_path = temp_dir.path().join("git");
2099        let git_repo = git_init(&git_repo_path);
2100
2101        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
2102        let create_tree = |i| {
2103            let blob_id = git_repo.write_blob(format!("content {i}")).unwrap();
2104            let mut tree_builder = git_repo.empty_tree().edit().unwrap();
2105            tree_builder
2106                .upsert(
2107                    format!("file{i}"),
2108                    gix::object::tree::EntryKind::Blob,
2109                    blob_id,
2110                )
2111                .unwrap();
2112            TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
2113        };
2114
2115        let root_tree = Merge::from_removes_adds(
2116            vec![create_tree(0), create_tree(1)],
2117            vec![create_tree(2), create_tree(3), create_tree(4)],
2118        );
2119        let mut commit = Commit {
2120            parents: vec![backend.root_commit_id().clone()],
2121            predecessors: vec![],
2122            root_tree: MergedTreeId::Merge(root_tree.clone()),
2123            change_id: ChangeId::from_hex("abc123"),
2124            description: "".to_string(),
2125            author: create_signature(),
2126            committer: create_signature(),
2127            secure_sig: None,
2128        };
2129
2130        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2131            backend.write_commit(commit, None).block_on()
2132        };
2133
2134        // When writing a tree-level conflict, the root tree on the git side has the
2135        // individual trees as subtrees.
2136        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2137        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2138        assert_eq!(read_commit, commit);
2139        let git_commit = git_repo
2140            .find_commit(gix::ObjectId::from_bytes_or_panic(
2141                read_commit_id.as_bytes(),
2142            ))
2143            .unwrap();
2144        let git_tree = git_repo.find_tree(git_commit.tree_id().unwrap()).unwrap();
2145        assert!(git_tree
2146            .iter()
2147            .map(Result::unwrap)
2148            .filter(|entry| entry.filename() != b"README")
2149            .all(|entry| entry.mode().value() == 0o040000));
2150        let mut iter = git_tree.iter().map(Result::unwrap);
2151        let entry = iter.next().unwrap();
2152        assert_eq!(entry.filename(), b".jjconflict-base-0");
2153        assert_eq!(
2154            entry.id().as_bytes(),
2155            root_tree.get_remove(0).unwrap().as_bytes()
2156        );
2157        let entry = iter.next().unwrap();
2158        assert_eq!(entry.filename(), b".jjconflict-base-1");
2159        assert_eq!(
2160            entry.id().as_bytes(),
2161            root_tree.get_remove(1).unwrap().as_bytes()
2162        );
2163        let entry = iter.next().unwrap();
2164        assert_eq!(entry.filename(), b".jjconflict-side-0");
2165        assert_eq!(
2166            entry.id().as_bytes(),
2167            root_tree.get_add(0).unwrap().as_bytes()
2168        );
2169        let entry = iter.next().unwrap();
2170        assert_eq!(entry.filename(), b".jjconflict-side-1");
2171        assert_eq!(
2172            entry.id().as_bytes(),
2173            root_tree.get_add(1).unwrap().as_bytes()
2174        );
2175        let entry = iter.next().unwrap();
2176        assert_eq!(entry.filename(), b".jjconflict-side-2");
2177        assert_eq!(
2178            entry.id().as_bytes(),
2179            root_tree.get_add(2).unwrap().as_bytes()
2180        );
2181        let entry = iter.next().unwrap();
2182        assert_eq!(entry.filename(), b"README");
2183        assert_eq!(entry.mode().value(), 0o100644);
2184        assert!(iter.next().is_none());
2185
2186        // When writing a single tree using the new format, it's represented by a
2187        // regular git tree.
2188        commit.root_tree = MergedTreeId::resolved(create_tree(5));
2189        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2190        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2191        assert_eq!(read_commit, commit);
2192        let git_commit = git_repo
2193            .find_commit(gix::ObjectId::from_bytes_or_panic(
2194                read_commit_id.as_bytes(),
2195            ))
2196            .unwrap();
2197        assert_eq!(
2198            MergedTreeId::resolved(TreeId::from_bytes(git_commit.tree_id().unwrap().as_bytes())),
2199            commit.root_tree
2200        );
2201    }
2202
2203    #[test]
2204    fn commit_has_ref() {
2205        let settings = user_settings();
2206        let temp_dir = new_temp_dir();
2207        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2208        let git_repo = backend.git_repo();
2209        let signature = Signature {
2210            name: "Someone".to_string(),
2211            email: "someone@example.com".to_string(),
2212            timestamp: Timestamp {
2213                timestamp: MillisSinceEpoch(0),
2214                tz_offset: 0,
2215            },
2216        };
2217        let commit = Commit {
2218            parents: vec![backend.root_commit_id().clone()],
2219            predecessors: vec![],
2220            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2221            change_id: ChangeId::new(vec![42; 16]),
2222            description: "initial".to_string(),
2223            author: signature.clone(),
2224            committer: signature,
2225            secure_sig: None,
2226        };
2227        let commit_id = backend.write_commit(commit, None).block_on().unwrap().0;
2228        let git_refs = git_repo.references().unwrap();
2229        let git_ref_ids: Vec<_> = git_refs
2230            .prefixed("refs/jj/keep/")
2231            .unwrap()
2232            .map(|x| x.unwrap().id().detach())
2233            .collect();
2234        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2235
2236        // Concurrently-running GC deletes the ref, leaving the extra metadata.
2237        for git_ref in git_refs.prefixed("refs/jj/keep/").unwrap() {
2238            git_ref.unwrap().delete().unwrap();
2239        }
2240        // Re-imported commit should have new ref.
2241        backend.import_head_commits([&commit_id]).unwrap();
2242        let git_refs = git_repo.references().unwrap();
2243        let git_ref_ids: Vec<_> = git_refs
2244            .prefixed("refs/jj/keep/")
2245            .unwrap()
2246            .map(|x| x.unwrap().id().detach())
2247            .collect();
2248        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2249    }
2250
2251    #[test]
2252    fn import_head_commits_duplicates() {
2253        let settings = user_settings();
2254        let temp_dir = new_temp_dir();
2255        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2256        let git_repo = backend.git_repo();
2257
2258        let signature = gix::actor::Signature {
2259            name: GIT_USER.into(),
2260            email: GIT_EMAIL.into(),
2261            time: gix::date::Time::now_utc(),
2262        };
2263        let empty_tree_id =
2264            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
2265        let git_commit_id = git_repo
2266            .commit_as(
2267                signature.to_ref(&mut TimeBuf::default()),
2268                signature.to_ref(&mut TimeBuf::default()),
2269                "refs/heads/main",
2270                "git commit message",
2271                empty_tree_id,
2272                [] as [gix::ObjectId; 0],
2273            )
2274            .unwrap()
2275            .detach();
2276        let commit_id = CommitId::from_bytes(git_commit_id.as_bytes());
2277
2278        // Ref creation shouldn't fail because of duplicated head ids.
2279        backend
2280            .import_head_commits([&commit_id, &commit_id])
2281            .unwrap();
2282        assert!(git_repo
2283            .references()
2284            .unwrap()
2285            .prefixed("refs/jj/keep/")
2286            .unwrap()
2287            .any(|git_ref| git_ref.unwrap().id().detach() == git_commit_id));
2288    }
2289
2290    #[test]
2291    fn overlapping_git_commit_id() {
2292        let settings = user_settings();
2293        let temp_dir = new_temp_dir();
2294        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2295        let commit1 = Commit {
2296            parents: vec![backend.root_commit_id().clone()],
2297            predecessors: vec![],
2298            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2299            change_id: ChangeId::from_hex("7f0a7ce70354b22efcccf7bf144017c4"),
2300            description: "initial".to_string(),
2301            author: create_signature(),
2302            committer: create_signature(),
2303            secure_sig: None,
2304        };
2305
2306        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2307            backend.write_commit(commit, None).block_on()
2308        };
2309
2310        let (commit_id1, mut commit2) = write_commit(commit1).unwrap();
2311        commit2.predecessors.push(commit_id1.clone());
2312        // `write_commit` should prevent the ids from being the same by changing the
2313        // committer timestamp of the commit it actually writes.
2314        let (commit_id2, mut actual_commit2) = write_commit(commit2.clone()).unwrap();
2315        // The returned matches the ID
2316        assert_eq!(
2317            backend.read_commit(&commit_id2).block_on().unwrap(),
2318            actual_commit2
2319        );
2320        assert_ne!(commit_id2, commit_id1);
2321        // The committer timestamp should differ
2322        assert_ne!(
2323            actual_commit2.committer.timestamp.timestamp,
2324            commit2.committer.timestamp.timestamp
2325        );
2326        // The rest of the commit should be the same
2327        actual_commit2.committer.timestamp.timestamp = commit2.committer.timestamp.timestamp;
2328        assert_eq!(actual_commit2, commit2);
2329    }
2330
    /// Writes a commit through a test signing function, then checks that the
    /// signature appears in the raw Git commit object and that the signature
    /// returned by `write_commit` equals the one obtained from `read_commit`.
    #[test]
    fn write_signed_commit() {
        let settings = user_settings();
        let temp_dir = new_temp_dir();
        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();

        // Unsigned input commit on top of the root commit with an empty tree.
        let commit = Commit {
            parents: vec![backend.root_commit_id().clone()],
            predecessors: vec![],
            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
            change_id: ChangeId::new(vec![42; 16]),
            description: "initial".to_string(),
            author: create_signature(),
            committer: create_signature(),
            secure_sig: None,
        };

        // Fake signer: the "signature" is a fixed label followed by the
        // hex-encoded `blake2b_hash` of the data handed to it.
        let mut signer = |data: &_| {
            let hash: String = hex_util::encode_hex(&blake2b_hash(data));
            Ok(format!("test sig\nhash={hash}\n").into_bytes())
        };

        // The explicit cast to `&mut SigningFn` also fixes the closure's
        // otherwise-inferred parameter type.
        let (id, commit) = backend
            .write_commit(commit, Some(&mut signer as &mut SigningFn))
            .block_on()
            .unwrap();

        // Inspect the raw Git object: the signature is stored in a `gpgsig`
        // header, with its second line as a one-space-indented continuation.
        let git_repo = backend.git_repo();
        let obj = git_repo
            .find_object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
            .unwrap();
        insta::assert_snapshot!(std::str::from_utf8(&obj.data).unwrap(), @r"
        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
        author Someone <someone@example.com> 0 +0000
        committer Someone <someone@example.com> 0 +0000
        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp
        gpgsig test sig
         hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29

        initial
        ");

        // The commit returned from `write_commit` must already carry the
        // signature.
        let returned_sig = commit.secure_sig.expect("failed to return the signature");

        // Reading the commit back must yield the same signature.
        let commit = backend.read_commit(&id).block_on().unwrap();

        let sig = commit.secure_sig.expect("failed to read the signature");
        assert_eq!(&sig, &returned_sig);

        // `sig.sig` holds the signer's output verbatim.
        insta::assert_snapshot!(std::str::from_utf8(&sig.sig).unwrap(), @r"
        test sig
        hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29
        ");
        // `sig.data` is the commit content without the `gpgsig` header.
        insta::assert_snapshot!(std::str::from_utf8(&sig.data).unwrap(), @r"
        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
        author Someone <someone@example.com> 0 +0000
        committer Someone <someone@example.com> 0 +0000
        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp

        initial
        ");
    }
2393
2394    fn git_id(commit_id: &CommitId) -> gix::ObjectId {
2395        gix::ObjectId::from_bytes_or_panic(commit_id.as_bytes())
2396    }
2397
2398    fn create_signature() -> Signature {
2399        Signature {
2400            name: GIT_USER.to_string(),
2401            email: GIT_EMAIL.to_string(),
2402            timestamp: Timestamp {
2403                timestamp: MillisSinceEpoch(0),
2404                tz_offset: 0,
2405            },
2406        }
2407    }
2408
2409    // Not using testutils::user_settings() because there is a dependency cycle
2410    // 'jj_lib (1) -> testutils -> jj_lib (2)' which creates another distinct
2411    // UserSettings type. testutils returns jj_lib (2)'s UserSettings, whereas
2412    // our UserSettings type comes from jj_lib (1).
2413    fn user_settings() -> UserSettings {
2414        let config = StackedConfig::with_defaults();
2415        UserSettings::from_config(config).unwrap()
2416    }
2417}