jj_lib/
git_backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::collections::HashSet;
18use std::ffi::OsStr;
19use std::fmt::Debug;
20use std::fmt::Error;
21use std::fmt::Formatter;
22use std::fs;
23use std::io;
24use std::io::Cursor;
25use std::path::Path;
26use std::path::PathBuf;
27use std::pin::Pin;
28use std::process::Command;
29use std::process::ExitStatus;
30use std::str::Utf8Error;
31use std::sync::Arc;
32use std::sync::Mutex;
33use std::sync::MutexGuard;
34use std::time::SystemTime;
35
36use async_trait::async_trait;
37use bstr::BStr;
38use futures::stream::BoxStream;
39use gix::bstr::BString;
40use gix::objs::CommitRefIter;
41use gix::objs::WriteTo as _;
42use itertools::Itertools as _;
43use once_cell::sync::OnceCell as OnceLock;
44use pollster::FutureExt as _;
45use prost::Message as _;
46use smallvec::SmallVec;
47use thiserror::Error;
48use tokio::io::AsyncRead;
49use tokio::io::AsyncReadExt as _;
50
51use crate::backend::Backend;
52use crate::backend::BackendError;
53use crate::backend::BackendInitError;
54use crate::backend::BackendLoadError;
55use crate::backend::BackendResult;
56use crate::backend::ChangeId;
57use crate::backend::Commit;
58use crate::backend::CommitId;
59use crate::backend::CopyHistory;
60use crate::backend::CopyId;
61use crate::backend::CopyRecord;
62use crate::backend::FileId;
63use crate::backend::MergedTreeId;
64use crate::backend::MillisSinceEpoch;
65use crate::backend::SecureSig;
66use crate::backend::Signature;
67use crate::backend::SigningFn;
68use crate::backend::SymlinkId;
69use crate::backend::Timestamp;
70use crate::backend::Tree;
71use crate::backend::TreeId;
72use crate::backend::TreeValue;
73use crate::backend::make_root_commit;
74use crate::config::ConfigGetError;
75use crate::file_util;
76use crate::file_util::BadPathEncoding;
77use crate::file_util::IoResultExt as _;
78use crate::file_util::PathError;
79use crate::index::Index;
80use crate::lock::FileLock;
81use crate::merge::Merge;
82use crate::merge::MergeBuilder;
83use crate::object_id::ObjectId;
84use crate::repo_path::RepoPath;
85use crate::repo_path::RepoPathBuf;
86use crate::repo_path::RepoPathComponentBuf;
87use crate::settings::GitSettings;
88use crate::settings::UserSettings;
89use crate::stacked_table::MutableTable;
90use crate::stacked_table::ReadonlyTable;
91use crate::stacked_table::TableSegment as _;
92use crate::stacked_table::TableStore;
93use crate::stacked_table::TableStoreError;
94
/// Length in bytes of the object ids handled by this backend.
const HASH_LENGTH: usize = 20;
/// Length in bytes of a change id.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";

/// Name of the extra commit header parsed as the `jj:trees` value.
pub const JJ_TREES_COMMIT_HEADER: &str = "jj:trees";
/// Name of the extra commit header from which a change id is extracted.
pub const CHANGE_ID_COMMIT_HEADER: &str = "change-id";
102
103#[derive(Debug, Error)]
104pub enum GitBackendInitError {
105    #[error("Failed to initialize git repository")]
106    InitRepository(#[source] gix::init::Error),
107    #[error("Failed to open git repository")]
108    OpenRepository(#[source] gix::open::Error),
109    #[error("Failed to encode git repository path")]
110    EncodeRepositoryPath(#[source] BadPathEncoding),
111    #[error(transparent)]
112    Config(ConfigGetError),
113    #[error(transparent)]
114    Path(PathError),
115}
116
117impl From<Box<GitBackendInitError>> for BackendInitError {
118    fn from(err: Box<GitBackendInitError>) -> Self {
119        Self(err)
120    }
121}
122
123#[derive(Debug, Error)]
124pub enum GitBackendLoadError {
125    #[error("Failed to open git repository")]
126    OpenRepository(#[source] gix::open::Error),
127    #[error("Failed to decode git repository path")]
128    DecodeRepositoryPath(#[source] BadPathEncoding),
129    #[error(transparent)]
130    Config(ConfigGetError),
131    #[error(transparent)]
132    Path(PathError),
133}
134
135impl From<Box<GitBackendLoadError>> for BackendLoadError {
136    fn from(err: Box<GitBackendLoadError>) -> Self {
137        Self(err)
138    }
139}
140
141/// `GitBackend`-specific error that may occur after the backend is loaded.
142#[derive(Debug, Error)]
143pub enum GitBackendError {
144    #[error("Failed to read non-git metadata")]
145    ReadMetadata(#[source] TableStoreError),
146    #[error("Failed to write non-git metadata")]
147    WriteMetadata(#[source] TableStoreError),
148}
149
150impl From<GitBackendError> for BackendError {
151    fn from(err: GitBackendError) -> Self {
152        Self::Other(err.into())
153    }
154}
155
156#[derive(Debug, Error)]
157pub enum GitGcError {
158    #[error("Failed to run git gc command")]
159    GcCommand(#[source] std::io::Error),
160    #[error("git gc command exited with an error: {0}")]
161    GcCommandErrorStatus(ExitStatus),
162}
163
164pub struct GitBackend {
165    // While gix::Repository can be created from gix::ThreadSafeRepository, it's
166    // cheaper to cache the thread-local instance behind a mutex than creating
167    // one for each backend method call. Our GitBackend is most likely to be
168    // used in a single-threaded context.
169    base_repo: gix::ThreadSafeRepository,
170    repo: Mutex<gix::Repository>,
171    root_commit_id: CommitId,
172    root_change_id: ChangeId,
173    empty_tree_id: TreeId,
174    shallow_root_ids: OnceLock<Vec<CommitId>>,
175    extra_metadata_store: TableStore,
176    cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
177    git_executable: PathBuf,
178    write_change_id_header: bool,
179}
180
181impl GitBackend {
182    pub fn name() -> &'static str {
183        "git"
184    }
185
186    fn new(
187        base_repo: gix::ThreadSafeRepository,
188        extra_metadata_store: TableStore,
189        git_settings: GitSettings,
190    ) -> Self {
191        let repo = Mutex::new(base_repo.to_thread_local());
192        let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
193        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
194        let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
195        Self {
196            base_repo,
197            repo,
198            root_commit_id,
199            root_change_id,
200            empty_tree_id,
201            shallow_root_ids: OnceLock::new(),
202            extra_metadata_store,
203            cached_extra_metadata: Mutex::new(None),
204            git_executable: git_settings.executable_path,
205            write_change_id_header: git_settings.write_change_id_header,
206        }
207    }
208
209    pub fn init_internal(
210        settings: &UserSettings,
211        store_path: &Path,
212    ) -> Result<Self, Box<GitBackendInitError>> {
213        let git_repo_path = Path::new("git");
214        let git_repo = gix::ThreadSafeRepository::init_opts(
215            store_path.join(git_repo_path),
216            gix::create::Kind::Bare,
217            gix::create::Options::default(),
218            gix_open_opts_from_settings(settings),
219        )
220        .map_err(GitBackendInitError::InitRepository)?;
221        let git_settings = settings
222            .git_settings()
223            .map_err(GitBackendInitError::Config)?;
224        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
225    }
226
227    /// Initializes backend by creating a new Git repo at the specified
228    /// workspace path. The workspace directory must exist.
229    pub fn init_colocated(
230        settings: &UserSettings,
231        store_path: &Path,
232        workspace_root: &Path,
233    ) -> Result<Self, Box<GitBackendInitError>> {
234        let canonical_workspace_root = {
235            let path = store_path.join(workspace_root);
236            dunce::canonicalize(&path)
237                .context(&path)
238                .map_err(GitBackendInitError::Path)?
239        };
240        let git_repo = gix::ThreadSafeRepository::init_opts(
241            canonical_workspace_root,
242            gix::create::Kind::WithWorktree,
243            gix::create::Options::default(),
244            gix_open_opts_from_settings(settings),
245        )
246        .map_err(GitBackendInitError::InitRepository)?;
247        let git_repo_path = workspace_root.join(".git");
248        let git_settings = settings
249            .git_settings()
250            .map_err(GitBackendInitError::Config)?;
251        Self::init_with_repo(store_path, &git_repo_path, git_repo, git_settings)
252    }
253
254    /// Initializes backend with an existing Git repo at the specified path.
255    pub fn init_external(
256        settings: &UserSettings,
257        store_path: &Path,
258        git_repo_path: &Path,
259    ) -> Result<Self, Box<GitBackendInitError>> {
260        let canonical_git_repo_path = {
261            let path = store_path.join(git_repo_path);
262            canonicalize_git_repo_path(&path)
263                .context(&path)
264                .map_err(GitBackendInitError::Path)?
265        };
266        let git_repo = gix::ThreadSafeRepository::open_opts(
267            canonical_git_repo_path,
268            gix_open_opts_from_settings(settings),
269        )
270        .map_err(GitBackendInitError::OpenRepository)?;
271        let git_settings = settings
272            .git_settings()
273            .map_err(GitBackendInitError::Config)?;
274        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
275    }
276
277    fn init_with_repo(
278        store_path: &Path,
279        git_repo_path: &Path,
280        repo: gix::ThreadSafeRepository,
281        git_settings: GitSettings,
282    ) -> Result<Self, Box<GitBackendInitError>> {
283        let extra_path = store_path.join("extra");
284        fs::create_dir(&extra_path)
285            .context(&extra_path)
286            .map_err(GitBackendInitError::Path)?;
287        let target_path = store_path.join("git_target");
288        let git_repo_path = if cfg!(windows) && git_repo_path.is_relative() {
289            // When a repository is created in Windows, format the path with *forward
290            // slashes* and not backwards slashes. This makes it possible to use the same
291            // repository under Windows Subsystem for Linux.
292            //
293            // This only works for relative paths. If the path is absolute, there's not much
294            // we can do, and it simply won't work inside and outside WSL at the same time.
295            file_util::slash_path(git_repo_path)
296        } else {
297            git_repo_path.into()
298        };
299        let git_repo_path_bytes = file_util::path_to_bytes(&git_repo_path)
300            .map_err(GitBackendInitError::EncodeRepositoryPath)?;
301        fs::write(&target_path, git_repo_path_bytes)
302            .context(&target_path)
303            .map_err(GitBackendInitError::Path)?;
304        let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
305        Ok(Self::new(repo, extra_metadata_store, git_settings))
306    }
307
308    pub fn load(
309        settings: &UserSettings,
310        store_path: &Path,
311    ) -> Result<Self, Box<GitBackendLoadError>> {
312        let git_repo_path = {
313            let target_path = store_path.join("git_target");
314            let git_repo_path_bytes = fs::read(&target_path)
315                .context(&target_path)
316                .map_err(GitBackendLoadError::Path)?;
317            let git_repo_path = file_util::path_from_bytes(&git_repo_path_bytes)
318                .map_err(GitBackendLoadError::DecodeRepositoryPath)?;
319            let git_repo_path = store_path.join(git_repo_path);
320            canonicalize_git_repo_path(&git_repo_path)
321                .context(&git_repo_path)
322                .map_err(GitBackendLoadError::Path)?
323        };
324        let repo = gix::ThreadSafeRepository::open_opts(
325            git_repo_path,
326            gix_open_opts_from_settings(settings),
327        )
328        .map_err(GitBackendLoadError::OpenRepository)?;
329        let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
330        let git_settings = settings
331            .git_settings()
332            .map_err(GitBackendLoadError::Config)?;
333        Ok(Self::new(repo, extra_metadata_store, git_settings))
334    }
335
336    fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
337        self.repo.lock().unwrap()
338    }
339
340    /// Returns new thread-local instance to access to the underlying Git repo.
341    pub fn git_repo(&self) -> gix::Repository {
342        self.base_repo.to_thread_local()
343    }
344
345    /// Path to the `.git` directory or the repository itself if it's bare.
346    pub fn git_repo_path(&self) -> &Path {
347        self.base_repo.path()
348    }
349
350    /// Path to the working directory if the repository isn't bare.
351    pub fn git_workdir(&self) -> Option<&Path> {
352        self.base_repo.work_dir()
353    }
354
355    fn shallow_root_ids(&self, git_repo: &gix::Repository) -> BackendResult<&[CommitId]> {
356        // The list of shallow roots is cached by gix, but it's still expensive
357        // to stat file on every read_object() call. Refreshing shallow roots is
358        // also bad for consistency reasons.
359        self.shallow_root_ids
360            .get_or_try_init(|| {
361                let maybe_oids = git_repo
362                    .shallow_commits()
363                    .map_err(|err| BackendError::Other(err.into()))?;
364                let commit_ids = maybe_oids.map_or(vec![], |oids| {
365                    oids.iter()
366                        .map(|oid| CommitId::from_bytes(oid.as_bytes()))
367                        .collect()
368                });
369                Ok(commit_ids)
370            })
371            .map(AsRef::as_ref)
372    }
373
374    fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
375        let mut locked_head = self.cached_extra_metadata.lock().unwrap();
376        match locked_head.as_ref() {
377            Some(head) => Ok(head.clone()),
378            None => {
379                let table = self
380                    .extra_metadata_store
381                    .get_head()
382                    .map_err(GitBackendError::ReadMetadata)?;
383                *locked_head = Some(table.clone());
384                Ok(table)
385            }
386        }
387    }
388
389    fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> {
390        let table = self
391            .extra_metadata_store
392            .get_head_locked()
393            .map_err(GitBackendError::ReadMetadata)?;
394        Ok(table)
395    }
396
397    fn save_extra_metadata_table(
398        &self,
399        mut_table: MutableTable,
400        _table_lock: &FileLock,
401    ) -> BackendResult<()> {
402        let table = self
403            .extra_metadata_store
404            .save_table(mut_table)
405            .map_err(GitBackendError::WriteMetadata)?;
406        // Since the parent table was the head, saved table are likely to be new head.
407        // If it's not, cache will be reloaded when entry can't be found.
408        *self.cached_extra_metadata.lock().unwrap() = Some(table);
409        Ok(())
410    }
411
412    /// Imports the given commits and ancestors from the backing Git repo.
413    ///
414    /// The `head_ids` may contain commits that have already been imported, but
415    /// the caller should filter them out to eliminate redundant I/O processing.
416    #[tracing::instrument(skip(self, head_ids))]
417    pub fn import_head_commits<'a>(
418        &self,
419        head_ids: impl IntoIterator<Item = &'a CommitId>,
420    ) -> BackendResult<()> {
421        let head_ids: HashSet<&CommitId> = head_ids
422            .into_iter()
423            .filter(|&id| *id != self.root_commit_id)
424            .collect();
425        if head_ids.is_empty() {
426            return Ok(());
427        }
428
429        // Create no-gc ref even if known to the extras table. Concurrent GC
430        // process might have deleted the no-gc ref.
431        let locked_repo = self.lock_git_repo();
432        locked_repo
433            .edit_references(head_ids.iter().copied().map(to_no_gc_ref_update))
434            .map_err(|err| BackendError::Other(Box::new(err)))?;
435
436        // These commits are imported from Git. Make our change ids persist (otherwise
437        // future write_commit() could reassign new change id.)
438        tracing::debug!(
439            heads_count = head_ids.len(),
440            "import extra metadata entries"
441        );
442        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
443        let mut mut_table = table.start_mutation();
444        import_extra_metadata_entries_from_heads(
445            &locked_repo,
446            &mut mut_table,
447            &table_lock,
448            &head_ids,
449            self.shallow_root_ids(&locked_repo)?,
450        )?;
451        self.save_extra_metadata_table(mut_table, &table_lock)
452    }
453
454    fn read_file_sync(&self, id: &FileId) -> BackendResult<Vec<u8>> {
455        let git_blob_id = validate_git_object_id(id)?;
456        let locked_repo = self.lock_git_repo();
457        let mut blob = locked_repo
458            .find_object(git_blob_id)
459            .map_err(|err| map_not_found_err(err, id))?
460            .try_into_blob()
461            .map_err(|err| to_read_object_err(err, id))?;
462        Ok(blob.take_data())
463    }
464
465    fn new_diff_platform(&self) -> BackendResult<gix::diff::blob::Platform> {
466        let attributes = gix::worktree::Stack::new(
467            Path::new(""),
468            gix::worktree::stack::State::AttributesStack(Default::default()),
469            gix::worktree::glob::pattern::Case::Sensitive,
470            Vec::new(),
471            Vec::new(),
472        );
473        let filter = gix::diff::blob::Pipeline::new(
474            Default::default(),
475            gix::filter::plumbing::Pipeline::new(
476                self.git_repo()
477                    .command_context()
478                    .map_err(|err| BackendError::Other(Box::new(err)))?,
479                Default::default(),
480            ),
481            Vec::new(),
482            Default::default(),
483        );
484        Ok(gix::diff::blob::Platform::new(
485            Default::default(),
486            filter,
487            gix::diff::blob::pipeline::Mode::ToGit,
488            attributes,
489        ))
490    }
491
492    fn read_tree_for_commit<'repo>(
493        &self,
494        repo: &'repo gix::Repository,
495        id: &CommitId,
496    ) -> BackendResult<gix::Tree<'repo>> {
497        let tree = self.read_commit(id).block_on()?.root_tree.into_merge();
498        // TODO(kfm): probably want to do something here if it is a merge
499        let tree_id = tree.first().clone();
500        let gix_id = validate_git_object_id(&tree_id)?;
501        repo.find_object(gix_id)
502            .map_err(|err| map_not_found_err(err, &tree_id))?
503            .try_into_tree()
504            .map_err(|err| to_read_object_err(err, &tree_id))
505    }
506}
507
508/// Canonicalizes the given `path` except for the last `".git"` component.
509///
510/// The last path component matters when opening a Git repo without `core.bare`
511/// config. This config is usually set, but the "repo" tool will set up such
512/// repositories and symlinks. Opening such repo with fully-canonicalized path
513/// would turn a colocated Git repo into a bare repo.
514pub fn canonicalize_git_repo_path(path: &Path) -> io::Result<PathBuf> {
515    if path.ends_with(".git") {
516        let workdir = path.parent().unwrap();
517        dunce::canonicalize(workdir).map(|dir| dir.join(".git"))
518    } else {
519        dunce::canonicalize(path)
520    }
521}
522
523fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options {
524    let user_name = settings.user_name();
525    let user_email = settings.user_email();
526    gix::open::Options::default()
527        .config_overrides([
528            // Committer has to be configured to record reflog. Author isn't
529            // needed, but let's copy the same values.
530            format!("author.name={user_name}"),
531            format!("author.email={user_email}"),
532            format!("committer.name={user_name}"),
533            format!("committer.email={user_email}"),
534        ])
535        // The git_target path should point the repository, not the working directory.
536        .open_path_as_is(true)
537        // Gitoxide recommends this when correctness is preferred
538        .strict_config(true)
539}
540
541/// Parses the `jj:trees` header value.
542fn root_tree_from_git_extra_header(value: &BStr) -> Result<MergedTreeId, ()> {
543    let mut tree_ids = SmallVec::new();
544    for hex in value.split(|b| *b == b' ') {
545        let tree_id = TreeId::try_from_hex(hex).ok_or(())?;
546        if tree_id.as_bytes().len() != HASH_LENGTH {
547            return Err(());
548        }
549        tree_ids.push(tree_id);
550    }
551    // It is invalid to use `jj:trees` with a non-conflicted tree. If this were
552    // allowed, it would be possible to construct a commit which appears to have
553    // different contents depending on whether it is viewed using `jj` or `git`.
554    if tree_ids.len() == 1 || tree_ids.len() % 2 == 0 {
555        return Err(());
556    }
557    Ok(MergedTreeId::new(Merge::from_vec(tree_ids)))
558}
559
560fn commit_from_git_without_root_parent(
561    id: &CommitId,
562    git_object: &gix::Object,
563    is_shallow: bool,
564) -> BackendResult<Commit> {
565    let commit = git_object
566        .try_to_commit_ref()
567        .map_err(|err| to_read_object_err(err, id))?;
568
569    // If the git header has a change-id field, we attempt to convert that to a
570    // valid JJ Change Id
571    let change_id = extract_change_id_from_commit(&commit)
572        .unwrap_or_else(|| synthetic_change_id_from_git_commit_id(id));
573
574    // shallow commits don't have parents their parents actually fetched, so we
575    // discard them here
576    // TODO: This causes issues when a shallow repository is deepened/unshallowed
577    let parents = if is_shallow {
578        vec![]
579    } else {
580        commit
581            .parents()
582            .map(|oid| CommitId::from_bytes(oid.as_bytes()))
583            .collect_vec()
584    };
585    // Conflicted commits written before we started using the `jj:trees` header
586    // (~March 2024) may have the root trees stored in the extra metadata table
587    // instead. For such commits, we'll update the root tree later when we read the
588    // extra metadata.
589    let root_tree = commit
590        .extra_headers()
591        .find(JJ_TREES_COMMIT_HEADER)
592        .map(root_tree_from_git_extra_header)
593        .transpose()
594        .map_err(|()| to_read_object_err("Invalid jj:trees header", id))?
595        .unwrap_or_else(|| {
596            let tree_id = TreeId::from_bytes(commit.tree().as_bytes());
597            MergedTreeId::resolved(tree_id)
598        });
599    // Use lossy conversion as commit message with "mojibake" is still better than
600    // nothing.
601    // TODO: what should we do with commit.encoding?
602    let description = String::from_utf8_lossy(commit.message).into_owned();
603    let author = signature_from_git(commit.author());
604    let committer = signature_from_git(commit.committer());
605
606    // If the commit is signed, extract both the signature and the signed data
607    // (which is the commit buffer with the gpgsig header omitted).
608    // We have to re-parse the raw commit data because gix CommitRef does not give
609    // us the sogned data, only the signature.
610    // Ideally, we could use try_to_commit_ref_iter at the beginning of this
611    // function and extract everything from that. For now, this works
612    let secure_sig = commit
613        .extra_headers
614        .iter()
615        // gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too
616        .any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256")
617        .then(|| CommitRefIter::signature(&git_object.data))
618        .transpose()
619        .map_err(|err| to_read_object_err(err, id))?
620        .flatten()
621        .map(|(sig, data)| SecureSig {
622            data: data.to_bstring().into(),
623            sig: sig.into_owned().into(),
624        });
625
626    Ok(Commit {
627        parents,
628        predecessors: vec![],
629        // If this commit has associated extra metadata, we may reset this later.
630        root_tree,
631        change_id,
632        description,
633        author,
634        committer,
635        secure_sig,
636    })
637}
638
639/// Extracts change id from commit headers.
640pub fn extract_change_id_from_commit(commit: &gix::objs::CommitRef) -> Option<ChangeId> {
641    commit
642        .extra_headers()
643        .find(CHANGE_ID_COMMIT_HEADER)
644        .and_then(ChangeId::try_from_reverse_hex)
645        .filter(|val| val.as_bytes().len() == CHANGE_ID_LENGTH)
646}
647
648/// Deterministically creates a change id based on the commit id
649///
650/// Used when we get a commit without a change id. The exact algorithm for the
651/// computation should not be relied upon.
652pub fn synthetic_change_id_from_git_commit_id(id: &CommitId) -> ChangeId {
653    // We reverse the bits of the commit id to create the change id. We don't
654    // want to use the first bytes unmodified because then it would be ambiguous
655    // if a given hash prefix refers to the commit id or the change id. It would
656    // have been enough to pick the last 16 bytes instead of the leading 16
657    // bytes to address that. We also reverse the bits to make it less likely
658    // that users depend on any relationship between the two ids.
659    let bytes = id.as_bytes()[4..HASH_LENGTH]
660        .iter()
661        .rev()
662        .map(|b| b.reverse_bits())
663        .collect();
664    ChangeId::new(bytes)
665}
666
/// Stand-in written to Git for an empty signature name or email, and mapped
/// back to the empty string when read.
const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING";
668
669fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature {
670    let name = signature.name;
671    let name = if name != EMPTY_STRING_PLACEHOLDER {
672        String::from_utf8_lossy(name).into_owned()
673    } else {
674        "".to_string()
675    };
676    let email = signature.email;
677    let email = if email != EMPTY_STRING_PLACEHOLDER {
678        String::from_utf8_lossy(email).into_owned()
679    } else {
680        "".to_string()
681    };
682    let time = signature.time().unwrap_or_default();
683    let timestamp = MillisSinceEpoch(time.seconds * 1000);
684    let tz_offset = time.offset.div_euclid(60); // in minutes
685    Signature {
686        name,
687        email,
688        timestamp: Timestamp {
689            timestamp,
690            tz_offset,
691        },
692    }
693}
694
695fn signature_to_git(signature: &Signature) -> gix::actor::Signature {
696    // git does not support empty names or emails
697    let name = if !signature.name.is_empty() {
698        &signature.name
699    } else {
700        EMPTY_STRING_PLACEHOLDER
701    };
702    let email = if !signature.email.is_empty() {
703        &signature.email
704    } else {
705        EMPTY_STRING_PLACEHOLDER
706    };
707    let time = gix::date::Time::new(
708        signature.timestamp.timestamp.0.div_euclid(1000),
709        signature.timestamp.tz_offset * 60, // in seconds
710    );
711    gix::actor::Signature {
712        name: name.into(),
713        email: email.into(),
714        time,
715    }
716}
717
718fn serialize_extras(commit: &Commit) -> Vec<u8> {
719    let mut proto = crate::protos::git_store::Commit {
720        change_id: commit.change_id.to_bytes(),
721        ..Default::default()
722    };
723    proto.uses_tree_conflict_format = true;
724    let tree_ids = commit.root_tree.as_merge();
725    if !tree_ids.is_resolved() {
726        // This is done for the sake of jj versions <0.28 (before commit
727        // f7b14be) being able to read the repo. At some point in the
728        // future, we can stop doing it.
729        proto.root_tree = tree_ids.iter().map(|r| r.to_bytes()).collect();
730    }
731    for predecessor in &commit.predecessors {
732        proto.predecessors.push(predecessor.to_bytes());
733    }
734    proto.encode_to_vec()
735}
736
737fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
738    let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
739    if !proto.change_id.is_empty() {
740        commit.change_id = ChangeId::new(proto.change_id);
741    }
742    if commit.root_tree.as_merge().is_resolved()
743        && proto.uses_tree_conflict_format
744        && !proto.root_tree.is_empty()
745    {
746        let merge_builder: MergeBuilder<_> = proto
747            .root_tree
748            .iter()
749            .map(|id_bytes| TreeId::from_bytes(id_bytes))
750            .collect();
751        commit.root_tree = MergedTreeId::new(merge_builder.build());
752    }
753    for predecessor in &proto.predecessors {
754        commit.predecessors.push(CommitId::from_bytes(predecessor));
755    }
756}
757
758/// Returns `RefEdit` that will create a ref in `refs/jj/keep` if not exist.
759/// Used for preventing GC of commits we create.
760fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit {
761    let name = format!("{NO_GC_REF_NAMESPACE}{id}");
762    let new = gix::refs::Target::Object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()));
763    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(new.clone());
764    gix::refs::transaction::RefEdit {
765        change: gix::refs::transaction::Change::Update {
766            log: gix::refs::transaction::LogChange {
767                message: "used by jj".into(),
768                ..Default::default()
769            },
770            expected,
771            new,
772        },
773        name: name.try_into().unwrap(),
774        deref: false,
775    }
776}
777
778fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit {
779    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(git_ref.target);
780    gix::refs::transaction::RefEdit {
781        change: gix::refs::transaction::Change::Delete {
782            expected,
783            log: gix::refs::transaction::RefLog::AndReference,
784        },
785        name: git_ref.name,
786        deref: false,
787    }
788}
789
/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other
/// unreachable and non-head refs.
///
/// An existing no-gc ref is kept if it points at one of the `new_heads` and is
/// named after the commit id it points to, or if its loose ref file was
/// modified after `keep_newer`. Every other no-gc ref is deleted. The
/// deletions and the updates for all `new_heads` are submitted in a single
/// `edit_references()` call.
///
/// # Errors
///
/// Returns `BackendError::Other` if the refs cannot be listed or edited, or if
/// a symbolic ref is found in the no-gc namespace.
///
/// # Panics
///
/// Panics if the modification time of a loose ref file cannot be obtained.
fn recreate_no_gc_refs(
    git_repo: &gix::Repository,
    new_heads: impl IntoIterator<Item = CommitId>,
    keep_newer: SystemTime,
) -> BackendResult<()> {
    // Calculate diff between existing no-gc refs and new heads.
    let new_heads: HashSet<CommitId> = new_heads.into_iter().collect();
    let mut no_gc_refs_to_keep_count: usize = 0;
    let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new();
    let git_references = git_repo
        .references()
        .map_err(|err| BackendError::Other(err.into()))?;
    let no_gc_refs_iter = git_references
        .prefixed(NO_GC_REF_NAMESPACE)
        .map_err(|err| BackendError::Other(err.into()))?;
    for git_ref in no_gc_refs_iter {
        let git_ref = git_ref.map_err(BackendError::Other)?.detach();
        // Only direct (object) targets are expected in the no-gc namespace.
        let oid = git_ref.target.try_id().ok_or_else(|| {
            let name = git_ref.name.as_bstr();
            BackendError::Other(format!("Symbolic no-gc ref found: {name}").into())
        })?;
        let id = CommitId::from_bytes(oid.as_bytes());
        // The part of the ref name after the namespace prefix must be the hex
        // of the commit id the ref points to.
        let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex();
        if new_heads.contains(&id) && name_good {
            no_gc_refs_to_keep_count += 1;
            continue;
        }
        // Check timestamp of loose ref, but this is still racy on re-import
        // because:
        // - existing packed ref won't be demoted to loose ref
        // - existing loose ref won't be touched
        //
        // TODO: might be better to switch to a dummy merge, where new no-gc ref
        // will always have a unique name. Doing that with the current
        // ref-per-head strategy would increase the number of the no-gc refs.
        // https://github.com/jj-vcs/jj/pull/2659#issuecomment-1837057782
        let loose_ref_path = git_repo.path().join(git_ref.name.to_path());
        // If the metadata cannot be read (e.g. there is no loose ref file), the
        // ref falls through to deletion.
        if let Ok(metadata) = loose_ref_path.metadata() {
            let mtime = metadata.modified().expect("unsupported platform?");
            if mtime > keep_newer {
                tracing::trace!(?git_ref, "not deleting new");
                no_gc_refs_to_keep_count += 1;
                continue;
            }
        }
        // Also deletes no-gc ref of random name created by old jj.
        tracing::trace!(?git_ref, ?name_good, "will delete");
        no_gc_refs_to_delete.push(git_ref);
    }
    tracing::info!(
        new_heads_count = new_heads.len(),
        no_gc_refs_to_keep_count,
        no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(),
        "collected reachable refs"
    );

    // It's slow to delete packed refs one by one, so update refs all at once.
    let ref_edits = itertools::chain(
        no_gc_refs_to_delete.into_iter().map(to_ref_deletion),
        new_heads.iter().map(to_no_gc_ref_update),
    );
    git_repo
        .edit_references(ref_edits)
        .map_err(|err| BackendError::Other(err.into()))?;

    Ok(())
}
859
860fn run_git_gc(program: &OsStr, git_dir: &Path, keep_newer: SystemTime) -> Result<(), GitGcError> {
861    let keep_newer = keep_newer
862        .duration_since(SystemTime::UNIX_EPOCH)
863        .unwrap_or_default(); // underflow
864    let mut git = Command::new(program);
865    git.arg("--git-dir=.") // turn off discovery
866        .arg("gc")
867        .arg(format!("--prune=@{} +0000", keep_newer.as_secs()));
868    // Don't specify it by GIT_DIR/--git-dir. On Windows, the path could be
869    // canonicalized as UNC path, which wouldn't be supported by git.
870    git.current_dir(git_dir);
871    // TODO: pass output to UI layer instead of printing directly here
872    tracing::info!(?git, "running git gc");
873    let status = git.status().map_err(GitGcError::GcCommand)?;
874    tracing::info!(?status, "git gc exited");
875    if !status.success() {
876        return Err(GitGcError::GcCommandErrorStatus(status));
877    }
878    Ok(())
879}
880
881fn validate_git_object_id(id: &impl ObjectId) -> BackendResult<gix::ObjectId> {
882    if id.as_bytes().len() != HASH_LENGTH {
883        return Err(BackendError::InvalidHashLength {
884            expected: HASH_LENGTH,
885            actual: id.as_bytes().len(),
886            object_type: id.object_type(),
887            hash: id.hex(),
888        });
889    }
890    Ok(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
891}
892
893fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError {
894    if matches!(err, gix::object::find::existing::Error::NotFound { .. }) {
895        BackendError::ObjectNotFound {
896            object_type: id.object_type(),
897            hash: id.hex(),
898            source: Box::new(err),
899        }
900    } else {
901        to_read_object_err(err, id)
902    }
903}
904
905fn to_read_object_err(
906    err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
907    id: &impl ObjectId,
908) -> BackendError {
909    BackendError::ReadObject {
910        object_type: id.object_type(),
911        hash: id.hex(),
912        source: err.into(),
913    }
914}
915
916fn to_invalid_utf8_err(source: Utf8Error, id: &impl ObjectId) -> BackendError {
917    BackendError::InvalidUtf8 {
918        object_type: id.object_type(),
919        hash: id.hex(),
920        source,
921    }
922}
923
924fn import_extra_metadata_entries_from_heads(
925    git_repo: &gix::Repository,
926    mut_table: &mut MutableTable,
927    _table_lock: &FileLock,
928    head_ids: &HashSet<&CommitId>,
929    shallow_roots: &[CommitId],
930) -> BackendResult<()> {
931    let mut work_ids = head_ids
932        .iter()
933        .filter(|&id| mut_table.get_value(id.as_bytes()).is_none())
934        .map(|&id| id.clone())
935        .collect_vec();
936    while let Some(id) = work_ids.pop() {
937        let git_object = git_repo
938            .find_object(validate_git_object_id(&id)?)
939            .map_err(|err| map_not_found_err(err, &id))?;
940        let is_shallow = shallow_roots.contains(&id);
941        // TODO(#1624): Should we read the root tree here and check if it has a
942        // `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
943        // change the description of a commit with tree-level conflicts.
944        let commit = commit_from_git_without_root_parent(&id, &git_object, is_shallow)?;
945        mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
946        work_ids.extend(
947            commit
948                .parents
949                .into_iter()
950                .filter(|id| mut_table.get_value(id.as_bytes()).is_none()),
951        );
952    }
953    Ok(())
954}
955
956impl Debug for GitBackend {
957    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
958        f.debug_struct("GitBackend")
959            .field("path", &self.git_repo_path())
960            .finish()
961    }
962}
963
964#[async_trait]
965impl Backend for GitBackend {
    /// Returns the backend name by delegating to the associated
    /// `Self::name()` function.
    fn name(&self) -> &str {
        Self::name()
    }
969
    /// Returns the length of commit ids, which is `HASH_LENGTH`.
    fn commit_id_length(&self) -> usize {
        HASH_LENGTH
    }
973
    /// Returns the length of change ids, which is `CHANGE_ID_LENGTH`.
    fn change_id_length(&self) -> usize {
        CHANGE_ID_LENGTH
    }
977
    /// Returns the id of the root commit stored in this backend.
    fn root_commit_id(&self) -> &CommitId {
        &self.root_commit_id
    }
981
    /// Returns the change id of the root commit stored in this backend.
    fn root_change_id(&self) -> &ChangeId {
        &self.root_change_id
    }
985
    /// Returns the id of the empty tree stored in this backend.
    fn empty_tree_id(&self) -> &TreeId {
        &self.empty_tree_id
    }
989
    /// Returns the concurrency level reported by this backend, which is
    /// always 1.
    fn concurrency(&self) -> usize {
        1
    }
993
994    async fn read_file(
995        &self,
996        _path: &RepoPath,
997        id: &FileId,
998    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>> {
999        let data = self.read_file_sync(id)?;
1000        Ok(Box::pin(Cursor::new(data)))
1001    }
1002
1003    async fn write_file(
1004        &self,
1005        _path: &RepoPath,
1006        contents: &mut (dyn AsyncRead + Send + Unpin),
1007    ) -> BackendResult<FileId> {
1008        let mut bytes = Vec::new();
1009        contents.read_to_end(&mut bytes).await.unwrap();
1010        let locked_repo = self.lock_git_repo();
1011        let oid = locked_repo
1012            .write_blob(bytes)
1013            .map_err(|err| BackendError::WriteObject {
1014                object_type: "file",
1015                source: Box::new(err),
1016            })?;
1017        Ok(FileId::new(oid.as_bytes().to_vec()))
1018    }
1019
1020    async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
1021        let git_blob_id = validate_git_object_id(id)?;
1022        let locked_repo = self.lock_git_repo();
1023        let mut blob = locked_repo
1024            .find_object(git_blob_id)
1025            .map_err(|err| map_not_found_err(err, id))?
1026            .try_into_blob()
1027            .map_err(|err| to_read_object_err(err, id))?;
1028        let target = String::from_utf8(blob.take_data())
1029            .map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?;
1030        Ok(target)
1031    }
1032
1033    async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
1034        let locked_repo = self.lock_git_repo();
1035        let oid =
1036            locked_repo
1037                .write_blob(target.as_bytes())
1038                .map_err(|err| BackendError::WriteObject {
1039                    object_type: "symlink",
1040                    source: Box::new(err),
1041                })?;
1042        Ok(SymlinkId::new(oid.as_bytes().to_vec()))
1043    }
1044
1045    async fn read_copy(&self, _id: &CopyId) -> BackendResult<CopyHistory> {
1046        Err(BackendError::Unsupported(
1047            "The Git backend doesn't support tracked copies yet".to_string(),
1048        ))
1049    }
1050
1051    async fn write_copy(&self, _contents: &CopyHistory) -> BackendResult<CopyId> {
1052        Err(BackendError::Unsupported(
1053            "The Git backend doesn't support tracked copies yet".to_string(),
1054        ))
1055    }
1056
1057    async fn get_related_copies(&self, _copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>> {
1058        Err(BackendError::Unsupported(
1059            "The Git backend doesn't support tracked copies yet".to_string(),
1060        ))
1061    }
1062
1063    async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
1064        if id == &self.empty_tree_id {
1065            return Ok(Tree::default());
1066        }
1067        let git_tree_id = validate_git_object_id(id)?;
1068
1069        let locked_repo = self.lock_git_repo();
1070        let git_tree = locked_repo
1071            .find_object(git_tree_id)
1072            .map_err(|err| map_not_found_err(err, id))?
1073            .try_into_tree()
1074            .map_err(|err| to_read_object_err(err, id))?;
1075        let mut entries: Vec<_> = git_tree
1076            .iter()
1077            .map(|entry| -> BackendResult<_> {
1078                let entry = entry.map_err(|err| to_read_object_err(err, id))?;
1079                let name = RepoPathComponentBuf::new(
1080                    str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?,
1081                )
1082                .unwrap();
1083                let value = match entry.mode().kind() {
1084                    gix::object::tree::EntryKind::Tree => {
1085                        let id = TreeId::from_bytes(entry.oid().as_bytes());
1086                        TreeValue::Tree(id)
1087                    }
1088                    gix::object::tree::EntryKind::Blob => {
1089                        let id = FileId::from_bytes(entry.oid().as_bytes());
1090                        TreeValue::File {
1091                            id,
1092                            executable: false,
1093                            copy_id: CopyId::placeholder(),
1094                        }
1095                    }
1096                    gix::object::tree::EntryKind::BlobExecutable => {
1097                        let id = FileId::from_bytes(entry.oid().as_bytes());
1098                        TreeValue::File {
1099                            id,
1100                            executable: true,
1101                            copy_id: CopyId::placeholder(),
1102                        }
1103                    }
1104                    gix::object::tree::EntryKind::Link => {
1105                        let id = SymlinkId::from_bytes(entry.oid().as_bytes());
1106                        TreeValue::Symlink(id)
1107                    }
1108                    gix::object::tree::EntryKind::Commit => {
1109                        let id = CommitId::from_bytes(entry.oid().as_bytes());
1110                        TreeValue::GitSubmodule(id)
1111                    }
1112                };
1113                Ok((name, value))
1114            })
1115            .try_collect()?;
1116        // While Git tree entries are sorted, the rule is slightly different.
1117        // Directory names are sorted as if they had trailing "/".
1118        if !entries.is_sorted_by_key(|(name, _)| name) {
1119            entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
1120        }
1121        Ok(Tree::from_sorted_entries(entries))
1122    }
1123
1124    async fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
1125        // Tree entries to be written must be sorted by Entry::filename(), which
1126        // is slightly different from the order of our backend::Tree.
1127        let entries = contents
1128            .entries()
1129            .map(|entry| {
1130                let filename = BString::from(entry.name().as_internal_str());
1131                match entry.value() {
1132                    TreeValue::File {
1133                        id,
1134                        executable: false,
1135                        copy_id: _, // TODO: Use the value
1136                    } => gix::objs::tree::Entry {
1137                        mode: gix::object::tree::EntryKind::Blob.into(),
1138                        filename,
1139                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1140                    },
1141                    TreeValue::File {
1142                        id,
1143                        executable: true,
1144                        copy_id: _, // TODO: Use the value
1145                    } => gix::objs::tree::Entry {
1146                        mode: gix::object::tree::EntryKind::BlobExecutable.into(),
1147                        filename,
1148                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1149                    },
1150                    TreeValue::Symlink(id) => gix::objs::tree::Entry {
1151                        mode: gix::object::tree::EntryKind::Link.into(),
1152                        filename,
1153                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1154                    },
1155                    TreeValue::Tree(id) => gix::objs::tree::Entry {
1156                        mode: gix::object::tree::EntryKind::Tree.into(),
1157                        filename,
1158                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1159                    },
1160                    TreeValue::GitSubmodule(id) => gix::objs::tree::Entry {
1161                        mode: gix::object::tree::EntryKind::Commit.into(),
1162                        filename,
1163                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1164                    },
1165                }
1166            })
1167            .sorted_unstable()
1168            .collect();
1169        let locked_repo = self.lock_git_repo();
1170        let oid = locked_repo
1171            .write_object(gix::objs::Tree { entries })
1172            .map_err(|err| BackendError::WriteObject {
1173                object_type: "tree",
1174                source: Box::new(err),
1175            })?;
1176        Ok(TreeId::from_bytes(oid.as_bytes()))
1177    }
1178
1179    #[tracing::instrument(skip(self))]
1180    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
1181        if *id == self.root_commit_id {
1182            return Ok(make_root_commit(
1183                self.root_change_id().clone(),
1184                self.empty_tree_id.clone(),
1185            ));
1186        }
1187        let git_commit_id = validate_git_object_id(id)?;
1188
1189        let mut commit = {
1190            let locked_repo = self.lock_git_repo();
1191            let git_object = locked_repo
1192                .find_object(git_commit_id)
1193                .map_err(|err| map_not_found_err(err, id))?;
1194            let is_shallow = self.shallow_root_ids(&locked_repo)?.contains(id);
1195            commit_from_git_without_root_parent(id, &git_object, is_shallow)?
1196        };
1197        if commit.parents.is_empty() {
1198            commit.parents.push(self.root_commit_id.clone());
1199        };
1200
1201        let table = self.cached_extra_metadata_table()?;
1202        if let Some(extras) = table.get_value(id.as_bytes()) {
1203            deserialize_extras(&mut commit, extras);
1204        } else {
1205            // TODO: Remove this hack and map to ObjectNotFound error if we're sure that
1206            // there are no reachable ancestor commits without extras metadata. Git commits
1207            // imported by jj < 0.8.0 might not have extras (#924).
1208            // https://github.com/jj-vcs/jj/issues/2343
1209            tracing::info!("unimported Git commit found");
1210            self.import_head_commits([id])?;
1211            let table = self.cached_extra_metadata_table()?;
1212            let extras = table.get_value(id.as_bytes()).unwrap();
1213            deserialize_extras(&mut commit, extras);
1214        }
1215        Ok(commit)
1216    }
1217
    /// Writes `contents` as a Git commit and records its extras in the extra
    /// metadata table.
    ///
    /// Returns the id of the written commit together with `contents`, updated
    /// with the committer timestamp that was actually written and, if
    /// `sign_with` is given, with the signature.
    ///
    /// If the extras table already holds different extras for the resulting
    /// commit id, the committer timestamp is moved back by one second and the
    /// commit is written again, until there is no such clash. A
    /// `refs/jj/keep` ref is then created for the commit.
    ///
    /// # Errors
    ///
    /// Fails if `contents` has no parents, if the root commit is one of
    /// several parents, if a tree or parent id has an invalid length, if
    /// signing fails, or if writing to the Git repository or the extras table
    /// fails.
    ///
    /// # Panics
    ///
    /// Panics if `contents.secure_sig` is already set.
    async fn write_commit(
        &self,
        mut contents: Commit,
        mut sign_with: Option<&mut SigningFn>,
    ) -> BackendResult<(CommitId, Commit)> {
        assert!(contents.secure_sig.is_none(), "commit.secure_sig was set");

        let locked_repo = self.lock_git_repo();
        let tree_ids = contents.root_tree.as_merge();
        // A conflicted root tree is written as a special tree first.
        let git_tree_id = match tree_ids.as_resolved() {
            Some(tree_id) => validate_git_object_id(tree_id)?,
            None => write_tree_conflict(&locked_repo, tree_ids)?,
        };
        let author = signature_to_git(&contents.author);
        let mut committer = signature_to_git(&contents.committer);
        let message = &contents.description;
        if contents.parents.is_empty() {
            return Err(BackendError::Other(
                "Cannot write a commit with no parents".into(),
            ));
        }
        let mut parents = SmallVec::new();
        for parent_id in &contents.parents {
            if *parent_id == self.root_commit_id {
                // Git doesn't have a root commit, so if the parent is the root commit, we don't
                // add it to the list of parents to write in the Git commit. We also check that
                // there are no other parents since Git cannot represent a merge between a root
                // commit and another commit.
                if contents.parents.len() > 1 {
                    return Err(BackendError::Unsupported(
                        "The Git backend does not support creating merge commits with the root \
                         commit as one of the parents."
                            .to_owned(),
                    ));
                }
            } else {
                parents.push(validate_git_object_id(parent_id)?);
            }
        }
        let mut extra_headers: Vec<(BString, BString)> = vec![];
        // The ids of all conflict sides are recorded in a commit header.
        if !tree_ids.is_resolved() {
            let value = tree_ids.iter().map(|id| id.hex()).join(" ");
            extra_headers.push((JJ_TREES_COMMIT_HEADER.into(), value.into()));
        }
        if self.write_change_id_header {
            extra_headers.push((
                CHANGE_ID_COMMIT_HEADER.into(),
                contents.change_id.reverse_hex().into(),
            ));
        }

        let extras = serialize_extras(&contents);

        // If two writers write commits of the same id with different metadata, they
        // will both succeed and the metadata entries will be "merged" later. Since
        // metadata entry is keyed by the commit id, one of the entries would be lost.
        // To prevent such race condition locally, we extend the scope covered by the
        // table lock. This is still racy if multiple machines are involved and the
        // repository is rsync-ed.
        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
        let id = loop {
            let mut commit = gix::objs::Commit {
                message: message.to_owned().into(),
                tree: git_tree_id,
                author: author.clone(),
                committer: committer.clone(),
                encoding: None,
                parents: parents.clone(),
                extra_headers: extra_headers.clone(),
            };

            if let Some(sign) = &mut sign_with {
                // we don't use gix pool, but at least use their heuristic
                let mut data = Vec::with_capacity(512);
                commit.write_to(&mut data).unwrap();

                // The signature covers the serialized commit without the
                // "gpgsig" header, which is appended afterwards.
                let sig = sign(&data).map_err(|err| BackendError::WriteObject {
                    object_type: "commit",
                    source: Box::new(err),
                })?;
                commit
                    .extra_headers
                    .push(("gpgsig".into(), sig.clone().into()));
                contents.secure_sig = Some(SecureSig { data, sig });
            }

            let git_id =
                locked_repo
                    .write_object(&commit)
                    .map_err(|err| BackendError::WriteObject {
                        object_type: "commit",
                        source: Box::new(err),
                    })?;

            match table.get_value(git_id.as_bytes()) {
                Some(existing_extras) if existing_extras != extras => {
                    // It's possible a commit already exists with the same
                    // commit id but different change id. Adjust the timestamp
                    // until this is no longer the case.
                    //
                    // For example, this can happen when rebasing duplicate
                    // commits, https://github.com/jj-vcs/jj/issues/694.
                    //
                    // `jj` resets the committer timestamp to the current
                    // timestamp whenever it rewrites a commit. So, it's
                    // unlikely for the timestamp to be 0 even if the original
                    // commit had its timestamp set to 0. Moreover, we test that
                    // a commit with a negative timestamp can still be written
                    // and read back by `jj`.
                    committer.time.seconds -= 1;
                }
                _ => break CommitId::from_bytes(git_id.as_bytes()),
            }
        };

        // Everything up to this point had no permanent effect on the repo except
        // GC-able objects
        locked_repo
            .edit_reference(to_no_gc_ref_update(&id))
            .map_err(|err| BackendError::Other(Box::new(err)))?;

        // Update the signature to match the one that was actually written to the object
        // store
        contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000);
        let mut mut_table = table.start_mutation();
        mut_table.add_entry(id.to_bytes(), extras);
        self.save_extra_metadata_table(mut_table, &table_lock)?;
        Ok((id, contents))
    }
1347
    /// Computes copy records between the trees of `root_id` and `head_id`.
    ///
    /// The two trees are diffed with rewrite tracking enabled. Each `Rewrite`
    /// change whose source and destination modes both pass `is_blob()` becomes
    /// a `CopyRecord`; all other changes are skipped. If `paths` is given, only
    /// records whose target is contained in `paths` are returned.
    ///
    /// The whole diff is computed before the stream is returned. A change that
    /// fails to convert is yielded as an `Err` item and doesn't stop the diff.
    ///
    /// # Errors
    ///
    /// Fails if either tree cannot be read or if the diff itself fails.
    ///
    /// # Panics
    ///
    /// Panics if a path is rejected by `RepoPathBuf::from_internal_string()`.
    fn get_copy_records(
        &self,
        paths: Option<&[RepoPathBuf]>,
        root_id: &CommitId,
        head_id: &CommitId,
    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>> {
        let repo = self.git_repo();
        let root_tree = self.read_tree_for_commit(&repo, root_id)?;
        let head_tree = self.read_tree_for_commit(&repo, head_id)?;

        // Converts a single diff change; `Ok(None)` means the change is not
        // reported as a copy record.
        let change_to_copy_record =
            |change: gix::object::tree::diff::Change| -> BackendResult<Option<CopyRecord>> {
                let gix::object::tree::diff::Change::Rewrite {
                    source_location,
                    source_entry_mode,
                    source_id,
                    entry_mode: dest_entry_mode,
                    location: dest_location,
                    ..
                } = change
                else {
                    return Ok(None);
                };
                // TODO: Renamed symlinks cannot be returned because CopyRecord
                // expects `source_file: FileId`.
                if !source_entry_mode.is_blob() || !dest_entry_mode.is_blob() {
                    return Ok(None);
                }

                let source = str::from_utf8(source_location)
                    .map_err(|err| to_invalid_utf8_err(err, root_id))?;
                let dest = str::from_utf8(dest_location)
                    .map_err(|err| to_invalid_utf8_err(err, head_id))?;

                let target = RepoPathBuf::from_internal_string(dest).unwrap();
                // Without a `paths` filter, every target is accepted.
                if !paths.is_none_or(|paths| paths.contains(&target)) {
                    return Ok(None);
                }

                Ok(Some(CopyRecord {
                    target,
                    target_commit: head_id.clone(),
                    source: RepoPathBuf::from_internal_string(source).unwrap(),
                    source_file: FileId::from_bytes(source_id.as_bytes()),
                    source_commit: root_id.clone(),
                }))
            };

        let mut records: Vec<BackendResult<CopyRecord>> = Vec::new();
        root_tree
            .changes()
            .map_err(|err| BackendError::Other(err.into()))?
            .options(|opts| {
                opts.track_path().track_rewrites(Some(gix::diff::Rewrites {
                    copies: Some(gix::diff::rewrites::Copies {
                        source: gix::diff::rewrites::CopySource::FromSetOfModifiedFiles,
                        percentage: Some(0.5),
                    }),
                    percentage: Some(0.5),
                    limit: 1000,
                    track_empty: false,
                }));
            })
            .for_each_to_obtain_tree_with_cache(
                &head_tree,
                &mut self.new_diff_platform()?,
                |change| -> BackendResult<_> {
                    // Conversion errors are recorded as items instead of
                    // aborting the diff.
                    match change_to_copy_record(change) {
                        Ok(None) => {}
                        Ok(Some(change)) => records.push(Ok(change)),
                        Err(err) => records.push(Err(err)),
                    }
                    Ok(gix::object::tree::diff::Action::Continue)
                },
            )
            .map_err(|err| BackendError::Other(err.into()))?;
        Ok(Box::pin(futures::stream::iter(records)))
    }
1426
1427    #[tracing::instrument(skip(self, index))]
1428    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
1429        let git_repo = self.lock_git_repo();
1430        let new_heads = index
1431            .all_heads_for_gc()
1432            .map_err(|err| BackendError::Other(err.into()))?
1433            .filter(|id| *id != self.root_commit_id);
1434        recreate_no_gc_refs(&git_repo, new_heads, keep_newer)?;
1435        // TODO: remove unreachable entries from extras table if segment file
1436        // mtime <= keep_newer? (it won't be consistent with no-gc refs
1437        // preserved by the keep_newer timestamp though)
1438        // TODO: remove unreachable extras table segments
1439        run_git_gc(
1440            self.git_executable.as_ref(),
1441            self.git_repo_path(),
1442            keep_newer,
1443        )
1444        .map_err(|err| BackendError::Other(err.into()))?;
1445        // Since "git gc" will move loose refs into packed refs, in-memory
1446        // packed-refs cache should be invalidated without relying on mtime.
1447        git_repo.refs.force_refresh_packed_buffer().ok();
1448        Ok(())
1449    }
1450}
1451
/// Writes a tree conflict as a special tree with `.jjconflict-base-N` and
/// `.jjconflict-side-N` subtrees. This ensures that the parts are not GC'd.
///
/// The removed trees of `conflict` become the `base` subtrees and the added
/// trees become the `side` subtrees, each numbered in iteration order. A
/// `README` blob explaining the layout is added next to them.
///
/// # Errors
///
/// Returns `BackendError::Other` if the README blob cannot be written, and
/// `BackendError::WriteObject` if the tree cannot be written.
fn write_tree_conflict(
    repo: &gix::Repository,
    conflict: &Merge<TreeId>,
) -> BackendResult<gix::ObjectId> {
    // Tree entries to be written must be sorted by Entry::filename().
    let mut entries = itertools::chain(
        conflict
            .removes()
            .enumerate()
            .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)),
        conflict
            .adds()
            .enumerate()
            .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)),
    )
    .map(|(name, tree_id)| gix::objs::tree::Entry {
        mode: gix::object::tree::EntryKind::Tree.into(),
        filename: name.into(),
        oid: gix::ObjectId::from_bytes_or_panic(tree_id.as_bytes()),
    })
    .collect_vec();
    let readme_id = repo
        .write_blob(
            r#"This commit was made by jj, https://github.com/jj-vcs/jj.
The commit contains file conflicts, and therefore looks wrong when used with plain
Git or other tools that are unfamiliar with jj.

The .jjconflict-* directories represent the different inputs to the conflict.
For details, see
https://jj-vcs.github.io/jj/prerelease/git-compatibility/#format-mapping-details

If you see this file in your working copy, it probably means that you used a
regular `git` command to check out a conflicted commit. Use `jj abandon` to
recover.
"#,
        )
        .map_err(|err| {
            BackendError::Other(format!("Failed to write README for conflict tree: {err}").into())
        })?
        .detach();
    entries.push(gix::objs::tree::Entry {
        mode: gix::object::tree::EntryKind::Blob.into(),
        filename: "README".into(),
        oid: readme_id,
    });
    // The README is appended last, so sort to restore the required order.
    entries.sort_unstable();
    let id = repo
        .write_object(gix::objs::Tree { entries })
        .map_err(|err| BackendError::WriteObject {
            object_type: "tree",
            source: Box::new(err),
        })?;
    Ok(id.detach())
}
1508
1509#[cfg(test)]
1510mod tests {
1511    use assert_matches::assert_matches;
1512    use gix::date::parse::TimeBuf;
1513    use gix::objs::CommitRef;
1514    use indoc::indoc;
1515    use pollster::FutureExt as _;
1516
1517    use super::*;
1518    use crate::config::StackedConfig;
1519    use crate::content_hash::blake2b_hash;
1520    use crate::hex_util;
1521    use crate::tests::new_temp_dir;
1522
1523    const GIT_USER: &str = "Someone";
1524    const GIT_EMAIL: &str = "someone@example.com";
1525
1526    fn git_config() -> Vec<bstr::BString> {
1527        vec![
1528            format!("user.name = {GIT_USER}").into(),
1529            format!("user.email = {GIT_EMAIL}").into(),
1530            "init.defaultBranch = master".into(),
1531        ]
1532    }
1533
1534    fn open_options() -> gix::open::Options {
1535        gix::open::Options::isolated()
1536            .config_overrides(git_config())
1537            .strict_config(true)
1538    }
1539
1540    fn git_init(directory: impl AsRef<Path>) -> gix::Repository {
1541        gix::ThreadSafeRepository::init_opts(
1542            directory,
1543            gix::create::Kind::WithWorktree,
1544            gix::create::Options::default(),
1545            open_options(),
1546        )
1547        .unwrap()
1548        .to_thread_local()
1549    }
1550
1551    #[test]
1552    fn read_plain_git_commit() {
1553        let settings = user_settings();
1554        let temp_dir = new_temp_dir();
1555        let store_path = temp_dir.path();
1556        let git_repo_path = temp_dir.path().join("git");
1557        let git_repo = git_init(git_repo_path);
1558
1559        // Add a commit with some files in
1560        let blob1 = git_repo.write_blob(b"content1").unwrap().detach();
1561        let blob2 = git_repo.write_blob(b"normal").unwrap().detach();
1562        let mut dir_tree_editor = git_repo.empty_tree().edit().unwrap();
1563        dir_tree_editor
1564            .upsert("normal", gix::object::tree::EntryKind::Blob, blob1)
1565            .unwrap();
1566        dir_tree_editor
1567            .upsert("symlink", gix::object::tree::EntryKind::Link, blob2)
1568            .unwrap();
1569        let dir_tree_id = dir_tree_editor.write().unwrap().detach();
1570        let mut root_tree_builder = git_repo.empty_tree().edit().unwrap();
1571        root_tree_builder
1572            .upsert("dir", gix::object::tree::EntryKind::Tree, dir_tree_id)
1573            .unwrap();
1574        let root_tree_id = root_tree_builder.write().unwrap().detach();
1575        let git_author = gix::actor::Signature {
1576            name: "git author".into(),
1577            email: "git.author@example.com".into(),
1578            time: gix::date::Time::new(1000, 60 * 60),
1579        };
1580        let git_committer = gix::actor::Signature {
1581            name: "git committer".into(),
1582            email: "git.committer@example.com".into(),
1583            time: gix::date::Time::new(2000, -480 * 60),
1584        };
1585        let git_commit_id = git_repo
1586            .commit_as(
1587                git_committer.to_ref(&mut TimeBuf::default()),
1588                git_author.to_ref(&mut TimeBuf::default()),
1589                "refs/heads/dummy",
1590                "git commit message",
1591                root_tree_id,
1592                [] as [gix::ObjectId; 0],
1593            )
1594            .unwrap()
1595            .detach();
1596        git_repo
1597            .find_reference("refs/heads/dummy")
1598            .unwrap()
1599            .delete()
1600            .unwrap();
1601        let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263");
1602        // The change id is the leading reverse bits of the commit id
1603        let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25");
1604        // Check that the git commit above got the hash we expect
1605        assert_eq!(
1606            git_commit_id.as_bytes(),
1607            commit_id.as_bytes(),
1608            "{git_commit_id:?} vs {commit_id:?}"
1609        );
1610
1611        // Add an empty commit on top
1612        let git_commit_id2 = git_repo
1613            .commit_as(
1614                git_committer.to_ref(&mut TimeBuf::default()),
1615                git_author.to_ref(&mut TimeBuf::default()),
1616                "refs/heads/dummy2",
1617                "git commit message 2",
1618                root_tree_id,
1619                [git_commit_id],
1620            )
1621            .unwrap()
1622            .detach();
1623        git_repo
1624            .find_reference("refs/heads/dummy2")
1625            .unwrap()
1626            .delete()
1627            .unwrap();
1628        let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes());
1629
1630        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1631
1632        // Import the head commit and its ancestors
1633        backend.import_head_commits([&commit_id2]).unwrap();
1634        // Ref should be created only for the head commit
1635        let git_refs = backend
1636            .git_repo()
1637            .references()
1638            .unwrap()
1639            .prefixed("refs/jj/keep/")
1640            .unwrap()
1641            .map(|git_ref| git_ref.unwrap().id().detach())
1642            .collect_vec();
1643        assert_eq!(git_refs, vec![git_commit_id2]);
1644
1645        let commit = backend.read_commit(&commit_id).block_on().unwrap();
1646        assert_eq!(&commit.change_id, &change_id);
1647        assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
1648        assert_eq!(commit.predecessors, vec![]);
1649        assert_eq!(
1650            commit.root_tree,
1651            MergedTreeId::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1652        );
1653        assert_eq!(commit.description, "git commit message");
1654        assert_eq!(commit.author.name, "git author");
1655        assert_eq!(commit.author.email, "git.author@example.com");
1656        assert_eq!(
1657            commit.author.timestamp.timestamp,
1658            MillisSinceEpoch(1000 * 1000)
1659        );
1660        assert_eq!(commit.author.timestamp.tz_offset, 60);
1661        assert_eq!(commit.committer.name, "git committer");
1662        assert_eq!(commit.committer.email, "git.committer@example.com");
1663        assert_eq!(
1664            commit.committer.timestamp.timestamp,
1665            MillisSinceEpoch(2000 * 1000)
1666        );
1667        assert_eq!(commit.committer.timestamp.tz_offset, -480);
1668
1669        let root_tree = backend
1670            .read_tree(
1671                RepoPath::root(),
1672                &TreeId::from_bytes(root_tree_id.as_bytes()),
1673            )
1674            .block_on()
1675            .unwrap();
1676        let mut root_entries = root_tree.entries();
1677        let dir = root_entries.next().unwrap();
1678        assert_eq!(root_entries.next(), None);
1679        assert_eq!(dir.name().as_internal_str(), "dir");
1680        assert_eq!(
1681            dir.value(),
1682            &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes()))
1683        );
1684
1685        let dir_tree = backend
1686            .read_tree(
1687                RepoPath::from_internal_string("dir").unwrap(),
1688                &TreeId::from_bytes(dir_tree_id.as_bytes()),
1689            )
1690            .block_on()
1691            .unwrap();
1692        let mut entries = dir_tree.entries();
1693        let file = entries.next().unwrap();
1694        let symlink = entries.next().unwrap();
1695        assert_eq!(entries.next(), None);
1696        assert_eq!(file.name().as_internal_str(), "normal");
1697        assert_eq!(
1698            file.value(),
1699            &TreeValue::File {
1700                id: FileId::from_bytes(blob1.as_bytes()),
1701                executable: false,
1702                copy_id: CopyId::placeholder(),
1703            }
1704        );
1705        assert_eq!(symlink.name().as_internal_str(), "symlink");
1706        assert_eq!(
1707            symlink.value(),
1708            &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
1709        );
1710
1711        let commit2 = backend.read_commit(&commit_id2).block_on().unwrap();
1712        assert_eq!(commit2.parents, vec![commit_id.clone()]);
1713        assert_eq!(commit.predecessors, vec![]);
1714        assert_eq!(
1715            commit.root_tree,
1716            MergedTreeId::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1717        );
1718    }
1719
1720    #[test]
1721    fn read_git_commit_without_importing() {
1722        let settings = user_settings();
1723        let temp_dir = new_temp_dir();
1724        let store_path = temp_dir.path();
1725        let git_repo_path = temp_dir.path().join("git");
1726        let git_repo = git_init(&git_repo_path);
1727
1728        let signature = gix::actor::Signature {
1729            name: GIT_USER.into(),
1730            email: GIT_EMAIL.into(),
1731            time: gix::date::Time::now_utc(),
1732        };
1733        let empty_tree_id =
1734            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1735        let git_commit_id = git_repo
1736            .commit_as(
1737                signature.to_ref(&mut TimeBuf::default()),
1738                signature.to_ref(&mut TimeBuf::default()),
1739                "refs/heads/main",
1740                "git commit message",
1741                empty_tree_id,
1742                [] as [gix::ObjectId; 0],
1743            )
1744            .unwrap();
1745
1746        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1747
1748        // read_commit() without import_head_commits() works as of now. This might be
1749        // changed later.
1750        assert!(
1751            backend
1752                .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1753                .block_on()
1754                .is_ok()
1755        );
1756        assert!(
1757            backend
1758                .cached_extra_metadata_table()
1759                .unwrap()
1760                .get_value(git_commit_id.as_bytes())
1761                .is_some(),
1762            "extra metadata should have been be created"
1763        );
1764    }
1765
1766    #[test]
1767    fn read_signed_git_commit() {
1768        let settings = user_settings();
1769        let temp_dir = new_temp_dir();
1770        let store_path = temp_dir.path();
1771        let git_repo_path = temp_dir.path().join("git");
1772        let git_repo = git_init(git_repo_path);
1773
1774        let signature = gix::actor::Signature {
1775            name: GIT_USER.into(),
1776            email: GIT_EMAIL.into(),
1777            time: gix::date::Time::now_utc(),
1778        };
1779        let empty_tree_id =
1780            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1781
1782        let secure_sig =
1783            "here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP\n";
1784
1785        let mut commit = gix::objs::Commit {
1786            tree: empty_tree_id,
1787            parents: smallvec::SmallVec::new(),
1788            author: signature.clone(),
1789            committer: signature.clone(),
1790            encoding: None,
1791            message: "git commit message".into(),
1792            extra_headers: Vec::new(),
1793        };
1794
1795        let mut commit_buf = Vec::new();
1796        commit.write_to(&mut commit_buf).unwrap();
1797        let commit_str = str::from_utf8(&commit_buf).unwrap();
1798
1799        commit
1800            .extra_headers
1801            .push(("gpgsig".into(), secure_sig.into()));
1802
1803        let git_commit_id = git_repo.write_object(&commit).unwrap();
1804
1805        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1806
1807        let commit = backend
1808            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1809            .block_on()
1810            .unwrap();
1811
1812        let sig = commit.secure_sig.expect("failed to read the signature");
1813
1814        // converting to string for nicer assert diff
1815        assert_eq!(str::from_utf8(&sig.sig).unwrap(), secure_sig);
1816        assert_eq!(str::from_utf8(&sig.data).unwrap(), commit_str);
1817    }
1818
1819    #[test]
1820    fn change_id_parsing() {
1821        let id = |commit_object_bytes: &[u8]| {
1822            extract_change_id_from_commit(&CommitRef::from_bytes(commit_object_bytes).unwrap())
1823        };
1824
1825        let commit_with_id = indoc! {b"
1826            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1827            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1828            author JJ Fan <jjfan@example.com> 1757112665 -0700
1829            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1830            extra-header blah
1831            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1832
1833            test-commit
1834        "};
1835        insta::assert_compact_debug_snapshot!(
1836            id(commit_with_id),
1837            @r#"Some(ChangeId("efbc06dc4721683f2a45568dbda31e99"))"#
1838        );
1839
1840        let commit_without_id = indoc! {b"
1841            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1842            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1843            author JJ Fan <jjfan@example.com> 1757112665 -0700
1844            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1845            extra-header blah
1846
1847            no id in header
1848        "};
1849        insta::assert_compact_debug_snapshot!(
1850            id(commit_without_id),
1851            @"None"
1852        );
1853
1854        let commit = indoc! {b"
1855            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1856            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1857            author JJ Fan <jjfan@example.com> 1757112665 -0700
1858            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1859            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1860            extra-header blah
1861            change-id abcabcabcabcabcabcabcabcabcabcab
1862
1863            valid change id first
1864        "};
1865        insta::assert_compact_debug_snapshot!(
1866            id(commit),
1867            @r#"Some(ChangeId("efbc06dc4721683f2a45568dbda31e99"))"#
1868        );
1869
1870        // We only look at the first change id if multiple are present, so this should
1871        // error
1872        let commit = indoc! {b"
1873            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1874            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1875            author JJ Fan <jjfan@example.com> 1757112665 -0700
1876            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1877            change-id abcabcabcabcabcabcabcabcabcabcab
1878            extra-header blah
1879            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1880
1881            valid change id first
1882        "};
1883        insta::assert_compact_debug_snapshot!(
1884            id(commit),
1885            @"None"
1886        );
1887    }
1888
1889    #[test]
1890    fn round_trip_change_id_via_git_header() {
1891        let settings = user_settings();
1892        let temp_dir = new_temp_dir();
1893
1894        let store_path = temp_dir.path().join("store");
1895        fs::create_dir(&store_path).unwrap();
1896        let empty_store_path = temp_dir.path().join("empty_store");
1897        fs::create_dir(&empty_store_path).unwrap();
1898        let git_repo_path = temp_dir.path().join("git");
1899        let git_repo = git_init(git_repo_path);
1900
1901        let backend = GitBackend::init_external(&settings, &store_path, git_repo.path()).unwrap();
1902        let original_change_id = ChangeId::from_hex("1111eeee1111eeee1111eeee1111eeee");
1903        let commit = Commit {
1904            parents: vec![backend.root_commit_id().clone()],
1905            predecessors: vec![],
1906            root_tree: MergedTreeId::resolved(backend.empty_tree_id().clone()),
1907            change_id: original_change_id.clone(),
1908            description: "initial".to_string(),
1909            author: create_signature(),
1910            committer: create_signature(),
1911            secure_sig: None,
1912        };
1913
1914        let (initial_commit_id, _init_commit) =
1915            backend.write_commit(commit, None).block_on().unwrap();
1916        let commit = backend.read_commit(&initial_commit_id).block_on().unwrap();
1917        assert_eq!(
1918            commit.change_id, original_change_id,
1919            "The change-id header did not roundtrip"
1920        );
1921
1922        // Because of how change ids are also persisted in extra proto files,
1923        // initialize a new store without those files, but reuse the same git
1924        // storage. This change-id must be derived from the git commit header.
1925        let no_extra_backend =
1926            GitBackend::init_external(&settings, &empty_store_path, git_repo.path()).unwrap();
1927        let no_extra_commit = no_extra_backend
1928            .read_commit(&initial_commit_id)
1929            .block_on()
1930            .unwrap();
1931
1932        assert_eq!(
1933            no_extra_commit.change_id, original_change_id,
1934            "The change-id header did not roundtrip"
1935        );
1936    }
1937
1938    #[test]
1939    fn read_empty_string_placeholder() {
1940        let git_signature1 = gix::actor::Signature {
1941            name: EMPTY_STRING_PLACEHOLDER.into(),
1942            email: "git.author@example.com".into(),
1943            time: gix::date::Time::new(1000, 60 * 60),
1944        };
1945        let signature1 = signature_from_git(git_signature1.to_ref(&mut TimeBuf::default()));
1946        assert!(signature1.name.is_empty());
1947        assert_eq!(signature1.email, "git.author@example.com");
1948        let git_signature2 = gix::actor::Signature {
1949            name: "git committer".into(),
1950            email: EMPTY_STRING_PLACEHOLDER.into(),
1951            time: gix::date::Time::new(2000, -480 * 60),
1952        };
1953        let signature2 = signature_from_git(git_signature2.to_ref(&mut TimeBuf::default()));
1954        assert_eq!(signature2.name, "git committer");
1955        assert!(signature2.email.is_empty());
1956    }
1957
1958    #[test]
1959    fn write_empty_string_placeholder() {
1960        let signature1 = Signature {
1961            name: "".to_string(),
1962            email: "someone@example.com".to_string(),
1963            timestamp: Timestamp {
1964                timestamp: MillisSinceEpoch(0),
1965                tz_offset: 0,
1966            },
1967        };
1968        let git_signature1 = signature_to_git(&signature1);
1969        assert_eq!(git_signature1.name, EMPTY_STRING_PLACEHOLDER);
1970        assert_eq!(git_signature1.email, "someone@example.com");
1971        let signature2 = Signature {
1972            name: "Someone".to_string(),
1973            email: "".to_string(),
1974            timestamp: Timestamp {
1975                timestamp: MillisSinceEpoch(0),
1976                tz_offset: 0,
1977            },
1978        };
1979        let git_signature2 = signature_to_git(&signature2);
1980        assert_eq!(git_signature2.name, "Someone");
1981        assert_eq!(git_signature2.email, EMPTY_STRING_PLACEHOLDER);
1982    }
1983
1984    /// Test that parents get written correctly
1985    #[test]
1986    fn git_commit_parents() {
1987        let settings = user_settings();
1988        let temp_dir = new_temp_dir();
1989        let store_path = temp_dir.path();
1990        let git_repo_path = temp_dir.path().join("git");
1991        let git_repo = git_init(&git_repo_path);
1992
1993        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1994        let mut commit = Commit {
1995            parents: vec![],
1996            predecessors: vec![],
1997            root_tree: MergedTreeId::resolved(backend.empty_tree_id().clone()),
1998            change_id: ChangeId::from_hex("abc123"),
1999            description: "".to_string(),
2000            author: create_signature(),
2001            committer: create_signature(),
2002            secure_sig: None,
2003        };
2004
2005        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2006            backend.write_commit(commit, None).block_on()
2007        };
2008
2009        // No parents
2010        commit.parents = vec![];
2011        assert_matches!(
2012            write_commit(commit.clone()),
2013            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
2014        );
2015
2016        // Only root commit as parent
2017        commit.parents = vec![backend.root_commit_id().clone()];
2018        let first_id = write_commit(commit.clone()).unwrap().0;
2019        let first_commit = backend.read_commit(&first_id).block_on().unwrap();
2020        assert_eq!(first_commit, commit);
2021        let first_git_commit = git_repo.find_commit(git_id(&first_id)).unwrap();
2022        assert!(first_git_commit.parent_ids().collect_vec().is_empty());
2023
2024        // Only non-root commit as parent
2025        commit.parents = vec![first_id.clone()];
2026        let second_id = write_commit(commit.clone()).unwrap().0;
2027        let second_commit = backend.read_commit(&second_id).block_on().unwrap();
2028        assert_eq!(second_commit, commit);
2029        let second_git_commit = git_repo.find_commit(git_id(&second_id)).unwrap();
2030        assert_eq!(
2031            second_git_commit.parent_ids().collect_vec(),
2032            vec![git_id(&first_id)]
2033        );
2034
2035        // Merge commit
2036        commit.parents = vec![first_id.clone(), second_id.clone()];
2037        let merge_id = write_commit(commit.clone()).unwrap().0;
2038        let merge_commit = backend.read_commit(&merge_id).block_on().unwrap();
2039        assert_eq!(merge_commit, commit);
2040        let merge_git_commit = git_repo.find_commit(git_id(&merge_id)).unwrap();
2041        assert_eq!(
2042            merge_git_commit.parent_ids().collect_vec(),
2043            vec![git_id(&first_id), git_id(&second_id)]
2044        );
2045
2046        // Merge commit with root as one parent
2047        commit.parents = vec![first_id, backend.root_commit_id().clone()];
2048        assert_matches!(
2049            write_commit(commit),
2050            Err(BackendError::Unsupported(message)) if message.contains("root commit")
2051        );
2052    }
2053
2054    #[test]
2055    fn write_tree_conflicts() {
2056        let settings = user_settings();
2057        let temp_dir = new_temp_dir();
2058        let store_path = temp_dir.path();
2059        let git_repo_path = temp_dir.path().join("git");
2060        let git_repo = git_init(&git_repo_path);
2061
2062        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
2063        let create_tree = |i| {
2064            let blob_id = git_repo.write_blob(format!("content {i}")).unwrap();
2065            let mut tree_builder = git_repo.empty_tree().edit().unwrap();
2066            tree_builder
2067                .upsert(
2068                    format!("file{i}"),
2069                    gix::object::tree::EntryKind::Blob,
2070                    blob_id,
2071                )
2072                .unwrap();
2073            TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
2074        };
2075
2076        let root_tree = Merge::from_removes_adds(
2077            vec![create_tree(0), create_tree(1)],
2078            vec![create_tree(2), create_tree(3), create_tree(4)],
2079        );
2080        let mut commit = Commit {
2081            parents: vec![backend.root_commit_id().clone()],
2082            predecessors: vec![],
2083            root_tree: MergedTreeId::new(root_tree.clone()),
2084            change_id: ChangeId::from_hex("abc123"),
2085            description: "".to_string(),
2086            author: create_signature(),
2087            committer: create_signature(),
2088            secure_sig: None,
2089        };
2090
2091        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2092            backend.write_commit(commit, None).block_on()
2093        };
2094
2095        // When writing a tree-level conflict, the root tree on the git side has the
2096        // individual trees as subtrees.
2097        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2098        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2099        assert_eq!(read_commit, commit);
2100        let git_commit = git_repo
2101            .find_commit(gix::ObjectId::from_bytes_or_panic(
2102                read_commit_id.as_bytes(),
2103            ))
2104            .unwrap();
2105        let git_tree = git_repo.find_tree(git_commit.tree_id().unwrap()).unwrap();
2106        assert!(
2107            git_tree
2108                .iter()
2109                .map(Result::unwrap)
2110                .filter(|entry| entry.filename() != b"README")
2111                .all(|entry| entry.mode().value() == 0o040000)
2112        );
2113        let mut iter = git_tree.iter().map(Result::unwrap);
2114        let entry = iter.next().unwrap();
2115        assert_eq!(entry.filename(), b".jjconflict-base-0");
2116        assert_eq!(
2117            entry.id().as_bytes(),
2118            root_tree.get_remove(0).unwrap().as_bytes()
2119        );
2120        let entry = iter.next().unwrap();
2121        assert_eq!(entry.filename(), b".jjconflict-base-1");
2122        assert_eq!(
2123            entry.id().as_bytes(),
2124            root_tree.get_remove(1).unwrap().as_bytes()
2125        );
2126        let entry = iter.next().unwrap();
2127        assert_eq!(entry.filename(), b".jjconflict-side-0");
2128        assert_eq!(
2129            entry.id().as_bytes(),
2130            root_tree.get_add(0).unwrap().as_bytes()
2131        );
2132        let entry = iter.next().unwrap();
2133        assert_eq!(entry.filename(), b".jjconflict-side-1");
2134        assert_eq!(
2135            entry.id().as_bytes(),
2136            root_tree.get_add(1).unwrap().as_bytes()
2137        );
2138        let entry = iter.next().unwrap();
2139        assert_eq!(entry.filename(), b".jjconflict-side-2");
2140        assert_eq!(
2141            entry.id().as_bytes(),
2142            root_tree.get_add(2).unwrap().as_bytes()
2143        );
2144        let entry = iter.next().unwrap();
2145        assert_eq!(entry.filename(), b"README");
2146        assert_eq!(entry.mode().value(), 0o100644);
2147        assert!(iter.next().is_none());
2148
2149        // When writing a single tree using the new format, it's represented by a
2150        // regular git tree.
2151        commit.root_tree = MergedTreeId::resolved(create_tree(5));
2152        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2153        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2154        assert_eq!(read_commit, commit);
2155        let git_commit = git_repo
2156            .find_commit(gix::ObjectId::from_bytes_or_panic(
2157                read_commit_id.as_bytes(),
2158            ))
2159            .unwrap();
2160        assert_eq!(
2161            MergedTreeId::resolved(TreeId::from_bytes(git_commit.tree_id().unwrap().as_bytes())),
2162            commit.root_tree
2163        );
2164    }
2165
2166    #[test]
2167    fn commit_has_ref() {
2168        let settings = user_settings();
2169        let temp_dir = new_temp_dir();
2170        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2171        let git_repo = backend.git_repo();
2172        let signature = Signature {
2173            name: "Someone".to_string(),
2174            email: "someone@example.com".to_string(),
2175            timestamp: Timestamp {
2176                timestamp: MillisSinceEpoch(0),
2177                tz_offset: 0,
2178            },
2179        };
2180        let commit = Commit {
2181            parents: vec![backend.root_commit_id().clone()],
2182            predecessors: vec![],
2183            root_tree: MergedTreeId::resolved(backend.empty_tree_id().clone()),
2184            change_id: ChangeId::new(vec![42; 16]),
2185            description: "initial".to_string(),
2186            author: signature.clone(),
2187            committer: signature,
2188            secure_sig: None,
2189        };
2190        let commit_id = backend.write_commit(commit, None).block_on().unwrap().0;
2191        let git_refs = git_repo.references().unwrap();
2192        let git_ref_ids: Vec<_> = git_refs
2193            .prefixed("refs/jj/keep/")
2194            .unwrap()
2195            .map(|x| x.unwrap().id().detach())
2196            .collect();
2197        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2198
2199        // Concurrently-running GC deletes the ref, leaving the extra metadata.
2200        for git_ref in git_refs.prefixed("refs/jj/keep/").unwrap() {
2201            git_ref.unwrap().delete().unwrap();
2202        }
2203        // Re-imported commit should have new ref.
2204        backend.import_head_commits([&commit_id]).unwrap();
2205        let git_refs = git_repo.references().unwrap();
2206        let git_ref_ids: Vec<_> = git_refs
2207            .prefixed("refs/jj/keep/")
2208            .unwrap()
2209            .map(|x| x.unwrap().id().detach())
2210            .collect();
2211        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2212    }
2213
2214    #[test]
2215    fn import_head_commits_duplicates() {
2216        let settings = user_settings();
2217        let temp_dir = new_temp_dir();
2218        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2219        let git_repo = backend.git_repo();
2220
2221        let signature = gix::actor::Signature {
2222            name: GIT_USER.into(),
2223            email: GIT_EMAIL.into(),
2224            time: gix::date::Time::now_utc(),
2225        };
2226        let empty_tree_id =
2227            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
2228        let git_commit_id = git_repo
2229            .commit_as(
2230                signature.to_ref(&mut TimeBuf::default()),
2231                signature.to_ref(&mut TimeBuf::default()),
2232                "refs/heads/main",
2233                "git commit message",
2234                empty_tree_id,
2235                [] as [gix::ObjectId; 0],
2236            )
2237            .unwrap()
2238            .detach();
2239        let commit_id = CommitId::from_bytes(git_commit_id.as_bytes());
2240
2241        // Ref creation shouldn't fail because of duplicated head ids.
2242        backend
2243            .import_head_commits([&commit_id, &commit_id])
2244            .unwrap();
2245        assert!(
2246            git_repo
2247                .references()
2248                .unwrap()
2249                .prefixed("refs/jj/keep/")
2250                .unwrap()
2251                .any(|git_ref| git_ref.unwrap().id().detach() == git_commit_id)
2252        );
2253    }
2254
2255    #[test]
2256    fn overlapping_git_commit_id() {
2257        let settings = user_settings();
2258        let temp_dir = new_temp_dir();
2259        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2260        let commit1 = Commit {
2261            parents: vec![backend.root_commit_id().clone()],
2262            predecessors: vec![],
2263            root_tree: MergedTreeId::resolved(backend.empty_tree_id().clone()),
2264            change_id: ChangeId::from_hex("7f0a7ce70354b22efcccf7bf144017c4"),
2265            description: "initial".to_string(),
2266            author: create_signature(),
2267            committer: create_signature(),
2268            secure_sig: None,
2269        };
2270
2271        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2272            backend.write_commit(commit, None).block_on()
2273        };
2274
2275        let (commit_id1, mut commit2) = write_commit(commit1).unwrap();
2276        commit2.predecessors.push(commit_id1.clone());
2277        // `write_commit` should prevent the ids from being the same by changing the
2278        // committer timestamp of the commit it actually writes.
2279        let (commit_id2, mut actual_commit2) = write_commit(commit2.clone()).unwrap();
2280        // The returned matches the ID
2281        assert_eq!(
2282            backend.read_commit(&commit_id2).block_on().unwrap(),
2283            actual_commit2
2284        );
2285        assert_ne!(commit_id2, commit_id1);
2286        // The committer timestamp should differ
2287        assert_ne!(
2288            actual_commit2.committer.timestamp.timestamp,
2289            commit2.committer.timestamp.timestamp
2290        );
2291        // The rest of the commit should be the same
2292        actual_commit2.committer.timestamp.timestamp = commit2.committer.timestamp.timestamp;
2293        assert_eq!(actual_commit2, commit2);
2294    }
2295
2296    #[test]
2297    fn write_signed_commit() {
2298        let settings = user_settings();
2299        let temp_dir = new_temp_dir();
2300        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2301
2302        let commit = Commit {
2303            parents: vec![backend.root_commit_id().clone()],
2304            predecessors: vec![],
2305            root_tree: MergedTreeId::resolved(backend.empty_tree_id().clone()),
2306            change_id: ChangeId::new(vec![42; 16]),
2307            description: "initial".to_string(),
2308            author: create_signature(),
2309            committer: create_signature(),
2310            secure_sig: None,
2311        };
2312
2313        let mut signer = |data: &_| {
2314            let hash: String = hex_util::encode_hex(&blake2b_hash(data));
2315            Ok(format!("test sig\nhash={hash}\n").into_bytes())
2316        };
2317
2318        let (id, commit) = backend
2319            .write_commit(commit, Some(&mut signer as &mut SigningFn))
2320            .block_on()
2321            .unwrap();
2322
2323        let git_repo = backend.git_repo();
2324        let obj = git_repo
2325            .find_object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
2326            .unwrap();
2327        insta::assert_snapshot!(str::from_utf8(&obj.data).unwrap(), @r"
2328        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2329        author Someone <someone@example.com> 0 +0000
2330        committer Someone <someone@example.com> 0 +0000
2331        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp
2332        gpgsig test sig
2333         hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29
2334
2335        initial
2336        ");
2337
2338        let returned_sig = commit.secure_sig.expect("failed to return the signature");
2339
2340        let commit = backend.read_commit(&id).block_on().unwrap();
2341
2342        let sig = commit.secure_sig.expect("failed to read the signature");
2343        assert_eq!(&sig, &returned_sig);
2344
2345        insta::assert_snapshot!(str::from_utf8(&sig.sig).unwrap(), @r"
2346        test sig
2347        hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29
2348        ");
2349        insta::assert_snapshot!(str::from_utf8(&sig.data).unwrap(), @r"
2350        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2351        author Someone <someone@example.com> 0 +0000
2352        committer Someone <someone@example.com> 0 +0000
2353        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp
2354
2355        initial
2356        ");
2357    }
2358
2359    fn git_id(commit_id: &CommitId) -> gix::ObjectId {
2360        gix::ObjectId::from_bytes_or_panic(commit_id.as_bytes())
2361    }
2362
2363    fn create_signature() -> Signature {
2364        Signature {
2365            name: GIT_USER.to_string(),
2366            email: GIT_EMAIL.to_string(),
2367            timestamp: Timestamp {
2368                timestamp: MillisSinceEpoch(0),
2369                tz_offset: 0,
2370            },
2371        }
2372    }
2373
2374    // Not using testutils::user_settings() because there is a dependency cycle
2375    // 'jj_lib (1) -> testutils -> jj_lib (2)' which creates another distinct
2376    // UserSettings type. testutils returns jj_lib (2)'s UserSettings, whereas
2377    // our UserSettings type comes from jj_lib (1).
2378    fn user_settings() -> UserSettings {
2379        let config = StackedConfig::with_defaults();
2380        UserSettings::from_config(config).unwrap()
2381    }
2382}