jj_lib/
git_backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::any::Any;
18use std::collections::HashSet;
19use std::ffi::OsStr;
20use std::fmt::Debug;
21use std::fmt::Error;
22use std::fmt::Formatter;
23use std::fs;
24use std::io;
25use std::io::Cursor;
26use std::path::Path;
27use std::path::PathBuf;
28use std::pin::Pin;
29use std::process::Command;
30use std::process::ExitStatus;
31use std::str;
32use std::sync::Arc;
33use std::sync::Mutex;
34use std::sync::MutexGuard;
35use std::time::SystemTime;
36
37use async_trait::async_trait;
38use bstr::BStr;
39use futures::stream::BoxStream;
40use gix::bstr::BString;
41use gix::objs::CommitRefIter;
42use gix::objs::WriteTo as _;
43use itertools::Itertools as _;
44use once_cell::sync::OnceCell as OnceLock;
45use pollster::FutureExt as _;
46use prost::Message as _;
47use smallvec::SmallVec;
48use thiserror::Error;
49use tokio::io::AsyncRead;
50use tokio::io::AsyncReadExt as _;
51
52use crate::backend::Backend;
53use crate::backend::BackendError;
54use crate::backend::BackendInitError;
55use crate::backend::BackendLoadError;
56use crate::backend::BackendResult;
57use crate::backend::ChangeId;
58use crate::backend::Commit;
59use crate::backend::CommitId;
60use crate::backend::Conflict;
61use crate::backend::ConflictId;
62use crate::backend::ConflictTerm;
63use crate::backend::CopyHistory;
64use crate::backend::CopyId;
65use crate::backend::CopyRecord;
66use crate::backend::FileId;
67use crate::backend::MergedTreeId;
68use crate::backend::MillisSinceEpoch;
69use crate::backend::SecureSig;
70use crate::backend::Signature;
71use crate::backend::SigningFn;
72use crate::backend::SymlinkId;
73use crate::backend::Timestamp;
74use crate::backend::Tree;
75use crate::backend::TreeId;
76use crate::backend::TreeValue;
77use crate::backend::make_root_commit;
78use crate::config::ConfigGetError;
79use crate::file_util;
80use crate::file_util::BadPathEncoding;
81use crate::file_util::IoResultExt as _;
82use crate::file_util::PathError;
83use crate::hex_util;
84use crate::index::Index;
85use crate::lock::FileLock;
86use crate::merge::Merge;
87use crate::merge::MergeBuilder;
88use crate::object_id::ObjectId;
89use crate::repo_path::RepoPath;
90use crate::repo_path::RepoPathBuf;
91use crate::repo_path::RepoPathComponentBuf;
92use crate::settings::GitSettings;
93use crate::settings::UserSettings;
94use crate::stacked_table::MutableTable;
95use crate::stacked_table::ReadonlyTable;
96use crate::stacked_table::TableSegment as _;
97use crate::stacked_table::TableStore;
98use crate::stacked_table::TableStoreError;
99
/// Byte length of the commit and tree ids handled by this backend.
const HASH_LENGTH: usize = 20;
/// Byte length of a change id.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";
// NOTE(review): not referenced in this part of the file; presumably the
// file-name suffix used for conflict entries stored in Git trees - confirm.
const CONFLICT_SUFFIX: &str = ".jjconflict";

/// Name of the extra commit header that lists the space-separated hex tree
/// ids of a conflicted root tree.
pub const JJ_TREES_COMMIT_HEADER: &str = "jj:trees";
/// Name of the extra commit header that carries the change id in reverse-hex
/// form.
pub const CHANGE_ID_COMMIT_HEADER: &str = "change-id";
108
/// Errors that may occur while initializing a [`GitBackend`].
#[derive(Debug, Error)]
pub enum GitBackendInitError {
    /// Creating the new Git repository failed.
    #[error("Failed to initialize git repository")]
    InitRepository(#[source] gix::init::Error),
    /// Opening an existing Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// The Git repository path could not be converted to bytes for storage.
    #[error("Failed to encode git repository path")]
    EncodeRepositoryPath(#[source] BadPathEncoding),
    /// Reading the Git-related settings failed.
    #[error(transparent)]
    Config(ConfigGetError),
    /// A filesystem operation on the contained path failed.
    #[error(transparent)]
    Path(PathError),
}
122
123impl From<Box<GitBackendInitError>> for BackendInitError {
124    fn from(err: Box<GitBackendInitError>) -> Self {
125        Self(err)
126    }
127}
128
/// Errors that may occur while loading an existing [`GitBackend`].
#[derive(Debug, Error)]
pub enum GitBackendLoadError {
    /// Opening the backing Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// The stored Git repository path bytes could not be turned back into a
    /// path.
    #[error("Failed to decode git repository path")]
    DecodeRepositoryPath(#[source] BadPathEncoding),
    /// Reading the Git-related settings failed.
    #[error(transparent)]
    Config(ConfigGetError),
    /// A filesystem operation on the contained path failed.
    #[error(transparent)]
    Path(PathError),
}
140
141impl From<Box<GitBackendLoadError>> for BackendLoadError {
142    fn from(err: Box<GitBackendLoadError>) -> Self {
143        Self(err)
144    }
145}
146
/// `GitBackend`-specific error that may occur after the backend is loaded.
#[derive(Debug, Error)]
pub enum GitBackendError {
    /// The extra (non-Git) metadata table could not be read.
    #[error("Failed to read non-git metadata")]
    ReadMetadata(#[source] TableStoreError),
    /// The extra (non-Git) metadata table could not be written.
    #[error("Failed to write non-git metadata")]
    WriteMetadata(#[source] TableStoreError),
}
155
156impl From<GitBackendError> for BackendError {
157    fn from(err: GitBackendError) -> Self {
158        Self::Other(err.into())
159    }
160}
161
/// Errors that may occur while running `git gc`.
#[derive(Debug, Error)]
pub enum GitGcError {
    /// The `git gc` command could not be run at all.
    #[error("Failed to run git gc command")]
    GcCommand(#[source] std::io::Error),
    /// The `git gc` command ran but exited with the contained error status.
    #[error("git gc command exited with an error: {0}")]
    GcCommandErrorStatus(ExitStatus),
}
169
/// Backend whose objects live in a Git repository, with an additional table
/// store for metadata that is kept outside of Git.
pub struct GitBackend {
    // While gix::Repository can be created from gix::ThreadSafeRepository, it's
    // cheaper to cache the thread-local instance behind a mutex than creating
    // one for each backend method call. Our GitBackend is most likely to be
    // used in a single-threaded context.
    base_repo: gix::ThreadSafeRepository,
    // Cached thread-local view of `base_repo`; see the note above.
    repo: Mutex<gix::Repository>,
    // All-zero id (`HASH_LENGTH` bytes) representing the root commit.
    root_commit_id: CommitId,
    // All-zero id (`CHANGE_ID_LENGTH` bytes) representing the root change.
    root_change_id: ChangeId,
    // Hard-coded id of the empty tree.
    empty_tree_id: TreeId,
    // Shallow roots, read from the repository at most once and then reused.
    shallow_root_ids: OnceLock<Vec<CommitId>>,
    // Store of the table holding metadata that is kept outside of Git.
    extra_metadata_store: TableStore,
    // Most recently loaded or saved head of the extra metadata table.
    cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
    // Taken from `GitSettings::executable_path`.
    git_executable: PathBuf,
    // Taken from `GitSettings::write_change_id_header`.
    write_change_id_header: bool,
}
186
187impl GitBackend {
188    pub fn name() -> &'static str {
189        "git"
190    }
191
192    fn new(
193        base_repo: gix::ThreadSafeRepository,
194        extra_metadata_store: TableStore,
195        git_settings: GitSettings,
196    ) -> Self {
197        let repo = Mutex::new(base_repo.to_thread_local());
198        let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
199        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
200        let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
201        Self {
202            base_repo,
203            repo,
204            root_commit_id,
205            root_change_id,
206            empty_tree_id,
207            shallow_root_ids: OnceLock::new(),
208            extra_metadata_store,
209            cached_extra_metadata: Mutex::new(None),
210            git_executable: git_settings.executable_path,
211            write_change_id_header: git_settings.write_change_id_header,
212        }
213    }
214
215    pub fn init_internal(
216        settings: &UserSettings,
217        store_path: &Path,
218    ) -> Result<Self, Box<GitBackendInitError>> {
219        let git_repo_path = Path::new("git");
220        let git_repo = gix::ThreadSafeRepository::init_opts(
221            store_path.join(git_repo_path),
222            gix::create::Kind::Bare,
223            gix::create::Options::default(),
224            gix_open_opts_from_settings(settings),
225        )
226        .map_err(GitBackendInitError::InitRepository)?;
227        let git_settings = settings
228            .git_settings()
229            .map_err(GitBackendInitError::Config)?;
230        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
231    }
232
233    /// Initializes backend by creating a new Git repo at the specified
234    /// workspace path. The workspace directory must exist.
235    pub fn init_colocated(
236        settings: &UserSettings,
237        store_path: &Path,
238        workspace_root: &Path,
239    ) -> Result<Self, Box<GitBackendInitError>> {
240        let canonical_workspace_root = {
241            let path = store_path.join(workspace_root);
242            dunce::canonicalize(&path)
243                .context(&path)
244                .map_err(GitBackendInitError::Path)?
245        };
246        let git_repo = gix::ThreadSafeRepository::init_opts(
247            canonical_workspace_root,
248            gix::create::Kind::WithWorktree,
249            gix::create::Options::default(),
250            gix_open_opts_from_settings(settings),
251        )
252        .map_err(GitBackendInitError::InitRepository)?;
253        let git_repo_path = workspace_root.join(".git");
254        let git_settings = settings
255            .git_settings()
256            .map_err(GitBackendInitError::Config)?;
257        Self::init_with_repo(store_path, &git_repo_path, git_repo, git_settings)
258    }
259
260    /// Initializes backend with an existing Git repo at the specified path.
261    pub fn init_external(
262        settings: &UserSettings,
263        store_path: &Path,
264        git_repo_path: &Path,
265    ) -> Result<Self, Box<GitBackendInitError>> {
266        let canonical_git_repo_path = {
267            let path = store_path.join(git_repo_path);
268            canonicalize_git_repo_path(&path)
269                .context(&path)
270                .map_err(GitBackendInitError::Path)?
271        };
272        let git_repo = gix::ThreadSafeRepository::open_opts(
273            canonical_git_repo_path,
274            gix_open_opts_from_settings(settings),
275        )
276        .map_err(GitBackendInitError::OpenRepository)?;
277        let git_settings = settings
278            .git_settings()
279            .map_err(GitBackendInitError::Config)?;
280        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
281    }
282
283    fn init_with_repo(
284        store_path: &Path,
285        git_repo_path: &Path,
286        repo: gix::ThreadSafeRepository,
287        git_settings: GitSettings,
288    ) -> Result<Self, Box<GitBackendInitError>> {
289        let extra_path = store_path.join("extra");
290        fs::create_dir(&extra_path)
291            .context(&extra_path)
292            .map_err(GitBackendInitError::Path)?;
293        let target_path = store_path.join("git_target");
294        let git_repo_path = if cfg!(windows) && git_repo_path.is_relative() {
295            // When a repository is created in Windows, format the path with *forward
296            // slashes* and not backwards slashes. This makes it possible to use the same
297            // repository under Windows Subsystem for Linux.
298            //
299            // This only works for relative paths. If the path is absolute, there's not much
300            // we can do, and it simply won't work inside and outside WSL at the same time.
301            file_util::slash_path(git_repo_path)
302        } else {
303            git_repo_path.into()
304        };
305        let git_repo_path_bytes = file_util::path_to_bytes(&git_repo_path)
306            .map_err(GitBackendInitError::EncodeRepositoryPath)?;
307        fs::write(&target_path, git_repo_path_bytes)
308            .context(&target_path)
309            .map_err(GitBackendInitError::Path)?;
310        let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
311        Ok(Self::new(repo, extra_metadata_store, git_settings))
312    }
313
314    pub fn load(
315        settings: &UserSettings,
316        store_path: &Path,
317    ) -> Result<Self, Box<GitBackendLoadError>> {
318        let git_repo_path = {
319            let target_path = store_path.join("git_target");
320            let git_repo_path_bytes = fs::read(&target_path)
321                .context(&target_path)
322                .map_err(GitBackendLoadError::Path)?;
323            let git_repo_path = file_util::path_from_bytes(&git_repo_path_bytes)
324                .map_err(GitBackendLoadError::DecodeRepositoryPath)?;
325            let git_repo_path = store_path.join(git_repo_path);
326            canonicalize_git_repo_path(&git_repo_path)
327                .context(&git_repo_path)
328                .map_err(GitBackendLoadError::Path)?
329        };
330        let repo = gix::ThreadSafeRepository::open_opts(
331            git_repo_path,
332            gix_open_opts_from_settings(settings),
333        )
334        .map_err(GitBackendLoadError::OpenRepository)?;
335        let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
336        let git_settings = settings
337            .git_settings()
338            .map_err(GitBackendLoadError::Config)?;
339        Ok(Self::new(repo, extra_metadata_store, git_settings))
340    }
341
342    fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
343        self.repo.lock().unwrap()
344    }
345
346    /// Returns new thread-local instance to access to the underlying Git repo.
347    pub fn git_repo(&self) -> gix::Repository {
348        self.base_repo.to_thread_local()
349    }
350
351    /// Path to the `.git` directory or the repository itself if it's bare.
352    pub fn git_repo_path(&self) -> &Path {
353        self.base_repo.path()
354    }
355
356    /// Path to the working directory if the repository isn't bare.
357    pub fn git_workdir(&self) -> Option<&Path> {
358        self.base_repo.work_dir()
359    }
360
361    fn shallow_root_ids(&self, git_repo: &gix::Repository) -> BackendResult<&[CommitId]> {
362        // The list of shallow roots is cached by gix, but it's still expensive
363        // to stat file on every read_object() call. Refreshing shallow roots is
364        // also bad for consistency reasons.
365        self.shallow_root_ids
366            .get_or_try_init(|| {
367                let maybe_oids = git_repo
368                    .shallow_commits()
369                    .map_err(|err| BackendError::Other(err.into()))?;
370                let commit_ids = maybe_oids.map_or(vec![], |oids| {
371                    oids.iter()
372                        .map(|oid| CommitId::from_bytes(oid.as_bytes()))
373                        .collect()
374                });
375                Ok(commit_ids)
376            })
377            .map(AsRef::as_ref)
378    }
379
380    fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
381        let mut locked_head = self.cached_extra_metadata.lock().unwrap();
382        match locked_head.as_ref() {
383            Some(head) => Ok(head.clone()),
384            None => {
385                let table = self
386                    .extra_metadata_store
387                    .get_head()
388                    .map_err(GitBackendError::ReadMetadata)?;
389                *locked_head = Some(table.clone());
390                Ok(table)
391            }
392        }
393    }
394
395    fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> {
396        let table = self
397            .extra_metadata_store
398            .get_head_locked()
399            .map_err(GitBackendError::ReadMetadata)?;
400        Ok(table)
401    }
402
403    fn save_extra_metadata_table(
404        &self,
405        mut_table: MutableTable,
406        _table_lock: &FileLock,
407    ) -> BackendResult<()> {
408        let table = self
409            .extra_metadata_store
410            .save_table(mut_table)
411            .map_err(GitBackendError::WriteMetadata)?;
412        // Since the parent table was the head, saved table are likely to be new head.
413        // If it's not, cache will be reloaded when entry can't be found.
414        *self.cached_extra_metadata.lock().unwrap() = Some(table);
415        Ok(())
416    }
417
418    /// Imports the given commits and ancestors from the backing Git repo.
419    ///
420    /// The `head_ids` may contain commits that have already been imported, but
421    /// the caller should filter them out to eliminate redundant I/O processing.
422    #[tracing::instrument(skip(self, head_ids))]
423    pub fn import_head_commits<'a>(
424        &self,
425        head_ids: impl IntoIterator<Item = &'a CommitId>,
426    ) -> BackendResult<()> {
427        let head_ids: HashSet<&CommitId> = head_ids
428            .into_iter()
429            .filter(|&id| *id != self.root_commit_id)
430            .collect();
431        if head_ids.is_empty() {
432            return Ok(());
433        }
434
435        // Create no-gc ref even if known to the extras table. Concurrent GC
436        // process might have deleted the no-gc ref.
437        let locked_repo = self.lock_git_repo();
438        locked_repo
439            .edit_references(head_ids.iter().copied().map(to_no_gc_ref_update))
440            .map_err(|err| BackendError::Other(Box::new(err)))?;
441
442        // These commits are imported from Git. Make our change ids persist (otherwise
443        // future write_commit() could reassign new change id.)
444        tracing::debug!(
445            heads_count = head_ids.len(),
446            "import extra metadata entries"
447        );
448        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
449        let mut mut_table = table.start_mutation();
450        import_extra_metadata_entries_from_heads(
451            &locked_repo,
452            &mut mut_table,
453            &table_lock,
454            &head_ids,
455            self.shallow_root_ids(&locked_repo)?,
456        )?;
457        self.save_extra_metadata_table(mut_table, &table_lock)
458    }
459
460    fn read_file_sync(&self, id: &FileId) -> BackendResult<Vec<u8>> {
461        let git_blob_id = validate_git_object_id(id)?;
462        let locked_repo = self.lock_git_repo();
463        let mut blob = locked_repo
464            .find_object(git_blob_id)
465            .map_err(|err| map_not_found_err(err, id))?
466            .try_into_blob()
467            .map_err(|err| to_read_object_err(err, id))?;
468        Ok(blob.take_data())
469    }
470
471    fn new_diff_platform(&self) -> BackendResult<gix::diff::blob::Platform> {
472        let attributes = gix::worktree::Stack::new(
473            Path::new(""),
474            gix::worktree::stack::State::AttributesStack(Default::default()),
475            gix::worktree::glob::pattern::Case::Sensitive,
476            Vec::new(),
477            Vec::new(),
478        );
479        let filter = gix::diff::blob::Pipeline::new(
480            Default::default(),
481            gix::filter::plumbing::Pipeline::new(
482                self.git_repo()
483                    .command_context()
484                    .map_err(|err| BackendError::Other(Box::new(err)))?,
485                Default::default(),
486            ),
487            Vec::new(),
488            Default::default(),
489        );
490        Ok(gix::diff::blob::Platform::new(
491            Default::default(),
492            filter,
493            gix::diff::blob::pipeline::Mode::ToGit,
494            attributes,
495        ))
496    }
497
498    fn read_tree_for_commit<'repo>(
499        &self,
500        repo: &'repo gix::Repository,
501        id: &CommitId,
502    ) -> BackendResult<gix::Tree<'repo>> {
503        let tree = self.read_commit(id).block_on()?.root_tree.to_merge();
504        // TODO(kfm): probably want to do something here if it is a merge
505        let tree_id = tree.first().clone();
506        let gix_id = validate_git_object_id(&tree_id)?;
507        repo.find_object(gix_id)
508            .map_err(|err| map_not_found_err(err, &tree_id))?
509            .try_into_tree()
510            .map_err(|err| to_read_object_err(err, &tree_id))
511    }
512}
513
514/// Canonicalizes the given `path` except for the last `".git"` component.
515///
516/// The last path component matters when opening a Git repo without `core.bare`
517/// config. This config is usually set, but the "repo" tool will set up such
518/// repositories and symlinks. Opening such repo with fully-canonicalized path
519/// would turn a colocated Git repo into a bare repo.
520pub fn canonicalize_git_repo_path(path: &Path) -> io::Result<PathBuf> {
521    if path.ends_with(".git") {
522        let workdir = path.parent().unwrap();
523        dunce::canonicalize(workdir).map(|dir| dir.join(".git"))
524    } else {
525        dunce::canonicalize(path)
526    }
527}
528
529fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options {
530    let user_name = settings.user_name();
531    let user_email = settings.user_email();
532    gix::open::Options::default()
533        .config_overrides([
534            // Committer has to be configured to record reflog. Author isn't
535            // needed, but let's copy the same values.
536            format!("author.name={user_name}"),
537            format!("author.email={user_email}"),
538            format!("committer.name={user_name}"),
539            format!("committer.email={user_email}"),
540        ])
541        // The git_target path should point the repository, not the working directory.
542        .open_path_as_is(true)
543        // Gitoxide recommends this when correctness is preferred
544        .strict_config(true)
545}
546
547/// Parses the `jj:trees` header value.
548fn root_tree_from_git_extra_header(value: &BStr) -> Result<MergedTreeId, ()> {
549    let mut tree_ids = SmallVec::new();
550    for hex in value.split(|b| *b == b' ') {
551        let tree_id = TreeId::try_from_hex(hex).ok_or(())?;
552        if tree_id.as_bytes().len() != HASH_LENGTH {
553            return Err(());
554        }
555        tree_ids.push(tree_id);
556    }
557    // It is invalid to use `jj:trees` with a non-conflicted tree. If this were
558    // allowed, it would be possible to construct a commit which appears to have
559    // different contents depending on whether it is viewed using `jj` or `git`.
560    if tree_ids.len() == 1 || tree_ids.len() % 2 == 0 {
561        return Err(());
562    }
563    Ok(MergedTreeId::Merge(Merge::from_vec(tree_ids)))
564}
565
566fn commit_from_git_without_root_parent(
567    id: &CommitId,
568    git_object: &gix::Object,
569    uses_tree_conflict_format: bool,
570    is_shallow: bool,
571) -> BackendResult<Commit> {
572    let commit = git_object
573        .try_to_commit_ref()
574        .map_err(|err| to_read_object_err(err, id))?;
575
576    // If the git header has a change-id field, we attempt to convert that to a
577    // valid JJ Change Id
578    let change_id = commit
579        .extra_headers()
580        .find(CHANGE_ID_COMMIT_HEADER)
581        .and_then(ChangeId::try_from_reverse_hex)
582        .filter(|val| val.as_bytes().len() == CHANGE_ID_LENGTH)
583        .unwrap_or_else(|| change_id_from_git_commit_id(id));
584
585    // shallow commits don't have parents their parents actually fetched, so we
586    // discard them here
587    // TODO: This causes issues when a shallow repository is deepened/unshallowed
588    let parents = if is_shallow {
589        vec![]
590    } else {
591        commit
592            .parents()
593            .map(|oid| CommitId::from_bytes(oid.as_bytes()))
594            .collect_vec()
595    };
596    // If this commit is a conflict, we'll update the root tree later, when we read
597    // the extra metadata.
598    let root_tree = commit
599        .extra_headers()
600        .find(JJ_TREES_COMMIT_HEADER)
601        .map(root_tree_from_git_extra_header)
602        .transpose()
603        .map_err(|()| to_read_object_err("Invalid jj:trees header", id))?
604        .unwrap_or_else(|| {
605            let tree_id = TreeId::from_bytes(commit.tree().as_bytes());
606            if uses_tree_conflict_format {
607                MergedTreeId::resolved(tree_id)
608            } else {
609                MergedTreeId::Legacy(tree_id)
610            }
611        });
612    // Use lossy conversion as commit message with "mojibake" is still better than
613    // nothing.
614    // TODO: what should we do with commit.encoding?
615    let description = String::from_utf8_lossy(commit.message).into_owned();
616    let author = signature_from_git(commit.author());
617    let committer = signature_from_git(commit.committer());
618
619    // If the commit is signed, extract both the signature and the signed data
620    // (which is the commit buffer with the gpgsig header omitted).
621    // We have to re-parse the raw commit data because gix CommitRef does not give
622    // us the sogned data, only the signature.
623    // Ideally, we could use try_to_commit_ref_iter at the beginning of this
624    // function and extract everything from that. For now, this works
625    let secure_sig = commit
626        .extra_headers
627        .iter()
628        // gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too
629        .any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256")
630        .then(|| CommitRefIter::signature(&git_object.data))
631        .transpose()
632        .map_err(|err| to_read_object_err(err, id))?
633        .flatten()
634        .map(|(sig, data)| SecureSig {
635            data: data.to_bstring().into(),
636            sig: sig.into_owned().into(),
637        });
638
639    Ok(Commit {
640        parents,
641        predecessors: vec![],
642        // If this commit has associated extra metadata, we may reset this later.
643        root_tree,
644        change_id,
645        description,
646        author,
647        committer,
648        secure_sig,
649    })
650}
651
652fn change_id_from_git_commit_id(id: &CommitId) -> ChangeId {
653    // We reverse the bits of the commit id to create the change id. We don't
654    // want to use the first bytes unmodified because then it would be ambiguous
655    // if a given hash prefix refers to the commit id or the change id. It would
656    // have been enough to pick the last 16 bytes instead of the leading 16
657    // bytes to address that. We also reverse the bits to make it less likely
658    // that users depend on any relationship between the two ids.
659    let bytes = id.as_bytes()[4..HASH_LENGTH]
660        .iter()
661        .rev()
662        .map(|b| b.reverse_bits())
663        .collect();
664    ChangeId::new(bytes)
665}
666
667const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING";
668
669fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature {
670    let name = signature.name;
671    let name = if name != EMPTY_STRING_PLACEHOLDER {
672        String::from_utf8_lossy(name).into_owned()
673    } else {
674        "".to_string()
675    };
676    let email = signature.email;
677    let email = if email != EMPTY_STRING_PLACEHOLDER {
678        String::from_utf8_lossy(email).into_owned()
679    } else {
680        "".to_string()
681    };
682    let time = signature.time().unwrap_or_default();
683    let timestamp = MillisSinceEpoch(time.seconds * 1000);
684    let tz_offset = time.offset.div_euclid(60); // in minutes
685    Signature {
686        name,
687        email,
688        timestamp: Timestamp {
689            timestamp,
690            tz_offset,
691        },
692    }
693}
694
695fn signature_to_git(signature: &Signature) -> gix::actor::Signature {
696    // git does not support empty names or emails
697    let name = if !signature.name.is_empty() {
698        &signature.name
699    } else {
700        EMPTY_STRING_PLACEHOLDER
701    };
702    let email = if !signature.email.is_empty() {
703        &signature.email
704    } else {
705        EMPTY_STRING_PLACEHOLDER
706    };
707    let time = gix::date::Time::new(
708        signature.timestamp.timestamp.0.div_euclid(1000),
709        signature.timestamp.tz_offset * 60, // in seconds
710    );
711    gix::actor::Signature {
712        name: name.into(),
713        email: email.into(),
714        time,
715    }
716}
717
718fn serialize_extras(commit: &Commit) -> Vec<u8> {
719    let mut proto = crate::protos::git_store::Commit {
720        change_id: commit.change_id.to_bytes(),
721        ..Default::default()
722    };
723    if let MergedTreeId::Merge(tree_ids) = &commit.root_tree {
724        proto.uses_tree_conflict_format = true;
725        if !tree_ids.is_resolved() {
726            // This is done for the sake of jj versions <0.28 (before commit
727            // f7b14be) being able to read the repo. At some point in the
728            // future, we can stop doing it.
729            proto.root_tree = tree_ids.iter().map(|r| r.to_bytes()).collect();
730        }
731    }
732    for predecessor in &commit.predecessors {
733        proto.predecessors.push(predecessor.to_bytes());
734    }
735    proto.encode_to_vec()
736}
737
738fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
739    let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
740    if !proto.change_id.is_empty() {
741        commit.change_id = ChangeId::new(proto.change_id);
742    }
743    if let MergedTreeId::Legacy(legacy_tree_id) = &commit.root_tree {
744        if proto.uses_tree_conflict_format {
745            if !proto.root_tree.is_empty() {
746                let merge_builder: MergeBuilder<_> = proto
747                    .root_tree
748                    .iter()
749                    .map(|id_bytes| TreeId::from_bytes(id_bytes))
750                    .collect();
751                commit.root_tree = MergedTreeId::Merge(merge_builder.build());
752            } else {
753                // uses_tree_conflict_format was set but there was no root_tree override in the
754                // proto, which means we should just promote the tree id from the
755                // git commit to be a known-conflict-free tree
756                commit.root_tree = MergedTreeId::resolved(legacy_tree_id.clone());
757            }
758        }
759    }
760    for predecessor in &proto.predecessors {
761        commit.predecessors.push(CommitId::from_bytes(predecessor));
762    }
763}
764
765/// Returns `RefEdit` that will create a ref in `refs/jj/keep` if not exist.
766/// Used for preventing GC of commits we create.
767fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit {
768    let name = format!("{NO_GC_REF_NAMESPACE}{id}");
769    let new = gix::refs::Target::Object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()));
770    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(new.clone());
771    gix::refs::transaction::RefEdit {
772        change: gix::refs::transaction::Change::Update {
773            log: gix::refs::transaction::LogChange {
774                message: "used by jj".into(),
775                ..Default::default()
776            },
777            expected,
778            new,
779        },
780        name: name.try_into().unwrap(),
781        deref: false,
782    }
783}
784
785fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit {
786    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(git_ref.target);
787    gix::refs::transaction::RefEdit {
788        change: gix::refs::transaction::Change::Delete {
789            expected,
790            log: gix::refs::transaction::RefLog::AndReference,
791        },
792        name: git_ref.name,
793        deref: false,
794    }
795}
796
797/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other
798/// unreachable and non-head refs.
799fn recreate_no_gc_refs(
800    git_repo: &gix::Repository,
801    new_heads: impl IntoIterator<Item = CommitId>,
802    keep_newer: SystemTime,
803) -> BackendResult<()> {
804    // Calculate diff between existing no-gc refs and new heads.
805    let new_heads: HashSet<CommitId> = new_heads.into_iter().collect();
806    let mut no_gc_refs_to_keep_count: usize = 0;
807    let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new();
808    let git_references = git_repo
809        .references()
810        .map_err(|err| BackendError::Other(err.into()))?;
811    let no_gc_refs_iter = git_references
812        .prefixed(NO_GC_REF_NAMESPACE)
813        .map_err(|err| BackendError::Other(err.into()))?;
814    for git_ref in no_gc_refs_iter {
815        let git_ref = git_ref.map_err(BackendError::Other)?.detach();
816        let oid = git_ref.target.try_id().ok_or_else(|| {
817            let name = git_ref.name.as_bstr();
818            BackendError::Other(format!("Symbolic no-gc ref found: {name}").into())
819        })?;
820        let id = CommitId::from_bytes(oid.as_bytes());
821        let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex();
822        if new_heads.contains(&id) && name_good {
823            no_gc_refs_to_keep_count += 1;
824            continue;
825        }
826        // Check timestamp of loose ref, but this is still racy on re-import
827        // because:
828        // - existing packed ref won't be demoted to loose ref
829        // - existing loose ref won't be touched
830        //
831        // TODO: might be better to switch to a dummy merge, where new no-gc ref
832        // will always have a unique name. Doing that with the current
833        // ref-per-head strategy would increase the number of the no-gc refs.
834        // https://github.com/jj-vcs/jj/pull/2659#issuecomment-1837057782
835        let loose_ref_path = git_repo.path().join(git_ref.name.to_path());
836        if let Ok(metadata) = loose_ref_path.metadata() {
837            let mtime = metadata.modified().expect("unsupported platform?");
838            if mtime > keep_newer {
839                tracing::trace!(?git_ref, "not deleting new");
840                no_gc_refs_to_keep_count += 1;
841                continue;
842            }
843        }
844        // Also deletes no-gc ref of random name created by old jj.
845        tracing::trace!(?git_ref, ?name_good, "will delete");
846        no_gc_refs_to_delete.push(git_ref);
847    }
848    tracing::info!(
849        new_heads_count = new_heads.len(),
850        no_gc_refs_to_keep_count,
851        no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(),
852        "collected reachable refs"
853    );
854
855    // It's slow to delete packed refs one by one, so update refs all at once.
856    let ref_edits = itertools::chain(
857        no_gc_refs_to_delete.into_iter().map(to_ref_deletion),
858        new_heads.iter().map(to_no_gc_ref_update),
859    );
860    git_repo
861        .edit_references(ref_edits)
862        .map_err(|err| BackendError::Other(err.into()))?;
863
864    Ok(())
865}
866
867fn run_git_gc(program: &OsStr, git_dir: &Path, keep_newer: SystemTime) -> Result<(), GitGcError> {
868    let keep_newer = keep_newer
869        .duration_since(SystemTime::UNIX_EPOCH)
870        .unwrap_or_default(); // underflow
871    let mut git = Command::new(program);
872    git.arg("--git-dir=.") // turn off discovery
873        .arg("gc")
874        .arg(format!("--prune=@{} +0000", keep_newer.as_secs()));
875    // Don't specify it by GIT_DIR/--git-dir. On Windows, the path could be
876    // canonicalized as UNC path, which wouldn't be supported by git.
877    git.current_dir(git_dir);
878    // TODO: pass output to UI layer instead of printing directly here
879    tracing::info!(?git, "running git gc");
880    let status = git.status().map_err(GitGcError::GcCommand)?;
881    tracing::info!(?status, "git gc exited");
882    if !status.success() {
883        return Err(GitGcError::GcCommandErrorStatus(status));
884    }
885    Ok(())
886}
887
888fn validate_git_object_id(id: &impl ObjectId) -> BackendResult<gix::ObjectId> {
889    if id.as_bytes().len() != HASH_LENGTH {
890        return Err(BackendError::InvalidHashLength {
891            expected: HASH_LENGTH,
892            actual: id.as_bytes().len(),
893            object_type: id.object_type(),
894            hash: id.hex(),
895        });
896    }
897    Ok(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
898}
899
900fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError {
901    if matches!(err, gix::object::find::existing::Error::NotFound { .. }) {
902        BackendError::ObjectNotFound {
903            object_type: id.object_type(),
904            hash: id.hex(),
905            source: Box::new(err),
906        }
907    } else {
908        to_read_object_err(err, id)
909    }
910}
911
912fn to_read_object_err(
913    err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
914    id: &impl ObjectId,
915) -> BackendError {
916    BackendError::ReadObject {
917        object_type: id.object_type(),
918        hash: id.hex(),
919        source: err.into(),
920    }
921}
922
923fn to_invalid_utf8_err(source: str::Utf8Error, id: &impl ObjectId) -> BackendError {
924    BackendError::InvalidUtf8 {
925        object_type: id.object_type(),
926        hash: id.hex(),
927        source,
928    }
929}
930
931fn import_extra_metadata_entries_from_heads(
932    git_repo: &gix::Repository,
933    mut_table: &mut MutableTable,
934    _table_lock: &FileLock,
935    head_ids: &HashSet<&CommitId>,
936    shallow_roots: &[CommitId],
937) -> BackendResult<()> {
938    let mut work_ids = head_ids
939        .iter()
940        .filter(|&id| mut_table.get_value(id.as_bytes()).is_none())
941        .map(|&id| id.clone())
942        .collect_vec();
943    while let Some(id) = work_ids.pop() {
944        let git_object = git_repo
945            .find_object(validate_git_object_id(&id)?)
946            .map_err(|err| map_not_found_err(err, &id))?;
947        let is_shallow = shallow_roots.contains(&id);
948        // TODO(#1624): Should we read the root tree here and check if it has a
949        // `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
950        // change the description of a commit with tree-level conflicts.
951        let commit = commit_from_git_without_root_parent(&id, &git_object, true, is_shallow)?;
952        mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
953        work_ids.extend(
954            commit
955                .parents
956                .into_iter()
957                .filter(|id| mut_table.get_value(id.as_bytes()).is_none()),
958        );
959    }
960    Ok(())
961}
962
963impl Debug for GitBackend {
964    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
965        f.debug_struct("GitBackend")
966            .field("path", &self.git_repo_path())
967            .finish()
968    }
969}
970
971#[async_trait]
972impl Backend for GitBackend {
    /// Exposes this backend as `&dyn Any`.
    fn as_any(&self) -> &dyn Any {
        self
    }
976
    /// Returns the backend name by delegating to the associated function
    /// `Self::name()`.
    fn name(&self) -> &str {
        Self::name()
    }
980
    /// Returns the length of a commit id, `HASH_LENGTH` (the same constant
    /// that object ids are validated against, in bytes).
    fn commit_id_length(&self) -> usize {
        HASH_LENGTH
    }
984
    /// Returns the length of a change id, `CHANGE_ID_LENGTH`.
    fn change_id_length(&self) -> usize {
        CHANGE_ID_LENGTH
    }
988
    /// Returns the id of the root commit stored on this backend. Reading this
    /// id yields a synthesized root commit rather than a Git object.
    fn root_commit_id(&self) -> &CommitId {
        &self.root_commit_id
    }
992
    /// Returns the change id stored on this backend for the root commit.
    fn root_change_id(&self) -> &ChangeId {
        &self.root_change_id
    }
996
    /// Returns the id of the empty tree stored on this backend. Reading this
    /// id yields `Tree::default()` without consulting the Git repository.
    fn empty_tree_id(&self) -> &TreeId {
        &self.empty_tree_id
    }
1000
    /// Reports a concurrency of 1 for this backend.
    // NOTE(review): presumably a hint for how many requests callers should
    // issue at once; confirm against the `Backend` trait documentation.
    fn concurrency(&self) -> usize {
        1
    }
1004
1005    async fn read_file(
1006        &self,
1007        _path: &RepoPath,
1008        id: &FileId,
1009    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>> {
1010        let data = self.read_file_sync(id)?;
1011        Ok(Box::pin(Cursor::new(data)))
1012    }
1013
1014    async fn write_file(
1015        &self,
1016        _path: &RepoPath,
1017        contents: &mut (dyn AsyncRead + Send + Unpin),
1018    ) -> BackendResult<FileId> {
1019        let mut bytes = Vec::new();
1020        contents.read_to_end(&mut bytes).await.unwrap();
1021        let locked_repo = self.lock_git_repo();
1022        let oid = locked_repo
1023            .write_blob(bytes)
1024            .map_err(|err| BackendError::WriteObject {
1025                object_type: "file",
1026                source: Box::new(err),
1027            })?;
1028        Ok(FileId::new(oid.as_bytes().to_vec()))
1029    }
1030
1031    async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
1032        let git_blob_id = validate_git_object_id(id)?;
1033        let locked_repo = self.lock_git_repo();
1034        let mut blob = locked_repo
1035            .find_object(git_blob_id)
1036            .map_err(|err| map_not_found_err(err, id))?
1037            .try_into_blob()
1038            .map_err(|err| to_read_object_err(err, id))?;
1039        let target = String::from_utf8(blob.take_data())
1040            .map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?;
1041        Ok(target)
1042    }
1043
1044    async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
1045        let locked_repo = self.lock_git_repo();
1046        let oid =
1047            locked_repo
1048                .write_blob(target.as_bytes())
1049                .map_err(|err| BackendError::WriteObject {
1050                    object_type: "symlink",
1051                    source: Box::new(err),
1052                })?;
1053        Ok(SymlinkId::new(oid.as_bytes().to_vec()))
1054    }
1055
1056    async fn read_copy(&self, _id: &CopyId) -> BackendResult<CopyHistory> {
1057        Err(BackendError::Unsupported(
1058            "The Git backend doesn't support tracked copies yet".to_string(),
1059        ))
1060    }
1061
1062    async fn write_copy(&self, _contents: &CopyHistory) -> BackendResult<CopyId> {
1063        Err(BackendError::Unsupported(
1064            "The Git backend doesn't support tracked copies yet".to_string(),
1065        ))
1066    }
1067
1068    async fn get_related_copies(&self, _copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>> {
1069        Err(BackendError::Unsupported(
1070            "The Git backend doesn't support tracked copies yet".to_string(),
1071        ))
1072    }
1073
1074    async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
1075        if id == &self.empty_tree_id {
1076            return Ok(Tree::default());
1077        }
1078        let git_tree_id = validate_git_object_id(id)?;
1079
1080        let locked_repo = self.lock_git_repo();
1081        let git_tree = locked_repo
1082            .find_object(git_tree_id)
1083            .map_err(|err| map_not_found_err(err, id))?
1084            .try_into_tree()
1085            .map_err(|err| to_read_object_err(err, id))?;
1086        let mut entries: Vec<_> = git_tree
1087            .iter()
1088            .map(|entry| -> BackendResult<_> {
1089                let entry = entry.map_err(|err| to_read_object_err(err, id))?;
1090                let name =
1091                    str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?;
1092                let (name, value) = match entry.mode().kind() {
1093                    gix::object::tree::EntryKind::Tree => {
1094                        let id = TreeId::from_bytes(entry.oid().as_bytes());
1095                        (name, TreeValue::Tree(id))
1096                    }
1097                    gix::object::tree::EntryKind::Blob => {
1098                        let id = FileId::from_bytes(entry.oid().as_bytes());
1099                        if let Some(basename) = name.strip_suffix(CONFLICT_SUFFIX) {
1100                            (
1101                                basename,
1102                                TreeValue::Conflict(ConflictId::from_bytes(entry.oid().as_bytes())),
1103                            )
1104                        } else {
1105                            (
1106                                name,
1107                                TreeValue::File {
1108                                    id,
1109                                    executable: false,
1110                                    copy_id: CopyId::placeholder(),
1111                                },
1112                            )
1113                        }
1114                    }
1115                    gix::object::tree::EntryKind::BlobExecutable => {
1116                        let id = FileId::from_bytes(entry.oid().as_bytes());
1117                        (
1118                            name,
1119                            TreeValue::File {
1120                                id,
1121                                executable: true,
1122                                copy_id: CopyId::placeholder(),
1123                            },
1124                        )
1125                    }
1126                    gix::object::tree::EntryKind::Link => {
1127                        let id = SymlinkId::from_bytes(entry.oid().as_bytes());
1128                        (name, TreeValue::Symlink(id))
1129                    }
1130                    gix::object::tree::EntryKind::Commit => {
1131                        let id = CommitId::from_bytes(entry.oid().as_bytes());
1132                        (name, TreeValue::GitSubmodule(id))
1133                    }
1134                };
1135                Ok((RepoPathComponentBuf::new(name).unwrap(), value))
1136            })
1137            .try_collect()?;
1138        // While Git tree entries are sorted, the rule is slightly different.
1139        // Directory names are sorted as if they had trailing "/".
1140        if !entries.is_sorted_by_key(|(name, _)| name) {
1141            entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
1142        }
1143        Ok(Tree::from_sorted_entries(entries))
1144    }
1145
1146    async fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
1147        // Tree entries to be written must be sorted by Entry::filename(), which
1148        // is slightly different from the order of our backend::Tree.
1149        let entries = contents
1150            .entries()
1151            .map(|entry| {
1152                let name = entry.name().as_internal_str();
1153                match entry.value() {
1154                    TreeValue::File {
1155                        id,
1156                        executable: false,
1157                        copy_id: _, // TODO: Use the value
1158                    } => gix::objs::tree::Entry {
1159                        mode: gix::object::tree::EntryKind::Blob.into(),
1160                        filename: name.into(),
1161                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1162                    },
1163                    TreeValue::File {
1164                        id,
1165                        executable: true,
1166                        copy_id: _, // TODO: Use the value
1167                    } => gix::objs::tree::Entry {
1168                        mode: gix::object::tree::EntryKind::BlobExecutable.into(),
1169                        filename: name.into(),
1170                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1171                    },
1172                    TreeValue::Symlink(id) => gix::objs::tree::Entry {
1173                        mode: gix::object::tree::EntryKind::Link.into(),
1174                        filename: name.into(),
1175                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1176                    },
1177                    TreeValue::Tree(id) => gix::objs::tree::Entry {
1178                        mode: gix::object::tree::EntryKind::Tree.into(),
1179                        filename: name.into(),
1180                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1181                    },
1182                    TreeValue::GitSubmodule(id) => gix::objs::tree::Entry {
1183                        mode: gix::object::tree::EntryKind::Commit.into(),
1184                        filename: name.into(),
1185                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1186                    },
1187                    TreeValue::Conflict(id) => gix::objs::tree::Entry {
1188                        mode: gix::object::tree::EntryKind::Blob.into(),
1189                        filename: (name.to_owned() + CONFLICT_SUFFIX).into(),
1190                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1191                    },
1192                }
1193            })
1194            .sorted_unstable()
1195            .collect();
1196        let locked_repo = self.lock_git_repo();
1197        let oid = locked_repo
1198            .write_object(gix::objs::Tree { entries })
1199            .map_err(|err| BackendError::WriteObject {
1200                object_type: "tree",
1201                source: Box::new(err),
1202            })?;
1203        Ok(TreeId::from_bytes(oid.as_bytes()))
1204    }
1205
1206    fn read_conflict(&self, _path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict> {
1207        let data = self.read_file_sync(&FileId::new(id.to_bytes()))?;
1208        let json: serde_json::Value = serde_json::from_slice(&data).unwrap();
1209        Ok(Conflict {
1210            removes: conflict_term_list_from_json(json.get("removes").unwrap()),
1211            adds: conflict_term_list_from_json(json.get("adds").unwrap()),
1212        })
1213    }
1214
1215    fn write_conflict(&self, _path: &RepoPath, conflict: &Conflict) -> BackendResult<ConflictId> {
1216        let json = serde_json::json!({
1217            "removes": conflict_term_list_to_json(&conflict.removes),
1218            "adds": conflict_term_list_to_json(&conflict.adds),
1219        });
1220        let json_string = json.to_string();
1221        let bytes = json_string.as_bytes();
1222        let locked_repo = self.lock_git_repo();
1223        let oid = locked_repo
1224            .write_blob(bytes)
1225            .map_err(|err| BackendError::WriteObject {
1226                object_type: "conflict",
1227                source: Box::new(err),
1228            })?;
1229        Ok(ConflictId::from_bytes(oid.as_bytes()))
1230    }
1231
1232    #[tracing::instrument(skip(self))]
1233    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
1234        if *id == self.root_commit_id {
1235            return Ok(make_root_commit(
1236                self.root_change_id().clone(),
1237                self.empty_tree_id.clone(),
1238            ));
1239        }
1240        let git_commit_id = validate_git_object_id(id)?;
1241
1242        let mut commit = {
1243            let locked_repo = self.lock_git_repo();
1244            let git_object = locked_repo
1245                .find_object(git_commit_id)
1246                .map_err(|err| map_not_found_err(err, id))?;
1247            let is_shallow = self.shallow_root_ids(&locked_repo)?.contains(id);
1248            commit_from_git_without_root_parent(id, &git_object, false, is_shallow)?
1249        };
1250        if commit.parents.is_empty() {
1251            commit.parents.push(self.root_commit_id.clone());
1252        };
1253
1254        let table = self.cached_extra_metadata_table()?;
1255        if let Some(extras) = table.get_value(id.as_bytes()) {
1256            deserialize_extras(&mut commit, extras);
1257        } else {
1258            // TODO: Remove this hack and map to ObjectNotFound error if we're sure that
1259            // there are no reachable ancestor commits without extras metadata. Git commits
1260            // imported by jj < 0.8.0 might not have extras (#924).
1261            // https://github.com/jj-vcs/jj/issues/2343
1262            tracing::info!("unimported Git commit found");
1263            self.import_head_commits([id])?;
1264            let table = self.cached_extra_metadata_table()?;
1265            let extras = table.get_value(id.as_bytes()).unwrap();
1266            deserialize_extras(&mut commit, extras);
1267        }
1268        Ok(commit)
1269    }
1270
    /// Writes `contents` as a Git commit object and records its extras in the
    /// extra metadata table.
    ///
    /// Returns the id of the written commit together with `contents` updated
    /// to match what was stored: the committer timestamp is set from the
    /// seconds actually written, and `secure_sig` is filled in when
    /// `sign_with` is given.
    ///
    /// If the table already holds different extras for the resulting commit
    /// id, the committer timestamp is decremented by one second and the commit
    /// is written again until that is no longer the case.
    ///
    /// # Errors
    ///
    /// Fails if `contents` has no parents, if the root commit is one of
    /// several parents, if a tree or parent id is malformed, or if signing,
    /// writing the object, creating the no-gc ref, or saving the table fails.
    ///
    /// # Panics
    ///
    /// Panics if `contents.secure_sig` is already set.
    async fn write_commit(
        &self,
        mut contents: Commit,
        mut sign_with: Option<&mut SigningFn>,
    ) -> BackendResult<(CommitId, Commit)> {
        assert!(contents.secure_sig.is_none(), "commit.secure_sig was set");

        let locked_repo = self.lock_git_repo();
        // A legacy or resolved tree is referenced directly; an unresolved
        // merge is first written out as a special conflict tree.
        let git_tree_id = match &contents.root_tree {
            MergedTreeId::Legacy(tree_id) => validate_git_object_id(tree_id)?,
            MergedTreeId::Merge(tree_ids) => match tree_ids.as_resolved() {
                Some(tree_id) => validate_git_object_id(tree_id)?,
                None => write_tree_conflict(&locked_repo, tree_ids)?,
            },
        };
        let author = signature_to_git(&contents.author);
        // Mutable because the timestamp may be adjusted in the retry loop below.
        let mut committer = signature_to_git(&contents.committer);
        let message = &contents.description;
        if contents.parents.is_empty() {
            return Err(BackendError::Other(
                "Cannot write a commit with no parents".into(),
            ));
        }
        let mut parents = SmallVec::new();
        for parent_id in &contents.parents {
            if *parent_id == self.root_commit_id {
                // Git doesn't have a root commit, so if the parent is the root commit, we don't
                // add it to the list of parents to write in the Git commit. We also check that
                // there are no other parents since Git cannot represent a merge between a root
                // commit and another commit.
                if contents.parents.len() > 1 {
                    return Err(BackendError::Unsupported(
                        "The Git backend does not support creating merge commits with the root \
                         commit as one of the parents."
                            .to_owned(),
                    ));
                }
            } else {
                parents.push(validate_git_object_id(parent_id)?);
            }
        }
        let mut extra_headers: Vec<(BString, BString)> = vec![];
        // For an unresolved merge, record the hex ids of all its trees,
        // separated by spaces, in a dedicated commit header.
        if let MergedTreeId::Merge(tree_ids) = &contents.root_tree {
            if !tree_ids.is_resolved() {
                let value = tree_ids.iter().map(|id| id.hex()).join(" ");
                extra_headers.push((JJ_TREES_COMMIT_HEADER.into(), value.into()));
            }
        }
        // Optionally record the change id (in reverse hex) as a commit header.
        if self.write_change_id_header {
            extra_headers.push((
                CHANGE_ID_COMMIT_HEADER.into(),
                contents.change_id.reverse_hex().into(),
            ));
        }

        let extras = serialize_extras(&contents);

        // If two writers write commits of the same id with different metadata, they
        // will both succeed and the metadata entries will be "merged" later. Since
        // metadata entry is keyed by the commit id, one of the entries would be lost.
        // To prevent such race condition locally, we extend the scope covered by the
        // table lock. This is still racy if multiple machines are involved and the
        // repository is rsync-ed.
        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
        let id = loop {
            // Rebuilt on every iteration because `committer` may have changed.
            let mut commit = gix::objs::Commit {
                message: message.to_owned().into(),
                tree: git_tree_id,
                author: author.clone(),
                committer: committer.clone(),
                encoding: None,
                parents: parents.clone(),
                extra_headers: extra_headers.clone(),
            };

            if let Some(sign) = &mut sign_with {
                // we don't use gix pool, but at least use their heuristic
                let mut data = Vec::with_capacity(512);
                // The signature covers the commit serialized without the
                // `gpgsig` header, which is appended afterwards.
                commit.write_to(&mut data).unwrap();

                let sig = sign(&data).map_err(|err| BackendError::WriteObject {
                    object_type: "commit",
                    source: Box::new(err),
                })?;
                commit
                    .extra_headers
                    .push(("gpgsig".into(), sig.clone().into()));
                contents.secure_sig = Some(SecureSig { data, sig });
            }

            let git_id =
                locked_repo
                    .write_object(&commit)
                    .map_err(|err| BackendError::WriteObject {
                        object_type: "commit",
                        source: Box::new(err),
                    })?;

            match table.get_value(git_id.as_bytes()) {
                Some(existing_extras) if existing_extras != extras => {
                    // It's possible a commit already exists with the same
                    // commit id but different change id. Adjust the timestamp
                    // until this is no longer the case.
                    //
                    // For example, this can happen when rebasing duplicate
                    // commits, https://github.com/jj-vcs/jj/issues/694.
                    //
                    // `jj` resets the committer timestamp to the current
                    // timestamp whenever it rewrites a commit. So, it's
                    // unlikely for the timestamp to be 0 even if the original
                    // commit had its timestamp set to 0. Moreover, we test that
                    // a commit with a negative timestamp can still be written
                    // and read back by `jj`.
                    committer.time.seconds -= 1;
                }
                // No entry yet, or an entry with identical extras: keep this id.
                _ => break CommitId::from_bytes(git_id.as_bytes()),
            }
        };

        // Everything up to this point had no permanent effect on the repo except
        // GC-able objects
        locked_repo
            .edit_reference(to_no_gc_ref_update(&id))
            .map_err(|err| BackendError::Other(Box::new(err)))?;

        // Update the signature to match the one that was actually written to the object
        // store
        contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000);
        // Record the extras for the new commit while the table lock is held.
        let mut mut_table = table.start_mutation();
        mut_table.add_entry(id.to_bytes(), extras);
        self.save_extra_metadata_table(mut_table, &table_lock)?;
        Ok((id, contents))
    }
1404
1405    fn get_copy_records(
1406        &self,
1407        paths: Option<&[RepoPathBuf]>,
1408        root_id: &CommitId,
1409        head_id: &CommitId,
1410    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>> {
1411        let repo = self.git_repo();
1412        let root_tree = self.read_tree_for_commit(&repo, root_id)?;
1413        let head_tree = self.read_tree_for_commit(&repo, head_id)?;
1414
1415        let change_to_copy_record =
1416            |change: gix::object::tree::diff::Change| -> BackendResult<Option<CopyRecord>> {
1417                let gix::object::tree::diff::Change::Rewrite {
1418                    source_location,
1419                    source_entry_mode,
1420                    source_id,
1421                    entry_mode: dest_entry_mode,
1422                    location: dest_location,
1423                    ..
1424                } = change
1425                else {
1426                    return Ok(None);
1427                };
1428                // TODO: Renamed symlinks cannot be returned because CopyRecord
1429                // expects `source_file: FileId`.
1430                if !source_entry_mode.is_blob() || !dest_entry_mode.is_blob() {
1431                    return Ok(None);
1432                }
1433
1434                let source = str::from_utf8(source_location)
1435                    .map_err(|err| to_invalid_utf8_err(err, root_id))?;
1436                let dest = str::from_utf8(dest_location)
1437                    .map_err(|err| to_invalid_utf8_err(err, head_id))?;
1438
1439                let target = RepoPathBuf::from_internal_string(dest).unwrap();
1440                if !paths.is_none_or(|paths| paths.contains(&target)) {
1441                    return Ok(None);
1442                }
1443
1444                Ok(Some(CopyRecord {
1445                    target,
1446                    target_commit: head_id.clone(),
1447                    source: RepoPathBuf::from_internal_string(source).unwrap(),
1448                    source_file: FileId::from_bytes(source_id.as_bytes()),
1449                    source_commit: root_id.clone(),
1450                }))
1451            };
1452
1453        let mut records: Vec<BackendResult<CopyRecord>> = Vec::new();
1454        root_tree
1455            .changes()
1456            .map_err(|err| BackendError::Other(err.into()))?
1457            .options(|opts| {
1458                opts.track_path().track_rewrites(Some(gix::diff::Rewrites {
1459                    copies: Some(gix::diff::rewrites::Copies {
1460                        source: gix::diff::rewrites::CopySource::FromSetOfModifiedFiles,
1461                        percentage: Some(0.5),
1462                    }),
1463                    percentage: Some(0.5),
1464                    limit: 1000,
1465                    track_empty: false,
1466                }));
1467            })
1468            .for_each_to_obtain_tree_with_cache(
1469                &head_tree,
1470                &mut self.new_diff_platform()?,
1471                |change| -> BackendResult<_> {
1472                    match change_to_copy_record(change) {
1473                        Ok(None) => {}
1474                        Ok(Some(change)) => records.push(Ok(change)),
1475                        Err(err) => records.push(Err(err)),
1476                    }
1477                    Ok(gix::object::tree::diff::Action::Continue)
1478                },
1479            )
1480            .map_err(|err| BackendError::Other(err.into()))?;
1481        Ok(Box::pin(futures::stream::iter(records)))
1482    }
1483
1484    #[tracing::instrument(skip(self, index))]
1485    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
1486        let git_repo = self.lock_git_repo();
1487        let new_heads = index
1488            .all_heads_for_gc()
1489            .map_err(|err| BackendError::Other(err.into()))?
1490            .filter(|id| *id != self.root_commit_id);
1491        recreate_no_gc_refs(&git_repo, new_heads, keep_newer)?;
1492        // TODO: remove unreachable entries from extras table if segment file
1493        // mtime <= keep_newer? (it won't be consistent with no-gc refs
1494        // preserved by the keep_newer timestamp though)
1495        // TODO: remove unreachable extras table segments
1496        run_git_gc(
1497            self.git_executable.as_ref(),
1498            self.git_repo_path(),
1499            keep_newer,
1500        )
1501        .map_err(|err| BackendError::Other(err.into()))?;
1502        // Since "git gc" will move loose refs into packed refs, in-memory
1503        // packed-refs cache should be invalidated without relying on mtime.
1504        git_repo.refs.force_refresh_packed_buffer().ok();
1505        Ok(())
1506    }
1507}
1508
1509/// Write a tree conflict as a special tree with `.jjconflict-base-N` and
1510/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd.
1511fn write_tree_conflict(
1512    repo: &gix::Repository,
1513    conflict: &Merge<TreeId>,
1514) -> BackendResult<gix::ObjectId> {
1515    // Tree entries to be written must be sorted by Entry::filename().
1516    let mut entries = itertools::chain(
1517        conflict
1518            .removes()
1519            .enumerate()
1520            .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)),
1521        conflict
1522            .adds()
1523            .enumerate()
1524            .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)),
1525    )
1526    .map(|(name, tree_id)| gix::objs::tree::Entry {
1527        mode: gix::object::tree::EntryKind::Tree.into(),
1528        filename: name.into(),
1529        oid: gix::ObjectId::from_bytes_or_panic(tree_id.as_bytes()),
1530    })
1531    .collect_vec();
1532    let readme_id = repo
1533        .write_blob(
1534            r#"This commit was made by jj, https://github.com/jj-vcs/jj.
1535The commit contains file conflicts, and therefore looks wrong when used with plain
1536Git or other tools that are unfamiliar with jj.
1537
1538The .jjconflict-* directories represent the different inputs to the conflict.
1539For details, see
1540https://jj-vcs.github.io/jj/prerelease/git-compatibility/#format-mapping-details
1541
1542If you see this file in your working copy, it probably means that you used a
1543regular `git` command to check out a conflicted commit. Use `jj abandon` to
1544recover.
1545"#,
1546        )
1547        .map_err(|err| {
1548            BackendError::Other(format!("Failed to write README for conflict tree: {err}").into())
1549        })?
1550        .detach();
1551    entries.push(gix::objs::tree::Entry {
1552        mode: gix::object::tree::EntryKind::Blob.into(),
1553        filename: "README".into(),
1554        oid: readme_id,
1555    });
1556    entries.sort_unstable();
1557    let id = repo
1558        .write_object(gix::objs::Tree { entries })
1559        .map_err(|err| BackendError::WriteObject {
1560            object_type: "tree",
1561            source: Box::new(err),
1562        })?;
1563    Ok(id.detach())
1564}
1565
1566fn conflict_term_list_to_json(parts: &[ConflictTerm]) -> serde_json::Value {
1567    serde_json::Value::Array(parts.iter().map(conflict_term_to_json).collect())
1568}
1569
1570fn conflict_term_list_from_json(json: &serde_json::Value) -> Vec<ConflictTerm> {
1571    json.as_array()
1572        .unwrap()
1573        .iter()
1574        .map(conflict_term_from_json)
1575        .collect()
1576}
1577
1578fn conflict_term_to_json(part: &ConflictTerm) -> serde_json::Value {
1579    serde_json::json!({
1580        "value": tree_value_to_json(&part.value),
1581    })
1582}
1583
1584fn conflict_term_from_json(json: &serde_json::Value) -> ConflictTerm {
1585    let json_value = json.get("value").unwrap();
1586    ConflictTerm {
1587        value: tree_value_from_json(json_value),
1588    }
1589}
1590
1591fn tree_value_to_json(value: &TreeValue) -> serde_json::Value {
1592    match value {
1593        TreeValue::File {
1594            id,
1595            executable,
1596            copy_id: _,
1597        } => serde_json::json!({
1598             "file": {
1599                 "id": id.hex(),
1600                 "executable": executable,
1601             },
1602        }),
1603        TreeValue::Symlink(id) => serde_json::json!({
1604             "symlink_id": id.hex(),
1605        }),
1606        TreeValue::Tree(id) => serde_json::json!({
1607             "tree_id": id.hex(),
1608        }),
1609        TreeValue::GitSubmodule(id) => serde_json::json!({
1610             "submodule_id": id.hex(),
1611        }),
1612        TreeValue::Conflict(id) => serde_json::json!({
1613             "conflict_id": id.hex(),
1614        }),
1615    }
1616}
1617
1618fn tree_value_from_json(json: &serde_json::Value) -> TreeValue {
1619    if let Some(json_file) = json.get("file") {
1620        TreeValue::File {
1621            id: FileId::new(bytes_vec_from_json(json_file.get("id").unwrap())),
1622            executable: json_file.get("executable").unwrap().as_bool().unwrap(),
1623            copy_id: CopyId::placeholder(),
1624        }
1625    } else if let Some(json_id) = json.get("symlink_id") {
1626        TreeValue::Symlink(SymlinkId::new(bytes_vec_from_json(json_id)))
1627    } else if let Some(json_id) = json.get("tree_id") {
1628        TreeValue::Tree(TreeId::new(bytes_vec_from_json(json_id)))
1629    } else if let Some(json_id) = json.get("submodule_id") {
1630        TreeValue::GitSubmodule(CommitId::new(bytes_vec_from_json(json_id)))
1631    } else if let Some(json_id) = json.get("conflict_id") {
1632        TreeValue::Conflict(ConflictId::new(bytes_vec_from_json(json_id)))
1633    } else {
1634        panic!("unexpected json value in conflict: {json:#?}");
1635    }
1636}
1637
1638fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> {
1639    hex_util::decode_hex(value.as_str().unwrap()).unwrap()
1640}
1641
1642#[cfg(test)]
1643mod tests {
1644    use assert_matches::assert_matches;
1645    use gix::date::parse::TimeBuf;
1646    use pollster::FutureExt as _;
1647
1648    use super::*;
1649    use crate::config::StackedConfig;
1650    use crate::content_hash::blake2b_hash;
1651    use crate::tests::new_temp_dir;
1652
1653    const GIT_USER: &str = "Someone";
1654    const GIT_EMAIL: &str = "someone@example.com";
1655
1656    fn git_config() -> Vec<bstr::BString> {
1657        vec![
1658            format!("user.name = {GIT_USER}").into(),
1659            format!("user.email = {GIT_EMAIL}").into(),
1660            "init.defaultBranch = master".into(),
1661        ]
1662    }
1663
1664    fn open_options() -> gix::open::Options {
1665        gix::open::Options::isolated()
1666            .config_overrides(git_config())
1667            .strict_config(true)
1668    }
1669
1670    fn git_init(directory: impl AsRef<Path>) -> gix::Repository {
1671        gix::ThreadSafeRepository::init_opts(
1672            directory,
1673            gix::create::Kind::WithWorktree,
1674            gix::create::Options::default(),
1675            open_options(),
1676        )
1677        .unwrap()
1678        .to_thread_local()
1679    }
1680
1681    #[test]
1682    fn read_plain_git_commit() {
1683        let settings = user_settings();
1684        let temp_dir = new_temp_dir();
1685        let store_path = temp_dir.path();
1686        let git_repo_path = temp_dir.path().join("git");
1687        let git_repo = git_init(git_repo_path);
1688
1689        // Add a commit with some files in
1690        let blob1 = git_repo.write_blob(b"content1").unwrap().detach();
1691        let blob2 = git_repo.write_blob(b"normal").unwrap().detach();
1692        let mut dir_tree_editor = git_repo.empty_tree().edit().unwrap();
1693        dir_tree_editor
1694            .upsert("normal", gix::object::tree::EntryKind::Blob, blob1)
1695            .unwrap();
1696        dir_tree_editor
1697            .upsert("symlink", gix::object::tree::EntryKind::Link, blob2)
1698            .unwrap();
1699        let dir_tree_id = dir_tree_editor.write().unwrap().detach();
1700        let mut root_tree_builder = git_repo.empty_tree().edit().unwrap();
1701        root_tree_builder
1702            .upsert("dir", gix::object::tree::EntryKind::Tree, dir_tree_id)
1703            .unwrap();
1704        let root_tree_id = root_tree_builder.write().unwrap().detach();
1705        let git_author = gix::actor::Signature {
1706            name: "git author".into(),
1707            email: "git.author@example.com".into(),
1708            time: gix::date::Time::new(1000, 60 * 60),
1709        };
1710        let git_committer = gix::actor::Signature {
1711            name: "git committer".into(),
1712            email: "git.committer@example.com".into(),
1713            time: gix::date::Time::new(2000, -480 * 60),
1714        };
1715        let git_commit_id = git_repo
1716            .commit_as(
1717                git_committer.to_ref(&mut TimeBuf::default()),
1718                git_author.to_ref(&mut TimeBuf::default()),
1719                "refs/heads/dummy",
1720                "git commit message",
1721                root_tree_id,
1722                [] as [gix::ObjectId; 0],
1723            )
1724            .unwrap()
1725            .detach();
1726        git_repo
1727            .find_reference("refs/heads/dummy")
1728            .unwrap()
1729            .delete()
1730            .unwrap();
1731        let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263");
1732        // The change id is the leading reverse bits of the commit id
1733        let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25");
1734        // Check that the git commit above got the hash we expect
1735        assert_eq!(
1736            git_commit_id.as_bytes(),
1737            commit_id.as_bytes(),
1738            "{git_commit_id:?} vs {commit_id:?}"
1739        );
1740
1741        // Add an empty commit on top
1742        let git_commit_id2 = git_repo
1743            .commit_as(
1744                git_committer.to_ref(&mut TimeBuf::default()),
1745                git_author.to_ref(&mut TimeBuf::default()),
1746                "refs/heads/dummy2",
1747                "git commit message 2",
1748                root_tree_id,
1749                [git_commit_id],
1750            )
1751            .unwrap()
1752            .detach();
1753        git_repo
1754            .find_reference("refs/heads/dummy2")
1755            .unwrap()
1756            .delete()
1757            .unwrap();
1758        let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes());
1759
1760        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1761
1762        // Import the head commit and its ancestors
1763        backend.import_head_commits([&commit_id2]).unwrap();
1764        // Ref should be created only for the head commit
1765        let git_refs = backend
1766            .git_repo()
1767            .references()
1768            .unwrap()
1769            .prefixed("refs/jj/keep/")
1770            .unwrap()
1771            .map(|git_ref| git_ref.unwrap().id().detach())
1772            .collect_vec();
1773        assert_eq!(git_refs, vec![git_commit_id2]);
1774
1775        let commit = backend.read_commit(&commit_id).block_on().unwrap();
1776        assert_eq!(&commit.change_id, &change_id);
1777        assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
1778        assert_eq!(commit.predecessors, vec![]);
1779        assert_eq!(
1780            commit.root_tree.to_merge(),
1781            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1782        );
1783        assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1784        assert_eq!(commit.description, "git commit message");
1785        assert_eq!(commit.author.name, "git author");
1786        assert_eq!(commit.author.email, "git.author@example.com");
1787        assert_eq!(
1788            commit.author.timestamp.timestamp,
1789            MillisSinceEpoch(1000 * 1000)
1790        );
1791        assert_eq!(commit.author.timestamp.tz_offset, 60);
1792        assert_eq!(commit.committer.name, "git committer");
1793        assert_eq!(commit.committer.email, "git.committer@example.com");
1794        assert_eq!(
1795            commit.committer.timestamp.timestamp,
1796            MillisSinceEpoch(2000 * 1000)
1797        );
1798        assert_eq!(commit.committer.timestamp.tz_offset, -480);
1799
1800        let root_tree = backend
1801            .read_tree(
1802                RepoPath::root(),
1803                &TreeId::from_bytes(root_tree_id.as_bytes()),
1804            )
1805            .block_on()
1806            .unwrap();
1807        let mut root_entries = root_tree.entries();
1808        let dir = root_entries.next().unwrap();
1809        assert_eq!(root_entries.next(), None);
1810        assert_eq!(dir.name().as_internal_str(), "dir");
1811        assert_eq!(
1812            dir.value(),
1813            &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes()))
1814        );
1815
1816        let dir_tree = backend
1817            .read_tree(
1818                RepoPath::from_internal_string("dir").unwrap(),
1819                &TreeId::from_bytes(dir_tree_id.as_bytes()),
1820            )
1821            .block_on()
1822            .unwrap();
1823        let mut entries = dir_tree.entries();
1824        let file = entries.next().unwrap();
1825        let symlink = entries.next().unwrap();
1826        assert_eq!(entries.next(), None);
1827        assert_eq!(file.name().as_internal_str(), "normal");
1828        assert_eq!(
1829            file.value(),
1830            &TreeValue::File {
1831                id: FileId::from_bytes(blob1.as_bytes()),
1832                executable: false,
1833                copy_id: CopyId::placeholder(),
1834            }
1835        );
1836        assert_eq!(symlink.name().as_internal_str(), "symlink");
1837        assert_eq!(
1838            symlink.value(),
1839            &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
1840        );
1841
1842        let commit2 = backend.read_commit(&commit_id2).block_on().unwrap();
1843        assert_eq!(commit2.parents, vec![commit_id.clone()]);
1844        assert_eq!(commit.predecessors, vec![]);
1845        assert_eq!(
1846            commit.root_tree.to_merge(),
1847            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1848        );
1849        assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1850    }
1851
1852    #[test]
1853    fn read_git_commit_without_importing() {
1854        let settings = user_settings();
1855        let temp_dir = new_temp_dir();
1856        let store_path = temp_dir.path();
1857        let git_repo_path = temp_dir.path().join("git");
1858        let git_repo = git_init(&git_repo_path);
1859
1860        let signature = gix::actor::Signature {
1861            name: GIT_USER.into(),
1862            email: GIT_EMAIL.into(),
1863            time: gix::date::Time::now_utc(),
1864        };
1865        let empty_tree_id =
1866            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1867        let git_commit_id = git_repo
1868            .commit_as(
1869                signature.to_ref(&mut TimeBuf::default()),
1870                signature.to_ref(&mut TimeBuf::default()),
1871                "refs/heads/main",
1872                "git commit message",
1873                empty_tree_id,
1874                [] as [gix::ObjectId; 0],
1875            )
1876            .unwrap();
1877
1878        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1879
1880        // read_commit() without import_head_commits() works as of now. This might be
1881        // changed later.
1882        assert!(
1883            backend
1884                .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1885                .block_on()
1886                .is_ok()
1887        );
1888        assert!(
1889            backend
1890                .cached_extra_metadata_table()
1891                .unwrap()
1892                .get_value(git_commit_id.as_bytes())
1893                .is_some(),
1894            "extra metadata should have been be created"
1895        );
1896    }
1897
1898    #[test]
1899    fn read_signed_git_commit() {
1900        let settings = user_settings();
1901        let temp_dir = new_temp_dir();
1902        let store_path = temp_dir.path();
1903        let git_repo_path = temp_dir.path().join("git");
1904        let git_repo = git_init(git_repo_path);
1905
1906        let signature = gix::actor::Signature {
1907            name: GIT_USER.into(),
1908            email: GIT_EMAIL.into(),
1909            time: gix::date::Time::now_utc(),
1910        };
1911        let empty_tree_id =
1912            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1913
1914        let secure_sig =
1915            "here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP\n";
1916
1917        let mut commit = gix::objs::Commit {
1918            tree: empty_tree_id,
1919            parents: smallvec::SmallVec::new(),
1920            author: signature.clone(),
1921            committer: signature.clone(),
1922            encoding: None,
1923            message: "git commit message".into(),
1924            extra_headers: Vec::new(),
1925        };
1926
1927        let mut commit_buf = Vec::new();
1928        commit.write_to(&mut commit_buf).unwrap();
1929        let commit_str = std::str::from_utf8(&commit_buf).unwrap();
1930
1931        commit
1932            .extra_headers
1933            .push(("gpgsig".into(), secure_sig.into()));
1934
1935        let git_commit_id = git_repo.write_object(&commit).unwrap();
1936
1937        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1938
1939        let commit = backend
1940            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1941            .block_on()
1942            .unwrap();
1943
1944        let sig = commit.secure_sig.expect("failed to read the signature");
1945
1946        // converting to string for nicer assert diff
1947        assert_eq!(std::str::from_utf8(&sig.sig).unwrap(), secure_sig);
1948        assert_eq!(std::str::from_utf8(&sig.data).unwrap(), commit_str);
1949    }
1950
1951    #[test]
1952    fn round_trip_change_id_via_git_header() {
1953        let settings = user_settings();
1954        let temp_dir = new_temp_dir();
1955
1956        let store_path = temp_dir.path().join("store");
1957        fs::create_dir(&store_path).unwrap();
1958        let empty_store_path = temp_dir.path().join("empty_store");
1959        fs::create_dir(&empty_store_path).unwrap();
1960        let git_repo_path = temp_dir.path().join("git");
1961        let git_repo = git_init(git_repo_path);
1962
1963        let backend = GitBackend::init_external(&settings, &store_path, git_repo.path()).unwrap();
1964        let original_change_id = ChangeId::from_hex("1111eeee1111eeee1111eeee1111eeee");
1965        let commit = Commit {
1966            parents: vec![backend.root_commit_id().clone()],
1967            predecessors: vec![],
1968            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
1969            change_id: original_change_id.clone(),
1970            description: "initial".to_string(),
1971            author: create_signature(),
1972            committer: create_signature(),
1973            secure_sig: None,
1974        };
1975
1976        let (initial_commit_id, _init_commit) =
1977            backend.write_commit(commit, None).block_on().unwrap();
1978        let commit = backend.read_commit(&initial_commit_id).block_on().unwrap();
1979        assert_eq!(
1980            commit.change_id, original_change_id,
1981            "The change-id header did not roundtrip"
1982        );
1983
1984        // Because of how change ids are also persisted in extra proto files,
1985        // initialize a new store without those files, but reuse the same git
1986        // storage. This change-id must be derived from the git commit header.
1987        let no_extra_backend =
1988            GitBackend::init_external(&settings, &empty_store_path, git_repo.path()).unwrap();
1989        let no_extra_commit = no_extra_backend
1990            .read_commit(&initial_commit_id)
1991            .block_on()
1992            .unwrap();
1993
1994        assert_eq!(
1995            no_extra_commit.change_id, original_change_id,
1996            "The change-id header did not roundtrip"
1997        );
1998    }
1999
2000    #[test]
2001    fn read_empty_string_placeholder() {
2002        let git_signature1 = gix::actor::Signature {
2003            name: EMPTY_STRING_PLACEHOLDER.into(),
2004            email: "git.author@example.com".into(),
2005            time: gix::date::Time::new(1000, 60 * 60),
2006        };
2007        let signature1 = signature_from_git(git_signature1.to_ref(&mut TimeBuf::default()));
2008        assert!(signature1.name.is_empty());
2009        assert_eq!(signature1.email, "git.author@example.com");
2010        let git_signature2 = gix::actor::Signature {
2011            name: "git committer".into(),
2012            email: EMPTY_STRING_PLACEHOLDER.into(),
2013            time: gix::date::Time::new(2000, -480 * 60),
2014        };
2015        let signature2 = signature_from_git(git_signature2.to_ref(&mut TimeBuf::default()));
2016        assert_eq!(signature2.name, "git committer");
2017        assert!(signature2.email.is_empty());
2018    }
2019
2020    #[test]
2021    fn write_empty_string_placeholder() {
2022        let signature1 = Signature {
2023            name: "".to_string(),
2024            email: "someone@example.com".to_string(),
2025            timestamp: Timestamp {
2026                timestamp: MillisSinceEpoch(0),
2027                tz_offset: 0,
2028            },
2029        };
2030        let git_signature1 = signature_to_git(&signature1);
2031        assert_eq!(git_signature1.name, EMPTY_STRING_PLACEHOLDER);
2032        assert_eq!(git_signature1.email, "someone@example.com");
2033        let signature2 = Signature {
2034            name: "Someone".to_string(),
2035            email: "".to_string(),
2036            timestamp: Timestamp {
2037                timestamp: MillisSinceEpoch(0),
2038                tz_offset: 0,
2039            },
2040        };
2041        let git_signature2 = signature_to_git(&signature2);
2042        assert_eq!(git_signature2.name, "Someone");
2043        assert_eq!(git_signature2.email, EMPTY_STRING_PLACEHOLDER);
2044    }
2045
2046    /// Test that parents get written correctly
2047    #[test]
2048    fn git_commit_parents() {
2049        let settings = user_settings();
2050        let temp_dir = new_temp_dir();
2051        let store_path = temp_dir.path();
2052        let git_repo_path = temp_dir.path().join("git");
2053        let git_repo = git_init(&git_repo_path);
2054
2055        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
2056        let mut commit = Commit {
2057            parents: vec![],
2058            predecessors: vec![],
2059            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2060            change_id: ChangeId::from_hex("abc123"),
2061            description: "".to_string(),
2062            author: create_signature(),
2063            committer: create_signature(),
2064            secure_sig: None,
2065        };
2066
2067        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2068            backend.write_commit(commit, None).block_on()
2069        };
2070
2071        // No parents
2072        commit.parents = vec![];
2073        assert_matches!(
2074            write_commit(commit.clone()),
2075            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
2076        );
2077
2078        // Only root commit as parent
2079        commit.parents = vec![backend.root_commit_id().clone()];
2080        let first_id = write_commit(commit.clone()).unwrap().0;
2081        let first_commit = backend.read_commit(&first_id).block_on().unwrap();
2082        assert_eq!(first_commit, commit);
2083        let first_git_commit = git_repo.find_commit(git_id(&first_id)).unwrap();
2084        assert!(first_git_commit.parent_ids().collect_vec().is_empty());
2085
2086        // Only non-root commit as parent
2087        commit.parents = vec![first_id.clone()];
2088        let second_id = write_commit(commit.clone()).unwrap().0;
2089        let second_commit = backend.read_commit(&second_id).block_on().unwrap();
2090        assert_eq!(second_commit, commit);
2091        let second_git_commit = git_repo.find_commit(git_id(&second_id)).unwrap();
2092        assert_eq!(
2093            second_git_commit.parent_ids().collect_vec(),
2094            vec![git_id(&first_id)]
2095        );
2096
2097        // Merge commit
2098        commit.parents = vec![first_id.clone(), second_id.clone()];
2099        let merge_id = write_commit(commit.clone()).unwrap().0;
2100        let merge_commit = backend.read_commit(&merge_id).block_on().unwrap();
2101        assert_eq!(merge_commit, commit);
2102        let merge_git_commit = git_repo.find_commit(git_id(&merge_id)).unwrap();
2103        assert_eq!(
2104            merge_git_commit.parent_ids().collect_vec(),
2105            vec![git_id(&first_id), git_id(&second_id)]
2106        );
2107
2108        // Merge commit with root as one parent
2109        commit.parents = vec![first_id, backend.root_commit_id().clone()];
2110        assert_matches!(
2111            write_commit(commit),
2112            Err(BackendError::Unsupported(message)) if message.contains("root commit")
2113        );
2114    }
2115
2116    #[test]
2117    fn write_tree_conflicts() {
2118        let settings = user_settings();
2119        let temp_dir = new_temp_dir();
2120        let store_path = temp_dir.path();
2121        let git_repo_path = temp_dir.path().join("git");
2122        let git_repo = git_init(&git_repo_path);
2123
2124        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
2125        let create_tree = |i| {
2126            let blob_id = git_repo.write_blob(format!("content {i}")).unwrap();
2127            let mut tree_builder = git_repo.empty_tree().edit().unwrap();
2128            tree_builder
2129                .upsert(
2130                    format!("file{i}"),
2131                    gix::object::tree::EntryKind::Blob,
2132                    blob_id,
2133                )
2134                .unwrap();
2135            TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
2136        };
2137
2138        let root_tree = Merge::from_removes_adds(
2139            vec![create_tree(0), create_tree(1)],
2140            vec![create_tree(2), create_tree(3), create_tree(4)],
2141        );
2142        let mut commit = Commit {
2143            parents: vec![backend.root_commit_id().clone()],
2144            predecessors: vec![],
2145            root_tree: MergedTreeId::Merge(root_tree.clone()),
2146            change_id: ChangeId::from_hex("abc123"),
2147            description: "".to_string(),
2148            author: create_signature(),
2149            committer: create_signature(),
2150            secure_sig: None,
2151        };
2152
2153        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2154            backend.write_commit(commit, None).block_on()
2155        };
2156
2157        // When writing a tree-level conflict, the root tree on the git side has the
2158        // individual trees as subtrees.
2159        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2160        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2161        assert_eq!(read_commit, commit);
2162        let git_commit = git_repo
2163            .find_commit(gix::ObjectId::from_bytes_or_panic(
2164                read_commit_id.as_bytes(),
2165            ))
2166            .unwrap();
2167        let git_tree = git_repo.find_tree(git_commit.tree_id().unwrap()).unwrap();
2168        assert!(
2169            git_tree
2170                .iter()
2171                .map(Result::unwrap)
2172                .filter(|entry| entry.filename() != b"README")
2173                .all(|entry| entry.mode().value() == 0o040000)
2174        );
2175        let mut iter = git_tree.iter().map(Result::unwrap);
2176        let entry = iter.next().unwrap();
2177        assert_eq!(entry.filename(), b".jjconflict-base-0");
2178        assert_eq!(
2179            entry.id().as_bytes(),
2180            root_tree.get_remove(0).unwrap().as_bytes()
2181        );
2182        let entry = iter.next().unwrap();
2183        assert_eq!(entry.filename(), b".jjconflict-base-1");
2184        assert_eq!(
2185            entry.id().as_bytes(),
2186            root_tree.get_remove(1).unwrap().as_bytes()
2187        );
2188        let entry = iter.next().unwrap();
2189        assert_eq!(entry.filename(), b".jjconflict-side-0");
2190        assert_eq!(
2191            entry.id().as_bytes(),
2192            root_tree.get_add(0).unwrap().as_bytes()
2193        );
2194        let entry = iter.next().unwrap();
2195        assert_eq!(entry.filename(), b".jjconflict-side-1");
2196        assert_eq!(
2197            entry.id().as_bytes(),
2198            root_tree.get_add(1).unwrap().as_bytes()
2199        );
2200        let entry = iter.next().unwrap();
2201        assert_eq!(entry.filename(), b".jjconflict-side-2");
2202        assert_eq!(
2203            entry.id().as_bytes(),
2204            root_tree.get_add(2).unwrap().as_bytes()
2205        );
2206        let entry = iter.next().unwrap();
2207        assert_eq!(entry.filename(), b"README");
2208        assert_eq!(entry.mode().value(), 0o100644);
2209        assert!(iter.next().is_none());
2210
2211        // When writing a single tree using the new format, it's represented by a
2212        // regular git tree.
2213        commit.root_tree = MergedTreeId::resolved(create_tree(5));
2214        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2215        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2216        assert_eq!(read_commit, commit);
2217        let git_commit = git_repo
2218            .find_commit(gix::ObjectId::from_bytes_or_panic(
2219                read_commit_id.as_bytes(),
2220            ))
2221            .unwrap();
2222        assert_eq!(
2223            MergedTreeId::resolved(TreeId::from_bytes(git_commit.tree_id().unwrap().as_bytes())),
2224            commit.root_tree
2225        );
2226    }
2227
2228    #[test]
2229    fn commit_has_ref() {
2230        let settings = user_settings();
2231        let temp_dir = new_temp_dir();
2232        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2233        let git_repo = backend.git_repo();
2234        let signature = Signature {
2235            name: "Someone".to_string(),
2236            email: "someone@example.com".to_string(),
2237            timestamp: Timestamp {
2238                timestamp: MillisSinceEpoch(0),
2239                tz_offset: 0,
2240            },
2241        };
2242        let commit = Commit {
2243            parents: vec![backend.root_commit_id().clone()],
2244            predecessors: vec![],
2245            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2246            change_id: ChangeId::new(vec![42; 16]),
2247            description: "initial".to_string(),
2248            author: signature.clone(),
2249            committer: signature,
2250            secure_sig: None,
2251        };
2252        let commit_id = backend.write_commit(commit, None).block_on().unwrap().0;
2253        let git_refs = git_repo.references().unwrap();
2254        let git_ref_ids: Vec<_> = git_refs
2255            .prefixed("refs/jj/keep/")
2256            .unwrap()
2257            .map(|x| x.unwrap().id().detach())
2258            .collect();
2259        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2260
2261        // Concurrently-running GC deletes the ref, leaving the extra metadata.
2262        for git_ref in git_refs.prefixed("refs/jj/keep/").unwrap() {
2263            git_ref.unwrap().delete().unwrap();
2264        }
2265        // Re-imported commit should have new ref.
2266        backend.import_head_commits([&commit_id]).unwrap();
2267        let git_refs = git_repo.references().unwrap();
2268        let git_ref_ids: Vec<_> = git_refs
2269            .prefixed("refs/jj/keep/")
2270            .unwrap()
2271            .map(|x| x.unwrap().id().detach())
2272            .collect();
2273        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2274    }
2275
2276    #[test]
2277    fn import_head_commits_duplicates() {
2278        let settings = user_settings();
2279        let temp_dir = new_temp_dir();
2280        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2281        let git_repo = backend.git_repo();
2282
2283        let signature = gix::actor::Signature {
2284            name: GIT_USER.into(),
2285            email: GIT_EMAIL.into(),
2286            time: gix::date::Time::now_utc(),
2287        };
2288        let empty_tree_id =
2289            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
2290        let git_commit_id = git_repo
2291            .commit_as(
2292                signature.to_ref(&mut TimeBuf::default()),
2293                signature.to_ref(&mut TimeBuf::default()),
2294                "refs/heads/main",
2295                "git commit message",
2296                empty_tree_id,
2297                [] as [gix::ObjectId; 0],
2298            )
2299            .unwrap()
2300            .detach();
2301        let commit_id = CommitId::from_bytes(git_commit_id.as_bytes());
2302
2303        // Ref creation shouldn't fail because of duplicated head ids.
2304        backend
2305            .import_head_commits([&commit_id, &commit_id])
2306            .unwrap();
2307        assert!(
2308            git_repo
2309                .references()
2310                .unwrap()
2311                .prefixed("refs/jj/keep/")
2312                .unwrap()
2313                .any(|git_ref| git_ref.unwrap().id().detach() == git_commit_id)
2314        );
2315    }
2316
2317    #[test]
2318    fn overlapping_git_commit_id() {
2319        let settings = user_settings();
2320        let temp_dir = new_temp_dir();
2321        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2322        let commit1 = Commit {
2323            parents: vec![backend.root_commit_id().clone()],
2324            predecessors: vec![],
2325            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2326            change_id: ChangeId::from_hex("7f0a7ce70354b22efcccf7bf144017c4"),
2327            description: "initial".to_string(),
2328            author: create_signature(),
2329            committer: create_signature(),
2330            secure_sig: None,
2331        };
2332
2333        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2334            backend.write_commit(commit, None).block_on()
2335        };
2336
2337        let (commit_id1, mut commit2) = write_commit(commit1).unwrap();
2338        commit2.predecessors.push(commit_id1.clone());
2339        // `write_commit` should prevent the ids from being the same by changing the
2340        // committer timestamp of the commit it actually writes.
2341        let (commit_id2, mut actual_commit2) = write_commit(commit2.clone()).unwrap();
2342        // The returned matches the ID
2343        assert_eq!(
2344            backend.read_commit(&commit_id2).block_on().unwrap(),
2345            actual_commit2
2346        );
2347        assert_ne!(commit_id2, commit_id1);
2348        // The committer timestamp should differ
2349        assert_ne!(
2350            actual_commit2.committer.timestamp.timestamp,
2351            commit2.committer.timestamp.timestamp
2352        );
2353        // The rest of the commit should be the same
2354        actual_commit2.committer.timestamp.timestamp = commit2.committer.timestamp.timestamp;
2355        assert_eq!(actual_commit2, commit2);
2356    }
2357
2358    #[test]
2359    fn write_signed_commit() {
2360        let settings = user_settings();
2361        let temp_dir = new_temp_dir();
2362        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2363
2364        let commit = Commit {
2365            parents: vec![backend.root_commit_id().clone()],
2366            predecessors: vec![],
2367            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2368            change_id: ChangeId::new(vec![42; 16]),
2369            description: "initial".to_string(),
2370            author: create_signature(),
2371            committer: create_signature(),
2372            secure_sig: None,
2373        };
2374
2375        let mut signer = |data: &_| {
2376            let hash: String = hex_util::encode_hex(&blake2b_hash(data));
2377            Ok(format!("test sig\nhash={hash}\n").into_bytes())
2378        };
2379
2380        let (id, commit) = backend
2381            .write_commit(commit, Some(&mut signer as &mut SigningFn))
2382            .block_on()
2383            .unwrap();
2384
2385        let git_repo = backend.git_repo();
2386        let obj = git_repo
2387            .find_object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
2388            .unwrap();
2389        insta::assert_snapshot!(std::str::from_utf8(&obj.data).unwrap(), @r"
2390        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2391        author Someone <someone@example.com> 0 +0000
2392        committer Someone <someone@example.com> 0 +0000
2393        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp
2394        gpgsig test sig
2395         hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29
2396
2397        initial
2398        ");
2399
2400        let returned_sig = commit.secure_sig.expect("failed to return the signature");
2401
2402        let commit = backend.read_commit(&id).block_on().unwrap();
2403
2404        let sig = commit.secure_sig.expect("failed to read the signature");
2405        assert_eq!(&sig, &returned_sig);
2406
2407        insta::assert_snapshot!(std::str::from_utf8(&sig.sig).unwrap(), @r"
2408        test sig
2409        hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29
2410        ");
2411        insta::assert_snapshot!(std::str::from_utf8(&sig.data).unwrap(), @r"
2412        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2413        author Someone <someone@example.com> 0 +0000
2414        committer Someone <someone@example.com> 0 +0000
2415        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp
2416
2417        initial
2418        ");
2419    }
2420
2421    fn git_id(commit_id: &CommitId) -> gix::ObjectId {
2422        gix::ObjectId::from_bytes_or_panic(commit_id.as_bytes())
2423    }
2424
2425    fn create_signature() -> Signature {
2426        Signature {
2427            name: GIT_USER.to_string(),
2428            email: GIT_EMAIL.to_string(),
2429            timestamp: Timestamp {
2430                timestamp: MillisSinceEpoch(0),
2431                tz_offset: 0,
2432            },
2433        }
2434    }
2435
2436    // Not using testutils::user_settings() because there is a dependency cycle
2437    // 'jj_lib (1) -> testutils -> jj_lib (2)' which creates another distinct
2438    // UserSettings type. testutils returns jj_lib (2)'s UserSettings, whereas
2439    // our UserSettings type comes from jj_lib (1).
2440    fn user_settings() -> UserSettings {
2441        let config = StackedConfig::with_defaults();
2442        UserSettings::from_config(config).unwrap()
2443    }
2444}