Skip to main content

jj_lib/
git_backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::collections::HashSet;
18use std::ffi::OsStr;
19use std::fmt::Debug;
20use std::fmt::Error;
21use std::fmt::Formatter;
22use std::fs;
23use std::io;
24use std::io::Cursor;
25use std::path::Path;
26use std::path::PathBuf;
27use std::pin::Pin;
28use std::process::Command;
29use std::process::ExitStatus;
30use std::str::Utf8Error;
31use std::sync::Arc;
32use std::sync::Mutex;
33use std::sync::MutexGuard;
34use std::time::SystemTime;
35
36use async_trait::async_trait;
37use futures::StreamExt as _;
38use futures::stream::BoxStream;
39use gix::bstr::BString;
40use gix::objs::CommitRefIter;
41use gix::objs::WriteTo as _;
42use itertools::Itertools as _;
43use once_cell::sync::OnceCell as OnceLock;
44use pollster::FutureExt as _;
45use prost::Message as _;
46use smallvec::SmallVec;
47use thiserror::Error;
48use tokio::io::AsyncRead;
49use tokio::io::AsyncReadExt as _;
50
51use crate::backend::Backend;
52use crate::backend::BackendError;
53use crate::backend::BackendInitError;
54use crate::backend::BackendLoadError;
55use crate::backend::BackendResult;
56use crate::backend::ChangeId;
57use crate::backend::Commit;
58use crate::backend::CommitId;
59use crate::backend::CopyHistory;
60use crate::backend::CopyId;
61use crate::backend::CopyRecord;
62use crate::backend::FileId;
63use crate::backend::MillisSinceEpoch;
64use crate::backend::RelatedCopy;
65use crate::backend::SecureSig;
66use crate::backend::Signature;
67use crate::backend::SigningFn;
68use crate::backend::SymlinkId;
69use crate::backend::Timestamp;
70use crate::backend::Tree;
71use crate::backend::TreeId;
72use crate::backend::TreeValue;
73use crate::backend::make_root_commit;
74use crate::config::ConfigGetError;
75use crate::file_util;
76use crate::file_util::BadPathEncoding;
77use crate::file_util::IoResultExt as _;
78use crate::file_util::PathError;
79use crate::git::GitSettings;
80use crate::index::Index;
81use crate::lock::FileLock;
82use crate::merge::Merge;
83use crate::merge::MergeBuilder;
84use crate::object_id::ObjectId;
85use crate::repo_path::RepoPath;
86use crate::repo_path::RepoPathBuf;
87use crate::repo_path::RepoPathComponentBuf;
88use crate::settings::UserSettings;
89use crate::stacked_table::MutableTable;
90use crate::stacked_table::ReadonlyTable;
91use crate::stacked_table::TableSegment as _;
92use crate::stacked_table::TableStore;
93use crate::stacked_table::TableStoreError;
94
/// Length in bytes of the commit and tree ids handled by this backend. Also
/// passed to the extra metadata `TableStore` when it is created or loaded.
const HASH_LENGTH: usize = 20;
/// Length in bytes of a change id.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";

// NOTE(review): not referenced in the visible part of this file; presumably
// the name of a README file added to conflicted trees -- confirm at use site.
pub const JJ_CONFLICT_README_FILE_NAME: &str = "JJ-CONFLICT-README";

/// Commit header holding the space-separated hex tree ids of a conflicted
/// root tree.
pub const JJ_TREES_COMMIT_HEADER: &str = "jj:trees";
/// Commit header holding the newline-terminated conflict labels.
pub const JJ_CONFLICT_LABELS_COMMIT_HEADER: &str = "jj:conflict-labels";
/// Commit header holding the change id in reverse-hex encoding.
pub const CHANGE_ID_COMMIT_HEADER: &str = "change-id";
105
/// Error that may occur while initializing a new `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendInitError {
    /// Creating the Git repository on disk failed.
    #[error("Failed to initialize git repository")]
    InitRepository(#[source] gix::init::Error),
    /// Opening an already existing Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// The Git repository path could not be converted to bytes for storage in
    /// the `git_target` file.
    #[error("Failed to encode git repository path")]
    EncodeRepositoryPath(#[source] BadPathEncoding),
    /// The Git-related settings could not be read from the user settings.
    #[error(transparent)]
    Config(ConfigGetError),
    /// A filesystem operation on the contained path failed.
    #[error(transparent)]
    Path(PathError),
}
119
120impl From<Box<GitBackendInitError>> for BackendInitError {
121    fn from(err: Box<GitBackendInitError>) -> Self {
122        Self(err)
123    }
124}
125
/// Error that may occur while loading an existing `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendLoadError {
    /// Opening the Git repository referenced by the store failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// The bytes stored in the `git_target` file could not be converted back
    /// to a path.
    #[error("Failed to decode git repository path")]
    DecodeRepositoryPath(#[source] BadPathEncoding),
    /// The Git-related settings could not be read from the user settings.
    #[error(transparent)]
    Config(ConfigGetError),
    /// A filesystem operation on the contained path failed.
    #[error(transparent)]
    Path(PathError),
}
137
138impl From<Box<GitBackendLoadError>> for BackendLoadError {
139    fn from(err: Box<GitBackendLoadError>) -> Self {
140        Self(err)
141    }
142}
143
/// `GitBackend`-specific error that may occur after the backend is loaded.
#[derive(Debug, Error)]
pub enum GitBackendError {
    /// Reading the head of the extra metadata table store failed.
    #[error("Failed to read non-git metadata")]
    ReadMetadata(#[source] TableStoreError),
    /// Saving a table to the extra metadata table store failed.
    #[error("Failed to write non-git metadata")]
    WriteMetadata(#[source] TableStoreError),
}
152
153impl From<GitBackendError> for BackendError {
154    fn from(err: GitBackendError) -> Self {
155        Self::Other(err.into())
156    }
157}
158
/// Error reported for a failed `git gc` invocation.
// NOTE(review): the code producing these variants is outside the visible part
// of this file; the variant docs below are derived from the error messages.
#[derive(Debug, Error)]
pub enum GitGcError {
    /// The `git gc` command could not be run.
    #[error("Failed to run git gc command")]
    GcCommand(#[source] std::io::Error),
    /// The `git gc` command ran but exited with the contained error status.
    #[error("git gc command exited with an error: {0}")]
    GcCommandErrorStatus(ExitStatus),
}
166
/// Backend that keeps commits, trees and files in a Git repository, plus a
/// separate table store for metadata that isn't stored in Git objects.
pub struct GitBackend {
    // While gix::Repository can be created from gix::ThreadSafeRepository, it's
    // cheaper to cache the thread-local instance behind a mutex than creating
    // one for each backend method call. Our GitBackend is most likely to be
    // used in a single-threaded context.
    /// Thread-safe handle from which new thread-local instances are created.
    base_repo: gix::ThreadSafeRepository,
    /// Cached thread-local instance created from `base_repo`.
    repo: Mutex<gix::Repository>,
    /// Id of the root commit: `HASH_LENGTH` zero bytes.
    root_commit_id: CommitId,
    /// Change id of the root commit: `CHANGE_ID_LENGTH` zero bytes.
    root_change_id: ChangeId,
    /// Id of the empty tree.
    empty_tree_id: TreeId,
    /// Shallow roots of the Git repo, read once on first use and then kept.
    shallow_root_ids: OnceLock<Vec<CommitId>>,
    /// Table store located in the `extra` directory of the store.
    extra_metadata_store: TableStore,
    /// Head table of `extra_metadata_store` as last read or saved, if any.
    cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
    /// Copied from `GitSettings::executable_path`.
    // NOTE(review): presumably the `git` binary used for subprocess
    // invocations such as `git gc` -- confirm at use site.
    git_executable: PathBuf,
    /// Copied from `GitSettings::write_change_id_header`.
    // NOTE(review): presumably controls whether written commits carry the
    // `change-id` header -- confirm at use site.
    write_change_id_header: bool,
}
183
184impl GitBackend {
    /// Returns the name of this backend type, `"git"`.
    pub fn name() -> &'static str {
        "git"
    }
188
189    fn new(
190        base_repo: gix::ThreadSafeRepository,
191        extra_metadata_store: TableStore,
192        git_settings: GitSettings,
193    ) -> Self {
194        let repo = Mutex::new(base_repo.to_thread_local());
195        let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
196        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
197        let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
198        Self {
199            base_repo,
200            repo,
201            root_commit_id,
202            root_change_id,
203            empty_tree_id,
204            shallow_root_ids: OnceLock::new(),
205            extra_metadata_store,
206            cached_extra_metadata: Mutex::new(None),
207            git_executable: git_settings.executable_path,
208            write_change_id_header: git_settings.write_change_id_header,
209        }
210    }
211
212    pub fn init_internal(
213        settings: &UserSettings,
214        store_path: &Path,
215    ) -> Result<Self, Box<GitBackendInitError>> {
216        let git_repo_path = Path::new("git");
217        let git_repo = gix::ThreadSafeRepository::init_opts(
218            store_path.join(git_repo_path),
219            gix::create::Kind::Bare,
220            gix::create::Options::default(),
221            gix_open_opts_from_settings(settings),
222        )
223        .map_err(GitBackendInitError::InitRepository)?;
224        let git_settings =
225            GitSettings::from_settings(settings).map_err(GitBackendInitError::Config)?;
226        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
227    }
228
229    /// Initializes backend by creating a new Git repo at the specified
230    /// workspace path. The workspace directory must exist.
231    pub fn init_colocated(
232        settings: &UserSettings,
233        store_path: &Path,
234        workspace_root: &Path,
235    ) -> Result<Self, Box<GitBackendInitError>> {
236        let canonical_workspace_root = {
237            let path = store_path.join(workspace_root);
238            dunce::canonicalize(&path)
239                .context(&path)
240                .map_err(GitBackendInitError::Path)?
241        };
242        let git_repo = gix::ThreadSafeRepository::init_opts(
243            canonical_workspace_root,
244            gix::create::Kind::WithWorktree,
245            gix::create::Options::default(),
246            gix_open_opts_from_settings(settings),
247        )
248        .map_err(GitBackendInitError::InitRepository)?;
249        let git_repo_path = workspace_root.join(".git");
250        let git_settings =
251            GitSettings::from_settings(settings).map_err(GitBackendInitError::Config)?;
252        Self::init_with_repo(store_path, &git_repo_path, git_repo, git_settings)
253    }
254
255    /// Initializes backend with an existing Git repo at the specified path.
256    pub fn init_external(
257        settings: &UserSettings,
258        store_path: &Path,
259        git_repo_path: &Path,
260    ) -> Result<Self, Box<GitBackendInitError>> {
261        let canonical_git_repo_path = {
262            let path = store_path.join(git_repo_path);
263            canonicalize_git_repo_path(&path)
264                .context(&path)
265                .map_err(GitBackendInitError::Path)?
266        };
267        let git_repo = gix::ThreadSafeRepository::open_opts(
268            canonical_git_repo_path,
269            gix_open_opts_from_settings(settings),
270        )
271        .map_err(GitBackendInitError::OpenRepository)?;
272        let git_settings =
273            GitSettings::from_settings(settings).map_err(GitBackendInitError::Config)?;
274        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
275    }
276
277    fn init_with_repo(
278        store_path: &Path,
279        git_repo_path: &Path,
280        repo: gix::ThreadSafeRepository,
281        git_settings: GitSettings,
282    ) -> Result<Self, Box<GitBackendInitError>> {
283        let extra_path = store_path.join("extra");
284        fs::create_dir(&extra_path)
285            .context(&extra_path)
286            .map_err(GitBackendInitError::Path)?;
287        let target_path = store_path.join("git_target");
288        let git_repo_path = if cfg!(windows) && git_repo_path.is_relative() {
289            // When a repository is created in Windows, format the path with *forward
290            // slashes* and not backwards slashes. This makes it possible to use the same
291            // repository under Windows Subsystem for Linux.
292            //
293            // This only works for relative paths. If the path is absolute, there's not much
294            // we can do, and it simply won't work inside and outside WSL at the same time.
295            file_util::slash_path(git_repo_path)
296        } else {
297            git_repo_path.into()
298        };
299        let git_repo_path_bytes = file_util::path_to_bytes(&git_repo_path)
300            .map_err(GitBackendInitError::EncodeRepositoryPath)?;
301        fs::write(&target_path, git_repo_path_bytes)
302            .context(&target_path)
303            .map_err(GitBackendInitError::Path)?;
304        let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
305        Ok(Self::new(repo, extra_metadata_store, git_settings))
306    }
307
308    pub fn load(
309        settings: &UserSettings,
310        store_path: &Path,
311    ) -> Result<Self, Box<GitBackendLoadError>> {
312        let git_repo_path = {
313            let target_path = store_path.join("git_target");
314            let git_repo_path_bytes = fs::read(&target_path)
315                .context(&target_path)
316                .map_err(GitBackendLoadError::Path)?;
317            let git_repo_path = file_util::path_from_bytes(&git_repo_path_bytes)
318                .map_err(GitBackendLoadError::DecodeRepositoryPath)?;
319            let git_repo_path = store_path.join(git_repo_path);
320            canonicalize_git_repo_path(&git_repo_path)
321                .context(&git_repo_path)
322                .map_err(GitBackendLoadError::Path)?
323        };
324        let repo = gix::ThreadSafeRepository::open_opts(
325            git_repo_path,
326            gix_open_opts_from_settings(settings),
327        )
328        .map_err(GitBackendLoadError::OpenRepository)?;
329        let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
330        let git_settings =
331            GitSettings::from_settings(settings).map_err(GitBackendLoadError::Config)?;
332        Ok(Self::new(repo, extra_metadata_store, git_settings))
333    }
334
    /// Locks and returns the cached thread-local Git repo instance.
    ///
    /// # Panics
    ///
    /// Panics if the mutex is poisoned.
    fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
        self.repo.lock().unwrap()
    }
338
    /// Returns a new thread-local instance for accessing the underlying Git
    /// repo. Unlike `lock_git_repo()`, this does not use the cached instance.
    pub fn git_repo(&self) -> gix::Repository {
        self.base_repo.to_thread_local()
    }
343
    /// Returns the path to the `.git` directory, or to the repository itself
    /// if it's bare.
    pub fn git_repo_path(&self) -> &Path {
        self.base_repo.path()
    }
348
    /// Returns the path to the working directory, or `None` if the repository
    /// is bare.
    pub fn git_workdir(&self) -> Option<&Path> {
        self.base_repo.work_dir()
    }
353
354    fn shallow_root_ids(&self, git_repo: &gix::Repository) -> BackendResult<&[CommitId]> {
355        // The list of shallow roots is cached by gix, but it's still expensive
356        // to stat file on every read_object() call. Refreshing shallow roots is
357        // also bad for consistency reasons.
358        self.shallow_root_ids
359            .get_or_try_init(|| {
360                let maybe_oids = git_repo
361                    .shallow_commits()
362                    .map_err(|err| BackendError::Other(err.into()))?;
363                let commit_ids = maybe_oids.map_or(vec![], |oids| {
364                    oids.iter()
365                        .map(|oid| CommitId::from_bytes(oid.as_bytes()))
366                        .collect()
367                });
368                Ok(commit_ids)
369            })
370            .map(AsRef::as_ref)
371    }
372
373    fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
374        let mut locked_head = self.cached_extra_metadata.lock().unwrap();
375        match locked_head.as_ref() {
376            Some(head) => Ok(head.clone()),
377            None => {
378                let table = self
379                    .extra_metadata_store
380                    .get_head()
381                    .map_err(GitBackendError::ReadMetadata)?;
382                *locked_head = Some(table.clone());
383                Ok(table)
384            }
385        }
386    }
387
388    fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> {
389        let table = self
390            .extra_metadata_store
391            .get_head_locked()
392            .map_err(GitBackendError::ReadMetadata)?;
393        Ok(table)
394    }
395
396    fn save_extra_metadata_table(
397        &self,
398        mut_table: MutableTable,
399        _table_lock: &FileLock,
400    ) -> BackendResult<()> {
401        let table = self
402            .extra_metadata_store
403            .save_table(mut_table)
404            .map_err(GitBackendError::WriteMetadata)?;
405        // Since the parent table was the head, saved table are likely to be new head.
406        // If it's not, cache will be reloaded when entry can't be found.
407        *self.cached_extra_metadata.lock().unwrap() = Some(table);
408        Ok(())
409    }
410
411    /// Imports the given commits and ancestors from the backing Git repo.
412    ///
413    /// The `head_ids` may contain commits that have already been imported, but
414    /// the caller should filter them out to eliminate redundant I/O processing.
415    #[tracing::instrument(skip(self, head_ids))]
416    pub fn import_head_commits<'a>(
417        &self,
418        head_ids: impl IntoIterator<Item = &'a CommitId>,
419    ) -> BackendResult<()> {
420        let head_ids: HashSet<&CommitId> = head_ids
421            .into_iter()
422            .filter(|&id| *id != self.root_commit_id)
423            .collect();
424        if head_ids.is_empty() {
425            return Ok(());
426        }
427
428        // Create no-gc ref even if known to the extras table. Concurrent GC
429        // process might have deleted the no-gc ref.
430        let locked_repo = self.lock_git_repo();
431        locked_repo
432            .edit_references(head_ids.iter().copied().map(to_no_gc_ref_update))
433            .map_err(|err| BackendError::Other(Box::new(err)))?;
434
435        // These commits are imported from Git. Make our change ids persist (otherwise
436        // future write_commit() could reassign new change id.)
437        tracing::debug!(
438            heads_count = head_ids.len(),
439            "import extra metadata entries"
440        );
441        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
442        let mut mut_table = table.start_mutation();
443        import_extra_metadata_entries_from_heads(
444            &locked_repo,
445            &mut mut_table,
446            &table_lock,
447            &head_ids,
448            self.shallow_root_ids(&locked_repo)?,
449        )?;
450        self.save_extra_metadata_table(mut_table, &table_lock)
451    }
452
453    fn read_file_sync(&self, id: &FileId) -> BackendResult<Vec<u8>> {
454        let git_blob_id = validate_git_object_id(id)?;
455        let locked_repo = self.lock_git_repo();
456        let mut blob = locked_repo
457            .find_object(git_blob_id)
458            .map_err(|err| map_not_found_err(err, id))?
459            .try_into_blob()
460            .map_err(|err| to_read_object_err(err, id))?;
461        Ok(blob.take_data())
462    }
463
464    fn new_diff_platform(&self) -> BackendResult<gix::diff::blob::Platform> {
465        let attributes = gix::worktree::Stack::new(
466            Path::new(""),
467            gix::worktree::stack::State::AttributesStack(Default::default()),
468            gix::worktree::glob::pattern::Case::Sensitive,
469            Vec::new(),
470            Vec::new(),
471        );
472        let filter = gix::diff::blob::Pipeline::new(
473            Default::default(),
474            gix::filter::plumbing::Pipeline::new(
475                self.git_repo()
476                    .command_context()
477                    .map_err(|err| BackendError::Other(Box::new(err)))?,
478                Default::default(),
479            ),
480            Vec::new(),
481            Default::default(),
482        );
483        Ok(gix::diff::blob::Platform::new(
484            Default::default(),
485            filter,
486            gix::diff::blob::pipeline::Mode::ToGit,
487            attributes,
488        ))
489    }
490
491    fn read_tree_for_commit<'repo>(
492        &self,
493        repo: &'repo gix::Repository,
494        id: &CommitId,
495    ) -> BackendResult<gix::Tree<'repo>> {
496        let tree = self.read_commit(id).block_on()?.root_tree;
497        // TODO(kfm): probably want to do something here if it is a merge
498        let tree_id = tree.first().clone();
499        let gix_id = validate_git_object_id(&tree_id)?;
500        repo.find_object(gix_id)
501            .map_err(|err| map_not_found_err(err, &tree_id))?
502            .try_into_tree()
503            .map_err(|err| to_read_object_err(err, &tree_id))
504    }
505}
506
507/// Canonicalizes the given `path` except for the last `".git"` component.
508///
509/// The last path component matters when opening a Git repo without `core.bare`
510/// config. This config is usually set, but the "repo" tool will set up such
511/// repositories and symlinks. Opening such repo with fully-canonicalized path
512/// would turn a colocated Git repo into a bare repo.
513pub fn canonicalize_git_repo_path(path: &Path) -> io::Result<PathBuf> {
514    if path.ends_with(".git") {
515        let workdir = path.parent().unwrap();
516        dunce::canonicalize(workdir).map(|dir| dir.join(".git"))
517    } else {
518        dunce::canonicalize(path)
519    }
520}
521
522fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options {
523    let user_name = settings.user_name();
524    let user_email = settings.user_email();
525    gix::open::Options::default()
526        .config_overrides([
527            // Committer has to be configured to record reflog. Author isn't
528            // needed, but let's copy the same values.
529            format!("author.name={user_name}"),
530            format!("author.email={user_email}"),
531            format!("committer.name={user_name}"),
532            format!("committer.email={user_email}"),
533        ])
534        // The git_target path should point the repository, not the working directory.
535        .open_path_as_is(true)
536        // Gitoxide recommends this when correctness is preferred
537        .strict_config(true)
538}
539
540/// Parses the `jj:conflict-labels` header value if present.
541fn extract_conflict_labels_from_commit(commit: &gix::objs::CommitRef) -> Merge<String> {
542    let Some(value) = commit
543        .extra_headers()
544        .find(JJ_CONFLICT_LABELS_COMMIT_HEADER)
545    else {
546        return Merge::resolved(String::new());
547    };
548
549    str::from_utf8(value)
550        .expect("labels should be valid utf8")
551        .split_terminator('\n')
552        .map(str::to_owned)
553        .collect::<MergeBuilder<_>>()
554        .build()
555}
556
557/// Parses the `jj:trees` header value if present, otherwise returns the
558/// resolved tree ID from Git.
559fn extract_root_tree_from_commit(commit: &gix::objs::CommitRef) -> Result<Merge<TreeId>, ()> {
560    let Some(value) = commit.extra_headers().find(JJ_TREES_COMMIT_HEADER) else {
561        let tree_id = TreeId::from_bytes(commit.tree().as_bytes());
562        return Ok(Merge::resolved(tree_id));
563    };
564
565    let mut tree_ids = SmallVec::new();
566    for hex in value.split(|b| *b == b' ') {
567        let tree_id = TreeId::try_from_hex(hex).ok_or(())?;
568        if tree_id.as_bytes().len() != HASH_LENGTH {
569            return Err(());
570        }
571        tree_ids.push(tree_id);
572    }
573    // It is invalid to use `jj:trees` with a non-conflicted tree. If this were
574    // allowed, it would be possible to construct a commit which appears to have
575    // different contents depending on whether it is viewed using `jj` or `git`.
576    if tree_ids.len() == 1 || tree_ids.len() % 2 == 0 {
577        return Err(());
578    }
579    Ok(Merge::from_vec(tree_ids))
580}
581
582fn commit_from_git_without_root_parent(
583    id: &CommitId,
584    git_object: &gix::Object,
585    is_shallow: bool,
586) -> BackendResult<Commit> {
587    let decode_err = |err: gix::objs::decode::Error| to_read_object_err(err, id);
588    let commit = git_object
589        .try_to_commit_ref()
590        .map_err(|err| to_read_object_err(err, id))?;
591
592    // If the git header has a change-id field, we attempt to convert that to a
593    // valid JJ Change Id
594    let change_id = extract_change_id_from_commit(&commit)
595        .unwrap_or_else(|| synthetic_change_id_from_git_commit_id(id));
596
597    // shallow commits don't have parents their parents actually fetched, so we
598    // discard them here
599    // TODO: This causes issues when a shallow repository is deepened/unshallowed
600    let parents = if is_shallow {
601        vec![]
602    } else {
603        commit
604            .parents()
605            .map(|oid| CommitId::from_bytes(oid.as_bytes()))
606            .collect_vec()
607    };
608    // If the commit is a conflict, the conflict labels are stored in a commit
609    // header separately from the trees.
610    let conflict_labels = extract_conflict_labels_from_commit(&commit);
611    // Conflicted commits written before we started using the `jj:trees` header
612    // (~March 2024) may have the root trees stored in the extra metadata table
613    // instead. For such commits, we'll update the root tree later when we read the
614    // extra metadata.
615    let root_tree = extract_root_tree_from_commit(&commit)
616        .map_err(|()| to_read_object_err("Invalid jj:trees header", id))?;
617    // Use lossy conversion as commit message with "mojibake" is still better than
618    // nothing.
619    // TODO: what should we do with commit.encoding?
620    let description = String::from_utf8_lossy(commit.message).into_owned();
621    let author = signature_from_git(commit.author().map_err(decode_err)?);
622    let committer = signature_from_git(commit.committer().map_err(decode_err)?);
623
624    // If the commit is signed, extract both the signature and the signed data
625    // (which is the commit buffer with the gpgsig header omitted).
626    // We have to re-parse the raw commit data because gix CommitRef does not give
627    // us the sogned data, only the signature.
628    // Ideally, we could use try_to_commit_ref_iter at the beginning of this
629    // function and extract everything from that. For now, this works
630    let secure_sig = commit
631        .extra_headers
632        .iter()
633        // gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too
634        .any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256")
635        .then(|| CommitRefIter::signature(&git_object.data))
636        .transpose()
637        .map_err(decode_err)?
638        .flatten()
639        .map(|(sig, data)| SecureSig {
640            data: data.to_bstring().into(),
641            sig: sig.into_owned().into(),
642        });
643
644    Ok(Commit {
645        parents,
646        predecessors: vec![],
647        // If this commit has associated extra metadata, we may reset this later.
648        root_tree,
649        conflict_labels,
650        change_id,
651        description,
652        author,
653        committer,
654        secure_sig,
655    })
656}
657
658/// Extracts change id from commit headers.
659pub fn extract_change_id_from_commit(commit: &gix::objs::CommitRef) -> Option<ChangeId> {
660    commit
661        .extra_headers()
662        .find(CHANGE_ID_COMMIT_HEADER)
663        .and_then(ChangeId::try_from_reverse_hex)
664        .filter(|val| val.as_bytes().len() == CHANGE_ID_LENGTH)
665}
666
667/// Deterministically creates a change id based on the commit id
668///
669/// Used when we get a commit without a change id. The exact algorithm for the
670/// computation should not be relied upon.
671pub fn synthetic_change_id_from_git_commit_id(id: &CommitId) -> ChangeId {
672    // We reverse the bits of the commit id to create the change id. We don't
673    // want to use the first bytes unmodified because then it would be ambiguous
674    // if a given hash prefix refers to the commit id or the change id. It would
675    // have been enough to pick the last 16 bytes instead of the leading 16
676    // bytes to address that. We also reverse the bits to make it less likely
677    // that users depend on any relationship between the two ids.
678    let bytes = id.as_bytes()[4..HASH_LENGTH]
679        .iter()
680        .rev()
681        .map(|b| b.reverse_bits())
682        .collect();
683    ChangeId::new(bytes)
684}
685
/// Stand-in written to Git in place of an empty name or email, since Git does
/// not support empty ones. It is mapped back to the empty string on read.
const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING";
687
688fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature {
689    let name = signature.name;
690    let name = if name != EMPTY_STRING_PLACEHOLDER {
691        String::from_utf8_lossy(name).into_owned()
692    } else {
693        "".to_string()
694    };
695    let email = signature.email;
696    let email = if email != EMPTY_STRING_PLACEHOLDER {
697        String::from_utf8_lossy(email).into_owned()
698    } else {
699        "".to_string()
700    };
701    let time = signature.time().unwrap_or_default();
702    let timestamp = MillisSinceEpoch(time.seconds * 1000);
703    let tz_offset = time.offset.div_euclid(60); // in minutes
704    Signature {
705        name,
706        email,
707        timestamp: Timestamp {
708            timestamp,
709            tz_offset,
710        },
711    }
712}
713
714fn signature_to_git(signature: &Signature) -> gix::actor::Signature {
715    // git does not support empty names or emails
716    let name = if !signature.name.is_empty() {
717        &signature.name
718    } else {
719        EMPTY_STRING_PLACEHOLDER
720    };
721    let email = if !signature.email.is_empty() {
722        &signature.email
723    } else {
724        EMPTY_STRING_PLACEHOLDER
725    };
726    let time = gix::date::Time::new(
727        signature.timestamp.timestamp.0.div_euclid(1000),
728        signature.timestamp.tz_offset * 60, // in seconds
729    );
730    gix::actor::Signature {
731        name: name.into(),
732        email: email.into(),
733        time,
734    }
735}
736
737fn serialize_extras(commit: &Commit) -> Vec<u8> {
738    let mut proto = crate::protos::git_store::Commit {
739        change_id: commit.change_id.to_bytes(),
740        ..Default::default()
741    };
742    proto.uses_tree_conflict_format = true;
743    for predecessor in &commit.predecessors {
744        proto.predecessors.push(predecessor.to_bytes());
745    }
746    proto.encode_to_vec()
747}
748
749fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
750    let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
751    if !proto.change_id.is_empty() {
752        commit.change_id = ChangeId::new(proto.change_id);
753    }
754    if commit.root_tree.is_resolved()
755        && proto.uses_tree_conflict_format
756        && !proto.root_tree.is_empty()
757    {
758        let merge_builder: MergeBuilder<_> = proto
759            .root_tree
760            .iter()
761            .map(|id_bytes| TreeId::from_bytes(id_bytes))
762            .collect();
763        commit.root_tree = merge_builder.build();
764    }
765    for predecessor in &proto.predecessors {
766        commit.predecessors.push(CommitId::from_bytes(predecessor));
767    }
768}
769
770/// Returns `RefEdit` that will create a ref in `refs/jj/keep` if not exist.
771/// Used for preventing GC of commits we create.
772fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit {
773    let name = format!("{NO_GC_REF_NAMESPACE}{id}");
774    let new = gix::refs::Target::Object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()));
775    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(new.clone());
776    gix::refs::transaction::RefEdit {
777        change: gix::refs::transaction::Change::Update {
778            log: gix::refs::transaction::LogChange {
779                message: "used by jj".into(),
780                ..Default::default()
781            },
782            expected,
783            new,
784        },
785        name: name.try_into().unwrap(),
786        deref: false,
787    }
788}
789
790fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit {
791    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(git_ref.target);
792    gix::refs::transaction::RefEdit {
793        change: gix::refs::transaction::Change::Delete {
794            expected,
795            log: gix::refs::transaction::RefLog::AndReference,
796        },
797        name: git_ref.name,
798        deref: false,
799    }
800}
801
802/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other
803/// unreachable and non-head refs.
804fn recreate_no_gc_refs(
805    git_repo: &gix::Repository,
806    new_heads: impl IntoIterator<Item = CommitId>,
807    keep_newer: SystemTime,
808) -> BackendResult<()> {
809    // Calculate diff between existing no-gc refs and new heads.
810    let new_heads: HashSet<CommitId> = new_heads.into_iter().collect();
811    let mut no_gc_refs_to_keep_count: usize = 0;
812    let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new();
813    let git_references = git_repo
814        .references()
815        .map_err(|err| BackendError::Other(err.into()))?;
816    let no_gc_refs_iter = git_references
817        .prefixed(NO_GC_REF_NAMESPACE)
818        .map_err(|err| BackendError::Other(err.into()))?;
819    for git_ref in no_gc_refs_iter {
820        let git_ref = git_ref.map_err(BackendError::Other)?.detach();
821        let oid = git_ref.target.try_id().ok_or_else(|| {
822            let name = git_ref.name.as_bstr();
823            BackendError::Other(format!("Symbolic no-gc ref found: {name}").into())
824        })?;
825        let id = CommitId::from_bytes(oid.as_bytes());
826        let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex();
827        if new_heads.contains(&id) && name_good {
828            no_gc_refs_to_keep_count += 1;
829            continue;
830        }
831        // Check timestamp of loose ref, but this is still racy on re-import
832        // because:
833        // - existing packed ref won't be demoted to loose ref
834        // - existing loose ref won't be touched
835        //
836        // TODO: might be better to switch to a dummy merge, where new no-gc ref
837        // will always have a unique name. Doing that with the current
838        // ref-per-head strategy would increase the number of the no-gc refs.
839        // https://github.com/jj-vcs/jj/pull/2659#issuecomment-1837057782
840        let loose_ref_path = git_repo.path().join(git_ref.name.to_path());
841        if let Ok(metadata) = loose_ref_path.metadata() {
842            let mtime = metadata.modified().expect("unsupported platform?");
843            if mtime > keep_newer {
844                tracing::trace!(?git_ref, "not deleting new");
845                no_gc_refs_to_keep_count += 1;
846                continue;
847            }
848        }
849        // Also deletes no-gc ref of random name created by old jj.
850        tracing::trace!(?git_ref, ?name_good, "will delete");
851        no_gc_refs_to_delete.push(git_ref);
852    }
853    tracing::info!(
854        new_heads_count = new_heads.len(),
855        no_gc_refs_to_keep_count,
856        no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(),
857        "collected reachable refs"
858    );
859
860    // It's slow to delete packed refs one by one, so update refs all at once.
861    let ref_edits = itertools::chain(
862        no_gc_refs_to_delete.into_iter().map(to_ref_deletion),
863        new_heads.iter().map(to_no_gc_ref_update),
864    );
865    git_repo
866        .edit_references(ref_edits)
867        .map_err(|err| BackendError::Other(err.into()))?;
868
869    Ok(())
870}
871
872fn run_git_gc(program: &OsStr, git_dir: &Path, keep_newer: SystemTime) -> Result<(), GitGcError> {
873    let keep_newer = keep_newer
874        .duration_since(SystemTime::UNIX_EPOCH)
875        .unwrap_or_default(); // underflow
876    let mut git = Command::new(program);
877    git.arg("--git-dir=.") // turn off discovery
878        .arg("gc")
879        .arg(format!("--prune=@{} +0000", keep_newer.as_secs()));
880    // Don't specify it by GIT_DIR/--git-dir. On Windows, the path could be
881    // canonicalized as UNC path, which wouldn't be supported by git.
882    git.current_dir(git_dir);
883    // TODO: pass output to UI layer instead of printing directly here
884    tracing::info!(?git, "running git gc");
885    let status = git.status().map_err(GitGcError::GcCommand)?;
886    tracing::info!(?status, "git gc exited");
887    if !status.success() {
888        return Err(GitGcError::GcCommandErrorStatus(status));
889    }
890    Ok(())
891}
892
893fn validate_git_object_id(id: &impl ObjectId) -> BackendResult<gix::ObjectId> {
894    if id.as_bytes().len() != HASH_LENGTH {
895        return Err(BackendError::InvalidHashLength {
896            expected: HASH_LENGTH,
897            actual: id.as_bytes().len(),
898            object_type: id.object_type(),
899            hash: id.hex(),
900        });
901    }
902    Ok(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
903}
904
905fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError {
906    if matches!(err, gix::object::find::existing::Error::NotFound { .. }) {
907        BackendError::ObjectNotFound {
908            object_type: id.object_type(),
909            hash: id.hex(),
910            source: Box::new(err),
911        }
912    } else {
913        to_read_object_err(err, id)
914    }
915}
916
917fn to_read_object_err(
918    err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
919    id: &impl ObjectId,
920) -> BackendError {
921    BackendError::ReadObject {
922        object_type: id.object_type(),
923        hash: id.hex(),
924        source: err.into(),
925    }
926}
927
928fn to_invalid_utf8_err(source: Utf8Error, id: &impl ObjectId) -> BackendError {
929    BackendError::InvalidUtf8 {
930        object_type: id.object_type(),
931        hash: id.hex(),
932        source,
933    }
934}
935
936fn import_extra_metadata_entries_from_heads(
937    git_repo: &gix::Repository,
938    mut_table: &mut MutableTable,
939    _table_lock: &FileLock,
940    head_ids: &HashSet<&CommitId>,
941    shallow_roots: &[CommitId],
942) -> BackendResult<()> {
943    let mut work_ids = head_ids
944        .iter()
945        .filter(|&id| mut_table.get_value(id.as_bytes()).is_none())
946        .map(|&id| id.clone())
947        .collect_vec();
948    while let Some(id) = work_ids.pop() {
949        let git_object = git_repo
950            .find_object(validate_git_object_id(&id)?)
951            .map_err(|err| map_not_found_err(err, &id))?;
952        let is_shallow = shallow_roots.contains(&id);
953        // TODO(#1624): Should we read the root tree here and check if it has a
954        // `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
955        // change the description of a commit with tree-level conflicts.
956        let commit = commit_from_git_without_root_parent(&id, &git_object, is_shallow)?;
957        mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
958        work_ids.extend(
959            commit
960                .parents
961                .into_iter()
962                .filter(|id| mut_table.get_value(id.as_bytes()).is_none()),
963        );
964    }
965    Ok(())
966}
967
968impl Debug for GitBackend {
969    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
970        f.debug_struct("GitBackend")
971            .field("path", &self.git_repo_path())
972            .finish()
973    }
974}
975
976#[async_trait]
977impl Backend for GitBackend {
    /// Returns the name of this backend, as given by the associated function
    /// `Self::name()`.
    fn name(&self) -> &str {
        Self::name()
    }
981
    /// Returns the length of commit ids in bytes, which is `HASH_LENGTH`.
    fn commit_id_length(&self) -> usize {
        HASH_LENGTH
    }
985
    /// Returns the length of change ids, which is `CHANGE_ID_LENGTH`.
    fn change_id_length(&self) -> usize {
        CHANGE_ID_LENGTH
    }
989
    /// Returns the id of the root commit stored in this backend.
    fn root_commit_id(&self) -> &CommitId {
        &self.root_commit_id
    }
993
    /// Returns the change id of the root commit stored in this backend.
    fn root_change_id(&self) -> &ChangeId {
        &self.root_change_id
    }
997
    /// Returns the id of the empty tree stored in this backend.
    fn empty_tree_id(&self) -> &TreeId {
        &self.empty_tree_id
    }
1001
    /// Reports a concurrency of 1 for this backend.
    // NOTE(review): presumably a hint for how many concurrent requests callers
    // should issue; confirm against the `Backend` trait documentation.
    fn concurrency(&self) -> usize {
        1
    }
1005
1006    async fn read_file(
1007        &self,
1008        _path: &RepoPath,
1009        id: &FileId,
1010    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>> {
1011        let data = self.read_file_sync(id)?;
1012        Ok(Box::pin(Cursor::new(data)))
1013    }
1014
1015    async fn write_file(
1016        &self,
1017        _path: &RepoPath,
1018        contents: &mut (dyn AsyncRead + Send + Unpin),
1019    ) -> BackendResult<FileId> {
1020        let mut bytes = Vec::new();
1021        contents.read_to_end(&mut bytes).await.unwrap();
1022        let locked_repo = self.lock_git_repo();
1023        let oid = locked_repo
1024            .write_blob(bytes)
1025            .map_err(|err| BackendError::WriteObject {
1026                object_type: "file",
1027                source: Box::new(err),
1028            })?;
1029        Ok(FileId::new(oid.as_bytes().to_vec()))
1030    }
1031
1032    async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
1033        let git_blob_id = validate_git_object_id(id)?;
1034        let locked_repo = self.lock_git_repo();
1035        let mut blob = locked_repo
1036            .find_object(git_blob_id)
1037            .map_err(|err| map_not_found_err(err, id))?
1038            .try_into_blob()
1039            .map_err(|err| to_read_object_err(err, id))?;
1040        let target = String::from_utf8(blob.take_data())
1041            .map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?;
1042        Ok(target)
1043    }
1044
1045    async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
1046        let locked_repo = self.lock_git_repo();
1047        let oid =
1048            locked_repo
1049                .write_blob(target.as_bytes())
1050                .map_err(|err| BackendError::WriteObject {
1051                    object_type: "symlink",
1052                    source: Box::new(err),
1053                })?;
1054        Ok(SymlinkId::new(oid.as_bytes().to_vec()))
1055    }
1056
1057    async fn read_copy(&self, _id: &CopyId) -> BackendResult<CopyHistory> {
1058        Err(BackendError::Unsupported(
1059            "The Git backend doesn't support tracked copies yet".to_string(),
1060        ))
1061    }
1062
1063    async fn write_copy(&self, _contents: &CopyHistory) -> BackendResult<CopyId> {
1064        Err(BackendError::Unsupported(
1065            "The Git backend doesn't support tracked copies yet".to_string(),
1066        ))
1067    }
1068
1069    async fn get_related_copies(&self, _copy_id: &CopyId) -> BackendResult<Vec<RelatedCopy>> {
1070        Err(BackendError::Unsupported(
1071            "The Git backend doesn't support tracked copies yet".to_string(),
1072        ))
1073    }
1074
1075    async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
1076        if id == &self.empty_tree_id {
1077            return Ok(Tree::default());
1078        }
1079        let git_tree_id = validate_git_object_id(id)?;
1080
1081        let locked_repo = self.lock_git_repo();
1082        let git_tree = locked_repo
1083            .find_object(git_tree_id)
1084            .map_err(|err| map_not_found_err(err, id))?
1085            .try_into_tree()
1086            .map_err(|err| to_read_object_err(err, id))?;
1087        let mut entries: Vec<_> = git_tree
1088            .iter()
1089            .map(|entry| -> BackendResult<_> {
1090                let entry = entry.map_err(|err| to_read_object_err(err, id))?;
1091                let name = RepoPathComponentBuf::new(
1092                    str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?,
1093                )
1094                .unwrap();
1095                let value = match entry.mode().kind() {
1096                    gix::object::tree::EntryKind::Tree => {
1097                        let id = TreeId::from_bytes(entry.oid().as_bytes());
1098                        TreeValue::Tree(id)
1099                    }
1100                    gix::object::tree::EntryKind::Blob => {
1101                        let id = FileId::from_bytes(entry.oid().as_bytes());
1102                        TreeValue::File {
1103                            id,
1104                            executable: false,
1105                            copy_id: CopyId::placeholder(),
1106                        }
1107                    }
1108                    gix::object::tree::EntryKind::BlobExecutable => {
1109                        let id = FileId::from_bytes(entry.oid().as_bytes());
1110                        TreeValue::File {
1111                            id,
1112                            executable: true,
1113                            copy_id: CopyId::placeholder(),
1114                        }
1115                    }
1116                    gix::object::tree::EntryKind::Link => {
1117                        let id = SymlinkId::from_bytes(entry.oid().as_bytes());
1118                        TreeValue::Symlink(id)
1119                    }
1120                    gix::object::tree::EntryKind::Commit => {
1121                        let id = CommitId::from_bytes(entry.oid().as_bytes());
1122                        TreeValue::GitSubmodule(id)
1123                    }
1124                };
1125                Ok((name, value))
1126            })
1127            .try_collect()?;
1128        // While Git tree entries are sorted, the rule is slightly different.
1129        // Directory names are sorted as if they had trailing "/".
1130        if !entries.is_sorted_by_key(|(name, _)| name) {
1131            entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
1132        }
1133        Ok(Tree::from_sorted_entries(entries))
1134    }
1135
1136    async fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
1137        // Tree entries to be written must be sorted by Entry::filename(), which
1138        // is slightly different from the order of our backend::Tree.
1139        let entries = contents
1140            .entries()
1141            .map(|entry| {
1142                let filename = BString::from(entry.name().as_internal_str());
1143                match entry.value() {
1144                    TreeValue::File {
1145                        id,
1146                        executable: false,
1147                        copy_id: _, // TODO: Use the value
1148                    } => gix::objs::tree::Entry {
1149                        mode: gix::object::tree::EntryKind::Blob.into(),
1150                        filename,
1151                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1152                    },
1153                    TreeValue::File {
1154                        id,
1155                        executable: true,
1156                        copy_id: _, // TODO: Use the value
1157                    } => gix::objs::tree::Entry {
1158                        mode: gix::object::tree::EntryKind::BlobExecutable.into(),
1159                        filename,
1160                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1161                    },
1162                    TreeValue::Symlink(id) => gix::objs::tree::Entry {
1163                        mode: gix::object::tree::EntryKind::Link.into(),
1164                        filename,
1165                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1166                    },
1167                    TreeValue::Tree(id) => gix::objs::tree::Entry {
1168                        mode: gix::object::tree::EntryKind::Tree.into(),
1169                        filename,
1170                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1171                    },
1172                    TreeValue::GitSubmodule(id) => gix::objs::tree::Entry {
1173                        mode: gix::object::tree::EntryKind::Commit.into(),
1174                        filename,
1175                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1176                    },
1177                }
1178            })
1179            .sorted_unstable()
1180            .collect();
1181        let locked_repo = self.lock_git_repo();
1182        let oid = locked_repo
1183            .write_object(gix::objs::Tree { entries })
1184            .map_err(|err| BackendError::WriteObject {
1185                object_type: "tree",
1186                source: Box::new(err),
1187            })?;
1188        Ok(TreeId::from_bytes(oid.as_bytes()))
1189    }
1190
1191    #[tracing::instrument(skip(self))]
1192    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
1193        if *id == self.root_commit_id {
1194            return Ok(make_root_commit(
1195                self.root_change_id().clone(),
1196                self.empty_tree_id.clone(),
1197            ));
1198        }
1199        let git_commit_id = validate_git_object_id(id)?;
1200
1201        let mut commit = {
1202            let locked_repo = self.lock_git_repo();
1203            let git_object = locked_repo
1204                .find_object(git_commit_id)
1205                .map_err(|err| map_not_found_err(err, id))?;
1206            let is_shallow = self.shallow_root_ids(&locked_repo)?.contains(id);
1207            commit_from_git_without_root_parent(id, &git_object, is_shallow)?
1208        };
1209        if commit.parents.is_empty() {
1210            commit.parents.push(self.root_commit_id.clone());
1211        }
1212
1213        let table = self.cached_extra_metadata_table()?;
1214        if let Some(extras) = table.get_value(id.as_bytes()) {
1215            deserialize_extras(&mut commit, extras);
1216        } else {
1217            // TODO: Remove this hack and map to ObjectNotFound error if we're sure that
1218            // there are no reachable ancestor commits without extras metadata. Git commits
1219            // imported by jj < 0.8.0 might not have extras (#924).
1220            // https://github.com/jj-vcs/jj/issues/2343
1221            tracing::info!("unimported Git commit found");
1222            self.import_head_commits([id])?;
1223            let table = self.cached_extra_metadata_table()?;
1224            let extras = table.get_value(id.as_bytes()).unwrap();
1225            deserialize_extras(&mut commit, extras);
1226        }
1227        Ok(commit)
1228    }
1229
    /// Writes `contents` as a Git commit, creates a no-gc ref for it, and
    /// records its extras in the extra metadata table.
    ///
    /// If `sign_with` is given, the serialized commit is signed and the
    /// signature is stored in a `gpgsig` header as well as in
    /// `contents.secure_sig`.
    ///
    /// Returns the id of the written commit together with `contents`, whose
    /// committer timestamp is updated to the one actually written.
    ///
    /// # Errors
    ///
    /// Fails if `contents` has no parents, if it is a merge that has the root
    /// commit as one of its parents, if a tree or parent id has an invalid
    /// length, or if signing or writing to the repo fails.
    ///
    /// # Panics
    ///
    /// Panics if `contents.secure_sig` is already set, or if a conflict label
    /// contains a newline.
    async fn write_commit(
        &self,
        mut contents: Commit,
        mut sign_with: Option<&mut SigningFn>,
    ) -> BackendResult<(CommitId, Commit)> {
        assert!(contents.secure_sig.is_none(), "commit.secure_sig was set");

        let locked_repo = self.lock_git_repo();
        let tree_ids = &contents.root_tree;
        // A conflicted root tree is written as a special tree that keeps all
        // terms of the conflict.
        let git_tree_id = match tree_ids.as_resolved() {
            Some(tree_id) => validate_git_object_id(tree_id)?,
            None => write_tree_conflict(&locked_repo, tree_ids)?,
        };
        let author = signature_to_git(&contents.author);
        let mut committer = signature_to_git(&contents.committer);
        let message = &contents.description;
        if contents.parents.is_empty() {
            return Err(BackendError::Other(
                "Cannot write a commit with no parents".into(),
            ));
        }
        let mut parents = SmallVec::new();
        for parent_id in &contents.parents {
            if *parent_id == self.root_commit_id {
                // Git doesn't have a root commit, so if the parent is the root commit, we don't
                // add it to the list of parents to write in the Git commit. We also check that
                // there are no other parents since Git cannot represent a merge between a root
                // commit and another commit.
                if contents.parents.len() > 1 {
                    return Err(BackendError::Unsupported(
                        "The Git backend does not support creating merge commits with the root \
                         commit as one of the parents."
                            .to_owned(),
                    ));
                }
            } else {
                parents.push(validate_git_object_id(parent_id)?);
            }
        }
        let mut extra_headers: Vec<(BString, BString)> = vec![];
        if !contents.conflict_labels.is_resolved() {
            // Labels cannot contain '\n' since we use it as a separator in the header.
            assert!(
                contents
                    .conflict_labels
                    .iter()
                    .all(|label| !label.contains('\n'))
            );
            let mut joined_with_newlines = contents.conflict_labels.iter().join("\n");
            joined_with_newlines.push('\n');
            extra_headers.push((
                JJ_CONFLICT_LABELS_COMMIT_HEADER.into(),
                joined_with_newlines.into(),
            ));
        }
        if !tree_ids.is_resolved() {
            // The ids of all conflict terms are recorded as space-separated hex.
            let value = tree_ids.iter().map(|id| id.hex()).join(" ");
            extra_headers.push((JJ_TREES_COMMIT_HEADER.into(), value.into()));
        }
        if self.write_change_id_header {
            extra_headers.push((
                CHANGE_ID_COMMIT_HEADER.into(),
                contents.change_id.reverse_hex().into(),
            ));
        }

        // Write the empty tree object if any of the trees is the empty tree.
        if tree_ids.iter().any(|id| id == &self.empty_tree_id) {
            let tree = gix::objs::Tree::empty();
            let tree_id =
                locked_repo
                    .write_object(&tree)
                    .map_err(|err| BackendError::WriteObject {
                        object_type: "tree",
                        source: Box::new(err),
                    })?;
            assert!(tree_id.is_empty_tree());
        }

        let extras = serialize_extras(&contents);

        // If two writers write commits of the same id with different metadata, they
        // will both succeed and the metadata entries will be "merged" later. Since
        // metadata entry is keyed by the commit id, one of the entries would be lost.
        // To prevent such race condition locally, we extend the scope covered by the
        // table lock. This is still racy if multiple machines are involved and the
        // repository is rsync-ed.
        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
        // Each iteration writes a commit object; the loop ends once the
        // resulting id doesn't clash with an existing entry of different extras.
        let id = loop {
            let mut commit = gix::objs::Commit {
                message: message.to_owned().into(),
                tree: git_tree_id,
                author: author.clone(),
                committer: committer.clone(),
                encoding: None,
                parents: parents.clone(),
                extra_headers: extra_headers.clone(),
            };

            if let Some(sign) = &mut sign_with {
                // we don't use gix pool, but at least use their heuristic
                let mut data = Vec::with_capacity(512);
                commit.write_to(&mut data).unwrap();

                // The signature covers the commit as serialized before the
                // `gpgsig` header is added.
                let sig = sign(&data).map_err(|err| BackendError::WriteObject {
                    object_type: "commit",
                    source: Box::new(err),
                })?;
                commit
                    .extra_headers
                    .push(("gpgsig".into(), sig.clone().into()));
                contents.secure_sig = Some(SecureSig { data, sig });
            }

            let git_id =
                locked_repo
                    .write_object(&commit)
                    .map_err(|err| BackendError::WriteObject {
                        object_type: "commit",
                        source: Box::new(err),
                    })?;

            match table.get_value(git_id.as_bytes()) {
                Some(existing_extras) if existing_extras != extras => {
                    // It's possible a commit already exists with the same
                    // commit id but different change id. Adjust the timestamp
                    // until this is no longer the case.
                    //
                    // For example, this can happen when rebasing duplicate
                    // commits, https://github.com/jj-vcs/jj/issues/694.
                    //
                    // `jj` resets the committer timestamp to the current
                    // timestamp whenever it rewrites a commit. So, it's
                    // unlikely for the timestamp to be 0 even if the original
                    // commit had its timestamp set to 0. Moreover, we test that
                    // a commit with a negative timestamp can still be written
                    // and read back by `jj`.
                    committer.time.seconds -= 1;
                }
                _ => break CommitId::from_bytes(git_id.as_bytes()),
            }
        };

        // Everything up to this point had no permanent effect on the repo except
        // GC-able objects
        locked_repo
            .edit_reference(to_no_gc_ref_update(&id))
            .map_err(|err| BackendError::Other(Box::new(err)))?;

        // Update the signature to match the one that was actually written to the object
        // store
        contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000);
        let mut mut_table = table.start_mutation();
        mut_table.add_entry(id.to_bytes(), extras);
        self.save_extra_metadata_table(mut_table, &table_lock)?;
        Ok((id, contents))
    }
1386
1387    fn get_copy_records(
1388        &self,
1389        paths: Option<&[RepoPathBuf]>,
1390        root_id: &CommitId,
1391        head_id: &CommitId,
1392    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>> {
1393        let repo = self.git_repo();
1394        let root_tree = self.read_tree_for_commit(&repo, root_id)?;
1395        let head_tree = self.read_tree_for_commit(&repo, head_id)?;
1396
1397        let change_to_copy_record =
1398            |change: gix::object::tree::diff::Change| -> BackendResult<Option<CopyRecord>> {
1399                let gix::object::tree::diff::Change::Rewrite {
1400                    source_location,
1401                    source_entry_mode,
1402                    source_id,
1403                    entry_mode: dest_entry_mode,
1404                    location: dest_location,
1405                    ..
1406                } = change
1407                else {
1408                    return Ok(None);
1409                };
1410                // TODO: Renamed symlinks cannot be returned because CopyRecord
1411                // expects `source_file: FileId`.
1412                if !source_entry_mode.is_blob() || !dest_entry_mode.is_blob() {
1413                    return Ok(None);
1414                }
1415
1416                let source = str::from_utf8(source_location)
1417                    .map_err(|err| to_invalid_utf8_err(err, root_id))?;
1418                let dest = str::from_utf8(dest_location)
1419                    .map_err(|err| to_invalid_utf8_err(err, head_id))?;
1420
1421                let target = RepoPathBuf::from_internal_string(dest).unwrap();
1422                if !paths.is_none_or(|paths| paths.contains(&target)) {
1423                    return Ok(None);
1424                }
1425
1426                Ok(Some(CopyRecord {
1427                    target,
1428                    target_commit: head_id.clone(),
1429                    source: RepoPathBuf::from_internal_string(source).unwrap(),
1430                    source_file: FileId::from_bytes(source_id.as_bytes()),
1431                    source_commit: root_id.clone(),
1432                }))
1433            };
1434
1435        let mut records: Vec<BackendResult<CopyRecord>> = Vec::new();
1436        root_tree
1437            .changes()
1438            .map_err(|err| BackendError::Other(err.into()))?
1439            .options(|opts| {
1440                opts.track_path().track_rewrites(Some(gix::diff::Rewrites {
1441                    copies: Some(gix::diff::rewrites::Copies {
1442                        source: gix::diff::rewrites::CopySource::FromSetOfModifiedFiles,
1443                        percentage: Some(0.5),
1444                    }),
1445                    percentage: Some(0.5),
1446                    limit: 1000,
1447                    track_empty: false,
1448                }));
1449            })
1450            .for_each_to_obtain_tree_with_cache(
1451                &head_tree,
1452                &mut self.new_diff_platform()?,
1453                |change| -> BackendResult<_> {
1454                    match change_to_copy_record(change) {
1455                        Ok(None) => {}
1456                        Ok(Some(change)) => records.push(Ok(change)),
1457                        Err(err) => records.push(Err(err)),
1458                    }
1459                    Ok(gix::object::tree::diff::Action::Continue(()))
1460                },
1461            )
1462            .map_err(|err| BackendError::Other(err.into()))?;
1463        Ok(futures::stream::iter(records).boxed())
1464    }
1465
1466    #[tracing::instrument(skip(self, index))]
1467    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
1468        let git_repo = self.lock_git_repo();
1469        let new_heads = index
1470            .all_heads_for_gc()
1471            .map_err(|err| BackendError::Other(err.into()))?
1472            .filter(|id| *id != self.root_commit_id);
1473        recreate_no_gc_refs(&git_repo, new_heads, keep_newer)?;
1474
1475        // No locking is needed since we aren't going to add new "commits".
1476        let table = self.cached_extra_metadata_table()?;
1477        // TODO: remove unreachable entries from extras table if segment file
1478        // mtime <= keep_newer? (it won't be consistent with no-gc refs
1479        // preserved by the keep_newer timestamp though)
1480        self.extra_metadata_store
1481            .gc(&table, keep_newer)
1482            .map_err(|err| BackendError::Other(err.into()))?;
1483
1484        run_git_gc(
1485            self.git_executable.as_ref(),
1486            self.git_repo_path(),
1487            keep_newer,
1488        )
1489        .map_err(|err| BackendError::Other(err.into()))?;
1490        // Since "git gc" will move loose refs into packed refs, in-memory
1491        // packed-refs cache should be invalidated without relying on mtime.
1492        git_repo.refs.force_refresh_packed_buffer().ok();
1493        Ok(())
1494    }
1495}
1496
1497/// Write a tree conflict as a special tree with `.jjconflict-base-N` and
1498/// `.jjconflict-side-N` subtrees. This ensure that the parts are not GC'd.
1499/// Also includes a `JJ-CONFLICT-README` file explaining why these trees are
1500/// present. The rest of the tree is copied from the first term of the conflict,
1501/// which prevents editors with Git support from highlighting all files as new.
1502fn write_tree_conflict(
1503    repo: &gix::Repository,
1504    conflict: &Merge<TreeId>,
1505) -> BackendResult<gix::ObjectId> {
1506    // Tree entries to be written must be sorted by Entry::filename().
1507    let mut entries = itertools::chain(
1508        conflict
1509            .removes()
1510            .enumerate()
1511            .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)),
1512        conflict
1513            .adds()
1514            .enumerate()
1515            .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)),
1516    )
1517    .map(|(name, tree_id)| gix::objs::tree::Entry {
1518        mode: gix::object::tree::EntryKind::Tree.into(),
1519        filename: name.into(),
1520        oid: gix::ObjectId::from_bytes_or_panic(tree_id.as_bytes()),
1521    })
1522    .collect_vec();
1523    let readme_id = repo
1524        .write_blob(
1525            r#"This commit was made by jj, https://jj-vcs.dev/.
1526The commit contains file conflicts, and therefore looks wrong when used with
1527plain Git or other tools that are unfamiliar with jj.
1528
1529The .jjconflict-* directories represent the different inputs to the conflict.
1530For details, see
1531https://docs.jj-vcs.dev/latest/git-compatibility/#format-mapping-details
1532
1533If you see this file in your working copy, it probably means that you used a
1534regular `git` command to check out a conflicted commit. Use `jj abandon` to
1535recover.
1536"#,
1537        )
1538        .map_err(|err| {
1539            BackendError::Other(format!("Failed to write README for conflict tree: {err}").into())
1540        })?
1541        .detach();
1542    entries.push(gix::objs::tree::Entry {
1543        mode: gix::object::tree::EntryKind::Blob.into(),
1544        filename: JJ_CONFLICT_README_FILE_NAME.into(),
1545        oid: readme_id,
1546    });
1547    let first_tree_id = conflict.first();
1548    let first_tree = repo
1549        .find_tree(gix::ObjectId::from_bytes_or_panic(first_tree_id.as_bytes()))
1550        .map_err(|err| to_read_object_err(err, first_tree_id))?;
1551    for entry in first_tree.iter() {
1552        let entry = entry.map_err(|err| to_read_object_err(err, first_tree_id))?;
1553        if !entry.filename().starts_with(b".jjconflict")
1554            && entry.filename() != JJ_CONFLICT_README_FILE_NAME
1555        {
1556            entries.push(entry.detach().into());
1557        }
1558    }
1559    entries.sort_unstable();
1560    let id = repo
1561        .write_object(gix::objs::Tree { entries })
1562        .map_err(|err| BackendError::WriteObject {
1563            object_type: "tree",
1564            source: Box::new(err),
1565        })?;
1566    Ok(id.detach())
1567}
1568
1569#[cfg(test)]
1570mod tests {
1571    use assert_matches::assert_matches;
1572    use gix::date::parse::TimeBuf;
1573    use gix::objs::CommitRef;
1574    use indoc::indoc;
1575    use pollster::FutureExt as _;
1576
1577    use super::*;
1578    use crate::config::StackedConfig;
1579    use crate::content_hash::blake2b_hash;
1580    use crate::hex_util;
1581    use crate::tests::TestResult;
1582    use crate::tests::new_temp_dir;
1583
    // User identity written into the Git config overrides built by
    // `git_config()` and reused for the commit signatures created in the tests
    // below.
    const GIT_USER: &str = "Someone";
    const GIT_EMAIL: &str = "someone@example.com";
1586
1587    fn git_config() -> Vec<bstr::BString> {
1588        vec![
1589            format!("user.name = {GIT_USER}").into(),
1590            format!("user.email = {GIT_EMAIL}").into(),
1591            "init.defaultBranch = master".into(),
1592        ]
1593    }
1594
1595    fn open_options() -> gix::open::Options {
1596        gix::open::Options::isolated()
1597            .config_overrides(git_config())
1598            .strict_config(true)
1599    }
1600
1601    fn git_init(directory: impl AsRef<Path>) -> gix::Repository {
1602        gix::ThreadSafeRepository::init_opts(
1603            directory,
1604            gix::create::Kind::WithWorktree,
1605            gix::create::Options::default(),
1606            open_options(),
1607        )
1608        .unwrap()
1609        .to_thread_local()
1610    }
1611
1612    #[test]
1613    fn read_plain_git_commit() -> TestResult {
1614        let settings = user_settings();
1615        let temp_dir = new_temp_dir();
1616        let store_path = temp_dir.path();
1617        let git_repo_path = temp_dir.path().join("git");
1618        let git_repo = git_init(git_repo_path);
1619
1620        // Add a commit with some files in
1621        let blob1 = git_repo.write_blob(b"content1")?.detach();
1622        let blob2 = git_repo.write_blob(b"normal")?.detach();
1623        let mut dir_tree_editor = git_repo.empty_tree().edit()?;
1624        dir_tree_editor.upsert("normal", gix::object::tree::EntryKind::Blob, blob1)?;
1625        dir_tree_editor.upsert("symlink", gix::object::tree::EntryKind::Link, blob2)?;
1626        let dir_tree_id = dir_tree_editor.write()?.detach();
1627        let mut root_tree_builder = git_repo.empty_tree().edit()?;
1628        root_tree_builder.upsert("dir", gix::object::tree::EntryKind::Tree, dir_tree_id)?;
1629        let root_tree_id = root_tree_builder.write()?.detach();
1630        let git_author = gix::actor::Signature {
1631            name: "git author".into(),
1632            email: "git.author@example.com".into(),
1633            time: gix::date::Time::new(1000, 60 * 60),
1634        };
1635        let git_committer = gix::actor::Signature {
1636            name: "git committer".into(),
1637            email: "git.committer@example.com".into(),
1638            time: gix::date::Time::new(2000, -480 * 60),
1639        };
1640        let git_commit_id = git_repo
1641            .commit_as(
1642                git_committer.to_ref(&mut TimeBuf::default()),
1643                git_author.to_ref(&mut TimeBuf::default()),
1644                "refs/heads/dummy",
1645                "git commit message",
1646                root_tree_id,
1647                [] as [gix::ObjectId; 0],
1648            )?
1649            .detach();
1650        git_repo.find_reference("refs/heads/dummy")?.delete()?;
1651        let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263");
1652        // The change id is the leading reverse bits of the commit id
1653        let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25");
1654        // Check that the git commit above got the hash we expect
1655        assert_eq!(
1656            git_commit_id.as_bytes(),
1657            commit_id.as_bytes(),
1658            "{git_commit_id:?} vs {commit_id:?}"
1659        );
1660
1661        // Add an empty commit on top
1662        let git_commit_id2 = git_repo
1663            .commit_as(
1664                git_committer.to_ref(&mut TimeBuf::default()),
1665                git_author.to_ref(&mut TimeBuf::default()),
1666                "refs/heads/dummy2",
1667                "git commit message 2",
1668                root_tree_id,
1669                [git_commit_id],
1670            )?
1671            .detach();
1672        git_repo.find_reference("refs/heads/dummy2")?.delete()?;
1673        let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes());
1674
1675        let backend = GitBackend::init_external(&settings, store_path, git_repo.path())?;
1676
1677        // Import the head commit and its ancestors
1678        backend.import_head_commits([&commit_id2])?;
1679        // Ref should be created only for the head commit
1680        let git_refs = backend
1681            .git_repo()
1682            .references()?
1683            .prefixed("refs/jj/keep/")?
1684            .map(|git_ref| git_ref.unwrap().id().detach())
1685            .collect_vec();
1686        assert_eq!(git_refs, vec![git_commit_id2]);
1687
1688        let commit = backend.read_commit(&commit_id).block_on()?;
1689        assert_eq!(&commit.change_id, &change_id);
1690        assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
1691        assert_eq!(commit.predecessors, vec![]);
1692        assert_eq!(
1693            commit.root_tree,
1694            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1695        );
1696        assert_eq!(commit.description, "git commit message");
1697        assert_eq!(commit.author.name, "git author");
1698        assert_eq!(commit.author.email, "git.author@example.com");
1699        assert_eq!(
1700            commit.author.timestamp.timestamp,
1701            MillisSinceEpoch(1000 * 1000)
1702        );
1703        assert_eq!(commit.author.timestamp.tz_offset, 60);
1704        assert_eq!(commit.committer.name, "git committer");
1705        assert_eq!(commit.committer.email, "git.committer@example.com");
1706        assert_eq!(
1707            commit.committer.timestamp.timestamp,
1708            MillisSinceEpoch(2000 * 1000)
1709        );
1710        assert_eq!(commit.committer.timestamp.tz_offset, -480);
1711
1712        let root_tree = backend
1713            .read_tree(
1714                RepoPath::root(),
1715                &TreeId::from_bytes(root_tree_id.as_bytes()),
1716            )
1717            .block_on()?;
1718        let mut root_entries = root_tree.entries();
1719        let dir = root_entries.next().unwrap();
1720        assert_eq!(root_entries.next(), None);
1721        assert_eq!(dir.name().as_internal_str(), "dir");
1722        assert_eq!(
1723            dir.value(),
1724            &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes()))
1725        );
1726
1727        let dir_tree = backend
1728            .read_tree(
1729                RepoPath::from_internal_string("dir")?,
1730                &TreeId::from_bytes(dir_tree_id.as_bytes()),
1731            )
1732            .block_on()?;
1733        let mut entries = dir_tree.entries();
1734        let file = entries.next().unwrap();
1735        let symlink = entries.next().unwrap();
1736        assert_eq!(entries.next(), None);
1737        assert_eq!(file.name().as_internal_str(), "normal");
1738        assert_eq!(
1739            file.value(),
1740            &TreeValue::File {
1741                id: FileId::from_bytes(blob1.as_bytes()),
1742                executable: false,
1743                copy_id: CopyId::placeholder(),
1744            }
1745        );
1746        assert_eq!(symlink.name().as_internal_str(), "symlink");
1747        assert_eq!(
1748            symlink.value(),
1749            &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
1750        );
1751
1752        let commit2 = backend.read_commit(&commit_id2).block_on()?;
1753        assert_eq!(commit2.parents, vec![commit_id.clone()]);
1754        assert_eq!(commit.predecessors, vec![]);
1755        assert_eq!(
1756            commit.root_tree,
1757            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1758        );
1759        Ok(())
1760    }
1761
1762    #[test]
1763    fn read_git_commit_without_importing() -> TestResult {
1764        let settings = user_settings();
1765        let temp_dir = new_temp_dir();
1766        let store_path = temp_dir.path();
1767        let git_repo_path = temp_dir.path().join("git");
1768        let git_repo = git_init(&git_repo_path);
1769
1770        let signature = gix::actor::Signature {
1771            name: GIT_USER.into(),
1772            email: GIT_EMAIL.into(),
1773            time: gix::date::Time::now_utc(),
1774        };
1775        let empty_tree_id = gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904")?;
1776        let git_commit_id = git_repo.commit_as(
1777            signature.to_ref(&mut TimeBuf::default()),
1778            signature.to_ref(&mut TimeBuf::default()),
1779            "refs/heads/main",
1780            "git commit message",
1781            empty_tree_id,
1782            [] as [gix::ObjectId; 0],
1783        )?;
1784
1785        let backend = GitBackend::init_external(&settings, store_path, git_repo.path())?;
1786
1787        // read_commit() without import_head_commits() works as of now. This might be
1788        // changed later.
1789        assert!(
1790            backend
1791                .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1792                .block_on()
1793                .is_ok()
1794        );
1795        assert!(
1796            backend
1797                .cached_extra_metadata_table()?
1798                .get_value(git_commit_id.as_bytes())
1799                .is_some(),
1800            "extra metadata should have been be created"
1801        );
1802        Ok(())
1803    }
1804
1805    #[test]
1806    fn read_signed_git_commit() -> TestResult {
1807        let settings = user_settings();
1808        let temp_dir = new_temp_dir();
1809        let store_path = temp_dir.path();
1810        let git_repo_path = temp_dir.path().join("git");
1811        let git_repo = git_init(git_repo_path);
1812
1813        let signature = gix::actor::Signature {
1814            name: GIT_USER.into(),
1815            email: GIT_EMAIL.into(),
1816            time: gix::date::Time::now_utc(),
1817        };
1818        let empty_tree_id = gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904")?;
1819
1820        let secure_sig =
1821            "here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP\n";
1822
1823        let mut commit = gix::objs::Commit {
1824            tree: empty_tree_id,
1825            parents: smallvec::SmallVec::new(),
1826            author: signature.clone(),
1827            committer: signature.clone(),
1828            encoding: None,
1829            message: "git commit message".into(),
1830            extra_headers: Vec::new(),
1831        };
1832
1833        let mut commit_buf = Vec::new();
1834        commit.write_to(&mut commit_buf)?;
1835        let commit_str = str::from_utf8(&commit_buf)?;
1836
1837        commit
1838            .extra_headers
1839            .push(("gpgsig".into(), secure_sig.into()));
1840
1841        let git_commit_id = git_repo.write_object(&commit)?;
1842
1843        let backend = GitBackend::init_external(&settings, store_path, git_repo.path())?;
1844
1845        let commit = backend
1846            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1847            .block_on()?;
1848
1849        let sig = commit.secure_sig.expect("failed to read the signature");
1850
1851        // converting to string for nicer assert diff
1852        assert_eq!(str::from_utf8(&sig.sig)?, secure_sig);
1853        assert_eq!(str::from_utf8(&sig.data)?, commit_str);
1854        Ok(())
1855    }
1856
1857    #[test]
1858    fn change_id_parsing() {
1859        let id = |commit_object_bytes: &[u8]| {
1860            extract_change_id_from_commit(&CommitRef::from_bytes(commit_object_bytes).unwrap())
1861        };
1862
1863        let commit_with_id = indoc! {b"
1864            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1865            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1866            author JJ Fan <jjfan@example.com> 1757112665 -0700
1867            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1868            extra-header blah
1869            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1870
1871            test-commit
1872        "};
1873        insta::assert_compact_debug_snapshot!(
1874            id(commit_with_id),
1875            @r#"Some(ChangeId("efbc06dc4721683f2a45568dbda31e99"))"#
1876        );
1877
1878        let commit_without_id = indoc! {b"
1879            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1880            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1881            author JJ Fan <jjfan@example.com> 1757112665 -0700
1882            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1883            extra-header blah
1884
1885            no id in header
1886        "};
1887        insta::assert_compact_debug_snapshot!(
1888            id(commit_without_id),
1889            @"None"
1890        );
1891
1892        let commit = indoc! {b"
1893            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1894            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1895            author JJ Fan <jjfan@example.com> 1757112665 -0700
1896            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1897            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1898            extra-header blah
1899            change-id abcabcabcabcabcabcabcabcabcabcab
1900
1901            valid change id first
1902        "};
1903        insta::assert_compact_debug_snapshot!(
1904            id(commit),
1905            @r#"Some(ChangeId("efbc06dc4721683f2a45568dbda31e99"))"#
1906        );
1907
1908        // We only look at the first change id if multiple are present, so this should
1909        // error
1910        let commit = indoc! {b"
1911            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1912            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1913            author JJ Fan <jjfan@example.com> 1757112665 -0700
1914            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1915            change-id abcabcabcabcabcabcabcabcabcabcab
1916            extra-header blah
1917            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1918
1919            valid change id first
1920        "};
1921        insta::assert_compact_debug_snapshot!(
1922            id(commit),
1923            @"None"
1924        );
1925    }
1926
1927    #[test]
1928    fn round_trip_change_id_via_git_header() -> TestResult {
1929        let settings = user_settings();
1930        let temp_dir = new_temp_dir();
1931
1932        let store_path = temp_dir.path().join("store");
1933        fs::create_dir(&store_path)?;
1934        let empty_store_path = temp_dir.path().join("empty_store");
1935        fs::create_dir(&empty_store_path)?;
1936        let git_repo_path = temp_dir.path().join("git");
1937        let git_repo = git_init(git_repo_path);
1938
1939        let backend = GitBackend::init_external(&settings, &store_path, git_repo.path())?;
1940        let original_change_id = ChangeId::from_hex("1111eeee1111eeee1111eeee1111eeee");
1941        let commit = Commit {
1942            parents: vec![backend.root_commit_id().clone()],
1943            predecessors: vec![],
1944            root_tree: Merge::resolved(backend.empty_tree_id().clone()),
1945            conflict_labels: Merge::resolved(String::new()),
1946            change_id: original_change_id.clone(),
1947            description: "initial".to_string(),
1948            author: create_signature(),
1949            committer: create_signature(),
1950            secure_sig: None,
1951        };
1952
1953        let (initial_commit_id, _init_commit) = backend.write_commit(commit, None).block_on()?;
1954        let commit = backend.read_commit(&initial_commit_id).block_on()?;
1955        assert_eq!(
1956            commit.change_id, original_change_id,
1957            "The change-id header did not roundtrip"
1958        );
1959
1960        // Because of how change ids are also persisted in extra proto files,
1961        // initialize a new store without those files, but reuse the same git
1962        // storage. This change-id must be derived from the git commit header.
1963        let no_extra_backend =
1964            GitBackend::init_external(&settings, &empty_store_path, git_repo.path())?;
1965        let no_extra_commit = no_extra_backend
1966            .read_commit(&initial_commit_id)
1967            .block_on()?;
1968
1969        assert_eq!(
1970            no_extra_commit.change_id, original_change_id,
1971            "The change-id header did not roundtrip"
1972        );
1973        Ok(())
1974    }
1975
1976    #[test]
1977    fn read_empty_string_placeholder() {
1978        let git_signature1 = gix::actor::Signature {
1979            name: EMPTY_STRING_PLACEHOLDER.into(),
1980            email: "git.author@example.com".into(),
1981            time: gix::date::Time::new(1000, 60 * 60),
1982        };
1983        let signature1 = signature_from_git(git_signature1.to_ref(&mut TimeBuf::default()));
1984        assert!(signature1.name.is_empty());
1985        assert_eq!(signature1.email, "git.author@example.com");
1986        let git_signature2 = gix::actor::Signature {
1987            name: "git committer".into(),
1988            email: EMPTY_STRING_PLACEHOLDER.into(),
1989            time: gix::date::Time::new(2000, -480 * 60),
1990        };
1991        let signature2 = signature_from_git(git_signature2.to_ref(&mut TimeBuf::default()));
1992        assert_eq!(signature2.name, "git committer");
1993        assert!(signature2.email.is_empty());
1994    }
1995
1996    #[test]
1997    fn write_empty_string_placeholder() {
1998        let signature1 = Signature {
1999            name: "".to_string(),
2000            email: "someone@example.com".to_string(),
2001            timestamp: Timestamp {
2002                timestamp: MillisSinceEpoch(0),
2003                tz_offset: 0,
2004            },
2005        };
2006        let git_signature1 = signature_to_git(&signature1);
2007        assert_eq!(git_signature1.name, EMPTY_STRING_PLACEHOLDER);
2008        assert_eq!(git_signature1.email, "someone@example.com");
2009        let signature2 = Signature {
2010            name: "Someone".to_string(),
2011            email: "".to_string(),
2012            timestamp: Timestamp {
2013                timestamp: MillisSinceEpoch(0),
2014                tz_offset: 0,
2015            },
2016        };
2017        let git_signature2 = signature_to_git(&signature2);
2018        assert_eq!(git_signature2.name, "Someone");
2019        assert_eq!(git_signature2.email, EMPTY_STRING_PLACEHOLDER);
2020    }
2021
2022    /// Test that parents get written correctly
2023    #[test]
2024    fn git_commit_parents() -> TestResult {
2025        let settings = user_settings();
2026        let temp_dir = new_temp_dir();
2027        let store_path = temp_dir.path();
2028        let git_repo_path = temp_dir.path().join("git");
2029        let git_repo = git_init(&git_repo_path);
2030
2031        let backend = GitBackend::init_external(&settings, store_path, git_repo.path())?;
2032        let mut commit = Commit {
2033            parents: vec![],
2034            predecessors: vec![],
2035            root_tree: Merge::resolved(backend.empty_tree_id().clone()),
2036            conflict_labels: Merge::resolved(String::new()),
2037            change_id: ChangeId::from_hex("abc123"),
2038            description: "".to_string(),
2039            author: create_signature(),
2040            committer: create_signature(),
2041            secure_sig: None,
2042        };
2043
2044        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2045            backend.write_commit(commit, None).block_on()
2046        };
2047
2048        // No parents
2049        commit.parents = vec![];
2050        assert_matches!(
2051            write_commit(commit.clone()),
2052            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
2053        );
2054
2055        // Only root commit as parent
2056        commit.parents = vec![backend.root_commit_id().clone()];
2057        let first_id = write_commit(commit.clone())?.0;
2058        let first_commit = backend.read_commit(&first_id).block_on()?;
2059        assert_eq!(first_commit, commit);
2060        let first_git_commit = git_repo.find_commit(git_id(&first_id))?;
2061        assert!(first_git_commit.parent_ids().collect_vec().is_empty());
2062
2063        // Only non-root commit as parent
2064        commit.parents = vec![first_id.clone()];
2065        let second_id = write_commit(commit.clone())?.0;
2066        let second_commit = backend.read_commit(&second_id).block_on()?;
2067        assert_eq!(second_commit, commit);
2068        let second_git_commit = git_repo.find_commit(git_id(&second_id))?;
2069        assert_eq!(
2070            second_git_commit.parent_ids().collect_vec(),
2071            vec![git_id(&first_id)]
2072        );
2073
2074        // Merge commit
2075        commit.parents = vec![first_id.clone(), second_id.clone()];
2076        let merge_id = write_commit(commit.clone())?.0;
2077        let merge_commit = backend.read_commit(&merge_id).block_on()?;
2078        assert_eq!(merge_commit, commit);
2079        let merge_git_commit = git_repo.find_commit(git_id(&merge_id))?;
2080        assert_eq!(
2081            merge_git_commit.parent_ids().collect_vec(),
2082            vec![git_id(&first_id), git_id(&second_id)]
2083        );
2084
2085        // Merge commit with root as one parent
2086        commit.parents = vec![first_id, backend.root_commit_id().clone()];
2087        assert_matches!(
2088            write_commit(commit),
2089            Err(BackendError::Unsupported(message)) if message.contains("root commit")
2090        );
2091        Ok(())
2092    }
2093
2094    #[test]
2095    fn write_tree_conflicts() -> TestResult {
2096        let settings = user_settings();
2097        let temp_dir = new_temp_dir();
2098        let store_path = temp_dir.path();
2099        let git_repo_path = temp_dir.path().join("git");
2100        let git_repo = git_init(&git_repo_path);
2101
2102        let backend = GitBackend::init_external(&settings, store_path, git_repo.path())?;
2103        let create_tree = |i| {
2104            let blob_id = git_repo.write_blob(format!("content {i}")).unwrap();
2105            let mut tree_builder = git_repo.empty_tree().edit().unwrap();
2106            tree_builder
2107                .upsert(
2108                    format!("file{i}"),
2109                    gix::object::tree::EntryKind::Blob,
2110                    blob_id,
2111                )
2112                .unwrap();
2113            TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
2114        };
2115
2116        let root_tree = Merge::from_removes_adds(
2117            vec![create_tree(0), create_tree(1)],
2118            vec![create_tree(2), create_tree(3), create_tree(4)],
2119        );
2120        let mut commit = Commit {
2121            parents: vec![backend.root_commit_id().clone()],
2122            predecessors: vec![],
2123            root_tree: root_tree.clone(),
2124            conflict_labels: Merge::resolved(String::new()),
2125            change_id: ChangeId::from_hex("abc123"),
2126            description: "".to_string(),
2127            author: create_signature(),
2128            committer: create_signature(),
2129            secure_sig: None,
2130        };
2131
2132        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2133            backend.write_commit(commit, None).block_on()
2134        };
2135
2136        // When writing a tree-level conflict, the root tree on the git side has the
2137        // individual trees as subtrees.
2138        let read_commit_id = write_commit(commit.clone())?.0;
2139        let read_commit = backend.read_commit(&read_commit_id).block_on()?;
2140        assert_eq!(read_commit, commit);
2141        let git_commit = git_repo.find_commit(gix::ObjectId::from_bytes_or_panic(
2142            read_commit_id.as_bytes(),
2143        ))?;
2144        let git_tree = git_repo.find_tree(git_commit.tree_id()?)?;
2145        let jj_conflict_entries = git_tree
2146            .iter()
2147            .map(Result::unwrap)
2148            .filter(|entry| {
2149                entry.filename().starts_with(b".jjconflict")
2150                    || entry.filename() == JJ_CONFLICT_README_FILE_NAME
2151            })
2152            .collect_vec();
2153        assert!(
2154            jj_conflict_entries
2155                .iter()
2156                .filter(|entry| entry.filename() != JJ_CONFLICT_README_FILE_NAME)
2157                .all(|entry| entry.mode().value() == 0o040000)
2158        );
2159        let mut iter = jj_conflict_entries.iter();
2160        let entry = iter.next().unwrap();
2161        assert_eq!(entry.filename(), b".jjconflict-base-0");
2162        assert_eq!(
2163            entry.id().as_bytes(),
2164            root_tree.get_remove(0).unwrap().as_bytes()
2165        );
2166        let entry = iter.next().unwrap();
2167        assert_eq!(entry.filename(), b".jjconflict-base-1");
2168        assert_eq!(
2169            entry.id().as_bytes(),
2170            root_tree.get_remove(1).unwrap().as_bytes()
2171        );
2172        let entry = iter.next().unwrap();
2173        assert_eq!(entry.filename(), b".jjconflict-side-0");
2174        assert_eq!(
2175            entry.id().as_bytes(),
2176            root_tree.get_add(0).unwrap().as_bytes()
2177        );
2178        let entry = iter.next().unwrap();
2179        assert_eq!(entry.filename(), b".jjconflict-side-1");
2180        assert_eq!(
2181            entry.id().as_bytes(),
2182            root_tree.get_add(1).unwrap().as_bytes()
2183        );
2184        let entry = iter.next().unwrap();
2185        assert_eq!(entry.filename(), b".jjconflict-side-2");
2186        assert_eq!(
2187            entry.id().as_bytes(),
2188            root_tree.get_add(2).unwrap().as_bytes()
2189        );
2190        let entry = iter.next().unwrap();
2191        assert_eq!(entry.filename(), b"JJ-CONFLICT-README");
2192        assert_eq!(entry.mode().value(), 0o100644);
2193        assert!(iter.next().is_none());
2194
2195        // When writing a single tree using the new format, it's represented by a
2196        // regular git tree.
2197        commit.root_tree = Merge::resolved(create_tree(5));
2198        let read_commit_id = write_commit(commit.clone())?.0;
2199        let read_commit = backend.read_commit(&read_commit_id).block_on()?;
2200        assert_eq!(read_commit, commit);
2201        let git_commit = git_repo.find_commit(gix::ObjectId::from_bytes_or_panic(
2202            read_commit_id.as_bytes(),
2203        ))?;
2204        assert_eq!(
2205            Merge::resolved(TreeId::from_bytes(git_commit.tree_id()?.as_bytes())),
2206            commit.root_tree
2207        );
2208        Ok(())
2209    }
2210
2211    #[test]
2212    fn commit_has_ref() -> TestResult {
2213        let settings = user_settings();
2214        let temp_dir = new_temp_dir();
2215        let backend = GitBackend::init_internal(&settings, temp_dir.path())?;
2216        let git_repo = backend.git_repo();
2217        let signature = Signature {
2218            name: "Someone".to_string(),
2219            email: "someone@example.com".to_string(),
2220            timestamp: Timestamp {
2221                timestamp: MillisSinceEpoch(0),
2222                tz_offset: 0,
2223            },
2224        };
2225        let commit = Commit {
2226            parents: vec![backend.root_commit_id().clone()],
2227            predecessors: vec![],
2228            root_tree: Merge::resolved(backend.empty_tree_id().clone()),
2229            conflict_labels: Merge::resolved(String::new()),
2230            change_id: ChangeId::new(vec![42; 16]),
2231            description: "initial".to_string(),
2232            author: signature.clone(),
2233            committer: signature,
2234            secure_sig: None,
2235        };
2236        let commit_id = backend.write_commit(commit, None).block_on()?.0;
2237        let git_refs = git_repo.references()?;
2238        let git_ref_ids: Vec<_> = git_refs
2239            .prefixed("refs/jj/keep/")?
2240            .map(|x| x.unwrap().id().detach())
2241            .collect();
2242        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2243
2244        // Concurrently-running GC deletes the ref, leaving the extra metadata.
2245        for git_ref in git_refs.prefixed("refs/jj/keep/")? {
2246            git_ref.unwrap().delete().unwrap();
2247        }
2248        // Re-imported commit should have new ref.
2249        backend.import_head_commits([&commit_id])?;
2250        let git_refs = git_repo.references()?;
2251        let git_ref_ids: Vec<_> = git_refs
2252            .prefixed("refs/jj/keep/")?
2253            .map(|x| x.unwrap().id().detach())
2254            .collect();
2255        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2256        Ok(())
2257    }
2258
2259    #[test]
2260    fn import_head_commits_duplicates() -> TestResult {
2261        let settings = user_settings();
2262        let temp_dir = new_temp_dir();
2263        let backend = GitBackend::init_internal(&settings, temp_dir.path())?;
2264        let git_repo = backend.git_repo();
2265
2266        let signature = gix::actor::Signature {
2267            name: GIT_USER.into(),
2268            email: GIT_EMAIL.into(),
2269            time: gix::date::Time::now_utc(),
2270        };
2271        let empty_tree_id = gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904")?;
2272        let git_commit_id = git_repo
2273            .commit_as(
2274                signature.to_ref(&mut TimeBuf::default()),
2275                signature.to_ref(&mut TimeBuf::default()),
2276                "refs/heads/main",
2277                "git commit message",
2278                empty_tree_id,
2279                [] as [gix::ObjectId; 0],
2280            )?
2281            .detach();
2282        let commit_id = CommitId::from_bytes(git_commit_id.as_bytes());
2283
2284        // Ref creation shouldn't fail because of duplicated head ids.
2285        backend.import_head_commits([&commit_id, &commit_id])?;
2286        assert!(
2287            git_repo
2288                .references()?
2289                .prefixed("refs/jj/keep/")?
2290                .any(|git_ref| git_ref.unwrap().id().detach() == git_commit_id)
2291        );
2292        Ok(())
2293    }
2294
2295    #[test]
2296    fn overlapping_git_commit_id() -> TestResult {
2297        let settings = user_settings();
2298        let temp_dir = new_temp_dir();
2299        let backend = GitBackend::init_internal(&settings, temp_dir.path())?;
2300        let commit1 = Commit {
2301            parents: vec![backend.root_commit_id().clone()],
2302            predecessors: vec![],
2303            root_tree: Merge::resolved(backend.empty_tree_id().clone()),
2304            conflict_labels: Merge::resolved(String::new()),
2305            change_id: ChangeId::from_hex("7f0a7ce70354b22efcccf7bf144017c4"),
2306            description: "initial".to_string(),
2307            author: create_signature(),
2308            committer: create_signature(),
2309            secure_sig: None,
2310        };
2311
2312        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2313            backend.write_commit(commit, None).block_on()
2314        };
2315
2316        let (commit_id1, mut commit2) = write_commit(commit1)?;
2317        commit2.predecessors.push(commit_id1.clone());
2318        // `write_commit` should prevent the ids from being the same by changing the
2319        // committer timestamp of the commit it actually writes.
2320        let (commit_id2, mut actual_commit2) = write_commit(commit2.clone())?;
2321        // The returned matches the ID
2322        assert_eq!(backend.read_commit(&commit_id2).block_on()?, actual_commit2);
2323        assert_ne!(commit_id2, commit_id1);
2324        // The committer timestamp should differ
2325        assert_ne!(
2326            actual_commit2.committer.timestamp.timestamp,
2327            commit2.committer.timestamp.timestamp
2328        );
2329        // The rest of the commit should be the same
2330        actual_commit2.committer.timestamp.timestamp = commit2.committer.timestamp.timestamp;
2331        assert_eq!(actual_commit2, commit2);
2332        Ok(())
2333    }
2334
2335    #[test]
2336    fn write_signed_commit() -> TestResult {
2337        let settings = user_settings();
2338        let temp_dir = new_temp_dir();
2339        let backend = GitBackend::init_internal(&settings, temp_dir.path())?;
2340
2341        let commit = Commit {
2342            parents: vec![backend.root_commit_id().clone()],
2343            predecessors: vec![],
2344            root_tree: Merge::resolved(backend.empty_tree_id().clone()),
2345            conflict_labels: Merge::resolved(String::new()),
2346            change_id: ChangeId::new(vec![42; 16]),
2347            description: "initial".to_string(),
2348            author: create_signature(),
2349            committer: create_signature(),
2350            secure_sig: None,
2351        };
2352
2353        let mut signer = |data: &_| {
2354            let hash: String = hex_util::encode_hex(&blake2b_hash(data));
2355            Ok(format!("test sig\nhash={hash}\n").into_bytes())
2356        };
2357
2358        let (id, commit) = backend
2359            .write_commit(commit, Some(&mut signer as &mut SigningFn))
2360            .block_on()?;
2361
2362        let git_repo = backend.git_repo();
2363        let obj = git_repo.find_object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))?;
2364        insta::assert_snapshot!(str::from_utf8(&obj.data)?, @"
2365        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2366        author Someone <someone@example.com> 0 +0000
2367        committer Someone <someone@example.com> 0 +0000
2368        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp
2369        gpgsig test sig
2370         hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29
2371
2372        initial
2373        ");
2374
2375        let returned_sig = commit.secure_sig.expect("failed to return the signature");
2376
2377        let commit = backend.read_commit(&id).block_on()?;
2378
2379        let sig = commit.secure_sig.expect("failed to read the signature");
2380        assert_eq!(&sig, &returned_sig);
2381
2382        insta::assert_snapshot!(str::from_utf8(&sig.sig)?, @"
2383        test sig
2384        hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29
2385        ");
2386        insta::assert_snapshot!(str::from_utf8(&sig.data)?, @"
2387        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2388        author Someone <someone@example.com> 0 +0000
2389        committer Someone <someone@example.com> 0 +0000
2390        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp
2391
2392        initial
2393        ");
2394        Ok(())
2395    }
2396
2397    fn git_id(commit_id: &CommitId) -> gix::ObjectId {
2398        gix::ObjectId::from_bytes_or_panic(commit_id.as_bytes())
2399    }
2400
2401    fn create_signature() -> Signature {
2402        Signature {
2403            name: GIT_USER.to_string(),
2404            email: GIT_EMAIL.to_string(),
2405            timestamp: Timestamp {
2406                timestamp: MillisSinceEpoch(0),
2407                tz_offset: 0,
2408            },
2409        }
2410    }
2411
2412    // Not using testutils::user_settings() because there is a dependency cycle
2413    // 'jj_lib (1) -> testutils -> jj_lib (2)' which creates another distinct
2414    // UserSettings type. testutils returns jj_lib (2)'s UserSettings, whereas
2415    // our UserSettings type comes from jj_lib (1).
2416    fn user_settings() -> UserSettings {
2417        let config = StackedConfig::with_defaults();
2418        UserSettings::from_config(config).unwrap()
2419    }
2420}