Skip to main content

jj_lib/
git_backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::collections::HashSet;
18use std::ffi::OsStr;
19use std::fmt::Debug;
20use std::fmt::Error;
21use std::fmt::Formatter;
22use std::fs;
23use std::io;
24use std::io::Cursor;
25use std::path::Path;
26use std::path::PathBuf;
27use std::pin::Pin;
28use std::process::Command;
29use std::process::ExitStatus;
30use std::str::Utf8Error;
31use std::sync::Arc;
32use std::sync::Mutex;
33use std::sync::MutexGuard;
34use std::time::SystemTime;
35
36use async_trait::async_trait;
37use futures::stream::BoxStream;
38use gix::bstr::BString;
39use gix::objs::CommitRefIter;
40use gix::objs::WriteTo as _;
41use itertools::Itertools as _;
42use once_cell::sync::OnceCell as OnceLock;
43use pollster::FutureExt as _;
44use prost::Message as _;
45use smallvec::SmallVec;
46use thiserror::Error;
47use tokio::io::AsyncRead;
48use tokio::io::AsyncReadExt as _;
49
50use crate::backend::Backend;
51use crate::backend::BackendError;
52use crate::backend::BackendInitError;
53use crate::backend::BackendLoadError;
54use crate::backend::BackendResult;
55use crate::backend::ChangeId;
56use crate::backend::Commit;
57use crate::backend::CommitId;
58use crate::backend::CopyHistory;
59use crate::backend::CopyId;
60use crate::backend::CopyRecord;
61use crate::backend::FileId;
62use crate::backend::MillisSinceEpoch;
63use crate::backend::SecureSig;
64use crate::backend::Signature;
65use crate::backend::SigningFn;
66use crate::backend::SymlinkId;
67use crate::backend::Timestamp;
68use crate::backend::Tree;
69use crate::backend::TreeId;
70use crate::backend::TreeValue;
71use crate::backend::make_root_commit;
72use crate::config::ConfigGetError;
73use crate::file_util;
74use crate::file_util::BadPathEncoding;
75use crate::file_util::IoResultExt as _;
76use crate::file_util::PathError;
77use crate::git::GitSettings;
78use crate::index::Index;
79use crate::lock::FileLock;
80use crate::merge::Merge;
81use crate::merge::MergeBuilder;
82use crate::object_id::ObjectId;
83use crate::repo_path::RepoPath;
84use crate::repo_path::RepoPathBuf;
85use crate::repo_path::RepoPathComponentBuf;
86use crate::settings::UserSettings;
87use crate::stacked_table::MutableTable;
88use crate::stacked_table::ReadonlyTable;
89use crate::stacked_table::TableSegment as _;
90use crate::stacked_table::TableStore;
91use crate::stacked_table::TableStoreError;
92
/// Byte length of the commit and tree ids handled by this backend.
const HASH_LENGTH: usize = 20;
/// Byte length of a change id.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";

/// Name of the commit header that lists the tree ids of a conflicted commit.
pub const JJ_TREES_COMMIT_HEADER: &str = "jj:trees";
/// Name of the commit header that holds newline-separated conflict labels.
pub const JJ_CONFLICT_LABELS_COMMIT_HEADER: &str = "jj:conflict-labels";
/// Name of the commit header that carries the change id.
pub const CHANGE_ID_COMMIT_HEADER: &str = "change-id";
101
/// Errors that can occur while initializing a `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendInitError {
    /// Creating a new Git repository failed.
    #[error("Failed to initialize git repository")]
    InitRepository(#[source] gix::init::Error),
    /// Opening an existing Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// The Git repository path could not be converted to bytes for storage in
    /// the `git_target` file.
    #[error("Failed to encode git repository path")]
    EncodeRepositoryPath(#[source] BadPathEncoding),
    /// Git-related settings could not be read from the user settings.
    #[error(transparent)]
    Config(ConfigGetError),
    /// A filesystem operation on a store or repository path failed.
    #[error(transparent)]
    Path(PathError),
}
115
116impl From<Box<GitBackendInitError>> for BackendInitError {
117    fn from(err: Box<GitBackendInitError>) -> Self {
118        Self(err)
119    }
120}
121
/// Errors that can occur while loading an existing `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendLoadError {
    /// Opening the Git repository referenced by the store failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// The bytes stored in the `git_target` file could not be turned back
    /// into a path.
    #[error("Failed to decode git repository path")]
    DecodeRepositoryPath(#[source] BadPathEncoding),
    /// Git-related settings could not be read from the user settings.
    #[error(transparent)]
    Config(ConfigGetError),
    /// Reading `git_target` or canonicalizing the repository path failed.
    #[error(transparent)]
    Path(PathError),
}
133
134impl From<Box<GitBackendLoadError>> for BackendLoadError {
135    fn from(err: Box<GitBackendLoadError>) -> Self {
136        Self(err)
137    }
138}
139
/// `GitBackend`-specific error that may occur after the backend is loaded.
#[derive(Debug, Error)]
pub enum GitBackendError {
    /// The extra (non-Git) metadata table could not be read.
    #[error("Failed to read non-git metadata")]
    ReadMetadata(#[source] TableStoreError),
    /// The extra (non-Git) metadata table could not be saved.
    #[error("Failed to write non-git metadata")]
    WriteMetadata(#[source] TableStoreError),
}
148
149impl From<GitBackendError> for BackendError {
150    fn from(err: GitBackendError) -> Self {
151        Self::Other(err.into())
152    }
153}
154
/// Errors reported when running `git gc`.
#[derive(Debug, Error)]
pub enum GitGcError {
    /// The `git gc` command could not be run.
    #[error("Failed to run git gc command")]
    GcCommand(#[source] std::io::Error),
    /// The `git gc` command ran but exited with the given error status.
    #[error("git gc command exited with an error: {0}")]
    GcCommandErrorStatus(ExitStatus),
}
162
/// Backend that stores commits and trees in a Git repository and keeps
/// additional, non-Git metadata in a separate table store.
pub struct GitBackend {
    // While gix::Repository can be created from gix::ThreadSafeRepository, it's
    // cheaper to cache the thread-local instance behind a mutex than creating
    // one for each backend method call. Our GitBackend is most likely to be
    // used in a single-threaded context.
    /// Thread-safe handle from which thread-local repositories are created.
    base_repo: gix::ThreadSafeRepository,
    /// Cached thread-local repository, shared behind a mutex.
    repo: Mutex<gix::Repository>,
    /// All-zero commit id that denotes the root commit.
    root_commit_id: CommitId,
    /// All-zero change id that belongs to the root commit.
    root_change_id: ChangeId,
    /// Id of Git's empty tree.
    empty_tree_id: TreeId,
    /// Lazily computed list of the repository's shallow commits.
    shallow_root_ids: OnceLock<Vec<CommitId>>,
    /// Table store holding the non-Git metadata (the `extra` directory).
    extra_metadata_store: TableStore,
    /// Most recently loaded or saved head of the extra metadata table.
    cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
    /// Path of the `git` executable, taken from `GitSettings`.
    git_executable: PathBuf,
    /// Taken from `GitSettings`.
    // NOTE(review): presumably controls whether the `change-id` header is
    // written to new commits; the use site is outside this view.
    write_change_id_header: bool,
}
179
180impl GitBackend {
181    pub fn name() -> &'static str {
182        "git"
183    }
184
185    fn new(
186        base_repo: gix::ThreadSafeRepository,
187        extra_metadata_store: TableStore,
188        git_settings: GitSettings,
189    ) -> Self {
190        let repo = Mutex::new(base_repo.to_thread_local());
191        let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
192        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
193        let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
194        Self {
195            base_repo,
196            repo,
197            root_commit_id,
198            root_change_id,
199            empty_tree_id,
200            shallow_root_ids: OnceLock::new(),
201            extra_metadata_store,
202            cached_extra_metadata: Mutex::new(None),
203            git_executable: git_settings.executable_path,
204            write_change_id_header: git_settings.write_change_id_header,
205        }
206    }
207
208    pub fn init_internal(
209        settings: &UserSettings,
210        store_path: &Path,
211    ) -> Result<Self, Box<GitBackendInitError>> {
212        let git_repo_path = Path::new("git");
213        let git_repo = gix::ThreadSafeRepository::init_opts(
214            store_path.join(git_repo_path),
215            gix::create::Kind::Bare,
216            gix::create::Options::default(),
217            gix_open_opts_from_settings(settings),
218        )
219        .map_err(GitBackendInitError::InitRepository)?;
220        let git_settings =
221            GitSettings::from_settings(settings).map_err(GitBackendInitError::Config)?;
222        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
223    }
224
225    /// Initializes backend by creating a new Git repo at the specified
226    /// workspace path. The workspace directory must exist.
227    pub fn init_colocated(
228        settings: &UserSettings,
229        store_path: &Path,
230        workspace_root: &Path,
231    ) -> Result<Self, Box<GitBackendInitError>> {
232        let canonical_workspace_root = {
233            let path = store_path.join(workspace_root);
234            dunce::canonicalize(&path)
235                .context(&path)
236                .map_err(GitBackendInitError::Path)?
237        };
238        let git_repo = gix::ThreadSafeRepository::init_opts(
239            canonical_workspace_root,
240            gix::create::Kind::WithWorktree,
241            gix::create::Options::default(),
242            gix_open_opts_from_settings(settings),
243        )
244        .map_err(GitBackendInitError::InitRepository)?;
245        let git_repo_path = workspace_root.join(".git");
246        let git_settings =
247            GitSettings::from_settings(settings).map_err(GitBackendInitError::Config)?;
248        Self::init_with_repo(store_path, &git_repo_path, git_repo, git_settings)
249    }
250
251    /// Initializes backend with an existing Git repo at the specified path.
252    pub fn init_external(
253        settings: &UserSettings,
254        store_path: &Path,
255        git_repo_path: &Path,
256    ) -> Result<Self, Box<GitBackendInitError>> {
257        let canonical_git_repo_path = {
258            let path = store_path.join(git_repo_path);
259            canonicalize_git_repo_path(&path)
260                .context(&path)
261                .map_err(GitBackendInitError::Path)?
262        };
263        let git_repo = gix::ThreadSafeRepository::open_opts(
264            canonical_git_repo_path,
265            gix_open_opts_from_settings(settings),
266        )
267        .map_err(GitBackendInitError::OpenRepository)?;
268        let git_settings =
269            GitSettings::from_settings(settings).map_err(GitBackendInitError::Config)?;
270        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
271    }
272
273    fn init_with_repo(
274        store_path: &Path,
275        git_repo_path: &Path,
276        repo: gix::ThreadSafeRepository,
277        git_settings: GitSettings,
278    ) -> Result<Self, Box<GitBackendInitError>> {
279        let extra_path = store_path.join("extra");
280        fs::create_dir(&extra_path)
281            .context(&extra_path)
282            .map_err(GitBackendInitError::Path)?;
283        let target_path = store_path.join("git_target");
284        let git_repo_path = if cfg!(windows) && git_repo_path.is_relative() {
285            // When a repository is created in Windows, format the path with *forward
286            // slashes* and not backwards slashes. This makes it possible to use the same
287            // repository under Windows Subsystem for Linux.
288            //
289            // This only works for relative paths. If the path is absolute, there's not much
290            // we can do, and it simply won't work inside and outside WSL at the same time.
291            file_util::slash_path(git_repo_path)
292        } else {
293            git_repo_path.into()
294        };
295        let git_repo_path_bytes = file_util::path_to_bytes(&git_repo_path)
296            .map_err(GitBackendInitError::EncodeRepositoryPath)?;
297        fs::write(&target_path, git_repo_path_bytes)
298            .context(&target_path)
299            .map_err(GitBackendInitError::Path)?;
300        let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
301        Ok(Self::new(repo, extra_metadata_store, git_settings))
302    }
303
304    pub fn load(
305        settings: &UserSettings,
306        store_path: &Path,
307    ) -> Result<Self, Box<GitBackendLoadError>> {
308        let git_repo_path = {
309            let target_path = store_path.join("git_target");
310            let git_repo_path_bytes = fs::read(&target_path)
311                .context(&target_path)
312                .map_err(GitBackendLoadError::Path)?;
313            let git_repo_path = file_util::path_from_bytes(&git_repo_path_bytes)
314                .map_err(GitBackendLoadError::DecodeRepositoryPath)?;
315            let git_repo_path = store_path.join(git_repo_path);
316            canonicalize_git_repo_path(&git_repo_path)
317                .context(&git_repo_path)
318                .map_err(GitBackendLoadError::Path)?
319        };
320        let repo = gix::ThreadSafeRepository::open_opts(
321            git_repo_path,
322            gix_open_opts_from_settings(settings),
323        )
324        .map_err(GitBackendLoadError::OpenRepository)?;
325        let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
326        let git_settings =
327            GitSettings::from_settings(settings).map_err(GitBackendLoadError::Config)?;
328        Ok(Self::new(repo, extra_metadata_store, git_settings))
329    }
330
    /// Locks and returns the cached thread-local repository.
    ///
    /// # Panics
    ///
    /// Panics if the mutex is poisoned.
    fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
        self.repo.lock().unwrap()
    }
334
    /// Returns a new thread-local instance for accessing the underlying Git
    /// repo.
    ///
    /// Each call creates a fresh instance from the thread-safe base repo; the
    /// instance cached behind the internal mutex is not used here.
    pub fn git_repo(&self) -> gix::Repository {
        self.base_repo.to_thread_local()
    }
339
    /// Path to the `.git` directory or the repository itself if it's bare.
    ///
    /// This is the path reported by the underlying repository handle.
    pub fn git_repo_path(&self) -> &Path {
        self.base_repo.path()
    }
344
    /// Path to the working directory if the repository isn't bare.
    ///
    /// Returns `None` when the underlying repository reports no working
    /// directory.
    pub fn git_workdir(&self) -> Option<&Path> {
        self.base_repo.work_dir()
    }
349
350    fn shallow_root_ids(&self, git_repo: &gix::Repository) -> BackendResult<&[CommitId]> {
351        // The list of shallow roots is cached by gix, but it's still expensive
352        // to stat file on every read_object() call. Refreshing shallow roots is
353        // also bad for consistency reasons.
354        self.shallow_root_ids
355            .get_or_try_init(|| {
356                let maybe_oids = git_repo
357                    .shallow_commits()
358                    .map_err(|err| BackendError::Other(err.into()))?;
359                let commit_ids = maybe_oids.map_or(vec![], |oids| {
360                    oids.iter()
361                        .map(|oid| CommitId::from_bytes(oid.as_bytes()))
362                        .collect()
363                });
364                Ok(commit_ids)
365            })
366            .map(AsRef::as_ref)
367    }
368
369    fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
370        let mut locked_head = self.cached_extra_metadata.lock().unwrap();
371        match locked_head.as_ref() {
372            Some(head) => Ok(head.clone()),
373            None => {
374                let table = self
375                    .extra_metadata_store
376                    .get_head()
377                    .map_err(GitBackendError::ReadMetadata)?;
378                *locked_head = Some(table.clone());
379                Ok(table)
380            }
381        }
382    }
383
384    fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> {
385        let table = self
386            .extra_metadata_store
387            .get_head_locked()
388            .map_err(GitBackendError::ReadMetadata)?;
389        Ok(table)
390    }
391
392    fn save_extra_metadata_table(
393        &self,
394        mut_table: MutableTable,
395        _table_lock: &FileLock,
396    ) -> BackendResult<()> {
397        let table = self
398            .extra_metadata_store
399            .save_table(mut_table)
400            .map_err(GitBackendError::WriteMetadata)?;
401        // Since the parent table was the head, saved table are likely to be new head.
402        // If it's not, cache will be reloaded when entry can't be found.
403        *self.cached_extra_metadata.lock().unwrap() = Some(table);
404        Ok(())
405    }
406
    /// Imports the given commits and ancestors from the backing Git repo.
    ///
    /// The `head_ids` may contain commits that have already been imported, but
    /// the caller should filter them out to eliminate redundant I/O processing.
    ///
    /// The root commit id is ignored. If no other ids remain, nothing is
    /// written.
    ///
    /// # Errors
    ///
    /// Fails if the no-gc refs cannot be updated, or if the extra metadata
    /// table cannot be read, populated or saved.
    #[tracing::instrument(skip(self, head_ids))]
    pub fn import_head_commits<'a>(
        &self,
        head_ids: impl IntoIterator<Item = &'a CommitId>,
    ) -> BackendResult<()> {
        // Deduplicate the heads and drop the root commit id.
        let head_ids: HashSet<&CommitId> = head_ids
            .into_iter()
            .filter(|&id| *id != self.root_commit_id)
            .collect();
        if head_ids.is_empty() {
            return Ok(());
        }

        // Create no-gc ref even if known to the extras table. Concurrent GC
        // process might have deleted the no-gc ref.
        let locked_repo = self.lock_git_repo();
        locked_repo
            .edit_references(head_ids.iter().copied().map(to_no_gc_ref_update))
            .map_err(|err| BackendError::Other(Box::new(err)))?;

        // These commits are imported from Git. Make our change ids persist (otherwise
        // future write_commit() could reassign new change id.)
        tracing::debug!(
            heads_count = head_ids.len(),
            "import extra metadata entries"
        );
        // The table lock is held from here until the mutated table is saved.
        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
        let mut mut_table = table.start_mutation();
        import_extra_metadata_entries_from_heads(
            &locked_repo,
            &mut mut_table,
            &table_lock,
            &head_ids,
            self.shallow_root_ids(&locked_repo)?,
        )?;
        self.save_extra_metadata_table(mut_table, &table_lock)
    }
448
449    fn read_file_sync(&self, id: &FileId) -> BackendResult<Vec<u8>> {
450        let git_blob_id = validate_git_object_id(id)?;
451        let locked_repo = self.lock_git_repo();
452        let mut blob = locked_repo
453            .find_object(git_blob_id)
454            .map_err(|err| map_not_found_err(err, id))?
455            .try_into_blob()
456            .map_err(|err| to_read_object_err(err, id))?;
457        Ok(blob.take_data())
458    }
459
    /// Builds a `gix::diff::blob::Platform` for diffing blobs of this
    /// repository.
    ///
    /// The platform uses an attributes stack rooted at the empty path with
    /// case-sensitive matching, and converts content in `ToGit` mode. All
    /// other options are left at their defaults.
    ///
    /// # Errors
    ///
    /// Fails if the repository's command context cannot be obtained.
    fn new_diff_platform(&self) -> BackendResult<gix::diff::blob::Platform> {
        // Attributes-only stack with no extra buffers or id mappings supplied.
        let attributes = gix::worktree::Stack::new(
            Path::new(""),
            gix::worktree::stack::State::AttributesStack(Default::default()),
            gix::worktree::glob::pattern::Case::Sensitive,
            Vec::new(),
            Vec::new(),
        );
        // The filter pipeline needs the repository's command context.
        // NOTE(review): presumably used for running external filter programs
        // — confirm against the gix documentation.
        let filter = gix::diff::blob::Pipeline::new(
            Default::default(),
            gix::filter::plumbing::Pipeline::new(
                self.git_repo()
                    .command_context()
                    .map_err(|err| BackendError::Other(Box::new(err)))?,
                Default::default(),
            ),
            Vec::new(),
            Default::default(),
        );
        Ok(gix::diff::blob::Platform::new(
            Default::default(),
            filter,
            gix::diff::blob::pipeline::Mode::ToGit,
            attributes,
        ))
    }
486
487    fn read_tree_for_commit<'repo>(
488        &self,
489        repo: &'repo gix::Repository,
490        id: &CommitId,
491    ) -> BackendResult<gix::Tree<'repo>> {
492        let tree = self.read_commit(id).block_on()?.root_tree;
493        // TODO(kfm): probably want to do something here if it is a merge
494        let tree_id = tree.first().clone();
495        let gix_id = validate_git_object_id(&tree_id)?;
496        repo.find_object(gix_id)
497            .map_err(|err| map_not_found_err(err, &tree_id))?
498            .try_into_tree()
499            .map_err(|err| to_read_object_err(err, &tree_id))
500    }
501}
502
503/// Canonicalizes the given `path` except for the last `".git"` component.
504///
505/// The last path component matters when opening a Git repo without `core.bare`
506/// config. This config is usually set, but the "repo" tool will set up such
507/// repositories and symlinks. Opening such repo with fully-canonicalized path
508/// would turn a colocated Git repo into a bare repo.
509pub fn canonicalize_git_repo_path(path: &Path) -> io::Result<PathBuf> {
510    if path.ends_with(".git") {
511        let workdir = path.parent().unwrap();
512        dunce::canonicalize(workdir).map(|dir| dir.join(".git"))
513    } else {
514        dunce::canonicalize(path)
515    }
516}
517
518fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options {
519    let user_name = settings.user_name();
520    let user_email = settings.user_email();
521    gix::open::Options::default()
522        .config_overrides([
523            // Committer has to be configured to record reflog. Author isn't
524            // needed, but let's copy the same values.
525            format!("author.name={user_name}"),
526            format!("author.email={user_email}"),
527            format!("committer.name={user_name}"),
528            format!("committer.email={user_email}"),
529        ])
530        // The git_target path should point the repository, not the working directory.
531        .open_path_as_is(true)
532        // Gitoxide recommends this when correctness is preferred
533        .strict_config(true)
534}
535
536/// Parses the `jj:conflict-labels` header value if present.
537fn extract_conflict_labels_from_commit(commit: &gix::objs::CommitRef) -> Merge<String> {
538    let Some(value) = commit
539        .extra_headers()
540        .find(JJ_CONFLICT_LABELS_COMMIT_HEADER)
541    else {
542        return Merge::resolved(String::new());
543    };
544
545    str::from_utf8(value)
546        .expect("labels should be valid utf8")
547        .split_terminator('\n')
548        .map(str::to_owned)
549        .collect::<MergeBuilder<_>>()
550        .build()
551}
552
553/// Parses the `jj:trees` header value if present, otherwise returns the
554/// resolved tree ID from Git.
555fn extract_root_tree_from_commit(commit: &gix::objs::CommitRef) -> Result<Merge<TreeId>, ()> {
556    let Some(value) = commit.extra_headers().find(JJ_TREES_COMMIT_HEADER) else {
557        let tree_id = TreeId::from_bytes(commit.tree().as_bytes());
558        return Ok(Merge::resolved(tree_id));
559    };
560
561    let mut tree_ids = SmallVec::new();
562    for hex in value.split(|b| *b == b' ') {
563        let tree_id = TreeId::try_from_hex(hex).ok_or(())?;
564        if tree_id.as_bytes().len() != HASH_LENGTH {
565            return Err(());
566        }
567        tree_ids.push(tree_id);
568    }
569    // It is invalid to use `jj:trees` with a non-conflicted tree. If this were
570    // allowed, it would be possible to construct a commit which appears to have
571    // different contents depending on whether it is viewed using `jj` or `git`.
572    if tree_ids.len() == 1 || tree_ids.len() % 2 == 0 {
573        return Err(());
574    }
575    Ok(Merge::from_vec(tree_ids))
576}
577
/// Builds a backend `Commit` from the Git commit object `git_object`.
///
/// When `is_shallow` is true, the commit's parents are reported as empty.
/// `predecessors` is always left empty, and the change id, root tree and
/// conflict labels come from the commit headers alone.
///
/// NOTE(review): as the name suggests, the caller presumably adds the root
/// commit as parent of parentless commits — confirm at the call site.
///
/// # Errors
///
/// Returns a read-object error for `id` if the object cannot be decoded as a
/// commit, if the `jj:trees` header is invalid, or if the author, committer
/// or signature cannot be decoded.
fn commit_from_git_without_root_parent(
    id: &CommitId,
    git_object: &gix::Object,
    is_shallow: bool,
) -> BackendResult<Commit> {
    let decode_err = |err: gix::objs::decode::Error| to_read_object_err(err, id);
    let commit = git_object
        .try_to_commit_ref()
        .map_err(|err| to_read_object_err(err, id))?;

    // If the git header has a change-id field, we attempt to convert that to a
    // valid JJ Change Id
    let change_id = extract_change_id_from_commit(&commit)
        .unwrap_or_else(|| synthetic_change_id_from_git_commit_id(id));

    // The parents of shallow commits haven't actually been fetched, so we
    // discard them here
    // TODO: This causes issues when a shallow repository is deepened/unshallowed
    let parents = if is_shallow {
        vec![]
    } else {
        commit
            .parents()
            .map(|oid| CommitId::from_bytes(oid.as_bytes()))
            .collect_vec()
    };
    // If the commit is a conflict, the conflict labels are stored in a commit
    // header separately from the trees.
    let conflict_labels = extract_conflict_labels_from_commit(&commit);
    // Conflicted commits written before we started using the `jj:trees` header
    // (~March 2024) may have the root trees stored in the extra metadata table
    // instead. For such commits, we'll update the root tree later when we read the
    // extra metadata.
    let root_tree = extract_root_tree_from_commit(&commit)
        .map_err(|()| to_read_object_err("Invalid jj:trees header", id))?;
    // Use lossy conversion as commit message with "mojibake" is still better than
    // nothing.
    // TODO: what should we do with commit.encoding?
    let description = String::from_utf8_lossy(commit.message).into_owned();
    let author = signature_from_git(commit.author().map_err(decode_err)?);
    let committer = signature_from_git(commit.committer().map_err(decode_err)?);

    // If the commit is signed, extract both the signature and the signed data
    // (which is the commit buffer with the gpgsig header omitted).
    // We have to re-parse the raw commit data because gix CommitRef does not give
    // us the signed data, only the signature.
    // Ideally, we could use try_to_commit_ref_iter at the beginning of this
    // function and extract everything from that. For now, this works
    let secure_sig = commit
        .extra_headers
        .iter()
        // gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too
        .any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256")
        .then(|| CommitRefIter::signature(&git_object.data))
        .transpose()
        .map_err(decode_err)?
        .flatten()
        .map(|(sig, data)| SecureSig {
            data: data.to_bstring().into(),
            sig: sig.into_owned().into(),
        });

    Ok(Commit {
        parents,
        predecessors: vec![],
        // If this commit has associated extra metadata, we may reset this later.
        root_tree,
        conflict_labels,
        change_id,
        description,
        author,
        committer,
        secure_sig,
    })
}
653
654/// Extracts change id from commit headers.
655pub fn extract_change_id_from_commit(commit: &gix::objs::CommitRef) -> Option<ChangeId> {
656    commit
657        .extra_headers()
658        .find(CHANGE_ID_COMMIT_HEADER)
659        .and_then(ChangeId::try_from_reverse_hex)
660        .filter(|val| val.as_bytes().len() == CHANGE_ID_LENGTH)
661}
662
663/// Deterministically creates a change id based on the commit id
664///
665/// Used when we get a commit without a change id. The exact algorithm for the
666/// computation should not be relied upon.
667pub fn synthetic_change_id_from_git_commit_id(id: &CommitId) -> ChangeId {
668    // We reverse the bits of the commit id to create the change id. We don't
669    // want to use the first bytes unmodified because then it would be ambiguous
670    // if a given hash prefix refers to the commit id or the change id. It would
671    // have been enough to pick the last 16 bytes instead of the leading 16
672    // bytes to address that. We also reverse the bits to make it less likely
673    // that users depend on any relationship between the two ids.
674    let bytes = id.as_bytes()[4..HASH_LENGTH]
675        .iter()
676        .rev()
677        .map(|b| b.reverse_bits())
678        .collect();
679    ChangeId::new(bytes)
680}
681
/// Stand-in written to Git for an empty name or email, and mapped back to the
/// empty string when a Git signature is read.
const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING";
683
684fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature {
685    let name = signature.name;
686    let name = if name != EMPTY_STRING_PLACEHOLDER {
687        String::from_utf8_lossy(name).into_owned()
688    } else {
689        "".to_string()
690    };
691    let email = signature.email;
692    let email = if email != EMPTY_STRING_PLACEHOLDER {
693        String::from_utf8_lossy(email).into_owned()
694    } else {
695        "".to_string()
696    };
697    let time = signature.time().unwrap_or_default();
698    let timestamp = MillisSinceEpoch(time.seconds * 1000);
699    let tz_offset = time.offset.div_euclid(60); // in minutes
700    Signature {
701        name,
702        email,
703        timestamp: Timestamp {
704            timestamp,
705            tz_offset,
706        },
707    }
708}
709
710fn signature_to_git(signature: &Signature) -> gix::actor::Signature {
711    // git does not support empty names or emails
712    let name = if !signature.name.is_empty() {
713        &signature.name
714    } else {
715        EMPTY_STRING_PLACEHOLDER
716    };
717    let email = if !signature.email.is_empty() {
718        &signature.email
719    } else {
720        EMPTY_STRING_PLACEHOLDER
721    };
722    let time = gix::date::Time::new(
723        signature.timestamp.timestamp.0.div_euclid(1000),
724        signature.timestamp.tz_offset * 60, // in seconds
725    );
726    gix::actor::Signature {
727        name: name.into(),
728        email: email.into(),
729        time,
730    }
731}
732
733fn serialize_extras(commit: &Commit) -> Vec<u8> {
734    let mut proto = crate::protos::git_store::Commit {
735        change_id: commit.change_id.to_bytes(),
736        ..Default::default()
737    };
738    proto.uses_tree_conflict_format = true;
739    if !commit.root_tree.is_resolved() {
740        // This is done for the sake of jj versions <0.28 (before commit
741        // f7b14be) being able to read the repo. At some point in the
742        // future, we can stop doing it.
743        proto.root_tree = commit.root_tree.iter().map(|r| r.to_bytes()).collect();
744    }
745    for predecessor in &commit.predecessors {
746        proto.predecessors.push(predecessor.to_bytes());
747    }
748    proto.encode_to_vec()
749}
750
751fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
752    let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
753    if !proto.change_id.is_empty() {
754        commit.change_id = ChangeId::new(proto.change_id);
755    }
756    if commit.root_tree.is_resolved()
757        && proto.uses_tree_conflict_format
758        && !proto.root_tree.is_empty()
759    {
760        let merge_builder: MergeBuilder<_> = proto
761            .root_tree
762            .iter()
763            .map(|id_bytes| TreeId::from_bytes(id_bytes))
764            .collect();
765        commit.root_tree = merge_builder.build();
766    }
767    for predecessor in &proto.predecessors {
768        commit.predecessors.push(CommitId::from_bytes(predecessor));
769    }
770}
771
772/// Returns `RefEdit` that will create a ref in `refs/jj/keep` if not exist.
773/// Used for preventing GC of commits we create.
774fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit {
775    let name = format!("{NO_GC_REF_NAMESPACE}{id}");
776    let new = gix::refs::Target::Object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()));
777    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(new.clone());
778    gix::refs::transaction::RefEdit {
779        change: gix::refs::transaction::Change::Update {
780            log: gix::refs::transaction::LogChange {
781                message: "used by jj".into(),
782                ..Default::default()
783            },
784            expected,
785            new,
786        },
787        name: name.try_into().unwrap(),
788        deref: false,
789    }
790}
791
792fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit {
793    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(git_ref.target);
794    gix::refs::transaction::RefEdit {
795        change: gix::refs::transaction::Change::Delete {
796            expected,
797            log: gix::refs::transaction::RefLog::AndReference,
798        },
799        name: git_ref.name,
800        deref: false,
801    }
802}
803
804/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other
805/// unreachable and non-head refs.
806fn recreate_no_gc_refs(
807    git_repo: &gix::Repository,
808    new_heads: impl IntoIterator<Item = CommitId>,
809    keep_newer: SystemTime,
810) -> BackendResult<()> {
811    // Calculate diff between existing no-gc refs and new heads.
812    let new_heads: HashSet<CommitId> = new_heads.into_iter().collect();
813    let mut no_gc_refs_to_keep_count: usize = 0;
814    let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new();
815    let git_references = git_repo
816        .references()
817        .map_err(|err| BackendError::Other(err.into()))?;
818    let no_gc_refs_iter = git_references
819        .prefixed(NO_GC_REF_NAMESPACE)
820        .map_err(|err| BackendError::Other(err.into()))?;
821    for git_ref in no_gc_refs_iter {
822        let git_ref = git_ref.map_err(BackendError::Other)?.detach();
823        let oid = git_ref.target.try_id().ok_or_else(|| {
824            let name = git_ref.name.as_bstr();
825            BackendError::Other(format!("Symbolic no-gc ref found: {name}").into())
826        })?;
827        let id = CommitId::from_bytes(oid.as_bytes());
828        let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex();
829        if new_heads.contains(&id) && name_good {
830            no_gc_refs_to_keep_count += 1;
831            continue;
832        }
833        // Check timestamp of loose ref, but this is still racy on re-import
834        // because:
835        // - existing packed ref won't be demoted to loose ref
836        // - existing loose ref won't be touched
837        //
838        // TODO: might be better to switch to a dummy merge, where new no-gc ref
839        // will always have a unique name. Doing that with the current
840        // ref-per-head strategy would increase the number of the no-gc refs.
841        // https://github.com/jj-vcs/jj/pull/2659#issuecomment-1837057782
842        let loose_ref_path = git_repo.path().join(git_ref.name.to_path());
843        if let Ok(metadata) = loose_ref_path.metadata() {
844            let mtime = metadata.modified().expect("unsupported platform?");
845            if mtime > keep_newer {
846                tracing::trace!(?git_ref, "not deleting new");
847                no_gc_refs_to_keep_count += 1;
848                continue;
849            }
850        }
851        // Also deletes no-gc ref of random name created by old jj.
852        tracing::trace!(?git_ref, ?name_good, "will delete");
853        no_gc_refs_to_delete.push(git_ref);
854    }
855    tracing::info!(
856        new_heads_count = new_heads.len(),
857        no_gc_refs_to_keep_count,
858        no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(),
859        "collected reachable refs"
860    );
861
862    // It's slow to delete packed refs one by one, so update refs all at once.
863    let ref_edits = itertools::chain(
864        no_gc_refs_to_delete.into_iter().map(to_ref_deletion),
865        new_heads.iter().map(to_no_gc_ref_update),
866    );
867    git_repo
868        .edit_references(ref_edits)
869        .map_err(|err| BackendError::Other(err.into()))?;
870
871    Ok(())
872}
873
874fn run_git_gc(program: &OsStr, git_dir: &Path, keep_newer: SystemTime) -> Result<(), GitGcError> {
875    let keep_newer = keep_newer
876        .duration_since(SystemTime::UNIX_EPOCH)
877        .unwrap_or_default(); // underflow
878    let mut git = Command::new(program);
879    git.arg("--git-dir=.") // turn off discovery
880        .arg("gc")
881        .arg(format!("--prune=@{} +0000", keep_newer.as_secs()));
882    // Don't specify it by GIT_DIR/--git-dir. On Windows, the path could be
883    // canonicalized as UNC path, which wouldn't be supported by git.
884    git.current_dir(git_dir);
885    // TODO: pass output to UI layer instead of printing directly here
886    tracing::info!(?git, "running git gc");
887    let status = git.status().map_err(GitGcError::GcCommand)?;
888    tracing::info!(?status, "git gc exited");
889    if !status.success() {
890        return Err(GitGcError::GcCommandErrorStatus(status));
891    }
892    Ok(())
893}
894
895fn validate_git_object_id(id: &impl ObjectId) -> BackendResult<gix::ObjectId> {
896    if id.as_bytes().len() != HASH_LENGTH {
897        return Err(BackendError::InvalidHashLength {
898            expected: HASH_LENGTH,
899            actual: id.as_bytes().len(),
900            object_type: id.object_type(),
901            hash: id.hex(),
902        });
903    }
904    Ok(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
905}
906
907fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError {
908    if matches!(err, gix::object::find::existing::Error::NotFound { .. }) {
909        BackendError::ObjectNotFound {
910            object_type: id.object_type(),
911            hash: id.hex(),
912            source: Box::new(err),
913        }
914    } else {
915        to_read_object_err(err, id)
916    }
917}
918
919fn to_read_object_err(
920    err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
921    id: &impl ObjectId,
922) -> BackendError {
923    BackendError::ReadObject {
924        object_type: id.object_type(),
925        hash: id.hex(),
926        source: err.into(),
927    }
928}
929
930fn to_invalid_utf8_err(source: Utf8Error, id: &impl ObjectId) -> BackendError {
931    BackendError::InvalidUtf8 {
932        object_type: id.object_type(),
933        hash: id.hex(),
934        source,
935    }
936}
937
938fn import_extra_metadata_entries_from_heads(
939    git_repo: &gix::Repository,
940    mut_table: &mut MutableTable,
941    _table_lock: &FileLock,
942    head_ids: &HashSet<&CommitId>,
943    shallow_roots: &[CommitId],
944) -> BackendResult<()> {
945    let mut work_ids = head_ids
946        .iter()
947        .filter(|&id| mut_table.get_value(id.as_bytes()).is_none())
948        .map(|&id| id.clone())
949        .collect_vec();
950    while let Some(id) = work_ids.pop() {
951        let git_object = git_repo
952            .find_object(validate_git_object_id(&id)?)
953            .map_err(|err| map_not_found_err(err, &id))?;
954        let is_shallow = shallow_roots.contains(&id);
955        // TODO(#1624): Should we read the root tree here and check if it has a
956        // `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
957        // change the description of a commit with tree-level conflicts.
958        let commit = commit_from_git_without_root_parent(&id, &git_object, is_shallow)?;
959        mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
960        work_ids.extend(
961            commit
962                .parents
963                .into_iter()
964                .filter(|id| mut_table.get_value(id.as_bytes()).is_none()),
965        );
966    }
967    Ok(())
968}
969
970impl Debug for GitBackend {
971    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
972        f.debug_struct("GitBackend")
973            .field("path", &self.git_repo_path())
974            .finish()
975    }
976}
977
978#[async_trait]
979impl Backend for GitBackend {
    /// Returns the name of this backend by delegating to the associated
    /// function `Self::name()`.
    fn name(&self) -> &str {
        Self::name()
    }
983
    /// Returns the length in bytes of commit ids in this backend
    /// (`HASH_LENGTH`).
    fn commit_id_length(&self) -> usize {
        HASH_LENGTH
    }
987
    /// Returns the length of change ids in this backend (`CHANGE_ID_LENGTH`).
    fn change_id_length(&self) -> usize {
        CHANGE_ID_LENGTH
    }
991
    /// Returns the id of the virtual root commit stored in this backend.
    fn root_commit_id(&self) -> &CommitId {
        &self.root_commit_id
    }
995
    /// Returns the change id of the root commit stored in this backend.
    fn root_change_id(&self) -> &ChangeId {
        &self.root_change_id
    }
999
    /// Returns the id of the empty tree stored in this backend.
    fn empty_tree_id(&self) -> &TreeId {
        &self.empty_tree_id
    }
1003
    /// Reports a concurrency level of 1 for this backend.
    fn concurrency(&self) -> usize {
        1
    }
1007
1008    async fn read_file(
1009        &self,
1010        _path: &RepoPath,
1011        id: &FileId,
1012    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>> {
1013        let data = self.read_file_sync(id)?;
1014        Ok(Box::pin(Cursor::new(data)))
1015    }
1016
1017    async fn write_file(
1018        &self,
1019        _path: &RepoPath,
1020        contents: &mut (dyn AsyncRead + Send + Unpin),
1021    ) -> BackendResult<FileId> {
1022        let mut bytes = Vec::new();
1023        contents.read_to_end(&mut bytes).await.unwrap();
1024        let locked_repo = self.lock_git_repo();
1025        let oid = locked_repo
1026            .write_blob(bytes)
1027            .map_err(|err| BackendError::WriteObject {
1028                object_type: "file",
1029                source: Box::new(err),
1030            })?;
1031        Ok(FileId::new(oid.as_bytes().to_vec()))
1032    }
1033
1034    async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
1035        let git_blob_id = validate_git_object_id(id)?;
1036        let locked_repo = self.lock_git_repo();
1037        let mut blob = locked_repo
1038            .find_object(git_blob_id)
1039            .map_err(|err| map_not_found_err(err, id))?
1040            .try_into_blob()
1041            .map_err(|err| to_read_object_err(err, id))?;
1042        let target = String::from_utf8(blob.take_data())
1043            .map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?;
1044        Ok(target)
1045    }
1046
1047    async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
1048        let locked_repo = self.lock_git_repo();
1049        let oid =
1050            locked_repo
1051                .write_blob(target.as_bytes())
1052                .map_err(|err| BackendError::WriteObject {
1053                    object_type: "symlink",
1054                    source: Box::new(err),
1055                })?;
1056        Ok(SymlinkId::new(oid.as_bytes().to_vec()))
1057    }
1058
1059    async fn read_copy(&self, _id: &CopyId) -> BackendResult<CopyHistory> {
1060        Err(BackendError::Unsupported(
1061            "The Git backend doesn't support tracked copies yet".to_string(),
1062        ))
1063    }
1064
1065    async fn write_copy(&self, _contents: &CopyHistory) -> BackendResult<CopyId> {
1066        Err(BackendError::Unsupported(
1067            "The Git backend doesn't support tracked copies yet".to_string(),
1068        ))
1069    }
1070
1071    async fn get_related_copies(&self, _copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>> {
1072        Err(BackendError::Unsupported(
1073            "The Git backend doesn't support tracked copies yet".to_string(),
1074        ))
1075    }
1076
1077    async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
1078        if id == &self.empty_tree_id {
1079            return Ok(Tree::default());
1080        }
1081        let git_tree_id = validate_git_object_id(id)?;
1082
1083        let locked_repo = self.lock_git_repo();
1084        let git_tree = locked_repo
1085            .find_object(git_tree_id)
1086            .map_err(|err| map_not_found_err(err, id))?
1087            .try_into_tree()
1088            .map_err(|err| to_read_object_err(err, id))?;
1089        let mut entries: Vec<_> = git_tree
1090            .iter()
1091            .map(|entry| -> BackendResult<_> {
1092                let entry = entry.map_err(|err| to_read_object_err(err, id))?;
1093                let name = RepoPathComponentBuf::new(
1094                    str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?,
1095                )
1096                .unwrap();
1097                let value = match entry.mode().kind() {
1098                    gix::object::tree::EntryKind::Tree => {
1099                        let id = TreeId::from_bytes(entry.oid().as_bytes());
1100                        TreeValue::Tree(id)
1101                    }
1102                    gix::object::tree::EntryKind::Blob => {
1103                        let id = FileId::from_bytes(entry.oid().as_bytes());
1104                        TreeValue::File {
1105                            id,
1106                            executable: false,
1107                            copy_id: CopyId::placeholder(),
1108                        }
1109                    }
1110                    gix::object::tree::EntryKind::BlobExecutable => {
1111                        let id = FileId::from_bytes(entry.oid().as_bytes());
1112                        TreeValue::File {
1113                            id,
1114                            executable: true,
1115                            copy_id: CopyId::placeholder(),
1116                        }
1117                    }
1118                    gix::object::tree::EntryKind::Link => {
1119                        let id = SymlinkId::from_bytes(entry.oid().as_bytes());
1120                        TreeValue::Symlink(id)
1121                    }
1122                    gix::object::tree::EntryKind::Commit => {
1123                        let id = CommitId::from_bytes(entry.oid().as_bytes());
1124                        TreeValue::GitSubmodule(id)
1125                    }
1126                };
1127                Ok((name, value))
1128            })
1129            .try_collect()?;
1130        // While Git tree entries are sorted, the rule is slightly different.
1131        // Directory names are sorted as if they had trailing "/".
1132        if !entries.is_sorted_by_key(|(name, _)| name) {
1133            entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
1134        }
1135        Ok(Tree::from_sorted_entries(entries))
1136    }
1137
1138    async fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
1139        // Tree entries to be written must be sorted by Entry::filename(), which
1140        // is slightly different from the order of our backend::Tree.
1141        let entries = contents
1142            .entries()
1143            .map(|entry| {
1144                let filename = BString::from(entry.name().as_internal_str());
1145                match entry.value() {
1146                    TreeValue::File {
1147                        id,
1148                        executable: false,
1149                        copy_id: _, // TODO: Use the value
1150                    } => gix::objs::tree::Entry {
1151                        mode: gix::object::tree::EntryKind::Blob.into(),
1152                        filename,
1153                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1154                    },
1155                    TreeValue::File {
1156                        id,
1157                        executable: true,
1158                        copy_id: _, // TODO: Use the value
1159                    } => gix::objs::tree::Entry {
1160                        mode: gix::object::tree::EntryKind::BlobExecutable.into(),
1161                        filename,
1162                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1163                    },
1164                    TreeValue::Symlink(id) => gix::objs::tree::Entry {
1165                        mode: gix::object::tree::EntryKind::Link.into(),
1166                        filename,
1167                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1168                    },
1169                    TreeValue::Tree(id) => gix::objs::tree::Entry {
1170                        mode: gix::object::tree::EntryKind::Tree.into(),
1171                        filename,
1172                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1173                    },
1174                    TreeValue::GitSubmodule(id) => gix::objs::tree::Entry {
1175                        mode: gix::object::tree::EntryKind::Commit.into(),
1176                        filename,
1177                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1178                    },
1179                }
1180            })
1181            .sorted_unstable()
1182            .collect();
1183        let locked_repo = self.lock_git_repo();
1184        let oid = locked_repo
1185            .write_object(gix::objs::Tree { entries })
1186            .map_err(|err| BackendError::WriteObject {
1187                object_type: "tree",
1188                source: Box::new(err),
1189            })?;
1190        Ok(TreeId::from_bytes(oid.as_bytes()))
1191    }
1192
1193    #[tracing::instrument(skip(self))]
1194    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
1195        if *id == self.root_commit_id {
1196            return Ok(make_root_commit(
1197                self.root_change_id().clone(),
1198                self.empty_tree_id.clone(),
1199            ));
1200        }
1201        let git_commit_id = validate_git_object_id(id)?;
1202
1203        let mut commit = {
1204            let locked_repo = self.lock_git_repo();
1205            let git_object = locked_repo
1206                .find_object(git_commit_id)
1207                .map_err(|err| map_not_found_err(err, id))?;
1208            let is_shallow = self.shallow_root_ids(&locked_repo)?.contains(id);
1209            commit_from_git_without_root_parent(id, &git_object, is_shallow)?
1210        };
1211        if commit.parents.is_empty() {
1212            commit.parents.push(self.root_commit_id.clone());
1213        }
1214
1215        let table = self.cached_extra_metadata_table()?;
1216        if let Some(extras) = table.get_value(id.as_bytes()) {
1217            deserialize_extras(&mut commit, extras);
1218        } else {
1219            // TODO: Remove this hack and map to ObjectNotFound error if we're sure that
1220            // there are no reachable ancestor commits without extras metadata. Git commits
1221            // imported by jj < 0.8.0 might not have extras (#924).
1222            // https://github.com/jj-vcs/jj/issues/2343
1223            tracing::info!("unimported Git commit found");
1224            self.import_head_commits([id])?;
1225            let table = self.cached_extra_metadata_table()?;
1226            let extras = table.get_value(id.as_bytes()).unwrap();
1227            deserialize_extras(&mut commit, extras);
1228        }
1229        Ok(commit)
1230    }
1231
    /// Writes `contents` as a Git commit object, pins it with a no-gc ref, and
    /// records its jj-specific extras in the extra-metadata table.
    ///
    /// Returns the id of the written commit together with `contents`, updated
    /// to reflect what was actually written: `secure_sig` is set when
    /// `sign_with` is provided, and the committer timestamp is replaced by the
    /// one stored in the Git object.
    ///
    /// # Errors
    ///
    /// - `BackendError::Other` if `contents` has no parents, or if the no-gc
    ///   ref cannot be updated.
    /// - `BackendError::Unsupported` if the root commit is one of several
    ///   parents.
    /// - `BackendError::WriteObject` if a Git object cannot be written or if
    ///   signing fails.
    /// - Any error from validating ids or from reading/saving the extras
    ///   table.
    ///
    /// # Panics
    ///
    /// Panics if `contents.secure_sig` is already set, or if a conflict label
    /// contains a newline.
    async fn write_commit(
        &self,
        mut contents: Commit,
        mut sign_with: Option<&mut SigningFn>,
    ) -> BackendResult<(CommitId, Commit)> {
        assert!(contents.secure_sig.is_none(), "commit.secure_sig was set");

        let locked_repo = self.lock_git_repo();
        let tree_ids = &contents.root_tree;
        // A conflicted root tree is materialized as a special Git tree.
        let git_tree_id = match tree_ids.as_resolved() {
            Some(tree_id) => validate_git_object_id(tree_id)?,
            None => write_tree_conflict(&locked_repo, tree_ids)?,
        };
        let author = signature_to_git(&contents.author);
        let mut committer = signature_to_git(&contents.committer);
        let message = &contents.description;
        if contents.parents.is_empty() {
            return Err(BackendError::Other(
                "Cannot write a commit with no parents".into(),
            ));
        }
        let mut parents = SmallVec::new();
        for parent_id in &contents.parents {
            if *parent_id == self.root_commit_id {
                // Git doesn't have a root commit, so if the parent is the root commit, we don't
                // add it to the list of parents to write in the Git commit. We also check that
                // there are no other parents since Git cannot represent a merge between a root
                // commit and another commit.
                if contents.parents.len() > 1 {
                    return Err(BackendError::Unsupported(
                        "The Git backend does not support creating merge commits with the root \
                         commit as one of the parents."
                            .to_owned(),
                    ));
                }
            } else {
                parents.push(validate_git_object_id(parent_id)?);
            }
        }
        let mut extra_headers: Vec<(BString, BString)> = vec![];
        if !contents.conflict_labels.is_resolved() {
            // Labels cannot contain '\n' since we use it as a separator in the header.
            assert!(
                contents
                    .conflict_labels
                    .iter()
                    .all(|label| !label.contains('\n'))
            );
            let mut joined_with_newlines = contents.conflict_labels.iter().join("\n");
            joined_with_newlines.push('\n');
            extra_headers.push((
                JJ_CONFLICT_LABELS_COMMIT_HEADER.into(),
                joined_with_newlines.into(),
            ));
        }
        if !tree_ids.is_resolved() {
            // Record the ids of all conflict terms, separated by spaces.
            let value = tree_ids.iter().map(|id| id.hex()).join(" ");
            extra_headers.push((JJ_TREES_COMMIT_HEADER.into(), value.into()));
        }
        if self.write_change_id_header {
            extra_headers.push((
                CHANGE_ID_COMMIT_HEADER.into(),
                contents.change_id.reverse_hex().into(),
            ));
        }

        // If the commit refers to the empty tree, write the empty tree object
        // to the repository as well.
        if tree_ids.iter().any(|id| id == &self.empty_tree_id) {
            let tree = gix::objs::Tree::empty();
            let tree_id =
                locked_repo
                    .write_object(&tree)
                    .map_err(|err| BackendError::WriteObject {
                        object_type: "tree",
                        source: Box::new(err),
                    })?;
            assert!(tree_id.is_empty_tree());
        }

        let extras = serialize_extras(&contents);

        // If two writers write commits of the same id with different metadata, they
        // will both succeed and the metadata entries will be "merged" later. Since
        // metadata entry is keyed by the commit id, one of the entries would be lost.
        // To prevent such race condition locally, we extend the scope covered by the
        // table lock. This is still racy if multiple machines are involved and the
        // repository is rsync-ed.
        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
        // Retry with an adjusted committer timestamp until the resulting commit
        // id does not clash with an existing entry that has different extras.
        let id = loop {
            let mut commit = gix::objs::Commit {
                message: message.to_owned().into(),
                tree: git_tree_id,
                author: author.clone(),
                committer: committer.clone(),
                encoding: None,
                parents: parents.clone(),
                extra_headers: extra_headers.clone(),
            };

            if let Some(sign) = &mut sign_with {
                // we don't use gix pool, but at least use their heuristic
                let mut data = Vec::with_capacity(512);
                commit.write_to(&mut data).unwrap();

                let sig = sign(&data).map_err(|err| BackendError::WriteObject {
                    object_type: "commit",
                    source: Box::new(err),
                })?;
                commit
                    .extra_headers
                    .push(("gpgsig".into(), sig.clone().into()));
                // Keep both the signed payload and the signature with the commit.
                contents.secure_sig = Some(SecureSig { data, sig });
            }

            let git_id =
                locked_repo
                    .write_object(&commit)
                    .map_err(|err| BackendError::WriteObject {
                        object_type: "commit",
                        source: Box::new(err),
                    })?;

            match table.get_value(git_id.as_bytes()) {
                Some(existing_extras) if existing_extras != extras => {
                    // It's possible a commit already exists with the same
                    // commit id but different change id. Adjust the timestamp
                    // until this is no longer the case.
                    //
                    // For example, this can happen when rebasing duplicate
                    // commits, https://github.com/jj-vcs/jj/issues/694.
                    //
                    // `jj` resets the committer timestamp to the current
                    // timestamp whenever it rewrites a commit. So, it's
                    // unlikely for the timestamp to be 0 even if the original
                    // commit had its timestamp set to 0. Moreover, we test that
                    // a commit with a negative timestamp can still be written
                    // and read back by `jj`.
                    committer.time.seconds -= 1;
                }
                _ => break CommitId::from_bytes(git_id.as_bytes()),
            }
        };

        // Everything up to this point had no permanent effect on the repo except
        // GC-able objects
        locked_repo
            .edit_reference(to_no_gc_ref_update(&id))
            .map_err(|err| BackendError::Other(Box::new(err)))?;

        // Update the signature to match the one that was actually written to the object
        // store
        contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000);
        let mut mut_table = table.start_mutation();
        mut_table.add_entry(id.to_bytes(), extras);
        self.save_extra_metadata_table(mut_table, &table_lock)?;
        Ok((id, contents))
    }
1388
1389    fn get_copy_records(
1390        &self,
1391        paths: Option<&[RepoPathBuf]>,
1392        root_id: &CommitId,
1393        head_id: &CommitId,
1394    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>> {
1395        let repo = self.git_repo();
1396        let root_tree = self.read_tree_for_commit(&repo, root_id)?;
1397        let head_tree = self.read_tree_for_commit(&repo, head_id)?;
1398
1399        let change_to_copy_record =
1400            |change: gix::object::tree::diff::Change| -> BackendResult<Option<CopyRecord>> {
1401                let gix::object::tree::diff::Change::Rewrite {
1402                    source_location,
1403                    source_entry_mode,
1404                    source_id,
1405                    entry_mode: dest_entry_mode,
1406                    location: dest_location,
1407                    ..
1408                } = change
1409                else {
1410                    return Ok(None);
1411                };
1412                // TODO: Renamed symlinks cannot be returned because CopyRecord
1413                // expects `source_file: FileId`.
1414                if !source_entry_mode.is_blob() || !dest_entry_mode.is_blob() {
1415                    return Ok(None);
1416                }
1417
1418                let source = str::from_utf8(source_location)
1419                    .map_err(|err| to_invalid_utf8_err(err, root_id))?;
1420                let dest = str::from_utf8(dest_location)
1421                    .map_err(|err| to_invalid_utf8_err(err, head_id))?;
1422
1423                let target = RepoPathBuf::from_internal_string(dest).unwrap();
1424                if !paths.is_none_or(|paths| paths.contains(&target)) {
1425                    return Ok(None);
1426                }
1427
1428                Ok(Some(CopyRecord {
1429                    target,
1430                    target_commit: head_id.clone(),
1431                    source: RepoPathBuf::from_internal_string(source).unwrap(),
1432                    source_file: FileId::from_bytes(source_id.as_bytes()),
1433                    source_commit: root_id.clone(),
1434                }))
1435            };
1436
1437        let mut records: Vec<BackendResult<CopyRecord>> = Vec::new();
1438        root_tree
1439            .changes()
1440            .map_err(|err| BackendError::Other(err.into()))?
1441            .options(|opts| {
1442                opts.track_path().track_rewrites(Some(gix::diff::Rewrites {
1443                    copies: Some(gix::diff::rewrites::Copies {
1444                        source: gix::diff::rewrites::CopySource::FromSetOfModifiedFiles,
1445                        percentage: Some(0.5),
1446                    }),
1447                    percentage: Some(0.5),
1448                    limit: 1000,
1449                    track_empty: false,
1450                }));
1451            })
1452            .for_each_to_obtain_tree_with_cache(
1453                &head_tree,
1454                &mut self.new_diff_platform()?,
1455                |change| -> BackendResult<_> {
1456                    match change_to_copy_record(change) {
1457                        Ok(None) => {}
1458                        Ok(Some(change)) => records.push(Ok(change)),
1459                        Err(err) => records.push(Err(err)),
1460                    }
1461                    Ok(gix::object::tree::diff::Action::Continue(()))
1462                },
1463            )
1464            .map_err(|err| BackendError::Other(err.into()))?;
1465        Ok(Box::pin(futures::stream::iter(records)))
1466    }
1467
1468    #[tracing::instrument(skip(self, index))]
1469    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
1470        let git_repo = self.lock_git_repo();
1471        let new_heads = index
1472            .all_heads_for_gc()
1473            .map_err(|err| BackendError::Other(err.into()))?
1474            .filter(|id| *id != self.root_commit_id);
1475        recreate_no_gc_refs(&git_repo, new_heads, keep_newer)?;
1476
1477        // No locking is needed since we aren't going to add new "commits".
1478        let table = self.cached_extra_metadata_table()?;
1479        // TODO: remove unreachable entries from extras table if segment file
1480        // mtime <= keep_newer? (it won't be consistent with no-gc refs
1481        // preserved by the keep_newer timestamp though)
1482        self.extra_metadata_store
1483            .gc(&table, keep_newer)
1484            .map_err(|err| BackendError::Other(err.into()))?;
1485
1486        run_git_gc(
1487            self.git_executable.as_ref(),
1488            self.git_repo_path(),
1489            keep_newer,
1490        )
1491        .map_err(|err| BackendError::Other(err.into()))?;
1492        // Since "git gc" will move loose refs into packed refs, in-memory
1493        // packed-refs cache should be invalidated without relying on mtime.
1494        git_repo.refs.force_refresh_packed_buffer().ok();
1495        Ok(())
1496    }
1497}
1498
1499/// Write a tree conflict as a special tree with `.jjconflict-base-N` and
1500/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd.
1501fn write_tree_conflict(
1502    repo: &gix::Repository,
1503    conflict: &Merge<TreeId>,
1504) -> BackendResult<gix::ObjectId> {
1505    // Tree entries to be written must be sorted by Entry::filename().
1506    let mut entries = itertools::chain(
1507        conflict
1508            .removes()
1509            .enumerate()
1510            .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)),
1511        conflict
1512            .adds()
1513            .enumerate()
1514            .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)),
1515    )
1516    .map(|(name, tree_id)| gix::objs::tree::Entry {
1517        mode: gix::object::tree::EntryKind::Tree.into(),
1518        filename: name.into(),
1519        oid: gix::ObjectId::from_bytes_or_panic(tree_id.as_bytes()),
1520    })
1521    .collect_vec();
1522    let readme_id = repo
1523        .write_blob(
1524            r#"This commit was made by jj, https://jj-vcs.dev/.
1525The commit contains file conflicts, and therefore looks wrong when used with plain
1526Git or other tools that are unfamiliar with jj.
1527
1528The .jjconflict-* directories represent the different inputs to the conflict.
1529For details, see
1530https://docs.jj-vcs.dev/prerelease/git-compatibility/#format-mapping-details
1531
1532If you see this file in your working copy, it probably means that you used a
1533regular `git` command to check out a conflicted commit. Use `jj abandon` to
1534recover.
1535"#,
1536        )
1537        .map_err(|err| {
1538            BackendError::Other(format!("Failed to write README for conflict tree: {err}").into())
1539        })?
1540        .detach();
1541    entries.push(gix::objs::tree::Entry {
1542        mode: gix::object::tree::EntryKind::Blob.into(),
1543        filename: "README".into(),
1544        oid: readme_id,
1545    });
1546    entries.sort_unstable();
1547    let id = repo
1548        .write_object(gix::objs::Tree { entries })
1549        .map_err(|err| BackendError::WriteObject {
1550            object_type: "tree",
1551            source: Box::new(err),
1552        })?;
1553    Ok(id.detach())
1554}
1555
1556#[cfg(test)]
1557mod tests {
1558    use assert_matches::assert_matches;
1559    use gix::date::parse::TimeBuf;
1560    use gix::objs::CommitRef;
1561    use indoc::indoc;
1562    use pollster::FutureExt as _;
1563
1564    use super::*;
1565    use crate::config::StackedConfig;
1566    use crate::content_hash::blake2b_hash;
1567    use crate::hex_util;
1568    use crate::tests::new_temp_dir;
1569
1570    const GIT_USER: &str = "Someone";
1571    const GIT_EMAIL: &str = "someone@example.com";
1572
1573    fn git_config() -> Vec<bstr::BString> {
1574        vec![
1575            format!("user.name = {GIT_USER}").into(),
1576            format!("user.email = {GIT_EMAIL}").into(),
1577            "init.defaultBranch = master".into(),
1578        ]
1579    }
1580
1581    fn open_options() -> gix::open::Options {
1582        gix::open::Options::isolated()
1583            .config_overrides(git_config())
1584            .strict_config(true)
1585    }
1586
1587    fn git_init(directory: impl AsRef<Path>) -> gix::Repository {
1588        gix::ThreadSafeRepository::init_opts(
1589            directory,
1590            gix::create::Kind::WithWorktree,
1591            gix::create::Options::default(),
1592            open_options(),
1593        )
1594        .unwrap()
1595        .to_thread_local()
1596    }
1597
1598    #[test]
1599    fn read_plain_git_commit() {
1600        let settings = user_settings();
1601        let temp_dir = new_temp_dir();
1602        let store_path = temp_dir.path();
1603        let git_repo_path = temp_dir.path().join("git");
1604        let git_repo = git_init(git_repo_path);
1605
1606        // Add a commit with some files in
1607        let blob1 = git_repo.write_blob(b"content1").unwrap().detach();
1608        let blob2 = git_repo.write_blob(b"normal").unwrap().detach();
1609        let mut dir_tree_editor = git_repo.empty_tree().edit().unwrap();
1610        dir_tree_editor
1611            .upsert("normal", gix::object::tree::EntryKind::Blob, blob1)
1612            .unwrap();
1613        dir_tree_editor
1614            .upsert("symlink", gix::object::tree::EntryKind::Link, blob2)
1615            .unwrap();
1616        let dir_tree_id = dir_tree_editor.write().unwrap().detach();
1617        let mut root_tree_builder = git_repo.empty_tree().edit().unwrap();
1618        root_tree_builder
1619            .upsert("dir", gix::object::tree::EntryKind::Tree, dir_tree_id)
1620            .unwrap();
1621        let root_tree_id = root_tree_builder.write().unwrap().detach();
1622        let git_author = gix::actor::Signature {
1623            name: "git author".into(),
1624            email: "git.author@example.com".into(),
1625            time: gix::date::Time::new(1000, 60 * 60),
1626        };
1627        let git_committer = gix::actor::Signature {
1628            name: "git committer".into(),
1629            email: "git.committer@example.com".into(),
1630            time: gix::date::Time::new(2000, -480 * 60),
1631        };
1632        let git_commit_id = git_repo
1633            .commit_as(
1634                git_committer.to_ref(&mut TimeBuf::default()),
1635                git_author.to_ref(&mut TimeBuf::default()),
1636                "refs/heads/dummy",
1637                "git commit message",
1638                root_tree_id,
1639                [] as [gix::ObjectId; 0],
1640            )
1641            .unwrap()
1642            .detach();
1643        git_repo
1644            .find_reference("refs/heads/dummy")
1645            .unwrap()
1646            .delete()
1647            .unwrap();
1648        let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263");
1649        // The change id is the leading reverse bits of the commit id
1650        let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25");
1651        // Check that the git commit above got the hash we expect
1652        assert_eq!(
1653            git_commit_id.as_bytes(),
1654            commit_id.as_bytes(),
1655            "{git_commit_id:?} vs {commit_id:?}"
1656        );
1657
1658        // Add an empty commit on top
1659        let git_commit_id2 = git_repo
1660            .commit_as(
1661                git_committer.to_ref(&mut TimeBuf::default()),
1662                git_author.to_ref(&mut TimeBuf::default()),
1663                "refs/heads/dummy2",
1664                "git commit message 2",
1665                root_tree_id,
1666                [git_commit_id],
1667            )
1668            .unwrap()
1669            .detach();
1670        git_repo
1671            .find_reference("refs/heads/dummy2")
1672            .unwrap()
1673            .delete()
1674            .unwrap();
1675        let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes());
1676
1677        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1678
1679        // Import the head commit and its ancestors
1680        backend.import_head_commits([&commit_id2]).unwrap();
1681        // Ref should be created only for the head commit
1682        let git_refs = backend
1683            .git_repo()
1684            .references()
1685            .unwrap()
1686            .prefixed("refs/jj/keep/")
1687            .unwrap()
1688            .map(|git_ref| git_ref.unwrap().id().detach())
1689            .collect_vec();
1690        assert_eq!(git_refs, vec![git_commit_id2]);
1691
1692        let commit = backend.read_commit(&commit_id).block_on().unwrap();
1693        assert_eq!(&commit.change_id, &change_id);
1694        assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
1695        assert_eq!(commit.predecessors, vec![]);
1696        assert_eq!(
1697            commit.root_tree,
1698            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1699        );
1700        assert_eq!(commit.description, "git commit message");
1701        assert_eq!(commit.author.name, "git author");
1702        assert_eq!(commit.author.email, "git.author@example.com");
1703        assert_eq!(
1704            commit.author.timestamp.timestamp,
1705            MillisSinceEpoch(1000 * 1000)
1706        );
1707        assert_eq!(commit.author.timestamp.tz_offset, 60);
1708        assert_eq!(commit.committer.name, "git committer");
1709        assert_eq!(commit.committer.email, "git.committer@example.com");
1710        assert_eq!(
1711            commit.committer.timestamp.timestamp,
1712            MillisSinceEpoch(2000 * 1000)
1713        );
1714        assert_eq!(commit.committer.timestamp.tz_offset, -480);
1715
1716        let root_tree = backend
1717            .read_tree(
1718                RepoPath::root(),
1719                &TreeId::from_bytes(root_tree_id.as_bytes()),
1720            )
1721            .block_on()
1722            .unwrap();
1723        let mut root_entries = root_tree.entries();
1724        let dir = root_entries.next().unwrap();
1725        assert_eq!(root_entries.next(), None);
1726        assert_eq!(dir.name().as_internal_str(), "dir");
1727        assert_eq!(
1728            dir.value(),
1729            &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes()))
1730        );
1731
1732        let dir_tree = backend
1733            .read_tree(
1734                RepoPath::from_internal_string("dir").unwrap(),
1735                &TreeId::from_bytes(dir_tree_id.as_bytes()),
1736            )
1737            .block_on()
1738            .unwrap();
1739        let mut entries = dir_tree.entries();
1740        let file = entries.next().unwrap();
1741        let symlink = entries.next().unwrap();
1742        assert_eq!(entries.next(), None);
1743        assert_eq!(file.name().as_internal_str(), "normal");
1744        assert_eq!(
1745            file.value(),
1746            &TreeValue::File {
1747                id: FileId::from_bytes(blob1.as_bytes()),
1748                executable: false,
1749                copy_id: CopyId::placeholder(),
1750            }
1751        );
1752        assert_eq!(symlink.name().as_internal_str(), "symlink");
1753        assert_eq!(
1754            symlink.value(),
1755            &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
1756        );
1757
1758        let commit2 = backend.read_commit(&commit_id2).block_on().unwrap();
1759        assert_eq!(commit2.parents, vec![commit_id.clone()]);
1760        assert_eq!(commit.predecessors, vec![]);
1761        assert_eq!(
1762            commit.root_tree,
1763            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1764        );
1765    }
1766
1767    #[test]
1768    fn read_git_commit_without_importing() {
1769        let settings = user_settings();
1770        let temp_dir = new_temp_dir();
1771        let store_path = temp_dir.path();
1772        let git_repo_path = temp_dir.path().join("git");
1773        let git_repo = git_init(&git_repo_path);
1774
1775        let signature = gix::actor::Signature {
1776            name: GIT_USER.into(),
1777            email: GIT_EMAIL.into(),
1778            time: gix::date::Time::now_utc(),
1779        };
1780        let empty_tree_id =
1781            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1782        let git_commit_id = git_repo
1783            .commit_as(
1784                signature.to_ref(&mut TimeBuf::default()),
1785                signature.to_ref(&mut TimeBuf::default()),
1786                "refs/heads/main",
1787                "git commit message",
1788                empty_tree_id,
1789                [] as [gix::ObjectId; 0],
1790            )
1791            .unwrap();
1792
1793        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1794
1795        // read_commit() without import_head_commits() works as of now. This might be
1796        // changed later.
1797        assert!(
1798            backend
1799                .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1800                .block_on()
1801                .is_ok()
1802        );
1803        assert!(
1804            backend
1805                .cached_extra_metadata_table()
1806                .unwrap()
1807                .get_value(git_commit_id.as_bytes())
1808                .is_some(),
1809            "extra metadata should have been be created"
1810        );
1811    }
1812
1813    #[test]
1814    fn read_signed_git_commit() {
1815        let settings = user_settings();
1816        let temp_dir = new_temp_dir();
1817        let store_path = temp_dir.path();
1818        let git_repo_path = temp_dir.path().join("git");
1819        let git_repo = git_init(git_repo_path);
1820
1821        let signature = gix::actor::Signature {
1822            name: GIT_USER.into(),
1823            email: GIT_EMAIL.into(),
1824            time: gix::date::Time::now_utc(),
1825        };
1826        let empty_tree_id =
1827            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1828
1829        let secure_sig =
1830            "here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP\n";
1831
1832        let mut commit = gix::objs::Commit {
1833            tree: empty_tree_id,
1834            parents: smallvec::SmallVec::new(),
1835            author: signature.clone(),
1836            committer: signature.clone(),
1837            encoding: None,
1838            message: "git commit message".into(),
1839            extra_headers: Vec::new(),
1840        };
1841
1842        let mut commit_buf = Vec::new();
1843        commit.write_to(&mut commit_buf).unwrap();
1844        let commit_str = str::from_utf8(&commit_buf).unwrap();
1845
1846        commit
1847            .extra_headers
1848            .push(("gpgsig".into(), secure_sig.into()));
1849
1850        let git_commit_id = git_repo.write_object(&commit).unwrap();
1851
1852        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1853
1854        let commit = backend
1855            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1856            .block_on()
1857            .unwrap();
1858
1859        let sig = commit.secure_sig.expect("failed to read the signature");
1860
1861        // converting to string for nicer assert diff
1862        assert_eq!(str::from_utf8(&sig.sig).unwrap(), secure_sig);
1863        assert_eq!(str::from_utf8(&sig.data).unwrap(), commit_str);
1864    }
1865
1866    #[test]
1867    fn change_id_parsing() {
1868        let id = |commit_object_bytes: &[u8]| {
1869            extract_change_id_from_commit(&CommitRef::from_bytes(commit_object_bytes).unwrap())
1870        };
1871
1872        let commit_with_id = indoc! {b"
1873            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1874            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1875            author JJ Fan <jjfan@example.com> 1757112665 -0700
1876            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1877            extra-header blah
1878            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1879
1880            test-commit
1881        "};
1882        insta::assert_compact_debug_snapshot!(
1883            id(commit_with_id),
1884            @r#"Some(ChangeId("efbc06dc4721683f2a45568dbda31e99"))"#
1885        );
1886
1887        let commit_without_id = indoc! {b"
1888            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1889            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1890            author JJ Fan <jjfan@example.com> 1757112665 -0700
1891            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1892            extra-header blah
1893
1894            no id in header
1895        "};
1896        insta::assert_compact_debug_snapshot!(
1897            id(commit_without_id),
1898            @"None"
1899        );
1900
1901        let commit = indoc! {b"
1902            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1903            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1904            author JJ Fan <jjfan@example.com> 1757112665 -0700
1905            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1906            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1907            extra-header blah
1908            change-id abcabcabcabcabcabcabcabcabcabcab
1909
1910            valid change id first
1911        "};
1912        insta::assert_compact_debug_snapshot!(
1913            id(commit),
1914            @r#"Some(ChangeId("efbc06dc4721683f2a45568dbda31e99"))"#
1915        );
1916
1917        // We only look at the first change id if multiple are present, so this should
1918        // error
1919        let commit = indoc! {b"
1920            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1921            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1922            author JJ Fan <jjfan@example.com> 1757112665 -0700
1923            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1924            change-id abcabcabcabcabcabcabcabcabcabcab
1925            extra-header blah
1926            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1927
1928            valid change id first
1929        "};
1930        insta::assert_compact_debug_snapshot!(
1931            id(commit),
1932            @"None"
1933        );
1934    }
1935
1936    #[test]
1937    fn round_trip_change_id_via_git_header() {
1938        let settings = user_settings();
1939        let temp_dir = new_temp_dir();
1940
1941        let store_path = temp_dir.path().join("store");
1942        fs::create_dir(&store_path).unwrap();
1943        let empty_store_path = temp_dir.path().join("empty_store");
1944        fs::create_dir(&empty_store_path).unwrap();
1945        let git_repo_path = temp_dir.path().join("git");
1946        let git_repo = git_init(git_repo_path);
1947
1948        let backend = GitBackend::init_external(&settings, &store_path, git_repo.path()).unwrap();
1949        let original_change_id = ChangeId::from_hex("1111eeee1111eeee1111eeee1111eeee");
1950        let commit = Commit {
1951            parents: vec![backend.root_commit_id().clone()],
1952            predecessors: vec![],
1953            root_tree: Merge::resolved(backend.empty_tree_id().clone()),
1954            conflict_labels: Merge::resolved(String::new()),
1955            change_id: original_change_id.clone(),
1956            description: "initial".to_string(),
1957            author: create_signature(),
1958            committer: create_signature(),
1959            secure_sig: None,
1960        };
1961
1962        let (initial_commit_id, _init_commit) =
1963            backend.write_commit(commit, None).block_on().unwrap();
1964        let commit = backend.read_commit(&initial_commit_id).block_on().unwrap();
1965        assert_eq!(
1966            commit.change_id, original_change_id,
1967            "The change-id header did not roundtrip"
1968        );
1969
1970        // Because of how change ids are also persisted in extra proto files,
1971        // initialize a new store without those files, but reuse the same git
1972        // storage. This change-id must be derived from the git commit header.
1973        let no_extra_backend =
1974            GitBackend::init_external(&settings, &empty_store_path, git_repo.path()).unwrap();
1975        let no_extra_commit = no_extra_backend
1976            .read_commit(&initial_commit_id)
1977            .block_on()
1978            .unwrap();
1979
1980        assert_eq!(
1981            no_extra_commit.change_id, original_change_id,
1982            "The change-id header did not roundtrip"
1983        );
1984    }
1985
1986    #[test]
1987    fn read_empty_string_placeholder() {
1988        let git_signature1 = gix::actor::Signature {
1989            name: EMPTY_STRING_PLACEHOLDER.into(),
1990            email: "git.author@example.com".into(),
1991            time: gix::date::Time::new(1000, 60 * 60),
1992        };
1993        let signature1 = signature_from_git(git_signature1.to_ref(&mut TimeBuf::default()));
1994        assert!(signature1.name.is_empty());
1995        assert_eq!(signature1.email, "git.author@example.com");
1996        let git_signature2 = gix::actor::Signature {
1997            name: "git committer".into(),
1998            email: EMPTY_STRING_PLACEHOLDER.into(),
1999            time: gix::date::Time::new(2000, -480 * 60),
2000        };
2001        let signature2 = signature_from_git(git_signature2.to_ref(&mut TimeBuf::default()));
2002        assert_eq!(signature2.name, "git committer");
2003        assert!(signature2.email.is_empty());
2004    }
2005
2006    #[test]
2007    fn write_empty_string_placeholder() {
2008        let signature1 = Signature {
2009            name: "".to_string(),
2010            email: "someone@example.com".to_string(),
2011            timestamp: Timestamp {
2012                timestamp: MillisSinceEpoch(0),
2013                tz_offset: 0,
2014            },
2015        };
2016        let git_signature1 = signature_to_git(&signature1);
2017        assert_eq!(git_signature1.name, EMPTY_STRING_PLACEHOLDER);
2018        assert_eq!(git_signature1.email, "someone@example.com");
2019        let signature2 = Signature {
2020            name: "Someone".to_string(),
2021            email: "".to_string(),
2022            timestamp: Timestamp {
2023                timestamp: MillisSinceEpoch(0),
2024                tz_offset: 0,
2025            },
2026        };
2027        let git_signature2 = signature_to_git(&signature2);
2028        assert_eq!(git_signature2.name, "Someone");
2029        assert_eq!(git_signature2.email, EMPTY_STRING_PLACEHOLDER);
2030    }
2031
2032    /// Test that parents get written correctly
2033    #[test]
2034    fn git_commit_parents() {
2035        let settings = user_settings();
2036        let temp_dir = new_temp_dir();
2037        let store_path = temp_dir.path();
2038        let git_repo_path = temp_dir.path().join("git");
2039        let git_repo = git_init(&git_repo_path);
2040
2041        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
2042        let mut commit = Commit {
2043            parents: vec![],
2044            predecessors: vec![],
2045            root_tree: Merge::resolved(backend.empty_tree_id().clone()),
2046            conflict_labels: Merge::resolved(String::new()),
2047            change_id: ChangeId::from_hex("abc123"),
2048            description: "".to_string(),
2049            author: create_signature(),
2050            committer: create_signature(),
2051            secure_sig: None,
2052        };
2053
2054        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2055            backend.write_commit(commit, None).block_on()
2056        };
2057
2058        // No parents
2059        commit.parents = vec![];
2060        assert_matches!(
2061            write_commit(commit.clone()),
2062            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
2063        );
2064
2065        // Only root commit as parent
2066        commit.parents = vec![backend.root_commit_id().clone()];
2067        let first_id = write_commit(commit.clone()).unwrap().0;
2068        let first_commit = backend.read_commit(&first_id).block_on().unwrap();
2069        assert_eq!(first_commit, commit);
2070        let first_git_commit = git_repo.find_commit(git_id(&first_id)).unwrap();
2071        assert!(first_git_commit.parent_ids().collect_vec().is_empty());
2072
2073        // Only non-root commit as parent
2074        commit.parents = vec![first_id.clone()];
2075        let second_id = write_commit(commit.clone()).unwrap().0;
2076        let second_commit = backend.read_commit(&second_id).block_on().unwrap();
2077        assert_eq!(second_commit, commit);
2078        let second_git_commit = git_repo.find_commit(git_id(&second_id)).unwrap();
2079        assert_eq!(
2080            second_git_commit.parent_ids().collect_vec(),
2081            vec![git_id(&first_id)]
2082        );
2083
2084        // Merge commit
2085        commit.parents = vec![first_id.clone(), second_id.clone()];
2086        let merge_id = write_commit(commit.clone()).unwrap().0;
2087        let merge_commit = backend.read_commit(&merge_id).block_on().unwrap();
2088        assert_eq!(merge_commit, commit);
2089        let merge_git_commit = git_repo.find_commit(git_id(&merge_id)).unwrap();
2090        assert_eq!(
2091            merge_git_commit.parent_ids().collect_vec(),
2092            vec![git_id(&first_id), git_id(&second_id)]
2093        );
2094
2095        // Merge commit with root as one parent
2096        commit.parents = vec![first_id, backend.root_commit_id().clone()];
2097        assert_matches!(
2098            write_commit(commit),
2099            Err(BackendError::Unsupported(message)) if message.contains("root commit")
2100        );
2101    }
2102
2103    #[test]
2104    fn write_tree_conflicts() {
2105        let settings = user_settings();
2106        let temp_dir = new_temp_dir();
2107        let store_path = temp_dir.path();
2108        let git_repo_path = temp_dir.path().join("git");
2109        let git_repo = git_init(&git_repo_path);
2110
2111        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
2112        let create_tree = |i| {
2113            let blob_id = git_repo.write_blob(format!("content {i}")).unwrap();
2114            let mut tree_builder = git_repo.empty_tree().edit().unwrap();
2115            tree_builder
2116                .upsert(
2117                    format!("file{i}"),
2118                    gix::object::tree::EntryKind::Blob,
2119                    blob_id,
2120                )
2121                .unwrap();
2122            TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
2123        };
2124
2125        let root_tree = Merge::from_removes_adds(
2126            vec![create_tree(0), create_tree(1)],
2127            vec![create_tree(2), create_tree(3), create_tree(4)],
2128        );
2129        let mut commit = Commit {
2130            parents: vec![backend.root_commit_id().clone()],
2131            predecessors: vec![],
2132            root_tree: root_tree.clone(),
2133            conflict_labels: Merge::resolved(String::new()),
2134            change_id: ChangeId::from_hex("abc123"),
2135            description: "".to_string(),
2136            author: create_signature(),
2137            committer: create_signature(),
2138            secure_sig: None,
2139        };
2140
2141        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2142            backend.write_commit(commit, None).block_on()
2143        };
2144
2145        // When writing a tree-level conflict, the root tree on the git side has the
2146        // individual trees as subtrees.
2147        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2148        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2149        assert_eq!(read_commit, commit);
2150        let git_commit = git_repo
2151            .find_commit(gix::ObjectId::from_bytes_or_panic(
2152                read_commit_id.as_bytes(),
2153            ))
2154            .unwrap();
2155        let git_tree = git_repo.find_tree(git_commit.tree_id().unwrap()).unwrap();
2156        assert!(
2157            git_tree
2158                .iter()
2159                .map(Result::unwrap)
2160                .filter(|entry| entry.filename() != b"README")
2161                .all(|entry| entry.mode().value() == 0o040000)
2162        );
2163        let mut iter = git_tree.iter().map(Result::unwrap);
2164        let entry = iter.next().unwrap();
2165        assert_eq!(entry.filename(), b".jjconflict-base-0");
2166        assert_eq!(
2167            entry.id().as_bytes(),
2168            root_tree.get_remove(0).unwrap().as_bytes()
2169        );
2170        let entry = iter.next().unwrap();
2171        assert_eq!(entry.filename(), b".jjconflict-base-1");
2172        assert_eq!(
2173            entry.id().as_bytes(),
2174            root_tree.get_remove(1).unwrap().as_bytes()
2175        );
2176        let entry = iter.next().unwrap();
2177        assert_eq!(entry.filename(), b".jjconflict-side-0");
2178        assert_eq!(
2179            entry.id().as_bytes(),
2180            root_tree.get_add(0).unwrap().as_bytes()
2181        );
2182        let entry = iter.next().unwrap();
2183        assert_eq!(entry.filename(), b".jjconflict-side-1");
2184        assert_eq!(
2185            entry.id().as_bytes(),
2186            root_tree.get_add(1).unwrap().as_bytes()
2187        );
2188        let entry = iter.next().unwrap();
2189        assert_eq!(entry.filename(), b".jjconflict-side-2");
2190        assert_eq!(
2191            entry.id().as_bytes(),
2192            root_tree.get_add(2).unwrap().as_bytes()
2193        );
2194        let entry = iter.next().unwrap();
2195        assert_eq!(entry.filename(), b"README");
2196        assert_eq!(entry.mode().value(), 0o100644);
2197        assert!(iter.next().is_none());
2198
2199        // When writing a single tree using the new format, it's represented by a
2200        // regular git tree.
2201        commit.root_tree = Merge::resolved(create_tree(5));
2202        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2203        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2204        assert_eq!(read_commit, commit);
2205        let git_commit = git_repo
2206            .find_commit(gix::ObjectId::from_bytes_or_panic(
2207                read_commit_id.as_bytes(),
2208            ))
2209            .unwrap();
2210        assert_eq!(
2211            Merge::resolved(TreeId::from_bytes(git_commit.tree_id().unwrap().as_bytes())),
2212            commit.root_tree
2213        );
2214    }
2215
2216    #[test]
2217    fn commit_has_ref() {
2218        let settings = user_settings();
2219        let temp_dir = new_temp_dir();
2220        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2221        let git_repo = backend.git_repo();
2222        let signature = Signature {
2223            name: "Someone".to_string(),
2224            email: "someone@example.com".to_string(),
2225            timestamp: Timestamp {
2226                timestamp: MillisSinceEpoch(0),
2227                tz_offset: 0,
2228            },
2229        };
2230        let commit = Commit {
2231            parents: vec![backend.root_commit_id().clone()],
2232            predecessors: vec![],
2233            root_tree: Merge::resolved(backend.empty_tree_id().clone()),
2234            conflict_labels: Merge::resolved(String::new()),
2235            change_id: ChangeId::new(vec![42; 16]),
2236            description: "initial".to_string(),
2237            author: signature.clone(),
2238            committer: signature,
2239            secure_sig: None,
2240        };
2241        let commit_id = backend.write_commit(commit, None).block_on().unwrap().0;
2242        let git_refs = git_repo.references().unwrap();
2243        let git_ref_ids: Vec<_> = git_refs
2244            .prefixed("refs/jj/keep/")
2245            .unwrap()
2246            .map(|x| x.unwrap().id().detach())
2247            .collect();
2248        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2249
2250        // Concurrently-running GC deletes the ref, leaving the extra metadata.
2251        for git_ref in git_refs.prefixed("refs/jj/keep/").unwrap() {
2252            git_ref.unwrap().delete().unwrap();
2253        }
2254        // Re-imported commit should have new ref.
2255        backend.import_head_commits([&commit_id]).unwrap();
2256        let git_refs = git_repo.references().unwrap();
2257        let git_ref_ids: Vec<_> = git_refs
2258            .prefixed("refs/jj/keep/")
2259            .unwrap()
2260            .map(|x| x.unwrap().id().detach())
2261            .collect();
2262        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2263    }
2264
// Importing a head list that names the same commit twice must succeed and
// leave a `refs/jj/keep/` ref that points at that commit.
#[test]
fn import_head_commits_duplicates() {
    let settings = user_settings();
    let temp_dir = new_temp_dir();
    let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
    let git_repo = backend.git_repo();

    // Create a parentless commit on the empty tree directly through gix, so
    // that the backend only learns about it via `import_head_commits`.
    let empty_tree_id =
        gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
    let signature = gix::actor::Signature {
        name: GIT_USER.into(),
        email: GIT_EMAIL.into(),
        time: gix::date::Time::now_utc(),
    };
    let mut committer_time_buf = TimeBuf::default();
    let mut author_time_buf = TimeBuf::default();
    let no_parents: [gix::ObjectId; 0] = [];
    let git_commit_id = git_repo
        .commit_as(
            signature.to_ref(&mut committer_time_buf),
            signature.to_ref(&mut author_time_buf),
            "refs/heads/main",
            "git commit message",
            empty_tree_id,
            no_parents,
        )
        .unwrap()
        .detach();
    let commit_id = CommitId::from_bytes(git_commit_id.as_bytes());

    // Ref creation shouldn't fail because of duplicated head ids.
    let duplicated_heads = [&commit_id, &commit_id];
    backend.import_head_commits(duplicated_heads).unwrap();

    // At least one keep ref must now target the imported commit.
    let references = git_repo.references().unwrap();
    let mut kept_ids = references
        .prefixed("refs/jj/keep/")
        .unwrap()
        .map(|git_ref| git_ref.unwrap().id().detach());
    assert!(kept_ids.any(|kept_id| kept_id == git_commit_id));
}
2305
// Writing a second commit that differs from the first only in its
// predecessors must still yield a distinct commit id.
#[test]
fn overlapping_git_commit_id() {
    // Writes `commit` without a signature and hands back the id together
    // with the commit as reported by the backend.
    fn write_unsigned(
        backend: &GitBackend,
        commit: Commit,
    ) -> BackendResult<(CommitId, Commit)> {
        backend.write_commit(commit, None).block_on()
    }

    let settings = user_settings();
    let temp_dir = new_temp_dir();
    let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();

    let first_commit = Commit {
        parents: vec![backend.root_commit_id().clone()],
        predecessors: vec![],
        root_tree: Merge::resolved(backend.empty_tree_id().clone()),
        conflict_labels: Merge::resolved(String::new()),
        change_id: ChangeId::from_hex("7f0a7ce70354b22efcccf7bf144017c4"),
        description: "initial".to_string(),
        author: create_signature(),
        committer: create_signature(),
        secure_sig: None,
    };
    let (first_id, mut second_commit) = write_unsigned(&backend, first_commit).unwrap();

    // The second commit is the first one plus a predecessor entry.
    second_commit.predecessors.push(first_id.clone());

    // `write_commit` should prevent the ids from being the same by changing the
    // committer timestamp of the commit it actually writes.
    let (second_id, mut written_second) =
        write_unsigned(&backend, second_commit.clone()).unwrap();

    // The commit returned by the write is what reading the new id yields.
    let reread_second = backend.read_commit(&second_id).block_on().unwrap();
    assert_eq!(reread_second, written_second);
    assert_ne!(second_id, first_id);

    // Only the committer timestamp is expected to have been adjusted.
    let requested_millis = second_commit.committer.timestamp.timestamp;
    assert_ne!(
        written_second.committer.timestamp.timestamp,
        requested_millis
    );

    // With the timestamp restored, everything else must match the request.
    written_second.committer.timestamp.timestamp = requested_millis;
    assert_eq!(written_second, second_commit);
}
2347
// Writes a commit through a fake signer and checks the raw Git object, the
// signature returned by the write, and the signature read back afterwards.
#[test]
fn write_signed_commit() {
    let settings = user_settings();
    let temp_dir = new_temp_dir();
    let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();

    let unsigned_commit = Commit {
        parents: vec![backend.root_commit_id().clone()],
        predecessors: vec![],
        root_tree: Merge::resolved(backend.empty_tree_id().clone()),
        conflict_labels: Merge::resolved(String::new()),
        change_id: ChangeId::new(vec![42; 16]),
        description: "initial".to_string(),
        author: create_signature(),
        committer: create_signature(),
        secure_sig: None,
    };

    // Deterministic stand-in for a real signer: the "signature" is a fixed
    // header followed by the hex-encoded blake2b hash of the signed payload.
    let mut signer = |data: &_| {
        let hash: String = hex_util::encode_hex(&blake2b_hash(data));
        Ok(format!("test sig\nhash={hash}\n").into_bytes())
    };

    let (commit_id, written_commit) = backend
        .write_commit(unsigned_commit, Some(&mut signer as &mut SigningFn))
        .block_on()
        .unwrap();

    // Inspect the object exactly as it was stored in the Git repository.
    let git_object_id = gix::ObjectId::from_bytes_or_panic(commit_id.as_bytes());
    let git_object = backend.git_repo().find_object(git_object_id).unwrap();
    insta::assert_snapshot!(str::from_utf8(&git_object.data).unwrap(), @r"
        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
        author Someone <someone@example.com> 0 +0000
        committer Someone <someone@example.com> 0 +0000
        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp
        gpgsig test sig
         hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29

        initial
        ");

    // The signature handed back by the write must equal the one read later.
    let returned_sig = written_commit
        .secure_sig
        .expect("failed to return the signature");

    let reread_commit = backend.read_commit(&commit_id).block_on().unwrap();
    let sig = reread_commit
        .secure_sig
        .expect("failed to read the signature");
    assert_eq!(&sig, &returned_sig);

    // `sig.sig` holds the signer output; `sig.data` holds the commit text
    // without the `gpgsig` header.
    insta::assert_snapshot!(str::from_utf8(&sig.sig).unwrap(), @r"
        test sig
        hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29
        ");
    insta::assert_snapshot!(str::from_utf8(&sig.data).unwrap(), @r"
        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
        author Someone <someone@example.com> 0 +0000
        committer Someone <someone@example.com> 0 +0000
        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp

        initial
        ");
}
2411
2412    fn git_id(commit_id: &CommitId) -> gix::ObjectId {
2413        gix::ObjectId::from_bytes_or_panic(commit_id.as_bytes())
2414    }
2415
2416    fn create_signature() -> Signature {
2417        Signature {
2418            name: GIT_USER.to_string(),
2419            email: GIT_EMAIL.to_string(),
2420            timestamp: Timestamp {
2421                timestamp: MillisSinceEpoch(0),
2422                tz_offset: 0,
2423            },
2424        }
2425    }
2426
2427    // Not using testutils::user_settings() because there is a dependency cycle
2428    // 'jj_lib (1) -> testutils -> jj_lib (2)' which creates another distinct
2429    // UserSettings type. testutils returns jj_lib (2)'s UserSettings, whereas
2430    // our UserSettings type comes from jj_lib (1).
2431    fn user_settings() -> UserSettings {
2432        let config = StackedConfig::with_defaults();
2433        UserSettings::from_config(config).unwrap()
2434    }
2435}