jj_lib/
git_backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::collections::HashSet;
18use std::ffi::OsStr;
19use std::fmt::Debug;
20use std::fmt::Error;
21use std::fmt::Formatter;
22use std::fs;
23use std::io;
24use std::io::Cursor;
25use std::path::Path;
26use std::path::PathBuf;
27use std::pin::Pin;
28use std::process::Command;
29use std::process::ExitStatus;
30use std::str::Utf8Error;
31use std::sync::Arc;
32use std::sync::Mutex;
33use std::sync::MutexGuard;
34use std::time::SystemTime;
35
36use async_trait::async_trait;
37use bstr::BStr;
38use futures::stream::BoxStream;
39use gix::bstr::BString;
40use gix::objs::CommitRefIter;
41use gix::objs::WriteTo as _;
42use itertools::Itertools as _;
43use once_cell::sync::OnceCell as OnceLock;
44use pollster::FutureExt as _;
45use prost::Message as _;
46use smallvec::SmallVec;
47use thiserror::Error;
48use tokio::io::AsyncRead;
49use tokio::io::AsyncReadExt as _;
50
51use crate::backend::Backend;
52use crate::backend::BackendError;
53use crate::backend::BackendInitError;
54use crate::backend::BackendLoadError;
55use crate::backend::BackendResult;
56use crate::backend::ChangeId;
57use crate::backend::Commit;
58use crate::backend::CommitId;
59use crate::backend::CopyHistory;
60use crate::backend::CopyId;
61use crate::backend::CopyRecord;
62use crate::backend::FileId;
63use crate::backend::MillisSinceEpoch;
64use crate::backend::SecureSig;
65use crate::backend::Signature;
66use crate::backend::SigningFn;
67use crate::backend::SymlinkId;
68use crate::backend::Timestamp;
69use crate::backend::Tree;
70use crate::backend::TreeId;
71use crate::backend::TreeValue;
72use crate::backend::make_root_commit;
73use crate::config::ConfigGetError;
74use crate::file_util;
75use crate::file_util::BadPathEncoding;
76use crate::file_util::IoResultExt as _;
77use crate::file_util::PathError;
78use crate::git::GitSettings;
79use crate::index::Index;
80use crate::lock::FileLock;
81use crate::merge::Merge;
82use crate::merge::MergeBuilder;
83use crate::object_id::ObjectId;
84use crate::repo_path::RepoPath;
85use crate::repo_path::RepoPathBuf;
86use crate::repo_path::RepoPathComponentBuf;
87use crate::settings::UserSettings;
88use crate::stacked_table::MutableTable;
89use crate::stacked_table::ReadonlyTable;
90use crate::stacked_table::TableSegment as _;
91use crate::stacked_table::TableStore;
92use crate::stacked_table::TableStoreError;
93
/// Byte length of the object ids handled by this backend. Used for the
/// all-zero root commit id, as the key size of the extra metadata table, and
/// to validate tree ids parsed from the `jj:trees` header.
const HASH_LENGTH: usize = 20;
/// Byte length of a change id. Used for the all-zero root change id and to
/// validate change ids read from the `change-id` commit header.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";

/// Name of the extra commit header that holds the space-separated hex tree
/// ids of a conflicted root tree.
pub const JJ_TREES_COMMIT_HEADER: &str = "jj:trees";
/// Name of the extra commit header that holds the change id in reverse-hex
/// form.
pub const CHANGE_ID_COMMIT_HEADER: &str = "change-id";
101
/// Errors that may occur while initializing a `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendInitError {
    /// Creating a new Git repository failed.
    #[error("Failed to initialize git repository")]
    InitRepository(#[source] gix::init::Error),
    /// Opening an existing Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// The Git repository path could not be converted to bytes for storing
    /// it in the `git_target` file.
    #[error("Failed to encode git repository path")]
    EncodeRepositoryPath(#[source] BadPathEncoding),
    /// Reading the Git-related user settings failed.
    #[error(transparent)]
    Config(ConfigGetError),
    /// A filesystem operation on a store or repository path failed.
    #[error(transparent)]
    Path(PathError),
}
115
116impl From<Box<GitBackendInitError>> for BackendInitError {
117    fn from(err: Box<GitBackendInitError>) -> Self {
118        Self(err)
119    }
120}
121
/// Errors that may occur while loading an existing `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendLoadError {
    /// Opening the backing Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// The bytes stored in the `git_target` file could not be converted back
    /// to a path.
    #[error("Failed to decode git repository path")]
    DecodeRepositoryPath(#[source] BadPathEncoding),
    /// Reading the Git-related user settings failed.
    #[error(transparent)]
    Config(ConfigGetError),
    /// A filesystem operation on a store or repository path failed.
    #[error(transparent)]
    Path(PathError),
}
133
134impl From<Box<GitBackendLoadError>> for BackendLoadError {
135    fn from(err: Box<GitBackendLoadError>) -> Self {
136        Self(err)
137    }
138}
139
/// `GitBackend`-specific error that may occur after the backend is loaded.
#[derive(Debug, Error)]
pub enum GitBackendError {
    /// Reading the head of the extra (non-git) metadata table failed.
    #[error("Failed to read non-git metadata")]
    ReadMetadata(#[source] TableStoreError),
    /// Saving the extra (non-git) metadata table failed.
    #[error("Failed to write non-git metadata")]
    WriteMetadata(#[source] TableStoreError),
}
148
149impl From<GitBackendError> for BackendError {
150    fn from(err: GitBackendError) -> Self {
151        Self::Other(err.into())
152    }
153}
154
/// Errors that may occur while running `git gc`.
#[derive(Debug, Error)]
pub enum GitGcError {
    /// The `git gc` command could not be run.
    #[error("Failed to run git gc command")]
    GcCommand(#[source] std::io::Error),
    /// The `git gc` command ran but exited with the contained error status.
    #[error("git gc command exited with an error: {0}")]
    GcCommandErrorStatus(ExitStatus),
}
162
/// Backend that stores its data in a Git repository, plus a side table of
/// extra (non-git) metadata.
pub struct GitBackend {
    // While gix::Repository can be created from gix::ThreadSafeRepository, it's
    // cheaper to cache the thread-local instance behind a mutex than creating
    // one for each backend method call. Our GitBackend is most likely to be
    // used in a single-threaded context.
    /// Thread-safe handle from which new thread-local instances are created.
    base_repo: gix::ThreadSafeRepository,
    /// Cached thread-local instance, shared behind a mutex.
    repo: Mutex<gix::Repository>,
    /// All-zero commit id of `HASH_LENGTH` bytes.
    root_commit_id: CommitId,
    /// All-zero change id of `CHANGE_ID_LENGTH` bytes.
    root_change_id: ChangeId,
    /// Id of the empty tree (a fixed, hard-coded hex value).
    empty_tree_id: TreeId,
    /// Lazily-initialized list of shallow commit ids, read once from the
    /// Git repo and then reused.
    shallow_root_ids: OnceLock<Vec<CommitId>>,
    /// Table store holding the extra (non-git) metadata, located in the
    /// `extra` directory of the store.
    extra_metadata_store: TableStore,
    /// Most recently read or saved head of the extra metadata table, if any.
    cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
    /// Path to the `git` executable, taken from the Git settings.
    git_executable: PathBuf,
    /// Value of the `write_change_id_header` Git setting.
    write_change_id_header: bool,
}
179
180impl GitBackend {
181    pub fn name() -> &'static str {
182        "git"
183    }
184
185    fn new(
186        base_repo: gix::ThreadSafeRepository,
187        extra_metadata_store: TableStore,
188        git_settings: GitSettings,
189    ) -> Self {
190        let repo = Mutex::new(base_repo.to_thread_local());
191        let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
192        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
193        let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
194        Self {
195            base_repo,
196            repo,
197            root_commit_id,
198            root_change_id,
199            empty_tree_id,
200            shallow_root_ids: OnceLock::new(),
201            extra_metadata_store,
202            cached_extra_metadata: Mutex::new(None),
203            git_executable: git_settings.executable_path,
204            write_change_id_header: git_settings.write_change_id_header,
205        }
206    }
207
208    pub fn init_internal(
209        settings: &UserSettings,
210        store_path: &Path,
211    ) -> Result<Self, Box<GitBackendInitError>> {
212        let git_repo_path = Path::new("git");
213        let git_repo = gix::ThreadSafeRepository::init_opts(
214            store_path.join(git_repo_path),
215            gix::create::Kind::Bare,
216            gix::create::Options::default(),
217            gix_open_opts_from_settings(settings),
218        )
219        .map_err(GitBackendInitError::InitRepository)?;
220        let git_settings =
221            GitSettings::from_settings(settings).map_err(GitBackendInitError::Config)?;
222        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
223    }
224
225    /// Initializes backend by creating a new Git repo at the specified
226    /// workspace path. The workspace directory must exist.
227    pub fn init_colocated(
228        settings: &UserSettings,
229        store_path: &Path,
230        workspace_root: &Path,
231    ) -> Result<Self, Box<GitBackendInitError>> {
232        let canonical_workspace_root = {
233            let path = store_path.join(workspace_root);
234            dunce::canonicalize(&path)
235                .context(&path)
236                .map_err(GitBackendInitError::Path)?
237        };
238        let git_repo = gix::ThreadSafeRepository::init_opts(
239            canonical_workspace_root,
240            gix::create::Kind::WithWorktree,
241            gix::create::Options::default(),
242            gix_open_opts_from_settings(settings),
243        )
244        .map_err(GitBackendInitError::InitRepository)?;
245        let git_repo_path = workspace_root.join(".git");
246        let git_settings =
247            GitSettings::from_settings(settings).map_err(GitBackendInitError::Config)?;
248        Self::init_with_repo(store_path, &git_repo_path, git_repo, git_settings)
249    }
250
251    /// Initializes backend with an existing Git repo at the specified path.
252    pub fn init_external(
253        settings: &UserSettings,
254        store_path: &Path,
255        git_repo_path: &Path,
256    ) -> Result<Self, Box<GitBackendInitError>> {
257        let canonical_git_repo_path = {
258            let path = store_path.join(git_repo_path);
259            canonicalize_git_repo_path(&path)
260                .context(&path)
261                .map_err(GitBackendInitError::Path)?
262        };
263        let git_repo = gix::ThreadSafeRepository::open_opts(
264            canonical_git_repo_path,
265            gix_open_opts_from_settings(settings),
266        )
267        .map_err(GitBackendInitError::OpenRepository)?;
268        let git_settings =
269            GitSettings::from_settings(settings).map_err(GitBackendInitError::Config)?;
270        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
271    }
272
273    fn init_with_repo(
274        store_path: &Path,
275        git_repo_path: &Path,
276        repo: gix::ThreadSafeRepository,
277        git_settings: GitSettings,
278    ) -> Result<Self, Box<GitBackendInitError>> {
279        let extra_path = store_path.join("extra");
280        fs::create_dir(&extra_path)
281            .context(&extra_path)
282            .map_err(GitBackendInitError::Path)?;
283        let target_path = store_path.join("git_target");
284        let git_repo_path = if cfg!(windows) && git_repo_path.is_relative() {
285            // When a repository is created in Windows, format the path with *forward
286            // slashes* and not backwards slashes. This makes it possible to use the same
287            // repository under Windows Subsystem for Linux.
288            //
289            // This only works for relative paths. If the path is absolute, there's not much
290            // we can do, and it simply won't work inside and outside WSL at the same time.
291            file_util::slash_path(git_repo_path)
292        } else {
293            git_repo_path.into()
294        };
295        let git_repo_path_bytes = file_util::path_to_bytes(&git_repo_path)
296            .map_err(GitBackendInitError::EncodeRepositoryPath)?;
297        fs::write(&target_path, git_repo_path_bytes)
298            .context(&target_path)
299            .map_err(GitBackendInitError::Path)?;
300        let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
301        Ok(Self::new(repo, extra_metadata_store, git_settings))
302    }
303
304    pub fn load(
305        settings: &UserSettings,
306        store_path: &Path,
307    ) -> Result<Self, Box<GitBackendLoadError>> {
308        let git_repo_path = {
309            let target_path = store_path.join("git_target");
310            let git_repo_path_bytes = fs::read(&target_path)
311                .context(&target_path)
312                .map_err(GitBackendLoadError::Path)?;
313            let git_repo_path = file_util::path_from_bytes(&git_repo_path_bytes)
314                .map_err(GitBackendLoadError::DecodeRepositoryPath)?;
315            let git_repo_path = store_path.join(git_repo_path);
316            canonicalize_git_repo_path(&git_repo_path)
317                .context(&git_repo_path)
318                .map_err(GitBackendLoadError::Path)?
319        };
320        let repo = gix::ThreadSafeRepository::open_opts(
321            git_repo_path,
322            gix_open_opts_from_settings(settings),
323        )
324        .map_err(GitBackendLoadError::OpenRepository)?;
325        let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
326        let git_settings =
327            GitSettings::from_settings(settings).map_err(GitBackendLoadError::Config)?;
328        Ok(Self::new(repo, extra_metadata_store, git_settings))
329    }
330
331    fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
332        self.repo.lock().unwrap()
333    }
334
335    /// Returns new thread-local instance to access to the underlying Git repo.
336    pub fn git_repo(&self) -> gix::Repository {
337        self.base_repo.to_thread_local()
338    }
339
340    /// Path to the `.git` directory or the repository itself if it's bare.
341    pub fn git_repo_path(&self) -> &Path {
342        self.base_repo.path()
343    }
344
345    /// Path to the working directory if the repository isn't bare.
346    pub fn git_workdir(&self) -> Option<&Path> {
347        self.base_repo.work_dir()
348    }
349
350    fn shallow_root_ids(&self, git_repo: &gix::Repository) -> BackendResult<&[CommitId]> {
351        // The list of shallow roots is cached by gix, but it's still expensive
352        // to stat file on every read_object() call. Refreshing shallow roots is
353        // also bad for consistency reasons.
354        self.shallow_root_ids
355            .get_or_try_init(|| {
356                let maybe_oids = git_repo
357                    .shallow_commits()
358                    .map_err(|err| BackendError::Other(err.into()))?;
359                let commit_ids = maybe_oids.map_or(vec![], |oids| {
360                    oids.iter()
361                        .map(|oid| CommitId::from_bytes(oid.as_bytes()))
362                        .collect()
363                });
364                Ok(commit_ids)
365            })
366            .map(AsRef::as_ref)
367    }
368
369    fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
370        let mut locked_head = self.cached_extra_metadata.lock().unwrap();
371        match locked_head.as_ref() {
372            Some(head) => Ok(head.clone()),
373            None => {
374                let table = self
375                    .extra_metadata_store
376                    .get_head()
377                    .map_err(GitBackendError::ReadMetadata)?;
378                *locked_head = Some(table.clone());
379                Ok(table)
380            }
381        }
382    }
383
384    fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> {
385        let table = self
386            .extra_metadata_store
387            .get_head_locked()
388            .map_err(GitBackendError::ReadMetadata)?;
389        Ok(table)
390    }
391
392    fn save_extra_metadata_table(
393        &self,
394        mut_table: MutableTable,
395        _table_lock: &FileLock,
396    ) -> BackendResult<()> {
397        let table = self
398            .extra_metadata_store
399            .save_table(mut_table)
400            .map_err(GitBackendError::WriteMetadata)?;
401        // Since the parent table was the head, saved table are likely to be new head.
402        // If it's not, cache will be reloaded when entry can't be found.
403        *self.cached_extra_metadata.lock().unwrap() = Some(table);
404        Ok(())
405    }
406
407    /// Imports the given commits and ancestors from the backing Git repo.
408    ///
409    /// The `head_ids` may contain commits that have already been imported, but
410    /// the caller should filter them out to eliminate redundant I/O processing.
411    #[tracing::instrument(skip(self, head_ids))]
412    pub fn import_head_commits<'a>(
413        &self,
414        head_ids: impl IntoIterator<Item = &'a CommitId>,
415    ) -> BackendResult<()> {
416        let head_ids: HashSet<&CommitId> = head_ids
417            .into_iter()
418            .filter(|&id| *id != self.root_commit_id)
419            .collect();
420        if head_ids.is_empty() {
421            return Ok(());
422        }
423
424        // Create no-gc ref even if known to the extras table. Concurrent GC
425        // process might have deleted the no-gc ref.
426        let locked_repo = self.lock_git_repo();
427        locked_repo
428            .edit_references(head_ids.iter().copied().map(to_no_gc_ref_update))
429            .map_err(|err| BackendError::Other(Box::new(err)))?;
430
431        // These commits are imported from Git. Make our change ids persist (otherwise
432        // future write_commit() could reassign new change id.)
433        tracing::debug!(
434            heads_count = head_ids.len(),
435            "import extra metadata entries"
436        );
437        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
438        let mut mut_table = table.start_mutation();
439        import_extra_metadata_entries_from_heads(
440            &locked_repo,
441            &mut mut_table,
442            &table_lock,
443            &head_ids,
444            self.shallow_root_ids(&locked_repo)?,
445        )?;
446        self.save_extra_metadata_table(mut_table, &table_lock)
447    }
448
449    fn read_file_sync(&self, id: &FileId) -> BackendResult<Vec<u8>> {
450        let git_blob_id = validate_git_object_id(id)?;
451        let locked_repo = self.lock_git_repo();
452        let mut blob = locked_repo
453            .find_object(git_blob_id)
454            .map_err(|err| map_not_found_err(err, id))?
455            .try_into_blob()
456            .map_err(|err| to_read_object_err(err, id))?;
457        Ok(blob.take_data())
458    }
459
460    fn new_diff_platform(&self) -> BackendResult<gix::diff::blob::Platform> {
461        let attributes = gix::worktree::Stack::new(
462            Path::new(""),
463            gix::worktree::stack::State::AttributesStack(Default::default()),
464            gix::worktree::glob::pattern::Case::Sensitive,
465            Vec::new(),
466            Vec::new(),
467        );
468        let filter = gix::diff::blob::Pipeline::new(
469            Default::default(),
470            gix::filter::plumbing::Pipeline::new(
471                self.git_repo()
472                    .command_context()
473                    .map_err(|err| BackendError::Other(Box::new(err)))?,
474                Default::default(),
475            ),
476            Vec::new(),
477            Default::default(),
478        );
479        Ok(gix::diff::blob::Platform::new(
480            Default::default(),
481            filter,
482            gix::diff::blob::pipeline::Mode::ToGit,
483            attributes,
484        ))
485    }
486
487    fn read_tree_for_commit<'repo>(
488        &self,
489        repo: &'repo gix::Repository,
490        id: &CommitId,
491    ) -> BackendResult<gix::Tree<'repo>> {
492        let tree = self.read_commit(id).block_on()?.root_tree;
493        // TODO(kfm): probably want to do something here if it is a merge
494        let tree_id = tree.first().clone();
495        let gix_id = validate_git_object_id(&tree_id)?;
496        repo.find_object(gix_id)
497            .map_err(|err| map_not_found_err(err, &tree_id))?
498            .try_into_tree()
499            .map_err(|err| to_read_object_err(err, &tree_id))
500    }
501}
502
503/// Canonicalizes the given `path` except for the last `".git"` component.
504///
505/// The last path component matters when opening a Git repo without `core.bare`
506/// config. This config is usually set, but the "repo" tool will set up such
507/// repositories and symlinks. Opening such repo with fully-canonicalized path
508/// would turn a colocated Git repo into a bare repo.
509pub fn canonicalize_git_repo_path(path: &Path) -> io::Result<PathBuf> {
510    if path.ends_with(".git") {
511        let workdir = path.parent().unwrap();
512        dunce::canonicalize(workdir).map(|dir| dir.join(".git"))
513    } else {
514        dunce::canonicalize(path)
515    }
516}
517
518fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options {
519    let user_name = settings.user_name();
520    let user_email = settings.user_email();
521    gix::open::Options::default()
522        .config_overrides([
523            // Committer has to be configured to record reflog. Author isn't
524            // needed, but let's copy the same values.
525            format!("author.name={user_name}"),
526            format!("author.email={user_email}"),
527            format!("committer.name={user_name}"),
528            format!("committer.email={user_email}"),
529        ])
530        // The git_target path should point the repository, not the working directory.
531        .open_path_as_is(true)
532        // Gitoxide recommends this when correctness is preferred
533        .strict_config(true)
534}
535
536/// Parses the `jj:trees` header value.
537fn root_tree_from_git_extra_header(value: &BStr) -> Result<Merge<TreeId>, ()> {
538    let mut tree_ids = SmallVec::new();
539    for hex in value.split(|b| *b == b' ') {
540        let tree_id = TreeId::try_from_hex(hex).ok_or(())?;
541        if tree_id.as_bytes().len() != HASH_LENGTH {
542            return Err(());
543        }
544        tree_ids.push(tree_id);
545    }
546    // It is invalid to use `jj:trees` with a non-conflicted tree. If this were
547    // allowed, it would be possible to construct a commit which appears to have
548    // different contents depending on whether it is viewed using `jj` or `git`.
549    if tree_ids.len() == 1 || tree_ids.len() % 2 == 0 {
550        return Err(());
551    }
552    Ok(Merge::from_vec(tree_ids))
553}
554
555fn commit_from_git_without_root_parent(
556    id: &CommitId,
557    git_object: &gix::Object,
558    is_shallow: bool,
559) -> BackendResult<Commit> {
560    let commit = git_object
561        .try_to_commit_ref()
562        .map_err(|err| to_read_object_err(err, id))?;
563
564    // If the git header has a change-id field, we attempt to convert that to a
565    // valid JJ Change Id
566    let change_id = extract_change_id_from_commit(&commit)
567        .unwrap_or_else(|| synthetic_change_id_from_git_commit_id(id));
568
569    // shallow commits don't have parents their parents actually fetched, so we
570    // discard them here
571    // TODO: This causes issues when a shallow repository is deepened/unshallowed
572    let parents = if is_shallow {
573        vec![]
574    } else {
575        commit
576            .parents()
577            .map(|oid| CommitId::from_bytes(oid.as_bytes()))
578            .collect_vec()
579    };
580    // Conflicted commits written before we started using the `jj:trees` header
581    // (~March 2024) may have the root trees stored in the extra metadata table
582    // instead. For such commits, we'll update the root tree later when we read the
583    // extra metadata.
584    let root_tree = commit
585        .extra_headers()
586        .find(JJ_TREES_COMMIT_HEADER)
587        .map(root_tree_from_git_extra_header)
588        .transpose()
589        .map_err(|()| to_read_object_err("Invalid jj:trees header", id))?
590        .unwrap_or_else(|| {
591            let tree_id = TreeId::from_bytes(commit.tree().as_bytes());
592            Merge::resolved(tree_id)
593        });
594    // Use lossy conversion as commit message with "mojibake" is still better than
595    // nothing.
596    // TODO: what should we do with commit.encoding?
597    let description = String::from_utf8_lossy(commit.message).into_owned();
598    let author = signature_from_git(commit.author());
599    let committer = signature_from_git(commit.committer());
600
601    // If the commit is signed, extract both the signature and the signed data
602    // (which is the commit buffer with the gpgsig header omitted).
603    // We have to re-parse the raw commit data because gix CommitRef does not give
604    // us the sogned data, only the signature.
605    // Ideally, we could use try_to_commit_ref_iter at the beginning of this
606    // function and extract everything from that. For now, this works
607    let secure_sig = commit
608        .extra_headers
609        .iter()
610        // gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too
611        .any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256")
612        .then(|| CommitRefIter::signature(&git_object.data))
613        .transpose()
614        .map_err(|err| to_read_object_err(err, id))?
615        .flatten()
616        .map(|(sig, data)| SecureSig {
617            data: data.to_bstring().into(),
618            sig: sig.into_owned().into(),
619        });
620
621    Ok(Commit {
622        parents,
623        predecessors: vec![],
624        // If this commit has associated extra metadata, we may reset this later.
625        root_tree,
626        change_id,
627        description,
628        author,
629        committer,
630        secure_sig,
631    })
632}
633
634/// Extracts change id from commit headers.
635pub fn extract_change_id_from_commit(commit: &gix::objs::CommitRef) -> Option<ChangeId> {
636    commit
637        .extra_headers()
638        .find(CHANGE_ID_COMMIT_HEADER)
639        .and_then(ChangeId::try_from_reverse_hex)
640        .filter(|val| val.as_bytes().len() == CHANGE_ID_LENGTH)
641}
642
643/// Deterministically creates a change id based on the commit id
644///
645/// Used when we get a commit without a change id. The exact algorithm for the
646/// computation should not be relied upon.
647pub fn synthetic_change_id_from_git_commit_id(id: &CommitId) -> ChangeId {
648    // We reverse the bits of the commit id to create the change id. We don't
649    // want to use the first bytes unmodified because then it would be ambiguous
650    // if a given hash prefix refers to the commit id or the change id. It would
651    // have been enough to pick the last 16 bytes instead of the leading 16
652    // bytes to address that. We also reverse the bits to make it less likely
653    // that users depend on any relationship between the two ids.
654    let bytes = id.as_bytes()[4..HASH_LENGTH]
655        .iter()
656        .rev()
657        .map(|b| b.reverse_bits())
658        .collect();
659    ChangeId::new(bytes)
660}
661
/// Placeholder written to Git in place of an empty signature name or email,
/// and mapped back to an empty string when a signature is read.
const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING";
663
664fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature {
665    let name = signature.name;
666    let name = if name != EMPTY_STRING_PLACEHOLDER {
667        String::from_utf8_lossy(name).into_owned()
668    } else {
669        "".to_string()
670    };
671    let email = signature.email;
672    let email = if email != EMPTY_STRING_PLACEHOLDER {
673        String::from_utf8_lossy(email).into_owned()
674    } else {
675        "".to_string()
676    };
677    let time = signature.time().unwrap_or_default();
678    let timestamp = MillisSinceEpoch(time.seconds * 1000);
679    let tz_offset = time.offset.div_euclid(60); // in minutes
680    Signature {
681        name,
682        email,
683        timestamp: Timestamp {
684            timestamp,
685            tz_offset,
686        },
687    }
688}
689
690fn signature_to_git(signature: &Signature) -> gix::actor::Signature {
691    // git does not support empty names or emails
692    let name = if !signature.name.is_empty() {
693        &signature.name
694    } else {
695        EMPTY_STRING_PLACEHOLDER
696    };
697    let email = if !signature.email.is_empty() {
698        &signature.email
699    } else {
700        EMPTY_STRING_PLACEHOLDER
701    };
702    let time = gix::date::Time::new(
703        signature.timestamp.timestamp.0.div_euclid(1000),
704        signature.timestamp.tz_offset * 60, // in seconds
705    );
706    gix::actor::Signature {
707        name: name.into(),
708        email: email.into(),
709        time,
710    }
711}
712
713fn serialize_extras(commit: &Commit) -> Vec<u8> {
714    let mut proto = crate::protos::git_store::Commit {
715        change_id: commit.change_id.to_bytes(),
716        ..Default::default()
717    };
718    proto.uses_tree_conflict_format = true;
719    if !commit.root_tree.is_resolved() {
720        // This is done for the sake of jj versions <0.28 (before commit
721        // f7b14be) being able to read the repo. At some point in the
722        // future, we can stop doing it.
723        proto.root_tree = commit.root_tree.iter().map(|r| r.to_bytes()).collect();
724    }
725    for predecessor in &commit.predecessors {
726        proto.predecessors.push(predecessor.to_bytes());
727    }
728    proto.encode_to_vec()
729}
730
731fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
732    let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
733    if !proto.change_id.is_empty() {
734        commit.change_id = ChangeId::new(proto.change_id);
735    }
736    if commit.root_tree.is_resolved()
737        && proto.uses_tree_conflict_format
738        && !proto.root_tree.is_empty()
739    {
740        let merge_builder: MergeBuilder<_> = proto
741            .root_tree
742            .iter()
743            .map(|id_bytes| TreeId::from_bytes(id_bytes))
744            .collect();
745        commit.root_tree = merge_builder.build();
746    }
747    for predecessor in &proto.predecessors {
748        commit.predecessors.push(CommitId::from_bytes(predecessor));
749    }
750}
751
752/// Returns `RefEdit` that will create a ref in `refs/jj/keep` if not exist.
753/// Used for preventing GC of commits we create.
754fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit {
755    let name = format!("{NO_GC_REF_NAMESPACE}{id}");
756    let new = gix::refs::Target::Object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()));
757    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(new.clone());
758    gix::refs::transaction::RefEdit {
759        change: gix::refs::transaction::Change::Update {
760            log: gix::refs::transaction::LogChange {
761                message: "used by jj".into(),
762                ..Default::default()
763            },
764            expected,
765            new,
766        },
767        name: name.try_into().unwrap(),
768        deref: false,
769    }
770}
771
772fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit {
773    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(git_ref.target);
774    gix::refs::transaction::RefEdit {
775        change: gix::refs::transaction::Change::Delete {
776            expected,
777            log: gix::refs::transaction::RefLog::AndReference,
778        },
779        name: git_ref.name,
780        deref: false,
781    }
782}
783
784/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other
785/// unreachable and non-head refs.
786fn recreate_no_gc_refs(
787    git_repo: &gix::Repository,
788    new_heads: impl IntoIterator<Item = CommitId>,
789    keep_newer: SystemTime,
790) -> BackendResult<()> {
791    // Calculate diff between existing no-gc refs and new heads.
792    let new_heads: HashSet<CommitId> = new_heads.into_iter().collect();
793    let mut no_gc_refs_to_keep_count: usize = 0;
794    let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new();
795    let git_references = git_repo
796        .references()
797        .map_err(|err| BackendError::Other(err.into()))?;
798    let no_gc_refs_iter = git_references
799        .prefixed(NO_GC_REF_NAMESPACE)
800        .map_err(|err| BackendError::Other(err.into()))?;
801    for git_ref in no_gc_refs_iter {
802        let git_ref = git_ref.map_err(BackendError::Other)?.detach();
803        let oid = git_ref.target.try_id().ok_or_else(|| {
804            let name = git_ref.name.as_bstr();
805            BackendError::Other(format!("Symbolic no-gc ref found: {name}").into())
806        })?;
807        let id = CommitId::from_bytes(oid.as_bytes());
808        let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex();
809        if new_heads.contains(&id) && name_good {
810            no_gc_refs_to_keep_count += 1;
811            continue;
812        }
813        // Check timestamp of loose ref, but this is still racy on re-import
814        // because:
815        // - existing packed ref won't be demoted to loose ref
816        // - existing loose ref won't be touched
817        //
818        // TODO: might be better to switch to a dummy merge, where new no-gc ref
819        // will always have a unique name. Doing that with the current
820        // ref-per-head strategy would increase the number of the no-gc refs.
821        // https://github.com/jj-vcs/jj/pull/2659#issuecomment-1837057782
822        let loose_ref_path = git_repo.path().join(git_ref.name.to_path());
823        if let Ok(metadata) = loose_ref_path.metadata() {
824            let mtime = metadata.modified().expect("unsupported platform?");
825            if mtime > keep_newer {
826                tracing::trace!(?git_ref, "not deleting new");
827                no_gc_refs_to_keep_count += 1;
828                continue;
829            }
830        }
831        // Also deletes no-gc ref of random name created by old jj.
832        tracing::trace!(?git_ref, ?name_good, "will delete");
833        no_gc_refs_to_delete.push(git_ref);
834    }
835    tracing::info!(
836        new_heads_count = new_heads.len(),
837        no_gc_refs_to_keep_count,
838        no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(),
839        "collected reachable refs"
840    );
841
842    // It's slow to delete packed refs one by one, so update refs all at once.
843    let ref_edits = itertools::chain(
844        no_gc_refs_to_delete.into_iter().map(to_ref_deletion),
845        new_heads.iter().map(to_no_gc_ref_update),
846    );
847    git_repo
848        .edit_references(ref_edits)
849        .map_err(|err| BackendError::Other(err.into()))?;
850
851    Ok(())
852}
853
854fn run_git_gc(program: &OsStr, git_dir: &Path, keep_newer: SystemTime) -> Result<(), GitGcError> {
855    let keep_newer = keep_newer
856        .duration_since(SystemTime::UNIX_EPOCH)
857        .unwrap_or_default(); // underflow
858    let mut git = Command::new(program);
859    git.arg("--git-dir=.") // turn off discovery
860        .arg("gc")
861        .arg(format!("--prune=@{} +0000", keep_newer.as_secs()));
862    // Don't specify it by GIT_DIR/--git-dir. On Windows, the path could be
863    // canonicalized as UNC path, which wouldn't be supported by git.
864    git.current_dir(git_dir);
865    // TODO: pass output to UI layer instead of printing directly here
866    tracing::info!(?git, "running git gc");
867    let status = git.status().map_err(GitGcError::GcCommand)?;
868    tracing::info!(?status, "git gc exited");
869    if !status.success() {
870        return Err(GitGcError::GcCommandErrorStatus(status));
871    }
872    Ok(())
873}
874
875fn validate_git_object_id(id: &impl ObjectId) -> BackendResult<gix::ObjectId> {
876    if id.as_bytes().len() != HASH_LENGTH {
877        return Err(BackendError::InvalidHashLength {
878            expected: HASH_LENGTH,
879            actual: id.as_bytes().len(),
880            object_type: id.object_type(),
881            hash: id.hex(),
882        });
883    }
884    Ok(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
885}
886
887fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError {
888    if matches!(err, gix::object::find::existing::Error::NotFound { .. }) {
889        BackendError::ObjectNotFound {
890            object_type: id.object_type(),
891            hash: id.hex(),
892            source: Box::new(err),
893        }
894    } else {
895        to_read_object_err(err, id)
896    }
897}
898
899fn to_read_object_err(
900    err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
901    id: &impl ObjectId,
902) -> BackendError {
903    BackendError::ReadObject {
904        object_type: id.object_type(),
905        hash: id.hex(),
906        source: err.into(),
907    }
908}
909
910fn to_invalid_utf8_err(source: Utf8Error, id: &impl ObjectId) -> BackendError {
911    BackendError::InvalidUtf8 {
912        object_type: id.object_type(),
913        hash: id.hex(),
914        source,
915    }
916}
917
918fn import_extra_metadata_entries_from_heads(
919    git_repo: &gix::Repository,
920    mut_table: &mut MutableTable,
921    _table_lock: &FileLock,
922    head_ids: &HashSet<&CommitId>,
923    shallow_roots: &[CommitId],
924) -> BackendResult<()> {
925    let mut work_ids = head_ids
926        .iter()
927        .filter(|&id| mut_table.get_value(id.as_bytes()).is_none())
928        .map(|&id| id.clone())
929        .collect_vec();
930    while let Some(id) = work_ids.pop() {
931        let git_object = git_repo
932            .find_object(validate_git_object_id(&id)?)
933            .map_err(|err| map_not_found_err(err, &id))?;
934        let is_shallow = shallow_roots.contains(&id);
935        // TODO(#1624): Should we read the root tree here and check if it has a
936        // `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
937        // change the description of a commit with tree-level conflicts.
938        let commit = commit_from_git_without_root_parent(&id, &git_object, is_shallow)?;
939        mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
940        work_ids.extend(
941            commit
942                .parents
943                .into_iter()
944                .filter(|id| mut_table.get_value(id.as_bytes()).is_none()),
945        );
946    }
947    Ok(())
948}
949
950impl Debug for GitBackend {
951    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
952        f.debug_struct("GitBackend")
953            .field("path", &self.git_repo_path())
954            .finish()
955    }
956}
957
958#[async_trait]
959impl Backend for GitBackend {
    /// Returns the backend's name by delegating to the argument-less
    /// `Self::name()` associated function.
    fn name(&self) -> &str {
        Self::name()
    }
963
    /// Returns the commit id length, which is `HASH_LENGTH` for this backend.
    fn commit_id_length(&self) -> usize {
        HASH_LENGTH
    }
967
    /// Returns the change id length, which is `CHANGE_ID_LENGTH` for this
    /// backend.
    fn change_id_length(&self) -> usize {
        CHANGE_ID_LENGTH
    }
971
    /// Returns the id of the root commit stored on this backend.
    fn root_commit_id(&self) -> &CommitId {
        &self.root_commit_id
    }
975
    /// Returns the change id of the root commit stored on this backend.
    fn root_change_id(&self) -> &ChangeId {
        &self.root_change_id
    }
979
    /// Returns the id of the empty tree stored on this backend.
    fn empty_tree_id(&self) -> &TreeId {
        &self.empty_tree_id
    }
983
    /// Always reports a concurrency of 1.
    ///
    /// NOTE(review): presumably a hint for how many backend requests callers
    /// should run at once; confirm against the `Backend` trait docs.
    fn concurrency(&self) -> usize {
        1
    }
987
988    async fn read_file(
989        &self,
990        _path: &RepoPath,
991        id: &FileId,
992    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>> {
993        let data = self.read_file_sync(id)?;
994        Ok(Box::pin(Cursor::new(data)))
995    }
996
997    async fn write_file(
998        &self,
999        _path: &RepoPath,
1000        contents: &mut (dyn AsyncRead + Send + Unpin),
1001    ) -> BackendResult<FileId> {
1002        let mut bytes = Vec::new();
1003        contents.read_to_end(&mut bytes).await.unwrap();
1004        let locked_repo = self.lock_git_repo();
1005        let oid = locked_repo
1006            .write_blob(bytes)
1007            .map_err(|err| BackendError::WriteObject {
1008                object_type: "file",
1009                source: Box::new(err),
1010            })?;
1011        Ok(FileId::new(oid.as_bytes().to_vec()))
1012    }
1013
1014    async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
1015        let git_blob_id = validate_git_object_id(id)?;
1016        let locked_repo = self.lock_git_repo();
1017        let mut blob = locked_repo
1018            .find_object(git_blob_id)
1019            .map_err(|err| map_not_found_err(err, id))?
1020            .try_into_blob()
1021            .map_err(|err| to_read_object_err(err, id))?;
1022        let target = String::from_utf8(blob.take_data())
1023            .map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?;
1024        Ok(target)
1025    }
1026
1027    async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
1028        let locked_repo = self.lock_git_repo();
1029        let oid =
1030            locked_repo
1031                .write_blob(target.as_bytes())
1032                .map_err(|err| BackendError::WriteObject {
1033                    object_type: "symlink",
1034                    source: Box::new(err),
1035                })?;
1036        Ok(SymlinkId::new(oid.as_bytes().to_vec()))
1037    }
1038
1039    async fn read_copy(&self, _id: &CopyId) -> BackendResult<CopyHistory> {
1040        Err(BackendError::Unsupported(
1041            "The Git backend doesn't support tracked copies yet".to_string(),
1042        ))
1043    }
1044
1045    async fn write_copy(&self, _contents: &CopyHistory) -> BackendResult<CopyId> {
1046        Err(BackendError::Unsupported(
1047            "The Git backend doesn't support tracked copies yet".to_string(),
1048        ))
1049    }
1050
1051    async fn get_related_copies(&self, _copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>> {
1052        Err(BackendError::Unsupported(
1053            "The Git backend doesn't support tracked copies yet".to_string(),
1054        ))
1055    }
1056
1057    async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
1058        if id == &self.empty_tree_id {
1059            return Ok(Tree::default());
1060        }
1061        let git_tree_id = validate_git_object_id(id)?;
1062
1063        let locked_repo = self.lock_git_repo();
1064        let git_tree = locked_repo
1065            .find_object(git_tree_id)
1066            .map_err(|err| map_not_found_err(err, id))?
1067            .try_into_tree()
1068            .map_err(|err| to_read_object_err(err, id))?;
1069        let mut entries: Vec<_> = git_tree
1070            .iter()
1071            .map(|entry| -> BackendResult<_> {
1072                let entry = entry.map_err(|err| to_read_object_err(err, id))?;
1073                let name = RepoPathComponentBuf::new(
1074                    str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?,
1075                )
1076                .unwrap();
1077                let value = match entry.mode().kind() {
1078                    gix::object::tree::EntryKind::Tree => {
1079                        let id = TreeId::from_bytes(entry.oid().as_bytes());
1080                        TreeValue::Tree(id)
1081                    }
1082                    gix::object::tree::EntryKind::Blob => {
1083                        let id = FileId::from_bytes(entry.oid().as_bytes());
1084                        TreeValue::File {
1085                            id,
1086                            executable: false,
1087                            copy_id: CopyId::placeholder(),
1088                        }
1089                    }
1090                    gix::object::tree::EntryKind::BlobExecutable => {
1091                        let id = FileId::from_bytes(entry.oid().as_bytes());
1092                        TreeValue::File {
1093                            id,
1094                            executable: true,
1095                            copy_id: CopyId::placeholder(),
1096                        }
1097                    }
1098                    gix::object::tree::EntryKind::Link => {
1099                        let id = SymlinkId::from_bytes(entry.oid().as_bytes());
1100                        TreeValue::Symlink(id)
1101                    }
1102                    gix::object::tree::EntryKind::Commit => {
1103                        let id = CommitId::from_bytes(entry.oid().as_bytes());
1104                        TreeValue::GitSubmodule(id)
1105                    }
1106                };
1107                Ok((name, value))
1108            })
1109            .try_collect()?;
1110        // While Git tree entries are sorted, the rule is slightly different.
1111        // Directory names are sorted as if they had trailing "/".
1112        if !entries.is_sorted_by_key(|(name, _)| name) {
1113            entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
1114        }
1115        Ok(Tree::from_sorted_entries(entries))
1116    }
1117
1118    async fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
1119        // Tree entries to be written must be sorted by Entry::filename(), which
1120        // is slightly different from the order of our backend::Tree.
1121        let entries = contents
1122            .entries()
1123            .map(|entry| {
1124                let filename = BString::from(entry.name().as_internal_str());
1125                match entry.value() {
1126                    TreeValue::File {
1127                        id,
1128                        executable: false,
1129                        copy_id: _, // TODO: Use the value
1130                    } => gix::objs::tree::Entry {
1131                        mode: gix::object::tree::EntryKind::Blob.into(),
1132                        filename,
1133                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1134                    },
1135                    TreeValue::File {
1136                        id,
1137                        executable: true,
1138                        copy_id: _, // TODO: Use the value
1139                    } => gix::objs::tree::Entry {
1140                        mode: gix::object::tree::EntryKind::BlobExecutable.into(),
1141                        filename,
1142                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1143                    },
1144                    TreeValue::Symlink(id) => gix::objs::tree::Entry {
1145                        mode: gix::object::tree::EntryKind::Link.into(),
1146                        filename,
1147                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1148                    },
1149                    TreeValue::Tree(id) => gix::objs::tree::Entry {
1150                        mode: gix::object::tree::EntryKind::Tree.into(),
1151                        filename,
1152                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1153                    },
1154                    TreeValue::GitSubmodule(id) => gix::objs::tree::Entry {
1155                        mode: gix::object::tree::EntryKind::Commit.into(),
1156                        filename,
1157                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1158                    },
1159                }
1160            })
1161            .sorted_unstable()
1162            .collect();
1163        let locked_repo = self.lock_git_repo();
1164        let oid = locked_repo
1165            .write_object(gix::objs::Tree { entries })
1166            .map_err(|err| BackendError::WriteObject {
1167                object_type: "tree",
1168                source: Box::new(err),
1169            })?;
1170        Ok(TreeId::from_bytes(oid.as_bytes()))
1171    }
1172
1173    #[tracing::instrument(skip(self))]
1174    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
1175        if *id == self.root_commit_id {
1176            return Ok(make_root_commit(
1177                self.root_change_id().clone(),
1178                self.empty_tree_id.clone(),
1179            ));
1180        }
1181        let git_commit_id = validate_git_object_id(id)?;
1182
1183        let mut commit = {
1184            let locked_repo = self.lock_git_repo();
1185            let git_object = locked_repo
1186                .find_object(git_commit_id)
1187                .map_err(|err| map_not_found_err(err, id))?;
1188            let is_shallow = self.shallow_root_ids(&locked_repo)?.contains(id);
1189            commit_from_git_without_root_parent(id, &git_object, is_shallow)?
1190        };
1191        if commit.parents.is_empty() {
1192            commit.parents.push(self.root_commit_id.clone());
1193        };
1194
1195        let table = self.cached_extra_metadata_table()?;
1196        if let Some(extras) = table.get_value(id.as_bytes()) {
1197            deserialize_extras(&mut commit, extras);
1198        } else {
1199            // TODO: Remove this hack and map to ObjectNotFound error if we're sure that
1200            // there are no reachable ancestor commits without extras metadata. Git commits
1201            // imported by jj < 0.8.0 might not have extras (#924).
1202            // https://github.com/jj-vcs/jj/issues/2343
1203            tracing::info!("unimported Git commit found");
1204            self.import_head_commits([id])?;
1205            let table = self.cached_extra_metadata_table()?;
1206            let extras = table.get_value(id.as_bytes()).unwrap();
1207            deserialize_extras(&mut commit, extras);
1208        }
1209        Ok(commit)
1210    }
1211
    /// Writes `contents` as a Git commit object, records its extras in the
    /// extras table, and creates a no-gc ref for it.
    ///
    /// Returns the new commit id together with `contents`, updated so that
    /// the committer timestamp (and the signature, if `sign_with` was given)
    /// matches what was actually written.
    ///
    /// If the extras table already holds different extras under the
    /// resulting commit id, the committer timestamp is decremented by one
    /// second and the commit is written again, until the id no longer
    /// collides.
    ///
    /// # Errors
    ///
    /// Fails if the commit has no parents, if it is a merge that includes the
    /// root commit, if any id has the wrong length, or if signing, object
    /// writing, ref editing or saving the table fails.
    ///
    /// # Panics
    ///
    /// Panics if `contents.secure_sig` is already set.
    async fn write_commit(
        &self,
        mut contents: Commit,
        mut sign_with: Option<&mut SigningFn>,
    ) -> BackendResult<(CommitId, Commit)> {
        assert!(contents.secure_sig.is_none(), "commit.secure_sig was set");

        let locked_repo = self.lock_git_repo();
        let tree_ids = &contents.root_tree;
        // A conflicted root tree is materialized as a special tree object.
        let git_tree_id = match tree_ids.as_resolved() {
            Some(tree_id) => validate_git_object_id(tree_id)?,
            None => write_tree_conflict(&locked_repo, tree_ids)?,
        };
        let author = signature_to_git(&contents.author);
        let mut committer = signature_to_git(&contents.committer);
        let message = &contents.description;
        if contents.parents.is_empty() {
            return Err(BackendError::Other(
                "Cannot write a commit with no parents".into(),
            ));
        }
        let mut parents = SmallVec::new();
        for parent_id in &contents.parents {
            if *parent_id == self.root_commit_id {
                // Git doesn't have a root commit, so if the parent is the root commit, we don't
                // add it to the list of parents to write in the Git commit. We also check that
                // there are no other parents since Git cannot represent a merge between a root
                // commit and another commit.
                if contents.parents.len() > 1 {
                    return Err(BackendError::Unsupported(
                        "The Git backend does not support creating merge commits with the root \
                         commit as one of the parents."
                            .to_owned(),
                    ));
                }
            } else {
                parents.push(validate_git_object_id(parent_id)?);
            }
        }
        let mut extra_headers: Vec<(BString, BString)> = vec![];
        // For a conflicted tree, the individual tree ids are also recorded
        // in a commit header.
        if !tree_ids.is_resolved() {
            let value = tree_ids.iter().map(|id| id.hex()).join(" ");
            extra_headers.push((JJ_TREES_COMMIT_HEADER.into(), value.into()));
        }
        if self.write_change_id_header {
            extra_headers.push((
                CHANGE_ID_COMMIT_HEADER.into(),
                contents.change_id.reverse_hex().into(),
            ));
        }

        let extras = serialize_extras(&contents);

        // If two writers write commits of the same id with different metadata, they
        // will both succeed and the metadata entries will be "merged" later. Since
        // metadata entry is keyed by the commit id, one of the entries would be lost.
        // To prevent such race condition locally, we extend the scope covered by the
        // table lock. This is still racy if multiple machines are involved and the
        // repository is rsync-ed.
        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
        let id = loop {
            // Rebuilt on every iteration because `committer` may have been
            // adjusted by the previous one.
            let mut commit = gix::objs::Commit {
                message: message.to_owned().into(),
                tree: git_tree_id,
                author: author.clone(),
                committer: committer.clone(),
                encoding: None,
                parents: parents.clone(),
                extra_headers: extra_headers.clone(),
            };

            if let Some(sign) = &mut sign_with {
                // we don't use gix pool, but at least use their heuristic
                let mut data = Vec::with_capacity(512);
                // The signature covers the commit serialized without the
                // `gpgsig` header, which is appended afterwards.
                commit.write_to(&mut data).unwrap();

                let sig = sign(&data).map_err(|err| BackendError::WriteObject {
                    object_type: "commit",
                    source: Box::new(err),
                })?;
                commit
                    .extra_headers
                    .push(("gpgsig".into(), sig.clone().into()));
                contents.secure_sig = Some(SecureSig { data, sig });
            }

            let git_id =
                locked_repo
                    .write_object(&commit)
                    .map_err(|err| BackendError::WriteObject {
                        object_type: "commit",
                        source: Box::new(err),
                    })?;

            match table.get_value(git_id.as_bytes()) {
                Some(existing_extras) if existing_extras != extras => {
                    // It's possible a commit already exists with the same
                    // commit id but different change id. Adjust the timestamp
                    // until this is no longer the case.
                    //
                    // For example, this can happen when rebasing duplicate
                    // commits, https://github.com/jj-vcs/jj/issues/694.
                    //
                    // `jj` resets the committer timestamp to the current
                    // timestamp whenever it rewrites a commit. So, it's
                    // unlikely for the timestamp to be 0 even if the original
                    // commit had its timestamp set to 0. Moreover, we test that
                    // a commit with a negative timestamp can still be written
                    // and read back by `jj`.
                    committer.time.seconds -= 1;
                }
                _ => break CommitId::from_bytes(git_id.as_bytes()),
            }
        };

        // Everything up to this point had no permanent effect on the repo except
        // GC-able objects
        locked_repo
            .edit_reference(to_no_gc_ref_update(&id))
            .map_err(|err| BackendError::Other(Box::new(err)))?;

        // Update the signature to match the one that was actually written to the object
        // store
        contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000);
        let mut mut_table = table.start_mutation();
        mut_table.add_entry(id.to_bytes(), extras);
        self.save_extra_metadata_table(mut_table, &table_lock)?;
        Ok((id, contents))
    }
1341
    /// Computes copy records between the trees of `root_id` and `head_id`
    /// using a Git tree diff with rewrite tracking enabled.
    ///
    /// Only `Change::Rewrite` entries whose source and destination are both
    /// blobs produce a record. If `paths` is given, records whose target is
    /// not in `paths` are dropped. The diff runs eagerly; the returned stream
    /// just replays the collected results.
    ///
    /// A path that is not valid UTF-8 yields an `Err` item in the stream
    /// rather than aborting the diff. Panics if a path is rejected by
    /// `RepoPathBuf::from_internal_string`.
    fn get_copy_records(
        &self,
        paths: Option<&[RepoPathBuf]>,
        root_id: &CommitId,
        head_id: &CommitId,
    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>> {
        let repo = self.git_repo();
        let root_tree = self.read_tree_for_commit(&repo, root_id)?;
        let head_tree = self.read_tree_for_commit(&repo, head_id)?;

        // Converts one diff change into a copy record, or `None` if the
        // change is not a blob-to-blob rewrite or is filtered out by `paths`.
        let change_to_copy_record =
            |change: gix::object::tree::diff::Change| -> BackendResult<Option<CopyRecord>> {
                let gix::object::tree::diff::Change::Rewrite {
                    source_location,
                    source_entry_mode,
                    source_id,
                    entry_mode: dest_entry_mode,
                    location: dest_location,
                    ..
                } = change
                else {
                    return Ok(None);
                };
                // TODO: Renamed symlinks cannot be returned because CopyRecord
                // expects `source_file: FileId`.
                if !source_entry_mode.is_blob() || !dest_entry_mode.is_blob() {
                    return Ok(None);
                }

                // UTF-8 errors are attributed to the commit on the
                // corresponding side of the diff.
                let source = str::from_utf8(source_location)
                    .map_err(|err| to_invalid_utf8_err(err, root_id))?;
                let dest = str::from_utf8(dest_location)
                    .map_err(|err| to_invalid_utf8_err(err, head_id))?;

                let target = RepoPathBuf::from_internal_string(dest).unwrap();
                if !paths.is_none_or(|paths| paths.contains(&target)) {
                    return Ok(None);
                }

                Ok(Some(CopyRecord {
                    target,
                    target_commit: head_id.clone(),
                    source: RepoPathBuf::from_internal_string(source).unwrap(),
                    source_file: FileId::from_bytes(source_id.as_bytes()),
                    source_commit: root_id.clone(),
                }))
            };

        let mut records: Vec<BackendResult<CopyRecord>> = Vec::new();
        root_tree
            .changes()
            .map_err(|err| BackendError::Other(err.into()))?
            .options(|opts| {
                // NOTE(review): the 0.5 percentages are presumably similarity
                // thresholds and `limit` a cap on rewrite candidates; confirm
                // against the gix `Rewrites` documentation.
                opts.track_path().track_rewrites(Some(gix::diff::Rewrites {
                    copies: Some(gix::diff::rewrites::Copies {
                        source: gix::diff::rewrites::CopySource::FromSetOfModifiedFiles,
                        percentage: Some(0.5),
                    }),
                    percentage: Some(0.5),
                    limit: 1000,
                    track_empty: false,
                }));
            })
            .for_each_to_obtain_tree_with_cache(
                &head_tree,
                &mut self.new_diff_platform()?,
                |change| -> BackendResult<_> {
                    // Conversion errors are recorded as stream items so that
                    // the diff keeps going.
                    match change_to_copy_record(change) {
                        Ok(None) => {}
                        Ok(Some(change)) => records.push(Ok(change)),
                        Err(err) => records.push(Err(err)),
                    }
                    Ok(gix::object::tree::diff::Action::Continue)
                },
            )
            .map_err(|err| BackendError::Other(err.into()))?;
        Ok(Box::pin(futures::stream::iter(records)))
    }
1420
1421    #[tracing::instrument(skip(self, index))]
1422    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
1423        let git_repo = self.lock_git_repo();
1424        let new_heads = index
1425            .all_heads_for_gc()
1426            .map_err(|err| BackendError::Other(err.into()))?
1427            .filter(|id| *id != self.root_commit_id);
1428        recreate_no_gc_refs(&git_repo, new_heads, keep_newer)?;
1429        // TODO: remove unreachable entries from extras table if segment file
1430        // mtime <= keep_newer? (it won't be consistent with no-gc refs
1431        // preserved by the keep_newer timestamp though)
1432        // TODO: remove unreachable extras table segments
1433        run_git_gc(
1434            self.git_executable.as_ref(),
1435            self.git_repo_path(),
1436            keep_newer,
1437        )
1438        .map_err(|err| BackendError::Other(err.into()))?;
1439        // Since "git gc" will move loose refs into packed refs, in-memory
1440        // packed-refs cache should be invalidated without relying on mtime.
1441        git_repo.refs.force_refresh_packed_buffer().ok();
1442        Ok(())
1443    }
1444}
1445
1446/// Write a tree conflict as a special tree with `.jjconflict-base-N` and
1447/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd.
1448fn write_tree_conflict(
1449    repo: &gix::Repository,
1450    conflict: &Merge<TreeId>,
1451) -> BackendResult<gix::ObjectId> {
1452    // Tree entries to be written must be sorted by Entry::filename().
1453    let mut entries = itertools::chain(
1454        conflict
1455            .removes()
1456            .enumerate()
1457            .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)),
1458        conflict
1459            .adds()
1460            .enumerate()
1461            .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)),
1462    )
1463    .map(|(name, tree_id)| gix::objs::tree::Entry {
1464        mode: gix::object::tree::EntryKind::Tree.into(),
1465        filename: name.into(),
1466        oid: gix::ObjectId::from_bytes_or_panic(tree_id.as_bytes()),
1467    })
1468    .collect_vec();
1469    let readme_id = repo
1470        .write_blob(
1471            r#"This commit was made by jj, https://jj-vcs.dev/.
1472The commit contains file conflicts, and therefore looks wrong when used with plain
1473Git or other tools that are unfamiliar with jj.
1474
1475The .jjconflict-* directories represent the different inputs to the conflict.
1476For details, see
1477https://docs.jj-vcs.dev/prerelease/git-compatibility/#format-mapping-details
1478
1479If you see this file in your working copy, it probably means that you used a
1480regular `git` command to check out a conflicted commit. Use `jj abandon` to
1481recover.
1482"#,
1483        )
1484        .map_err(|err| {
1485            BackendError::Other(format!("Failed to write README for conflict tree: {err}").into())
1486        })?
1487        .detach();
1488    entries.push(gix::objs::tree::Entry {
1489        mode: gix::object::tree::EntryKind::Blob.into(),
1490        filename: "README".into(),
1491        oid: readme_id,
1492    });
1493    entries.sort_unstable();
1494    let id = repo
1495        .write_object(gix::objs::Tree { entries })
1496        .map_err(|err| BackendError::WriteObject {
1497            object_type: "tree",
1498            source: Box::new(err),
1499        })?;
1500    Ok(id.detach())
1501}
1502
1503#[cfg(test)]
1504mod tests {
1505    use assert_matches::assert_matches;
1506    use gix::date::parse::TimeBuf;
1507    use gix::objs::CommitRef;
1508    use indoc::indoc;
1509    use pollster::FutureExt as _;
1510
1511    use super::*;
1512    use crate::config::StackedConfig;
1513    use crate::content_hash::blake2b_hash;
1514    use crate::hex_util;
1515    use crate::tests::new_temp_dir;
1516
1517    const GIT_USER: &str = "Someone";
1518    const GIT_EMAIL: &str = "someone@example.com";
1519
1520    fn git_config() -> Vec<bstr::BString> {
1521        vec![
1522            format!("user.name = {GIT_USER}").into(),
1523            format!("user.email = {GIT_EMAIL}").into(),
1524            "init.defaultBranch = master".into(),
1525        ]
1526    }
1527
1528    fn open_options() -> gix::open::Options {
1529        gix::open::Options::isolated()
1530            .config_overrides(git_config())
1531            .strict_config(true)
1532    }
1533
1534    fn git_init(directory: impl AsRef<Path>) -> gix::Repository {
1535        gix::ThreadSafeRepository::init_opts(
1536            directory,
1537            gix::create::Kind::WithWorktree,
1538            gix::create::Options::default(),
1539            open_options(),
1540        )
1541        .unwrap()
1542        .to_thread_local()
1543    }
1544
1545    #[test]
1546    fn read_plain_git_commit() {
1547        let settings = user_settings();
1548        let temp_dir = new_temp_dir();
1549        let store_path = temp_dir.path();
1550        let git_repo_path = temp_dir.path().join("git");
1551        let git_repo = git_init(git_repo_path);
1552
1553        // Add a commit with some files in
1554        let blob1 = git_repo.write_blob(b"content1").unwrap().detach();
1555        let blob2 = git_repo.write_blob(b"normal").unwrap().detach();
1556        let mut dir_tree_editor = git_repo.empty_tree().edit().unwrap();
1557        dir_tree_editor
1558            .upsert("normal", gix::object::tree::EntryKind::Blob, blob1)
1559            .unwrap();
1560        dir_tree_editor
1561            .upsert("symlink", gix::object::tree::EntryKind::Link, blob2)
1562            .unwrap();
1563        let dir_tree_id = dir_tree_editor.write().unwrap().detach();
1564        let mut root_tree_builder = git_repo.empty_tree().edit().unwrap();
1565        root_tree_builder
1566            .upsert("dir", gix::object::tree::EntryKind::Tree, dir_tree_id)
1567            .unwrap();
1568        let root_tree_id = root_tree_builder.write().unwrap().detach();
1569        let git_author = gix::actor::Signature {
1570            name: "git author".into(),
1571            email: "git.author@example.com".into(),
1572            time: gix::date::Time::new(1000, 60 * 60),
1573        };
1574        let git_committer = gix::actor::Signature {
1575            name: "git committer".into(),
1576            email: "git.committer@example.com".into(),
1577            time: gix::date::Time::new(2000, -480 * 60),
1578        };
1579        let git_commit_id = git_repo
1580            .commit_as(
1581                git_committer.to_ref(&mut TimeBuf::default()),
1582                git_author.to_ref(&mut TimeBuf::default()),
1583                "refs/heads/dummy",
1584                "git commit message",
1585                root_tree_id,
1586                [] as [gix::ObjectId; 0],
1587            )
1588            .unwrap()
1589            .detach();
1590        git_repo
1591            .find_reference("refs/heads/dummy")
1592            .unwrap()
1593            .delete()
1594            .unwrap();
1595        let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263");
1596        // The change id is the leading reverse bits of the commit id
1597        let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25");
1598        // Check that the git commit above got the hash we expect
1599        assert_eq!(
1600            git_commit_id.as_bytes(),
1601            commit_id.as_bytes(),
1602            "{git_commit_id:?} vs {commit_id:?}"
1603        );
1604
1605        // Add an empty commit on top
1606        let git_commit_id2 = git_repo
1607            .commit_as(
1608                git_committer.to_ref(&mut TimeBuf::default()),
1609                git_author.to_ref(&mut TimeBuf::default()),
1610                "refs/heads/dummy2",
1611                "git commit message 2",
1612                root_tree_id,
1613                [git_commit_id],
1614            )
1615            .unwrap()
1616            .detach();
1617        git_repo
1618            .find_reference("refs/heads/dummy2")
1619            .unwrap()
1620            .delete()
1621            .unwrap();
1622        let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes());
1623
1624        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1625
1626        // Import the head commit and its ancestors
1627        backend.import_head_commits([&commit_id2]).unwrap();
1628        // Ref should be created only for the head commit
1629        let git_refs = backend
1630            .git_repo()
1631            .references()
1632            .unwrap()
1633            .prefixed("refs/jj/keep/")
1634            .unwrap()
1635            .map(|git_ref| git_ref.unwrap().id().detach())
1636            .collect_vec();
1637        assert_eq!(git_refs, vec![git_commit_id2]);
1638
1639        let commit = backend.read_commit(&commit_id).block_on().unwrap();
1640        assert_eq!(&commit.change_id, &change_id);
1641        assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
1642        assert_eq!(commit.predecessors, vec![]);
1643        assert_eq!(
1644            commit.root_tree,
1645            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1646        );
1647        assert_eq!(commit.description, "git commit message");
1648        assert_eq!(commit.author.name, "git author");
1649        assert_eq!(commit.author.email, "git.author@example.com");
1650        assert_eq!(
1651            commit.author.timestamp.timestamp,
1652            MillisSinceEpoch(1000 * 1000)
1653        );
1654        assert_eq!(commit.author.timestamp.tz_offset, 60);
1655        assert_eq!(commit.committer.name, "git committer");
1656        assert_eq!(commit.committer.email, "git.committer@example.com");
1657        assert_eq!(
1658            commit.committer.timestamp.timestamp,
1659            MillisSinceEpoch(2000 * 1000)
1660        );
1661        assert_eq!(commit.committer.timestamp.tz_offset, -480);
1662
1663        let root_tree = backend
1664            .read_tree(
1665                RepoPath::root(),
1666                &TreeId::from_bytes(root_tree_id.as_bytes()),
1667            )
1668            .block_on()
1669            .unwrap();
1670        let mut root_entries = root_tree.entries();
1671        let dir = root_entries.next().unwrap();
1672        assert_eq!(root_entries.next(), None);
1673        assert_eq!(dir.name().as_internal_str(), "dir");
1674        assert_eq!(
1675            dir.value(),
1676            &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes()))
1677        );
1678
1679        let dir_tree = backend
1680            .read_tree(
1681                RepoPath::from_internal_string("dir").unwrap(),
1682                &TreeId::from_bytes(dir_tree_id.as_bytes()),
1683            )
1684            .block_on()
1685            .unwrap();
1686        let mut entries = dir_tree.entries();
1687        let file = entries.next().unwrap();
1688        let symlink = entries.next().unwrap();
1689        assert_eq!(entries.next(), None);
1690        assert_eq!(file.name().as_internal_str(), "normal");
1691        assert_eq!(
1692            file.value(),
1693            &TreeValue::File {
1694                id: FileId::from_bytes(blob1.as_bytes()),
1695                executable: false,
1696                copy_id: CopyId::placeholder(),
1697            }
1698        );
1699        assert_eq!(symlink.name().as_internal_str(), "symlink");
1700        assert_eq!(
1701            symlink.value(),
1702            &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
1703        );
1704
1705        let commit2 = backend.read_commit(&commit_id2).block_on().unwrap();
1706        assert_eq!(commit2.parents, vec![commit_id.clone()]);
1707        assert_eq!(commit.predecessors, vec![]);
1708        assert_eq!(
1709            commit.root_tree,
1710            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1711        );
1712    }
1713
1714    #[test]
1715    fn read_git_commit_without_importing() {
1716        let settings = user_settings();
1717        let temp_dir = new_temp_dir();
1718        let store_path = temp_dir.path();
1719        let git_repo_path = temp_dir.path().join("git");
1720        let git_repo = git_init(&git_repo_path);
1721
1722        let signature = gix::actor::Signature {
1723            name: GIT_USER.into(),
1724            email: GIT_EMAIL.into(),
1725            time: gix::date::Time::now_utc(),
1726        };
1727        let empty_tree_id =
1728            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1729        let git_commit_id = git_repo
1730            .commit_as(
1731                signature.to_ref(&mut TimeBuf::default()),
1732                signature.to_ref(&mut TimeBuf::default()),
1733                "refs/heads/main",
1734                "git commit message",
1735                empty_tree_id,
1736                [] as [gix::ObjectId; 0],
1737            )
1738            .unwrap();
1739
1740        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1741
1742        // read_commit() without import_head_commits() works as of now. This might be
1743        // changed later.
1744        assert!(
1745            backend
1746                .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1747                .block_on()
1748                .is_ok()
1749        );
1750        assert!(
1751            backend
1752                .cached_extra_metadata_table()
1753                .unwrap()
1754                .get_value(git_commit_id.as_bytes())
1755                .is_some(),
1756            "extra metadata should have been be created"
1757        );
1758    }
1759
1760    #[test]
1761    fn read_signed_git_commit() {
1762        let settings = user_settings();
1763        let temp_dir = new_temp_dir();
1764        let store_path = temp_dir.path();
1765        let git_repo_path = temp_dir.path().join("git");
1766        let git_repo = git_init(git_repo_path);
1767
1768        let signature = gix::actor::Signature {
1769            name: GIT_USER.into(),
1770            email: GIT_EMAIL.into(),
1771            time: gix::date::Time::now_utc(),
1772        };
1773        let empty_tree_id =
1774            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1775
1776        let secure_sig =
1777            "here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP\n";
1778
1779        let mut commit = gix::objs::Commit {
1780            tree: empty_tree_id,
1781            parents: smallvec::SmallVec::new(),
1782            author: signature.clone(),
1783            committer: signature.clone(),
1784            encoding: None,
1785            message: "git commit message".into(),
1786            extra_headers: Vec::new(),
1787        };
1788
1789        let mut commit_buf = Vec::new();
1790        commit.write_to(&mut commit_buf).unwrap();
1791        let commit_str = str::from_utf8(&commit_buf).unwrap();
1792
1793        commit
1794            .extra_headers
1795            .push(("gpgsig".into(), secure_sig.into()));
1796
1797        let git_commit_id = git_repo.write_object(&commit).unwrap();
1798
1799        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1800
1801        let commit = backend
1802            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1803            .block_on()
1804            .unwrap();
1805
1806        let sig = commit.secure_sig.expect("failed to read the signature");
1807
1808        // converting to string for nicer assert diff
1809        assert_eq!(str::from_utf8(&sig.sig).unwrap(), secure_sig);
1810        assert_eq!(str::from_utf8(&sig.data).unwrap(), commit_str);
1811    }
1812
1813    #[test]
1814    fn change_id_parsing() {
1815        let id = |commit_object_bytes: &[u8]| {
1816            extract_change_id_from_commit(&CommitRef::from_bytes(commit_object_bytes).unwrap())
1817        };
1818
1819        let commit_with_id = indoc! {b"
1820            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1821            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1822            author JJ Fan <jjfan@example.com> 1757112665 -0700
1823            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1824            extra-header blah
1825            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1826
1827            test-commit
1828        "};
1829        insta::assert_compact_debug_snapshot!(
1830            id(commit_with_id),
1831            @r#"Some(ChangeId("efbc06dc4721683f2a45568dbda31e99"))"#
1832        );
1833
1834        let commit_without_id = indoc! {b"
1835            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1836            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1837            author JJ Fan <jjfan@example.com> 1757112665 -0700
1838            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1839            extra-header blah
1840
1841            no id in header
1842        "};
1843        insta::assert_compact_debug_snapshot!(
1844            id(commit_without_id),
1845            @"None"
1846        );
1847
1848        let commit = indoc! {b"
1849            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1850            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1851            author JJ Fan <jjfan@example.com> 1757112665 -0700
1852            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1853            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1854            extra-header blah
1855            change-id abcabcabcabcabcabcabcabcabcabcab
1856
1857            valid change id first
1858        "};
1859        insta::assert_compact_debug_snapshot!(
1860            id(commit),
1861            @r#"Some(ChangeId("efbc06dc4721683f2a45568dbda31e99"))"#
1862        );
1863
1864        // We only look at the first change id if multiple are present, so this should
1865        // error
1866        let commit = indoc! {b"
1867            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1868            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1869            author JJ Fan <jjfan@example.com> 1757112665 -0700
1870            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1871            change-id abcabcabcabcabcabcabcabcabcabcab
1872            extra-header blah
1873            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1874
1875            valid change id first
1876        "};
1877        insta::assert_compact_debug_snapshot!(
1878            id(commit),
1879            @"None"
1880        );
1881    }
1882
1883    #[test]
1884    fn round_trip_change_id_via_git_header() {
1885        let settings = user_settings();
1886        let temp_dir = new_temp_dir();
1887
1888        let store_path = temp_dir.path().join("store");
1889        fs::create_dir(&store_path).unwrap();
1890        let empty_store_path = temp_dir.path().join("empty_store");
1891        fs::create_dir(&empty_store_path).unwrap();
1892        let git_repo_path = temp_dir.path().join("git");
1893        let git_repo = git_init(git_repo_path);
1894
1895        let backend = GitBackend::init_external(&settings, &store_path, git_repo.path()).unwrap();
1896        let original_change_id = ChangeId::from_hex("1111eeee1111eeee1111eeee1111eeee");
1897        let commit = Commit {
1898            parents: vec![backend.root_commit_id().clone()],
1899            predecessors: vec![],
1900            root_tree: Merge::resolved(backend.empty_tree_id().clone()),
1901            change_id: original_change_id.clone(),
1902            description: "initial".to_string(),
1903            author: create_signature(),
1904            committer: create_signature(),
1905            secure_sig: None,
1906        };
1907
1908        let (initial_commit_id, _init_commit) =
1909            backend.write_commit(commit, None).block_on().unwrap();
1910        let commit = backend.read_commit(&initial_commit_id).block_on().unwrap();
1911        assert_eq!(
1912            commit.change_id, original_change_id,
1913            "The change-id header did not roundtrip"
1914        );
1915
1916        // Because of how change ids are also persisted in extra proto files,
1917        // initialize a new store without those files, but reuse the same git
1918        // storage. This change-id must be derived from the git commit header.
1919        let no_extra_backend =
1920            GitBackend::init_external(&settings, &empty_store_path, git_repo.path()).unwrap();
1921        let no_extra_commit = no_extra_backend
1922            .read_commit(&initial_commit_id)
1923            .block_on()
1924            .unwrap();
1925
1926        assert_eq!(
1927            no_extra_commit.change_id, original_change_id,
1928            "The change-id header did not roundtrip"
1929        );
1930    }
1931
1932    #[test]
1933    fn read_empty_string_placeholder() {
1934        let git_signature1 = gix::actor::Signature {
1935            name: EMPTY_STRING_PLACEHOLDER.into(),
1936            email: "git.author@example.com".into(),
1937            time: gix::date::Time::new(1000, 60 * 60),
1938        };
1939        let signature1 = signature_from_git(git_signature1.to_ref(&mut TimeBuf::default()));
1940        assert!(signature1.name.is_empty());
1941        assert_eq!(signature1.email, "git.author@example.com");
1942        let git_signature2 = gix::actor::Signature {
1943            name: "git committer".into(),
1944            email: EMPTY_STRING_PLACEHOLDER.into(),
1945            time: gix::date::Time::new(2000, -480 * 60),
1946        };
1947        let signature2 = signature_from_git(git_signature2.to_ref(&mut TimeBuf::default()));
1948        assert_eq!(signature2.name, "git committer");
1949        assert!(signature2.email.is_empty());
1950    }
1951
1952    #[test]
1953    fn write_empty_string_placeholder() {
1954        let signature1 = Signature {
1955            name: "".to_string(),
1956            email: "someone@example.com".to_string(),
1957            timestamp: Timestamp {
1958                timestamp: MillisSinceEpoch(0),
1959                tz_offset: 0,
1960            },
1961        };
1962        let git_signature1 = signature_to_git(&signature1);
1963        assert_eq!(git_signature1.name, EMPTY_STRING_PLACEHOLDER);
1964        assert_eq!(git_signature1.email, "someone@example.com");
1965        let signature2 = Signature {
1966            name: "Someone".to_string(),
1967            email: "".to_string(),
1968            timestamp: Timestamp {
1969                timestamp: MillisSinceEpoch(0),
1970                tz_offset: 0,
1971            },
1972        };
1973        let git_signature2 = signature_to_git(&signature2);
1974        assert_eq!(git_signature2.name, "Someone");
1975        assert_eq!(git_signature2.email, EMPTY_STRING_PLACEHOLDER);
1976    }
1977
1978    /// Test that parents get written correctly
1979    #[test]
1980    fn git_commit_parents() {
1981        let settings = user_settings();
1982        let temp_dir = new_temp_dir();
1983        let store_path = temp_dir.path();
1984        let git_repo_path = temp_dir.path().join("git");
1985        let git_repo = git_init(&git_repo_path);
1986
1987        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1988        let mut commit = Commit {
1989            parents: vec![],
1990            predecessors: vec![],
1991            root_tree: Merge::resolved(backend.empty_tree_id().clone()),
1992            change_id: ChangeId::from_hex("abc123"),
1993            description: "".to_string(),
1994            author: create_signature(),
1995            committer: create_signature(),
1996            secure_sig: None,
1997        };
1998
1999        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2000            backend.write_commit(commit, None).block_on()
2001        };
2002
2003        // No parents
2004        commit.parents = vec![];
2005        assert_matches!(
2006            write_commit(commit.clone()),
2007            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
2008        );
2009
2010        // Only root commit as parent
2011        commit.parents = vec![backend.root_commit_id().clone()];
2012        let first_id = write_commit(commit.clone()).unwrap().0;
2013        let first_commit = backend.read_commit(&first_id).block_on().unwrap();
2014        assert_eq!(first_commit, commit);
2015        let first_git_commit = git_repo.find_commit(git_id(&first_id)).unwrap();
2016        assert!(first_git_commit.parent_ids().collect_vec().is_empty());
2017
2018        // Only non-root commit as parent
2019        commit.parents = vec![first_id.clone()];
2020        let second_id = write_commit(commit.clone()).unwrap().0;
2021        let second_commit = backend.read_commit(&second_id).block_on().unwrap();
2022        assert_eq!(second_commit, commit);
2023        let second_git_commit = git_repo.find_commit(git_id(&second_id)).unwrap();
2024        assert_eq!(
2025            second_git_commit.parent_ids().collect_vec(),
2026            vec![git_id(&first_id)]
2027        );
2028
2029        // Merge commit
2030        commit.parents = vec![first_id.clone(), second_id.clone()];
2031        let merge_id = write_commit(commit.clone()).unwrap().0;
2032        let merge_commit = backend.read_commit(&merge_id).block_on().unwrap();
2033        assert_eq!(merge_commit, commit);
2034        let merge_git_commit = git_repo.find_commit(git_id(&merge_id)).unwrap();
2035        assert_eq!(
2036            merge_git_commit.parent_ids().collect_vec(),
2037            vec![git_id(&first_id), git_id(&second_id)]
2038        );
2039
2040        // Merge commit with root as one parent
2041        commit.parents = vec![first_id, backend.root_commit_id().clone()];
2042        assert_matches!(
2043            write_commit(commit),
2044            Err(BackendError::Unsupported(message)) if message.contains("root commit")
2045        );
2046    }
2047
2048    #[test]
2049    fn write_tree_conflicts() {
2050        let settings = user_settings();
2051        let temp_dir = new_temp_dir();
2052        let store_path = temp_dir.path();
2053        let git_repo_path = temp_dir.path().join("git");
2054        let git_repo = git_init(&git_repo_path);
2055
2056        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
2057        let create_tree = |i| {
2058            let blob_id = git_repo.write_blob(format!("content {i}")).unwrap();
2059            let mut tree_builder = git_repo.empty_tree().edit().unwrap();
2060            tree_builder
2061                .upsert(
2062                    format!("file{i}"),
2063                    gix::object::tree::EntryKind::Blob,
2064                    blob_id,
2065                )
2066                .unwrap();
2067            TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
2068        };
2069
2070        let root_tree = Merge::from_removes_adds(
2071            vec![create_tree(0), create_tree(1)],
2072            vec![create_tree(2), create_tree(3), create_tree(4)],
2073        );
2074        let mut commit = Commit {
2075            parents: vec![backend.root_commit_id().clone()],
2076            predecessors: vec![],
2077            root_tree: root_tree.clone(),
2078            change_id: ChangeId::from_hex("abc123"),
2079            description: "".to_string(),
2080            author: create_signature(),
2081            committer: create_signature(),
2082            secure_sig: None,
2083        };
2084
2085        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2086            backend.write_commit(commit, None).block_on()
2087        };
2088
2089        // When writing a tree-level conflict, the root tree on the git side has the
2090        // individual trees as subtrees.
2091        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2092        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2093        assert_eq!(read_commit, commit);
2094        let git_commit = git_repo
2095            .find_commit(gix::ObjectId::from_bytes_or_panic(
2096                read_commit_id.as_bytes(),
2097            ))
2098            .unwrap();
2099        let git_tree = git_repo.find_tree(git_commit.tree_id().unwrap()).unwrap();
2100        assert!(
2101            git_tree
2102                .iter()
2103                .map(Result::unwrap)
2104                .filter(|entry| entry.filename() != b"README")
2105                .all(|entry| entry.mode().value() == 0o040000)
2106        );
2107        let mut iter = git_tree.iter().map(Result::unwrap);
2108        let entry = iter.next().unwrap();
2109        assert_eq!(entry.filename(), b".jjconflict-base-0");
2110        assert_eq!(
2111            entry.id().as_bytes(),
2112            root_tree.get_remove(0).unwrap().as_bytes()
2113        );
2114        let entry = iter.next().unwrap();
2115        assert_eq!(entry.filename(), b".jjconflict-base-1");
2116        assert_eq!(
2117            entry.id().as_bytes(),
2118            root_tree.get_remove(1).unwrap().as_bytes()
2119        );
2120        let entry = iter.next().unwrap();
2121        assert_eq!(entry.filename(), b".jjconflict-side-0");
2122        assert_eq!(
2123            entry.id().as_bytes(),
2124            root_tree.get_add(0).unwrap().as_bytes()
2125        );
2126        let entry = iter.next().unwrap();
2127        assert_eq!(entry.filename(), b".jjconflict-side-1");
2128        assert_eq!(
2129            entry.id().as_bytes(),
2130            root_tree.get_add(1).unwrap().as_bytes()
2131        );
2132        let entry = iter.next().unwrap();
2133        assert_eq!(entry.filename(), b".jjconflict-side-2");
2134        assert_eq!(
2135            entry.id().as_bytes(),
2136            root_tree.get_add(2).unwrap().as_bytes()
2137        );
2138        let entry = iter.next().unwrap();
2139        assert_eq!(entry.filename(), b"README");
2140        assert_eq!(entry.mode().value(), 0o100644);
2141        assert!(iter.next().is_none());
2142
2143        // When writing a single tree using the new format, it's represented by a
2144        // regular git tree.
2145        commit.root_tree = Merge::resolved(create_tree(5));
2146        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2147        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2148        assert_eq!(read_commit, commit);
2149        let git_commit = git_repo
2150            .find_commit(gix::ObjectId::from_bytes_or_panic(
2151                read_commit_id.as_bytes(),
2152            ))
2153            .unwrap();
2154        assert_eq!(
2155            Merge::resolved(TreeId::from_bytes(git_commit.tree_id().unwrap().as_bytes())),
2156            commit.root_tree
2157        );
2158    }
2159
2160    #[test]
2161    fn commit_has_ref() {
2162        let settings = user_settings();
2163        let temp_dir = new_temp_dir();
2164        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2165        let git_repo = backend.git_repo();
2166        let signature = Signature {
2167            name: "Someone".to_string(),
2168            email: "someone@example.com".to_string(),
2169            timestamp: Timestamp {
2170                timestamp: MillisSinceEpoch(0),
2171                tz_offset: 0,
2172            },
2173        };
2174        let commit = Commit {
2175            parents: vec![backend.root_commit_id().clone()],
2176            predecessors: vec![],
2177            root_tree: Merge::resolved(backend.empty_tree_id().clone()),
2178            change_id: ChangeId::new(vec![42; 16]),
2179            description: "initial".to_string(),
2180            author: signature.clone(),
2181            committer: signature,
2182            secure_sig: None,
2183        };
2184        let commit_id = backend.write_commit(commit, None).block_on().unwrap().0;
2185        let git_refs = git_repo.references().unwrap();
2186        let git_ref_ids: Vec<_> = git_refs
2187            .prefixed("refs/jj/keep/")
2188            .unwrap()
2189            .map(|x| x.unwrap().id().detach())
2190            .collect();
2191        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2192
2193        // Concurrently-running GC deletes the ref, leaving the extra metadata.
2194        for git_ref in git_refs.prefixed("refs/jj/keep/").unwrap() {
2195            git_ref.unwrap().delete().unwrap();
2196        }
2197        // Re-imported commit should have new ref.
2198        backend.import_head_commits([&commit_id]).unwrap();
2199        let git_refs = git_repo.references().unwrap();
2200        let git_ref_ids: Vec<_> = git_refs
2201            .prefixed("refs/jj/keep/")
2202            .unwrap()
2203            .map(|x| x.unwrap().id().detach())
2204            .collect();
2205        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2206    }
2207
2208    #[test]
2209    fn import_head_commits_duplicates() {
2210        let settings = user_settings();
2211        let temp_dir = new_temp_dir();
2212        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2213        let git_repo = backend.git_repo();
2214
2215        let signature = gix::actor::Signature {
2216            name: GIT_USER.into(),
2217            email: GIT_EMAIL.into(),
2218            time: gix::date::Time::now_utc(),
2219        };
2220        let empty_tree_id =
2221            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
2222        let git_commit_id = git_repo
2223            .commit_as(
2224                signature.to_ref(&mut TimeBuf::default()),
2225                signature.to_ref(&mut TimeBuf::default()),
2226                "refs/heads/main",
2227                "git commit message",
2228                empty_tree_id,
2229                [] as [gix::ObjectId; 0],
2230            )
2231            .unwrap()
2232            .detach();
2233        let commit_id = CommitId::from_bytes(git_commit_id.as_bytes());
2234
2235        // Ref creation shouldn't fail because of duplicated head ids.
2236        backend
2237            .import_head_commits([&commit_id, &commit_id])
2238            .unwrap();
2239        assert!(
2240            git_repo
2241                .references()
2242                .unwrap()
2243                .prefixed("refs/jj/keep/")
2244                .unwrap()
2245                .any(|git_ref| git_ref.unwrap().id().detach() == git_commit_id)
2246        );
2247    }
2248
2249    #[test]
2250    fn overlapping_git_commit_id() {
2251        let settings = user_settings();
2252        let temp_dir = new_temp_dir();
2253        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2254        let commit1 = Commit {
2255            parents: vec![backend.root_commit_id().clone()],
2256            predecessors: vec![],
2257            root_tree: Merge::resolved(backend.empty_tree_id().clone()),
2258            change_id: ChangeId::from_hex("7f0a7ce70354b22efcccf7bf144017c4"),
2259            description: "initial".to_string(),
2260            author: create_signature(),
2261            committer: create_signature(),
2262            secure_sig: None,
2263        };
2264
2265        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2266            backend.write_commit(commit, None).block_on()
2267        };
2268
2269        let (commit_id1, mut commit2) = write_commit(commit1).unwrap();
2270        commit2.predecessors.push(commit_id1.clone());
2271        // `write_commit` should prevent the ids from being the same by changing the
2272        // committer timestamp of the commit it actually writes.
2273        let (commit_id2, mut actual_commit2) = write_commit(commit2.clone()).unwrap();
2274        // The returned matches the ID
2275        assert_eq!(
2276            backend.read_commit(&commit_id2).block_on().unwrap(),
2277            actual_commit2
2278        );
2279        assert_ne!(commit_id2, commit_id1);
2280        // The committer timestamp should differ
2281        assert_ne!(
2282            actual_commit2.committer.timestamp.timestamp,
2283            commit2.committer.timestamp.timestamp
2284        );
2285        // The rest of the commit should be the same
2286        actual_commit2.committer.timestamp.timestamp = commit2.committer.timestamp.timestamp;
2287        assert_eq!(actual_commit2, commit2);
2288    }
2289
2290    #[test]
2291    fn write_signed_commit() {
2292        let settings = user_settings();
2293        let temp_dir = new_temp_dir();
2294        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2295
2296        let commit = Commit {
2297            parents: vec![backend.root_commit_id().clone()],
2298            predecessors: vec![],
2299            root_tree: Merge::resolved(backend.empty_tree_id().clone()),
2300            change_id: ChangeId::new(vec![42; 16]),
2301            description: "initial".to_string(),
2302            author: create_signature(),
2303            committer: create_signature(),
2304            secure_sig: None,
2305        };
2306
2307        let mut signer = |data: &_| {
2308            let hash: String = hex_util::encode_hex(&blake2b_hash(data));
2309            Ok(format!("test sig\nhash={hash}\n").into_bytes())
2310        };
2311
2312        let (id, commit) = backend
2313            .write_commit(commit, Some(&mut signer as &mut SigningFn))
2314            .block_on()
2315            .unwrap();
2316
2317        let git_repo = backend.git_repo();
2318        let obj = git_repo
2319            .find_object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
2320            .unwrap();
2321        insta::assert_snapshot!(str::from_utf8(&obj.data).unwrap(), @r"
2322        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2323        author Someone <someone@example.com> 0 +0000
2324        committer Someone <someone@example.com> 0 +0000
2325        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp
2326        gpgsig test sig
2327         hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29
2328
2329        initial
2330        ");
2331
2332        let returned_sig = commit.secure_sig.expect("failed to return the signature");
2333
2334        let commit = backend.read_commit(&id).block_on().unwrap();
2335
2336        let sig = commit.secure_sig.expect("failed to read the signature");
2337        assert_eq!(&sig, &returned_sig);
2338
2339        insta::assert_snapshot!(str::from_utf8(&sig.sig).unwrap(), @r"
2340        test sig
2341        hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29
2342        ");
2343        insta::assert_snapshot!(str::from_utf8(&sig.data).unwrap(), @r"
2344        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2345        author Someone <someone@example.com> 0 +0000
2346        committer Someone <someone@example.com> 0 +0000
2347        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp
2348
2349        initial
2350        ");
2351    }
2352
2353    fn git_id(commit_id: &CommitId) -> gix::ObjectId {
2354        gix::ObjectId::from_bytes_or_panic(commit_id.as_bytes())
2355    }
2356
2357    fn create_signature() -> Signature {
2358        Signature {
2359            name: GIT_USER.to_string(),
2360            email: GIT_EMAIL.to_string(),
2361            timestamp: Timestamp {
2362                timestamp: MillisSinceEpoch(0),
2363                tz_offset: 0,
2364            },
2365        }
2366    }
2367
2368    // Not using testutils::user_settings() because there is a dependency cycle
2369    // 'jj_lib (1) -> testutils -> jj_lib (2)' which creates another distinct
2370    // UserSettings type. testutils returns jj_lib (2)'s UserSettings, whereas
2371    // our UserSettings type comes from jj_lib (1).
2372    fn user_settings() -> UserSettings {
2373        let config = StackedConfig::with_defaults();
2374        UserSettings::from_config(config).unwrap()
2375    }
2376}