jj_lib/
git_backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::collections::HashSet;
18use std::ffi::OsStr;
19use std::fmt::Debug;
20use std::fmt::Error;
21use std::fmt::Formatter;
22use std::fs;
23use std::io;
24use std::io::Cursor;
25use std::path::Path;
26use std::path::PathBuf;
27use std::pin::Pin;
28use std::process::Command;
29use std::process::ExitStatus;
30use std::str::Utf8Error;
31use std::sync::Arc;
32use std::sync::Mutex;
33use std::sync::MutexGuard;
34use std::time::SystemTime;
35
36use async_trait::async_trait;
37use bstr::BStr;
38use futures::stream::BoxStream;
39use gix::bstr::BString;
40use gix::objs::CommitRefIter;
41use gix::objs::WriteTo as _;
42use itertools::Itertools as _;
43use once_cell::sync::OnceCell as OnceLock;
44use pollster::FutureExt as _;
45use prost::Message as _;
46use smallvec::SmallVec;
47use thiserror::Error;
48use tokio::io::AsyncRead;
49use tokio::io::AsyncReadExt as _;
50
51use crate::backend::Backend;
52use crate::backend::BackendError;
53use crate::backend::BackendInitError;
54use crate::backend::BackendLoadError;
55use crate::backend::BackendResult;
56use crate::backend::ChangeId;
57use crate::backend::Commit;
58use crate::backend::CommitId;
59use crate::backend::CopyHistory;
60use crate::backend::CopyId;
61use crate::backend::CopyRecord;
62use crate::backend::FileId;
63use crate::backend::MergedTreeId;
64use crate::backend::MillisSinceEpoch;
65use crate::backend::SecureSig;
66use crate::backend::Signature;
67use crate::backend::SigningFn;
68use crate::backend::SymlinkId;
69use crate::backend::Timestamp;
70use crate::backend::Tree;
71use crate::backend::TreeId;
72use crate::backend::TreeValue;
73use crate::backend::make_root_commit;
74use crate::config::ConfigGetError;
75use crate::file_util;
76use crate::file_util::BadPathEncoding;
77use crate::file_util::IoResultExt as _;
78use crate::file_util::PathError;
79use crate::index::Index;
80use crate::lock::FileLock;
81use crate::merge::Merge;
82use crate::merge::MergeBuilder;
83use crate::object_id::ObjectId;
84use crate::repo_path::RepoPath;
85use crate::repo_path::RepoPathBuf;
86use crate::repo_path::RepoPathComponentBuf;
87use crate::settings::GitSettings;
88use crate::settings::UserSettings;
89use crate::stacked_table::MutableTable;
90use crate::stacked_table::ReadonlyTable;
91use crate::stacked_table::TableSegment as _;
92use crate::stacked_table::TableStore;
93use crate::stacked_table::TableStoreError;
94
/// Length in bytes of the commit and tree ids handled by this backend. Also
/// used as the key size of the extra metadata table store.
const HASH_LENGTH: usize = 20;
/// Length in bytes of a change id.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";

/// Name of the extra commit header that lists the tree ids of a conflicted
/// root tree as space-separated hex strings.
pub const JJ_TREES_COMMIT_HEADER: &str = "jj:trees";
/// Name of the extra commit header that carries the change id in reverse hex.
pub const CHANGE_ID_COMMIT_HEADER: &str = "change-id";
102
/// Errors that can occur while initializing a `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendInitError {
    /// Creating a new Git repository failed.
    #[error("Failed to initialize git repository")]
    InitRepository(#[source] gix::init::Error),
    /// Opening an existing Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// The Git repository path could not be converted to bytes for storage.
    #[error("Failed to encode git repository path")]
    EncodeRepositoryPath(#[source] BadPathEncoding),
    /// Reading the Git settings from the user configuration failed.
    #[error(transparent)]
    Config(ConfigGetError),
    /// A filesystem operation on a store or repository path failed.
    #[error(transparent)]
    Path(PathError),
}
116
117impl From<Box<GitBackendInitError>> for BackendInitError {
118    fn from(err: Box<GitBackendInitError>) -> Self {
119        Self(err)
120    }
121}
122
/// Errors that can occur while loading an existing `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendLoadError {
    /// Opening the backing Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// The stored Git repository path bytes could not be turned into a path.
    #[error("Failed to decode git repository path")]
    DecodeRepositoryPath(#[source] BadPathEncoding),
    /// Reading the Git settings from the user configuration failed.
    #[error(transparent)]
    Config(ConfigGetError),
    /// A filesystem operation on a store or repository path failed.
    #[error(transparent)]
    Path(PathError),
}
134
135impl From<Box<GitBackendLoadError>> for BackendLoadError {
136    fn from(err: Box<GitBackendLoadError>) -> Self {
137        Self(err)
138    }
139}
140
/// `GitBackend`-specific error that may occur after the backend is loaded.
#[derive(Debug, Error)]
pub enum GitBackendError {
    /// Reading the extra (non-Git) metadata table failed.
    #[error("Failed to read non-git metadata")]
    ReadMetadata(#[source] TableStoreError),
    /// Saving the extra (non-Git) metadata table failed.
    #[error("Failed to write non-git metadata")]
    WriteMetadata(#[source] TableStoreError),
}
149
150impl From<GitBackendError> for BackendError {
151    fn from(err: GitBackendError) -> Self {
152        Self::Other(err.into())
153    }
154}
155
/// Errors reported when running the `git gc` command.
#[derive(Debug, Error)]
pub enum GitGcError {
    /// The `git gc` command could not be run.
    #[error("Failed to run git gc command")]
    GcCommand(#[source] std::io::Error),
    /// The `git gc` command ran but exited with an error status.
    #[error("git gc command exited with an error: {0}")]
    GcCommandErrorStatus(ExitStatus),
}
163
/// Backend that stores its objects in a Git repository, plus a table of extra
/// (non-Git) metadata.
pub struct GitBackend {
    // While gix::Repository can be created from gix::ThreadSafeRepository, it's
    // cheaper to cache the thread-local instance behind a mutex than creating
    // one for each backend method call. Our GitBackend is most likely to be
    // used in a single-threaded context.
    /// Thread-safe handle from which thread-local repo instances are created.
    base_repo: gix::ThreadSafeRepository,
    /// Cached thread-local repo instance, shared behind a mutex.
    repo: Mutex<gix::Repository>,
    /// Id of the root commit (all-zero bytes).
    root_commit_id: CommitId,
    /// Id of the root change (all-zero bytes).
    root_change_id: ChangeId,
    /// Id of the empty tree.
    empty_tree_id: TreeId,
    /// Shallow root commit ids, loaded once on first use.
    shallow_root_ids: OnceLock<Vec<CommitId>>,
    /// Store holding the extra metadata tables.
    extra_metadata_store: TableStore,
    /// Most recently loaded or saved head of the extra metadata table.
    cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
    /// Path of the `git` executable, taken from the Git settings.
    git_executable: PathBuf,
    /// Value of the `write_change_id_header` Git setting.
    write_change_id_header: bool,
}
180
181impl GitBackend {
182    pub fn name() -> &'static str {
183        "git"
184    }
185
186    fn new(
187        base_repo: gix::ThreadSafeRepository,
188        extra_metadata_store: TableStore,
189        git_settings: GitSettings,
190    ) -> Self {
191        let repo = Mutex::new(base_repo.to_thread_local());
192        let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
193        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
194        let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
195        Self {
196            base_repo,
197            repo,
198            root_commit_id,
199            root_change_id,
200            empty_tree_id,
201            shallow_root_ids: OnceLock::new(),
202            extra_metadata_store,
203            cached_extra_metadata: Mutex::new(None),
204            git_executable: git_settings.executable_path,
205            write_change_id_header: git_settings.write_change_id_header,
206        }
207    }
208
209    pub fn init_internal(
210        settings: &UserSettings,
211        store_path: &Path,
212    ) -> Result<Self, Box<GitBackendInitError>> {
213        let git_repo_path = Path::new("git");
214        let git_repo = gix::ThreadSafeRepository::init_opts(
215            store_path.join(git_repo_path),
216            gix::create::Kind::Bare,
217            gix::create::Options::default(),
218            gix_open_opts_from_settings(settings),
219        )
220        .map_err(GitBackendInitError::InitRepository)?;
221        let git_settings = settings
222            .git_settings()
223            .map_err(GitBackendInitError::Config)?;
224        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
225    }
226
227    /// Initializes backend by creating a new Git repo at the specified
228    /// workspace path. The workspace directory must exist.
229    pub fn init_colocated(
230        settings: &UserSettings,
231        store_path: &Path,
232        workspace_root: &Path,
233    ) -> Result<Self, Box<GitBackendInitError>> {
234        let canonical_workspace_root = {
235            let path = store_path.join(workspace_root);
236            dunce::canonicalize(&path)
237                .context(&path)
238                .map_err(GitBackendInitError::Path)?
239        };
240        let git_repo = gix::ThreadSafeRepository::init_opts(
241            canonical_workspace_root,
242            gix::create::Kind::WithWorktree,
243            gix::create::Options::default(),
244            gix_open_opts_from_settings(settings),
245        )
246        .map_err(GitBackendInitError::InitRepository)?;
247        let git_repo_path = workspace_root.join(".git");
248        let git_settings = settings
249            .git_settings()
250            .map_err(GitBackendInitError::Config)?;
251        Self::init_with_repo(store_path, &git_repo_path, git_repo, git_settings)
252    }
253
254    /// Initializes backend with an existing Git repo at the specified path.
255    pub fn init_external(
256        settings: &UserSettings,
257        store_path: &Path,
258        git_repo_path: &Path,
259    ) -> Result<Self, Box<GitBackendInitError>> {
260        let canonical_git_repo_path = {
261            let path = store_path.join(git_repo_path);
262            canonicalize_git_repo_path(&path)
263                .context(&path)
264                .map_err(GitBackendInitError::Path)?
265        };
266        let git_repo = gix::ThreadSafeRepository::open_opts(
267            canonical_git_repo_path,
268            gix_open_opts_from_settings(settings),
269        )
270        .map_err(GitBackendInitError::OpenRepository)?;
271        let git_settings = settings
272            .git_settings()
273            .map_err(GitBackendInitError::Config)?;
274        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
275    }
276
277    fn init_with_repo(
278        store_path: &Path,
279        git_repo_path: &Path,
280        repo: gix::ThreadSafeRepository,
281        git_settings: GitSettings,
282    ) -> Result<Self, Box<GitBackendInitError>> {
283        let extra_path = store_path.join("extra");
284        fs::create_dir(&extra_path)
285            .context(&extra_path)
286            .map_err(GitBackendInitError::Path)?;
287        let target_path = store_path.join("git_target");
288        let git_repo_path = if cfg!(windows) && git_repo_path.is_relative() {
289            // When a repository is created in Windows, format the path with *forward
290            // slashes* and not backwards slashes. This makes it possible to use the same
291            // repository under Windows Subsystem for Linux.
292            //
293            // This only works for relative paths. If the path is absolute, there's not much
294            // we can do, and it simply won't work inside and outside WSL at the same time.
295            file_util::slash_path(git_repo_path)
296        } else {
297            git_repo_path.into()
298        };
299        let git_repo_path_bytes = file_util::path_to_bytes(&git_repo_path)
300            .map_err(GitBackendInitError::EncodeRepositoryPath)?;
301        fs::write(&target_path, git_repo_path_bytes)
302            .context(&target_path)
303            .map_err(GitBackendInitError::Path)?;
304        let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
305        Ok(Self::new(repo, extra_metadata_store, git_settings))
306    }
307
308    pub fn load(
309        settings: &UserSettings,
310        store_path: &Path,
311    ) -> Result<Self, Box<GitBackendLoadError>> {
312        let git_repo_path = {
313            let target_path = store_path.join("git_target");
314            let git_repo_path_bytes = fs::read(&target_path)
315                .context(&target_path)
316                .map_err(GitBackendLoadError::Path)?;
317            let git_repo_path = file_util::path_from_bytes(&git_repo_path_bytes)
318                .map_err(GitBackendLoadError::DecodeRepositoryPath)?;
319            let git_repo_path = store_path.join(git_repo_path);
320            canonicalize_git_repo_path(&git_repo_path)
321                .context(&git_repo_path)
322                .map_err(GitBackendLoadError::Path)?
323        };
324        let repo = gix::ThreadSafeRepository::open_opts(
325            git_repo_path,
326            gix_open_opts_from_settings(settings),
327        )
328        .map_err(GitBackendLoadError::OpenRepository)?;
329        let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
330        let git_settings = settings
331            .git_settings()
332            .map_err(GitBackendLoadError::Config)?;
333        Ok(Self::new(repo, extra_metadata_store, git_settings))
334    }
335
336    fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
337        self.repo.lock().unwrap()
338    }
339
340    /// Returns new thread-local instance to access to the underlying Git repo.
341    pub fn git_repo(&self) -> gix::Repository {
342        self.base_repo.to_thread_local()
343    }
344
345    /// Path to the `.git` directory or the repository itself if it's bare.
346    pub fn git_repo_path(&self) -> &Path {
347        self.base_repo.path()
348    }
349
350    /// Path to the working directory if the repository isn't bare.
351    pub fn git_workdir(&self) -> Option<&Path> {
352        self.base_repo.work_dir()
353    }
354
355    fn shallow_root_ids(&self, git_repo: &gix::Repository) -> BackendResult<&[CommitId]> {
356        // The list of shallow roots is cached by gix, but it's still expensive
357        // to stat file on every read_object() call. Refreshing shallow roots is
358        // also bad for consistency reasons.
359        self.shallow_root_ids
360            .get_or_try_init(|| {
361                let maybe_oids = git_repo
362                    .shallow_commits()
363                    .map_err(|err| BackendError::Other(err.into()))?;
364                let commit_ids = maybe_oids.map_or(vec![], |oids| {
365                    oids.iter()
366                        .map(|oid| CommitId::from_bytes(oid.as_bytes()))
367                        .collect()
368                });
369                Ok(commit_ids)
370            })
371            .map(AsRef::as_ref)
372    }
373
374    fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
375        let mut locked_head = self.cached_extra_metadata.lock().unwrap();
376        match locked_head.as_ref() {
377            Some(head) => Ok(head.clone()),
378            None => {
379                let table = self
380                    .extra_metadata_store
381                    .get_head()
382                    .map_err(GitBackendError::ReadMetadata)?;
383                *locked_head = Some(table.clone());
384                Ok(table)
385            }
386        }
387    }
388
389    fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> {
390        let table = self
391            .extra_metadata_store
392            .get_head_locked()
393            .map_err(GitBackendError::ReadMetadata)?;
394        Ok(table)
395    }
396
397    fn save_extra_metadata_table(
398        &self,
399        mut_table: MutableTable,
400        _table_lock: &FileLock,
401    ) -> BackendResult<()> {
402        let table = self
403            .extra_metadata_store
404            .save_table(mut_table)
405            .map_err(GitBackendError::WriteMetadata)?;
406        // Since the parent table was the head, saved table are likely to be new head.
407        // If it's not, cache will be reloaded when entry can't be found.
408        *self.cached_extra_metadata.lock().unwrap() = Some(table);
409        Ok(())
410    }
411
412    /// Imports the given commits and ancestors from the backing Git repo.
413    ///
414    /// The `head_ids` may contain commits that have already been imported, but
415    /// the caller should filter them out to eliminate redundant I/O processing.
416    #[tracing::instrument(skip(self, head_ids))]
417    pub fn import_head_commits<'a>(
418        &self,
419        head_ids: impl IntoIterator<Item = &'a CommitId>,
420    ) -> BackendResult<()> {
421        let head_ids: HashSet<&CommitId> = head_ids
422            .into_iter()
423            .filter(|&id| *id != self.root_commit_id)
424            .collect();
425        if head_ids.is_empty() {
426            return Ok(());
427        }
428
429        // Create no-gc ref even if known to the extras table. Concurrent GC
430        // process might have deleted the no-gc ref.
431        let locked_repo = self.lock_git_repo();
432        locked_repo
433            .edit_references(head_ids.iter().copied().map(to_no_gc_ref_update))
434            .map_err(|err| BackendError::Other(Box::new(err)))?;
435
436        // These commits are imported from Git. Make our change ids persist (otherwise
437        // future write_commit() could reassign new change id.)
438        tracing::debug!(
439            heads_count = head_ids.len(),
440            "import extra metadata entries"
441        );
442        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
443        let mut mut_table = table.start_mutation();
444        import_extra_metadata_entries_from_heads(
445            &locked_repo,
446            &mut mut_table,
447            &table_lock,
448            &head_ids,
449            self.shallow_root_ids(&locked_repo)?,
450        )?;
451        self.save_extra_metadata_table(mut_table, &table_lock)
452    }
453
454    fn read_file_sync(&self, id: &FileId) -> BackendResult<Vec<u8>> {
455        let git_blob_id = validate_git_object_id(id)?;
456        let locked_repo = self.lock_git_repo();
457        let mut blob = locked_repo
458            .find_object(git_blob_id)
459            .map_err(|err| map_not_found_err(err, id))?
460            .try_into_blob()
461            .map_err(|err| to_read_object_err(err, id))?;
462        Ok(blob.take_data())
463    }
464
465    fn new_diff_platform(&self) -> BackendResult<gix::diff::blob::Platform> {
466        let attributes = gix::worktree::Stack::new(
467            Path::new(""),
468            gix::worktree::stack::State::AttributesStack(Default::default()),
469            gix::worktree::glob::pattern::Case::Sensitive,
470            Vec::new(),
471            Vec::new(),
472        );
473        let filter = gix::diff::blob::Pipeline::new(
474            Default::default(),
475            gix::filter::plumbing::Pipeline::new(
476                self.git_repo()
477                    .command_context()
478                    .map_err(|err| BackendError::Other(Box::new(err)))?,
479                Default::default(),
480            ),
481            Vec::new(),
482            Default::default(),
483        );
484        Ok(gix::diff::blob::Platform::new(
485            Default::default(),
486            filter,
487            gix::diff::blob::pipeline::Mode::ToGit,
488            attributes,
489        ))
490    }
491
492    fn read_tree_for_commit<'repo>(
493        &self,
494        repo: &'repo gix::Repository,
495        id: &CommitId,
496    ) -> BackendResult<gix::Tree<'repo>> {
497        let tree = self.read_commit(id).block_on()?.root_tree.to_merge();
498        // TODO(kfm): probably want to do something here if it is a merge
499        let tree_id = tree.first().clone();
500        let gix_id = validate_git_object_id(&tree_id)?;
501        repo.find_object(gix_id)
502            .map_err(|err| map_not_found_err(err, &tree_id))?
503            .try_into_tree()
504            .map_err(|err| to_read_object_err(err, &tree_id))
505    }
506}
507
508/// Canonicalizes the given `path` except for the last `".git"` component.
509///
510/// The last path component matters when opening a Git repo without `core.bare`
511/// config. This config is usually set, but the "repo" tool will set up such
512/// repositories and symlinks. Opening such repo with fully-canonicalized path
513/// would turn a colocated Git repo into a bare repo.
514pub fn canonicalize_git_repo_path(path: &Path) -> io::Result<PathBuf> {
515    if path.ends_with(".git") {
516        let workdir = path.parent().unwrap();
517        dunce::canonicalize(workdir).map(|dir| dir.join(".git"))
518    } else {
519        dunce::canonicalize(path)
520    }
521}
522
523fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options {
524    let user_name = settings.user_name();
525    let user_email = settings.user_email();
526    gix::open::Options::default()
527        .config_overrides([
528            // Committer has to be configured to record reflog. Author isn't
529            // needed, but let's copy the same values.
530            format!("author.name={user_name}"),
531            format!("author.email={user_email}"),
532            format!("committer.name={user_name}"),
533            format!("committer.email={user_email}"),
534        ])
535        // The git_target path should point the repository, not the working directory.
536        .open_path_as_is(true)
537        // Gitoxide recommends this when correctness is preferred
538        .strict_config(true)
539}
540
541/// Parses the `jj:trees` header value.
542fn root_tree_from_git_extra_header(value: &BStr) -> Result<MergedTreeId, ()> {
543    let mut tree_ids = SmallVec::new();
544    for hex in value.split(|b| *b == b' ') {
545        let tree_id = TreeId::try_from_hex(hex).ok_or(())?;
546        if tree_id.as_bytes().len() != HASH_LENGTH {
547            return Err(());
548        }
549        tree_ids.push(tree_id);
550    }
551    // It is invalid to use `jj:trees` with a non-conflicted tree. If this were
552    // allowed, it would be possible to construct a commit which appears to have
553    // different contents depending on whether it is viewed using `jj` or `git`.
554    if tree_ids.len() == 1 || tree_ids.len() % 2 == 0 {
555        return Err(());
556    }
557    Ok(MergedTreeId::Merge(Merge::from_vec(tree_ids)))
558}
559
560fn commit_from_git_without_root_parent(
561    id: &CommitId,
562    git_object: &gix::Object,
563    uses_tree_conflict_format: bool,
564    is_shallow: bool,
565) -> BackendResult<Commit> {
566    let commit = git_object
567        .try_to_commit_ref()
568        .map_err(|err| to_read_object_err(err, id))?;
569
570    // If the git header has a change-id field, we attempt to convert that to a
571    // valid JJ Change Id
572    let change_id = extract_change_id_from_commit(&commit)
573        .unwrap_or_else(|| synthetic_change_id_from_git_commit_id(id));
574
575    // shallow commits don't have parents their parents actually fetched, so we
576    // discard them here
577    // TODO: This causes issues when a shallow repository is deepened/unshallowed
578    let parents = if is_shallow {
579        vec![]
580    } else {
581        commit
582            .parents()
583            .map(|oid| CommitId::from_bytes(oid.as_bytes()))
584            .collect_vec()
585    };
586    // If this commit is a conflict, we'll update the root tree later, when we read
587    // the extra metadata.
588    let root_tree = commit
589        .extra_headers()
590        .find(JJ_TREES_COMMIT_HEADER)
591        .map(root_tree_from_git_extra_header)
592        .transpose()
593        .map_err(|()| to_read_object_err("Invalid jj:trees header", id))?
594        .unwrap_or_else(|| {
595            let tree_id = TreeId::from_bytes(commit.tree().as_bytes());
596            if uses_tree_conflict_format {
597                MergedTreeId::resolved(tree_id)
598            } else {
599                MergedTreeId::Legacy(tree_id)
600            }
601        });
602    // Use lossy conversion as commit message with "mojibake" is still better than
603    // nothing.
604    // TODO: what should we do with commit.encoding?
605    let description = String::from_utf8_lossy(commit.message).into_owned();
606    let author = signature_from_git(commit.author());
607    let committer = signature_from_git(commit.committer());
608
609    // If the commit is signed, extract both the signature and the signed data
610    // (which is the commit buffer with the gpgsig header omitted).
611    // We have to re-parse the raw commit data because gix CommitRef does not give
612    // us the sogned data, only the signature.
613    // Ideally, we could use try_to_commit_ref_iter at the beginning of this
614    // function and extract everything from that. For now, this works
615    let secure_sig = commit
616        .extra_headers
617        .iter()
618        // gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too
619        .any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256")
620        .then(|| CommitRefIter::signature(&git_object.data))
621        .transpose()
622        .map_err(|err| to_read_object_err(err, id))?
623        .flatten()
624        .map(|(sig, data)| SecureSig {
625            data: data.to_bstring().into(),
626            sig: sig.into_owned().into(),
627        });
628
629    Ok(Commit {
630        parents,
631        predecessors: vec![],
632        // If this commit has associated extra metadata, we may reset this later.
633        root_tree,
634        change_id,
635        description,
636        author,
637        committer,
638        secure_sig,
639    })
640}
641
642/// Extracts change id from commit headers.
643pub fn extract_change_id_from_commit(commit: &gix::objs::CommitRef) -> Option<ChangeId> {
644    commit
645        .extra_headers()
646        .find(CHANGE_ID_COMMIT_HEADER)
647        .and_then(ChangeId::try_from_reverse_hex)
648        .filter(|val| val.as_bytes().len() == CHANGE_ID_LENGTH)
649}
650
651/// Deterministically creates a change id based on the commit id
652///
653/// Used when we get a commit without a change id. The exact algorithm for the
654/// computation should not be relied upon.
655pub fn synthetic_change_id_from_git_commit_id(id: &CommitId) -> ChangeId {
656    // We reverse the bits of the commit id to create the change id. We don't
657    // want to use the first bytes unmodified because then it would be ambiguous
658    // if a given hash prefix refers to the commit id or the change id. It would
659    // have been enough to pick the last 16 bytes instead of the leading 16
660    // bytes to address that. We also reverse the bits to make it less likely
661    // that users depend on any relationship between the two ids.
662    let bytes = id.as_bytes()[4..HASH_LENGTH]
663        .iter()
664        .rev()
665        .map(|b| b.reverse_bits())
666        .collect();
667    ChangeId::new(bytes)
668}
669
670const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING";
671
672fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature {
673    let name = signature.name;
674    let name = if name != EMPTY_STRING_PLACEHOLDER {
675        String::from_utf8_lossy(name).into_owned()
676    } else {
677        "".to_string()
678    };
679    let email = signature.email;
680    let email = if email != EMPTY_STRING_PLACEHOLDER {
681        String::from_utf8_lossy(email).into_owned()
682    } else {
683        "".to_string()
684    };
685    let time = signature.time().unwrap_or_default();
686    let timestamp = MillisSinceEpoch(time.seconds * 1000);
687    let tz_offset = time.offset.div_euclid(60); // in minutes
688    Signature {
689        name,
690        email,
691        timestamp: Timestamp {
692            timestamp,
693            tz_offset,
694        },
695    }
696}
697
698fn signature_to_git(signature: &Signature) -> gix::actor::Signature {
699    // git does not support empty names or emails
700    let name = if !signature.name.is_empty() {
701        &signature.name
702    } else {
703        EMPTY_STRING_PLACEHOLDER
704    };
705    let email = if !signature.email.is_empty() {
706        &signature.email
707    } else {
708        EMPTY_STRING_PLACEHOLDER
709    };
710    let time = gix::date::Time::new(
711        signature.timestamp.timestamp.0.div_euclid(1000),
712        signature.timestamp.tz_offset * 60, // in seconds
713    );
714    gix::actor::Signature {
715        name: name.into(),
716        email: email.into(),
717        time,
718    }
719}
720
721fn serialize_extras(commit: &Commit) -> Vec<u8> {
722    let mut proto = crate::protos::git_store::Commit {
723        change_id: commit.change_id.to_bytes(),
724        ..Default::default()
725    };
726    if let MergedTreeId::Merge(tree_ids) = &commit.root_tree {
727        proto.uses_tree_conflict_format = true;
728        if !tree_ids.is_resolved() {
729            // This is done for the sake of jj versions <0.28 (before commit
730            // f7b14be) being able to read the repo. At some point in the
731            // future, we can stop doing it.
732            proto.root_tree = tree_ids.iter().map(|r| r.to_bytes()).collect();
733        }
734    }
735    for predecessor in &commit.predecessors {
736        proto.predecessors.push(predecessor.to_bytes());
737    }
738    proto.encode_to_vec()
739}
740
741fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
742    let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
743    if !proto.change_id.is_empty() {
744        commit.change_id = ChangeId::new(proto.change_id);
745    }
746    if let MergedTreeId::Legacy(legacy_tree_id) = &commit.root_tree
747        && proto.uses_tree_conflict_format
748    {
749        if !proto.root_tree.is_empty() {
750            let merge_builder: MergeBuilder<_> = proto
751                .root_tree
752                .iter()
753                .map(|id_bytes| TreeId::from_bytes(id_bytes))
754                .collect();
755            commit.root_tree = MergedTreeId::Merge(merge_builder.build());
756        } else {
757            // uses_tree_conflict_format was set but there was no root_tree override in the
758            // proto, which means we should just promote the tree id from the
759            // git commit to be a known-conflict-free tree
760            commit.root_tree = MergedTreeId::resolved(legacy_tree_id.clone());
761        }
762    }
763    for predecessor in &proto.predecessors {
764        commit.predecessors.push(CommitId::from_bytes(predecessor));
765    }
766}
767
768/// Returns `RefEdit` that will create a ref in `refs/jj/keep` if not exist.
769/// Used for preventing GC of commits we create.
770fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit {
771    let name = format!("{NO_GC_REF_NAMESPACE}{id}");
772    let new = gix::refs::Target::Object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()));
773    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(new.clone());
774    gix::refs::transaction::RefEdit {
775        change: gix::refs::transaction::Change::Update {
776            log: gix::refs::transaction::LogChange {
777                message: "used by jj".into(),
778                ..Default::default()
779            },
780            expected,
781            new,
782        },
783        name: name.try_into().unwrap(),
784        deref: false,
785    }
786}
787
788fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit {
789    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(git_ref.target);
790    gix::refs::transaction::RefEdit {
791        change: gix::refs::transaction::Change::Delete {
792            expected,
793            log: gix::refs::transaction::RefLog::AndReference,
794        },
795        name: git_ref.name,
796        deref: false,
797    }
798}
799
/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other
/// unreachable and non-head refs.
///
/// An existing no-gc ref is kept when it points at one of `new_heads` and is
/// named after the hex id of the commit it points at, or when its loose ref
/// file has a modification time newer than `keep_newer`. Every other ref in
/// the namespace is deleted. Deletions and (re)creations are submitted in a
/// single `edit_references()` call.
///
/// Returns an error if the refs cannot be listed or edited, or if a symbolic
/// ref is found in the no-gc namespace. Panics if the platform cannot report a
/// file modification time.
fn recreate_no_gc_refs(
    git_repo: &gix::Repository,
    new_heads: impl IntoIterator<Item = CommitId>,
    keep_newer: SystemTime,
) -> BackendResult<()> {
    // Calculate diff between existing no-gc refs and new heads.
    let new_heads: HashSet<CommitId> = new_heads.into_iter().collect();
    let mut no_gc_refs_to_keep_count: usize = 0;
    let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new();
    let git_references = git_repo
        .references()
        .map_err(|err| BackendError::Other(err.into()))?;
    let no_gc_refs_iter = git_references
        .prefixed(NO_GC_REF_NAMESPACE)
        .map_err(|err| BackendError::Other(err.into()))?;
    for git_ref in no_gc_refs_iter {
        let git_ref = git_ref.map_err(BackendError::Other)?.detach();
        // Only direct (object) refs are expected in this namespace.
        let oid = git_ref.target.try_id().ok_or_else(|| {
            let name = git_ref.name.as_bstr();
            BackendError::Other(format!("Symbolic no-gc ref found: {name}").into())
        })?;
        let id = CommitId::from_bytes(oid.as_bytes());
        // The part of the ref name after the namespace prefix should be the
        // hex id of the commit the ref points at.
        let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex();
        if new_heads.contains(&id) && name_good {
            no_gc_refs_to_keep_count += 1;
            continue;
        }
        // Check timestamp of loose ref, but this is still racy on re-import
        // because:
        // - existing packed ref won't be demoted to loose ref
        // - existing loose ref won't be touched
        //
        // TODO: might be better to switch to a dummy merge, where new no-gc ref
        // will always have a unique name. Doing that with the current
        // ref-per-head strategy would increase the number of the no-gc refs.
        // https://github.com/jj-vcs/jj/pull/2659#issuecomment-1837057782
        let loose_ref_path = git_repo.path().join(git_ref.name.to_path());
        // If the metadata cannot be read (e.g. there is no loose ref file), the
        // ref falls through to deletion.
        if let Ok(metadata) = loose_ref_path.metadata() {
            let mtime = metadata.modified().expect("unsupported platform?");
            if mtime > keep_newer {
                tracing::trace!(?git_ref, "not deleting new");
                no_gc_refs_to_keep_count += 1;
                continue;
            }
        }
        // Also deletes no-gc ref of random name created by old jj.
        tracing::trace!(?git_ref, ?name_good, "will delete");
        no_gc_refs_to_delete.push(git_ref);
    }
    tracing::info!(
        new_heads_count = new_heads.len(),
        no_gc_refs_to_keep_count,
        no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(),
        "collected reachable refs"
    );

    // It's slow to delete packed refs one by one, so update refs all at once.
    // Deletions come first, followed by an update for every new head.
    let ref_edits = itertools::chain(
        no_gc_refs_to_delete.into_iter().map(to_ref_deletion),
        new_heads.iter().map(to_no_gc_ref_update),
    );
    git_repo
        .edit_references(ref_edits)
        .map_err(|err| BackendError::Other(err.into()))?;

    Ok(())
}
869
870fn run_git_gc(program: &OsStr, git_dir: &Path, keep_newer: SystemTime) -> Result<(), GitGcError> {
871    let keep_newer = keep_newer
872        .duration_since(SystemTime::UNIX_EPOCH)
873        .unwrap_or_default(); // underflow
874    let mut git = Command::new(program);
875    git.arg("--git-dir=.") // turn off discovery
876        .arg("gc")
877        .arg(format!("--prune=@{} +0000", keep_newer.as_secs()));
878    // Don't specify it by GIT_DIR/--git-dir. On Windows, the path could be
879    // canonicalized as UNC path, which wouldn't be supported by git.
880    git.current_dir(git_dir);
881    // TODO: pass output to UI layer instead of printing directly here
882    tracing::info!(?git, "running git gc");
883    let status = git.status().map_err(GitGcError::GcCommand)?;
884    tracing::info!(?status, "git gc exited");
885    if !status.success() {
886        return Err(GitGcError::GcCommandErrorStatus(status));
887    }
888    Ok(())
889}
890
891fn validate_git_object_id(id: &impl ObjectId) -> BackendResult<gix::ObjectId> {
892    if id.as_bytes().len() != HASH_LENGTH {
893        return Err(BackendError::InvalidHashLength {
894            expected: HASH_LENGTH,
895            actual: id.as_bytes().len(),
896            object_type: id.object_type(),
897            hash: id.hex(),
898        });
899    }
900    Ok(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
901}
902
903fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError {
904    if matches!(err, gix::object::find::existing::Error::NotFound { .. }) {
905        BackendError::ObjectNotFound {
906            object_type: id.object_type(),
907            hash: id.hex(),
908            source: Box::new(err),
909        }
910    } else {
911        to_read_object_err(err, id)
912    }
913}
914
915fn to_read_object_err(
916    err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
917    id: &impl ObjectId,
918) -> BackendError {
919    BackendError::ReadObject {
920        object_type: id.object_type(),
921        hash: id.hex(),
922        source: err.into(),
923    }
924}
925
926fn to_invalid_utf8_err(source: Utf8Error, id: &impl ObjectId) -> BackendError {
927    BackendError::InvalidUtf8 {
928        object_type: id.object_type(),
929        hash: id.hex(),
930        source,
931    }
932}
933
934fn import_extra_metadata_entries_from_heads(
935    git_repo: &gix::Repository,
936    mut_table: &mut MutableTable,
937    _table_lock: &FileLock,
938    head_ids: &HashSet<&CommitId>,
939    shallow_roots: &[CommitId],
940) -> BackendResult<()> {
941    let mut work_ids = head_ids
942        .iter()
943        .filter(|&id| mut_table.get_value(id.as_bytes()).is_none())
944        .map(|&id| id.clone())
945        .collect_vec();
946    while let Some(id) = work_ids.pop() {
947        let git_object = git_repo
948            .find_object(validate_git_object_id(&id)?)
949            .map_err(|err| map_not_found_err(err, &id))?;
950        let is_shallow = shallow_roots.contains(&id);
951        // TODO(#1624): Should we read the root tree here and check if it has a
952        // `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
953        // change the description of a commit with tree-level conflicts.
954        let commit = commit_from_git_without_root_parent(&id, &git_object, true, is_shallow)?;
955        mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
956        work_ids.extend(
957            commit
958                .parents
959                .into_iter()
960                .filter(|id| mut_table.get_value(id.as_bytes()).is_none()),
961        );
962    }
963    Ok(())
964}
965
966impl Debug for GitBackend {
967    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
968        f.debug_struct("GitBackend")
969            .field("path", &self.git_repo_path())
970            .finish()
971    }
972}
973
974#[async_trait]
975impl Backend for GitBackend {
    /// Returns the backend name by delegating to the associated function
    /// `Self::name()`.
    fn name(&self) -> &str {
        Self::name()
    }
979
    /// Returns the commit id length, which is `HASH_LENGTH`.
    fn commit_id_length(&self) -> usize {
        HASH_LENGTH
    }
983
    /// Returns the change id length, which is `CHANGE_ID_LENGTH`.
    fn change_id_length(&self) -> usize {
        CHANGE_ID_LENGTH
    }
987
    /// Returns the id of the root commit stored in this backend instance.
    fn root_commit_id(&self) -> &CommitId {
        &self.root_commit_id
    }
991
    /// Returns the change id of the root commit stored in this backend instance.
    fn root_change_id(&self) -> &ChangeId {
        &self.root_change_id
    }
995
    /// Returns the id of the empty tree stored in this backend instance.
    fn empty_tree_id(&self) -> &TreeId {
        &self.empty_tree_id
    }
999
    /// Returns the concurrency value reported by this backend, which is always
    /// 1.
    fn concurrency(&self) -> usize {
        1
    }
1003
1004    async fn read_file(
1005        &self,
1006        _path: &RepoPath,
1007        id: &FileId,
1008    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>> {
1009        let data = self.read_file_sync(id)?;
1010        Ok(Box::pin(Cursor::new(data)))
1011    }
1012
1013    async fn write_file(
1014        &self,
1015        _path: &RepoPath,
1016        contents: &mut (dyn AsyncRead + Send + Unpin),
1017    ) -> BackendResult<FileId> {
1018        let mut bytes = Vec::new();
1019        contents.read_to_end(&mut bytes).await.unwrap();
1020        let locked_repo = self.lock_git_repo();
1021        let oid = locked_repo
1022            .write_blob(bytes)
1023            .map_err(|err| BackendError::WriteObject {
1024                object_type: "file",
1025                source: Box::new(err),
1026            })?;
1027        Ok(FileId::new(oid.as_bytes().to_vec()))
1028    }
1029
1030    async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
1031        let git_blob_id = validate_git_object_id(id)?;
1032        let locked_repo = self.lock_git_repo();
1033        let mut blob = locked_repo
1034            .find_object(git_blob_id)
1035            .map_err(|err| map_not_found_err(err, id))?
1036            .try_into_blob()
1037            .map_err(|err| to_read_object_err(err, id))?;
1038        let target = String::from_utf8(blob.take_data())
1039            .map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?;
1040        Ok(target)
1041    }
1042
1043    async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
1044        let locked_repo = self.lock_git_repo();
1045        let oid =
1046            locked_repo
1047                .write_blob(target.as_bytes())
1048                .map_err(|err| BackendError::WriteObject {
1049                    object_type: "symlink",
1050                    source: Box::new(err),
1051                })?;
1052        Ok(SymlinkId::new(oid.as_bytes().to_vec()))
1053    }
1054
1055    async fn read_copy(&self, _id: &CopyId) -> BackendResult<CopyHistory> {
1056        Err(BackendError::Unsupported(
1057            "The Git backend doesn't support tracked copies yet".to_string(),
1058        ))
1059    }
1060
1061    async fn write_copy(&self, _contents: &CopyHistory) -> BackendResult<CopyId> {
1062        Err(BackendError::Unsupported(
1063            "The Git backend doesn't support tracked copies yet".to_string(),
1064        ))
1065    }
1066
1067    async fn get_related_copies(&self, _copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>> {
1068        Err(BackendError::Unsupported(
1069            "The Git backend doesn't support tracked copies yet".to_string(),
1070        ))
1071    }
1072
1073    async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
1074        if id == &self.empty_tree_id {
1075            return Ok(Tree::default());
1076        }
1077        let git_tree_id = validate_git_object_id(id)?;
1078
1079        let locked_repo = self.lock_git_repo();
1080        let git_tree = locked_repo
1081            .find_object(git_tree_id)
1082            .map_err(|err| map_not_found_err(err, id))?
1083            .try_into_tree()
1084            .map_err(|err| to_read_object_err(err, id))?;
1085        let mut entries: Vec<_> = git_tree
1086            .iter()
1087            .map(|entry| -> BackendResult<_> {
1088                let entry = entry.map_err(|err| to_read_object_err(err, id))?;
1089                let name =
1090                    str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?;
1091                let (name, value) = match entry.mode().kind() {
1092                    gix::object::tree::EntryKind::Tree => {
1093                        let id = TreeId::from_bytes(entry.oid().as_bytes());
1094                        (name, TreeValue::Tree(id))
1095                    }
1096                    gix::object::tree::EntryKind::Blob => {
1097                        let id = FileId::from_bytes(entry.oid().as_bytes());
1098                        (
1099                            name,
1100                            TreeValue::File {
1101                                id,
1102                                executable: false,
1103                                copy_id: CopyId::placeholder(),
1104                            },
1105                        )
1106                    }
1107                    gix::object::tree::EntryKind::BlobExecutable => {
1108                        let id = FileId::from_bytes(entry.oid().as_bytes());
1109                        (
1110                            name,
1111                            TreeValue::File {
1112                                id,
1113                                executable: true,
1114                                copy_id: CopyId::placeholder(),
1115                            },
1116                        )
1117                    }
1118                    gix::object::tree::EntryKind::Link => {
1119                        let id = SymlinkId::from_bytes(entry.oid().as_bytes());
1120                        (name, TreeValue::Symlink(id))
1121                    }
1122                    gix::object::tree::EntryKind::Commit => {
1123                        let id = CommitId::from_bytes(entry.oid().as_bytes());
1124                        (name, TreeValue::GitSubmodule(id))
1125                    }
1126                };
1127                Ok((RepoPathComponentBuf::new(name).unwrap(), value))
1128            })
1129            .try_collect()?;
1130        // While Git tree entries are sorted, the rule is slightly different.
1131        // Directory names are sorted as if they had trailing "/".
1132        if !entries.is_sorted_by_key(|(name, _)| name) {
1133            entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
1134        }
1135        Ok(Tree::from_sorted_entries(entries))
1136    }
1137
1138    async fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
1139        // Tree entries to be written must be sorted by Entry::filename(), which
1140        // is slightly different from the order of our backend::Tree.
1141        let entries = contents
1142            .entries()
1143            .map(|entry| {
1144                let name = entry.name().as_internal_str();
1145                match entry.value() {
1146                    TreeValue::File {
1147                        id,
1148                        executable: false,
1149                        copy_id: _, // TODO: Use the value
1150                    } => gix::objs::tree::Entry {
1151                        mode: gix::object::tree::EntryKind::Blob.into(),
1152                        filename: name.into(),
1153                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1154                    },
1155                    TreeValue::File {
1156                        id,
1157                        executable: true,
1158                        copy_id: _, // TODO: Use the value
1159                    } => gix::objs::tree::Entry {
1160                        mode: gix::object::tree::EntryKind::BlobExecutable.into(),
1161                        filename: name.into(),
1162                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1163                    },
1164                    TreeValue::Symlink(id) => gix::objs::tree::Entry {
1165                        mode: gix::object::tree::EntryKind::Link.into(),
1166                        filename: name.into(),
1167                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1168                    },
1169                    TreeValue::Tree(id) => gix::objs::tree::Entry {
1170                        mode: gix::object::tree::EntryKind::Tree.into(),
1171                        filename: name.into(),
1172                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1173                    },
1174                    TreeValue::GitSubmodule(id) => gix::objs::tree::Entry {
1175                        mode: gix::object::tree::EntryKind::Commit.into(),
1176                        filename: name.into(),
1177                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1178                    },
1179                }
1180            })
1181            .sorted_unstable()
1182            .collect();
1183        let locked_repo = self.lock_git_repo();
1184        let oid = locked_repo
1185            .write_object(gix::objs::Tree { entries })
1186            .map_err(|err| BackendError::WriteObject {
1187                object_type: "tree",
1188                source: Box::new(err),
1189            })?;
1190        Ok(TreeId::from_bytes(oid.as_bytes()))
1191    }
1192
    /// Reads the commit `id` from the Git repository and fills in the
    /// jj-specific fields from the extras table.
    ///
    /// The root commit is synthesized without touching Git. A Git commit with
    /// no parents gets the root commit as its only parent. If the extras table
    /// has no entry for `id`, the commit is imported first and the table is
    /// read again.
    #[tracing::instrument(skip(self))]
    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
        if *id == self.root_commit_id {
            return Ok(make_root_commit(
                self.root_change_id().clone(),
                self.empty_tree_id.clone(),
            ));
        }
        let git_commit_id = validate_git_object_id(id)?;

        // The repo lock is scoped to this block and released before the extras
        // table is consulted.
        let mut commit = {
            let locked_repo = self.lock_git_repo();
            let git_object = locked_repo
                .find_object(git_commit_id)
                .map_err(|err| map_not_found_err(err, id))?;
            let is_shallow = self.shallow_root_ids(&locked_repo)?.contains(id);
            commit_from_git_without_root_parent(id, &git_object, false, is_shallow)?
        };
        if commit.parents.is_empty() {
            commit.parents.push(self.root_commit_id.clone());
        };

        let table = self.cached_extra_metadata_table()?;
        if let Some(extras) = table.get_value(id.as_bytes()) {
            deserialize_extras(&mut commit, extras);
        } else {
            // TODO: Remove this hack and map to ObjectNotFound error if we're sure that
            // there are no reachable ancestor commits without extras metadata. Git commits
            // imported by jj < 0.8.0 might not have extras (#924).
            // https://github.com/jj-vcs/jj/issues/2343
            tracing::info!("unimported Git commit found");
            self.import_head_commits([id])?;
            // The import above is expected to have added an entry for `id`, so
            // the lookup is unwrapped.
            let table = self.cached_extra_metadata_table()?;
            let extras = table.get_value(id.as_bytes()).unwrap();
            deserialize_extras(&mut commit, extras);
        }
        Ok(commit)
    }
1231
    /// Writes `contents` as a Git commit object, records its extras in the
    /// extras table, and creates a no-gc ref for it.
    ///
    /// Returns the new commit id together with `contents` updated to what was
    /// actually written: the committer timestamp as stored in the Git commit
    /// (whole seconds), and `secure_sig` if `sign_with` was given.
    ///
    /// Fails if `contents` has no parents, or if the root commit is one of
    /// several parents. Panics if `contents.secure_sig` is already set.
    async fn write_commit(
        &self,
        mut contents: Commit,
        mut sign_with: Option<&mut SigningFn>,
    ) -> BackendResult<(CommitId, Commit)> {
        assert!(contents.secure_sig.is_none(), "commit.secure_sig was set");

        let locked_repo = self.lock_git_repo();
        // A resolved tree is referenced directly; an unresolved one is first
        // written as a conflict tree.
        let git_tree_id = match &contents.root_tree {
            MergedTreeId::Legacy(tree_id) => validate_git_object_id(tree_id)?,
            MergedTreeId::Merge(tree_ids) => match tree_ids.as_resolved() {
                Some(tree_id) => validate_git_object_id(tree_id)?,
                None => write_tree_conflict(&locked_repo, tree_ids)?,
            },
        };
        let author = signature_to_git(&contents.author);
        let mut committer = signature_to_git(&contents.committer);
        let message = &contents.description;
        if contents.parents.is_empty() {
            return Err(BackendError::Other(
                "Cannot write a commit with no parents".into(),
            ));
        }
        let mut parents = SmallVec::new();
        for parent_id in &contents.parents {
            if *parent_id == self.root_commit_id {
                // Git doesn't have a root commit, so if the parent is the root commit, we don't
                // add it to the list of parents to write in the Git commit. We also check that
                // there are no other parents since Git cannot represent a merge between a root
                // commit and another commit.
                if contents.parents.len() > 1 {
                    return Err(BackendError::Unsupported(
                        "The Git backend does not support creating merge commits with the root \
                         commit as one of the parents."
                            .to_owned(),
                    ));
                }
            } else {
                parents.push(validate_git_object_id(parent_id)?);
            }
        }
        let mut extra_headers: Vec<(BString, BString)> = vec![];
        // For an unresolved tree, the ids of all its terms are recorded in a
        // commit header.
        if let MergedTreeId::Merge(tree_ids) = &contents.root_tree
            && !tree_ids.is_resolved()
        {
            let value = tree_ids.iter().map(|id| id.hex()).join(" ");
            extra_headers.push((JJ_TREES_COMMIT_HEADER.into(), value.into()));
        }
        if self.write_change_id_header {
            extra_headers.push((
                CHANGE_ID_COMMIT_HEADER.into(),
                contents.change_id.reverse_hex().into(),
            ));
        }

        let extras = serialize_extras(&contents);

        // If two writers write commits of the same id with different metadata, they
        // will both succeed and the metadata entries will be "merged" later. Since
        // metadata entry is keyed by the commit id, one of the entries would be lost.
        // To prevent such race condition locally, we extend the scope covered by the
        // table lock. This is still racy if multiple machines are involved and the
        // repository is rsync-ed.
        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
        // Each iteration writes a commit object with the current committer
        // timestamp; the loop ends once the resulting id does not clash with
        // an existing entry that has different extras.
        let id = loop {
            let mut commit = gix::objs::Commit {
                message: message.to_owned().into(),
                tree: git_tree_id,
                author: author.clone(),
                committer: committer.clone(),
                encoding: None,
                parents: parents.clone(),
                extra_headers: extra_headers.clone(),
            };

            if let Some(sign) = &mut sign_with {
                // we don't use gix pool, but at least use their heuristic
                let mut data = Vec::with_capacity(512);
                commit.write_to(&mut data).unwrap();

                // The signature is computed over the commit serialized without
                // the "gpgsig" header, which is appended afterwards.
                let sig = sign(&data).map_err(|err| BackendError::WriteObject {
                    object_type: "commit",
                    source: Box::new(err),
                })?;
                commit
                    .extra_headers
                    .push(("gpgsig".into(), sig.clone().into()));
                contents.secure_sig = Some(SecureSig { data, sig });
            }

            let git_id =
                locked_repo
                    .write_object(&commit)
                    .map_err(|err| BackendError::WriteObject {
                        object_type: "commit",
                        source: Box::new(err),
                    })?;

            match table.get_value(git_id.as_bytes()) {
                Some(existing_extras) if existing_extras != extras => {
                    // It's possible a commit already exists with the same
                    // commit id but different change id. Adjust the timestamp
                    // until this is no longer the case.
                    //
                    // For example, this can happen when rebasing duplicate
                    // commits, https://github.com/jj-vcs/jj/issues/694.
                    //
                    // `jj` resets the committer timestamp to the current
                    // timestamp whenever it rewrites a commit. So, it's
                    // unlikely for the timestamp to be 0 even if the original
                    // commit had its timestamp set to 0. Moreover, we test that
                    // a commit with a negative timestamp can still be written
                    // and read back by `jj`.
                    committer.time.seconds -= 1;
                }
                _ => break CommitId::from_bytes(git_id.as_bytes()),
            }
        };

        // Everything up to this point had no permanent effect on the repo except
        // GC-able objects
        locked_repo
            .edit_reference(to_no_gc_ref_update(&id))
            .map_err(|err| BackendError::Other(Box::new(err)))?;

        // Update the signature to match the one that was actually written to the object
        // store
        contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000);
        let mut mut_table = table.start_mutation();
        mut_table.add_entry(id.to_bytes(), extras);
        self.save_extra_metadata_table(mut_table, &table_lock)?;
        Ok((id, contents))
    }
1365
    /// Diffs the tree of commit `root_id` against the tree of commit `head_id`
    /// with rewrite tracking enabled, and returns the detected copies/renames
    /// as a stream of `CopyRecord`s.
    ///
    /// Only rewrites where both sides are blobs are reported. If `paths` is
    /// given, only records whose target is one of `paths` are returned. The
    /// whole diff is computed before the stream is returned; per-record
    /// conversion errors are delivered as `Err` items of the stream.
    fn get_copy_records(
        &self,
        paths: Option<&[RepoPathBuf]>,
        root_id: &CommitId,
        head_id: &CommitId,
    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>> {
        let repo = self.git_repo();
        let root_tree = self.read_tree_for_commit(&repo, root_id)?;
        let head_tree = self.read_tree_for_commit(&repo, head_id)?;

        // Converts one diff change to a copy record. `Ok(None)` means the
        // change should be ignored.
        let change_to_copy_record =
            |change: gix::object::tree::diff::Change| -> BackendResult<Option<CopyRecord>> {
                let gix::object::tree::diff::Change::Rewrite {
                    source_location,
                    source_entry_mode,
                    source_id,
                    entry_mode: dest_entry_mode,
                    location: dest_location,
                    ..
                } = change
                else {
                    return Ok(None);
                };
                // TODO: Renamed symlinks cannot be returned because CopyRecord
                // expects `source_file: FileId`.
                if !source_entry_mode.is_blob() || !dest_entry_mode.is_blob() {
                    return Ok(None);
                }

                // Invalid UTF-8 in the source path is reported against
                // `root_id`, and in the destination path against `head_id`.
                let source = str::from_utf8(source_location)
                    .map_err(|err| to_invalid_utf8_err(err, root_id))?;
                let dest = str::from_utf8(dest_location)
                    .map_err(|err| to_invalid_utf8_err(err, head_id))?;

                let target = RepoPathBuf::from_internal_string(dest).unwrap();
                // With no path filter, every target is accepted.
                if !paths.is_none_or(|paths| paths.contains(&target)) {
                    return Ok(None);
                }

                Ok(Some(CopyRecord {
                    target,
                    target_commit: head_id.clone(),
                    source: RepoPathBuf::from_internal_string(source).unwrap(),
                    source_file: FileId::from_bytes(source_id.as_bytes()),
                    source_commit: root_id.clone(),
                }))
            };

        let mut records: Vec<BackendResult<CopyRecord>> = Vec::new();
        root_tree
            .changes()
            .map_err(|err| BackendError::Other(err.into()))?
            .options(|opts| {
                opts.track_path().track_rewrites(Some(gix::diff::Rewrites {
                    copies: Some(gix::diff::rewrites::Copies {
                        source: gix::diff::rewrites::CopySource::FromSetOfModifiedFiles,
                        percentage: Some(0.5),
                    }),
                    percentage: Some(0.5),
                    limit: 1000,
                    track_empty: false,
                }));
            })
            .for_each_to_obtain_tree_with_cache(
                &head_tree,
                &mut self.new_diff_platform()?,
                |change| -> BackendResult<_> {
                    // Conversion errors are collected as items and do not stop
                    // the traversal.
                    match change_to_copy_record(change) {
                        Ok(None) => {}
                        Ok(Some(change)) => records.push(Ok(change)),
                        Err(err) => records.push(Err(err)),
                    }
                    Ok(gix::object::tree::diff::Action::Continue)
                },
            )
            .map_err(|err| BackendError::Other(err.into()))?;
        Ok(Box::pin(futures::stream::iter(records)))
    }
1444
1445    #[tracing::instrument(skip(self, index))]
1446    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
1447        let git_repo = self.lock_git_repo();
1448        let new_heads = index
1449            .all_heads_for_gc()
1450            .map_err(|err| BackendError::Other(err.into()))?
1451            .filter(|id| *id != self.root_commit_id);
1452        recreate_no_gc_refs(&git_repo, new_heads, keep_newer)?;
1453        // TODO: remove unreachable entries from extras table if segment file
1454        // mtime <= keep_newer? (it won't be consistent with no-gc refs
1455        // preserved by the keep_newer timestamp though)
1456        // TODO: remove unreachable extras table segments
1457        run_git_gc(
1458            self.git_executable.as_ref(),
1459            self.git_repo_path(),
1460            keep_newer,
1461        )
1462        .map_err(|err| BackendError::Other(err.into()))?;
1463        // Since "git gc" will move loose refs into packed refs, in-memory
1464        // packed-refs cache should be invalidated without relying on mtime.
1465        git_repo.refs.force_refresh_packed_buffer().ok();
1466        Ok(())
1467    }
1468}
1469
1470/// Write a tree conflict as a special tree with `.jjconflict-base-N` and
1471/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd.
1472fn write_tree_conflict(
1473    repo: &gix::Repository,
1474    conflict: &Merge<TreeId>,
1475) -> BackendResult<gix::ObjectId> {
1476    // Tree entries to be written must be sorted by Entry::filename().
1477    let mut entries = itertools::chain(
1478        conflict
1479            .removes()
1480            .enumerate()
1481            .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)),
1482        conflict
1483            .adds()
1484            .enumerate()
1485            .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)),
1486    )
1487    .map(|(name, tree_id)| gix::objs::tree::Entry {
1488        mode: gix::object::tree::EntryKind::Tree.into(),
1489        filename: name.into(),
1490        oid: gix::ObjectId::from_bytes_or_panic(tree_id.as_bytes()),
1491    })
1492    .collect_vec();
1493    let readme_id = repo
1494        .write_blob(
1495            r#"This commit was made by jj, https://github.com/jj-vcs/jj.
1496The commit contains file conflicts, and therefore looks wrong when used with plain
1497Git or other tools that are unfamiliar with jj.
1498
1499The .jjconflict-* directories represent the different inputs to the conflict.
1500For details, see
1501https://jj-vcs.github.io/jj/prerelease/git-compatibility/#format-mapping-details
1502
1503If you see this file in your working copy, it probably means that you used a
1504regular `git` command to check out a conflicted commit. Use `jj abandon` to
1505recover.
1506"#,
1507        )
1508        .map_err(|err| {
1509            BackendError::Other(format!("Failed to write README for conflict tree: {err}").into())
1510        })?
1511        .detach();
1512    entries.push(gix::objs::tree::Entry {
1513        mode: gix::object::tree::EntryKind::Blob.into(),
1514        filename: "README".into(),
1515        oid: readme_id,
1516    });
1517    entries.sort_unstable();
1518    let id = repo
1519        .write_object(gix::objs::Tree { entries })
1520        .map_err(|err| BackendError::WriteObject {
1521            object_type: "tree",
1522            source: Box::new(err),
1523        })?;
1524    Ok(id.detach())
1525}
1526
1527#[cfg(test)]
1528mod tests {
1529    use assert_matches::assert_matches;
1530    use gix::date::parse::TimeBuf;
1531    use gix::objs::CommitRef;
1532    use indoc::indoc;
1533    use pollster::FutureExt as _;
1534
1535    use super::*;
1536    use crate::config::StackedConfig;
1537    use crate::content_hash::blake2b_hash;
1538    use crate::hex_util;
1539    use crate::tests::new_temp_dir;
1540
1541    const GIT_USER: &str = "Someone";
1542    const GIT_EMAIL: &str = "someone@example.com";
1543
1544    fn git_config() -> Vec<bstr::BString> {
1545        vec![
1546            format!("user.name = {GIT_USER}").into(),
1547            format!("user.email = {GIT_EMAIL}").into(),
1548            "init.defaultBranch = master".into(),
1549        ]
1550    }
1551
1552    fn open_options() -> gix::open::Options {
1553        gix::open::Options::isolated()
1554            .config_overrides(git_config())
1555            .strict_config(true)
1556    }
1557
1558    fn git_init(directory: impl AsRef<Path>) -> gix::Repository {
1559        gix::ThreadSafeRepository::init_opts(
1560            directory,
1561            gix::create::Kind::WithWorktree,
1562            gix::create::Options::default(),
1563            open_options(),
1564        )
1565        .unwrap()
1566        .to_thread_local()
1567    }
1568
1569    #[test]
1570    fn read_plain_git_commit() {
1571        let settings = user_settings();
1572        let temp_dir = new_temp_dir();
1573        let store_path = temp_dir.path();
1574        let git_repo_path = temp_dir.path().join("git");
1575        let git_repo = git_init(git_repo_path);
1576
1577        // Add a commit with some files in
1578        let blob1 = git_repo.write_blob(b"content1").unwrap().detach();
1579        let blob2 = git_repo.write_blob(b"normal").unwrap().detach();
1580        let mut dir_tree_editor = git_repo.empty_tree().edit().unwrap();
1581        dir_tree_editor
1582            .upsert("normal", gix::object::tree::EntryKind::Blob, blob1)
1583            .unwrap();
1584        dir_tree_editor
1585            .upsert("symlink", gix::object::tree::EntryKind::Link, blob2)
1586            .unwrap();
1587        let dir_tree_id = dir_tree_editor.write().unwrap().detach();
1588        let mut root_tree_builder = git_repo.empty_tree().edit().unwrap();
1589        root_tree_builder
1590            .upsert("dir", gix::object::tree::EntryKind::Tree, dir_tree_id)
1591            .unwrap();
1592        let root_tree_id = root_tree_builder.write().unwrap().detach();
1593        let git_author = gix::actor::Signature {
1594            name: "git author".into(),
1595            email: "git.author@example.com".into(),
1596            time: gix::date::Time::new(1000, 60 * 60),
1597        };
1598        let git_committer = gix::actor::Signature {
1599            name: "git committer".into(),
1600            email: "git.committer@example.com".into(),
1601            time: gix::date::Time::new(2000, -480 * 60),
1602        };
1603        let git_commit_id = git_repo
1604            .commit_as(
1605                git_committer.to_ref(&mut TimeBuf::default()),
1606                git_author.to_ref(&mut TimeBuf::default()),
1607                "refs/heads/dummy",
1608                "git commit message",
1609                root_tree_id,
1610                [] as [gix::ObjectId; 0],
1611            )
1612            .unwrap()
1613            .detach();
1614        git_repo
1615            .find_reference("refs/heads/dummy")
1616            .unwrap()
1617            .delete()
1618            .unwrap();
1619        let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263");
1620        // The change id is the leading reverse bits of the commit id
1621        let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25");
1622        // Check that the git commit above got the hash we expect
1623        assert_eq!(
1624            git_commit_id.as_bytes(),
1625            commit_id.as_bytes(),
1626            "{git_commit_id:?} vs {commit_id:?}"
1627        );
1628
1629        // Add an empty commit on top
1630        let git_commit_id2 = git_repo
1631            .commit_as(
1632                git_committer.to_ref(&mut TimeBuf::default()),
1633                git_author.to_ref(&mut TimeBuf::default()),
1634                "refs/heads/dummy2",
1635                "git commit message 2",
1636                root_tree_id,
1637                [git_commit_id],
1638            )
1639            .unwrap()
1640            .detach();
1641        git_repo
1642            .find_reference("refs/heads/dummy2")
1643            .unwrap()
1644            .delete()
1645            .unwrap();
1646        let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes());
1647
1648        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1649
1650        // Import the head commit and its ancestors
1651        backend.import_head_commits([&commit_id2]).unwrap();
1652        // Ref should be created only for the head commit
1653        let git_refs = backend
1654            .git_repo()
1655            .references()
1656            .unwrap()
1657            .prefixed("refs/jj/keep/")
1658            .unwrap()
1659            .map(|git_ref| git_ref.unwrap().id().detach())
1660            .collect_vec();
1661        assert_eq!(git_refs, vec![git_commit_id2]);
1662
1663        let commit = backend.read_commit(&commit_id).block_on().unwrap();
1664        assert_eq!(&commit.change_id, &change_id);
1665        assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
1666        assert_eq!(commit.predecessors, vec![]);
1667        assert_eq!(
1668            commit.root_tree.to_merge(),
1669            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1670        );
1671        assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1672        assert_eq!(commit.description, "git commit message");
1673        assert_eq!(commit.author.name, "git author");
1674        assert_eq!(commit.author.email, "git.author@example.com");
1675        assert_eq!(
1676            commit.author.timestamp.timestamp,
1677            MillisSinceEpoch(1000 * 1000)
1678        );
1679        assert_eq!(commit.author.timestamp.tz_offset, 60);
1680        assert_eq!(commit.committer.name, "git committer");
1681        assert_eq!(commit.committer.email, "git.committer@example.com");
1682        assert_eq!(
1683            commit.committer.timestamp.timestamp,
1684            MillisSinceEpoch(2000 * 1000)
1685        );
1686        assert_eq!(commit.committer.timestamp.tz_offset, -480);
1687
1688        let root_tree = backend
1689            .read_tree(
1690                RepoPath::root(),
1691                &TreeId::from_bytes(root_tree_id.as_bytes()),
1692            )
1693            .block_on()
1694            .unwrap();
1695        let mut root_entries = root_tree.entries();
1696        let dir = root_entries.next().unwrap();
1697        assert_eq!(root_entries.next(), None);
1698        assert_eq!(dir.name().as_internal_str(), "dir");
1699        assert_eq!(
1700            dir.value(),
1701            &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes()))
1702        );
1703
1704        let dir_tree = backend
1705            .read_tree(
1706                RepoPath::from_internal_string("dir").unwrap(),
1707                &TreeId::from_bytes(dir_tree_id.as_bytes()),
1708            )
1709            .block_on()
1710            .unwrap();
1711        let mut entries = dir_tree.entries();
1712        let file = entries.next().unwrap();
1713        let symlink = entries.next().unwrap();
1714        assert_eq!(entries.next(), None);
1715        assert_eq!(file.name().as_internal_str(), "normal");
1716        assert_eq!(
1717            file.value(),
1718            &TreeValue::File {
1719                id: FileId::from_bytes(blob1.as_bytes()),
1720                executable: false,
1721                copy_id: CopyId::placeholder(),
1722            }
1723        );
1724        assert_eq!(symlink.name().as_internal_str(), "symlink");
1725        assert_eq!(
1726            symlink.value(),
1727            &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
1728        );
1729
1730        let commit2 = backend.read_commit(&commit_id2).block_on().unwrap();
1731        assert_eq!(commit2.parents, vec![commit_id.clone()]);
1732        assert_eq!(commit.predecessors, vec![]);
1733        assert_eq!(
1734            commit.root_tree.to_merge(),
1735            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1736        );
1737        assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1738    }
1739
1740    #[test]
1741    fn read_git_commit_without_importing() {
1742        let settings = user_settings();
1743        let temp_dir = new_temp_dir();
1744        let store_path = temp_dir.path();
1745        let git_repo_path = temp_dir.path().join("git");
1746        let git_repo = git_init(&git_repo_path);
1747
1748        let signature = gix::actor::Signature {
1749            name: GIT_USER.into(),
1750            email: GIT_EMAIL.into(),
1751            time: gix::date::Time::now_utc(),
1752        };
1753        let empty_tree_id =
1754            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1755        let git_commit_id = git_repo
1756            .commit_as(
1757                signature.to_ref(&mut TimeBuf::default()),
1758                signature.to_ref(&mut TimeBuf::default()),
1759                "refs/heads/main",
1760                "git commit message",
1761                empty_tree_id,
1762                [] as [gix::ObjectId; 0],
1763            )
1764            .unwrap();
1765
1766        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1767
1768        // read_commit() without import_head_commits() works as of now. This might be
1769        // changed later.
1770        assert!(
1771            backend
1772                .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1773                .block_on()
1774                .is_ok()
1775        );
1776        assert!(
1777            backend
1778                .cached_extra_metadata_table()
1779                .unwrap()
1780                .get_value(git_commit_id.as_bytes())
1781                .is_some(),
1782            "extra metadata should have been be created"
1783        );
1784    }
1785
1786    #[test]
1787    fn read_signed_git_commit() {
1788        let settings = user_settings();
1789        let temp_dir = new_temp_dir();
1790        let store_path = temp_dir.path();
1791        let git_repo_path = temp_dir.path().join("git");
1792        let git_repo = git_init(git_repo_path);
1793
1794        let signature = gix::actor::Signature {
1795            name: GIT_USER.into(),
1796            email: GIT_EMAIL.into(),
1797            time: gix::date::Time::now_utc(),
1798        };
1799        let empty_tree_id =
1800            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1801
1802        let secure_sig =
1803            "here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP\n";
1804
1805        let mut commit = gix::objs::Commit {
1806            tree: empty_tree_id,
1807            parents: smallvec::SmallVec::new(),
1808            author: signature.clone(),
1809            committer: signature.clone(),
1810            encoding: None,
1811            message: "git commit message".into(),
1812            extra_headers: Vec::new(),
1813        };
1814
1815        let mut commit_buf = Vec::new();
1816        commit.write_to(&mut commit_buf).unwrap();
1817        let commit_str = str::from_utf8(&commit_buf).unwrap();
1818
1819        commit
1820            .extra_headers
1821            .push(("gpgsig".into(), secure_sig.into()));
1822
1823        let git_commit_id = git_repo.write_object(&commit).unwrap();
1824
1825        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1826
1827        let commit = backend
1828            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1829            .block_on()
1830            .unwrap();
1831
1832        let sig = commit.secure_sig.expect("failed to read the signature");
1833
1834        // converting to string for nicer assert diff
1835        assert_eq!(str::from_utf8(&sig.sig).unwrap(), secure_sig);
1836        assert_eq!(str::from_utf8(&sig.data).unwrap(), commit_str);
1837    }
1838
1839    #[test]
1840    fn change_id_parsing() {
1841        let id = |commit_object_bytes: &[u8]| {
1842            extract_change_id_from_commit(&CommitRef::from_bytes(commit_object_bytes).unwrap())
1843        };
1844
1845        let commit_with_id = indoc! {b"
1846            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1847            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1848            author JJ Fan <jjfan@example.com> 1757112665 -0700
1849            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1850            extra-header blah
1851            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1852
1853            test-commit
1854        "};
1855        insta::assert_compact_debug_snapshot!(
1856            id(commit_with_id),
1857            @r#"Some(ChangeId("efbc06dc4721683f2a45568dbda31e99"))"#
1858        );
1859
1860        let commit_without_id = indoc! {b"
1861            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1862            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1863            author JJ Fan <jjfan@example.com> 1757112665 -0700
1864            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1865            extra-header blah
1866
1867            no id in header
1868        "};
1869        insta::assert_compact_debug_snapshot!(
1870            id(commit_without_id),
1871            @"None"
1872        );
1873
1874        let commit = indoc! {b"
1875            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1876            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1877            author JJ Fan <jjfan@example.com> 1757112665 -0700
1878            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1879            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1880            extra-header blah
1881            change-id abcabcabcabcabcabcabcabcabcabcab
1882
1883            valid change id first
1884        "};
1885        insta::assert_compact_debug_snapshot!(
1886            id(commit),
1887            @r#"Some(ChangeId("efbc06dc4721683f2a45568dbda31e99"))"#
1888        );
1889
1890        // We only look at the first change id if multiple are present, so this should
1891        // error
1892        let commit = indoc! {b"
1893            tree 126799bf8058d1b5c531e93079f4fe79733920dd
1894            parent bd50783bdf38406dd6143475cd1a3c27938db2ee
1895            author JJ Fan <jjfan@example.com> 1757112665 -0700
1896            committer JJ Fan <jjfan@example.com> 1757359886 -0700
1897            change-id abcabcabcabcabcabcabcabcabcabcab
1898            extra-header blah
1899            change-id lkonztmnvsxytrwkxpvuutrmompwylqq
1900
1901            valid change id first
1902        "};
1903        insta::assert_compact_debug_snapshot!(
1904            id(commit),
1905            @"None"
1906        );
1907    }
1908
1909    #[test]
1910    fn round_trip_change_id_via_git_header() {
1911        let settings = user_settings();
1912        let temp_dir = new_temp_dir();
1913
1914        let store_path = temp_dir.path().join("store");
1915        fs::create_dir(&store_path).unwrap();
1916        let empty_store_path = temp_dir.path().join("empty_store");
1917        fs::create_dir(&empty_store_path).unwrap();
1918        let git_repo_path = temp_dir.path().join("git");
1919        let git_repo = git_init(git_repo_path);
1920
1921        let backend = GitBackend::init_external(&settings, &store_path, git_repo.path()).unwrap();
1922        let original_change_id = ChangeId::from_hex("1111eeee1111eeee1111eeee1111eeee");
1923        let commit = Commit {
1924            parents: vec![backend.root_commit_id().clone()],
1925            predecessors: vec![],
1926            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
1927            change_id: original_change_id.clone(),
1928            description: "initial".to_string(),
1929            author: create_signature(),
1930            committer: create_signature(),
1931            secure_sig: None,
1932        };
1933
1934        let (initial_commit_id, _init_commit) =
1935            backend.write_commit(commit, None).block_on().unwrap();
1936        let commit = backend.read_commit(&initial_commit_id).block_on().unwrap();
1937        assert_eq!(
1938            commit.change_id, original_change_id,
1939            "The change-id header did not roundtrip"
1940        );
1941
1942        // Because of how change ids are also persisted in extra proto files,
1943        // initialize a new store without those files, but reuse the same git
1944        // storage. This change-id must be derived from the git commit header.
1945        let no_extra_backend =
1946            GitBackend::init_external(&settings, &empty_store_path, git_repo.path()).unwrap();
1947        let no_extra_commit = no_extra_backend
1948            .read_commit(&initial_commit_id)
1949            .block_on()
1950            .unwrap();
1951
1952        assert_eq!(
1953            no_extra_commit.change_id, original_change_id,
1954            "The change-id header did not roundtrip"
1955        );
1956    }
1957
1958    #[test]
1959    fn read_empty_string_placeholder() {
1960        let git_signature1 = gix::actor::Signature {
1961            name: EMPTY_STRING_PLACEHOLDER.into(),
1962            email: "git.author@example.com".into(),
1963            time: gix::date::Time::new(1000, 60 * 60),
1964        };
1965        let signature1 = signature_from_git(git_signature1.to_ref(&mut TimeBuf::default()));
1966        assert!(signature1.name.is_empty());
1967        assert_eq!(signature1.email, "git.author@example.com");
1968        let git_signature2 = gix::actor::Signature {
1969            name: "git committer".into(),
1970            email: EMPTY_STRING_PLACEHOLDER.into(),
1971            time: gix::date::Time::new(2000, -480 * 60),
1972        };
1973        let signature2 = signature_from_git(git_signature2.to_ref(&mut TimeBuf::default()));
1974        assert_eq!(signature2.name, "git committer");
1975        assert!(signature2.email.is_empty());
1976    }
1977
1978    #[test]
1979    fn write_empty_string_placeholder() {
1980        let signature1 = Signature {
1981            name: "".to_string(),
1982            email: "someone@example.com".to_string(),
1983            timestamp: Timestamp {
1984                timestamp: MillisSinceEpoch(0),
1985                tz_offset: 0,
1986            },
1987        };
1988        let git_signature1 = signature_to_git(&signature1);
1989        assert_eq!(git_signature1.name, EMPTY_STRING_PLACEHOLDER);
1990        assert_eq!(git_signature1.email, "someone@example.com");
1991        let signature2 = Signature {
1992            name: "Someone".to_string(),
1993            email: "".to_string(),
1994            timestamp: Timestamp {
1995                timestamp: MillisSinceEpoch(0),
1996                tz_offset: 0,
1997            },
1998        };
1999        let git_signature2 = signature_to_git(&signature2);
2000        assert_eq!(git_signature2.name, "Someone");
2001        assert_eq!(git_signature2.email, EMPTY_STRING_PLACEHOLDER);
2002    }
2003
2004    /// Test that parents get written correctly
2005    #[test]
2006    fn git_commit_parents() {
2007        let settings = user_settings();
2008        let temp_dir = new_temp_dir();
2009        let store_path = temp_dir.path();
2010        let git_repo_path = temp_dir.path().join("git");
2011        let git_repo = git_init(&git_repo_path);
2012
2013        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
2014        let mut commit = Commit {
2015            parents: vec![],
2016            predecessors: vec![],
2017            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2018            change_id: ChangeId::from_hex("abc123"),
2019            description: "".to_string(),
2020            author: create_signature(),
2021            committer: create_signature(),
2022            secure_sig: None,
2023        };
2024
2025        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2026            backend.write_commit(commit, None).block_on()
2027        };
2028
2029        // No parents
2030        commit.parents = vec![];
2031        assert_matches!(
2032            write_commit(commit.clone()),
2033            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
2034        );
2035
2036        // Only root commit as parent
2037        commit.parents = vec![backend.root_commit_id().clone()];
2038        let first_id = write_commit(commit.clone()).unwrap().0;
2039        let first_commit = backend.read_commit(&first_id).block_on().unwrap();
2040        assert_eq!(first_commit, commit);
2041        let first_git_commit = git_repo.find_commit(git_id(&first_id)).unwrap();
2042        assert!(first_git_commit.parent_ids().collect_vec().is_empty());
2043
2044        // Only non-root commit as parent
2045        commit.parents = vec![first_id.clone()];
2046        let second_id = write_commit(commit.clone()).unwrap().0;
2047        let second_commit = backend.read_commit(&second_id).block_on().unwrap();
2048        assert_eq!(second_commit, commit);
2049        let second_git_commit = git_repo.find_commit(git_id(&second_id)).unwrap();
2050        assert_eq!(
2051            second_git_commit.parent_ids().collect_vec(),
2052            vec![git_id(&first_id)]
2053        );
2054
2055        // Merge commit
2056        commit.parents = vec![first_id.clone(), second_id.clone()];
2057        let merge_id = write_commit(commit.clone()).unwrap().0;
2058        let merge_commit = backend.read_commit(&merge_id).block_on().unwrap();
2059        assert_eq!(merge_commit, commit);
2060        let merge_git_commit = git_repo.find_commit(git_id(&merge_id)).unwrap();
2061        assert_eq!(
2062            merge_git_commit.parent_ids().collect_vec(),
2063            vec![git_id(&first_id), git_id(&second_id)]
2064        );
2065
2066        // Merge commit with root as one parent
2067        commit.parents = vec![first_id, backend.root_commit_id().clone()];
2068        assert_matches!(
2069            write_commit(commit),
2070            Err(BackendError::Unsupported(message)) if message.contains("root commit")
2071        );
2072    }
2073
2074    #[test]
2075    fn write_tree_conflicts() {
2076        let settings = user_settings();
2077        let temp_dir = new_temp_dir();
2078        let store_path = temp_dir.path();
2079        let git_repo_path = temp_dir.path().join("git");
2080        let git_repo = git_init(&git_repo_path);
2081
2082        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
2083        let create_tree = |i| {
2084            let blob_id = git_repo.write_blob(format!("content {i}")).unwrap();
2085            let mut tree_builder = git_repo.empty_tree().edit().unwrap();
2086            tree_builder
2087                .upsert(
2088                    format!("file{i}"),
2089                    gix::object::tree::EntryKind::Blob,
2090                    blob_id,
2091                )
2092                .unwrap();
2093            TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
2094        };
2095
2096        let root_tree = Merge::from_removes_adds(
2097            vec![create_tree(0), create_tree(1)],
2098            vec![create_tree(2), create_tree(3), create_tree(4)],
2099        );
2100        let mut commit = Commit {
2101            parents: vec![backend.root_commit_id().clone()],
2102            predecessors: vec![],
2103            root_tree: MergedTreeId::Merge(root_tree.clone()),
2104            change_id: ChangeId::from_hex("abc123"),
2105            description: "".to_string(),
2106            author: create_signature(),
2107            committer: create_signature(),
2108            secure_sig: None,
2109        };
2110
2111        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2112            backend.write_commit(commit, None).block_on()
2113        };
2114
2115        // When writing a tree-level conflict, the root tree on the git side has the
2116        // individual trees as subtrees.
2117        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2118        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2119        assert_eq!(read_commit, commit);
2120        let git_commit = git_repo
2121            .find_commit(gix::ObjectId::from_bytes_or_panic(
2122                read_commit_id.as_bytes(),
2123            ))
2124            .unwrap();
2125        let git_tree = git_repo.find_tree(git_commit.tree_id().unwrap()).unwrap();
2126        assert!(
2127            git_tree
2128                .iter()
2129                .map(Result::unwrap)
2130                .filter(|entry| entry.filename() != b"README")
2131                .all(|entry| entry.mode().value() == 0o040000)
2132        );
2133        let mut iter = git_tree.iter().map(Result::unwrap);
2134        let entry = iter.next().unwrap();
2135        assert_eq!(entry.filename(), b".jjconflict-base-0");
2136        assert_eq!(
2137            entry.id().as_bytes(),
2138            root_tree.get_remove(0).unwrap().as_bytes()
2139        );
2140        let entry = iter.next().unwrap();
2141        assert_eq!(entry.filename(), b".jjconflict-base-1");
2142        assert_eq!(
2143            entry.id().as_bytes(),
2144            root_tree.get_remove(1).unwrap().as_bytes()
2145        );
2146        let entry = iter.next().unwrap();
2147        assert_eq!(entry.filename(), b".jjconflict-side-0");
2148        assert_eq!(
2149            entry.id().as_bytes(),
2150            root_tree.get_add(0).unwrap().as_bytes()
2151        );
2152        let entry = iter.next().unwrap();
2153        assert_eq!(entry.filename(), b".jjconflict-side-1");
2154        assert_eq!(
2155            entry.id().as_bytes(),
2156            root_tree.get_add(1).unwrap().as_bytes()
2157        );
2158        let entry = iter.next().unwrap();
2159        assert_eq!(entry.filename(), b".jjconflict-side-2");
2160        assert_eq!(
2161            entry.id().as_bytes(),
2162            root_tree.get_add(2).unwrap().as_bytes()
2163        );
2164        let entry = iter.next().unwrap();
2165        assert_eq!(entry.filename(), b"README");
2166        assert_eq!(entry.mode().value(), 0o100644);
2167        assert!(iter.next().is_none());
2168
2169        // When writing a single tree using the new format, it's represented by a
2170        // regular git tree.
2171        commit.root_tree = MergedTreeId::resolved(create_tree(5));
2172        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2173        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2174        assert_eq!(read_commit, commit);
2175        let git_commit = git_repo
2176            .find_commit(gix::ObjectId::from_bytes_or_panic(
2177                read_commit_id.as_bytes(),
2178            ))
2179            .unwrap();
2180        assert_eq!(
2181            MergedTreeId::resolved(TreeId::from_bytes(git_commit.tree_id().unwrap().as_bytes())),
2182            commit.root_tree
2183        );
2184    }
2185
2186    #[test]
2187    fn commit_has_ref() {
2188        let settings = user_settings();
2189        let temp_dir = new_temp_dir();
2190        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2191        let git_repo = backend.git_repo();
2192        let signature = Signature {
2193            name: "Someone".to_string(),
2194            email: "someone@example.com".to_string(),
2195            timestamp: Timestamp {
2196                timestamp: MillisSinceEpoch(0),
2197                tz_offset: 0,
2198            },
2199        };
2200        let commit = Commit {
2201            parents: vec![backend.root_commit_id().clone()],
2202            predecessors: vec![],
2203            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2204            change_id: ChangeId::new(vec![42; 16]),
2205            description: "initial".to_string(),
2206            author: signature.clone(),
2207            committer: signature,
2208            secure_sig: None,
2209        };
2210        let commit_id = backend.write_commit(commit, None).block_on().unwrap().0;
2211        let git_refs = git_repo.references().unwrap();
2212        let git_ref_ids: Vec<_> = git_refs
2213            .prefixed("refs/jj/keep/")
2214            .unwrap()
2215            .map(|x| x.unwrap().id().detach())
2216            .collect();
2217        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2218
2219        // Concurrently-running GC deletes the ref, leaving the extra metadata.
2220        for git_ref in git_refs.prefixed("refs/jj/keep/").unwrap() {
2221            git_ref.unwrap().delete().unwrap();
2222        }
2223        // Re-imported commit should have new ref.
2224        backend.import_head_commits([&commit_id]).unwrap();
2225        let git_refs = git_repo.references().unwrap();
2226        let git_ref_ids: Vec<_> = git_refs
2227            .prefixed("refs/jj/keep/")
2228            .unwrap()
2229            .map(|x| x.unwrap().id().detach())
2230            .collect();
2231        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
2232    }
2233
2234    #[test]
2235    fn import_head_commits_duplicates() {
2236        let settings = user_settings();
2237        let temp_dir = new_temp_dir();
2238        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2239        let git_repo = backend.git_repo();
2240
2241        let signature = gix::actor::Signature {
2242            name: GIT_USER.into(),
2243            email: GIT_EMAIL.into(),
2244            time: gix::date::Time::now_utc(),
2245        };
2246        let empty_tree_id =
2247            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
2248        let git_commit_id = git_repo
2249            .commit_as(
2250                signature.to_ref(&mut TimeBuf::default()),
2251                signature.to_ref(&mut TimeBuf::default()),
2252                "refs/heads/main",
2253                "git commit message",
2254                empty_tree_id,
2255                [] as [gix::ObjectId; 0],
2256            )
2257            .unwrap()
2258            .detach();
2259        let commit_id = CommitId::from_bytes(git_commit_id.as_bytes());
2260
2261        // Ref creation shouldn't fail because of duplicated head ids.
2262        backend
2263            .import_head_commits([&commit_id, &commit_id])
2264            .unwrap();
2265        assert!(
2266            git_repo
2267                .references()
2268                .unwrap()
2269                .prefixed("refs/jj/keep/")
2270                .unwrap()
2271                .any(|git_ref| git_ref.unwrap().id().detach() == git_commit_id)
2272        );
2273    }
2274
2275    #[test]
2276    fn overlapping_git_commit_id() {
2277        let settings = user_settings();
2278        let temp_dir = new_temp_dir();
2279        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2280        let commit1 = Commit {
2281            parents: vec![backend.root_commit_id().clone()],
2282            predecessors: vec![],
2283            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2284            change_id: ChangeId::from_hex("7f0a7ce70354b22efcccf7bf144017c4"),
2285            description: "initial".to_string(),
2286            author: create_signature(),
2287            committer: create_signature(),
2288            secure_sig: None,
2289        };
2290
2291        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2292            backend.write_commit(commit, None).block_on()
2293        };
2294
2295        let (commit_id1, mut commit2) = write_commit(commit1).unwrap();
2296        commit2.predecessors.push(commit_id1.clone());
2297        // `write_commit` should prevent the ids from being the same by changing the
2298        // committer timestamp of the commit it actually writes.
2299        let (commit_id2, mut actual_commit2) = write_commit(commit2.clone()).unwrap();
2300        // The returned matches the ID
2301        assert_eq!(
2302            backend.read_commit(&commit_id2).block_on().unwrap(),
2303            actual_commit2
2304        );
2305        assert_ne!(commit_id2, commit_id1);
2306        // The committer timestamp should differ
2307        assert_ne!(
2308            actual_commit2.committer.timestamp.timestamp,
2309            commit2.committer.timestamp.timestamp
2310        );
2311        // The rest of the commit should be the same
2312        actual_commit2.committer.timestamp.timestamp = commit2.committer.timestamp.timestamp;
2313        assert_eq!(actual_commit2, commit2);
2314    }
2315
2316    #[test]
2317    fn write_signed_commit() {
2318        let settings = user_settings();
2319        let temp_dir = new_temp_dir();
2320        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2321
2322        let commit = Commit {
2323            parents: vec![backend.root_commit_id().clone()],
2324            predecessors: vec![],
2325            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2326            change_id: ChangeId::new(vec![42; 16]),
2327            description: "initial".to_string(),
2328            author: create_signature(),
2329            committer: create_signature(),
2330            secure_sig: None,
2331        };
2332
2333        let mut signer = |data: &_| {
2334            let hash: String = hex_util::encode_hex(&blake2b_hash(data));
2335            Ok(format!("test sig\nhash={hash}\n").into_bytes())
2336        };
2337
2338        let (id, commit) = backend
2339            .write_commit(commit, Some(&mut signer as &mut SigningFn))
2340            .block_on()
2341            .unwrap();
2342
2343        let git_repo = backend.git_repo();
2344        let obj = git_repo
2345            .find_object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
2346            .unwrap();
2347        insta::assert_snapshot!(str::from_utf8(&obj.data).unwrap(), @r"
2348        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2349        author Someone <someone@example.com> 0 +0000
2350        committer Someone <someone@example.com> 0 +0000
2351        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp
2352        gpgsig test sig
2353         hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29
2354
2355        initial
2356        ");
2357
2358        let returned_sig = commit.secure_sig.expect("failed to return the signature");
2359
2360        let commit = backend.read_commit(&id).block_on().unwrap();
2361
2362        let sig = commit.secure_sig.expect("failed to read the signature");
2363        assert_eq!(&sig, &returned_sig);
2364
2365        insta::assert_snapshot!(str::from_utf8(&sig.sig).unwrap(), @r"
2366        test sig
2367        hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29
2368        ");
2369        insta::assert_snapshot!(str::from_utf8(&sig.data).unwrap(), @r"
2370        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2371        author Someone <someone@example.com> 0 +0000
2372        committer Someone <someone@example.com> 0 +0000
2373        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp
2374
2375        initial
2376        ");
2377    }
2378
2379    fn git_id(commit_id: &CommitId) -> gix::ObjectId {
2380        gix::ObjectId::from_bytes_or_panic(commit_id.as_bytes())
2381    }
2382
2383    fn create_signature() -> Signature {
2384        Signature {
2385            name: GIT_USER.to_string(),
2386            email: GIT_EMAIL.to_string(),
2387            timestamp: Timestamp {
2388                timestamp: MillisSinceEpoch(0),
2389                tz_offset: 0,
2390            },
2391        }
2392    }
2393
2394    // Not using testutils::user_settings() because there is a dependency cycle
2395    // 'jj_lib (1) -> testutils -> jj_lib (2)' which creates another distinct
2396    // UserSettings type. testutils returns jj_lib (2)'s UserSettings, whereas
2397    // our UserSettings type comes from jj_lib (1).
2398    fn user_settings() -> UserSettings {
2399        let config = StackedConfig::with_defaults();
2400        UserSettings::from_config(config).unwrap()
2401    }
2402}