jj_lib/
git_backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::any::Any;
18use std::collections::HashSet;
19use std::ffi::OsStr;
20use std::fmt::Debug;
21use std::fmt::Error;
22use std::fmt::Formatter;
23use std::fs;
24use std::io;
25use std::io::Cursor;
26use std::path::Path;
27use std::path::PathBuf;
28use std::pin::Pin;
29use std::process::Command;
30use std::process::ExitStatus;
31use std::str;
32use std::sync::Arc;
33use std::sync::Mutex;
34use std::sync::MutexGuard;
35use std::time::SystemTime;
36
37use async_trait::async_trait;
38use futures::stream::BoxStream;
39use gix::bstr::BString;
40use gix::objs::CommitRef;
41use gix::objs::CommitRefIter;
42use gix::objs::WriteTo as _;
43use itertools::Itertools as _;
44use pollster::FutureExt as _;
45use prost::Message as _;
46use smallvec::SmallVec;
47use thiserror::Error;
48use tokio::io::AsyncRead;
49use tokio::io::AsyncReadExt as _;
50
51use crate::backend::make_root_commit;
52use crate::backend::Backend;
53use crate::backend::BackendError;
54use crate::backend::BackendInitError;
55use crate::backend::BackendLoadError;
56use crate::backend::BackendResult;
57use crate::backend::ChangeId;
58use crate::backend::Commit;
59use crate::backend::CommitId;
60use crate::backend::Conflict;
61use crate::backend::ConflictId;
62use crate::backend::ConflictTerm;
63use crate::backend::CopyHistory;
64use crate::backend::CopyId;
65use crate::backend::CopyRecord;
66use crate::backend::FileId;
67use crate::backend::MergedTreeId;
68use crate::backend::MillisSinceEpoch;
69use crate::backend::SecureSig;
70use crate::backend::Signature;
71use crate::backend::SigningFn;
72use crate::backend::SymlinkId;
73use crate::backend::Timestamp;
74use crate::backend::Tree;
75use crate::backend::TreeId;
76use crate::backend::TreeValue;
77use crate::config::ConfigGetError;
78use crate::file_util::IoResultExt as _;
79use crate::file_util::PathError;
80use crate::hex_util::to_forward_hex;
81use crate::index::Index;
82use crate::lock::FileLock;
83use crate::merge::Merge;
84use crate::merge::MergeBuilder;
85use crate::object_id::ObjectId;
86use crate::repo_path::RepoPath;
87use crate::repo_path::RepoPathBuf;
88use crate::repo_path::RepoPathComponentBuf;
89use crate::settings::GitSettings;
90use crate::settings::UserSettings;
91use crate::stacked_table::MutableTable;
92use crate::stacked_table::ReadonlyTable;
93use crate::stacked_table::TableSegment as _;
94use crate::stacked_table::TableStore;
95use crate::stacked_table::TableStoreError;
96
/// Length in bytes of the object ids handled by this backend (commit ids,
/// tree ids, and the key size of the extra metadata table).
const HASH_LENGTH: usize = 20;
/// Length in bytes of a change id.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";
// NOTE(review): not referenced in this part of the file; presumably the
// suffix appended to tree entry names that hold conflicts -- confirm.
const CONFLICT_SUFFIX: &str = ".jjconflict";

/// Name of the extra commit header that lists the tree ids of a conflicted
/// root tree as space-separated hex strings.
pub const JJ_TREES_COMMIT_HEADER: &[u8] = b"jj:trees";
/// Name of the extra commit header that carries the change id.
pub const CHANGE_ID_COMMIT_HEADER: &[u8] = b"change-id";
105
/// Errors that can occur while initializing a [`GitBackend`].
#[derive(Debug, Error)]
pub enum GitBackendInitError {
    /// Creating the new Git repository failed.
    #[error("Failed to initialize git repository")]
    InitRepository(#[source] gix::init::Error),
    /// Opening the existing Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// The Git-related user settings could not be read.
    #[error(transparent)]
    Config(ConfigGetError),
    /// A filesystem operation on the contained path failed.
    #[error(transparent)]
    Path(PathError),
}
117
118impl From<Box<GitBackendInitError>> for BackendInitError {
119    fn from(err: Box<GitBackendInitError>) -> Self {
120        BackendInitError(err)
121    }
122}
123
/// Errors that can occur while loading an existing [`GitBackend`].
#[derive(Debug, Error)]
pub enum GitBackendLoadError {
    /// Opening the backing Git repository failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// The Git-related user settings could not be read.
    #[error(transparent)]
    Config(ConfigGetError),
    /// A filesystem operation on the contained path failed.
    #[error(transparent)]
    Path(PathError),
}
133
134impl From<Box<GitBackendLoadError>> for BackendLoadError {
135    fn from(err: Box<GitBackendLoadError>) -> Self {
136        BackendLoadError(err)
137    }
138}
139
/// `GitBackend`-specific error that may occur after the backend is loaded.
#[derive(Debug, Error)]
pub enum GitBackendError {
    /// Reading the extra (non-git) metadata table failed.
    #[error("Failed to read non-git metadata")]
    ReadMetadata(#[source] TableStoreError),
    /// Saving the extra (non-git) metadata table failed.
    #[error("Failed to write non-git metadata")]
    WriteMetadata(#[source] TableStoreError),
}
148
149impl From<GitBackendError> for BackendError {
150    fn from(err: GitBackendError) -> Self {
151        BackendError::Other(err.into())
152    }
153}
154
/// Errors that can occur while running `git gc`.
#[derive(Debug, Error)]
pub enum GitGcError {
    /// The `git gc` command could not be run.
    #[error("Failed to run git gc command")]
    GcCommand(#[source] std::io::Error),
    /// The `git gc` command ran but exited with the contained error status.
    #[error("git gc command exited with an error: {0}")]
    GcCommandErrorStatus(ExitStatus),
}
162
/// Backend that stores its objects in a Git repository, plus a separate table
/// store for metadata that is not kept in Git.
pub struct GitBackend {
    // While gix::Repository can be created from gix::ThreadSafeRepository, it's
    // cheaper to cache the thread-local instance behind a mutex than creating
    // one for each backend method call. Our GitBackend is most likely to be
    // used in a single-threaded context.
    /// Thread-safe handle from which new thread-local instances are created
    /// and from which the repository paths are read.
    base_repo: gix::ThreadSafeRepository,
    /// Cached thread-local instance of `base_repo`.
    repo: Mutex<gix::Repository>,
    /// Id of the root commit: `HASH_LENGTH` zero bytes.
    root_commit_id: CommitId,
    /// Change id of the root commit: `CHANGE_ID_LENGTH` zero bytes.
    root_change_id: ChangeId,
    /// Id of the empty tree.
    empty_tree_id: TreeId,
    /// Store of the extra (non-git) metadata table.
    extra_metadata_store: TableStore,
    /// Most recently loaded or saved head of the extra metadata table, if any.
    cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
    /// Path to the `git` executable, taken from the Git settings.
    git_executable: PathBuf,
    // NOTE(review): copied from the Git settings; presumably controls whether
    // the change-id commit header is written -- its use is not visible here.
    write_change_id_header: bool,
}
178
179impl GitBackend {
180    pub fn name() -> &'static str {
181        "git"
182    }
183
184    fn new(
185        base_repo: gix::ThreadSafeRepository,
186        extra_metadata_store: TableStore,
187        git_settings: GitSettings,
188    ) -> Self {
189        let repo = Mutex::new(base_repo.to_thread_local());
190        let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
191        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
192        let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
193        GitBackend {
194            base_repo,
195            repo,
196            root_commit_id,
197            root_change_id,
198            empty_tree_id,
199            extra_metadata_store,
200            cached_extra_metadata: Mutex::new(None),
201            git_executable: git_settings.executable_path,
202            write_change_id_header: git_settings.write_change_id_header,
203        }
204    }
205
206    pub fn init_internal(
207        settings: &UserSettings,
208        store_path: &Path,
209    ) -> Result<Self, Box<GitBackendInitError>> {
210        let git_repo_path = Path::new("git");
211        let git_repo = gix::ThreadSafeRepository::init_opts(
212            store_path.join(git_repo_path),
213            gix::create::Kind::Bare,
214            gix::create::Options::default(),
215            gix_open_opts_from_settings(settings),
216        )
217        .map_err(GitBackendInitError::InitRepository)?;
218        let git_settings = settings
219            .git_settings()
220            .map_err(GitBackendInitError::Config)?;
221        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
222    }
223
224    /// Initializes backend by creating a new Git repo at the specified
225    /// workspace path. The workspace directory must exist.
226    pub fn init_colocated(
227        settings: &UserSettings,
228        store_path: &Path,
229        workspace_root: &Path,
230    ) -> Result<Self, Box<GitBackendInitError>> {
231        let canonical_workspace_root = {
232            let path = store_path.join(workspace_root);
233            dunce::canonicalize(&path)
234                .context(&path)
235                .map_err(GitBackendInitError::Path)?
236        };
237        let git_repo = gix::ThreadSafeRepository::init_opts(
238            canonical_workspace_root,
239            gix::create::Kind::WithWorktree,
240            gix::create::Options::default(),
241            gix_open_opts_from_settings(settings),
242        )
243        .map_err(GitBackendInitError::InitRepository)?;
244        let git_repo_path = workspace_root.join(".git");
245        let git_settings = settings
246            .git_settings()
247            .map_err(GitBackendInitError::Config)?;
248        Self::init_with_repo(store_path, &git_repo_path, git_repo, git_settings)
249    }
250
251    /// Initializes backend with an existing Git repo at the specified path.
252    pub fn init_external(
253        settings: &UserSettings,
254        store_path: &Path,
255        git_repo_path: &Path,
256    ) -> Result<Self, Box<GitBackendInitError>> {
257        let canonical_git_repo_path = {
258            let path = store_path.join(git_repo_path);
259            canonicalize_git_repo_path(&path)
260                .context(&path)
261                .map_err(GitBackendInitError::Path)?
262        };
263        let git_repo = gix::ThreadSafeRepository::open_opts(
264            canonical_git_repo_path,
265            gix_open_opts_from_settings(settings),
266        )
267        .map_err(GitBackendInitError::OpenRepository)?;
268        let git_settings = settings
269            .git_settings()
270            .map_err(GitBackendInitError::Config)?;
271        Self::init_with_repo(store_path, git_repo_path, git_repo, git_settings)
272    }
273
274    fn init_with_repo(
275        store_path: &Path,
276        git_repo_path: &Path,
277        repo: gix::ThreadSafeRepository,
278        git_settings: GitSettings,
279    ) -> Result<Self, Box<GitBackendInitError>> {
280        let extra_path = store_path.join("extra");
281        fs::create_dir(&extra_path)
282            .context(&extra_path)
283            .map_err(GitBackendInitError::Path)?;
284        let target_path = store_path.join("git_target");
285        if cfg!(windows) && git_repo_path.is_relative() {
286            // When a repository is created in Windows, format the path with *forward
287            // slashes* and not backwards slashes. This makes it possible to use the same
288            // repository under Windows Subsystem for Linux.
289            //
290            // This only works for relative paths. If the path is absolute, there's not much
291            // we can do, and it simply won't work inside and outside WSL at the same time.
292            let git_repo_path_string = git_repo_path
293                .components()
294                .map(|component| component.as_os_str().to_str().unwrap().to_owned())
295                .join("/");
296            fs::write(&target_path, git_repo_path_string.as_bytes())
297                .context(&target_path)
298                .map_err(GitBackendInitError::Path)?;
299        } else {
300            fs::write(&target_path, git_repo_path.to_str().unwrap().as_bytes())
301                .context(&target_path)
302                .map_err(GitBackendInitError::Path)?;
303        };
304        let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
305        Ok(GitBackend::new(repo, extra_metadata_store, git_settings))
306    }
307
308    pub fn load(
309        settings: &UserSettings,
310        store_path: &Path,
311    ) -> Result<Self, Box<GitBackendLoadError>> {
312        let git_repo_path = {
313            let target_path = store_path.join("git_target");
314            let git_repo_path_str = fs::read_to_string(&target_path)
315                .context(&target_path)
316                .map_err(GitBackendLoadError::Path)?;
317            let git_repo_path = store_path.join(git_repo_path_str);
318            canonicalize_git_repo_path(&git_repo_path)
319                .context(&git_repo_path)
320                .map_err(GitBackendLoadError::Path)?
321        };
322        let repo = gix::ThreadSafeRepository::open_opts(
323            git_repo_path,
324            gix_open_opts_from_settings(settings),
325        )
326        .map_err(GitBackendLoadError::OpenRepository)?;
327        let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
328        let git_settings = settings
329            .git_settings()
330            .map_err(GitBackendLoadError::Config)?;
331        Ok(GitBackend::new(repo, extra_metadata_store, git_settings))
332    }
333
334    fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
335        self.repo.lock().unwrap()
336    }
337
338    /// Returns new thread-local instance to access to the underlying Git repo.
339    pub fn git_repo(&self) -> gix::Repository {
340        self.base_repo.to_thread_local()
341    }
342
343    /// Path to the `.git` directory or the repository itself if it's bare.
344    pub fn git_repo_path(&self) -> &Path {
345        self.base_repo.path()
346    }
347
348    /// Path to the working directory if the repository isn't bare.
349    pub fn git_workdir(&self) -> Option<&Path> {
350        self.base_repo.work_dir()
351    }
352
353    fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
354        let mut locked_head = self.cached_extra_metadata.lock().unwrap();
355        match locked_head.as_ref() {
356            Some(head) => Ok(head.clone()),
357            None => {
358                let table = self
359                    .extra_metadata_store
360                    .get_head()
361                    .map_err(GitBackendError::ReadMetadata)?;
362                *locked_head = Some(table.clone());
363                Ok(table)
364            }
365        }
366    }
367
368    fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> {
369        let table = self
370            .extra_metadata_store
371            .get_head_locked()
372            .map_err(GitBackendError::ReadMetadata)?;
373        Ok(table)
374    }
375
376    fn save_extra_metadata_table(
377        &self,
378        mut_table: MutableTable,
379        _table_lock: &FileLock,
380    ) -> BackendResult<()> {
381        let table = self
382            .extra_metadata_store
383            .save_table(mut_table)
384            .map_err(GitBackendError::WriteMetadata)?;
385        // Since the parent table was the head, saved table are likely to be new head.
386        // If it's not, cache will be reloaded when entry can't be found.
387        *self.cached_extra_metadata.lock().unwrap() = Some(table);
388        Ok(())
389    }
390
391    /// Imports the given commits and ancestors from the backing Git repo.
392    ///
393    /// The `head_ids` may contain commits that have already been imported, but
394    /// the caller should filter them out to eliminate redundant I/O processing.
395    #[tracing::instrument(skip(self, head_ids))]
396    pub fn import_head_commits<'a>(
397        &self,
398        head_ids: impl IntoIterator<Item = &'a CommitId>,
399    ) -> BackendResult<()> {
400        let head_ids: HashSet<&CommitId> = head_ids
401            .into_iter()
402            .filter(|&id| *id != self.root_commit_id)
403            .collect();
404        if head_ids.is_empty() {
405            return Ok(());
406        }
407
408        // Create no-gc ref even if known to the extras table. Concurrent GC
409        // process might have deleted the no-gc ref.
410        let locked_repo = self.lock_git_repo();
411        locked_repo
412            .edit_references(head_ids.iter().copied().map(to_no_gc_ref_update))
413            .map_err(|err| BackendError::Other(Box::new(err)))?;
414
415        // These commits are imported from Git. Make our change ids persist (otherwise
416        // future write_commit() could reassign new change id.)
417        tracing::debug!(
418            heads_count = head_ids.len(),
419            "import extra metadata entries"
420        );
421        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
422        let mut mut_table = table.start_mutation();
423        import_extra_metadata_entries_from_heads(
424            &locked_repo,
425            &mut mut_table,
426            &table_lock,
427            &head_ids,
428        )?;
429        self.save_extra_metadata_table(mut_table, &table_lock)
430    }
431
432    fn read_file_sync(&self, id: &FileId) -> BackendResult<Vec<u8>> {
433        let git_blob_id = validate_git_object_id(id)?;
434        let locked_repo = self.lock_git_repo();
435        let mut blob = locked_repo
436            .find_object(git_blob_id)
437            .map_err(|err| map_not_found_err(err, id))?
438            .try_into_blob()
439            .map_err(|err| to_read_object_err(err, id))?;
440        Ok(blob.take_data())
441    }
442
443    fn new_diff_platform(&self) -> BackendResult<gix::diff::blob::Platform> {
444        let attributes = gix::worktree::Stack::new(
445            Path::new(""),
446            gix::worktree::stack::State::AttributesStack(Default::default()),
447            gix::worktree::glob::pattern::Case::Sensitive,
448            Vec::new(),
449            Vec::new(),
450        );
451        let filter = gix::diff::blob::Pipeline::new(
452            Default::default(),
453            gix::filter::plumbing::Pipeline::new(
454                self.git_repo()
455                    .command_context()
456                    .map_err(|err| BackendError::Other(Box::new(err)))?,
457                Default::default(),
458            ),
459            Vec::new(),
460            Default::default(),
461        );
462        Ok(gix::diff::blob::Platform::new(
463            Default::default(),
464            filter,
465            gix::diff::blob::pipeline::Mode::ToGit,
466            attributes,
467        ))
468    }
469
470    fn read_tree_for_commit<'repo>(
471        &self,
472        repo: &'repo gix::Repository,
473        id: &CommitId,
474    ) -> BackendResult<gix::Tree<'repo>> {
475        let tree = self.read_commit(id).block_on()?.root_tree.to_merge();
476        // TODO(kfm): probably want to do something here if it is a merge
477        let tree_id = tree.first().clone();
478        let gix_id = validate_git_object_id(&tree_id)?;
479        repo.find_object(gix_id)
480            .map_err(|err| map_not_found_err(err, &tree_id))?
481            .try_into_tree()
482            .map_err(|err| to_read_object_err(err, &tree_id))
483    }
484}
485
486/// Canonicalizes the given `path` except for the last `".git"` component.
487///
488/// The last path component matters when opening a Git repo without `core.bare`
489/// config. This config is usually set, but the "repo" tool will set up such
490/// repositories and symlinks. Opening such repo with fully-canonicalized path
491/// would turn a colocated Git repo into a bare repo.
492pub fn canonicalize_git_repo_path(path: &Path) -> io::Result<PathBuf> {
493    if path.ends_with(".git") {
494        let workdir = path.parent().unwrap();
495        dunce::canonicalize(workdir).map(|dir| dir.join(".git"))
496    } else {
497        dunce::canonicalize(path)
498    }
499}
500
501fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options {
502    let user_name = settings.user_name();
503    let user_email = settings.user_email();
504    gix::open::Options::default()
505        .config_overrides([
506            // Committer has to be configured to record reflog. Author isn't
507            // needed, but let's copy the same values.
508            format!("author.name={user_name}"),
509            format!("author.email={user_email}"),
510            format!("committer.name={user_name}"),
511            format!("committer.email={user_email}"),
512        ])
513        // The git_target path should point the repository, not the working directory.
514        .open_path_as_is(true)
515        // Gitoxide recommends this when correctness is preferred
516        .strict_config(true)
517}
518
519/// Reads the `jj:trees` header from the commit.
520fn root_tree_from_header(git_commit: &CommitRef) -> Result<Option<MergedTreeId>, ()> {
521    for (key, value) in &git_commit.extra_headers {
522        if *key == JJ_TREES_COMMIT_HEADER {
523            let mut tree_ids = SmallVec::new();
524            for hex in str::from_utf8(value.as_ref()).or(Err(()))?.split(' ') {
525                let tree_id = TreeId::try_from_hex(hex).or(Err(()))?;
526                if tree_id.as_bytes().len() != HASH_LENGTH {
527                    return Err(());
528                }
529                tree_ids.push(tree_id);
530            }
531            // It is invalid to use `jj:trees` with a non-conflicted tree. If this were
532            // allowed, it would be possible to construct a commit which appears to have
533            // different contents depending on whether it is viewed using `jj` or `git`.
534            if tree_ids.len() == 1 || tree_ids.len() % 2 == 0 {
535                return Err(());
536            }
537            return Ok(Some(MergedTreeId::Merge(Merge::from_vec(tree_ids))));
538        }
539    }
540    Ok(None)
541}
542
543fn commit_from_git_without_root_parent(
544    id: &CommitId,
545    git_object: &gix::Object,
546    uses_tree_conflict_format: bool,
547    is_shallow: bool,
548) -> BackendResult<Commit> {
549    let commit = git_object
550        .try_to_commit_ref()
551        .map_err(|err| to_read_object_err(err, id))?;
552
553    // If the git header has a change-id field, we attempt to convert that to a
554    // valid JJ Change Id
555    let change_id = commit
556        .extra_headers()
557        .find("change-id")
558        .and_then(to_forward_hex)
559        .and_then(|change_id_hex| ChangeId::try_from_hex(change_id_hex.as_str()).ok())
560        .filter(|val| val.as_bytes().len() == CHANGE_ID_LENGTH)
561        // Otherwise, we reverse the bits of the commit id to create the change id.
562        // We don't want to use the first bytes unmodified because then it would be
563        // ambiguous if a given hash prefix refers to the commit id or the change id.
564        // It would have been enough to pick the last 16 bytes instead of the
565        // leading 16 bytes to address that. We also reverse the bits to make it
566        // less likely that users depend on any relationship between the two ids.
567        .unwrap_or_else(|| {
568            ChangeId::new(
569                id.as_bytes()[4..HASH_LENGTH]
570                    .iter()
571                    .rev()
572                    .map(|b| b.reverse_bits())
573                    .collect(),
574            )
575        });
576
577    // shallow commits don't have parents their parents actually fetched, so we
578    // discard them here
579    // TODO: This causes issues when a shallow repository is deepened/unshallowed
580    let parents = if is_shallow {
581        vec![]
582    } else {
583        commit
584            .parents()
585            .map(|oid| CommitId::from_bytes(oid.as_bytes()))
586            .collect_vec()
587    };
588    let tree_id = TreeId::from_bytes(commit.tree().as_bytes());
589    // If this commit is a conflict, we'll update the root tree later, when we read
590    // the extra metadata.
591    let root_tree = root_tree_from_header(&commit)
592        .map_err(|()| to_read_object_err("Invalid jj:trees header", id))?;
593    let root_tree = root_tree.unwrap_or_else(|| {
594        if uses_tree_conflict_format {
595            MergedTreeId::resolved(tree_id)
596        } else {
597            MergedTreeId::Legacy(tree_id)
598        }
599    });
600    // Use lossy conversion as commit message with "mojibake" is still better than
601    // nothing.
602    // TODO: what should we do with commit.encoding?
603    let description = String::from_utf8_lossy(commit.message).into_owned();
604    let author = signature_from_git(commit.author());
605    let committer = signature_from_git(commit.committer());
606
607    // If the commit is signed, extract both the signature and the signed data
608    // (which is the commit buffer with the gpgsig header omitted).
609    // We have to re-parse the raw commit data because gix CommitRef does not give
610    // us the sogned data, only the signature.
611    // Ideally, we could use try_to_commit_ref_iter at the beginning of this
612    // function and extract everything from that. For now, this works
613    let secure_sig = commit
614        .extra_headers
615        .iter()
616        // gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too
617        .any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256")
618        .then(|| CommitRefIter::signature(&git_object.data))
619        .transpose()
620        .map_err(|err| to_read_object_err(err, id))?
621        .flatten()
622        .map(|(sig, data)| SecureSig {
623            data: data.to_bstring().into(),
624            sig: sig.into_owned().into(),
625        });
626
627    Ok(Commit {
628        parents,
629        predecessors: vec![],
630        // If this commit has associated extra metadata, we may reset this later.
631        root_tree,
632        change_id,
633        description,
634        author,
635        committer,
636        secure_sig,
637    })
638}
639
/// Stands in for an empty name or email in Git signatures, since git does not
/// support empty names or emails.
const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING";
641
642fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature {
643    let name = signature.name;
644    let name = if name != EMPTY_STRING_PLACEHOLDER {
645        String::from_utf8_lossy(name).into_owned()
646    } else {
647        "".to_string()
648    };
649    let email = signature.email;
650    let email = if email != EMPTY_STRING_PLACEHOLDER {
651        String::from_utf8_lossy(email).into_owned()
652    } else {
653        "".to_string()
654    };
655    let timestamp = MillisSinceEpoch(signature.time.seconds * 1000);
656    let tz_offset = signature.time.offset.div_euclid(60); // in minutes
657    Signature {
658        name,
659        email,
660        timestamp: Timestamp {
661            timestamp,
662            tz_offset,
663        },
664    }
665}
666
667fn signature_to_git(signature: &Signature) -> gix::actor::SignatureRef<'_> {
668    // git does not support empty names or emails
669    let name = if !signature.name.is_empty() {
670        &signature.name
671    } else {
672        EMPTY_STRING_PLACEHOLDER
673    };
674    let email = if !signature.email.is_empty() {
675        &signature.email
676    } else {
677        EMPTY_STRING_PLACEHOLDER
678    };
679    let time = gix::date::Time::new(
680        signature.timestamp.timestamp.0.div_euclid(1000),
681        signature.timestamp.tz_offset * 60, // in seconds
682    );
683    gix::actor::SignatureRef {
684        name: name.into(),
685        email: email.into(),
686        time,
687    }
688}
689
690fn serialize_extras(commit: &Commit) -> Vec<u8> {
691    let mut proto = crate::protos::git_store::Commit {
692        change_id: commit.change_id.to_bytes(),
693        ..Default::default()
694    };
695    if let MergedTreeId::Merge(tree_ids) = &commit.root_tree {
696        proto.uses_tree_conflict_format = true;
697        if !tree_ids.is_resolved() {
698            // This is done for the sake of jj versions <0.28 (before commit
699            // f7b14be) being able to read the repo. At some point in the
700            // future, we can stop doing it.
701            proto.root_tree = tree_ids.iter().map(|r| r.to_bytes()).collect();
702        }
703    }
704    for predecessor in &commit.predecessors {
705        proto.predecessors.push(predecessor.to_bytes());
706    }
707    proto.encode_to_vec()
708}
709
710fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
711    let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
712    if !proto.change_id.is_empty() {
713        commit.change_id = ChangeId::new(proto.change_id);
714    }
715    if let MergedTreeId::Legacy(legacy_tree_id) = &commit.root_tree {
716        if proto.uses_tree_conflict_format {
717            if !proto.root_tree.is_empty() {
718                let merge_builder: MergeBuilder<_> = proto
719                    .root_tree
720                    .iter()
721                    .map(|id_bytes| TreeId::from_bytes(id_bytes))
722                    .collect();
723                commit.root_tree = MergedTreeId::Merge(merge_builder.build());
724            } else {
725                // uses_tree_conflict_format was set but there was no root_tree override in the
726                // proto, which means we should just promote the tree id from the
727                // git commit to be a known-conflict-free tree
728                commit.root_tree = MergedTreeId::resolved(legacy_tree_id.clone());
729            }
730        }
731    }
732    for predecessor in &proto.predecessors {
733        commit.predecessors.push(CommitId::from_bytes(predecessor));
734    }
735}
736
737/// Returns `RefEdit` that will create a ref in `refs/jj/keep` if not exist.
738/// Used for preventing GC of commits we create.
739fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit {
740    let name = format!("{NO_GC_REF_NAMESPACE}{id}");
741    let new = gix::refs::Target::Object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()));
742    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(new.clone());
743    gix::refs::transaction::RefEdit {
744        change: gix::refs::transaction::Change::Update {
745            log: gix::refs::transaction::LogChange {
746                message: "used by jj".into(),
747                ..Default::default()
748            },
749            expected,
750            new,
751        },
752        name: name.try_into().unwrap(),
753        deref: false,
754    }
755}
756
757fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit {
758    let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(git_ref.target);
759    gix::refs::transaction::RefEdit {
760        change: gix::refs::transaction::Change::Delete {
761            expected,
762            log: gix::refs::transaction::RefLog::AndReference,
763        },
764        name: git_ref.name,
765        deref: false,
766    }
767}
768
769/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other
770/// unreachable and non-head refs.
771fn recreate_no_gc_refs(
772    git_repo: &gix::Repository,
773    new_heads: impl IntoIterator<Item = CommitId>,
774    keep_newer: SystemTime,
775) -> BackendResult<()> {
776    // Calculate diff between existing no-gc refs and new heads.
777    let new_heads: HashSet<CommitId> = new_heads.into_iter().collect();
778    let mut no_gc_refs_to_keep_count: usize = 0;
779    let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new();
780    let git_references = git_repo
781        .references()
782        .map_err(|err| BackendError::Other(err.into()))?;
783    let no_gc_refs_iter = git_references
784        .prefixed(NO_GC_REF_NAMESPACE)
785        .map_err(|err| BackendError::Other(err.into()))?;
786    for git_ref in no_gc_refs_iter {
787        let git_ref = git_ref.map_err(BackendError::Other)?.detach();
788        let oid = git_ref.target.try_id().ok_or_else(|| {
789            let name = git_ref.name.as_bstr();
790            BackendError::Other(format!("Symbolic no-gc ref found: {name}").into())
791        })?;
792        let id = CommitId::from_bytes(oid.as_bytes());
793        let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex();
794        if new_heads.contains(&id) && name_good {
795            no_gc_refs_to_keep_count += 1;
796            continue;
797        }
798        // Check timestamp of loose ref, but this is still racy on re-import
799        // because:
800        // - existing packed ref won't be demoted to loose ref
801        // - existing loose ref won't be touched
802        //
803        // TODO: might be better to switch to a dummy merge, where new no-gc ref
804        // will always have a unique name. Doing that with the current
805        // ref-per-head strategy would increase the number of the no-gc refs.
806        // https://github.com/jj-vcs/jj/pull/2659#issuecomment-1837057782
807        let loose_ref_path = git_repo.path().join(git_ref.name.to_path());
808        if let Ok(metadata) = loose_ref_path.metadata() {
809            let mtime = metadata.modified().expect("unsupported platform?");
810            if mtime > keep_newer {
811                tracing::trace!(?git_ref, "not deleting new");
812                no_gc_refs_to_keep_count += 1;
813                continue;
814            }
815        }
816        // Also deletes no-gc ref of random name created by old jj.
817        tracing::trace!(?git_ref, ?name_good, "will delete");
818        no_gc_refs_to_delete.push(git_ref);
819    }
820    tracing::info!(
821        new_heads_count = new_heads.len(),
822        no_gc_refs_to_keep_count,
823        no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(),
824        "collected reachable refs"
825    );
826
827    // It's slow to delete packed refs one by one, so update refs all at once.
828    let ref_edits = itertools::chain(
829        no_gc_refs_to_delete.into_iter().map(to_ref_deletion),
830        new_heads.iter().map(to_no_gc_ref_update),
831    );
832    git_repo
833        .edit_references(ref_edits)
834        .map_err(|err| BackendError::Other(err.into()))?;
835
836    Ok(())
837}
838
839fn run_git_gc(program: &OsStr, git_dir: &Path) -> Result<(), GitGcError> {
840    let mut git = Command::new(program);
841    git.arg("--git-dir=."); // turn off discovery
842    git.arg("gc");
843    // Don't specify it by GIT_DIR/--git-dir. On Windows, the path could be
844    // canonicalized as UNC path, which wouldn't be supported by git.
845    git.current_dir(git_dir);
846    // TODO: pass output to UI layer instead of printing directly here
847    let status = git.status().map_err(GitGcError::GcCommand)?;
848    if !status.success() {
849        return Err(GitGcError::GcCommandErrorStatus(status));
850    }
851    Ok(())
852}
853
854fn validate_git_object_id(id: &impl ObjectId) -> BackendResult<gix::ObjectId> {
855    if id.as_bytes().len() != HASH_LENGTH {
856        return Err(BackendError::InvalidHashLength {
857            expected: HASH_LENGTH,
858            actual: id.as_bytes().len(),
859            object_type: id.object_type(),
860            hash: id.hex(),
861        });
862    }
863    Ok(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
864}
865
866fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError {
867    if matches!(err, gix::object::find::existing::Error::NotFound { .. }) {
868        BackendError::ObjectNotFound {
869            object_type: id.object_type(),
870            hash: id.hex(),
871            source: Box::new(err),
872        }
873    } else {
874        to_read_object_err(err, id)
875    }
876}
877
878fn to_read_object_err(
879    err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
880    id: &impl ObjectId,
881) -> BackendError {
882    BackendError::ReadObject {
883        object_type: id.object_type(),
884        hash: id.hex(),
885        source: err.into(),
886    }
887}
888
889fn to_invalid_utf8_err(source: str::Utf8Error, id: &impl ObjectId) -> BackendError {
890    BackendError::InvalidUtf8 {
891        object_type: id.object_type(),
892        hash: id.hex(),
893        source,
894    }
895}
896
897fn import_extra_metadata_entries_from_heads(
898    git_repo: &gix::Repository,
899    mut_table: &mut MutableTable,
900    _table_lock: &FileLock,
901    head_ids: &HashSet<&CommitId>,
902) -> BackendResult<()> {
903    let shallow_commits = git_repo
904        .shallow_commits()
905        .map_err(|e| BackendError::Other(Box::new(e)))?;
906
907    let mut work_ids = head_ids
908        .iter()
909        .filter(|&id| mut_table.get_value(id.as_bytes()).is_none())
910        .map(|&id| id.clone())
911        .collect_vec();
912    while let Some(id) = work_ids.pop() {
913        let git_object = git_repo
914            .find_object(validate_git_object_id(&id)?)
915            .map_err(|err| map_not_found_err(err, &id))?;
916        let is_shallow = shallow_commits
917            .as_ref()
918            .is_some_and(|shallow| shallow.contains(&git_object.id));
919        // TODO(#1624): Should we read the root tree here and check if it has a
920        // `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
921        // change the description of a commit with tree-level conflicts.
922        let commit = commit_from_git_without_root_parent(&id, &git_object, true, is_shallow)?;
923        mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
924        work_ids.extend(
925            commit
926                .parents
927                .into_iter()
928                .filter(|id| mut_table.get_value(id.as_bytes()).is_none()),
929        );
930    }
931    Ok(())
932}
933
934impl Debug for GitBackend {
935    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
936        f.debug_struct("GitBackend")
937            .field("path", &self.git_repo_path())
938            .finish()
939    }
940}
941
942#[async_trait]
943impl Backend for GitBackend {
944    fn as_any(&self) -> &dyn Any {
945        self
946    }
947
948    fn name(&self) -> &str {
949        Self::name()
950    }
951
952    fn commit_id_length(&self) -> usize {
953        HASH_LENGTH
954    }
955
956    fn change_id_length(&self) -> usize {
957        CHANGE_ID_LENGTH
958    }
959
960    fn root_commit_id(&self) -> &CommitId {
961        &self.root_commit_id
962    }
963
964    fn root_change_id(&self) -> &ChangeId {
965        &self.root_change_id
966    }
967
968    fn empty_tree_id(&self) -> &TreeId {
969        &self.empty_tree_id
970    }
971
972    fn concurrency(&self) -> usize {
973        1
974    }
975
976    async fn read_file(
977        &self,
978        _path: &RepoPath,
979        id: &FileId,
980    ) -> BackendResult<Pin<Box<dyn AsyncRead>>> {
981        let data = self.read_file_sync(id)?;
982        Ok(Box::pin(Cursor::new(data)))
983    }
984
985    async fn write_file(
986        &self,
987        _path: &RepoPath,
988        contents: &mut (dyn AsyncRead + Send + Unpin),
989    ) -> BackendResult<FileId> {
990        let mut bytes = Vec::new();
991        contents.read_to_end(&mut bytes).await.unwrap();
992        let locked_repo = self.lock_git_repo();
993        let oid = locked_repo
994            .write_blob(bytes)
995            .map_err(|err| BackendError::WriteObject {
996                object_type: "file",
997                source: Box::new(err),
998            })?;
999        Ok(FileId::new(oid.as_bytes().to_vec()))
1000    }
1001
1002    async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
1003        let git_blob_id = validate_git_object_id(id)?;
1004        let locked_repo = self.lock_git_repo();
1005        let mut blob = locked_repo
1006            .find_object(git_blob_id)
1007            .map_err(|err| map_not_found_err(err, id))?
1008            .try_into_blob()
1009            .map_err(|err| to_read_object_err(err, id))?;
1010        let target = String::from_utf8(blob.take_data())
1011            .map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?;
1012        Ok(target)
1013    }
1014
1015    async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
1016        let locked_repo = self.lock_git_repo();
1017        let oid =
1018            locked_repo
1019                .write_blob(target.as_bytes())
1020                .map_err(|err| BackendError::WriteObject {
1021                    object_type: "symlink",
1022                    source: Box::new(err),
1023                })?;
1024        Ok(SymlinkId::new(oid.as_bytes().to_vec()))
1025    }
1026
1027    async fn read_copy(&self, _id: &CopyId) -> BackendResult<CopyHistory> {
1028        Err(BackendError::Unsupported(
1029            "The Git backend doesn't support tracked copies yet".to_string(),
1030        ))
1031    }
1032
1033    async fn write_copy(&self, _contents: &CopyHistory) -> BackendResult<CopyId> {
1034        Err(BackendError::Unsupported(
1035            "The Git backend doesn't support tracked copies yet".to_string(),
1036        ))
1037    }
1038
1039    async fn get_related_copies(&self, _copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>> {
1040        Err(BackendError::Unsupported(
1041            "The Git backend doesn't support tracked copies yet".to_string(),
1042        ))
1043    }
1044
1045    async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
1046        if id == &self.empty_tree_id {
1047            return Ok(Tree::default());
1048        }
1049        let git_tree_id = validate_git_object_id(id)?;
1050
1051        let locked_repo = self.lock_git_repo();
1052        let git_tree = locked_repo
1053            .find_object(git_tree_id)
1054            .map_err(|err| map_not_found_err(err, id))?
1055            .try_into_tree()
1056            .map_err(|err| to_read_object_err(err, id))?;
1057        let mut tree = Tree::default();
1058        for entry in git_tree.iter() {
1059            let entry = entry.map_err(|err| to_read_object_err(err, id))?;
1060            let name =
1061                str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?;
1062            let (name, value) = match entry.mode().kind() {
1063                gix::object::tree::EntryKind::Tree => {
1064                    let id = TreeId::from_bytes(entry.oid().as_bytes());
1065                    (name, TreeValue::Tree(id))
1066                }
1067                gix::object::tree::EntryKind::Blob => {
1068                    let id = FileId::from_bytes(entry.oid().as_bytes());
1069                    if let Some(basename) = name.strip_suffix(CONFLICT_SUFFIX) {
1070                        (
1071                            basename,
1072                            TreeValue::Conflict(ConflictId::from_bytes(entry.oid().as_bytes())),
1073                        )
1074                    } else {
1075                        (
1076                            name,
1077                            TreeValue::File {
1078                                id,
1079                                executable: false,
1080                                copy_id: CopyId::placeholder(),
1081                            },
1082                        )
1083                    }
1084                }
1085                gix::object::tree::EntryKind::BlobExecutable => {
1086                    let id = FileId::from_bytes(entry.oid().as_bytes());
1087                    (
1088                        name,
1089                        TreeValue::File {
1090                            id,
1091                            executable: true,
1092                            copy_id: CopyId::placeholder(),
1093                        },
1094                    )
1095                }
1096                gix::object::tree::EntryKind::Link => {
1097                    let id = SymlinkId::from_bytes(entry.oid().as_bytes());
1098                    (name, TreeValue::Symlink(id))
1099                }
1100                gix::object::tree::EntryKind::Commit => {
1101                    let id = CommitId::from_bytes(entry.oid().as_bytes());
1102                    (name, TreeValue::GitSubmodule(id))
1103                }
1104            };
1105            tree.set(RepoPathComponentBuf::new(name).unwrap(), value);
1106        }
1107        Ok(tree)
1108    }
1109
1110    async fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
1111        // Tree entries to be written must be sorted by Entry::filename(), which
1112        // is slightly different from the order of our backend::Tree.
1113        let entries = contents
1114            .entries()
1115            .map(|entry| {
1116                let name = entry.name().as_internal_str();
1117                match entry.value() {
1118                    TreeValue::File {
1119                        id,
1120                        executable: false,
1121                        copy_id: _, // TODO: Use the value
1122                    } => gix::objs::tree::Entry {
1123                        mode: gix::object::tree::EntryKind::Blob.into(),
1124                        filename: name.into(),
1125                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1126                    },
1127                    TreeValue::File {
1128                        id,
1129                        executable: true,
1130                        copy_id: _, // TODO: Use the value
1131                    } => gix::objs::tree::Entry {
1132                        mode: gix::object::tree::EntryKind::BlobExecutable.into(),
1133                        filename: name.into(),
1134                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1135                    },
1136                    TreeValue::Symlink(id) => gix::objs::tree::Entry {
1137                        mode: gix::object::tree::EntryKind::Link.into(),
1138                        filename: name.into(),
1139                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1140                    },
1141                    TreeValue::Tree(id) => gix::objs::tree::Entry {
1142                        mode: gix::object::tree::EntryKind::Tree.into(),
1143                        filename: name.into(),
1144                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1145                    },
1146                    TreeValue::GitSubmodule(id) => gix::objs::tree::Entry {
1147                        mode: gix::object::tree::EntryKind::Commit.into(),
1148                        filename: name.into(),
1149                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1150                    },
1151                    TreeValue::Conflict(id) => gix::objs::tree::Entry {
1152                        mode: gix::object::tree::EntryKind::Blob.into(),
1153                        filename: (name.to_owned() + CONFLICT_SUFFIX).into(),
1154                        oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()),
1155                    },
1156                }
1157            })
1158            .sorted_unstable()
1159            .collect();
1160        let locked_repo = self.lock_git_repo();
1161        let oid = locked_repo
1162            .write_object(gix::objs::Tree { entries })
1163            .map_err(|err| BackendError::WriteObject {
1164                object_type: "tree",
1165                source: Box::new(err),
1166            })?;
1167        Ok(TreeId::from_bytes(oid.as_bytes()))
1168    }
1169
1170    fn read_conflict(&self, _path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict> {
1171        let data = self.read_file_sync(&FileId::new(id.to_bytes()))?;
1172        let json: serde_json::Value = serde_json::from_slice(&data).unwrap();
1173        Ok(Conflict {
1174            removes: conflict_term_list_from_json(json.get("removes").unwrap()),
1175            adds: conflict_term_list_from_json(json.get("adds").unwrap()),
1176        })
1177    }
1178
1179    fn write_conflict(&self, _path: &RepoPath, conflict: &Conflict) -> BackendResult<ConflictId> {
1180        let json = serde_json::json!({
1181            "removes": conflict_term_list_to_json(&conflict.removes),
1182            "adds": conflict_term_list_to_json(&conflict.adds),
1183        });
1184        let json_string = json.to_string();
1185        let bytes = json_string.as_bytes();
1186        let locked_repo = self.lock_git_repo();
1187        let oid = locked_repo
1188            .write_blob(bytes)
1189            .map_err(|err| BackendError::WriteObject {
1190                object_type: "conflict",
1191                source: Box::new(err),
1192            })?;
1193        Ok(ConflictId::from_bytes(oid.as_bytes()))
1194    }
1195
1196    #[tracing::instrument(skip(self))]
1197    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
1198        if *id == self.root_commit_id {
1199            return Ok(make_root_commit(
1200                self.root_change_id().clone(),
1201                self.empty_tree_id.clone(),
1202            ));
1203        }
1204        let git_commit_id = validate_git_object_id(id)?;
1205
1206        let mut commit = {
1207            let locked_repo = self.lock_git_repo();
1208            let git_object = locked_repo
1209                .find_object(git_commit_id)
1210                .map_err(|err| map_not_found_err(err, id))?;
1211            let is_shallow = locked_repo
1212                .shallow_commits()
1213                .ok()
1214                .flatten()
1215                .is_some_and(|shallow| shallow.contains(&git_object.id));
1216            commit_from_git_without_root_parent(id, &git_object, false, is_shallow)?
1217        };
1218        if commit.parents.is_empty() {
1219            commit.parents.push(self.root_commit_id.clone());
1220        };
1221
1222        let table = self.cached_extra_metadata_table()?;
1223        if let Some(extras) = table.get_value(id.as_bytes()) {
1224            deserialize_extras(&mut commit, extras);
1225        } else {
1226            // TODO: Remove this hack and map to ObjectNotFound error if we're sure that
1227            // there are no reachable ancestor commits without extras metadata. Git commits
1228            // imported by jj < 0.8.0 might not have extras (#924).
1229            // https://github.com/jj-vcs/jj/issues/2343
1230            tracing::info!("unimported Git commit found");
1231            self.import_head_commits([id])?;
1232            let table = self.cached_extra_metadata_table()?;
1233            let extras = table.get_value(id.as_bytes()).unwrap();
1234            deserialize_extras(&mut commit, extras);
1235        }
1236        Ok(commit)
1237    }
1238
1239    async fn write_commit(
1240        &self,
1241        mut contents: Commit,
1242        mut sign_with: Option<&mut SigningFn>,
1243    ) -> BackendResult<(CommitId, Commit)> {
1244        assert!(contents.secure_sig.is_none(), "commit.secure_sig was set");
1245
1246        let locked_repo = self.lock_git_repo();
1247        let git_tree_id = match &contents.root_tree {
1248            MergedTreeId::Legacy(tree_id) => validate_git_object_id(tree_id)?,
1249            MergedTreeId::Merge(tree_ids) => match tree_ids.as_resolved() {
1250                Some(tree_id) => validate_git_object_id(tree_id)?,
1251                None => write_tree_conflict(&locked_repo, tree_ids)?,
1252            },
1253        };
1254        let author = signature_to_git(&contents.author);
1255        let mut committer = signature_to_git(&contents.committer);
1256        let message = &contents.description;
1257        if contents.parents.is_empty() {
1258            return Err(BackendError::Other(
1259                "Cannot write a commit with no parents".into(),
1260            ));
1261        }
1262        let mut parents = SmallVec::new();
1263        for parent_id in &contents.parents {
1264            if *parent_id == self.root_commit_id {
1265                // Git doesn't have a root commit, so if the parent is the root commit, we don't
1266                // add it to the list of parents to write in the Git commit. We also check that
1267                // there are no other parents since Git cannot represent a merge between a root
1268                // commit and another commit.
1269                if contents.parents.len() > 1 {
1270                    return Err(BackendError::Unsupported(
1271                        "The Git backend does not support creating merge commits with the root \
1272                         commit as one of the parents."
1273                            .to_owned(),
1274                    ));
1275                }
1276            } else {
1277                parents.push(validate_git_object_id(parent_id)?);
1278            }
1279        }
1280        let mut extra_headers = vec![];
1281        if let MergedTreeId::Merge(tree_ids) = &contents.root_tree {
1282            if !tree_ids.is_resolved() {
1283                let value = tree_ids.iter().map(|id| id.hex()).join(" ").into_bytes();
1284                extra_headers.push((
1285                    BString::new(JJ_TREES_COMMIT_HEADER.to_vec()),
1286                    BString::new(value),
1287                ));
1288            }
1289        }
1290        if self.write_change_id_header {
1291            extra_headers.push((
1292                BString::new(CHANGE_ID_COMMIT_HEADER.to_vec()),
1293                BString::new(contents.change_id.reverse_hex().into()),
1294            ));
1295        }
1296
1297        let extras = serialize_extras(&contents);
1298
1299        // If two writers write commits of the same id with different metadata, they
1300        // will both succeed and the metadata entries will be "merged" later. Since
1301        // metadata entry is keyed by the commit id, one of the entries would be lost.
1302        // To prevent such race condition locally, we extend the scope covered by the
1303        // table lock. This is still racy if multiple machines are involved and the
1304        // repository is rsync-ed.
1305        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
1306        let id = loop {
1307            let mut commit = gix::objs::Commit {
1308                message: message.to_owned().into(),
1309                tree: git_tree_id,
1310                author: author.into(),
1311                committer: committer.into(),
1312                encoding: None,
1313                parents: parents.clone(),
1314                extra_headers: extra_headers.clone(),
1315            };
1316
1317            if let Some(sign) = &mut sign_with {
1318                // we don't use gix pool, but at least use their heuristic
1319                let mut data = Vec::with_capacity(512);
1320                commit.write_to(&mut data).unwrap();
1321
1322                let sig = sign(&data).map_err(|err| BackendError::WriteObject {
1323                    object_type: "commit",
1324                    source: Box::new(err),
1325                })?;
1326                commit
1327                    .extra_headers
1328                    .push(("gpgsig".into(), sig.clone().into()));
1329                contents.secure_sig = Some(SecureSig { data, sig });
1330            }
1331
1332            let git_id =
1333                locked_repo
1334                    .write_object(&commit)
1335                    .map_err(|err| BackendError::WriteObject {
1336                        object_type: "commit",
1337                        source: Box::new(err),
1338                    })?;
1339
1340            match table.get_value(git_id.as_bytes()) {
1341                Some(existing_extras) if existing_extras != extras => {
1342                    // It's possible a commit already exists with the same commit id but different
1343                    // change id. Adjust the timestamp until this is no longer the case.
1344                    committer.time.seconds -= 1;
1345                }
1346                _ => break CommitId::from_bytes(git_id.as_bytes()),
1347            }
1348        };
1349
1350        // Everything up to this point had no permanent effect on the repo except
1351        // GC-able objects
1352        locked_repo
1353            .edit_reference(to_no_gc_ref_update(&id))
1354            .map_err(|err| BackendError::Other(Box::new(err)))?;
1355
1356        // Update the signature to match the one that was actually written to the object
1357        // store
1358        contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000);
1359        let mut mut_table = table.start_mutation();
1360        mut_table.add_entry(id.to_bytes(), extras);
1361        self.save_extra_metadata_table(mut_table, &table_lock)?;
1362        Ok((id, contents))
1363    }
1364
1365    fn get_copy_records(
1366        &self,
1367        paths: Option<&[RepoPathBuf]>,
1368        root_id: &CommitId,
1369        head_id: &CommitId,
1370    ) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
1371        let repo = self.git_repo();
1372        let root_tree = self.read_tree_for_commit(&repo, root_id)?;
1373        let head_tree = self.read_tree_for_commit(&repo, head_id)?;
1374
1375        let change_to_copy_record =
1376            |change: gix::object::tree::diff::Change| -> BackendResult<Option<CopyRecord>> {
1377                let gix::object::tree::diff::Change::Rewrite {
1378                    source_location,
1379                    source_entry_mode,
1380                    source_id,
1381                    entry_mode: dest_entry_mode,
1382                    location: dest_location,
1383                    ..
1384                } = change
1385                else {
1386                    return Ok(None);
1387                };
1388                // TODO: Renamed symlinks cannot be returned because CopyRecord
1389                // expects `source_file: FileId`.
1390                if !source_entry_mode.is_blob() || !dest_entry_mode.is_blob() {
1391                    return Ok(None);
1392                }
1393
1394                let source = str::from_utf8(source_location)
1395                    .map_err(|err| to_invalid_utf8_err(err, root_id))?;
1396                let dest = str::from_utf8(dest_location)
1397                    .map_err(|err| to_invalid_utf8_err(err, head_id))?;
1398
1399                let target = RepoPathBuf::from_internal_string(dest).unwrap();
1400                if !paths.is_none_or(|paths| paths.contains(&target)) {
1401                    return Ok(None);
1402                }
1403
1404                Ok(Some(CopyRecord {
1405                    target,
1406                    target_commit: head_id.clone(),
1407                    source: RepoPathBuf::from_internal_string(source).unwrap(),
1408                    source_file: FileId::from_bytes(source_id.as_bytes()),
1409                    source_commit: root_id.clone(),
1410                }))
1411            };
1412
1413        let mut records: Vec<BackendResult<CopyRecord>> = Vec::new();
1414        root_tree
1415            .changes()
1416            .map_err(|err| BackendError::Other(err.into()))?
1417            .options(|opts| {
1418                opts.track_path().track_rewrites(Some(gix::diff::Rewrites {
1419                    copies: Some(gix::diff::rewrites::Copies {
1420                        source: gix::diff::rewrites::CopySource::FromSetOfModifiedFiles,
1421                        percentage: Some(0.5),
1422                    }),
1423                    percentage: Some(0.5),
1424                    limit: 1000,
1425                    track_empty: false,
1426                }));
1427            })
1428            .for_each_to_obtain_tree_with_cache(
1429                &head_tree,
1430                &mut self.new_diff_platform()?,
1431                |change| -> BackendResult<_> {
1432                    match change_to_copy_record(change) {
1433                        Ok(None) => {}
1434                        Ok(Some(change)) => records.push(Ok(change)),
1435                        Err(err) => records.push(Err(err)),
1436                    }
1437                    Ok(gix::object::tree::diff::Action::Continue)
1438                },
1439            )
1440            .map_err(|err| BackendError::Other(err.into()))?;
1441        Ok(Box::pin(futures::stream::iter(records)))
1442    }
1443
1444    #[tracing::instrument(skip(self, index))]
1445    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
1446        let git_repo = self.lock_git_repo();
1447        let new_heads = index
1448            .all_heads_for_gc()
1449            .map_err(|err| BackendError::Other(err.into()))?
1450            .filter(|id| *id != self.root_commit_id);
1451        recreate_no_gc_refs(&git_repo, new_heads, keep_newer)?;
1452        // TODO: remove unreachable entries from extras table if segment file
1453        // mtime <= keep_newer? (it won't be consistent with no-gc refs
1454        // preserved by the keep_newer timestamp though)
1455        // TODO: remove unreachable extras table segments
1456        // TODO: pass in keep_newer to "git gc" command
1457        run_git_gc(self.git_executable.as_ref(), self.git_repo_path())
1458            .map_err(|err| BackendError::Other(err.into()))?;
1459        // Since "git gc" will move loose refs into packed refs, in-memory
1460        // packed-refs cache should be invalidated without relying on mtime.
1461        git_repo.refs.force_refresh_packed_buffer().ok();
1462        Ok(())
1463    }
1464}
1465
1466/// Write a tree conflict as a special tree with `.jjconflict-base-N` and
1467/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd.
1468fn write_tree_conflict(
1469    repo: &gix::Repository,
1470    conflict: &Merge<TreeId>,
1471) -> BackendResult<gix::ObjectId> {
1472    // Tree entries to be written must be sorted by Entry::filename().
1473    let mut entries = itertools::chain(
1474        conflict
1475            .removes()
1476            .enumerate()
1477            .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)),
1478        conflict
1479            .adds()
1480            .enumerate()
1481            .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)),
1482    )
1483    .map(|(name, tree_id)| gix::objs::tree::Entry {
1484        mode: gix::object::tree::EntryKind::Tree.into(),
1485        filename: name.into(),
1486        oid: gix::ObjectId::from_bytes_or_panic(tree_id.as_bytes()),
1487    })
1488    .collect_vec();
1489    let readme_id = repo
1490        .write_blob(
1491            r#"This commit was made by jj, https://github.com/jj-vcs/jj.
1492The commit contains file conflicts, and therefore looks wrong when used with plain
1493Git or other tools that are unfamiliar with jj.
1494
1495The .jjconflict-* directories represent the different inputs to the conflict.
1496For details, see
1497https://jj-vcs.github.io/jj/prerelease/git-compatibility/#format-mapping-details
1498
1499If you see this file in your working copy, it probably means that you used a
1500regular `git` command to check out a conflicted commit. Use `jj abandon` to
1501recover.
1502"#,
1503        )
1504        .map_err(|err| {
1505            BackendError::Other(format!("Failed to write README for conflict tree: {err}").into())
1506        })?
1507        .detach();
1508    entries.push(gix::objs::tree::Entry {
1509        mode: gix::object::tree::EntryKind::Blob.into(),
1510        filename: "README".into(),
1511        oid: readme_id,
1512    });
1513    entries.sort_unstable();
1514    let id = repo
1515        .write_object(gix::objs::Tree { entries })
1516        .map_err(|err| BackendError::WriteObject {
1517            object_type: "tree",
1518            source: Box::new(err),
1519        })?;
1520    Ok(id.detach())
1521}
1522
1523fn conflict_term_list_to_json(parts: &[ConflictTerm]) -> serde_json::Value {
1524    serde_json::Value::Array(parts.iter().map(conflict_term_to_json).collect())
1525}
1526
1527fn conflict_term_list_from_json(json: &serde_json::Value) -> Vec<ConflictTerm> {
1528    json.as_array()
1529        .unwrap()
1530        .iter()
1531        .map(conflict_term_from_json)
1532        .collect()
1533}
1534
1535fn conflict_term_to_json(part: &ConflictTerm) -> serde_json::Value {
1536    serde_json::json!({
1537        "value": tree_value_to_json(&part.value),
1538    })
1539}
1540
1541fn conflict_term_from_json(json: &serde_json::Value) -> ConflictTerm {
1542    let json_value = json.get("value").unwrap();
1543    ConflictTerm {
1544        value: tree_value_from_json(json_value),
1545    }
1546}
1547
1548fn tree_value_to_json(value: &TreeValue) -> serde_json::Value {
1549    match value {
1550        TreeValue::File {
1551            id,
1552            executable,
1553            copy_id: _,
1554        } => serde_json::json!({
1555             "file": {
1556                 "id": id.hex(),
1557                 "executable": executable,
1558             },
1559        }),
1560        TreeValue::Symlink(id) => serde_json::json!({
1561             "symlink_id": id.hex(),
1562        }),
1563        TreeValue::Tree(id) => serde_json::json!({
1564             "tree_id": id.hex(),
1565        }),
1566        TreeValue::GitSubmodule(id) => serde_json::json!({
1567             "submodule_id": id.hex(),
1568        }),
1569        TreeValue::Conflict(id) => serde_json::json!({
1570             "conflict_id": id.hex(),
1571        }),
1572    }
1573}
1574
1575fn tree_value_from_json(json: &serde_json::Value) -> TreeValue {
1576    if let Some(json_file) = json.get("file") {
1577        TreeValue::File {
1578            id: FileId::new(bytes_vec_from_json(json_file.get("id").unwrap())),
1579            executable: json_file.get("executable").unwrap().as_bool().unwrap(),
1580            copy_id: CopyId::placeholder(),
1581        }
1582    } else if let Some(json_id) = json.get("symlink_id") {
1583        TreeValue::Symlink(SymlinkId::new(bytes_vec_from_json(json_id)))
1584    } else if let Some(json_id) = json.get("tree_id") {
1585        TreeValue::Tree(TreeId::new(bytes_vec_from_json(json_id)))
1586    } else if let Some(json_id) = json.get("submodule_id") {
1587        TreeValue::GitSubmodule(CommitId::new(bytes_vec_from_json(json_id)))
1588    } else if let Some(json_id) = json.get("conflict_id") {
1589        TreeValue::Conflict(ConflictId::new(bytes_vec_from_json(json_id)))
1590    } else {
1591        panic!("unexpected json value in conflict: {json:#?}");
1592    }
1593}
1594
1595fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> {
1596    hex::decode(value.as_str().unwrap()).unwrap()
1597}
1598
1599#[cfg(test)]
1600mod tests {
1601    use assert_matches::assert_matches;
1602    use hex::ToHex as _;
1603    use pollster::FutureExt as _;
1604
1605    use super::*;
1606    use crate::config::StackedConfig;
1607    use crate::content_hash::blake2b_hash;
1608    use crate::tests::new_temp_dir;
1609
    /// User name placed in the test repositories' Git config (`user.name`) and
    /// in the Git signatures built by some tests.
    const GIT_USER: &str = "Someone";
    /// User email placed in the test repositories' Git config (`user.email`)
    /// and in the Git signatures built by some tests.
    const GIT_EMAIL: &str = "someone@example.com";
1612
1613    fn git_config() -> Vec<bstr::BString> {
1614        vec![
1615            format!("user.name = {GIT_USER}").into(),
1616            format!("user.email = {GIT_EMAIL}").into(),
1617            "init.defaultBranch = master".into(),
1618        ]
1619    }
1620
1621    fn open_options() -> gix::open::Options {
1622        gix::open::Options::isolated()
1623            .config_overrides(git_config())
1624            .strict_config(true)
1625    }
1626
1627    fn git_init(directory: impl AsRef<Path>) -> gix::Repository {
1628        gix::ThreadSafeRepository::init_opts(
1629            directory,
1630            gix::create::Kind::WithWorktree,
1631            gix::create::Options::default(),
1632            open_options(),
1633        )
1634        .unwrap()
1635        .to_thread_local()
1636    }
1637
1638    #[test]
1639    fn read_plain_git_commit() {
1640        let settings = user_settings();
1641        let temp_dir = new_temp_dir();
1642        let store_path = temp_dir.path();
1643        let git_repo_path = temp_dir.path().join("git");
1644        let git_repo = git_init(git_repo_path);
1645
1646        // Add a commit with some files in
1647        let blob1 = git_repo.write_blob(b"content1").unwrap().detach();
1648        let blob2 = git_repo.write_blob(b"normal").unwrap().detach();
1649        let mut dir_tree_editor = git_repo.empty_tree().edit().unwrap();
1650        dir_tree_editor
1651            .upsert("normal", gix::object::tree::EntryKind::Blob, blob1)
1652            .unwrap();
1653        dir_tree_editor
1654            .upsert("symlink", gix::object::tree::EntryKind::Link, blob2)
1655            .unwrap();
1656        let dir_tree_id = dir_tree_editor.write().unwrap().detach();
1657        let mut root_tree_builder = git_repo.empty_tree().edit().unwrap();
1658        root_tree_builder
1659            .upsert("dir", gix::object::tree::EntryKind::Tree, dir_tree_id)
1660            .unwrap();
1661        let root_tree_id = root_tree_builder.write().unwrap().detach();
1662        let git_author = gix::actor::Signature {
1663            name: "git author".into(),
1664            email: "git.author@example.com".into(),
1665            time: gix::date::Time::new(1000, 60 * 60),
1666        };
1667        let git_committer = gix::actor::Signature {
1668            name: "git committer".into(),
1669            email: "git.committer@example.com".into(),
1670            time: gix::date::Time::new(2000, -480 * 60),
1671        };
1672        let git_commit_id = git_repo
1673            .commit_as(
1674                &git_committer,
1675                &git_author,
1676                "refs/heads/dummy",
1677                "git commit message",
1678                root_tree_id,
1679                [] as [gix::ObjectId; 0],
1680            )
1681            .unwrap()
1682            .detach();
1683        git_repo
1684            .find_reference("refs/heads/dummy")
1685            .unwrap()
1686            .delete()
1687            .unwrap();
1688        let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263");
1689        // The change id is the leading reverse bits of the commit id
1690        let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25");
1691        // Check that the git commit above got the hash we expect
1692        assert_eq!(
1693            git_commit_id.as_bytes(),
1694            commit_id.as_bytes(),
1695            "{git_commit_id:?} vs {commit_id:?}"
1696        );
1697
1698        // Add an empty commit on top
1699        let git_commit_id2 = git_repo
1700            .commit_as(
1701                &git_committer,
1702                &git_author,
1703                "refs/heads/dummy2",
1704                "git commit message 2",
1705                root_tree_id,
1706                [git_commit_id],
1707            )
1708            .unwrap()
1709            .detach();
1710        git_repo
1711            .find_reference("refs/heads/dummy2")
1712            .unwrap()
1713            .delete()
1714            .unwrap();
1715        let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes());
1716
1717        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1718
1719        // Import the head commit and its ancestors
1720        backend.import_head_commits([&commit_id2]).unwrap();
1721        // Ref should be created only for the head commit
1722        let git_refs = backend
1723            .git_repo()
1724            .references()
1725            .unwrap()
1726            .prefixed("refs/jj/keep/")
1727            .unwrap()
1728            .map(|git_ref| git_ref.unwrap().id().detach())
1729            .collect_vec();
1730        assert_eq!(git_refs, vec![git_commit_id2]);
1731
1732        let commit = backend.read_commit(&commit_id).block_on().unwrap();
1733        assert_eq!(&commit.change_id, &change_id);
1734        assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
1735        assert_eq!(commit.predecessors, vec![]);
1736        assert_eq!(
1737            commit.root_tree.to_merge(),
1738            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1739        );
1740        assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1741        assert_eq!(commit.description, "git commit message");
1742        assert_eq!(commit.author.name, "git author");
1743        assert_eq!(commit.author.email, "git.author@example.com");
1744        assert_eq!(
1745            commit.author.timestamp.timestamp,
1746            MillisSinceEpoch(1000 * 1000)
1747        );
1748        assert_eq!(commit.author.timestamp.tz_offset, 60);
1749        assert_eq!(commit.committer.name, "git committer");
1750        assert_eq!(commit.committer.email, "git.committer@example.com");
1751        assert_eq!(
1752            commit.committer.timestamp.timestamp,
1753            MillisSinceEpoch(2000 * 1000)
1754        );
1755        assert_eq!(commit.committer.timestamp.tz_offset, -480);
1756
1757        let root_tree = backend
1758            .read_tree(
1759                RepoPath::root(),
1760                &TreeId::from_bytes(root_tree_id.as_bytes()),
1761            )
1762            .block_on()
1763            .unwrap();
1764        let mut root_entries = root_tree.entries();
1765        let dir = root_entries.next().unwrap();
1766        assert_eq!(root_entries.next(), None);
1767        assert_eq!(dir.name().as_internal_str(), "dir");
1768        assert_eq!(
1769            dir.value(),
1770            &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes()))
1771        );
1772
1773        let dir_tree = backend
1774            .read_tree(
1775                RepoPath::from_internal_string("dir").unwrap(),
1776                &TreeId::from_bytes(dir_tree_id.as_bytes()),
1777            )
1778            .block_on()
1779            .unwrap();
1780        let mut entries = dir_tree.entries();
1781        let file = entries.next().unwrap();
1782        let symlink = entries.next().unwrap();
1783        assert_eq!(entries.next(), None);
1784        assert_eq!(file.name().as_internal_str(), "normal");
1785        assert_eq!(
1786            file.value(),
1787            &TreeValue::File {
1788                id: FileId::from_bytes(blob1.as_bytes()),
1789                executable: false,
1790                copy_id: CopyId::placeholder(),
1791            }
1792        );
1793        assert_eq!(symlink.name().as_internal_str(), "symlink");
1794        assert_eq!(
1795            symlink.value(),
1796            &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
1797        );
1798
1799        let commit2 = backend.read_commit(&commit_id2).block_on().unwrap();
1800        assert_eq!(commit2.parents, vec![commit_id.clone()]);
1801        assert_eq!(commit.predecessors, vec![]);
1802        assert_eq!(
1803            commit.root_tree.to_merge(),
1804            Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1805        );
1806        assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1807    }
1808
1809    #[test]
1810    fn read_git_commit_without_importing() {
1811        let settings = user_settings();
1812        let temp_dir = new_temp_dir();
1813        let store_path = temp_dir.path();
1814        let git_repo_path = temp_dir.path().join("git");
1815        let git_repo = git_init(&git_repo_path);
1816
1817        let signature = gix::actor::Signature {
1818            name: GIT_USER.into(),
1819            email: GIT_EMAIL.into(),
1820            time: gix::date::Time::now_utc(),
1821        };
1822        let empty_tree_id =
1823            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1824        let git_commit_id = git_repo
1825            .commit_as(
1826                &signature,
1827                &signature,
1828                "refs/heads/main",
1829                "git commit message",
1830                empty_tree_id,
1831                [] as [gix::ObjectId; 0],
1832            )
1833            .unwrap();
1834
1835        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1836
1837        // read_commit() without import_head_commits() works as of now. This might be
1838        // changed later.
1839        assert!(backend
1840            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1841            .block_on()
1842            .is_ok());
1843        assert!(
1844            backend
1845                .cached_extra_metadata_table()
1846                .unwrap()
1847                .get_value(git_commit_id.as_bytes())
1848                .is_some(),
1849            "extra metadata should have been be created"
1850        );
1851    }
1852
1853    #[test]
1854    fn read_signed_git_commit() {
1855        let settings = user_settings();
1856        let temp_dir = new_temp_dir();
1857        let store_path = temp_dir.path();
1858        let git_repo_path = temp_dir.path().join("git");
1859        let git_repo = git_init(git_repo_path);
1860
1861        let signature = gix::actor::Signature {
1862            name: GIT_USER.into(),
1863            email: GIT_EMAIL.into(),
1864            time: gix::date::Time::now_utc(),
1865        };
1866        let empty_tree_id =
1867            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
1868
1869        let secure_sig =
1870            "here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP\n";
1871
1872        let mut commit = gix::objs::Commit {
1873            tree: empty_tree_id,
1874            parents: smallvec::SmallVec::new(),
1875            author: signature.clone(),
1876            committer: signature.clone(),
1877            encoding: None,
1878            message: "git commit message".into(),
1879            extra_headers: Vec::new(),
1880        };
1881
1882        let mut commit_buf = Vec::new();
1883        commit.write_to(&mut commit_buf).unwrap();
1884        let commit_str = std::str::from_utf8(&commit_buf).unwrap();
1885
1886        commit
1887            .extra_headers
1888            .push(("gpgsig".into(), secure_sig.into()));
1889
1890        let git_commit_id = git_repo.write_object(&commit).unwrap();
1891
1892        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1893
1894        let commit = backend
1895            .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
1896            .block_on()
1897            .unwrap();
1898
1899        let sig = commit.secure_sig.expect("failed to read the signature");
1900
1901        // converting to string for nicer assert diff
1902        assert_eq!(std::str::from_utf8(&sig.sig).unwrap(), secure_sig);
1903        assert_eq!(std::str::from_utf8(&sig.data).unwrap(), commit_str);
1904    }
1905
1906    #[test]
1907    fn round_trip_change_id_via_git_header() {
1908        let settings = user_settings();
1909        let temp_dir = new_temp_dir();
1910
1911        let store_path = temp_dir.path().join("store");
1912        fs::create_dir(&store_path).unwrap();
1913        let empty_store_path = temp_dir.path().join("empty_store");
1914        fs::create_dir(&empty_store_path).unwrap();
1915        let git_repo_path = temp_dir.path().join("git");
1916        let git_repo = git_init(git_repo_path);
1917
1918        let backend = GitBackend::init_external(&settings, &store_path, git_repo.path()).unwrap();
1919        let original_change_id = ChangeId::from_hex("1111eeee1111eeee1111eeee1111eeee");
1920        let commit = Commit {
1921            parents: vec![backend.root_commit_id().clone()],
1922            predecessors: vec![],
1923            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
1924            change_id: original_change_id.clone(),
1925            description: "initial".to_string(),
1926            author: create_signature(),
1927            committer: create_signature(),
1928            secure_sig: None,
1929        };
1930
1931        let (initial_commit_id, _init_commit) =
1932            backend.write_commit(commit, None).block_on().unwrap();
1933        let commit = backend.read_commit(&initial_commit_id).block_on().unwrap();
1934        assert_eq!(
1935            commit.change_id, original_change_id,
1936            "The change-id header did not roundtrip"
1937        );
1938
1939        // Because of how change ids are also persisted in extra proto files,
1940        // initialize a new store without those files, but reuse the same git
1941        // storage. This change-id must be derived from the git commit header.
1942        let no_extra_backend =
1943            GitBackend::init_external(&settings, &empty_store_path, git_repo.path()).unwrap();
1944        let no_extra_commit = no_extra_backend
1945            .read_commit(&initial_commit_id)
1946            .block_on()
1947            .unwrap();
1948
1949        assert_eq!(
1950            no_extra_commit.change_id, original_change_id,
1951            "The change-id header did not roundtrip"
1952        );
1953    }
1954
1955    #[test]
1956    fn read_empty_string_placeholder() {
1957        let git_signature1 = gix::actor::SignatureRef {
1958            name: EMPTY_STRING_PLACEHOLDER.into(),
1959            email: "git.author@example.com".into(),
1960            time: gix::date::Time::new(1000, 60 * 60),
1961        };
1962        let signature1 = signature_from_git(git_signature1);
1963        assert!(signature1.name.is_empty());
1964        assert_eq!(signature1.email, "git.author@example.com");
1965        let git_signature2 = gix::actor::SignatureRef {
1966            name: "git committer".into(),
1967            email: EMPTY_STRING_PLACEHOLDER.into(),
1968            time: gix::date::Time::new(2000, -480 * 60),
1969        };
1970        let signature2 = signature_from_git(git_signature2);
1971        assert_eq!(signature2.name, "git committer");
1972        assert!(signature2.email.is_empty());
1973    }
1974
1975    #[test]
1976    fn write_empty_string_placeholder() {
1977        let signature1 = Signature {
1978            name: "".to_string(),
1979            email: "someone@example.com".to_string(),
1980            timestamp: Timestamp {
1981                timestamp: MillisSinceEpoch(0),
1982                tz_offset: 0,
1983            },
1984        };
1985        let git_signature1 = signature_to_git(&signature1);
1986        assert_eq!(git_signature1.name, EMPTY_STRING_PLACEHOLDER);
1987        assert_eq!(git_signature1.email, "someone@example.com");
1988        let signature2 = Signature {
1989            name: "Someone".to_string(),
1990            email: "".to_string(),
1991            timestamp: Timestamp {
1992                timestamp: MillisSinceEpoch(0),
1993                tz_offset: 0,
1994            },
1995        };
1996        let git_signature2 = signature_to_git(&signature2);
1997        assert_eq!(git_signature2.name, "Someone");
1998        assert_eq!(git_signature2.email, EMPTY_STRING_PLACEHOLDER);
1999    }
2000
    /// Test that parents get written correctly
    ///
    /// Covers five cases in sequence, reusing and mutating one `commit` value:
    /// no parents (rejected), root-only parent, a single non-root parent, a
    /// merge of two non-root parents, and a merge that includes the root
    /// commit (rejected). Later cases use the ids returned by earlier ones.
    #[test]
    fn git_commit_parents() {
        let settings = user_settings();
        let temp_dir = new_temp_dir();
        let store_path = temp_dir.path();
        let git_repo_path = temp_dir.path().join("git");
        let git_repo = git_init(&git_repo_path);

        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
        let mut commit = Commit {
            parents: vec![],
            predecessors: vec![],
            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
            change_id: ChangeId::from_hex("abc123"),
            description: "".to_string(),
            author: create_signature(),
            committer: create_signature(),
            secure_sig: None,
        };

        // Synchronous wrapper around the async `write_commit`, without signing.
        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
            backend.write_commit(commit, None).block_on()
        };

        // No parents
        commit.parents = vec![];
        assert_matches!(
            write_commit(commit.clone()),
            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
        );

        // Only root commit as parent
        commit.parents = vec![backend.root_commit_id().clone()];
        let first_id = write_commit(commit.clone()).unwrap().0;
        let first_commit = backend.read_commit(&first_id).block_on().unwrap();
        assert_eq!(first_commit, commit);
        // On the git side the root parent is not written: the commit has no
        // parents at all.
        let first_git_commit = git_repo.find_commit(git_id(&first_id)).unwrap();
        assert!(first_git_commit.parent_ids().collect_vec().is_empty());

        // Only non-root commit as parent
        commit.parents = vec![first_id.clone()];
        let second_id = write_commit(commit.clone()).unwrap().0;
        let second_commit = backend.read_commit(&second_id).block_on().unwrap();
        assert_eq!(second_commit, commit);
        let second_git_commit = git_repo.find_commit(git_id(&second_id)).unwrap();
        assert_eq!(
            second_git_commit.parent_ids().collect_vec(),
            vec![git_id(&first_id)]
        );

        // Merge commit
        commit.parents = vec![first_id.clone(), second_id.clone()];
        let merge_id = write_commit(commit.clone()).unwrap().0;
        let merge_commit = backend.read_commit(&merge_id).block_on().unwrap();
        assert_eq!(merge_commit, commit);
        let merge_git_commit = git_repo.find_commit(git_id(&merge_id)).unwrap();
        assert_eq!(
            merge_git_commit.parent_ids().collect_vec(),
            vec![git_id(&first_id), git_id(&second_id)]
        );

        // Merge commit with root as one parent
        commit.parents = vec![first_id, backend.root_commit_id().clone()];
        assert_matches!(
            write_commit(commit),
            Err(BackendError::Unsupported(message)) if message.contains("root commit")
        );
    }
2070
2071    #[test]
2072    fn write_tree_conflicts() {
2073        let settings = user_settings();
2074        let temp_dir = new_temp_dir();
2075        let store_path = temp_dir.path();
2076        let git_repo_path = temp_dir.path().join("git");
2077        let git_repo = git_init(&git_repo_path);
2078
2079        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
2080        let create_tree = |i| {
2081            let blob_id = git_repo.write_blob(format!("content {i}")).unwrap();
2082            let mut tree_builder = git_repo.empty_tree().edit().unwrap();
2083            tree_builder
2084                .upsert(
2085                    format!("file{i}"),
2086                    gix::object::tree::EntryKind::Blob,
2087                    blob_id,
2088                )
2089                .unwrap();
2090            TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
2091        };
2092
2093        let root_tree = Merge::from_removes_adds(
2094            vec![create_tree(0), create_tree(1)],
2095            vec![create_tree(2), create_tree(3), create_tree(4)],
2096        );
2097        let mut commit = Commit {
2098            parents: vec![backend.root_commit_id().clone()],
2099            predecessors: vec![],
2100            root_tree: MergedTreeId::Merge(root_tree.clone()),
2101            change_id: ChangeId::from_hex("abc123"),
2102            description: "".to_string(),
2103            author: create_signature(),
2104            committer: create_signature(),
2105            secure_sig: None,
2106        };
2107
2108        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
2109            backend.write_commit(commit, None).block_on()
2110        };
2111
2112        // When writing a tree-level conflict, the root tree on the git side has the
2113        // individual trees as subtrees.
2114        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2115        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2116        assert_eq!(read_commit, commit);
2117        let git_commit = git_repo
2118            .find_commit(gix::ObjectId::from_bytes_or_panic(
2119                read_commit_id.as_bytes(),
2120            ))
2121            .unwrap();
2122        let git_tree = git_repo.find_tree(git_commit.tree_id().unwrap()).unwrap();
2123        assert!(git_tree
2124            .iter()
2125            .map(Result::unwrap)
2126            .filter(|entry| entry.filename() != b"README")
2127            .all(|entry| entry.mode().0 == 0o040000));
2128        let mut iter = git_tree.iter().map(Result::unwrap);
2129        let entry = iter.next().unwrap();
2130        assert_eq!(entry.filename(), b".jjconflict-base-0");
2131        assert_eq!(
2132            entry.id().as_bytes(),
2133            root_tree.get_remove(0).unwrap().as_bytes()
2134        );
2135        let entry = iter.next().unwrap();
2136        assert_eq!(entry.filename(), b".jjconflict-base-1");
2137        assert_eq!(
2138            entry.id().as_bytes(),
2139            root_tree.get_remove(1).unwrap().as_bytes()
2140        );
2141        let entry = iter.next().unwrap();
2142        assert_eq!(entry.filename(), b".jjconflict-side-0");
2143        assert_eq!(
2144            entry.id().as_bytes(),
2145            root_tree.get_add(0).unwrap().as_bytes()
2146        );
2147        let entry = iter.next().unwrap();
2148        assert_eq!(entry.filename(), b".jjconflict-side-1");
2149        assert_eq!(
2150            entry.id().as_bytes(),
2151            root_tree.get_add(1).unwrap().as_bytes()
2152        );
2153        let entry = iter.next().unwrap();
2154        assert_eq!(entry.filename(), b".jjconflict-side-2");
2155        assert_eq!(
2156            entry.id().as_bytes(),
2157            root_tree.get_add(2).unwrap().as_bytes()
2158        );
2159        let entry = iter.next().unwrap();
2160        assert_eq!(entry.filename(), b"README");
2161        assert_eq!(entry.mode().0, 0o100644);
2162        assert!(iter.next().is_none());
2163
2164        // When writing a single tree using the new format, it's represented by a
2165        // regular git tree.
2166        commit.root_tree = MergedTreeId::resolved(create_tree(5));
2167        let read_commit_id = write_commit(commit.clone()).unwrap().0;
2168        let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
2169        assert_eq!(read_commit, commit);
2170        let git_commit = git_repo
2171            .find_commit(gix::ObjectId::from_bytes_or_panic(
2172                read_commit_id.as_bytes(),
2173            ))
2174            .unwrap();
2175        assert_eq!(
2176            MergedTreeId::resolved(TreeId::from_bytes(git_commit.tree_id().unwrap().as_bytes())),
2177            commit.root_tree
2178        );
2179    }
2180
    /// A commit written through the backend is pointed to by a ref under
    /// `refs/jj/keep/`, and re-importing the commit after those refs were
    /// deleted creates such a ref again.
    #[test]
    fn commit_has_ref() {
        let settings = user_settings();
        let temp_dir = new_temp_dir();
        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
        let git_repo = backend.git_repo();
        let signature = Signature {
            name: "Someone".to_string(),
            email: "someone@example.com".to_string(),
            timestamp: Timestamp {
                timestamp: MillisSinceEpoch(0),
                tz_offset: 0,
            },
        };
        let commit = Commit {
            parents: vec![backend.root_commit_id().clone()],
            predecessors: vec![],
            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
            change_id: ChangeId::new(vec![42; 16]),
            description: "initial".to_string(),
            author: signature.clone(),
            committer: signature,
            secure_sig: None,
        };
        let commit_id = backend.write_commit(commit, None).block_on().unwrap().0;
        // Collect the targets of all keep refs and check ours is among them.
        let git_refs = git_repo.references().unwrap();
        let git_ref_ids: Vec<_> = git_refs
            .prefixed("refs/jj/keep/")
            .unwrap()
            .map(|x| x.unwrap().id().detach())
            .collect();
        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));

        // Concurrently-running GC deletes the ref, leaving the extra metadata.
        for git_ref in git_refs.prefixed("refs/jj/keep/").unwrap() {
            git_ref.unwrap().delete().unwrap();
        }
        // Re-imported commit should have new ref.
        backend.import_head_commits([&commit_id]).unwrap();
        // Take a fresh view of the references before looking for the new ref.
        let git_refs = git_repo.references().unwrap();
        let git_ref_ids: Vec<_> = git_refs
            .prefixed("refs/jj/keep/")
            .unwrap()
            .map(|x| x.unwrap().id().detach())
            .collect();
        assert!(git_ref_ids.iter().any(|id| *id == git_id(&commit_id)));
    }
2228
2229    #[test]
2230    fn import_head_commits_duplicates() {
2231        let settings = user_settings();
2232        let temp_dir = new_temp_dir();
2233        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2234        let git_repo = backend.git_repo();
2235
2236        let signature = gix::actor::Signature {
2237            name: GIT_USER.into(),
2238            email: GIT_EMAIL.into(),
2239            time: gix::date::Time::now_utc(),
2240        };
2241        let empty_tree_id =
2242            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
2243        let git_commit_id = git_repo
2244            .commit_as(
2245                &signature,
2246                &signature,
2247                "refs/heads/main",
2248                "git commit message",
2249                empty_tree_id,
2250                [] as [gix::ObjectId; 0],
2251            )
2252            .unwrap()
2253            .detach();
2254        let commit_id = CommitId::from_bytes(git_commit_id.as_bytes());
2255
2256        // Ref creation shouldn't fail because of duplicated head ids.
2257        backend
2258            .import_head_commits([&commit_id, &commit_id])
2259            .unwrap();
2260        assert!(git_repo
2261            .references()
2262            .unwrap()
2263            .prefixed("refs/jj/keep/")
2264            .unwrap()
2265            .any(|git_ref| git_ref.unwrap().id().detach() == git_commit_id));
2266    }
2267
    /// Writing a second commit that differs from the first only in its
    /// predecessors yields a distinct commit id, and the returned commit
    /// differs from the requested one only in its committer timestamp.
    #[test]
    fn overlapping_git_commit_id() {
        let settings = user_settings();
        let temp_dir = new_temp_dir();
        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
        let commit1 = Commit {
            parents: vec![backend.root_commit_id().clone()],
            predecessors: vec![],
            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
            change_id: ChangeId::from_hex("7f0a7ce70354b22efcccf7bf144017c4"),
            description: "initial".to_string(),
            author: create_signature(),
            committer: create_signature(),
            secure_sig: None,
        };

        // Synchronous wrapper around the async `write_commit`, without signing.
        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
            backend.write_commit(commit, None).block_on()
        };

        // Start the second commit from the commit value returned by the first
        // write, then record the first commit as its predecessor.
        let (commit_id1, mut commit2) = write_commit(commit1).unwrap();
        commit2.predecessors.push(commit_id1.clone());
        // `write_commit` should prevent the ids from being the same by changing the
        // committer timestamp of the commit it actually writes.
        let (commit_id2, mut actual_commit2) = write_commit(commit2.clone()).unwrap();
        // The returned commit matches what is stored under the returned ID
        assert_eq!(
            backend.read_commit(&commit_id2).block_on().unwrap(),
            actual_commit2
        );
        assert_ne!(commit_id2, commit_id1);
        // The committer timestamp should differ
        assert_ne!(
            actual_commit2.committer.timestamp.timestamp,
            commit2.committer.timestamp.timestamp
        );
        // The rest of the commit should be the same
        actual_commit2.committer.timestamp.timestamp = commit2.committer.timestamp.timestamp;
        assert_eq!(actual_commit2, commit2);
    }
2308
2309    #[test]
2310    fn write_signed_commit() {
2311        let settings = user_settings();
2312        let temp_dir = new_temp_dir();
2313        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
2314
2315        let commit = Commit {
2316            parents: vec![backend.root_commit_id().clone()],
2317            predecessors: vec![],
2318            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
2319            change_id: ChangeId::new(vec![42; 16]),
2320            description: "initial".to_string(),
2321            author: create_signature(),
2322            committer: create_signature(),
2323            secure_sig: None,
2324        };
2325
2326        let mut signer = |data: &_| {
2327            let hash: String = blake2b_hash(data).encode_hex();
2328            Ok(format!("test sig\nhash={hash}\n").into_bytes())
2329        };
2330
2331        let (id, commit) = backend
2332            .write_commit(commit, Some(&mut signer as &mut SigningFn))
2333            .block_on()
2334            .unwrap();
2335
2336        let git_repo = backend.git_repo();
2337        let obj = git_repo
2338            .find_object(gix::ObjectId::from_bytes_or_panic(id.as_bytes()))
2339            .unwrap();
2340        insta::assert_snapshot!(std::str::from_utf8(&obj.data).unwrap(), @r"
2341        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2342        author Someone <someone@example.com> 0 +0000
2343        committer Someone <someone@example.com> 0 +0000
2344        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp
2345        gpgsig test sig
2346         hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29
2347
2348        initial
2349        ");
2350
2351        let returned_sig = commit.secure_sig.expect("failed to return the signature");
2352
2353        let commit = backend.read_commit(&id).block_on().unwrap();
2354
2355        let sig = commit.secure_sig.expect("failed to read the signature");
2356        assert_eq!(&sig, &returned_sig);
2357
2358        insta::assert_snapshot!(std::str::from_utf8(&sig.sig).unwrap(), @r"
2359        test sig
2360        hash=03feb0caccbacce2e7b7bca67f4c82292dd487e669ed8a813120c9f82d3fd0801420a1f5d05e1393abfe4e9fc662399ec4a9a1898c5f1e547e0044a52bd4bd29
2361        ");
2362        insta::assert_snapshot!(std::str::from_utf8(&sig.data).unwrap(), @r"
2363        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
2364        author Someone <someone@example.com> 0 +0000
2365        committer Someone <someone@example.com> 0 +0000
2366        change-id xpxpxpxpxpxpxpxpxpxpxpxpxpxpxpxp
2367
2368        initial
2369        ");
2370    }
2371
2372    fn git_id(commit_id: &CommitId) -> gix::ObjectId {
2373        gix::ObjectId::from_bytes_or_panic(commit_id.as_bytes())
2374    }
2375
2376    fn create_signature() -> Signature {
2377        Signature {
2378            name: GIT_USER.to_string(),
2379            email: GIT_EMAIL.to_string(),
2380            timestamp: Timestamp {
2381                timestamp: MillisSinceEpoch(0),
2382                tz_offset: 0,
2383            },
2384        }
2385    }
2386
2387    // Not using testutils::user_settings() because there is a dependency cycle
2388    // 'jj_lib (1) -> testutils -> jj_lib (2)' which creates another distinct
2389    // UserSettings type. testutils returns jj_lib (2)'s UserSettings, whereas
2390    // our UserSettings type comes from jj_lib (1).
2391    fn user_settings() -> UserSettings {
2392        let config = StackedConfig::with_defaults();
2393        UserSettings::from_config(config).unwrap()
2394    }
2395}