jj_lib/
backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::any::Any;
18use std::collections::BTreeMap;
19use std::fmt::Debug;
20use std::pin::Pin;
21use std::time::SystemTime;
22
23use async_trait::async_trait;
24use futures::stream::BoxStream;
25use thiserror::Error;
26use tokio::io::AsyncRead;
27
28use crate::content_hash::ContentHash;
29use crate::hex_util;
30use crate::index::Index;
31use crate::merge::Merge;
32use crate::object_id::id_type;
33use crate::object_id::ObjectId as _;
34use crate::repo_path::RepoPath;
35use crate::repo_path::RepoPathBuf;
36use crate::repo_path::RepoPathComponent;
37use crate::repo_path::RepoPathComponentBuf;
38use crate::signing::SignResult;
39
40id_type!(
41    /// Identifier for a [`Commit`] based on its content. When a commit is
42    /// rewritten, its `CommitId` changes.
43    pub CommitId { hex() }
44);
45id_type!(
46    /// Stable identifier for a [`Commit`]. Unlike the `CommitId`, the `ChangeId`
47    /// follows the commit and is not updated when the commit is rewritten.
48    pub ChangeId { reverse_hex() }
49);
50id_type!(pub TreeId { hex() });
51id_type!(pub FileId { hex() });
52id_type!(pub SymlinkId { hex() });
53id_type!(pub ConflictId { hex() });
54id_type!(pub CopyId { hex() });
55
56impl ChangeId {
57    /// Returns the hex string representation of this ID, which uses `z-k`
58    /// "digits" instead of `0-9a-f`.
59    pub fn reverse_hex(&self) -> String {
60        hex_util::encode_reverse_hex(&self.0)
61    }
62}
63
64impl CopyId {
65    /// Returns a placeholder copy id to be used when we don't have a real copy
66    /// id yet.
67    // TODO: Delete this
68    pub fn placeholder() -> Self {
69        Self::new(vec![])
70    }
71}
72
73#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
74pub struct MillisSinceEpoch(pub i64);
75
76#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
77pub struct Timestamp {
78    pub timestamp: MillisSinceEpoch,
79    // time zone offset in minutes
80    pub tz_offset: i32,
81}
82
83impl Timestamp {
84    pub fn now() -> Self {
85        Self::from_datetime(chrono::offset::Local::now())
86    }
87
88    pub fn from_datetime<Tz: chrono::TimeZone<Offset = chrono::offset::FixedOffset>>(
89        datetime: chrono::DateTime<Tz>,
90    ) -> Self {
91        Self {
92            timestamp: MillisSinceEpoch(datetime.timestamp_millis()),
93            tz_offset: datetime.offset().local_minus_utc() / 60,
94        }
95    }
96}
97
98/// Represents a [`Commit`] signature.
99#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
100pub struct Signature {
101    pub name: String,
102    pub email: String,
103    pub timestamp: Timestamp,
104}
105
106/// Represents a cryptographically signed [`Commit`] signature.
107#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
108pub struct SecureSig {
109    pub data: Vec<u8>,
110    pub sig: Vec<u8>,
111}
112
113pub type SigningFn<'a> = dyn FnMut(&[u8]) -> SignResult<Vec<u8>> + Send + 'a;
114
115/// Identifies a single legacy tree, which may have path-level conflicts, or a
116/// merge of multiple trees, where the individual trees do not have conflicts.
117// TODO(#1624): Delete this type at some point in the future, when we decide to drop
118// support for conflicts in older repos, or maybe after we have provided an upgrade
119// mechanism.
120#[derive(ContentHash, Debug, Clone)]
121pub enum MergedTreeId {
122    /// The tree id of a legacy tree
123    Legacy(TreeId),
124    /// The tree id(s) of a merge tree
125    Merge(Merge<TreeId>),
126}
127
128impl PartialEq for MergedTreeId {
129    /// Overridden to make conflict-free trees be considered equal even if their
130    /// `MergedTreeId` variant is different.
131    fn eq(&self, other: &Self) -> bool {
132        self.to_merge() == other.to_merge()
133    }
134}
135
136impl Eq for MergedTreeId {}
137
138impl MergedTreeId {
139    /// Create a resolved `MergedTreeId` from a single regular tree.
140    pub fn resolved(tree_id: TreeId) -> Self {
141        MergedTreeId::Merge(Merge::resolved(tree_id))
142    }
143
144    /// Return this id as `Merge<TreeId>`
145    pub fn to_merge(&self) -> Merge<TreeId> {
146        match self {
147            MergedTreeId::Legacy(tree_id) => Merge::resolved(tree_id.clone()),
148            MergedTreeId::Merge(tree_ids) => tree_ids.clone(),
149        }
150    }
151}
152
153#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
154pub struct Commit {
155    pub parents: Vec<CommitId>,
156    // TODO: delete commit.predecessors when we can assume that most commits are
157    // tracked by op.commit_predecessors. (in jj 0.42 or so?)
158    pub predecessors: Vec<CommitId>,
159    pub root_tree: MergedTreeId,
160    pub change_id: ChangeId,
161    pub description: String,
162    pub author: Signature,
163    pub committer: Signature,
164    pub secure_sig: Option<SecureSig>,
165}
166
167#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
168pub struct ConflictTerm {
169    pub value: TreeValue,
170}
171
172#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
173pub struct Conflict {
174    // A conflict is represented by a list of positive and negative states that need to be applied.
175    // In a simple 3-way merge of B and C with merge base A, the conflict will be { add: [B, C],
176    // remove: [A] }. Also note that a conflict of the form { add: [A], remove: [] } is the
177    // same as non-conflict A.
178    pub removes: Vec<ConflictTerm>,
179    pub adds: Vec<ConflictTerm>,
180}
181
182/// An individual copy event, from file A -> B.
183#[derive(Debug, PartialEq, Eq, Clone)]
184pub struct CopyRecord {
185    /// The destination of the copy, B.
186    pub target: RepoPathBuf,
187    /// The CommitId where the copy took place.
188    pub target_commit: CommitId,
189    /// The source path a target was copied from.
190    ///
191    /// It is not required that the source path is different than the target
192    /// path. A custom backend may choose to represent 'rollbacks' as copies
193    /// from a file unto itself, from a specific prior commit.
194    pub source: RepoPathBuf,
195    pub source_file: FileId,
196    /// The source commit the target was copied from. Backends may use this
197    /// field to implement 'integration' logic, where a source may be
198    /// periodically merged into a target, similar to a branch, but the
199    /// branching occurs at the file level rather than the repository level. It
200    /// also follows naturally that any copy source targeted to a specific
201    /// commit should avoid copy propagation on rebasing, which is desirable
202    /// for 'fork' style copies.
203    ///
204    /// It is required that the commit id is an ancestor of the commit with
205    /// which this copy source is associated.
206    pub source_commit: CommitId,
207}
208
209/// Describes the copy history of a file. The copy object is unchanged when a
210/// file is modified.
211#[derive(ContentHash, Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
212pub struct CopyHistory {
213    /// The file's current path.
214    pub current_path: RepoPathBuf,
215    /// IDs of the files that became the current incarnation of this file.
216    ///
217    /// A newly created file has no parents. A regular copy or rename has one
218    /// parent. A merge of multiple files has multiple parents.
219    pub parents: Vec<CopyId>,
220    /// An optional piece of data to give the Copy object a different ID. May be
221    /// randomly generated. This allows a commit to say that a file was replaced
222    /// by a new incarnation of it, indicating a logically distinct file
223    /// taking the place of the previous file at the path.
224    pub salt: Vec<u8>,
225}
226
227/// Error that may occur during backend initialization.
228#[derive(Debug, Error)]
229#[error(transparent)]
230pub struct BackendInitError(pub Box<dyn std::error::Error + Send + Sync>);
231
232/// Error that may occur during backend loading.
233#[derive(Debug, Error)]
234#[error(transparent)]
235pub struct BackendLoadError(pub Box<dyn std::error::Error + Send + Sync>);
236
237/// Commit-backend error that may occur after the backend is loaded.
238#[derive(Debug, Error)]
239pub enum BackendError {
240    #[error(
241        "Invalid hash length for object of type {object_type} (expected {expected} bytes, got \
242         {actual} bytes): {hash}"
243    )]
244    InvalidHashLength {
245        expected: usize,
246        actual: usize,
247        object_type: String,
248        hash: String,
249    },
250    #[error("Invalid UTF-8 for object {hash} of type {object_type}")]
251    InvalidUtf8 {
252        object_type: String,
253        hash: String,
254        source: std::str::Utf8Error,
255    },
256    #[error("Object {hash} of type {object_type} not found")]
257    ObjectNotFound {
258        object_type: String,
259        hash: String,
260        source: Box<dyn std::error::Error + Send + Sync>,
261    },
262    #[error("Error when reading object {hash} of type {object_type}")]
263    ReadObject {
264        object_type: String,
265        hash: String,
266        source: Box<dyn std::error::Error + Send + Sync>,
267    },
268    #[error("Access denied to read object {hash} of type {object_type}")]
269    ReadAccessDenied {
270        object_type: String,
271        hash: String,
272        source: Box<dyn std::error::Error + Send + Sync>,
273    },
274    #[error(
275        "Error when reading file content for file {path} with id {id}",
276        path = path.as_internal_file_string()
277    )]
278    ReadFile {
279        path: RepoPathBuf,
280        id: FileId,
281        source: Box<dyn std::error::Error + Send + Sync>,
282    },
283    #[error("Could not write object of type {object_type}")]
284    WriteObject {
285        object_type: &'static str,
286        source: Box<dyn std::error::Error + Send + Sync>,
287    },
288    #[error(transparent)]
289    Other(Box<dyn std::error::Error + Send + Sync>),
290    /// A valid operation attempted, but failed because it isn't supported by
291    /// the particular backend.
292    #[error("{0}")]
293    Unsupported(String),
294}
295
296pub type BackendResult<T> = Result<T, BackendError>;
297
298#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Hash)]
299pub enum TreeValue {
300    // TODO: When there's a CopyId here, the copy object's path must match
301    // the path identified by the tree.
302    File {
303        id: FileId,
304        executable: bool,
305        copy_id: CopyId,
306    },
307    Symlink(SymlinkId),
308    Tree(TreeId),
309    GitSubmodule(CommitId),
310    Conflict(ConflictId),
311}
312
313impl TreeValue {
314    pub fn hex(&self) -> String {
315        match self {
316            TreeValue::File { id, .. } => id.hex(),
317            TreeValue::Symlink(id) => id.hex(),
318            TreeValue::Tree(id) => id.hex(),
319            TreeValue::GitSubmodule(id) => id.hex(),
320            TreeValue::Conflict(id) => id.hex(),
321        }
322    }
323}
324
325#[derive(Debug, PartialEq, Eq, Clone)]
326pub struct TreeEntry<'a> {
327    name: &'a RepoPathComponent,
328    value: &'a TreeValue,
329}
330
331impl<'a> TreeEntry<'a> {
332    pub fn new(name: &'a RepoPathComponent, value: &'a TreeValue) -> Self {
333        TreeEntry { name, value }
334    }
335
336    pub fn name(&self) -> &'a RepoPathComponent {
337        self.name
338    }
339
340    pub fn value(&self) -> &'a TreeValue {
341        self.value
342    }
343}
344
345pub struct TreeEntriesNonRecursiveIterator<'a> {
346    iter: std::collections::btree_map::Iter<'a, RepoPathComponentBuf, TreeValue>,
347}
348
349impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> {
350    type Item = TreeEntry<'a>;
351
352    fn next(&mut self) -> Option<Self::Item> {
353        self.iter
354            .next()
355            .map(|(name, value)| TreeEntry { name, value })
356    }
357}
358
359#[derive(ContentHash, Default, PartialEq, Eq, Debug, Clone)]
360pub struct Tree {
361    entries: BTreeMap<RepoPathComponentBuf, TreeValue>,
362}
363
364impl Tree {
365    pub fn is_empty(&self) -> bool {
366        self.entries.is_empty()
367    }
368
369    pub fn names(&self) -> impl Iterator<Item = &RepoPathComponent> {
370        self.entries.keys().map(|name| name.as_ref())
371    }
372
373    pub fn entries(&self) -> TreeEntriesNonRecursiveIterator {
374        TreeEntriesNonRecursiveIterator {
375            iter: self.entries.iter(),
376        }
377    }
378
379    pub fn set(&mut self, name: RepoPathComponentBuf, value: TreeValue) {
380        self.entries.insert(name, value);
381    }
382
383    pub fn remove(&mut self, name: &RepoPathComponent) {
384        self.entries.remove(name);
385    }
386
387    pub fn set_or_remove(&mut self, name: &RepoPathComponent, value: Option<TreeValue>) {
388        match value {
389            None => {
390                self.entries.remove(name);
391            }
392            Some(value) => {
393                self.entries.insert(name.to_owned(), value);
394            }
395        }
396    }
397
398    pub fn entry(&self, name: &RepoPathComponent) -> Option<TreeEntry> {
399        self.entries
400            .get_key_value(name)
401            .map(|(name, value)| TreeEntry { name, value })
402    }
403
404    pub fn value(&self, name: &RepoPathComponent) -> Option<&TreeValue> {
405        self.entries.get(name)
406    }
407}
408
409pub fn make_root_commit(root_change_id: ChangeId, empty_tree_id: TreeId) -> Commit {
410    let timestamp = Timestamp {
411        timestamp: MillisSinceEpoch(0),
412        tz_offset: 0,
413    };
414    let signature = Signature {
415        name: String::new(),
416        email: String::new(),
417        timestamp,
418    };
419    Commit {
420        parents: vec![],
421        predecessors: vec![],
422        root_tree: MergedTreeId::resolved(empty_tree_id),
423        change_id: root_change_id,
424        description: String::new(),
425        author: signature.clone(),
426        committer: signature,
427        secure_sig: None,
428    }
429}
430
431/// Defines the interface for commit backends.
432#[async_trait]
433pub trait Backend: Send + Sync + Debug {
434    fn as_any(&self) -> &dyn Any;
435
436    /// A unique name that identifies this backend. Written to
437    /// `.jj/repo/store/backend` when the repo is created.
438    fn name(&self) -> &str;
439
440    /// The length of commit IDs in bytes.
441    fn commit_id_length(&self) -> usize;
442
443    /// The length of change IDs in bytes.
444    fn change_id_length(&self) -> usize;
445
446    fn root_commit_id(&self) -> &CommitId;
447
448    fn root_change_id(&self) -> &ChangeId;
449
450    fn empty_tree_id(&self) -> &TreeId;
451
452    /// An estimate of how many concurrent requests this backend handles well. A
453    /// local backend like the Git backend (at until it supports partial clones)
454    /// may want to set this to 1. A cloud-backed backend may want to set it to
455    /// 100 or so.
456    ///
457    /// It is not guaranteed that at most this number of concurrent requests are
458    /// sent.
459    fn concurrency(&self) -> usize;
460
461    async fn read_file(
462        &self,
463        path: &RepoPath,
464        id: &FileId,
465    ) -> BackendResult<Pin<Box<dyn AsyncRead>>>;
466
467    async fn write_file(
468        &self,
469        path: &RepoPath,
470        contents: &mut (dyn AsyncRead + Send + Unpin),
471    ) -> BackendResult<FileId>;
472
473    async fn read_symlink(&self, path: &RepoPath, id: &SymlinkId) -> BackendResult<String>;
474
475    async fn write_symlink(&self, path: &RepoPath, target: &str) -> BackendResult<SymlinkId>;
476
477    /// Read the specified `CopyHistory` object.
478    ///
479    /// Backends that don't support copy tracking may return
480    /// `BackendError::Unsupported`.
481    async fn read_copy(&self, id: &CopyId) -> BackendResult<CopyHistory>;
482
483    /// Write the `CopyHistory` object and return its ID.
484    ///
485    /// Backends that don't support copy tracking may return
486    /// `BackendError::Unsupported`.
487    async fn write_copy(&self, copy: &CopyHistory) -> BackendResult<CopyId>;
488
489    /// Find all copy histories that are related to the specified one. This is
490    /// defined as those that are ancestors of the given specified one, plus
491    /// their descendants. Children must be returned before parents.
492    ///
493    /// It is valid (but wasteful) to include other copy histories, such as
494    /// siblings, or even completely unrelated copy histories.
495    ///
496    /// Backends that don't support copy tracking may return
497    /// `BackendError::Unsupported`.
498    async fn get_related_copies(&self, copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>>;
499
500    async fn read_tree(&self, path: &RepoPath, id: &TreeId) -> BackendResult<Tree>;
501
502    async fn write_tree(&self, path: &RepoPath, contents: &Tree) -> BackendResult<TreeId>;
503
504    // Not async because it would force `MergedTree::value()` to be async. We don't
505    // need this to be async anyway because it's only used by legacy repos.
506    fn read_conflict(&self, path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict>;
507
508    fn write_conflict(&self, path: &RepoPath, contents: &Conflict) -> BackendResult<ConflictId>;
509
510    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit>;
511
512    /// Writes a commit and returns its ID and the commit itself. The commit
513    /// should contain the data that was actually written, which may differ
514    /// from the data passed in. For example, the backend may change the
515    /// committer name to an authenticated user's name, or the backend's
516    /// timestamps may have less precision than the millisecond precision in
517    /// `Commit`.
518    ///
519    /// The `sign_with` parameter could contain a function to cryptographically
520    /// sign some binary representation of the commit.
521    /// If the backend supports it, it could call it and store the result in
522    /// an implementation specific fashion, and both `read_commit` and the
523    /// return of `write_commit` should read it back as the `secure_sig`
524    /// field.
525    async fn write_commit(
526        &self,
527        contents: Commit,
528        sign_with: Option<&mut SigningFn>,
529    ) -> BackendResult<(CommitId, Commit)>;
530
531    /// Get copy records for the dag range `root..head`.  If `paths` is None
532    /// include all paths, otherwise restrict to only `paths`.
533    ///
534    /// The exact order these are returned is unspecified, but it is guaranteed
535    /// to be reverse-topological. That is, for any two copy records with
536    /// different commit ids A and B, if A is an ancestor of B, A is streamed
537    /// after B.
538    ///
539    /// Streaming by design to better support large backends which may have very
540    /// large single-file histories. This also allows more iterative algorithms
541    /// like blame/annotate to short-circuit after a point without wasting
542    /// unnecessary resources.
543    fn get_copy_records(
544        &self,
545        paths: Option<&[RepoPathBuf]>,
546        root: &CommitId,
547        head: &CommitId,
548    ) -> BackendResult<BoxStream<BackendResult<CopyRecord>>>;
549
550    /// Perform garbage collection.
551    ///
552    /// All commits found in the `index` won't be removed. In addition to that,
553    /// objects created after `keep_newer` will be preserved. This mitigates a
554    /// risk of deleting new commits created concurrently by another process.
555    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()>;
556}