
// Copyright 2020 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![allow(missing_docs)]

use std::any::Any;
use std::collections::BTreeMap;
use std::fmt::Debug;
use std::io::Read;
use std::time::SystemTime;

use async_trait::async_trait;
use futures::stream::BoxStream;
use thiserror::Error;

use crate::content_hash::ContentHash;
use crate::hex_util;
use crate::index::Index;
use crate::merge::Merge;
use crate::object_id::id_type;
use crate::object_id::ObjectId as _;
use crate::repo_path::RepoPath;
use crate::repo_path::RepoPathBuf;
use crate::repo_path::RepoPathComponent;
use crate::repo_path::RepoPathComponentBuf;
use crate::signing::SignResult;

id_type!(
    /// Identifier for a [`Commit`] based on its content. When a commit is
    /// rewritten, its `CommitId` changes.
    pub CommitId { hex() }
);
id_type!(
    /// Stable identifier for a [`Commit`]. Unlike the `CommitId`, the `ChangeId`
    /// follows the commit and is not updated when the commit is rewritten.
    pub ChangeId { reverse_hex() }
);
id_type!(pub TreeId { hex() });
id_type!(pub FileId { hex() });
id_type!(pub SymlinkId { hex() });
id_type!(pub ConflictId { hex() });

impl ChangeId {
    /// Returns the hex string representation of this ID, which uses `z-k`
    /// "digits" instead of `0-9a-f`.
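    ///
    /// # Example
    ///
    /// An illustrative sketch, assuming the `0`-to-`z` through `f`-to-`k`
    /// digit mapping of `hex_util::encode_reverse_hex` and the byte-vector
    /// `new` constructor generated by `id_type!`:
    ///
    /// ```ignore
    /// let id = ChangeId::new(vec![0x00, 0xff]);
    /// assert_eq!(id.reverse_hex(), "zzkk");
    /// ```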
    pub fn reverse_hex(&self) -> String {
        hex_util::encode_reverse_hex(&self.0)
    }
}

#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
pub struct MillisSinceEpoch(pub i64);

#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
pub struct Timestamp {
    pub timestamp: MillisSinceEpoch,
    // time zone offset in minutes
    pub tz_offset: i32,
}

impl Timestamp {
    pub fn now() -> Self {
        Self::from_datetime(chrono::offset::Local::now())
    }

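    /// Converts a fixed-offset `chrono` datetime into a `Timestamp`, storing
    /// the instant as milliseconds since the Unix epoch and the offset in
    /// minutes.
    ///
    /// A minimal sketch of the conversion, assuming a `chrono` version that
    /// provides `FixedOffset::east_opt` and `with_ymd_and_hms`:
    ///
    /// ```ignore
    /// use chrono::TimeZone as _;
    ///
    /// // 2021-01-01T00:00:00 at UTC+02:00
    /// let dt = chrono::FixedOffset::east_opt(2 * 3600)
    ///     .unwrap()
    ///     .with_ymd_and_hms(2021, 1, 1, 0, 0, 0)
    ///     .unwrap();
    /// let ts = Timestamp::from_datetime(dt);
    /// assert_eq!(ts.tz_offset, 120);
    /// ```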
    pub fn from_datetime<Tz: chrono::TimeZone<Offset = chrono::offset::FixedOffset>>(
        datetime: chrono::DateTime<Tz>,
    ) -> Self {
        Self {
            timestamp: MillisSinceEpoch(datetime.timestamp_millis()),
            tz_offset: datetime.offset().local_minus_utc() / 60,
        }
    }
}

/// Represents a [`Commit`] signature.
#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
pub struct Signature {
    pub name: String,
    pub email: String,
    pub timestamp: Timestamp,
}

/// Represents a cryptographically signed [`Commit`] signature.
#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
pub struct SecureSig {
    pub data: Vec<u8>,
    pub sig: Vec<u8>,
}

pub type SigningFn<'a> = dyn FnMut(&[u8]) -> SignResult<Vec<u8>> + Send + 'a;

/// Identifies a single legacy tree, which may have path-level conflicts, or a
/// merge of multiple trees, where the individual trees do not have conflicts.
// TODO(#1624): Delete this type at some point in the future, when we decide to drop
// support for conflicts in older repos, or maybe after we have provided an upgrade
// mechanism.
#[derive(ContentHash, Debug, Clone)]
pub enum MergedTreeId {
    /// The tree id of a legacy tree
    Legacy(TreeId),
    /// The tree id(s) of a merge tree
    Merge(Merge<TreeId>),
}

impl PartialEq for MergedTreeId {
    /// Overridden so that conflict-free trees are considered equal even if
    /// their `MergedTreeId` variant differs.
    fn eq(&self, other: &Self) -> bool {
        self.to_merge() == other.to_merge()
    }
}

impl Eq for MergedTreeId {}

impl MergedTreeId {
    /// Create a resolved `MergedTreeId` from a single regular tree.
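    ///
    /// An illustrative sketch: because `PartialEq` is defined via
    /// [`MergedTreeId::to_merge`], a resolved `Merge` variant compares equal
    /// to a `Legacy` variant wrapping the same tree id (assuming the
    /// byte-vector `new` constructor generated by `id_type!`):
    ///
    /// ```ignore
    /// let tree_id = TreeId::new(vec![0; 20]);
    /// assert_eq!(
    ///     MergedTreeId::resolved(tree_id.clone()),
    ///     MergedTreeId::Legacy(tree_id),
    /// );
    /// ```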
    pub fn resolved(tree_id: TreeId) -> Self {
        MergedTreeId::Merge(Merge::resolved(tree_id))
    }

    /// Return this id as `Merge<TreeId>`
    pub fn to_merge(&self) -> Merge<TreeId> {
        match self {
            MergedTreeId::Legacy(tree_id) => Merge::resolved(tree_id.clone()),
            MergedTreeId::Merge(tree_ids) => tree_ids.clone(),
        }
    }
}

#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
pub struct Commit {
    pub parents: Vec<CommitId>,
    pub predecessors: Vec<CommitId>,
    pub root_tree: MergedTreeId,
    pub change_id: ChangeId,
    pub description: String,
    pub author: Signature,
    pub committer: Signature,
    pub secure_sig: Option<SecureSig>,
}

#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
pub struct ConflictTerm {
    pub value: TreeValue,
}

#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
pub struct Conflict {
    // A conflict is represented by a list of positive and negative states that need to be applied.
    // In a simple 3-way merge of B and C with merge base A, the conflict will be { add: [B, C],
    // remove: [A] }. Also note that a conflict of the form { add: [A], remove: [] } is the
    // same as non-conflict A.
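    //
    // For instance (an illustrative sketch, with `file_a`, `file_b`, and `file_c`
    // standing in for the `TreeValue`s of the base and the two sides), the 3-way
    // merge above would be represented as:
    //
    //     Conflict {
    //         removes: vec![ConflictTerm { value: file_a }],
    //         adds: vec![ConflictTerm { value: file_b }, ConflictTerm { value: file_c }],
    //     }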
    pub removes: Vec<ConflictTerm>,
    pub adds: Vec<ConflictTerm>,
}

/// An individual copy event, from file A -> B.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct CopyRecord {
    /// The destination of the copy, B.
    pub target: RepoPathBuf,
    /// The CommitId where the copy took place.
    pub target_commit: CommitId,
    /// The source path a target was copied from.
    ///
    /// The source path is not required to be different from the target path.
    /// A custom backend may choose to represent 'rollbacks' as copies from a
    /// file onto itself, from a specific prior commit.
    pub source: RepoPathBuf,
    pub source_file: FileId,
    /// The source commit the target was copied from. Backends may use this
    /// field to implement 'integration' logic, where a source may be
    /// periodically merged into a target, similar to a branch, but the
    /// branching occurs at the file level rather than the repository level. It
    /// also follows naturally that any copy source targeted to a specific
    /// commit should avoid copy propagation on rebasing, which is desirable
    /// for 'fork' style copies.
    ///
    /// It is required that the commit id is an ancestor of the commit with
    /// which this copy source is associated.
    pub source_commit: CommitId,
}

/// Error that may occur during backend initialization.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct BackendInitError(pub Box<dyn std::error::Error + Send + Sync>);

/// Error that may occur during backend loading.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct BackendLoadError(pub Box<dyn std::error::Error + Send + Sync>);

/// Commit-backend error that may occur after the backend is loaded.
#[derive(Debug, Error)]
pub enum BackendError {
    #[error(
        "Invalid hash length for object of type {object_type} (expected {expected} bytes, got \
         {actual} bytes): {hash}"
    )]
    InvalidHashLength {
        expected: usize,
        actual: usize,
        object_type: String,
        hash: String,
    },
    #[error("Invalid UTF-8 for object {hash} of type {object_type}")]
    InvalidUtf8 {
        object_type: String,
        hash: String,
        source: std::str::Utf8Error,
    },
    #[error("Object {hash} of type {object_type} not found")]
    ObjectNotFound {
        object_type: String,
        hash: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    #[error("Error when reading object {hash} of type {object_type}")]
    ReadObject {
        object_type: String,
        hash: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    #[error("Access denied to read object {hash} of type {object_type}")]
    ReadAccessDenied {
        object_type: String,
        hash: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    #[error(
        "Error when reading file content for file {path} with id {id}",
        path = path.as_internal_file_string()
    )]
    ReadFile {
        path: RepoPathBuf,
        id: FileId,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    #[error("Could not write object of type {object_type}")]
    WriteObject {
        object_type: &'static str,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    #[error(transparent)]
    Other(Box<dyn std::error::Error + Send + Sync>),
    /// A valid operation was attempted, but it failed because it isn't
    /// supported by the particular backend.
    #[error("{0}")]
    Unsupported(String),
}

pub type BackendResult<T> = Result<T, BackendError>;

#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Hash)]
pub enum TreeValue {
    File { id: FileId, executable: bool },
    Symlink(SymlinkId),
    Tree(TreeId),
    GitSubmodule(CommitId),
    Conflict(ConflictId),
}

impl TreeValue {
    pub fn hex(&self) -> String {
        match self {
            TreeValue::File { id, .. } => id.hex(),
            TreeValue::Symlink(id) => id.hex(),
            TreeValue::Tree(id) => id.hex(),
            TreeValue::GitSubmodule(id) => id.hex(),
            TreeValue::Conflict(id) => id.hex(),
        }
    }
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct TreeEntry<'a> {
    name: &'a RepoPathComponent,
    value: &'a TreeValue,
}

impl<'a> TreeEntry<'a> {
    pub fn new(name: &'a RepoPathComponent, value: &'a TreeValue) -> Self {
        TreeEntry { name, value }
    }

    pub fn name(&self) -> &'a RepoPathComponent {
        self.name
    }

    pub fn value(&self) -> &'a TreeValue {
        self.value
    }
}

pub struct TreeEntriesNonRecursiveIterator<'a> {
    iter: std::collections::btree_map::Iter<'a, RepoPathComponentBuf, TreeValue>,
}

impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> {
    type Item = TreeEntry<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        self.iter
            .next()
            .map(|(name, value)| TreeEntry { name, value })
    }
}

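/// An in-memory tree object: a sorted map from path component to
/// [`TreeValue`].
///
/// A minimal sketch of building and inspecting a tree; `name` and `file_id`
/// are placeholders for a real [`RepoPathComponentBuf`] and a [`FileId`]
/// (e.g. one returned by [`Backend::write_file`]):
///
/// ```ignore
/// let mut tree = Tree::default();
/// tree.set(name, TreeValue::File { id: file_id.clone(), executable: false });
/// assert!(!tree.is_empty());
/// for entry in tree.entries() {
///     assert_eq!(entry.value(), &TreeValue::File { id: file_id.clone(), executable: false });
/// }
/// ```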
#[derive(ContentHash, Default, PartialEq, Eq, Debug, Clone)]
pub struct Tree {
    entries: BTreeMap<RepoPathComponentBuf, TreeValue>,
}

impl Tree {
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    pub fn names(&self) -> impl Iterator<Item = &RepoPathComponent> {
        self.entries.keys().map(|name| name.as_ref())
    }

    pub fn entries(&self) -> TreeEntriesNonRecursiveIterator {
        TreeEntriesNonRecursiveIterator {
            iter: self.entries.iter(),
        }
    }

    pub fn set(&mut self, name: RepoPathComponentBuf, value: TreeValue) {
        self.entries.insert(name, value);
    }

    pub fn remove(&mut self, name: &RepoPathComponent) {
        self.entries.remove(name);
    }

    pub fn set_or_remove(&mut self, name: &RepoPathComponent, value: Option<TreeValue>) {
        match value {
            None => {
                self.entries.remove(name);
            }
            Some(value) => {
                self.entries.insert(name.to_owned(), value);
            }
        }
    }

    pub fn entry(&self, name: &RepoPathComponent) -> Option<TreeEntry> {
        self.entries
            .get_key_value(name)
            .map(|(name, value)| TreeEntry { name, value })
    }

    pub fn value(&self, name: &RepoPathComponent) -> Option<&TreeValue> {
        self.entries.get(name)
    }
}

pub fn make_root_commit(root_change_id: ChangeId, empty_tree_id: TreeId) -> Commit {
    let timestamp = Timestamp {
        timestamp: MillisSinceEpoch(0),
        tz_offset: 0,
    };
    let signature = Signature {
        name: String::new(),
        email: String::new(),
        timestamp,
    };
    Commit {
        parents: vec![],
        predecessors: vec![],
        root_tree: MergedTreeId::resolved(empty_tree_id),
        change_id: root_change_id,
        description: String::new(),
        author: signature.clone(),
        committer: signature,
        secure_sig: None,
    }
}

/// Defines the interface for commit backends.
#[async_trait]
pub trait Backend: Send + Sync + Debug {
    fn as_any(&self) -> &dyn Any;

    /// A unique name that identifies this backend. Written to
    /// `.jj/repo/store/backend` when the repo is created.
    fn name(&self) -> &str;

    /// The length of commit IDs in bytes.
    fn commit_id_length(&self) -> usize;

    /// The length of change IDs in bytes.
    fn change_id_length(&self) -> usize;

    fn root_commit_id(&self) -> &CommitId;

    fn root_change_id(&self) -> &ChangeId;

    fn empty_tree_id(&self) -> &TreeId;

    /// An estimate of how many concurrent requests this backend handles well.
    /// A local backend like the Git backend (at least until it supports
    /// partial clones) may want to set this to 1. A cloud-backed backend may
    /// want to set it to 100 or so.
    ///
    /// It is not guaranteed that no more than this number of concurrent
    /// requests will be sent.
    fn concurrency(&self) -> usize;

    async fn read_file(&self, path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>>;

    async fn write_file(
        &self,
        path: &RepoPath,
        contents: &mut (dyn Read + Send),
    ) -> BackendResult<FileId>;

    async fn read_symlink(&self, path: &RepoPath, id: &SymlinkId) -> BackendResult<String>;

    async fn write_symlink(&self, path: &RepoPath, target: &str) -> BackendResult<SymlinkId>;

    async fn read_tree(&self, path: &RepoPath, id: &TreeId) -> BackendResult<Tree>;

    async fn write_tree(&self, path: &RepoPath, contents: &Tree) -> BackendResult<TreeId>;

    // Not async because it would force `MergedTree::value()` to be async. We don't
    // need this to be async anyway because it's only used by legacy repos.
    fn read_conflict(&self, path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict>;

    fn write_conflict(&self, path: &RepoPath, contents: &Conflict) -> BackendResult<ConflictId>;

    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit>;

    /// Writes a commit and returns its ID and the commit itself. The commit
    /// should contain the data that was actually written, which may differ
    /// from the data passed in. For example, the backend may change the
    /// committer name to an authenticated user's name, or the backend's
    /// timestamps may have less precision than the millisecond precision in
    /// `Commit`.
    ///
    /// The `sign_with` parameter may contain a function to cryptographically
    /// sign some binary representation of the commit. If the backend supports
    /// signing, it should call the function and store the result in an
    /// implementation-specific fashion; both `read_commit` and the commit
    /// returned by `write_commit` should then expose the signature as the
    /// `secure_sig` field.
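    ///
    /// A minimal sketch of how an implementation might invoke `sign_with`;
    /// `payload` and the mutable `commit` being assembled are placeholders:
    ///
    /// ```ignore
    /// if let Some(sign) = sign_with {
    ///     let sig = sign(&payload)?;
    ///     commit.secure_sig = Some(SecureSig { data: payload, sig });
    /// }
    /// ```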
    async fn write_commit(
        &self,
        contents: Commit,
        sign_with: Option<&mut SigningFn>,
    ) -> BackendResult<(CommitId, Commit)>;

    /// Get copy records for the DAG range `root..head`. If `paths` is `None`,
    /// include all paths; otherwise restrict the results to `paths`.
    ///
    /// The exact order in which records are returned is unspecified, but it is
    /// guaranteed to be reverse-topological. That is, for any two copy records
    /// with different commit ids A and B, if A is an ancestor of B, A is
    /// streamed after B.
    ///
    /// The records are streamed by design, to better support backends which
    /// may have very large single-file histories. This also allows more
    /// iterative algorithms like blame/annotate to short-circuit after a point
    /// without wasting unnecessary resources.
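    ///
    /// A minimal sketch of draining the stream, assuming `futures::StreamExt`
    /// and placeholder `backend`, `root`, and `head` values:
    ///
    /// ```ignore
    /// use futures::StreamExt as _;
    ///
    /// let mut records = backend.get_copy_records(None, &root, &head)?;
    /// while let Some(record) = records.next().await {
    ///     let record = record?;
    ///     println!(
    ///         "{} copied from {}",
    ///         record.target.as_internal_file_string(),
    ///         record.source.as_internal_file_string(),
    ///     );
    /// }
    /// ```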
    fn get_copy_records(
        &self,
        paths: Option<&[RepoPathBuf]>,
        root: &CommitId,
        head: &CommitId,
    ) -> BackendResult<BoxStream<BackendResult<CopyRecord>>>;

    /// Perform garbage collection.
    ///
    /// Commits found in the `index` won't be removed. In addition, objects
    /// created after `keep_newer` will be preserved. This mitigates the risk
    /// of deleting new commits created concurrently by another process.
    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()>;
}