jj_lib/
backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::any::Any;
18use std::collections::BTreeMap;
19use std::fmt::Debug;
20use std::pin::Pin;
21use std::time::SystemTime;
22
23use async_trait::async_trait;
24use chrono::TimeZone as _;
25use futures::stream::BoxStream;
26use thiserror::Error;
27use tokio::io::AsyncRead;
28
29use crate::content_hash::ContentHash;
30use crate::hex_util;
31use crate::index::Index;
32use crate::merge::Merge;
33use crate::object_id::id_type;
34use crate::object_id::ObjectId as _;
35use crate::repo_path::RepoPath;
36use crate::repo_path::RepoPathBuf;
37use crate::repo_path::RepoPathComponent;
38use crate::repo_path::RepoPathComponentBuf;
39use crate::signing::SignResult;
40
41id_type!(
42    /// Identifier for a [`Commit`] based on its content. When a commit is
43    /// rewritten, its `CommitId` changes.
44    pub CommitId { hex() }
45);
46id_type!(
47    /// Stable identifier for a [`Commit`]. Unlike the `CommitId`, the `ChangeId`
48    /// follows the commit and is not updated when the commit is rewritten.
49    pub ChangeId { reverse_hex() }
50);
51id_type!(pub TreeId { hex() });
52id_type!(pub FileId { hex() });
53id_type!(pub SymlinkId { hex() });
54id_type!(pub ConflictId { hex() });
55id_type!(pub CopyId { hex() });
56
57impl ChangeId {
58    /// Parses the given "reverse" hex string into a `ChangeId`.
59    pub fn try_from_reverse_hex(hex: impl AsRef<[u8]>) -> Option<Self> {
60        hex_util::decode_reverse_hex(hex.as_ref()).map(Self)
61    }
62
63    /// Returns the hex string representation of this ID, which uses `z-k`
64    /// "digits" instead of `0-9a-f`.
65    pub fn reverse_hex(&self) -> String {
66        hex_util::encode_reverse_hex(&self.0)
67    }
68}
69
70impl CopyId {
71    /// Returns a placeholder copy id to be used when we don't have a real copy
72    /// id yet.
73    // TODO: Delete this
74    pub fn placeholder() -> Self {
75        Self::new(vec![])
76    }
77}
78
79#[derive(Debug, Error)]
80#[error("Out-of-range date")]
81pub struct TimestampOutOfRange;
82
83#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
84pub struct MillisSinceEpoch(pub i64);
85
86#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
87pub struct Timestamp {
88    pub timestamp: MillisSinceEpoch,
89    // time zone offset in minutes
90    pub tz_offset: i32,
91}
92
93impl Timestamp {
94    pub fn now() -> Self {
95        Self::from_datetime(chrono::offset::Local::now())
96    }
97
98    pub fn from_datetime<Tz: chrono::TimeZone<Offset = chrono::offset::FixedOffset>>(
99        datetime: chrono::DateTime<Tz>,
100    ) -> Self {
101        Self {
102            timestamp: MillisSinceEpoch(datetime.timestamp_millis()),
103            tz_offset: datetime.offset().local_minus_utc() / 60,
104        }
105    }
106
107    pub fn to_datetime(
108        &self,
109    ) -> Result<chrono::DateTime<chrono::FixedOffset>, TimestampOutOfRange> {
110        let utc = match chrono::Utc.timestamp_opt(
111            self.timestamp.0.div_euclid(1000),
112            (self.timestamp.0.rem_euclid(1000)) as u32 * 1000000,
113        ) {
114            chrono::LocalResult::None => {
115                return Err(TimestampOutOfRange);
116            }
117            chrono::LocalResult::Single(x) => x,
118            chrono::LocalResult::Ambiguous(y, _z) => y,
119        };
120
121        Ok(utc.with_timezone(
122            &chrono::FixedOffset::east_opt(self.tz_offset * 60)
123                .unwrap_or_else(|| chrono::FixedOffset::east_opt(0).unwrap()),
124        ))
125    }
126}
127
128impl serde::Serialize for Timestamp {
129    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
130    where
131        S: serde::Serializer,
132    {
133        // TODO: test is_human_readable() to use raw format?
134        let t = self.to_datetime().map_err(serde::ser::Error::custom)?;
135        t.serialize(serializer)
136    }
137}
138
139/// Represents a [`Commit`] signature.
140#[derive(ContentHash, Debug, PartialEq, Eq, Clone, serde::Serialize)]
141pub struct Signature {
142    pub name: String,
143    pub email: String,
144    pub timestamp: Timestamp,
145}
146
147/// Represents a cryptographically signed [`Commit`] signature.
148#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
149pub struct SecureSig {
150    pub data: Vec<u8>,
151    pub sig: Vec<u8>,
152}
153
154pub type SigningFn<'a> = dyn FnMut(&[u8]) -> SignResult<Vec<u8>> + Send + 'a;
155
156/// Identifies a single legacy tree, which may have path-level conflicts, or a
157/// merge of multiple trees, where the individual trees do not have conflicts.
158// TODO(#1624): Delete this type at some point in the future, when we decide to drop
159// support for conflicts in older repos, or maybe after we have provided an upgrade
160// mechanism.
161#[derive(ContentHash, Debug, Clone)]
162pub enum MergedTreeId {
163    /// The tree id of a legacy tree
164    Legacy(TreeId),
165    /// The tree id(s) of a merge tree
166    Merge(Merge<TreeId>),
167}
168
169impl PartialEq for MergedTreeId {
170    /// Overridden to make conflict-free trees be considered equal even if their
171    /// `MergedTreeId` variant is different.
172    fn eq(&self, other: &Self) -> bool {
173        self.to_merge() == other.to_merge()
174    }
175}
176
177impl Eq for MergedTreeId {}
178
179impl MergedTreeId {
180    /// Create a resolved `MergedTreeId` from a single regular tree.
181    pub fn resolved(tree_id: TreeId) -> Self {
182        MergedTreeId::Merge(Merge::resolved(tree_id))
183    }
184
185    /// Return this id as `Merge<TreeId>`
186    pub fn to_merge(&self) -> Merge<TreeId> {
187        match self {
188            MergedTreeId::Legacy(tree_id) => Merge::resolved(tree_id.clone()),
189            MergedTreeId::Merge(tree_ids) => tree_ids.clone(),
190        }
191    }
192}
193
194#[derive(ContentHash, Debug, PartialEq, Eq, Clone, serde::Serialize)]
195pub struct Commit {
196    pub parents: Vec<CommitId>,
197    // TODO: delete commit.predecessors when we can assume that most commits are
198    // tracked by op.commit_predecessors. (in jj 0.42 or so?)
199    #[serde(skip)] // deprecated
200    pub predecessors: Vec<CommitId>,
201    #[serde(skip)] // TODO: should be exposed?
202    pub root_tree: MergedTreeId,
203    pub change_id: ChangeId,
204    pub description: String,
205    pub author: Signature,
206    pub committer: Signature,
207    #[serde(skip)] // raw data wouldn't be useful
208    pub secure_sig: Option<SecureSig>,
209}
210
211#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
212pub struct ConflictTerm {
213    pub value: TreeValue,
214}
215
216#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
217pub struct Conflict {
218    // A conflict is represented by a list of positive and negative states that need to be applied.
219    // In a simple 3-way merge of B and C with merge base A, the conflict will be { add: [B, C],
220    // remove: [A] }. Also note that a conflict of the form { add: [A], remove: [] } is the
221    // same as non-conflict A.
222    pub removes: Vec<ConflictTerm>,
223    pub adds: Vec<ConflictTerm>,
224}
225
226/// An individual copy event, from file A -> B.
227#[derive(Debug, PartialEq, Eq, Clone)]
228pub struct CopyRecord {
229    /// The destination of the copy, B.
230    pub target: RepoPathBuf,
231    /// The CommitId where the copy took place.
232    pub target_commit: CommitId,
233    /// The source path a target was copied from.
234    ///
235    /// It is not required that the source path is different than the target
236    /// path. A custom backend may choose to represent 'rollbacks' as copies
237    /// from a file unto itself, from a specific prior commit.
238    pub source: RepoPathBuf,
239    pub source_file: FileId,
240    /// The source commit the target was copied from. Backends may use this
241    /// field to implement 'integration' logic, where a source may be
242    /// periodically merged into a target, similar to a branch, but the
243    /// branching occurs at the file level rather than the repository level. It
244    /// also follows naturally that any copy source targeted to a specific
245    /// commit should avoid copy propagation on rebasing, which is desirable
246    /// for 'fork' style copies.
247    ///
248    /// It is required that the commit id is an ancestor of the commit with
249    /// which this copy source is associated.
250    pub source_commit: CommitId,
251}
252
253/// Describes the copy history of a file. The copy object is unchanged when a
254/// file is modified.
255#[derive(ContentHash, Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
256pub struct CopyHistory {
257    /// The file's current path.
258    pub current_path: RepoPathBuf,
259    /// IDs of the files that became the current incarnation of this file.
260    ///
261    /// A newly created file has no parents. A regular copy or rename has one
262    /// parent. A merge of multiple files has multiple parents.
263    pub parents: Vec<CopyId>,
264    /// An optional piece of data to give the Copy object a different ID. May be
265    /// randomly generated. This allows a commit to say that a file was replaced
266    /// by a new incarnation of it, indicating a logically distinct file
267    /// taking the place of the previous file at the path.
268    pub salt: Vec<u8>,
269}
270
271/// Error that may occur during backend initialization.
272#[derive(Debug, Error)]
273#[error(transparent)]
274pub struct BackendInitError(pub Box<dyn std::error::Error + Send + Sync>);
275
276/// Error that may occur during backend loading.
277#[derive(Debug, Error)]
278#[error(transparent)]
279pub struct BackendLoadError(pub Box<dyn std::error::Error + Send + Sync>);
280
281/// Commit-backend error that may occur after the backend is loaded.
282#[derive(Debug, Error)]
283pub enum BackendError {
284    #[error(
285        "Invalid hash length for object of type {object_type} (expected {expected} bytes, got \
286         {actual} bytes): {hash}"
287    )]
288    InvalidHashLength {
289        expected: usize,
290        actual: usize,
291        object_type: String,
292        hash: String,
293    },
294    #[error("Invalid UTF-8 for object {hash} of type {object_type}")]
295    InvalidUtf8 {
296        object_type: String,
297        hash: String,
298        source: std::str::Utf8Error,
299    },
300    #[error("Object {hash} of type {object_type} not found")]
301    ObjectNotFound {
302        object_type: String,
303        hash: String,
304        source: Box<dyn std::error::Error + Send + Sync>,
305    },
306    #[error("Error when reading object {hash} of type {object_type}")]
307    ReadObject {
308        object_type: String,
309        hash: String,
310        source: Box<dyn std::error::Error + Send + Sync>,
311    },
312    #[error("Access denied to read object {hash} of type {object_type}")]
313    ReadAccessDenied {
314        object_type: String,
315        hash: String,
316        source: Box<dyn std::error::Error + Send + Sync>,
317    },
318    #[error(
319        "Error when reading file content for file {path} with id {id}",
320        path = path.as_internal_file_string()
321    )]
322    ReadFile {
323        path: RepoPathBuf,
324        id: FileId,
325        source: Box<dyn std::error::Error + Send + Sync>,
326    },
327    #[error("Could not write object of type {object_type}")]
328    WriteObject {
329        object_type: &'static str,
330        source: Box<dyn std::error::Error + Send + Sync>,
331    },
332    #[error(transparent)]
333    Other(Box<dyn std::error::Error + Send + Sync>),
334    /// A valid operation attempted, but failed because it isn't supported by
335    /// the particular backend.
336    #[error("{0}")]
337    Unsupported(String),
338}
339
340pub type BackendResult<T> = Result<T, BackendError>;
341
342#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Hash)]
343pub enum TreeValue {
344    // TODO: When there's a CopyId here, the copy object's path must match
345    // the path identified by the tree.
346    File {
347        id: FileId,
348        executable: bool,
349        copy_id: CopyId,
350    },
351    Symlink(SymlinkId),
352    Tree(TreeId),
353    GitSubmodule(CommitId),
354    Conflict(ConflictId),
355}
356
357impl TreeValue {
358    pub fn hex(&self) -> String {
359        match self {
360            TreeValue::File { id, .. } => id.hex(),
361            TreeValue::Symlink(id) => id.hex(),
362            TreeValue::Tree(id) => id.hex(),
363            TreeValue::GitSubmodule(id) => id.hex(),
364            TreeValue::Conflict(id) => id.hex(),
365        }
366    }
367}
368
369#[derive(Debug, PartialEq, Eq, Clone)]
370pub struct TreeEntry<'a> {
371    name: &'a RepoPathComponent,
372    value: &'a TreeValue,
373}
374
375impl<'a> TreeEntry<'a> {
376    pub fn new(name: &'a RepoPathComponent, value: &'a TreeValue) -> Self {
377        TreeEntry { name, value }
378    }
379
380    pub fn name(&self) -> &'a RepoPathComponent {
381        self.name
382    }
383
384    pub fn value(&self) -> &'a TreeValue {
385        self.value
386    }
387}
388
389pub struct TreeEntriesNonRecursiveIterator<'a> {
390    iter: std::collections::btree_map::Iter<'a, RepoPathComponentBuf, TreeValue>,
391}
392
393impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> {
394    type Item = TreeEntry<'a>;
395
396    fn next(&mut self) -> Option<Self::Item> {
397        self.iter
398            .next()
399            .map(|(name, value)| TreeEntry { name, value })
400    }
401}
402
403#[derive(ContentHash, Default, PartialEq, Eq, Debug, Clone)]
404pub struct Tree {
405    entries: BTreeMap<RepoPathComponentBuf, TreeValue>,
406}
407
408impl Tree {
409    pub fn is_empty(&self) -> bool {
410        self.entries.is_empty()
411    }
412
413    pub fn names(&self) -> impl Iterator<Item = &RepoPathComponent> {
414        self.entries.keys().map(|name| name.as_ref())
415    }
416
417    pub fn entries(&self) -> TreeEntriesNonRecursiveIterator<'_> {
418        TreeEntriesNonRecursiveIterator {
419            iter: self.entries.iter(),
420        }
421    }
422
423    pub fn set(&mut self, name: RepoPathComponentBuf, value: TreeValue) {
424        self.entries.insert(name, value);
425    }
426
427    pub fn remove(&mut self, name: &RepoPathComponent) {
428        self.entries.remove(name);
429    }
430
431    pub fn set_or_remove(&mut self, name: &RepoPathComponent, value: Option<TreeValue>) {
432        match value {
433            None => {
434                self.entries.remove(name);
435            }
436            Some(value) => {
437                self.entries.insert(name.to_owned(), value);
438            }
439        }
440    }
441
442    pub fn entry(&self, name: &RepoPathComponent) -> Option<TreeEntry<'_>> {
443        self.entries
444            .get_key_value(name)
445            .map(|(name, value)| TreeEntry { name, value })
446    }
447
448    pub fn value(&self, name: &RepoPathComponent) -> Option<&TreeValue> {
449        self.entries.get(name)
450    }
451}
452
453pub fn make_root_commit(root_change_id: ChangeId, empty_tree_id: TreeId) -> Commit {
454    let timestamp = Timestamp {
455        timestamp: MillisSinceEpoch(0),
456        tz_offset: 0,
457    };
458    let signature = Signature {
459        name: String::new(),
460        email: String::new(),
461        timestamp,
462    };
463    Commit {
464        parents: vec![],
465        predecessors: vec![],
466        root_tree: MergedTreeId::resolved(empty_tree_id),
467        change_id: root_change_id,
468        description: String::new(),
469        author: signature.clone(),
470        committer: signature,
471        secure_sig: None,
472    }
473}
474
475/// Defines the interface for commit backends.
476#[async_trait]
477pub trait Backend: Send + Sync + Debug {
478    fn as_any(&self) -> &dyn Any;
479
480    /// A unique name that identifies this backend. Written to
481    /// `.jj/repo/store/type` when the repo is created.
482    fn name(&self) -> &str;
483
484    /// The length of commit IDs in bytes.
485    fn commit_id_length(&self) -> usize;
486
487    /// The length of change IDs in bytes.
488    fn change_id_length(&self) -> usize;
489
490    fn root_commit_id(&self) -> &CommitId;
491
492    fn root_change_id(&self) -> &ChangeId;
493
494    fn empty_tree_id(&self) -> &TreeId;
495
496    /// An estimate of how many concurrent requests this backend handles well. A
497    /// local backend like the Git backend (at until it supports partial clones)
498    /// may want to set this to 1. A cloud-backed backend may want to set it to
499    /// 100 or so.
500    ///
501    /// It is not guaranteed that at most this number of concurrent requests are
502    /// sent.
503    fn concurrency(&self) -> usize;
504
505    async fn read_file(
506        &self,
507        path: &RepoPath,
508        id: &FileId,
509    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>>;
510
511    async fn write_file(
512        &self,
513        path: &RepoPath,
514        contents: &mut (dyn AsyncRead + Send + Unpin),
515    ) -> BackendResult<FileId>;
516
517    async fn read_symlink(&self, path: &RepoPath, id: &SymlinkId) -> BackendResult<String>;
518
519    async fn write_symlink(&self, path: &RepoPath, target: &str) -> BackendResult<SymlinkId>;
520
521    /// Read the specified `CopyHistory` object.
522    ///
523    /// Backends that don't support copy tracking may return
524    /// `BackendError::Unsupported`.
525    async fn read_copy(&self, id: &CopyId) -> BackendResult<CopyHistory>;
526
527    /// Write the `CopyHistory` object and return its ID.
528    ///
529    /// Backends that don't support copy tracking may return
530    /// `BackendError::Unsupported`.
531    async fn write_copy(&self, copy: &CopyHistory) -> BackendResult<CopyId>;
532
533    /// Find all copy histories that are related to the specified one. This is
534    /// defined as those that are ancestors of the given specified one, plus
535    /// their descendants. Children must be returned before parents.
536    ///
537    /// It is valid (but wasteful) to include other copy histories, such as
538    /// siblings, or even completely unrelated copy histories.
539    ///
540    /// Backends that don't support copy tracking may return
541    /// `BackendError::Unsupported`.
542    async fn get_related_copies(&self, copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>>;
543
544    async fn read_tree(&self, path: &RepoPath, id: &TreeId) -> BackendResult<Tree>;
545
546    async fn write_tree(&self, path: &RepoPath, contents: &Tree) -> BackendResult<TreeId>;
547
548    // Not async because it would force `MergedTree::value()` to be async. We don't
549    // need this to be async anyway because it's only used by legacy repos.
550    fn read_conflict(&self, path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict>;
551
552    fn write_conflict(&self, path: &RepoPath, contents: &Conflict) -> BackendResult<ConflictId>;
553
554    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit>;
555
556    /// Writes a commit and returns its ID and the commit itself. The commit
557    /// should contain the data that was actually written, which may differ
558    /// from the data passed in. For example, the backend may change the
559    /// committer name to an authenticated user's name, or the backend's
560    /// timestamps may have less precision than the millisecond precision in
561    /// `Commit`.
562    ///
563    /// The `sign_with` parameter could contain a function to cryptographically
564    /// sign some binary representation of the commit.
565    /// If the backend supports it, it could call it and store the result in
566    /// an implementation specific fashion, and both `read_commit` and the
567    /// return of `write_commit` should read it back as the `secure_sig`
568    /// field.
569    async fn write_commit(
570        &self,
571        contents: Commit,
572        sign_with: Option<&mut SigningFn>,
573    ) -> BackendResult<(CommitId, Commit)>;
574
575    /// Get copy records for the dag range `root..head`.  If `paths` is None
576    /// include all paths, otherwise restrict to only `paths`.
577    ///
578    /// The exact order these are returned is unspecified, but it is guaranteed
579    /// to be reverse-topological. That is, for any two copy records with
580    /// different commit ids A and B, if A is an ancestor of B, A is streamed
581    /// after B.
582    ///
583    /// Streaming by design to better support large backends which may have very
584    /// large single-file histories. This also allows more iterative algorithms
585    /// like blame/annotate to short-circuit after a point without wasting
586    /// unnecessary resources.
587    fn get_copy_records(
588        &self,
589        paths: Option<&[RepoPathBuf]>,
590        root: &CommitId,
591        head: &CommitId,
592    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>>;
593
594    /// Perform garbage collection.
595    ///
596    /// All commits found in the `index` won't be removed. In addition to that,
597    /// objects created after `keep_newer` will be preserved. This mitigates a
598    /// risk of deleting new commits created concurrently by another process.
599    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()>;
600}