jj_lib/
backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::any::Any;
18use std::fmt::Debug;
19use std::pin::Pin;
20use std::slice;
21use std::time::SystemTime;
22
23use async_trait::async_trait;
24use chrono::TimeZone as _;
25use futures::stream::BoxStream;
26use thiserror::Error;
27use tokio::io::AsyncRead;
28
29use crate::content_hash::ContentHash;
30use crate::hex_util;
31use crate::index::Index;
32use crate::merge::Merge;
33use crate::object_id::ObjectId as _;
34use crate::object_id::id_type;
35use crate::repo_path::RepoPath;
36use crate::repo_path::RepoPathBuf;
37use crate::repo_path::RepoPathComponent;
38use crate::repo_path::RepoPathComponentBuf;
39use crate::signing::SignResult;
40
41id_type!(
42    /// Identifier for a [`Commit`] based on its content. When a commit is
43    /// rewritten, its `CommitId` changes.
44    pub CommitId { hex() }
45);
46id_type!(
47    /// Stable identifier for a [`Commit`]. Unlike the `CommitId`, the `ChangeId`
48    /// follows the commit and is not updated when the commit is rewritten.
49    pub ChangeId { reverse_hex() }
50);
51id_type!(pub TreeId { hex() });
52id_type!(pub FileId { hex() });
53id_type!(pub SymlinkId { hex() });
54id_type!(pub CopyId { hex() });
55
56impl ChangeId {
57    /// Parses the given "reverse" hex string into a `ChangeId`.
58    pub fn try_from_reverse_hex(hex: impl AsRef<[u8]>) -> Option<Self> {
59        hex_util::decode_reverse_hex(hex).map(Self)
60    }
61
62    /// Returns the hex string representation of this ID, which uses `z-k`
63    /// "digits" instead of `0-9a-f`.
64    pub fn reverse_hex(&self) -> String {
65        hex_util::encode_reverse_hex(&self.0)
66    }
67}
68
69impl CopyId {
70    /// Returns a placeholder copy id to be used when we don't have a real copy
71    /// id yet.
72    // TODO: Delete this
73    pub fn placeholder() -> Self {
74        Self::new(vec![])
75    }
76}
77
78#[derive(Debug, Error)]
79#[error("Out-of-range date")]
80pub struct TimestampOutOfRange;
81
82#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
83pub struct MillisSinceEpoch(pub i64);
84
85#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
86pub struct Timestamp {
87    pub timestamp: MillisSinceEpoch,
88    // time zone offset in minutes
89    pub tz_offset: i32,
90}
91
92impl Timestamp {
93    pub fn now() -> Self {
94        Self::from_datetime(chrono::offset::Local::now())
95    }
96
97    pub fn from_datetime<Tz: chrono::TimeZone<Offset = chrono::offset::FixedOffset>>(
98        datetime: chrono::DateTime<Tz>,
99    ) -> Self {
100        Self {
101            timestamp: MillisSinceEpoch(datetime.timestamp_millis()),
102            tz_offset: datetime.offset().local_minus_utc() / 60,
103        }
104    }
105
106    pub fn to_datetime(
107        &self,
108    ) -> Result<chrono::DateTime<chrono::FixedOffset>, TimestampOutOfRange> {
109        let utc = match chrono::Utc.timestamp_opt(
110            self.timestamp.0.div_euclid(1000),
111            (self.timestamp.0.rem_euclid(1000)) as u32 * 1000000,
112        ) {
113            chrono::LocalResult::None => {
114                return Err(TimestampOutOfRange);
115            }
116            chrono::LocalResult::Single(x) => x,
117            chrono::LocalResult::Ambiguous(y, _z) => y,
118        };
119
120        Ok(utc.with_timezone(
121            &chrono::FixedOffset::east_opt(self.tz_offset * 60)
122                .unwrap_or_else(|| chrono::FixedOffset::east_opt(0).unwrap()),
123        ))
124    }
125}
126
127impl serde::Serialize for Timestamp {
128    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
129    where
130        S: serde::Serializer,
131    {
132        // TODO: test is_human_readable() to use raw format?
133        let t = self.to_datetime().map_err(serde::ser::Error::custom)?;
134        t.serialize(serializer)
135    }
136}
137
138/// Represents a [`Commit`] signature.
139#[derive(ContentHash, Debug, PartialEq, Eq, Clone, serde::Serialize)]
140pub struct Signature {
141    pub name: String,
142    pub email: String,
143    pub timestamp: Timestamp,
144}
145
146/// Represents a cryptographically signed [`Commit`] signature.
147#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
148pub struct SecureSig {
149    pub data: Vec<u8>,
150    pub sig: Vec<u8>,
151}
152
153pub type SigningFn<'a> = dyn FnMut(&[u8]) -> SignResult<Vec<u8>> + Send + 'a;
154
155/// Identifies a merge of multiple trees. Can be read as a `MergedTree`.
156// TODO: this type doesn't add anything over `Merge<TreeId>` currently, but conflict labels could be
157// added here in the future if we also add them to `MergedTree`.
158#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
159pub struct MergedTreeId {
160    /// The tree id(s) of a merge tree
161    tree_ids: Merge<TreeId>,
162}
163
164impl MergedTreeId {
165    /// Create a resolved `MergedTreeId` from a single regular tree.
166    pub fn resolved(tree_id: TreeId) -> Self {
167        Self::new(Merge::resolved(tree_id))
168    }
169
170    /// Create a `MergedTreeId` from a `Merge<TreeId>`.
171    pub fn new(tree_ids: Merge<TreeId>) -> Self {
172        Self { tree_ids }
173    }
174
175    /// Returns the underlying `Merge<TreeId>`.
176    pub fn as_merge(&self) -> &Merge<TreeId> {
177        &self.tree_ids
178    }
179
180    /// Extracts the underlying `Merge<TreeId>`.
181    pub fn into_merge(self) -> Merge<TreeId> {
182        self.tree_ids
183    }
184}
185
186#[derive(ContentHash, Debug, PartialEq, Eq, Clone, serde::Serialize)]
187pub struct Commit {
188    pub parents: Vec<CommitId>,
189    // TODO: delete commit.predecessors when we can assume that most commits are
190    // tracked by op.commit_predecessors. (in jj 0.42 or so?)
191    #[serde(skip)] // deprecated
192    pub predecessors: Vec<CommitId>,
193    #[serde(skip)] // TODO: should be exposed?
194    pub root_tree: MergedTreeId,
195    pub change_id: ChangeId,
196    pub description: String,
197    pub author: Signature,
198    pub committer: Signature,
199    #[serde(skip)] // raw data wouldn't be useful
200    pub secure_sig: Option<SecureSig>,
201}
202
203/// An individual copy event, from file A -> B.
204#[derive(Debug, PartialEq, Eq, Clone)]
205pub struct CopyRecord {
206    /// The destination of the copy, B.
207    pub target: RepoPathBuf,
208    /// The CommitId where the copy took place.
209    pub target_commit: CommitId,
210    /// The source path a target was copied from.
211    ///
212    /// It is not required that the source path is different than the target
213    /// path. A custom backend may choose to represent 'rollbacks' as copies
214    /// from a file unto itself, from a specific prior commit.
215    pub source: RepoPathBuf,
216    pub source_file: FileId,
217    /// The source commit the target was copied from. Backends may use this
218    /// field to implement 'integration' logic, where a source may be
219    /// periodically merged into a target, similar to a branch, but the
220    /// branching occurs at the file level rather than the repository level. It
221    /// also follows naturally that any copy source targeted to a specific
222    /// commit should avoid copy propagation on rebasing, which is desirable
223    /// for 'fork' style copies.
224    ///
225    /// It is required that the commit id is an ancestor of the commit with
226    /// which this copy source is associated.
227    pub source_commit: CommitId,
228}
229
230/// Describes the copy history of a file. The copy object is unchanged when a
231/// file is modified.
232#[derive(ContentHash, Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
233pub struct CopyHistory {
234    /// The file's current path.
235    pub current_path: RepoPathBuf,
236    /// IDs of the files that became the current incarnation of this file.
237    ///
238    /// A newly created file has no parents. A regular copy or rename has one
239    /// parent. A merge of multiple files has multiple parents.
240    pub parents: Vec<CopyId>,
241    /// An optional piece of data to give the Copy object a different ID. May be
242    /// randomly generated. This allows a commit to say that a file was replaced
243    /// by a new incarnation of it, indicating a logically distinct file
244    /// taking the place of the previous file at the path.
245    pub salt: Vec<u8>,
246}
247
248/// Error that may occur during backend initialization.
249#[derive(Debug, Error)]
250#[error(transparent)]
251pub struct BackendInitError(pub Box<dyn std::error::Error + Send + Sync>);
252
253/// Error that may occur during backend loading.
254#[derive(Debug, Error)]
255#[error(transparent)]
256pub struct BackendLoadError(pub Box<dyn std::error::Error + Send + Sync>);
257
258/// Commit-backend error that may occur after the backend is loaded.
259#[derive(Debug, Error)]
260pub enum BackendError {
261    #[error(
262        "Invalid hash length for object of type {object_type} (expected {expected} bytes, got \
263         {actual} bytes): {hash}"
264    )]
265    InvalidHashLength {
266        expected: usize,
267        actual: usize,
268        object_type: String,
269        hash: String,
270    },
271    #[error("Invalid UTF-8 for object {hash} of type {object_type}")]
272    InvalidUtf8 {
273        object_type: String,
274        hash: String,
275        source: std::str::Utf8Error,
276    },
277    #[error("Object {hash} of type {object_type} not found")]
278    ObjectNotFound {
279        object_type: String,
280        hash: String,
281        source: Box<dyn std::error::Error + Send + Sync>,
282    },
283    #[error("Error when reading object {hash} of type {object_type}")]
284    ReadObject {
285        object_type: String,
286        hash: String,
287        source: Box<dyn std::error::Error + Send + Sync>,
288    },
289    #[error("Access denied to read object {hash} of type {object_type}")]
290    ReadAccessDenied {
291        object_type: String,
292        hash: String,
293        source: Box<dyn std::error::Error + Send + Sync>,
294    },
295    #[error(
296        "Error when reading file content for file {path} with id {id}",
297        path = path.as_internal_file_string()
298    )]
299    ReadFile {
300        path: RepoPathBuf,
301        id: FileId,
302        source: Box<dyn std::error::Error + Send + Sync>,
303    },
304    #[error("Could not write object of type {object_type}")]
305    WriteObject {
306        object_type: &'static str,
307        source: Box<dyn std::error::Error + Send + Sync>,
308    },
309    #[error(transparent)]
310    Other(Box<dyn std::error::Error + Send + Sync>),
311    /// A valid operation attempted, but failed because it isn't supported by
312    /// the particular backend.
313    #[error("{0}")]
314    Unsupported(String),
315}
316
317pub type BackendResult<T> = Result<T, BackendError>;
318
319#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Hash)]
320pub enum TreeValue {
321    // TODO: When there's a CopyId here, the copy object's path must match
322    // the path identified by the tree.
323    File {
324        id: FileId,
325        executable: bool,
326        copy_id: CopyId,
327    },
328    Symlink(SymlinkId),
329    Tree(TreeId),
330    GitSubmodule(CommitId),
331}
332
333impl TreeValue {
334    pub fn hex(&self) -> String {
335        match self {
336            Self::File { id, .. } => id.hex(),
337            Self::Symlink(id) => id.hex(),
338            Self::Tree(id) => id.hex(),
339            Self::GitSubmodule(id) => id.hex(),
340        }
341    }
342}
343
344#[derive(Debug, PartialEq, Eq, Clone)]
345pub struct TreeEntry<'a> {
346    name: &'a RepoPathComponent,
347    value: &'a TreeValue,
348}
349
350impl<'a> TreeEntry<'a> {
351    pub fn new(name: &'a RepoPathComponent, value: &'a TreeValue) -> Self {
352        Self { name, value }
353    }
354
355    pub fn name(&self) -> &'a RepoPathComponent {
356        self.name
357    }
358
359    pub fn value(&self) -> &'a TreeValue {
360        self.value
361    }
362}
363
364pub struct TreeEntriesNonRecursiveIterator<'a> {
365    iter: slice::Iter<'a, (RepoPathComponentBuf, TreeValue)>,
366}
367
368impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> {
369    type Item = TreeEntry<'a>;
370
371    fn next(&mut self) -> Option<Self::Item> {
372        self.iter
373            .next()
374            .map(|(name, value)| TreeEntry { name, value })
375    }
376}
377
378#[derive(ContentHash, Default, PartialEq, Eq, Debug, Clone)]
379pub struct Tree {
380    entries: Vec<(RepoPathComponentBuf, TreeValue)>,
381}
382
383impl Tree {
384    pub fn from_sorted_entries(entries: Vec<(RepoPathComponentBuf, TreeValue)>) -> Self {
385        debug_assert!(entries.is_sorted_by(|(a, _), (b, _)| a < b));
386        Self { entries }
387    }
388
389    pub fn is_empty(&self) -> bool {
390        self.entries.is_empty()
391    }
392
393    pub fn names(&self) -> impl Iterator<Item = &RepoPathComponent> {
394        self.entries.iter().map(|(name, _)| name.as_ref())
395    }
396
397    pub fn entries(&self) -> TreeEntriesNonRecursiveIterator<'_> {
398        TreeEntriesNonRecursiveIterator {
399            iter: self.entries.iter(),
400        }
401    }
402
403    pub fn entry(&self, name: &RepoPathComponent) -> Option<TreeEntry<'_>> {
404        let index = self
405            .entries
406            .binary_search_by_key(&name, |(name, _)| name)
407            .ok()?;
408        let (name, value) = &self.entries[index];
409        Some(TreeEntry { name, value })
410    }
411
412    pub fn value(&self, name: &RepoPathComponent) -> Option<&TreeValue> {
413        self.entry(name).map(|entry| entry.value)
414    }
415}
416
417pub fn make_root_commit(root_change_id: ChangeId, empty_tree_id: TreeId) -> Commit {
418    let timestamp = Timestamp {
419        timestamp: MillisSinceEpoch(0),
420        tz_offset: 0,
421    };
422    let signature = Signature {
423        name: String::new(),
424        email: String::new(),
425        timestamp,
426    };
427    Commit {
428        parents: vec![],
429        predecessors: vec![],
430        root_tree: MergedTreeId::resolved(empty_tree_id),
431        change_id: root_change_id,
432        description: String::new(),
433        author: signature.clone(),
434        committer: signature,
435        secure_sig: None,
436    }
437}
438
439/// Defines the interface for commit backends.
440#[async_trait]
441pub trait Backend: Any + Send + Sync + Debug {
442    /// A unique name that identifies this backend. Written to
443    /// `.jj/repo/store/type` when the repo is created.
444    fn name(&self) -> &str;
445
446    /// The length of commit IDs in bytes.
447    fn commit_id_length(&self) -> usize;
448
449    /// The length of change IDs in bytes.
450    fn change_id_length(&self) -> usize;
451
452    fn root_commit_id(&self) -> &CommitId;
453
454    fn root_change_id(&self) -> &ChangeId;
455
456    fn empty_tree_id(&self) -> &TreeId;
457
458    /// An estimate of how many concurrent requests this backend handles well. A
459    /// local backend like the Git backend (at until it supports partial clones)
460    /// may want to set this to 1. A cloud-backed backend may want to set it to
461    /// 100 or so.
462    ///
463    /// It is not guaranteed that at most this number of concurrent requests are
464    /// sent.
465    fn concurrency(&self) -> usize;
466
467    async fn read_file(
468        &self,
469        path: &RepoPath,
470        id: &FileId,
471    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>>;
472
473    async fn write_file(
474        &self,
475        path: &RepoPath,
476        contents: &mut (dyn AsyncRead + Send + Unpin),
477    ) -> BackendResult<FileId>;
478
479    async fn read_symlink(&self, path: &RepoPath, id: &SymlinkId) -> BackendResult<String>;
480
481    async fn write_symlink(&self, path: &RepoPath, target: &str) -> BackendResult<SymlinkId>;
482
483    /// Read the specified `CopyHistory` object.
484    ///
485    /// Backends that don't support copy tracking may return
486    /// `BackendError::Unsupported`.
487    async fn read_copy(&self, id: &CopyId) -> BackendResult<CopyHistory>;
488
489    /// Write the `CopyHistory` object and return its ID.
490    ///
491    /// Backends that don't support copy tracking may return
492    /// `BackendError::Unsupported`.
493    async fn write_copy(&self, copy: &CopyHistory) -> BackendResult<CopyId>;
494
495    /// Find all copy histories that are related to the specified one. This is
496    /// defined as those that are ancestors of the given specified one, plus
497    /// their descendants. Children must be returned before parents.
498    ///
499    /// It is valid (but wasteful) to include other copy histories, such as
500    /// siblings, or even completely unrelated copy histories.
501    ///
502    /// Backends that don't support copy tracking may return
503    /// `BackendError::Unsupported`.
504    async fn get_related_copies(&self, copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>>;
505
506    async fn read_tree(&self, path: &RepoPath, id: &TreeId) -> BackendResult<Tree>;
507
508    async fn write_tree(&self, path: &RepoPath, contents: &Tree) -> BackendResult<TreeId>;
509
510    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit>;
511
512    /// Writes a commit and returns its ID and the commit itself. The commit
513    /// should contain the data that was actually written, which may differ
514    /// from the data passed in. For example, the backend may change the
515    /// committer name to an authenticated user's name, or the backend's
516    /// timestamps may have less precision than the millisecond precision in
517    /// `Commit`.
518    ///
519    /// The `sign_with` parameter could contain a function to cryptographically
520    /// sign some binary representation of the commit.
521    /// If the backend supports it, it could call it and store the result in
522    /// an implementation specific fashion, and both `read_commit` and the
523    /// return of `write_commit` should read it back as the `secure_sig`
524    /// field.
525    async fn write_commit(
526        &self,
527        contents: Commit,
528        sign_with: Option<&mut SigningFn>,
529    ) -> BackendResult<(CommitId, Commit)>;
530
531    /// Get copy records for the dag range `root..head`.  If `paths` is None
532    /// include all paths, otherwise restrict to only `paths`.
533    ///
534    /// The exact order these are returned is unspecified, but it is guaranteed
535    /// to be reverse-topological. That is, for any two copy records with
536    /// different commit ids A and B, if A is an ancestor of B, A is streamed
537    /// after B.
538    ///
539    /// Streaming by design to better support large backends which may have very
540    /// large single-file histories. This also allows more iterative algorithms
541    /// like blame/annotate to short-circuit after a point without wasting
542    /// unnecessary resources.
543    fn get_copy_records(
544        &self,
545        paths: Option<&[RepoPathBuf]>,
546        root: &CommitId,
547        head: &CommitId,
548    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>>;
549
550    /// Perform garbage collection.
551    ///
552    /// All commits found in the `index` won't be removed. In addition to that,
553    /// objects created after `keep_newer` will be preserved. This mitigates a
554    /// risk of deleting new commits created concurrently by another process.
555    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()>;
556}
557
558impl dyn Backend {
559    /// Returns reference of the implementation type.
560    pub fn downcast_ref<T: Backend>(&self) -> Option<&T> {
561        (self as &dyn Any).downcast_ref()
562    }
563}