jj_lib/
backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::any::Any;
18use std::fmt::Debug;
19use std::pin::Pin;
20use std::slice;
21use std::time::SystemTime;
22
23use async_trait::async_trait;
24use chrono::TimeZone as _;
25use futures::stream::BoxStream;
26use thiserror::Error;
27use tokio::io::AsyncRead;
28
29use crate::content_hash::ContentHash;
30use crate::hex_util;
31use crate::index::Index;
32use crate::merge::Merge;
33use crate::object_id::ObjectId as _;
34use crate::object_id::id_type;
35use crate::repo_path::RepoPath;
36use crate::repo_path::RepoPathBuf;
37use crate::repo_path::RepoPathComponent;
38use crate::repo_path::RepoPathComponentBuf;
39use crate::signing::SignResult;
40
id_type!(
    /// Identifier for a [`Commit`] based on its content. When a commit is
    /// rewritten, its `CommitId` changes.
    pub CommitId { hex() }
);
id_type!(
    /// Stable identifier for a [`Commit`]. Unlike the `CommitId`, the `ChangeId`
    /// follows the commit and is not updated when the commit is rewritten.
    pub ChangeId { reverse_hex() }
);
// Identifier for a `Tree` object, as accepted by `Backend::read_tree` and
// returned by `Backend::write_tree`.
id_type!(pub TreeId { hex() });
// Identifier for a file's contents, as accepted by `Backend::read_file` and
// returned by `Backend::write_file`.
id_type!(pub FileId { hex() });
// Identifier for a symlink target, as accepted by `Backend::read_symlink` and
// returned by `Backend::write_symlink`.
id_type!(pub SymlinkId { hex() });
// Identifier for a `CopyHistory` object, as accepted by `Backend::read_copy`
// and returned by `Backend::write_copy`.
id_type!(pub CopyId { hex() });
55
56impl ChangeId {
57    /// Parses the given "reverse" hex string into a `ChangeId`.
58    pub fn try_from_reverse_hex(hex: impl AsRef<[u8]>) -> Option<Self> {
59        hex_util::decode_reverse_hex(hex).map(Self)
60    }
61
62    /// Returns the hex string representation of this ID, which uses `z-k`
63    /// "digits" instead of `0-9a-f`.
64    pub fn reverse_hex(&self) -> String {
65        hex_util::encode_reverse_hex(&self.0)
66    }
67}
68
69impl CopyId {
70    /// Returns a placeholder copy id to be used when we don't have a real copy
71    /// id yet.
72    // TODO: Delete this
73    pub fn placeholder() -> Self {
74        Self::new(vec![])
75    }
76}
77
/// Error returned by [`Timestamp::to_datetime`] when the stored instant cannot
/// be represented as a `chrono` date-time.
#[derive(Debug, Error)]
#[error("Out-of-range date")]
pub struct TimestampOutOfRange;
81
/// An instant expressed as a signed number of milliseconds since the Unix
/// epoch (the value produced by `chrono`'s `timestamp_millis()`).
#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
pub struct MillisSinceEpoch(pub i64);
84
/// An instant together with the UTC offset it should be displayed in.
///
/// The derived ordering compares `timestamp` first and `tz_offset` second.
#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
pub struct Timestamp {
    /// The instant, in milliseconds since the epoch.
    pub timestamp: MillisSinceEpoch,
    /// Time zone offset from UTC in minutes. Positive values are east of UTC
    /// (local time minus UTC).
    pub tz_offset: i32,
}
91
92impl Timestamp {
93    pub fn now() -> Self {
94        Self::from_datetime(chrono::offset::Local::now())
95    }
96
97    pub fn from_datetime<Tz: chrono::TimeZone<Offset = chrono::offset::FixedOffset>>(
98        datetime: chrono::DateTime<Tz>,
99    ) -> Self {
100        Self {
101            timestamp: MillisSinceEpoch(datetime.timestamp_millis()),
102            tz_offset: datetime.offset().local_minus_utc() / 60,
103        }
104    }
105
106    pub fn to_datetime(
107        &self,
108    ) -> Result<chrono::DateTime<chrono::FixedOffset>, TimestampOutOfRange> {
109        let utc = match chrono::Utc.timestamp_opt(
110            self.timestamp.0.div_euclid(1000),
111            (self.timestamp.0.rem_euclid(1000)) as u32 * 1000000,
112        ) {
113            chrono::LocalResult::None => {
114                return Err(TimestampOutOfRange);
115            }
116            chrono::LocalResult::Single(x) => x,
117            chrono::LocalResult::Ambiguous(y, _z) => y,
118        };
119
120        Ok(utc.with_timezone(
121            &chrono::FixedOffset::east_opt(self.tz_offset * 60)
122                .unwrap_or_else(|| chrono::FixedOffset::east_opt(0).unwrap()),
123        ))
124    }
125}
126
127impl serde::Serialize for Timestamp {
128    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
129    where
130        S: serde::Serializer,
131    {
132        // TODO: test is_human_readable() to use raw format?
133        let t = self.to_datetime().map_err(serde::ser::Error::custom)?;
134        t.serialize(serializer)
135    }
136}
137
/// Represents a [`Commit`] signature.
///
/// This is the type of both the `author` and the `committer` field of a
/// [`Commit`].
#[derive(ContentHash, Debug, PartialEq, Eq, Clone, serde::Serialize)]
pub struct Signature {
    /// Name of the person the signature belongs to.
    pub name: String,
    /// Email address of the person the signature belongs to.
    pub email: String,
    /// Time associated with the signature.
    pub timestamp: Timestamp,
}
145
/// Represents a cryptographically signed [`Commit`] signature.
#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
pub struct SecureSig {
    // NOTE(review): presumably the bytes that were signed (some binary
    // representation of the commit) — confirm against the backends.
    pub data: Vec<u8>,
    // NOTE(review): presumably the raw signature produced for `data` — confirm.
    pub sig: Vec<u8>,
}
152
/// Callback that takes a byte slice and returns bytes wrapped in a
/// `SignResult`. It is the type of the `sign_with` argument of
/// [`Backend::write_commit`].
pub type SigningFn<'a> = dyn FnMut(&[u8]) -> SignResult<Vec<u8>> + Send + 'a;
154
/// The data of a commit as read from or written to a [`Backend`].
///
/// Only `parents`, `change_id`, `description`, `author` and `committer` are
/// included in the serde output; the other fields are skipped.
#[derive(ContentHash, Debug, PartialEq, Eq, Clone, serde::Serialize)]
pub struct Commit {
    /// IDs of the parent commits.
    pub parents: Vec<CommitId>,
    // TODO: delete commit.predecessors when we can assume that most commits are
    // tracked by op.commit_predecessors. (in jj 0.42 or so?)
    #[serde(skip)] // deprecated
    pub predecessors: Vec<CommitId>,
    /// ID of the commit's root tree, wrapped in a `Merge`.
    // NOTE(review): a non-resolved `Merge` presumably represents a conflicted
    // commit — confirm.
    #[serde(skip)] // TODO: should be exposed?
    pub root_tree: Merge<TreeId>,
    // If resolved, must be empty string. Otherwise, must have same number of terms as `root_tree`.
    #[serde(skip)]
    pub conflict_labels: Merge<String>,
    /// The change this commit belongs to; see [`ChangeId`].
    pub change_id: ChangeId,
    /// The commit's description text.
    pub description: String,
    /// Signature of the commit's author.
    pub author: Signature,
    /// Signature of the commit's committer.
    pub committer: Signature,
    /// Cryptographic signature of the commit, if there is one.
    #[serde(skip)] // raw data wouldn't be useful
    pub secure_sig: Option<SecureSig>,
}
174
/// An individual copy event, from file A -> B.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct CopyRecord {
    /// The destination of the copy, B.
    pub target: RepoPathBuf,
    /// The CommitId where the copy took place.
    pub target_commit: CommitId,
    /// The source path a target was copied from.
    ///
    /// It is not required that the source path is different than the target
    /// path. A custom backend may choose to represent 'rollbacks' as copies
    /// from a file onto itself, from a specific prior commit.
    pub source: RepoPathBuf,
    // NOTE(review): presumably the ID of the file content found at `source` in
    // `source_commit` — confirm against the backends.
    pub source_file: FileId,
    /// The source commit the target was copied from. Backends may use this
    /// field to implement 'integration' logic, where a source may be
    /// periodically merged into a target, similar to a branch, but the
    /// branching occurs at the file level rather than the repository level. It
    /// also follows naturally that any copy source targeted to a specific
    /// commit should avoid copy propagation on rebasing, which is desirable
    /// for 'fork' style copies.
    ///
    /// It is required that the commit id is an ancestor of the commit with
    /// which this copy source is associated.
    pub source_commit: CommitId,
}
201
/// Describes the copy history of a file. The copy object is unchanged when a
/// file is modified.
///
/// Copy histories are read and written through [`Backend::read_copy`] and
/// [`Backend::write_copy`], and are identified by a [`CopyId`].
#[derive(ContentHash, Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
pub struct CopyHistory {
    /// The file's current path.
    pub current_path: RepoPathBuf,
    /// IDs of the files that became the current incarnation of this file.
    ///
    /// A newly created file has no parents. A regular copy or rename has one
    /// parent. A merge of multiple files has multiple parents.
    ///
    /// The IDs are [`CopyId`]s, i.e. they name other copy objects rather than
    /// file contents.
    pub parents: Vec<CopyId>,
    /// An optional piece of data to give the Copy object a different ID. May be
    /// randomly generated. This allows a commit to say that a file was replaced
    /// by a new incarnation of it, indicating a logically distinct file
    /// taking the place of the previous file at the path.
    pub salt: Vec<u8>,
}
219
/// Error that may occur during backend initialization.
///
/// This is a thin wrapper around the boxed underlying error; because of
/// `#[error(transparent)]`, its message is the wrapped error's message.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct BackendInitError(pub Box<dyn std::error::Error + Send + Sync>);
224
/// Error that may occur during backend loading.
///
/// This is a thin wrapper around the boxed underlying error; because of
/// `#[error(transparent)]`, its message is the wrapped error's message.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct BackendLoadError(pub Box<dyn std::error::Error + Send + Sync>);
229
/// Commit-backend error that may occur after the backend is loaded.
#[derive(Debug, Error)]
pub enum BackendError {
    /// An object ID did not have the number of bytes expected for its type.
    #[error(
        "Invalid hash length for object of type {object_type} (expected {expected} bytes, got \
         {actual} bytes): {hash}"
    )]
    InvalidHashLength {
        expected: usize,
        actual: usize,
        object_type: String,
        hash: String,
    },
    /// Data belonging to an object was not valid UTF-8.
    #[error("Invalid UTF-8 for object {hash} of type {object_type}")]
    InvalidUtf8 {
        object_type: String,
        hash: String,
        source: std::str::Utf8Error,
    },
    /// The requested object was not found.
    #[error("Object {hash} of type {object_type} not found")]
    ObjectNotFound {
        object_type: String,
        hash: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// An object could not be read.
    #[error("Error when reading object {hash} of type {object_type}")]
    ReadObject {
        object_type: String,
        hash: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// Read access to an object was denied.
    #[error("Access denied to read object {hash} of type {object_type}")]
    ReadAccessDenied {
        object_type: String,
        hash: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// The content of a file could not be read. Unlike the object-level
    /// variants, this one records the file's path.
    #[error(
        "Error when reading file content for file {path} with id {id}",
        path = path.as_internal_file_string()
    )]
    ReadFile {
        path: RepoPathBuf,
        id: FileId,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// An object could not be written.
    #[error("Could not write object of type {object_type}")]
    WriteObject {
        object_type: &'static str,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// Any other error; the message is that of the wrapped error.
    #[error(transparent)]
    Other(Box<dyn std::error::Error + Send + Sync>),
    /// A valid operation attempted, but failed because it isn't supported by
    /// the particular backend.
    #[error("{0}")]
    Unsupported(String),
}
288
/// Result type of [`Backend`] operations, with [`BackendError`] as the error.
pub type BackendResult<T> = Result<T, BackendError>;
290
/// The value of one entry in a [`Tree`]: a reference to the object stored
/// under a name.
#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Hash)]
pub enum TreeValue {
    // TODO: When there's a CopyId here, the copy object's path must match
    // the path identified by the tree.
    /// A file.
    File {
        /// ID of the file's contents.
        id: FileId,
        /// Whether the file is marked executable.
        executable: bool,
        /// ID of the file's [`CopyHistory`] object.
        copy_id: CopyId,
    },
    /// A symlink, identified by the ID of its target.
    Symlink(SymlinkId),
    /// A subtree.
    Tree(TreeId),
    // NOTE(review): presumably the commit ID the Git submodule points at —
    // confirm against the Git backend.
    GitSubmodule(CommitId),
}
304
305impl TreeValue {
306    pub fn hex(&self) -> String {
307        match self {
308            Self::File { id, .. } => id.hex(),
309            Self::Symlink(id) => id.hex(),
310            Self::Tree(id) => id.hex(),
311            Self::GitSubmodule(id) => id.hex(),
312        }
313    }
314}
315
/// A borrowed view of a single tree entry: a name paired with its value.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct TreeEntry<'a> {
    // Name of the entry.
    name: &'a RepoPathComponent,
    // Value stored under `name`.
    value: &'a TreeValue,
}
321
322impl<'a> TreeEntry<'a> {
323    pub fn new(name: &'a RepoPathComponent, value: &'a TreeValue) -> Self {
324        Self { name, value }
325    }
326
327    pub fn name(&self) -> &'a RepoPathComponent {
328        self.name
329    }
330
331    pub fn value(&self) -> &'a TreeValue {
332        self.value
333    }
334}
335
/// Iterator over the entries of a single [`Tree`], in the order they are
/// stored. Created by [`Tree::entries`].
pub struct TreeEntriesNonRecursiveIterator<'a> {
    // Underlying slice iterator over the tree's `(name, value)` pairs.
    iter: slice::Iter<'a, (RepoPathComponentBuf, TreeValue)>,
}
339
340impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> {
341    type Item = TreeEntry<'a>;
342
343    fn next(&mut self) -> Option<Self::Item> {
344        self.iter
345            .next()
346            .map(|(name, value)| TreeEntry { name, value })
347    }
348}
349
/// A single directory level: a list of named [`TreeValue`]s.
#[derive(ContentHash, Default, PartialEq, Eq, Debug, Clone)]
pub struct Tree {
    // Entries ordered by strictly increasing name; `entry()` relies on this
    // ordering for its binary search.
    entries: Vec<(RepoPathComponentBuf, TreeValue)>,
}
354
355impl Tree {
356    pub fn from_sorted_entries(entries: Vec<(RepoPathComponentBuf, TreeValue)>) -> Self {
357        debug_assert!(entries.is_sorted_by(|(a, _), (b, _)| a < b));
358        Self { entries }
359    }
360
361    pub fn is_empty(&self) -> bool {
362        self.entries.is_empty()
363    }
364
365    pub fn names(&self) -> impl Iterator<Item = &RepoPathComponent> {
366        self.entries.iter().map(|(name, _)| name.as_ref())
367    }
368
369    pub fn entries(&self) -> TreeEntriesNonRecursiveIterator<'_> {
370        TreeEntriesNonRecursiveIterator {
371            iter: self.entries.iter(),
372        }
373    }
374
375    pub fn entry(&self, name: &RepoPathComponent) -> Option<TreeEntry<'_>> {
376        let index = self
377            .entries
378            .binary_search_by_key(&name, |(name, _)| name)
379            .ok()?;
380        let (name, value) = &self.entries[index];
381        Some(TreeEntry { name, value })
382    }
383
384    pub fn value(&self, name: &RepoPathComponent) -> Option<&TreeValue> {
385        self.entry(name).map(|entry| entry.value)
386    }
387}
388
389pub fn make_root_commit(root_change_id: ChangeId, empty_tree_id: TreeId) -> Commit {
390    let timestamp = Timestamp {
391        timestamp: MillisSinceEpoch(0),
392        tz_offset: 0,
393    };
394    let signature = Signature {
395        name: String::new(),
396        email: String::new(),
397        timestamp,
398    };
399    Commit {
400        parents: vec![],
401        predecessors: vec![],
402        root_tree: Merge::resolved(empty_tree_id),
403        conflict_labels: Merge::resolved(String::new()),
404        change_id: root_change_id,
405        description: String::new(),
406        author: signature.clone(),
407        committer: signature,
408        secure_sig: None,
409    }
410}
411
/// Defines the interface for commit backends.
///
/// A backend reads and writes files, symlinks, copy histories, trees and
/// commits, each addressed by its own ID type. Fallible operations report
/// failures as [`BackendError`].
#[async_trait]
pub trait Backend: Any + Send + Sync + Debug {
    /// A unique name that identifies this backend. Written to
    /// `.jj/repo/store/type` when the repo is created.
    fn name(&self) -> &str;

    /// The length of commit IDs in bytes.
    fn commit_id_length(&self) -> usize;

    /// The length of change IDs in bytes.
    fn change_id_length(&self) -> usize;

    /// The ID of the root commit.
    fn root_commit_id(&self) -> &CommitId;

    /// The change ID of the root commit.
    fn root_change_id(&self) -> &ChangeId;

    /// The ID of the empty tree.
    fn empty_tree_id(&self) -> &TreeId;

    /// An estimate of how many concurrent requests this backend handles well. A
    /// local backend like the Git backend (at least until it supports partial
    /// clones) may want to set this to 1. A cloud-backed backend may want to
    /// set it to 100 or so.
    ///
    /// It is not guaranteed that at most this number of concurrent requests are
    /// sent.
    fn concurrency(&self) -> usize;

    /// Returns an asynchronous reader over the contents of the file with the
    /// given `id`.
    // NOTE(review): `path` is presumably the repo path the file lives at —
    // confirm how backends use it.
    async fn read_file(
        &self,
        path: &RepoPath,
        id: &FileId,
    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>>;

    /// Writes the data read from `contents` as a file and returns the new
    /// file's ID.
    async fn write_file(
        &self,
        path: &RepoPath,
        contents: &mut (dyn AsyncRead + Send + Unpin),
    ) -> BackendResult<FileId>;

    /// Reads the target of the symlink with the given `id`.
    async fn read_symlink(&self, path: &RepoPath, id: &SymlinkId) -> BackendResult<String>;

    /// Writes a symlink pointing at `target` and returns its ID.
    async fn write_symlink(&self, path: &RepoPath, target: &str) -> BackendResult<SymlinkId>;

    /// Read the specified `CopyHistory` object.
    ///
    /// Backends that don't support copy tracking may return
    /// `BackendError::Unsupported`.
    async fn read_copy(&self, id: &CopyId) -> BackendResult<CopyHistory>;

    /// Write the `CopyHistory` object and return its ID.
    ///
    /// Backends that don't support copy tracking may return
    /// `BackendError::Unsupported`.
    async fn write_copy(&self, copy: &CopyHistory) -> BackendResult<CopyId>;

    /// Find all copy histories that are related to the specified one. This is
    /// defined as those that are ancestors of the given specified one, plus
    /// their descendants. Children must be returned before parents.
    ///
    /// It is valid (but wasteful) to include other copy histories, such as
    /// siblings, or even completely unrelated copy histories.
    ///
    /// Backends that don't support copy tracking may return
    /// `BackendError::Unsupported`.
    async fn get_related_copies(&self, copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>>;

    /// Reads the tree with the given `id`.
    async fn read_tree(&self, path: &RepoPath, id: &TreeId) -> BackendResult<Tree>;

    /// Writes the tree `contents` and returns its ID.
    async fn write_tree(&self, path: &RepoPath, contents: &Tree) -> BackendResult<TreeId>;

    /// Reads the commit with the given `id`.
    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit>;

    /// Writes a commit and returns its ID and the commit itself. The commit
    /// should contain the data that was actually written, which may differ
    /// from the data passed in. For example, the backend may change the
    /// committer name to an authenticated user's name, or the backend's
    /// timestamps may have less precision than the millisecond precision in
    /// `Commit`.
    ///
    /// The `sign_with` parameter could contain a function to cryptographically
    /// sign some binary representation of the commit.
    /// If the backend supports it, it could call it and store the result in
    /// an implementation specific fashion, and both `read_commit` and the
    /// return of `write_commit` should read it back as the `secure_sig`
    /// field.
    async fn write_commit(
        &self,
        contents: Commit,
        sign_with: Option<&mut SigningFn>,
    ) -> BackendResult<(CommitId, Commit)>;

    /// Get copy records for the dag range `root..head`. If `paths` is None
    /// include all paths, otherwise restrict to only `paths`.
    ///
    /// The exact order these are returned is unspecified, but it is guaranteed
    /// to be reverse-topological. That is, for any two copy records with
    /// different commit ids A and B, if A is an ancestor of B, A is streamed
    /// after B.
    ///
    /// Streaming by design to better support large backends which may have very
    /// large single-file histories. This also allows more iterative algorithms
    /// like blame/annotate to short-circuit after a point without wasting
    /// unnecessary resources.
    fn get_copy_records(
        &self,
        paths: Option<&[RepoPathBuf]>,
        root: &CommitId,
        head: &CommitId,
    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>>;

    /// Perform garbage collection.
    ///
    /// All commits found in the `index` won't be removed. In addition to that,
    /// objects created after `keep_newer` will be preserved. This mitigates a
    /// risk of deleting new commits created concurrently by another process.
    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()>;
}
530
531impl dyn Backend {
532    /// Returns reference of the implementation type.
533    pub fn downcast_ref<T: Backend>(&self) -> Option<&T> {
534        (self as &dyn Any).downcast_ref()
535    }
536}