jj_lib/
backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::any::Any;
18use std::fmt::Debug;
19use std::pin::Pin;
20use std::slice;
21use std::time::SystemTime;
22
23use async_trait::async_trait;
24use chrono::TimeZone as _;
25use futures::stream::BoxStream;
26use thiserror::Error;
27use tokio::io::AsyncRead;
28
29use crate::content_hash::ContentHash;
30use crate::hex_util;
31use crate::index::Index;
32use crate::merge::Merge;
33use crate::object_id::ObjectId as _;
34use crate::object_id::id_type;
35use crate::repo_path::RepoPath;
36use crate::repo_path::RepoPathBuf;
37use crate::repo_path::RepoPathComponent;
38use crate::repo_path::RepoPathComponentBuf;
39use crate::signing::SignResult;
40
41id_type!(
42    /// Identifier for a [`Commit`] based on its content. When a commit is
43    /// rewritten, its `CommitId` changes.
44    pub CommitId { hex() }
45);
46id_type!(
47    /// Stable identifier for a [`Commit`]. Unlike the `CommitId`, the `ChangeId`
48    /// follows the commit and is not updated when the commit is rewritten.
49    pub ChangeId { reverse_hex() }
50);
51id_type!(pub TreeId { hex() });
52id_type!(pub FileId { hex() });
53id_type!(pub SymlinkId { hex() });
54id_type!(pub CopyId { hex() });
55
56impl ChangeId {
57    /// Parses the given "reverse" hex string into a `ChangeId`.
58    pub fn try_from_reverse_hex(hex: impl AsRef<[u8]>) -> Option<Self> {
59        hex_util::decode_reverse_hex(hex).map(Self)
60    }
61
62    /// Returns the hex string representation of this ID, which uses `z-k`
63    /// "digits" instead of `0-9a-f`.
64    pub fn reverse_hex(&self) -> String {
65        hex_util::encode_reverse_hex(&self.0)
66    }
67}
68
69impl CopyId {
70    /// Returns a placeholder copy id to be used when we don't have a real copy
71    /// id yet.
72    // TODO: Delete this
73    pub fn placeholder() -> Self {
74        Self::new(vec![])
75    }
76}
77
78#[derive(Debug, Error)]
79#[error("Out-of-range date")]
80pub struct TimestampOutOfRange;
81
82#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
83pub struct MillisSinceEpoch(pub i64);
84
85#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
86pub struct Timestamp {
87    pub timestamp: MillisSinceEpoch,
88    // time zone offset in minutes
89    pub tz_offset: i32,
90}
91
92impl Timestamp {
93    pub fn now() -> Self {
94        Self::from_datetime(chrono::offset::Local::now())
95    }
96
97    pub fn from_datetime<Tz: chrono::TimeZone<Offset = chrono::offset::FixedOffset>>(
98        datetime: chrono::DateTime<Tz>,
99    ) -> Self {
100        Self {
101            timestamp: MillisSinceEpoch(datetime.timestamp_millis()),
102            tz_offset: datetime.offset().local_minus_utc() / 60,
103        }
104    }
105
106    pub fn to_datetime(
107        &self,
108    ) -> Result<chrono::DateTime<chrono::FixedOffset>, TimestampOutOfRange> {
109        let utc = match chrono::Utc.timestamp_opt(
110            self.timestamp.0.div_euclid(1000),
111            (self.timestamp.0.rem_euclid(1000)) as u32 * 1000000,
112        ) {
113            chrono::LocalResult::None => {
114                return Err(TimestampOutOfRange);
115            }
116            chrono::LocalResult::Single(x) => x,
117            chrono::LocalResult::Ambiguous(y, _z) => y,
118        };
119
120        Ok(utc.with_timezone(
121            &chrono::FixedOffset::east_opt(self.tz_offset * 60)
122                .unwrap_or_else(|| chrono::FixedOffset::east_opt(0).unwrap()),
123        ))
124    }
125}
126
127impl serde::Serialize for Timestamp {
128    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
129    where
130        S: serde::Serializer,
131    {
132        // TODO: test is_human_readable() to use raw format?
133        let t = self.to_datetime().map_err(serde::ser::Error::custom)?;
134        t.serialize(serializer)
135    }
136}
137
138/// Represents a [`Commit`] signature.
139#[derive(ContentHash, Debug, PartialEq, Eq, Clone, serde::Serialize)]
140pub struct Signature {
141    pub name: String,
142    pub email: String,
143    pub timestamp: Timestamp,
144}
145
146/// Represents a cryptographically signed [`Commit`] signature.
147#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
148pub struct SecureSig {
149    pub data: Vec<u8>,
150    pub sig: Vec<u8>,
151}
152
153pub type SigningFn<'a> = dyn FnMut(&[u8]) -> SignResult<Vec<u8>> + Send + 'a;
154
155/// Identifies a single legacy tree, which may have path-level conflicts, or a
156/// merge of multiple trees, where the individual trees do not have conflicts.
157// TODO(#1624): Delete this type at some point in the future, when we decide to drop
158// support for conflicts in older repos, or maybe after we have provided an upgrade
159// mechanism.
160#[derive(ContentHash, Debug, Clone)]
161pub enum MergedTreeId {
162    /// The tree id of a legacy tree
163    Legacy(TreeId),
164    /// The tree id(s) of a merge tree
165    Merge(Merge<TreeId>),
166}
167
168impl PartialEq for MergedTreeId {
169    /// Overridden to make conflict-free trees be considered equal even if their
170    /// `MergedTreeId` variant is different.
171    fn eq(&self, other: &Self) -> bool {
172        self.to_merge() == other.to_merge()
173    }
174}
175
176impl Eq for MergedTreeId {}
177
178impl MergedTreeId {
179    /// Create a resolved `MergedTreeId` from a single regular tree.
180    pub fn resolved(tree_id: TreeId) -> Self {
181        Self::Merge(Merge::resolved(tree_id))
182    }
183
184    /// Return this id as `Merge<TreeId>`
185    pub fn to_merge(&self) -> Merge<TreeId> {
186        match self {
187            Self::Legacy(tree_id) => Merge::resolved(tree_id.clone()),
188            Self::Merge(tree_ids) => tree_ids.clone(),
189        }
190    }
191}
192
193#[derive(ContentHash, Debug, PartialEq, Eq, Clone, serde::Serialize)]
194pub struct Commit {
195    pub parents: Vec<CommitId>,
196    // TODO: delete commit.predecessors when we can assume that most commits are
197    // tracked by op.commit_predecessors. (in jj 0.42 or so?)
198    #[serde(skip)] // deprecated
199    pub predecessors: Vec<CommitId>,
200    #[serde(skip)] // TODO: should be exposed?
201    pub root_tree: MergedTreeId,
202    pub change_id: ChangeId,
203    pub description: String,
204    pub author: Signature,
205    pub committer: Signature,
206    #[serde(skip)] // raw data wouldn't be useful
207    pub secure_sig: Option<SecureSig>,
208}
209
210/// An individual copy event, from file A -> B.
211#[derive(Debug, PartialEq, Eq, Clone)]
212pub struct CopyRecord {
213    /// The destination of the copy, B.
214    pub target: RepoPathBuf,
215    /// The CommitId where the copy took place.
216    pub target_commit: CommitId,
217    /// The source path a target was copied from.
218    ///
219    /// It is not required that the source path is different than the target
220    /// path. A custom backend may choose to represent 'rollbacks' as copies
221    /// from a file unto itself, from a specific prior commit.
222    pub source: RepoPathBuf,
223    pub source_file: FileId,
224    /// The source commit the target was copied from. Backends may use this
225    /// field to implement 'integration' logic, where a source may be
226    /// periodically merged into a target, similar to a branch, but the
227    /// branching occurs at the file level rather than the repository level. It
228    /// also follows naturally that any copy source targeted to a specific
229    /// commit should avoid copy propagation on rebasing, which is desirable
230    /// for 'fork' style copies.
231    ///
232    /// It is required that the commit id is an ancestor of the commit with
233    /// which this copy source is associated.
234    pub source_commit: CommitId,
235}
236
237/// Describes the copy history of a file. The copy object is unchanged when a
238/// file is modified.
239#[derive(ContentHash, Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
240pub struct CopyHistory {
241    /// The file's current path.
242    pub current_path: RepoPathBuf,
243    /// IDs of the files that became the current incarnation of this file.
244    ///
245    /// A newly created file has no parents. A regular copy or rename has one
246    /// parent. A merge of multiple files has multiple parents.
247    pub parents: Vec<CopyId>,
248    /// An optional piece of data to give the Copy object a different ID. May be
249    /// randomly generated. This allows a commit to say that a file was replaced
250    /// by a new incarnation of it, indicating a logically distinct file
251    /// taking the place of the previous file at the path.
252    pub salt: Vec<u8>,
253}
254
255/// Error that may occur during backend initialization.
256#[derive(Debug, Error)]
257#[error(transparent)]
258pub struct BackendInitError(pub Box<dyn std::error::Error + Send + Sync>);
259
260/// Error that may occur during backend loading.
261#[derive(Debug, Error)]
262#[error(transparent)]
263pub struct BackendLoadError(pub Box<dyn std::error::Error + Send + Sync>);
264
265/// Commit-backend error that may occur after the backend is loaded.
266#[derive(Debug, Error)]
267pub enum BackendError {
268    #[error(
269        "Invalid hash length for object of type {object_type} (expected {expected} bytes, got \
270         {actual} bytes): {hash}"
271    )]
272    InvalidHashLength {
273        expected: usize,
274        actual: usize,
275        object_type: String,
276        hash: String,
277    },
278    #[error("Invalid UTF-8 for object {hash} of type {object_type}")]
279    InvalidUtf8 {
280        object_type: String,
281        hash: String,
282        source: std::str::Utf8Error,
283    },
284    #[error("Object {hash} of type {object_type} not found")]
285    ObjectNotFound {
286        object_type: String,
287        hash: String,
288        source: Box<dyn std::error::Error + Send + Sync>,
289    },
290    #[error("Error when reading object {hash} of type {object_type}")]
291    ReadObject {
292        object_type: String,
293        hash: String,
294        source: Box<dyn std::error::Error + Send + Sync>,
295    },
296    #[error("Access denied to read object {hash} of type {object_type}")]
297    ReadAccessDenied {
298        object_type: String,
299        hash: String,
300        source: Box<dyn std::error::Error + Send + Sync>,
301    },
302    #[error(
303        "Error when reading file content for file {path} with id {id}",
304        path = path.as_internal_file_string()
305    )]
306    ReadFile {
307        path: RepoPathBuf,
308        id: FileId,
309        source: Box<dyn std::error::Error + Send + Sync>,
310    },
311    #[error("Could not write object of type {object_type}")]
312    WriteObject {
313        object_type: &'static str,
314        source: Box<dyn std::error::Error + Send + Sync>,
315    },
316    #[error(transparent)]
317    Other(Box<dyn std::error::Error + Send + Sync>),
318    /// A valid operation attempted, but failed because it isn't supported by
319    /// the particular backend.
320    #[error("{0}")]
321    Unsupported(String),
322}
323
324pub type BackendResult<T> = Result<T, BackendError>;
325
326#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Hash)]
327pub enum TreeValue {
328    // TODO: When there's a CopyId here, the copy object's path must match
329    // the path identified by the tree.
330    File {
331        id: FileId,
332        executable: bool,
333        copy_id: CopyId,
334    },
335    Symlink(SymlinkId),
336    Tree(TreeId),
337    GitSubmodule(CommitId),
338}
339
340impl TreeValue {
341    pub fn hex(&self) -> String {
342        match self {
343            Self::File { id, .. } => id.hex(),
344            Self::Symlink(id) => id.hex(),
345            Self::Tree(id) => id.hex(),
346            Self::GitSubmodule(id) => id.hex(),
347        }
348    }
349}
350
351#[derive(Debug, PartialEq, Eq, Clone)]
352pub struct TreeEntry<'a> {
353    name: &'a RepoPathComponent,
354    value: &'a TreeValue,
355}
356
357impl<'a> TreeEntry<'a> {
358    pub fn new(name: &'a RepoPathComponent, value: &'a TreeValue) -> Self {
359        Self { name, value }
360    }
361
362    pub fn name(&self) -> &'a RepoPathComponent {
363        self.name
364    }
365
366    pub fn value(&self) -> &'a TreeValue {
367        self.value
368    }
369}
370
371pub struct TreeEntriesNonRecursiveIterator<'a> {
372    iter: slice::Iter<'a, (RepoPathComponentBuf, TreeValue)>,
373}
374
375impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> {
376    type Item = TreeEntry<'a>;
377
378    fn next(&mut self) -> Option<Self::Item> {
379        self.iter
380            .next()
381            .map(|(name, value)| TreeEntry { name, value })
382    }
383}
384
385#[derive(ContentHash, Default, PartialEq, Eq, Debug, Clone)]
386pub struct Tree {
387    entries: Vec<(RepoPathComponentBuf, TreeValue)>,
388}
389
390impl Tree {
391    pub fn from_sorted_entries(entries: Vec<(RepoPathComponentBuf, TreeValue)>) -> Self {
392        debug_assert!(entries.is_sorted_by(|(a, _), (b, _)| a < b));
393        Self { entries }
394    }
395
396    pub fn is_empty(&self) -> bool {
397        self.entries.is_empty()
398    }
399
400    pub fn names(&self) -> impl Iterator<Item = &RepoPathComponent> {
401        self.entries.iter().map(|(name, _)| name.as_ref())
402    }
403
404    pub fn entries(&self) -> TreeEntriesNonRecursiveIterator<'_> {
405        TreeEntriesNonRecursiveIterator {
406            iter: self.entries.iter(),
407        }
408    }
409
410    pub fn entry(&self, name: &RepoPathComponent) -> Option<TreeEntry<'_>> {
411        let index = self
412            .entries
413            .binary_search_by_key(&name, |(name, _)| name)
414            .ok()?;
415        let (name, value) = &self.entries[index];
416        Some(TreeEntry { name, value })
417    }
418
419    pub fn value(&self, name: &RepoPathComponent) -> Option<&TreeValue> {
420        self.entry(name).map(|entry| entry.value)
421    }
422}
423
424pub fn make_root_commit(root_change_id: ChangeId, empty_tree_id: TreeId) -> Commit {
425    let timestamp = Timestamp {
426        timestamp: MillisSinceEpoch(0),
427        tz_offset: 0,
428    };
429    let signature = Signature {
430        name: String::new(),
431        email: String::new(),
432        timestamp,
433    };
434    Commit {
435        parents: vec![],
436        predecessors: vec![],
437        root_tree: MergedTreeId::resolved(empty_tree_id),
438        change_id: root_change_id,
439        description: String::new(),
440        author: signature.clone(),
441        committer: signature,
442        secure_sig: None,
443    }
444}
445
446/// Defines the interface for commit backends.
447#[async_trait]
448pub trait Backend: Any + Send + Sync + Debug {
449    /// A unique name that identifies this backend. Written to
450    /// `.jj/repo/store/type` when the repo is created.
451    fn name(&self) -> &str;
452
453    /// The length of commit IDs in bytes.
454    fn commit_id_length(&self) -> usize;
455
456    /// The length of change IDs in bytes.
457    fn change_id_length(&self) -> usize;
458
459    fn root_commit_id(&self) -> &CommitId;
460
461    fn root_change_id(&self) -> &ChangeId;
462
463    fn empty_tree_id(&self) -> &TreeId;
464
465    /// An estimate of how many concurrent requests this backend handles well. A
466    /// local backend like the Git backend (at until it supports partial clones)
467    /// may want to set this to 1. A cloud-backed backend may want to set it to
468    /// 100 or so.
469    ///
470    /// It is not guaranteed that at most this number of concurrent requests are
471    /// sent.
472    fn concurrency(&self) -> usize;
473
474    async fn read_file(
475        &self,
476        path: &RepoPath,
477        id: &FileId,
478    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>>;
479
480    async fn write_file(
481        &self,
482        path: &RepoPath,
483        contents: &mut (dyn AsyncRead + Send + Unpin),
484    ) -> BackendResult<FileId>;
485
486    async fn read_symlink(&self, path: &RepoPath, id: &SymlinkId) -> BackendResult<String>;
487
488    async fn write_symlink(&self, path: &RepoPath, target: &str) -> BackendResult<SymlinkId>;
489
490    /// Read the specified `CopyHistory` object.
491    ///
492    /// Backends that don't support copy tracking may return
493    /// `BackendError::Unsupported`.
494    async fn read_copy(&self, id: &CopyId) -> BackendResult<CopyHistory>;
495
496    /// Write the `CopyHistory` object and return its ID.
497    ///
498    /// Backends that don't support copy tracking may return
499    /// `BackendError::Unsupported`.
500    async fn write_copy(&self, copy: &CopyHistory) -> BackendResult<CopyId>;
501
502    /// Find all copy histories that are related to the specified one. This is
503    /// defined as those that are ancestors of the given specified one, plus
504    /// their descendants. Children must be returned before parents.
505    ///
506    /// It is valid (but wasteful) to include other copy histories, such as
507    /// siblings, or even completely unrelated copy histories.
508    ///
509    /// Backends that don't support copy tracking may return
510    /// `BackendError::Unsupported`.
511    async fn get_related_copies(&self, copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>>;
512
513    async fn read_tree(&self, path: &RepoPath, id: &TreeId) -> BackendResult<Tree>;
514
515    async fn write_tree(&self, path: &RepoPath, contents: &Tree) -> BackendResult<TreeId>;
516
517    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit>;
518
519    /// Writes a commit and returns its ID and the commit itself. The commit
520    /// should contain the data that was actually written, which may differ
521    /// from the data passed in. For example, the backend may change the
522    /// committer name to an authenticated user's name, or the backend's
523    /// timestamps may have less precision than the millisecond precision in
524    /// `Commit`.
525    ///
526    /// The `sign_with` parameter could contain a function to cryptographically
527    /// sign some binary representation of the commit.
528    /// If the backend supports it, it could call it and store the result in
529    /// an implementation specific fashion, and both `read_commit` and the
530    /// return of `write_commit` should read it back as the `secure_sig`
531    /// field.
532    async fn write_commit(
533        &self,
534        contents: Commit,
535        sign_with: Option<&mut SigningFn>,
536    ) -> BackendResult<(CommitId, Commit)>;
537
538    /// Get copy records for the dag range `root..head`.  If `paths` is None
539    /// include all paths, otherwise restrict to only `paths`.
540    ///
541    /// The exact order these are returned is unspecified, but it is guaranteed
542    /// to be reverse-topological. That is, for any two copy records with
543    /// different commit ids A and B, if A is an ancestor of B, A is streamed
544    /// after B.
545    ///
546    /// Streaming by design to better support large backends which may have very
547    /// large single-file histories. This also allows more iterative algorithms
548    /// like blame/annotate to short-circuit after a point without wasting
549    /// unnecessary resources.
550    fn get_copy_records(
551        &self,
552        paths: Option<&[RepoPathBuf]>,
553        root: &CommitId,
554        head: &CommitId,
555    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>>;
556
557    /// Perform garbage collection.
558    ///
559    /// All commits found in the `index` won't be removed. In addition to that,
560    /// objects created after `keep_newer` will be preserved. This mitigates a
561    /// risk of deleting new commits created concurrently by another process.
562    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()>;
563}
564
565impl dyn Backend {
566    /// Returns reference of the implementation type.
567    pub fn downcast_ref<T: Backend>(&self) -> Option<&T> {
568        (self as &dyn Any).downcast_ref()
569    }
570}