jj_lib/
backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::any::Any;
18use std::fmt::Debug;
19use std::pin::Pin;
20use std::slice;
21use std::time::SystemTime;
22
23use async_trait::async_trait;
24use chrono::TimeZone as _;
25use futures::stream::BoxStream;
26use thiserror::Error;
27use tokio::io::AsyncRead;
28
29use crate::content_hash::ContentHash;
30use crate::hex_util;
31use crate::index::Index;
32use crate::merge::Merge;
33use crate::object_id::ObjectId as _;
34use crate::object_id::id_type;
35use crate::repo_path::RepoPath;
36use crate::repo_path::RepoPathBuf;
37use crate::repo_path::RepoPathComponent;
38use crate::repo_path::RepoPathComponentBuf;
39use crate::signing::SignResult;
40
id_type!(
    /// Identifier for a [`Commit`] based on its content. When a commit is
    /// rewritten, its `CommitId` changes.
    pub CommitId { hex() }
);
id_type!(
    /// Stable identifier for a [`Commit`]. Unlike the `CommitId`, the `ChangeId`
    /// follows the commit and is not updated when the commit is rewritten.
    pub ChangeId { reverse_hex() }
);
// Identifier for a `Tree` object: accepted by `Backend::read_tree()` and
// returned by `Backend::write_tree()`.
id_type!(pub TreeId { hex() });
// Identifier for file content: accepted by `Backend::read_file()` and returned
// by `Backend::write_file()`.
id_type!(pub FileId { hex() });
// Identifier for a symlink target: accepted by `Backend::read_symlink()` and
// returned by `Backend::write_symlink()`.
id_type!(pub SymlinkId { hex() });
// Identifier for a `CopyHistory` object: accepted by `Backend::read_copy()` and
// returned by `Backend::write_copy()`.
id_type!(pub CopyId { hex() });
55
56impl ChangeId {
57    /// Parses the given "reverse" hex string into a `ChangeId`.
58    pub fn try_from_reverse_hex(hex: impl AsRef<[u8]>) -> Option<Self> {
59        hex_util::decode_reverse_hex(hex).map(Self)
60    }
61
62    /// Returns the hex string representation of this ID, which uses `z-k`
63    /// "digits" instead of `0-9a-f`.
64    pub fn reverse_hex(&self) -> String {
65        hex_util::encode_reverse_hex(&self.0)
66    }
67}
68
69impl CopyId {
70    /// Returns a placeholder copy id to be used when we don't have a real copy
71    /// id yet.
72    // TODO: Delete this
73    pub fn placeholder() -> Self {
74        Self::new(vec![])
75    }
76}
77
/// Error returned by [`Timestamp::to_datetime`] when the stored value cannot
/// be represented as a `chrono` date-time.
#[derive(Debug, Error)]
#[error("Out-of-range date")]
pub struct TimestampOutOfRange;
81
/// A signed count of milliseconds relative to the Unix epoch
/// (1970-01-01T00:00:00Z). Negative values denote instants before the epoch.
#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
pub struct MillisSinceEpoch(pub i64);
84
85#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
86pub struct Timestamp {
87    pub timestamp: MillisSinceEpoch,
88    // time zone offset in minutes
89    pub tz_offset: i32,
90}
91
92impl Timestamp {
93    pub fn now() -> Self {
94        Self::from_datetime(chrono::offset::Local::now())
95    }
96
97    pub fn from_datetime<Tz: chrono::TimeZone<Offset = chrono::offset::FixedOffset>>(
98        datetime: chrono::DateTime<Tz>,
99    ) -> Self {
100        Self {
101            timestamp: MillisSinceEpoch(datetime.timestamp_millis()),
102            tz_offset: datetime.offset().local_minus_utc() / 60,
103        }
104    }
105
106    pub fn to_datetime(
107        &self,
108    ) -> Result<chrono::DateTime<chrono::FixedOffset>, TimestampOutOfRange> {
109        let utc = match chrono::Utc.timestamp_opt(
110            self.timestamp.0.div_euclid(1000),
111            (self.timestamp.0.rem_euclid(1000)) as u32 * 1000000,
112        ) {
113            chrono::LocalResult::None => {
114                return Err(TimestampOutOfRange);
115            }
116            chrono::LocalResult::Single(x) => x,
117            chrono::LocalResult::Ambiguous(y, _z) => y,
118        };
119
120        Ok(utc.with_timezone(
121            &chrono::FixedOffset::east_opt(self.tz_offset * 60)
122                .unwrap_or_else(|| chrono::FixedOffset::east_opt(0).unwrap()),
123        ))
124    }
125}
126
127impl serde::Serialize for Timestamp {
128    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
129    where
130        S: serde::Serializer,
131    {
132        // TODO: test is_human_readable() to use raw format?
133        let t = self.to_datetime().map_err(serde::ser::Error::custom)?;
134        t.serialize(serializer)
135    }
136}
137
/// Represents a [`Commit`] signature.
///
/// This is the name/email/time record stored in [`Commit::author`] and
/// [`Commit::committer`]. The cryptographic signature lives in [`SecureSig`].
#[derive(ContentHash, Debug, PartialEq, Eq, Clone, serde::Serialize)]
pub struct Signature {
    /// Name of the person.
    pub name: String,
    /// Email address of the person.
    pub email: String,
    /// Time associated with this signature.
    pub timestamp: Timestamp,
}
145
/// Represents a cryptographically signed [`Commit`] signature.
#[derive(ContentHash, Debug, PartialEq, Eq, Clone)]
pub struct SecureSig {
    // NOTE(review): presumably the bytes that were signed — confirm against
    // the backend implementations.
    pub data: Vec<u8>,
    // NOTE(review): presumably the raw signature produced over `data` —
    // confirm against the backend implementations.
    pub sig: Vec<u8>,
}
152
/// Signing callback: takes a byte slice and returns the resulting bytes (or a
/// signing error). Passed as `sign_with` to [`Backend::write_commit`].
pub type SigningFn<'a> = dyn FnMut(&[u8]) -> SignResult<Vec<u8>> + Send + 'a;
154
/// Commit data as exchanged with a [`Backend`] via `read_commit()` and
/// `write_commit()`.
///
/// Fields marked `#[serde(skip)]` are left out of the serialized form.
#[derive(ContentHash, Debug, PartialEq, Eq, Clone, serde::Serialize)]
pub struct Commit {
    /// IDs of the parent commits.
    pub parents: Vec<CommitId>,
    // TODO: delete commit.predecessors when we can assume that most commits are
    // tracked by op.commit_predecessors. (in jj 0.42 or so?)
    #[serde(skip)] // deprecated
    pub predecessors: Vec<CommitId>,
    // NOTE(review): presumably holds more than one tree ID when the root tree
    // is conflicted — confirm against `Merge`.
    #[serde(skip)] // TODO: should be exposed?
    pub root_tree: Merge<TreeId>,
    /// The change this commit belongs to; see [`ChangeId`].
    pub change_id: ChangeId,
    /// The commit description (message).
    pub description: String,
    /// Author signature.
    pub author: Signature,
    /// Committer signature.
    pub committer: Signature,
    /// Cryptographic signature, if any; see [`Backend::write_commit`].
    #[serde(skip)] // raw data wouldn't be useful
    pub secure_sig: Option<SecureSig>,
}
171
/// An individual copy event, from file A -> B.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct CopyRecord {
    /// The destination of the copy, B.
    pub target: RepoPathBuf,
    /// The CommitId where the copy took place.
    pub target_commit: CommitId,
    /// The source path a target was copied from.
    ///
    /// It is not required that the source path is different than the target
    /// path. A custom backend may choose to represent 'rollbacks' as copies
    /// from a file onto itself, from a specific prior commit.
    pub source: RepoPathBuf,
    /// The [`FileId`] associated with the copy source.
    // NOTE(review): presumably the content ID of `source` as of
    // `source_commit` — confirm against the backend implementations.
    pub source_file: FileId,
    /// The source commit the target was copied from. Backends may use this
    /// field to implement 'integration' logic, where a source may be
    /// periodically merged into a target, similar to a branch, but the
    /// branching occurs at the file level rather than the repository level. It
    /// also follows naturally that any copy source targeted to a specific
    /// commit should avoid copy propagation on rebasing, which is desirable
    /// for 'fork' style copies.
    ///
    /// It is required that the commit id is an ancestor of the commit with
    /// which this copy source is associated.
    pub source_commit: CommitId,
}
198
/// Describes the copy history of a file. The copy object is unchanged when a
/// file is modified.
///
/// Copy objects are identified by [`CopyId`] and are read and written through
/// [`Backend::read_copy`] and [`Backend::write_copy`].
#[derive(ContentHash, Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
pub struct CopyHistory {
    /// The file's current path.
    pub current_path: RepoPathBuf,
    /// IDs of the copy objects for the files that became the current
    /// incarnation of this file.
    ///
    /// A newly created file has no parents. A regular copy or rename has one
    /// parent. A merge of multiple files has multiple parents.
    pub parents: Vec<CopyId>,
    /// An optional piece of data to give the Copy object a different ID. May be
    /// randomly generated. This allows a commit to say that a file was replaced
    /// by a new incarnation of it, indicating a logically distinct file
    /// taking the place of the previous file at the path.
    pub salt: Vec<u8>,
}
216
/// Error that may occur during backend initialization.
///
/// This is a transparent wrapper: the error message and source are those of
/// the boxed error.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct BackendInitError(pub Box<dyn std::error::Error + Send + Sync>);
221
/// Error that may occur during backend loading.
///
/// This is a transparent wrapper: the error message and source are those of
/// the boxed error.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct BackendLoadError(pub Box<dyn std::error::Error + Send + Sync>);
226
/// Commit-backend error that may occur after the backend is loaded.
///
/// In the variants below, `object_type` and `hash` are the textual object kind
/// and object ID that are interpolated into the error message.
#[derive(Debug, Error)]
pub enum BackendError {
    /// An object ID had `actual` bytes where `expected` bytes were required.
    #[error(
        "Invalid hash length for object of type {object_type} (expected {expected} bytes, got \
         {actual} bytes): {hash}"
    )]
    InvalidHashLength {
        expected: usize,
        actual: usize,
        object_type: String,
        hash: String,
    },
    /// Object data was not valid UTF-8; `source` is the decoding error.
    #[error("Invalid UTF-8 for object {hash} of type {object_type}")]
    InvalidUtf8 {
        object_type: String,
        hash: String,
        source: std::str::Utf8Error,
    },
    /// The requested object does not exist; `source` is the underlying error.
    #[error("Object {hash} of type {object_type} not found")]
    ObjectNotFound {
        object_type: String,
        hash: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// Reading an object failed; `source` is the underlying error.
    #[error("Error when reading object {hash} of type {object_type}")]
    ReadObject {
        object_type: String,
        hash: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// Reading an object was denied; `source` is the underlying error.
    #[error("Access denied to read object {hash} of type {object_type}")]
    ReadAccessDenied {
        object_type: String,
        hash: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// Reading the content of the file `id` at `path` failed; `source` is the
    /// underlying error.
    #[error(
        "Error when reading file content for file {path} with id {id}",
        path = path.as_internal_file_string()
    )]
    ReadFile {
        path: RepoPathBuf,
        id: FileId,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// Writing an object failed; `source` is the underlying error.
    #[error("Could not write object of type {object_type}")]
    WriteObject {
        object_type: &'static str,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// Any other error; the message and source are those of the boxed error.
    #[error(transparent)]
    Other(Box<dyn std::error::Error + Send + Sync>),
    /// A valid operation attempted, but failed because it isn't supported by
    /// the particular backend.
    #[error("{0}")]
    Unsupported(String),
}
285
/// Result type whose error is [`BackendError`]; returned by the fallible
/// [`Backend`] methods.
pub type BackendResult<T> = Result<T, BackendError>;
287
288#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Hash)]
289pub enum TreeValue {
290    // TODO: When there's a CopyId here, the copy object's path must match
291    // the path identified by the tree.
292    File {
293        id: FileId,
294        executable: bool,
295        copy_id: CopyId,
296    },
297    Symlink(SymlinkId),
298    Tree(TreeId),
299    GitSubmodule(CommitId),
300}
301
302impl TreeValue {
303    pub fn hex(&self) -> String {
304        match self {
305            Self::File { id, .. } => id.hex(),
306            Self::Symlink(id) => id.hex(),
307            Self::Tree(id) => id.hex(),
308            Self::GitSubmodule(id) => id.hex(),
309        }
310    }
311}
312
313#[derive(Debug, PartialEq, Eq, Clone)]
314pub struct TreeEntry<'a> {
315    name: &'a RepoPathComponent,
316    value: &'a TreeValue,
317}
318
319impl<'a> TreeEntry<'a> {
320    pub fn new(name: &'a RepoPathComponent, value: &'a TreeValue) -> Self {
321        Self { name, value }
322    }
323
324    pub fn name(&self) -> &'a RepoPathComponent {
325        self.name
326    }
327
328    pub fn value(&self) -> &'a TreeValue {
329        self.value
330    }
331}
332
333pub struct TreeEntriesNonRecursiveIterator<'a> {
334    iter: slice::Iter<'a, (RepoPathComponentBuf, TreeValue)>,
335}
336
337impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> {
338    type Item = TreeEntry<'a>;
339
340    fn next(&mut self) -> Option<Self::Item> {
341        self.iter
342            .next()
343            .map(|(name, value)| TreeEntry { name, value })
344    }
345}
346
347#[derive(ContentHash, Default, PartialEq, Eq, Debug, Clone)]
348pub struct Tree {
349    entries: Vec<(RepoPathComponentBuf, TreeValue)>,
350}
351
352impl Tree {
353    pub fn from_sorted_entries(entries: Vec<(RepoPathComponentBuf, TreeValue)>) -> Self {
354        debug_assert!(entries.is_sorted_by(|(a, _), (b, _)| a < b));
355        Self { entries }
356    }
357
358    pub fn is_empty(&self) -> bool {
359        self.entries.is_empty()
360    }
361
362    pub fn names(&self) -> impl Iterator<Item = &RepoPathComponent> {
363        self.entries.iter().map(|(name, _)| name.as_ref())
364    }
365
366    pub fn entries(&self) -> TreeEntriesNonRecursiveIterator<'_> {
367        TreeEntriesNonRecursiveIterator {
368            iter: self.entries.iter(),
369        }
370    }
371
372    pub fn entry(&self, name: &RepoPathComponent) -> Option<TreeEntry<'_>> {
373        let index = self
374            .entries
375            .binary_search_by_key(&name, |(name, _)| name)
376            .ok()?;
377        let (name, value) = &self.entries[index];
378        Some(TreeEntry { name, value })
379    }
380
381    pub fn value(&self, name: &RepoPathComponent) -> Option<&TreeValue> {
382        self.entry(name).map(|entry| entry.value)
383    }
384}
385
386pub fn make_root_commit(root_change_id: ChangeId, empty_tree_id: TreeId) -> Commit {
387    let timestamp = Timestamp {
388        timestamp: MillisSinceEpoch(0),
389        tz_offset: 0,
390    };
391    let signature = Signature {
392        name: String::new(),
393        email: String::new(),
394        timestamp,
395    };
396    Commit {
397        parents: vec![],
398        predecessors: vec![],
399        root_tree: Merge::resolved(empty_tree_id),
400        change_id: root_change_id,
401        description: String::new(),
402        author: signature.clone(),
403        committer: signature,
404        secure_sig: None,
405    }
406}
407
/// Defines the interface for commit backends.
///
/// Implementations must be `Send + Sync`. The `Any` supertrait is what allows
/// a `dyn Backend` to be downcast to its concrete type.
#[async_trait]
pub trait Backend: Any + Send + Sync + Debug {
    /// A unique name that identifies this backend. Written to
    /// `.jj/repo/store/type` when the repo is created.
    fn name(&self) -> &str;

    /// The length of commit IDs in bytes.
    fn commit_id_length(&self) -> usize;

    /// The length of change IDs in bytes.
    fn change_id_length(&self) -> usize;

    /// The [`CommitId`] of this backend's root commit.
    fn root_commit_id(&self) -> &CommitId;

    /// The [`ChangeId`] of this backend's root commit.
    // NOTE(review): presumably the value passed as `root_change_id` to
    // `make_root_commit()` — confirm against the backend implementations.
    fn root_change_id(&self) -> &ChangeId;

    /// The [`TreeId`] of the empty tree.
    // NOTE(review): presumably the value passed as `empty_tree_id` to
    // `make_root_commit()` — confirm against the backend implementations.
    fn empty_tree_id(&self) -> &TreeId;

    /// An estimate of how many concurrent requests this backend handles well. A
    /// local backend like the Git backend (at least until it supports partial
    /// clones) may want to set this to 1. A cloud-backed backend may want to
    /// set it to 100 or so.
    ///
    /// It is not guaranteed that at most this number of concurrent requests are
    /// sent.
    fn concurrency(&self) -> usize;

    /// Opens the content of the file identified by `id` for asynchronous
    /// reading.
    // NOTE(review): `path` is presumably the repo path the file lives at; how
    // a backend uses it is not visible here.
    async fn read_file(
        &self,
        path: &RepoPath,
        id: &FileId,
    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>>;

    /// Writes the file content read from `contents` and returns its ID.
    async fn write_file(
        &self,
        path: &RepoPath,
        contents: &mut (dyn AsyncRead + Send + Unpin),
    ) -> BackendResult<FileId>;

    /// Reads the target of the symlink identified by `id`.
    async fn read_symlink(&self, path: &RepoPath, id: &SymlinkId) -> BackendResult<String>;

    /// Writes a symlink pointing at `target` and returns its ID.
    async fn write_symlink(&self, path: &RepoPath, target: &str) -> BackendResult<SymlinkId>;

    /// Read the specified `CopyHistory` object.
    ///
    /// Backends that don't support copy tracking may return
    /// `BackendError::Unsupported`.
    async fn read_copy(&self, id: &CopyId) -> BackendResult<CopyHistory>;

    /// Write the `CopyHistory` object and return its ID.
    ///
    /// Backends that don't support copy tracking may return
    /// `BackendError::Unsupported`.
    async fn write_copy(&self, copy: &CopyHistory) -> BackendResult<CopyId>;

    /// Find all copy histories that are related to the specified one. This is
    /// defined as those that are ancestors of the given specified one, plus
    /// their descendants. Children must be returned before parents.
    ///
    /// It is valid (but wasteful) to include other copy histories, such as
    /// siblings, or even completely unrelated copy histories.
    ///
    /// Backends that don't support copy tracking may return
    /// `BackendError::Unsupported`.
    async fn get_related_copies(&self, copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>>;

    /// Reads the [`Tree`] identified by `id`.
    async fn read_tree(&self, path: &RepoPath, id: &TreeId) -> BackendResult<Tree>;

    /// Writes `contents` as a tree and returns its ID.
    async fn write_tree(&self, path: &RepoPath, contents: &Tree) -> BackendResult<TreeId>;

    /// Reads the [`Commit`] identified by `id`.
    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit>;

    /// Writes a commit and returns its ID and the commit itself. The commit
    /// should contain the data that was actually written, which may differ
    /// from the data passed in. For example, the backend may change the
    /// committer name to an authenticated user's name, or the backend's
    /// timestamps may have less precision than the millisecond precision in
    /// `Commit`.
    ///
    /// The `sign_with` parameter could contain a function to cryptographically
    /// sign some binary representation of the commit.
    /// If the backend supports it, it could call it and store the result in
    /// an implementation specific fashion, and both `read_commit` and the
    /// return of `write_commit` should read it back as the `secure_sig`
    /// field.
    async fn write_commit(
        &self,
        contents: Commit,
        sign_with: Option<&mut SigningFn>,
    ) -> BackendResult<(CommitId, Commit)>;

    /// Get copy records for the dag range `root..head`.  If `paths` is None
    /// include all paths, otherwise restrict to only `paths`.
    ///
    /// The exact order these are returned is unspecified, but it is guaranteed
    /// to be reverse-topological. That is, for any two copy records with
    /// different commit ids A and B, if A is an ancestor of B, A is streamed
    /// after B.
    ///
    /// Streaming by design to better support large backends which may have very
    /// large single-file histories. This also allows more iterative algorithms
    /// like blame/annotate to short-circuit after a point without wasting
    /// unnecessary resources.
    fn get_copy_records(
        &self,
        paths: Option<&[RepoPathBuf]>,
        root: &CommitId,
        head: &CommitId,
    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>>;

    /// Perform garbage collection.
    ///
    /// All commits found in the `index` won't be removed. In addition to that,
    /// objects created after `keep_newer` will be preserved. This mitigates a
    /// risk of deleting new commits created concurrently by another process.
    fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()>;
}
526
527impl dyn Backend {
528    /// Returns reference of the implementation type.
529    pub fn downcast_ref<T: Backend>(&self) -> Option<&T> {
530        (self as &dyn Any).downcast_ref()
531    }
532}