jujutsu_lib/
backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::BTreeMap;
16use std::fmt::{Debug, Error, Formatter};
17use std::io::Read;
18use std::result::Result;
19use std::vec::Vec;
20
21use thiserror::Error;
22
23use crate::content_hash::ContentHash;
24use crate::repo_path::{RepoPath, RepoPathComponent};
25
/// Common interface of the byte-string identifiers used for stored objects.
///
/// The ID types declared in this file get their implementation from the
/// `impl_id_type!` macro, which wraps a `Vec<u8>`.
pub trait ObjectId {
    /// Creates an ID that takes ownership of `value` as its raw bytes.
    fn new(value: Vec<u8>) -> Self;
    /// Returns a name for the kind of object this ID refers to. The
    /// macro-generated implementations derive it from the type name by
    /// dropping the `Id` suffix and lowercasing it (`CommitId` -> "commit").
    fn object_type(&self) -> String;
    /// Creates an ID by copying `bytes`.
    fn from_bytes(bytes: &[u8]) -> Self;
    /// Borrows the raw bytes of the ID.
    fn as_bytes(&self) -> &[u8];
    /// Returns an owned copy of the raw bytes of the ID.
    fn to_bytes(&self) -> Vec<u8>;
    /// Creates an ID from a hexadecimal string. The macro-generated
    /// implementations panic if `hex` cannot be decoded.
    fn from_hex(hex: &str) -> Self;
    /// Returns the raw bytes encoded as a hexadecimal string.
    fn hex(&self) -> String;
}
35
/// Declares a tuple struct `$name` wrapping a `Vec<u8>` with visibility
/// `$vis`, passes the declaration through `content_hash!`, and implements
/// `Debug` and `ObjectId` for it by invoking `impl_id_type!`.
macro_rules! id_type {
    ($vis:vis $name:ident) => {
        content_hash! {
            #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
            $vis struct $name(Vec<u8>);
        }
        impl_id_type!($name);
    };
}
45
/// Implements `Debug` and `crate::backend::ObjectId` for a tuple struct
/// `$name` whose single field is a `Vec<u8>`.
///
/// The expansion refers to `Debug`, `Formatter`, `Error`, `Result` and the
/// `hex` crate by their short names and calls `self.hex()`, so those names and
/// the `ObjectId` trait must be resolvable where the macro is invoked.
///
/// # Panics
///
/// In the generated code, `object_type()` panics if the name of the type does
/// not end in `Id`, and `from_hex()` panics if its argument is not valid
/// hexadecimal.
macro_rules! impl_id_type {
    ($name:ident) => {
        impl Debug for $name {
            // Shows the ID as hex inside the type name instead of dumping the
            // raw byte vector.
            fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
                f.debug_tuple(stringify!($name)).field(&self.hex()).finish()
            }
        }

        impl crate::backend::ObjectId for $name {
            fn new(value: Vec<u8>) -> Self {
                Self(value)
            }

            fn object_type(&self) -> String {
                // `to_ascii_lowercase()` on a `str` already yields an owned
                // `String`, so no further conversion is needed.
                stringify!($name)
                    .strip_suffix("Id")
                    .expect("names of ID types must end in `Id`")
                    .to_ascii_lowercase()
            }

            fn from_bytes(bytes: &[u8]) -> Self {
                Self(bytes.to_vec())
            }

            fn as_bytes(&self) -> &[u8] {
                &self.0
            }

            fn to_bytes(&self) -> Vec<u8> {
                self.0.clone()
            }

            fn from_hex(hex: &str) -> Self {
                // Panics on malformed input; the trait signature leaves no
                // room to report the decoding error.
                Self(hex::decode(hex).unwrap())
            }

            fn hex(&self) -> String {
                hex::encode(&self.0)
            }
        }
    };
}
89
// Identifier types, one per kind of object. Each is a `Vec<u8>` newtype
// generated by `id_type!`. `CommitId`, `TreeId`, `FileId`, `SymlinkId` and
// `ConflictId` are what the matching `Backend::read_*`/`write_*` methods take
// and return; `ChangeId` is the type of `Commit::change_id`.
id_type!(pub CommitId);
id_type!(pub ChangeId);
id_type!(pub TreeId);
id_type!(pub FileId);
id_type!(pub SymlinkId);
id_type!(pub ConflictId);
96
/// A phase, either `Public` or `Draft`.
///
/// NOTE(review): nothing in the visible part of this file uses this type;
/// presumably it is meant to classify commits — confirm against callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Phase {
    Public,
    Draft,
}
101
// A signed number of milliseconds relative to the epoch.
// `Timestamp::from_datetime` fills it from chrono's `timestamp_millis()`.
content_hash! {
    #[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
    pub struct MillisSinceEpoch(pub i64);
}
106
// A point in time together with a time zone offset. The derived ordering
// compares `timestamp` first and `tz_offset` second.
content_hash! {
    #[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
    pub struct Timestamp {
        // milliseconds since the epoch
        pub timestamp: MillisSinceEpoch,
        // time zone offset in minutes
        pub tz_offset: i32,
    }
}
115
116impl Timestamp {
117    pub fn now() -> Self {
118        Self::from_datetime(chrono::offset::Local::now())
119    }
120
121    pub fn from_datetime<Tz: chrono::TimeZone<Offset = chrono::offset::FixedOffset>>(
122        datetime: chrono::DateTime<Tz>,
123    ) -> Self {
124        Self {
125            timestamp: MillisSinceEpoch(datetime.timestamp_millis()),
126            tz_offset: datetime.offset().local_minus_utc() / 60,
127        }
128    }
129}
130
// A name and email address together with a timestamp. Used for both the
// `author` and the `committer` of a `Commit`.
content_hash! {
    #[derive(Debug, PartialEq, Eq, Clone)]
    pub struct Signature {
        pub name: String,
        pub email: String,
        pub timestamp: Timestamp,
    }
}
139
// The data of a commit, as passed to `Backend::write_commit` and returned by
// `Backend::read_commit`.
content_hash! {
    #[derive(Debug, PartialEq, Eq, Clone)]
    pub struct Commit {
        // IDs of the parent commits; empty in the commit built by
        // `make_root_commit`.
        pub parents: Vec<CommitId>,
        // NOTE(review): presumably the commits this one was rewritten from --
        // confirm against callers.
        pub predecessors: Vec<CommitId>,
        // ID of the commit's root tree.
        pub root_tree: TreeId,
        // NOTE(review): presumably stays the same when the commit is
        // rewritten -- confirm.
        pub change_id: ChangeId,
        pub description: String,
        pub author: Signature,
        pub committer: Signature,
    }
}
152
// One term of a `Conflict`; appears in its `adds` or `removes` list.
content_hash! {
    #[derive(Debug, PartialEq, Eq, Clone)]
    pub struct ConflictPart {
        // TODO: Store e.g. CommitId here too? Labels (theirs/ours/base)? Would those still be
        //       useful e.g. after rebasing this conflict?
        pub value: TreeValue,
    }
}
161
content_hash! {
    #[derive(Default, Debug, PartialEq, Eq, Clone)]
    pub struct Conflict {
        // A conflict is represented by a list of positive and negative states that need to be
        // applied. In a simple 3-way merge of B and C with merge base A, the conflict will be
        // { adds: [B, C], removes: [A] }. Also note that a conflict of the form
        // { adds: [A], removes: [] } is the same as non-conflict A.
        //
        // The negative states.
        pub removes: Vec<ConflictPart>,
        // The positive states.
        pub adds: Vec<ConflictPart>,
    }
}
173
/// Errors returned through `BackendResult`.
///
/// `object_type` and `hash` name the object the error is about. In variants
/// with a field called `source`, `thiserror` reports that field as the
/// underlying cause via `std::error::Error::source()`.
#[derive(Debug, Error)]
pub enum BackendError {
    /// An object hash was `actual` bytes long where `expected` bytes were
    /// required.
    #[error(
        "Invalid hash length for object of type {object_type} (expected {expected} bytes, got \
         {actual} bytes): {hash}"
    )]
    InvalidHashLength {
        expected: usize,
        actual: usize,
        object_type: String,
        hash: String,
    },
    /// An object hash was rejected as invalid; `source` holds the reason.
    #[error("Invalid hash for object of type {object_type} with hash {hash}: {source}")]
    InvalidHash {
        object_type: String,
        hash: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// Bytes belonging to an object could not be converted to a `String`
    /// because they are not valid UTF-8.
    #[error("Invalid UTF-8 for object {hash} of type {object_type}: {source}")]
    InvalidUtf8 {
        object_type: String,
        hash: String,
        source: std::string::FromUtf8Error,
    },
    /// The requested object does not exist.
    #[error("Object {hash} of type {object_type} not found: {source}")]
    ObjectNotFound {
        object_type: String,
        hash: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// Reading an object failed.
    #[error("Error when reading object {hash} of type {object_type}: {source}")]
    ReadObject {
        object_type: String,
        hash: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// Writing an object failed. There is no `hash` field here, and
    /// `object_type` is a `&'static str` rather than a `String`.
    #[error("Could not write object of type {object_type}: {source}")]
    WriteObject {
        object_type: &'static str,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// Any other failure, described only by a message.
    #[error("Error: {0}")]
    Other(String),
}
218
/// Result type of `Backend` operations; the error is always a `BackendError`.
pub type BackendResult<T> = Result<T, BackendError>;
220
/// The value stored under a name in a `Tree`. Every variant refers to its
/// target by ID rather than containing it.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub enum TreeValue {
    /// A file, identified by `id`, plus a flag saying whether it is
    /// executable.
    File { id: FileId, executable: bool },
    /// A symlink, identified by a `SymlinkId`.
    Symlink(SymlinkId),
    /// A tree, identified by a `TreeId`.
    Tree(TreeId),
    /// A Git submodule, identified by a `CommitId`.
    GitSubmodule(CommitId),
    /// A conflict, identified by a `ConflictId`.
    Conflict(ConflictId),
}
229
230impl ContentHash for TreeValue {
231    fn hash(&self, state: &mut impl digest::Update) {
232        use TreeValue::*;
233        match self {
234            File { id, executable } => {
235                state.update(&0u32.to_le_bytes());
236                id.hash(state);
237                executable.hash(state);
238            }
239            Symlink(id) => {
240                state.update(&1u32.to_le_bytes());
241                id.hash(state);
242            }
243            Tree(id) => {
244                state.update(&2u32.to_le_bytes());
245                id.hash(state);
246            }
247            GitSubmodule(id) => {
248                state.update(&3u32.to_le_bytes());
249                id.hash(state);
250            }
251            Conflict(id) => {
252                state.update(&4u32.to_le_bytes());
253                id.hash(state);
254            }
255        }
256    }
257}
258
/// A borrowed view of one entry of a `Tree`: the entry's name and the value
/// stored under it.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct TreeEntry<'a> {
    name: &'a RepoPathComponent,
    value: &'a TreeValue,
}
264
265impl<'a> TreeEntry<'a> {
266    pub fn new(name: &'a RepoPathComponent, value: &'a TreeValue) -> Self {
267        TreeEntry { name, value }
268    }
269
270    pub fn name(&self) -> &'a RepoPathComponent {
271        self.name
272    }
273
274    pub fn value(&self) -> &'a TreeValue {
275        self.value
276    }
277}
278
/// Iterator over the entries of a single `Tree`, returned by
/// `Tree::entries()`. It wraps the iterator of the tree's `BTreeMap` and does
/// not descend into subtrees.
pub struct TreeEntriesNonRecursiveIterator<'a> {
    iter: std::collections::btree_map::Iter<'a, RepoPathComponent, TreeValue>,
}
282
283impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> {
284    type Item = TreeEntry<'a>;
285
286    fn next(&mut self) -> Option<Self::Item> {
287        self.iter
288            .next()
289            .map(|(name, value)| TreeEntry { name, value })
290    }
291}
292
// A tree: a map from names (`RepoPathComponent`) to `TreeValue`s. The
// `BTreeMap` keeps the entries sorted by name.
content_hash! {
    #[derive(Default, PartialEq, Eq, Debug, Clone)]
    pub struct Tree {
        entries: BTreeMap<RepoPathComponent, TreeValue>,
    }
}
299
300impl Tree {
301    pub fn is_empty(&self) -> bool {
302        self.entries.is_empty()
303    }
304
305    pub fn entries(&self) -> TreeEntriesNonRecursiveIterator {
306        TreeEntriesNonRecursiveIterator {
307            iter: self.entries.iter(),
308        }
309    }
310
311    pub fn set(&mut self, name: RepoPathComponent, value: TreeValue) {
312        self.entries.insert(name, value);
313    }
314
315    pub fn remove(&mut self, name: &RepoPathComponent) {
316        self.entries.remove(name);
317    }
318
319    pub fn entry(&self, name: &RepoPathComponent) -> Option<TreeEntry> {
320        self.entries
321            .get_key_value(name)
322            .map(|(name, value)| TreeEntry { name, value })
323    }
324
325    pub fn value(&self, name: &RepoPathComponent) -> Option<&TreeValue> {
326        self.entries.get(name)
327    }
328}
329
330/// Calculates common prefix length of two bytes. The length to be returned is
331/// a number of hexadecimal digits.
332pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize {
333    iter_half_bytes(bytes_a)
334        .zip(iter_half_bytes(bytes_b))
335        .take_while(|(a, b)| a == b)
336        .count()
337}
338
/// Yields the hexadecimal digits of `bytes` as 4-bit values, in order: for
/// every byte first its high half, then its low half.
fn iter_half_bytes(bytes: &[u8]) -> impl ExactSizeIterator<Item = u8> + '_ {
    let digit_count = bytes.len() * 2;
    (0..digit_count).map(move |digit_index| {
        let byte = bytes[digit_index / 2];
        match digit_index % 2 {
            // Even positions hold the high half of the byte.
            0 => byte >> 4,
            _ => byte & 0xf,
        }
    })
}
349
350pub fn make_root_commit(root_change_id: ChangeId, empty_tree_id: TreeId) -> Commit {
351    let timestamp = Timestamp {
352        timestamp: MillisSinceEpoch(0),
353        tz_offset: 0,
354    };
355    let signature = Signature {
356        name: String::new(),
357        email: String::new(),
358        timestamp,
359    };
360    Commit {
361        parents: vec![],
362        predecessors: vec![],
363        root_tree: empty_tree_id,
364        change_id: root_change_id,
365        description: String::new(),
366        author: signature.clone(),
367        committer: signature,
368    }
369}
370
/// Storage interface for files, symlinks, trees, conflicts and commits.
///
/// Every `write_*` method returns the ID of the object it wrote, and the
/// matching `read_*` method takes such an ID. Failures are reported as
/// `BackendError`.
///
/// NOTE(review): the methods for files, symlinks, trees and conflicts also
/// take a `path`; presumably the repository path the object belongs to. How
/// it is used is up to the implementation -- confirm there.
pub trait Backend: Send + Sync + Debug {
    /// A unique name that identifies this backend. Written to
    /// `.jj/repo/store/backend` when the repo is created.
    fn name(&self) -> &str;

    /// The length of commit IDs in bytes.
    fn commit_id_length(&self) -> usize;

    /// The length of change IDs in bytes.
    fn change_id_length(&self) -> usize;

    /// Returns a `git2::Repository` if the backend has one to offer.
    /// NOTE(review): presumably `None` for backends that are not backed by
    /// Git -- confirm against the implementations.
    fn git_repo(&self) -> Option<git2::Repository>;

    /// Returns a reader for the contents of the file with the given ID.
    fn read_file(&self, path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>>;

    /// Writes a file whose contents are read from `contents`.
    fn write_file(&self, path: &RepoPath, contents: &mut dyn Read) -> BackendResult<FileId>;

    /// Returns the target of the symlink with the given ID.
    fn read_symlink(&self, path: &RepoPath, id: &SymlinkId) -> BackendResult<String>;

    /// Writes a symlink pointing at `target`.
    fn write_symlink(&self, path: &RepoPath, target: &str) -> BackendResult<SymlinkId>;

    /// The ID of the root commit.
    fn root_commit_id(&self) -> &CommitId;

    /// The change ID of the root commit.
    fn root_change_id(&self) -> &ChangeId;

    /// The ID of the empty tree.
    fn empty_tree_id(&self) -> &TreeId;

    /// Returns the tree with the given ID.
    fn read_tree(&self, path: &RepoPath, id: &TreeId) -> BackendResult<Tree>;

    /// Writes a tree.
    fn write_tree(&self, path: &RepoPath, contents: &Tree) -> BackendResult<TreeId>;

    /// Returns the conflict with the given ID.
    fn read_conflict(&self, path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict>;

    /// Writes a conflict.
    fn write_conflict(&self, path: &RepoPath, contents: &Conflict) -> BackendResult<ConflictId>;

    /// Returns the commit with the given ID.
    fn read_commit(&self, id: &CommitId) -> BackendResult<Commit>;

    /// Writes a commit.
    fn write_commit(&self, contents: &Commit) -> BackendResult<CommitId>;
}