jj_lib/
simple_backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::fmt::Debug;
18use std::fs;
19use std::fs::File;
20use std::io::Cursor;
21use std::io::Read as _;
22use std::io::Write as _;
23use std::path::Path;
24use std::path::PathBuf;
25use std::pin::Pin;
26use std::time::SystemTime;
27
28use async_trait::async_trait;
29use blake2::Blake2b512;
30use blake2::Digest as _;
31use futures::stream;
32use futures::stream::BoxStream;
33use pollster::FutureExt as _;
34use prost::Message as _;
35use tempfile::NamedTempFile;
36use tokio::io::AsyncRead;
37use tokio::io::AsyncReadExt as _;
38
39use crate::backend::Backend;
40use crate::backend::BackendError;
41use crate::backend::BackendResult;
42use crate::backend::ChangeId;
43use crate::backend::Commit;
44use crate::backend::CommitId;
45use crate::backend::CopyHistory;
46use crate::backend::CopyId;
47use crate::backend::CopyRecord;
48use crate::backend::FileId;
49use crate::backend::MergedTreeId;
50use crate::backend::MillisSinceEpoch;
51use crate::backend::SecureSig;
52use crate::backend::Signature;
53use crate::backend::SigningFn;
54use crate::backend::SymlinkId;
55use crate::backend::Timestamp;
56use crate::backend::Tree;
57use crate::backend::TreeId;
58use crate::backend::TreeValue;
59use crate::backend::make_root_commit;
60use crate::content_hash::blake2b_hash;
61use crate::file_util::persist_content_addressed_temp_file;
62use crate::index::Index;
63use crate::merge::MergeBuilder;
64use crate::object_id::ObjectId;
65use crate::repo_path::RepoPath;
66use crate::repo_path::RepoPathBuf;
67use crate::repo_path::RepoPathComponentBuf;
68
/// Number of bytes in a commit id of this backend (64 bytes, i.e. 512 bits).
const COMMIT_ID_LENGTH: usize = 64;
/// Number of bytes in a change id of this backend.
const CHANGE_ID_LENGTH: usize = 16;
71
72fn map_not_found_err(err: std::io::Error, id: &impl ObjectId) -> BackendError {
73    if err.kind() == std::io::ErrorKind::NotFound {
74        BackendError::ObjectNotFound {
75            object_type: id.object_type(),
76            hash: id.hex(),
77            source: Box::new(err),
78        }
79    } else {
80        BackendError::ReadObject {
81            object_type: id.object_type(),
82            hash: id.hex(),
83            source: Box::new(err),
84        }
85    }
86}
87
88fn to_other_err(err: impl Into<Box<dyn std::error::Error + Send + Sync>>) -> BackendError {
89    BackendError::Other(err.into())
90}
91
92#[derive(Debug)]
93pub struct SimpleBackend {
94    path: PathBuf,
95    root_commit_id: CommitId,
96    root_change_id: ChangeId,
97    empty_tree_id: TreeId,
98}
99
100impl SimpleBackend {
101    pub fn name() -> &'static str {
102        "Simple"
103    }
104
105    pub fn init(store_path: &Path) -> Self {
106        fs::create_dir(store_path.join("commits")).unwrap();
107        fs::create_dir(store_path.join("trees")).unwrap();
108        fs::create_dir(store_path.join("files")).unwrap();
109        fs::create_dir(store_path.join("symlinks")).unwrap();
110        fs::create_dir(store_path.join("conflicts")).unwrap();
111        let backend = Self::load(store_path);
112        let empty_tree_id = backend
113            .write_tree(RepoPath::root(), &Tree::default())
114            .block_on()
115            .unwrap();
116        assert_eq!(empty_tree_id, backend.empty_tree_id);
117        backend
118    }
119
120    pub fn load(store_path: &Path) -> Self {
121        let root_commit_id = CommitId::from_bytes(&[0; COMMIT_ID_LENGTH]);
122        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
123        let empty_tree_id = TreeId::from_hex(
124            "482ae5a29fbe856c7272f2071b8b0f0359ee2d89ff392b8a900643fbd0836eccd067b8bf41909e206c90d45d6e7d8b6686b93ecaee5fe1a9060d87b672101310",
125        );
126        Self {
127            path: store_path.to_path_buf(),
128            root_commit_id,
129            root_change_id,
130            empty_tree_id,
131        }
132    }
133
134    fn file_path(&self, id: &FileId) -> PathBuf {
135        self.path.join("files").join(id.hex())
136    }
137
138    fn symlink_path(&self, id: &SymlinkId) -> PathBuf {
139        self.path.join("symlinks").join(id.hex())
140    }
141
142    fn tree_path(&self, id: &TreeId) -> PathBuf {
143        self.path.join("trees").join(id.hex())
144    }
145
146    fn commit_path(&self, id: &CommitId) -> PathBuf {
147        self.path.join("commits").join(id.hex())
148    }
149}
150
151#[async_trait]
152impl Backend for SimpleBackend {
153    fn name(&self) -> &str {
154        Self::name()
155    }
156
157    fn commit_id_length(&self) -> usize {
158        COMMIT_ID_LENGTH
159    }
160
161    fn change_id_length(&self) -> usize {
162        CHANGE_ID_LENGTH
163    }
164
165    fn root_commit_id(&self) -> &CommitId {
166        &self.root_commit_id
167    }
168
169    fn root_change_id(&self) -> &ChangeId {
170        &self.root_change_id
171    }
172
173    fn empty_tree_id(&self) -> &TreeId {
174        &self.empty_tree_id
175    }
176
177    fn concurrency(&self) -> usize {
178        1
179    }
180
181    async fn read_file(
182        &self,
183        path: &RepoPath,
184        id: &FileId,
185    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>> {
186        let disk_path = self.file_path(id);
187        let mut file = File::open(disk_path).map_err(|err| map_not_found_err(err, id))?;
188        let mut buf = vec![];
189        file.read_to_end(&mut buf)
190            .map_err(|err| BackendError::ReadFile {
191                path: path.to_owned(),
192                id: id.clone(),
193                source: err.into(),
194            })?;
195        Ok(Box::pin(Cursor::new(buf)))
196    }
197
198    async fn write_file(
199        &self,
200        _path: &RepoPath,
201        contents: &mut (dyn AsyncRead + Send + Unpin),
202    ) -> BackendResult<FileId> {
203        // TODO: Write temporary file in the destination directory (#5712)
204        let temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
205        let mut file = temp_file.as_file();
206        let mut hasher = Blake2b512::new();
207        let mut buff: Vec<u8> = vec![0; 1 << 14];
208        loop {
209            let bytes_read = contents.read(&mut buff).await.map_err(to_other_err)?;
210            if bytes_read == 0 {
211                break;
212            }
213            let bytes = &buff[..bytes_read];
214            file.write_all(bytes).map_err(to_other_err)?;
215            hasher.update(bytes);
216        }
217        file.flush().map_err(to_other_err)?;
218        let id = FileId::new(hasher.finalize().to_vec());
219
220        persist_content_addressed_temp_file(temp_file, self.file_path(&id))
221            .map_err(to_other_err)?;
222        Ok(id)
223    }
224
225    async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
226        let path = self.symlink_path(id);
227        let target = fs::read_to_string(path).map_err(|err| map_not_found_err(err, id))?;
228        Ok(target)
229    }
230
231    async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
232        // TODO: Write temporary file in the destination directory (#5712)
233        let mut temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
234        temp_file
235            .write_all(target.as_bytes())
236            .map_err(to_other_err)?;
237        let mut hasher = Blake2b512::new();
238        hasher.update(target.as_bytes());
239        let id = SymlinkId::new(hasher.finalize().to_vec());
240
241        persist_content_addressed_temp_file(temp_file, self.symlink_path(&id))
242            .map_err(to_other_err)?;
243        Ok(id)
244    }
245
246    async fn read_copy(&self, _id: &CopyId) -> BackendResult<CopyHistory> {
247        Err(BackendError::Unsupported(
248            "The simple backend doesn't support copies".to_string(),
249        ))
250    }
251
252    async fn write_copy(&self, _contents: &CopyHistory) -> BackendResult<CopyId> {
253        Err(BackendError::Unsupported(
254            "The simple backend doesn't support copies".to_string(),
255        ))
256    }
257
258    async fn get_related_copies(&self, _copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>> {
259        Err(BackendError::Unsupported(
260            "The simple backend doesn't support copies".to_string(),
261        ))
262    }
263
264    async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
265        let path = self.tree_path(id);
266        let buf = fs::read(path).map_err(|err| map_not_found_err(err, id))?;
267
268        let proto = crate::protos::simple_store::Tree::decode(&*buf).map_err(to_other_err)?;
269        Ok(tree_from_proto(proto))
270    }
271
272    async fn write_tree(&self, _path: &RepoPath, tree: &Tree) -> BackendResult<TreeId> {
273        // TODO: Write temporary file in the destination directory (#5712)
274        let temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
275
276        let proto = tree_to_proto(tree);
277        temp_file
278            .as_file()
279            .write_all(&proto.encode_to_vec())
280            .map_err(to_other_err)?;
281
282        let id = TreeId::new(blake2b_hash(tree).to_vec());
283
284        persist_content_addressed_temp_file(temp_file, self.tree_path(&id))
285            .map_err(to_other_err)?;
286        Ok(id)
287    }
288
289    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
290        if *id == self.root_commit_id {
291            return Ok(make_root_commit(
292                self.root_change_id().clone(),
293                self.empty_tree_id.clone(),
294            ));
295        }
296
297        let path = self.commit_path(id);
298        let buf = fs::read(path).map_err(|err| map_not_found_err(err, id))?;
299
300        let proto = crate::protos::simple_store::Commit::decode(&*buf).map_err(to_other_err)?;
301        Ok(commit_from_proto(proto))
302    }
303
304    async fn write_commit(
305        &self,
306        mut commit: Commit,
307        sign_with: Option<&mut SigningFn>,
308    ) -> BackendResult<(CommitId, Commit)> {
309        assert!(commit.secure_sig.is_none(), "commit.secure_sig was set");
310
311        if commit.parents.is_empty() {
312            return Err(BackendError::Other(
313                "Cannot write a commit with no parents".into(),
314            ));
315        }
316        // TODO: Write temporary file in the destination directory (#5712)
317        let temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
318
319        let mut proto = commit_to_proto(&commit);
320        if let Some(sign) = sign_with {
321            let data = proto.encode_to_vec();
322            let sig = sign(&data).map_err(to_other_err)?;
323            proto.secure_sig = Some(sig.clone());
324            commit.secure_sig = Some(SecureSig { data, sig });
325        }
326
327        temp_file
328            .as_file()
329            .write_all(&proto.encode_to_vec())
330            .map_err(to_other_err)?;
331
332        let id = CommitId::new(blake2b_hash(&commit).to_vec());
333
334        persist_content_addressed_temp_file(temp_file, self.commit_path(&id))
335            .map_err(to_other_err)?;
336        Ok((id, commit))
337    }
338
339    fn get_copy_records(
340        &self,
341        _paths: Option<&[RepoPathBuf]>,
342        _root: &CommitId,
343        _head: &CommitId,
344    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>> {
345        Ok(Box::pin(stream::empty()))
346    }
347
348    fn gc(&self, _index: &dyn Index, _keep_newer: SystemTime) -> BackendResult<()> {
349        Ok(())
350    }
351}
352
353#[expect(clippy::assigning_clones)]
354pub fn commit_to_proto(commit: &Commit) -> crate::protos::simple_store::Commit {
355    let mut proto = crate::protos::simple_store::Commit::default();
356    for parent in &commit.parents {
357        proto.parents.push(parent.to_bytes());
358    }
359    for predecessor in &commit.predecessors {
360        proto.predecessors.push(predecessor.to_bytes());
361    }
362    match &commit.root_tree {
363        MergedTreeId::Legacy(_) => {
364            panic!("The simple backend doesn't support legacy trees");
365        }
366        MergedTreeId::Merge(tree_ids) => {
367            proto.root_tree = tree_ids.iter().map(|id| id.to_bytes()).collect();
368        }
369    }
370    proto.change_id = commit.change_id.to_bytes();
371    proto.description = commit.description.clone();
372    proto.author = Some(signature_to_proto(&commit.author));
373    proto.committer = Some(signature_to_proto(&commit.committer));
374    proto
375}
376
377fn commit_from_proto(mut proto: crate::protos::simple_store::Commit) -> Commit {
378    // Note how .take() sets the secure_sig field to None before we encode the data.
379    // Needs to be done first since proto is partially moved a bunch below
380    let secure_sig = proto.secure_sig.take().map(|sig| SecureSig {
381        data: proto.encode_to_vec(),
382        sig,
383    });
384
385    let parents = proto.parents.into_iter().map(CommitId::new).collect();
386    let predecessors = proto.predecessors.into_iter().map(CommitId::new).collect();
387    let merge_builder: MergeBuilder<_> = proto.root_tree.into_iter().map(TreeId::new).collect();
388    let root_tree = MergedTreeId::Merge(merge_builder.build());
389    let change_id = ChangeId::new(proto.change_id);
390    Commit {
391        parents,
392        predecessors,
393        root_tree,
394        change_id,
395        description: proto.description,
396        author: signature_from_proto(proto.author.unwrap_or_default()),
397        committer: signature_from_proto(proto.committer.unwrap_or_default()),
398        secure_sig,
399    }
400}
401
402fn tree_to_proto(tree: &Tree) -> crate::protos::simple_store::Tree {
403    let mut proto = crate::protos::simple_store::Tree::default();
404    for entry in tree.entries() {
405        proto
406            .entries
407            .push(crate::protos::simple_store::tree::Entry {
408                name: entry.name().as_internal_str().to_owned(),
409                value: Some(tree_value_to_proto(entry.value())),
410            });
411    }
412    proto
413}
414
415fn tree_from_proto(proto: crate::protos::simple_store::Tree) -> Tree {
416    // Serialized data should be sorted
417    let entries = proto
418        .entries
419        .into_iter()
420        .map(|proto_entry| {
421            let value = tree_value_from_proto(proto_entry.value.unwrap());
422            (RepoPathComponentBuf::new(proto_entry.name).unwrap(), value)
423        })
424        .collect();
425    Tree::from_sorted_entries(entries)
426}
427
428fn tree_value_to_proto(value: &TreeValue) -> crate::protos::simple_store::TreeValue {
429    let mut proto = crate::protos::simple_store::TreeValue::default();
430    match value {
431        TreeValue::File {
432            id,
433            executable,
434            copy_id,
435        } => {
436            proto.value = Some(crate::protos::simple_store::tree_value::Value::File(
437                crate::protos::simple_store::tree_value::File {
438                    id: id.to_bytes(),
439                    executable: *executable,
440                    copy_id: copy_id.to_bytes(),
441                },
442            ));
443        }
444        TreeValue::Symlink(id) => {
445            proto.value = Some(crate::protos::simple_store::tree_value::Value::SymlinkId(
446                id.to_bytes(),
447            ));
448        }
449        TreeValue::GitSubmodule(_id) => {
450            panic!("cannot store git submodules");
451        }
452        TreeValue::Tree(id) => {
453            proto.value = Some(crate::protos::simple_store::tree_value::Value::TreeId(
454                id.to_bytes(),
455            ));
456        }
457    }
458    proto
459}
460
461fn tree_value_from_proto(proto: crate::protos::simple_store::TreeValue) -> TreeValue {
462    match proto.value.unwrap() {
463        crate::protos::simple_store::tree_value::Value::TreeId(id) => {
464            TreeValue::Tree(TreeId::new(id))
465        }
466        crate::protos::simple_store::tree_value::Value::File(
467            crate::protos::simple_store::tree_value::File {
468                id,
469                executable,
470                copy_id,
471            },
472        ) => TreeValue::File {
473            id: FileId::new(id),
474            executable,
475            copy_id: CopyId::new(copy_id),
476        },
477        crate::protos::simple_store::tree_value::Value::SymlinkId(id) => {
478            TreeValue::Symlink(SymlinkId::new(id))
479        }
480    }
481}
482
483fn signature_to_proto(signature: &Signature) -> crate::protos::simple_store::commit::Signature {
484    crate::protos::simple_store::commit::Signature {
485        name: signature.name.clone(),
486        email: signature.email.clone(),
487        timestamp: Some(crate::protos::simple_store::commit::Timestamp {
488            millis_since_epoch: signature.timestamp.timestamp.0,
489            tz_offset: signature.timestamp.tz_offset,
490        }),
491    }
492}
493
494fn signature_from_proto(proto: crate::protos::simple_store::commit::Signature) -> Signature {
495    let timestamp = proto.timestamp.unwrap_or_default();
496    Signature {
497        name: proto.name,
498        email: proto.email,
499        timestamp: Timestamp {
500            timestamp: MillisSinceEpoch(timestamp.millis_since_epoch),
501            tz_offset: timestamp.tz_offset,
502        },
503    }
504}
505
506#[cfg(test)]
507mod tests {
508    use assert_matches::assert_matches;
509    use pollster::FutureExt as _;
510
511    use super::*;
512    use crate::tests::new_temp_dir;
513
514    /// Test that parents get written correctly
515    #[test]
516    fn write_commit_parents() {
517        let temp_dir = new_temp_dir();
518        let store_path = temp_dir.path();
519
520        let backend = SimpleBackend::init(store_path);
521        let mut commit = Commit {
522            parents: vec![],
523            predecessors: vec![],
524            root_tree: MergedTreeId::resolved(backend.empty_tree_id().clone()),
525            change_id: ChangeId::from_hex("abc123"),
526            description: "".to_string(),
527            author: create_signature(),
528            committer: create_signature(),
529            secure_sig: None,
530        };
531
532        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
533            backend.write_commit(commit, None).block_on()
534        };
535
536        // No parents
537        commit.parents = vec![];
538        assert_matches!(
539            write_commit(commit.clone()),
540            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
541        );
542
543        // Only root commit as parent
544        commit.parents = vec![backend.root_commit_id().clone()];
545        let first_id = write_commit(commit.clone()).unwrap().0;
546        let first_commit = backend.read_commit(&first_id).block_on().unwrap();
547        assert_eq!(first_commit, commit);
548
549        // Only non-root commit as parent
550        commit.parents = vec![first_id.clone()];
551        let second_id = write_commit(commit.clone()).unwrap().0;
552        let second_commit = backend.read_commit(&second_id).block_on().unwrap();
553        assert_eq!(second_commit, commit);
554
555        // Merge commit
556        commit.parents = vec![first_id.clone(), second_id.clone()];
557        let merge_id = write_commit(commit.clone()).unwrap().0;
558        let merge_commit = backend.read_commit(&merge_id).block_on().unwrap();
559        assert_eq!(merge_commit, commit);
560
561        // Merge commit with root as one parent
562        commit.parents = vec![first_id, backend.root_commit_id().clone()];
563        let root_merge_id = write_commit(commit.clone()).unwrap().0;
564        let root_merge_commit = backend.read_commit(&root_merge_id).block_on().unwrap();
565        assert_eq!(root_merge_commit, commit);
566    }
567
568    fn create_signature() -> Signature {
569        Signature {
570            name: "Someone".to_string(),
571            email: "someone@example.com".to_string(),
572            timestamp: Timestamp {
573                timestamp: MillisSinceEpoch(0),
574                tz_offset: 0,
575            },
576        }
577    }
578}