jj_lib/
simple_backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::fmt::Debug;
18use std::fs;
19use std::fs::File;
20use std::io::Cursor;
21use std::io::Read as _;
22use std::io::Write as _;
23use std::path::Path;
24use std::path::PathBuf;
25use std::pin::Pin;
26use std::time::SystemTime;
27
28use async_trait::async_trait;
29use blake2::Blake2b512;
30use blake2::Digest as _;
31use futures::stream;
32use futures::stream::BoxStream;
33use pollster::FutureExt as _;
34use prost::Message as _;
35use tempfile::NamedTempFile;
36use tokio::io::AsyncRead;
37use tokio::io::AsyncReadExt as _;
38
39use crate::backend::Backend;
40use crate::backend::BackendError;
41use crate::backend::BackendResult;
42use crate::backend::ChangeId;
43use crate::backend::Commit;
44use crate::backend::CommitId;
45use crate::backend::CopyHistory;
46use crate::backend::CopyId;
47use crate::backend::CopyRecord;
48use crate::backend::FileId;
49use crate::backend::MillisSinceEpoch;
50use crate::backend::SecureSig;
51use crate::backend::Signature;
52use crate::backend::SigningFn;
53use crate::backend::SymlinkId;
54use crate::backend::Timestamp;
55use crate::backend::Tree;
56use crate::backend::TreeId;
57use crate::backend::TreeValue;
58use crate::backend::make_root_commit;
59use crate::content_hash::blake2b_hash;
60use crate::file_util::persist_content_addressed_temp_file;
61use crate::index::Index;
62use crate::merge::MergeBuilder;
63use crate::object_id::ObjectId;
64use crate::repo_path::RepoPath;
65use crate::repo_path::RepoPathBuf;
66use crate::repo_path::RepoPathComponentBuf;
67
/// Number of bytes in a commit ID handled by this backend. The root commit ID
/// is this many zero bytes.
const COMMIT_ID_LENGTH: usize = 64;
/// Number of bytes in a change ID handled by this backend. The root change ID
/// is this many zero bytes.
const CHANGE_ID_LENGTH: usize = 16;
70
71fn map_not_found_err(err: std::io::Error, id: &impl ObjectId) -> BackendError {
72    if err.kind() == std::io::ErrorKind::NotFound {
73        BackendError::ObjectNotFound {
74            object_type: id.object_type(),
75            hash: id.hex(),
76            source: Box::new(err),
77        }
78    } else {
79        BackendError::ReadObject {
80            object_type: id.object_type(),
81            hash: id.hex(),
82            source: Box::new(err),
83        }
84    }
85}
86
87fn to_other_err(err: impl Into<Box<dyn std::error::Error + Send + Sync>>) -> BackendError {
88    BackendError::Other(err.into())
89}
90
91#[derive(Debug)]
92pub struct SimpleBackend {
93    path: PathBuf,
94    root_commit_id: CommitId,
95    root_change_id: ChangeId,
96    empty_tree_id: TreeId,
97}
98
99impl SimpleBackend {
100    pub fn name() -> &'static str {
101        "Simple"
102    }
103
104    pub fn init(store_path: &Path) -> Self {
105        fs::create_dir(store_path.join("commits")).unwrap();
106        fs::create_dir(store_path.join("trees")).unwrap();
107        fs::create_dir(store_path.join("files")).unwrap();
108        fs::create_dir(store_path.join("symlinks")).unwrap();
109        fs::create_dir(store_path.join("conflicts")).unwrap();
110        let backend = Self::load(store_path);
111        let empty_tree_id = backend
112            .write_tree(RepoPath::root(), &Tree::default())
113            .block_on()
114            .unwrap();
115        assert_eq!(empty_tree_id, backend.empty_tree_id);
116        backend
117    }
118
119    pub fn load(store_path: &Path) -> Self {
120        let root_commit_id = CommitId::from_bytes(&[0; COMMIT_ID_LENGTH]);
121        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
122        let empty_tree_id = TreeId::from_hex(
123            "482ae5a29fbe856c7272f2071b8b0f0359ee2d89ff392b8a900643fbd0836eccd067b8bf41909e206c90d45d6e7d8b6686b93ecaee5fe1a9060d87b672101310",
124        );
125        Self {
126            path: store_path.to_path_buf(),
127            root_commit_id,
128            root_change_id,
129            empty_tree_id,
130        }
131    }
132
133    fn file_path(&self, id: &FileId) -> PathBuf {
134        self.path.join("files").join(id.hex())
135    }
136
137    fn symlink_path(&self, id: &SymlinkId) -> PathBuf {
138        self.path.join("symlinks").join(id.hex())
139    }
140
141    fn tree_path(&self, id: &TreeId) -> PathBuf {
142        self.path.join("trees").join(id.hex())
143    }
144
145    fn commit_path(&self, id: &CommitId) -> PathBuf {
146        self.path.join("commits").join(id.hex())
147    }
148}
149
150#[async_trait]
151impl Backend for SimpleBackend {
152    fn name(&self) -> &str {
153        Self::name()
154    }
155
156    fn commit_id_length(&self) -> usize {
157        COMMIT_ID_LENGTH
158    }
159
160    fn change_id_length(&self) -> usize {
161        CHANGE_ID_LENGTH
162    }
163
164    fn root_commit_id(&self) -> &CommitId {
165        &self.root_commit_id
166    }
167
168    fn root_change_id(&self) -> &ChangeId {
169        &self.root_change_id
170    }
171
172    fn empty_tree_id(&self) -> &TreeId {
173        &self.empty_tree_id
174    }
175
176    fn concurrency(&self) -> usize {
177        1
178    }
179
180    async fn read_file(
181        &self,
182        path: &RepoPath,
183        id: &FileId,
184    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>> {
185        let disk_path = self.file_path(id);
186        let mut file = File::open(disk_path).map_err(|err| map_not_found_err(err, id))?;
187        let mut buf = vec![];
188        file.read_to_end(&mut buf)
189            .map_err(|err| BackendError::ReadFile {
190                path: path.to_owned(),
191                id: id.clone(),
192                source: err.into(),
193            })?;
194        Ok(Box::pin(Cursor::new(buf)))
195    }
196
197    async fn write_file(
198        &self,
199        _path: &RepoPath,
200        contents: &mut (dyn AsyncRead + Send + Unpin),
201    ) -> BackendResult<FileId> {
202        // TODO: Write temporary file in the destination directory (#5712)
203        let temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
204        let mut file = temp_file.as_file();
205        let mut hasher = Blake2b512::new();
206        let mut buff: Vec<u8> = vec![0; 1 << 14];
207        loop {
208            let bytes_read = contents.read(&mut buff).await.map_err(to_other_err)?;
209            if bytes_read == 0 {
210                break;
211            }
212            let bytes = &buff[..bytes_read];
213            file.write_all(bytes).map_err(to_other_err)?;
214            hasher.update(bytes);
215        }
216        file.flush().map_err(to_other_err)?;
217        let id = FileId::new(hasher.finalize().to_vec());
218
219        persist_content_addressed_temp_file(temp_file, self.file_path(&id))
220            .map_err(to_other_err)?;
221        Ok(id)
222    }
223
224    async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
225        let path = self.symlink_path(id);
226        let target = fs::read_to_string(path).map_err(|err| map_not_found_err(err, id))?;
227        Ok(target)
228    }
229
230    async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
231        // TODO: Write temporary file in the destination directory (#5712)
232        let mut temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
233        temp_file
234            .write_all(target.as_bytes())
235            .map_err(to_other_err)?;
236        let mut hasher = Blake2b512::new();
237        hasher.update(target.as_bytes());
238        let id = SymlinkId::new(hasher.finalize().to_vec());
239
240        persist_content_addressed_temp_file(temp_file, self.symlink_path(&id))
241            .map_err(to_other_err)?;
242        Ok(id)
243    }
244
245    async fn read_copy(&self, _id: &CopyId) -> BackendResult<CopyHistory> {
246        Err(BackendError::Unsupported(
247            "The simple backend doesn't support copies".to_string(),
248        ))
249    }
250
251    async fn write_copy(&self, _contents: &CopyHistory) -> BackendResult<CopyId> {
252        Err(BackendError::Unsupported(
253            "The simple backend doesn't support copies".to_string(),
254        ))
255    }
256
257    async fn get_related_copies(&self, _copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>> {
258        Err(BackendError::Unsupported(
259            "The simple backend doesn't support copies".to_string(),
260        ))
261    }
262
263    async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
264        let path = self.tree_path(id);
265        let buf = fs::read(path).map_err(|err| map_not_found_err(err, id))?;
266
267        let proto = crate::protos::simple_store::Tree::decode(&*buf).map_err(to_other_err)?;
268        Ok(tree_from_proto(proto))
269    }
270
271    async fn write_tree(&self, _path: &RepoPath, tree: &Tree) -> BackendResult<TreeId> {
272        // TODO: Write temporary file in the destination directory (#5712)
273        let temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
274
275        let proto = tree_to_proto(tree);
276        temp_file
277            .as_file()
278            .write_all(&proto.encode_to_vec())
279            .map_err(to_other_err)?;
280
281        let id = TreeId::new(blake2b_hash(tree).to_vec());
282
283        persist_content_addressed_temp_file(temp_file, self.tree_path(&id))
284            .map_err(to_other_err)?;
285        Ok(id)
286    }
287
288    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
289        if *id == self.root_commit_id {
290            return Ok(make_root_commit(
291                self.root_change_id().clone(),
292                self.empty_tree_id.clone(),
293            ));
294        }
295
296        let path = self.commit_path(id);
297        let buf = fs::read(path).map_err(|err| map_not_found_err(err, id))?;
298
299        let proto = crate::protos::simple_store::Commit::decode(&*buf).map_err(to_other_err)?;
300        Ok(commit_from_proto(proto))
301    }
302
303    async fn write_commit(
304        &self,
305        mut commit: Commit,
306        sign_with: Option<&mut SigningFn>,
307    ) -> BackendResult<(CommitId, Commit)> {
308        assert!(commit.secure_sig.is_none(), "commit.secure_sig was set");
309
310        if commit.parents.is_empty() {
311            return Err(BackendError::Other(
312                "Cannot write a commit with no parents".into(),
313            ));
314        }
315        // TODO: Write temporary file in the destination directory (#5712)
316        let temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
317
318        let mut proto = commit_to_proto(&commit);
319        if let Some(sign) = sign_with {
320            let data = proto.encode_to_vec();
321            let sig = sign(&data).map_err(to_other_err)?;
322            proto.secure_sig = Some(sig.clone());
323            commit.secure_sig = Some(SecureSig { data, sig });
324        }
325
326        temp_file
327            .as_file()
328            .write_all(&proto.encode_to_vec())
329            .map_err(to_other_err)?;
330
331        let id = CommitId::new(blake2b_hash(&commit).to_vec());
332
333        persist_content_addressed_temp_file(temp_file, self.commit_path(&id))
334            .map_err(to_other_err)?;
335        Ok((id, commit))
336    }
337
338    fn get_copy_records(
339        &self,
340        _paths: Option<&[RepoPathBuf]>,
341        _root: &CommitId,
342        _head: &CommitId,
343    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>> {
344        Ok(Box::pin(stream::empty()))
345    }
346
347    fn gc(&self, _index: &dyn Index, _keep_newer: SystemTime) -> BackendResult<()> {
348        Ok(())
349    }
350}
351
352#[expect(clippy::assigning_clones)]
353pub fn commit_to_proto(commit: &Commit) -> crate::protos::simple_store::Commit {
354    let mut proto = crate::protos::simple_store::Commit::default();
355    for parent in &commit.parents {
356        proto.parents.push(parent.to_bytes());
357    }
358    for predecessor in &commit.predecessors {
359        proto.predecessors.push(predecessor.to_bytes());
360    }
361    proto.root_tree = commit.root_tree.iter().map(|id| id.to_bytes()).collect();
362    proto.change_id = commit.change_id.to_bytes();
363    proto.description = commit.description.clone();
364    proto.author = Some(signature_to_proto(&commit.author));
365    proto.committer = Some(signature_to_proto(&commit.committer));
366    proto
367}
368
369fn commit_from_proto(mut proto: crate::protos::simple_store::Commit) -> Commit {
370    // Note how .take() sets the secure_sig field to None before we encode the data.
371    // Needs to be done first since proto is partially moved a bunch below
372    let secure_sig = proto.secure_sig.take().map(|sig| SecureSig {
373        data: proto.encode_to_vec(),
374        sig,
375    });
376
377    let parents = proto.parents.into_iter().map(CommitId::new).collect();
378    let predecessors = proto.predecessors.into_iter().map(CommitId::new).collect();
379    let merge_builder: MergeBuilder<_> = proto.root_tree.into_iter().map(TreeId::new).collect();
380    let root_tree = merge_builder.build();
381    let change_id = ChangeId::new(proto.change_id);
382    Commit {
383        parents,
384        predecessors,
385        root_tree,
386        change_id,
387        description: proto.description,
388        author: signature_from_proto(proto.author.unwrap_or_default()),
389        committer: signature_from_proto(proto.committer.unwrap_or_default()),
390        secure_sig,
391    }
392}
393
394fn tree_to_proto(tree: &Tree) -> crate::protos::simple_store::Tree {
395    let mut proto = crate::protos::simple_store::Tree::default();
396    for entry in tree.entries() {
397        proto
398            .entries
399            .push(crate::protos::simple_store::tree::Entry {
400                name: entry.name().as_internal_str().to_owned(),
401                value: Some(tree_value_to_proto(entry.value())),
402            });
403    }
404    proto
405}
406
407fn tree_from_proto(proto: crate::protos::simple_store::Tree) -> Tree {
408    // Serialized data should be sorted
409    let entries = proto
410        .entries
411        .into_iter()
412        .map(|proto_entry| {
413            let value = tree_value_from_proto(proto_entry.value.unwrap());
414            (RepoPathComponentBuf::new(proto_entry.name).unwrap(), value)
415        })
416        .collect();
417    Tree::from_sorted_entries(entries)
418}
419
420fn tree_value_to_proto(value: &TreeValue) -> crate::protos::simple_store::TreeValue {
421    let mut proto = crate::protos::simple_store::TreeValue::default();
422    match value {
423        TreeValue::File {
424            id,
425            executable,
426            copy_id,
427        } => {
428            proto.value = Some(crate::protos::simple_store::tree_value::Value::File(
429                crate::protos::simple_store::tree_value::File {
430                    id: id.to_bytes(),
431                    executable: *executable,
432                    copy_id: copy_id.to_bytes(),
433                },
434            ));
435        }
436        TreeValue::Symlink(id) => {
437            proto.value = Some(crate::protos::simple_store::tree_value::Value::SymlinkId(
438                id.to_bytes(),
439            ));
440        }
441        TreeValue::GitSubmodule(_id) => {
442            panic!("cannot store git submodules");
443        }
444        TreeValue::Tree(id) => {
445            proto.value = Some(crate::protos::simple_store::tree_value::Value::TreeId(
446                id.to_bytes(),
447            ));
448        }
449    }
450    proto
451}
452
453fn tree_value_from_proto(proto: crate::protos::simple_store::TreeValue) -> TreeValue {
454    match proto.value.unwrap() {
455        crate::protos::simple_store::tree_value::Value::TreeId(id) => {
456            TreeValue::Tree(TreeId::new(id))
457        }
458        crate::protos::simple_store::tree_value::Value::File(
459            crate::protos::simple_store::tree_value::File {
460                id,
461                executable,
462                copy_id,
463            },
464        ) => TreeValue::File {
465            id: FileId::new(id),
466            executable,
467            copy_id: CopyId::new(copy_id),
468        },
469        crate::protos::simple_store::tree_value::Value::SymlinkId(id) => {
470            TreeValue::Symlink(SymlinkId::new(id))
471        }
472    }
473}
474
475fn signature_to_proto(signature: &Signature) -> crate::protos::simple_store::commit::Signature {
476    crate::protos::simple_store::commit::Signature {
477        name: signature.name.clone(),
478        email: signature.email.clone(),
479        timestamp: Some(crate::protos::simple_store::commit::Timestamp {
480            millis_since_epoch: signature.timestamp.timestamp.0,
481            tz_offset: signature.timestamp.tz_offset,
482        }),
483    }
484}
485
486fn signature_from_proto(proto: crate::protos::simple_store::commit::Signature) -> Signature {
487    let timestamp = proto.timestamp.unwrap_or_default();
488    Signature {
489        name: proto.name,
490        email: proto.email,
491        timestamp: Timestamp {
492            timestamp: MillisSinceEpoch(timestamp.millis_since_epoch),
493            tz_offset: timestamp.tz_offset,
494        },
495    }
496}
497
#[cfg(test)]
mod tests {
    use assert_matches::assert_matches;
    use pollster::FutureExt as _;

    use super::*;
    use crate::merge::Merge;
    use crate::tests::new_temp_dir;

    /// Fixed signature used for both author and committer in the tests.
    fn create_signature() -> Signature {
        let timestamp = Timestamp {
            timestamp: MillisSinceEpoch(0),
            tz_offset: 0,
        };
        Signature {
            name: "Someone".to_string(),
            email: "someone@example.com".to_string(),
            timestamp,
        }
    }

    /// Test that parents get written correctly
    #[test]
    fn write_commit_parents() {
        let temp_dir = new_temp_dir();
        let backend = SimpleBackend::init(temp_dir.path());

        // Starts out without parents; each scenario below replaces them.
        let mut commit = Commit {
            parents: vec![],
            predecessors: vec![],
            root_tree: Merge::resolved(backend.empty_tree_id().clone()),
            change_id: ChangeId::from_hex("abc123"),
            description: "".to_string(),
            author: create_signature(),
            committer: create_signature(),
            secure_sig: None,
        };

        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
            backend.write_commit(commit, None).block_on()
        };
        // Writes the commit, reads it back, checks that nothing changed, and
        // hands out the new ID.
        let assert_round_trip = |expected: &Commit| -> CommitId {
            let (id, _written) = write_commit(expected.clone()).unwrap();
            let read_back = backend.read_commit(&id).block_on().unwrap();
            assert_eq!(&read_back, expected);
            id
        };

        // No parents
        assert_matches!(
            write_commit(commit.clone()),
            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
        );

        // Only root commit as parent
        commit.parents = vec![backend.root_commit_id().clone()];
        let first_id = assert_round_trip(&commit);

        // Only non-root commit as parent
        commit.parents = vec![first_id.clone()];
        let second_id = assert_round_trip(&commit);

        // Merge commit
        commit.parents = vec![first_id.clone(), second_id];
        assert_round_trip(&commit);

        // Merge commit with root as one parent
        commit.parents = vec![first_id, backend.root_commit_id().clone()];
        assert_round_trip(&commit);
    }
}