jj_lib/
simple_backend.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::any::Any;
18use std::fmt::Debug;
19use std::fs;
20use std::fs::File;
21use std::io::Cursor;
22use std::io::Read as _;
23use std::io::Write as _;
24use std::path::Path;
25use std::path::PathBuf;
26use std::pin::Pin;
27use std::time::SystemTime;
28
29use async_trait::async_trait;
30use blake2::Blake2b512;
31use blake2::Digest as _;
32use futures::stream;
33use futures::stream::BoxStream;
34use pollster::FutureExt as _;
35use prost::Message as _;
36use tempfile::NamedTempFile;
37use tokio::io::AsyncRead;
38use tokio::io::AsyncReadExt as _;
39
40use crate::backend::Backend;
41use crate::backend::BackendError;
42use crate::backend::BackendResult;
43use crate::backend::ChangeId;
44use crate::backend::Commit;
45use crate::backend::CommitId;
46use crate::backend::Conflict;
47use crate::backend::ConflictId;
48use crate::backend::ConflictTerm;
49use crate::backend::CopyHistory;
50use crate::backend::CopyId;
51use crate::backend::CopyRecord;
52use crate::backend::FileId;
53use crate::backend::MergedTreeId;
54use crate::backend::MillisSinceEpoch;
55use crate::backend::SecureSig;
56use crate::backend::Signature;
57use crate::backend::SigningFn;
58use crate::backend::SymlinkId;
59use crate::backend::Timestamp;
60use crate::backend::Tree;
61use crate::backend::TreeId;
62use crate::backend::TreeValue;
63use crate::backend::make_root_commit;
64use crate::content_hash::blake2b_hash;
65use crate::file_util::persist_content_addressed_temp_file;
66use crate::index::Index;
67use crate::merge::MergeBuilder;
68use crate::object_id::ObjectId;
69use crate::repo_path::RepoPath;
70use crate::repo_path::RepoPathBuf;
71use crate::repo_path::RepoPathComponentBuf;
72
/// Number of bytes in a commit id. Reported by `commit_id_length()` and used
/// to size the all-zero root commit id.
const COMMIT_ID_LENGTH: usize = 64;
/// Number of bytes in a change id. Reported by `change_id_length()` and used
/// to size the all-zero root change id.
const CHANGE_ID_LENGTH: usize = 16;
75
76fn map_not_found_err(err: std::io::Error, id: &impl ObjectId) -> BackendError {
77    if err.kind() == std::io::ErrorKind::NotFound {
78        BackendError::ObjectNotFound {
79            object_type: id.object_type(),
80            hash: id.hex(),
81            source: Box::new(err),
82        }
83    } else {
84        BackendError::ReadObject {
85            object_type: id.object_type(),
86            hash: id.hex(),
87            source: Box::new(err),
88        }
89    }
90}
91
92fn to_other_err(err: impl Into<Box<dyn std::error::Error + Send + Sync>>) -> BackendError {
93    BackendError::Other(err.into())
94}
95
96#[derive(Debug)]
97pub struct SimpleBackend {
98    path: PathBuf,
99    root_commit_id: CommitId,
100    root_change_id: ChangeId,
101    empty_tree_id: TreeId,
102}
103
104impl SimpleBackend {
105    pub fn name() -> &'static str {
106        "Simple"
107    }
108
109    pub fn init(store_path: &Path) -> Self {
110        fs::create_dir(store_path.join("commits")).unwrap();
111        fs::create_dir(store_path.join("trees")).unwrap();
112        fs::create_dir(store_path.join("files")).unwrap();
113        fs::create_dir(store_path.join("symlinks")).unwrap();
114        fs::create_dir(store_path.join("conflicts")).unwrap();
115        let backend = Self::load(store_path);
116        let empty_tree_id = backend
117            .write_tree(RepoPath::root(), &Tree::default())
118            .block_on()
119            .unwrap();
120        assert_eq!(empty_tree_id, backend.empty_tree_id);
121        backend
122    }
123
124    pub fn load(store_path: &Path) -> Self {
125        let root_commit_id = CommitId::from_bytes(&[0; COMMIT_ID_LENGTH]);
126        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
127        let empty_tree_id = TreeId::from_hex(
128            "482ae5a29fbe856c7272f2071b8b0f0359ee2d89ff392b8a900643fbd0836eccd067b8bf41909e206c90d45d6e7d8b6686b93ecaee5fe1a9060d87b672101310",
129        );
130        Self {
131            path: store_path.to_path_buf(),
132            root_commit_id,
133            root_change_id,
134            empty_tree_id,
135        }
136    }
137
138    fn file_path(&self, id: &FileId) -> PathBuf {
139        self.path.join("files").join(id.hex())
140    }
141
142    fn symlink_path(&self, id: &SymlinkId) -> PathBuf {
143        self.path.join("symlinks").join(id.hex())
144    }
145
146    fn tree_path(&self, id: &TreeId) -> PathBuf {
147        self.path.join("trees").join(id.hex())
148    }
149
150    fn commit_path(&self, id: &CommitId) -> PathBuf {
151        self.path.join("commits").join(id.hex())
152    }
153
154    fn conflict_path(&self, id: &ConflictId) -> PathBuf {
155        self.path.join("conflicts").join(id.hex())
156    }
157}
158
159#[async_trait]
160impl Backend for SimpleBackend {
161    fn as_any(&self) -> &dyn Any {
162        self
163    }
164
165    fn name(&self) -> &str {
166        Self::name()
167    }
168
169    fn commit_id_length(&self) -> usize {
170        COMMIT_ID_LENGTH
171    }
172
173    fn change_id_length(&self) -> usize {
174        CHANGE_ID_LENGTH
175    }
176
177    fn root_commit_id(&self) -> &CommitId {
178        &self.root_commit_id
179    }
180
181    fn root_change_id(&self) -> &ChangeId {
182        &self.root_change_id
183    }
184
185    fn empty_tree_id(&self) -> &TreeId {
186        &self.empty_tree_id
187    }
188
189    fn concurrency(&self) -> usize {
190        1
191    }
192
193    async fn read_file(
194        &self,
195        path: &RepoPath,
196        id: &FileId,
197    ) -> BackendResult<Pin<Box<dyn AsyncRead + Send>>> {
198        let disk_path = self.file_path(id);
199        let mut file = File::open(disk_path).map_err(|err| map_not_found_err(err, id))?;
200        let mut buf = vec![];
201        file.read_to_end(&mut buf)
202            .map_err(|err| BackendError::ReadFile {
203                path: path.to_owned(),
204                id: id.clone(),
205                source: err.into(),
206            })?;
207        Ok(Box::pin(Cursor::new(buf)))
208    }
209
210    async fn write_file(
211        &self,
212        _path: &RepoPath,
213        contents: &mut (dyn AsyncRead + Send + Unpin),
214    ) -> BackendResult<FileId> {
215        // TODO: Write temporary file in the destination directory (#5712)
216        let temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
217        let mut file = temp_file.as_file();
218        let mut hasher = Blake2b512::new();
219        let mut buff: Vec<u8> = vec![0; 1 << 14];
220        loop {
221            let bytes_read = contents.read(&mut buff).await.map_err(to_other_err)?;
222            if bytes_read == 0 {
223                break;
224            }
225            let bytes = &buff[..bytes_read];
226            file.write_all(bytes).map_err(to_other_err)?;
227            hasher.update(bytes);
228        }
229        file.flush().map_err(to_other_err)?;
230        let id = FileId::new(hasher.finalize().to_vec());
231
232        persist_content_addressed_temp_file(temp_file, self.file_path(&id))
233            .map_err(to_other_err)?;
234        Ok(id)
235    }
236
237    async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
238        let path = self.symlink_path(id);
239        let target = fs::read_to_string(path).map_err(|err| map_not_found_err(err, id))?;
240        Ok(target)
241    }
242
243    async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
244        // TODO: Write temporary file in the destination directory (#5712)
245        let mut temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
246        temp_file
247            .write_all(target.as_bytes())
248            .map_err(to_other_err)?;
249        let mut hasher = Blake2b512::new();
250        hasher.update(target.as_bytes());
251        let id = SymlinkId::new(hasher.finalize().to_vec());
252
253        persist_content_addressed_temp_file(temp_file, self.symlink_path(&id))
254            .map_err(to_other_err)?;
255        Ok(id)
256    }
257
258    async fn read_copy(&self, _id: &CopyId) -> BackendResult<CopyHistory> {
259        Err(BackendError::Unsupported(
260            "The simple backend doesn't support copies".to_string(),
261        ))
262    }
263
264    async fn write_copy(&self, _contents: &CopyHistory) -> BackendResult<CopyId> {
265        Err(BackendError::Unsupported(
266            "The simple backend doesn't support copies".to_string(),
267        ))
268    }
269
270    async fn get_related_copies(&self, _copy_id: &CopyId) -> BackendResult<Vec<CopyHistory>> {
271        Err(BackendError::Unsupported(
272            "The simple backend doesn't support copies".to_string(),
273        ))
274    }
275
276    async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
277        let path = self.tree_path(id);
278        let buf = fs::read(path).map_err(|err| map_not_found_err(err, id))?;
279
280        let proto = crate::protos::simple_store::Tree::decode(&*buf).map_err(to_other_err)?;
281        Ok(tree_from_proto(proto))
282    }
283
284    async fn write_tree(&self, _path: &RepoPath, tree: &Tree) -> BackendResult<TreeId> {
285        // TODO: Write temporary file in the destination directory (#5712)
286        let temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
287
288        let proto = tree_to_proto(tree);
289        temp_file
290            .as_file()
291            .write_all(&proto.encode_to_vec())
292            .map_err(to_other_err)?;
293
294        let id = TreeId::new(blake2b_hash(tree).to_vec());
295
296        persist_content_addressed_temp_file(temp_file, self.tree_path(&id))
297            .map_err(to_other_err)?;
298        Ok(id)
299    }
300
301    fn read_conflict(&self, _path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict> {
302        let path = self.conflict_path(id);
303        let buf = fs::read(path).map_err(|err| map_not_found_err(err, id))?;
304
305        let proto = crate::protos::simple_store::Conflict::decode(&*buf).map_err(to_other_err)?;
306        Ok(conflict_from_proto(proto))
307    }
308
309    fn write_conflict(&self, _path: &RepoPath, conflict: &Conflict) -> BackendResult<ConflictId> {
310        // TODO: Write temporary file in the destination directory (#5712)
311        let temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
312
313        let proto = conflict_to_proto(conflict);
314        temp_file
315            .as_file()
316            .write_all(&proto.encode_to_vec())
317            .map_err(to_other_err)?;
318
319        let id = ConflictId::new(blake2b_hash(conflict).to_vec());
320
321        persist_content_addressed_temp_file(temp_file, self.conflict_path(&id))
322            .map_err(to_other_err)?;
323        Ok(id)
324    }
325
326    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
327        if *id == self.root_commit_id {
328            return Ok(make_root_commit(
329                self.root_change_id().clone(),
330                self.empty_tree_id.clone(),
331            ));
332        }
333
334        let path = self.commit_path(id);
335        let buf = fs::read(path).map_err(|err| map_not_found_err(err, id))?;
336
337        let proto = crate::protos::simple_store::Commit::decode(&*buf).map_err(to_other_err)?;
338        Ok(commit_from_proto(proto))
339    }
340
341    async fn write_commit(
342        &self,
343        mut commit: Commit,
344        sign_with: Option<&mut SigningFn>,
345    ) -> BackendResult<(CommitId, Commit)> {
346        assert!(commit.secure_sig.is_none(), "commit.secure_sig was set");
347
348        if commit.parents.is_empty() {
349            return Err(BackendError::Other(
350                "Cannot write a commit with no parents".into(),
351            ));
352        }
353        // TODO: Write temporary file in the destination directory (#5712)
354        let temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
355
356        let mut proto = commit_to_proto(&commit);
357        if let Some(sign) = sign_with {
358            let data = proto.encode_to_vec();
359            let sig = sign(&data).map_err(to_other_err)?;
360            proto.secure_sig = Some(sig.clone());
361            commit.secure_sig = Some(SecureSig { data, sig });
362        }
363
364        temp_file
365            .as_file()
366            .write_all(&proto.encode_to_vec())
367            .map_err(to_other_err)?;
368
369        let id = CommitId::new(blake2b_hash(&commit).to_vec());
370
371        persist_content_addressed_temp_file(temp_file, self.commit_path(&id))
372            .map_err(to_other_err)?;
373        Ok((id, commit))
374    }
375
376    fn get_copy_records(
377        &self,
378        _paths: Option<&[RepoPathBuf]>,
379        _root: &CommitId,
380        _head: &CommitId,
381    ) -> BackendResult<BoxStream<'_, BackendResult<CopyRecord>>> {
382        Ok(Box::pin(stream::empty()))
383    }
384
385    fn gc(&self, _index: &dyn Index, _keep_newer: SystemTime) -> BackendResult<()> {
386        Ok(())
387    }
388}
389
390#[allow(clippy::assigning_clones)]
391pub fn commit_to_proto(commit: &Commit) -> crate::protos::simple_store::Commit {
392    let mut proto = crate::protos::simple_store::Commit::default();
393    for parent in &commit.parents {
394        proto.parents.push(parent.to_bytes());
395    }
396    for predecessor in &commit.predecessors {
397        proto.predecessors.push(predecessor.to_bytes());
398    }
399    match &commit.root_tree {
400        MergedTreeId::Legacy(tree_id) => {
401            proto.root_tree = vec![tree_id.to_bytes()];
402        }
403        MergedTreeId::Merge(tree_ids) => {
404            proto.uses_tree_conflict_format = true;
405            proto.root_tree = tree_ids.iter().map(|id| id.to_bytes()).collect();
406        }
407    }
408    proto.change_id = commit.change_id.to_bytes();
409    proto.description = commit.description.clone();
410    proto.author = Some(signature_to_proto(&commit.author));
411    proto.committer = Some(signature_to_proto(&commit.committer));
412    proto
413}
414
415fn commit_from_proto(mut proto: crate::protos::simple_store::Commit) -> Commit {
416    // Note how .take() sets the secure_sig field to None before we encode the data.
417    // Needs to be done first since proto is partially moved a bunch below
418    let secure_sig = proto.secure_sig.take().map(|sig| SecureSig {
419        data: proto.encode_to_vec(),
420        sig,
421    });
422
423    let parents = proto.parents.into_iter().map(CommitId::new).collect();
424    let predecessors = proto.predecessors.into_iter().map(CommitId::new).collect();
425    let root_tree = if proto.uses_tree_conflict_format {
426        let merge_builder: MergeBuilder<_> = proto.root_tree.into_iter().map(TreeId::new).collect();
427        MergedTreeId::Merge(merge_builder.build())
428    } else {
429        assert_eq!(proto.root_tree.len(), 1);
430        MergedTreeId::Legacy(TreeId::new(proto.root_tree[0].clone()))
431    };
432    let change_id = ChangeId::new(proto.change_id);
433    Commit {
434        parents,
435        predecessors,
436        root_tree,
437        change_id,
438        description: proto.description,
439        author: signature_from_proto(proto.author.unwrap_or_default()),
440        committer: signature_from_proto(proto.committer.unwrap_or_default()),
441        secure_sig,
442    }
443}
444
445fn tree_to_proto(tree: &Tree) -> crate::protos::simple_store::Tree {
446    let mut proto = crate::protos::simple_store::Tree::default();
447    for entry in tree.entries() {
448        proto
449            .entries
450            .push(crate::protos::simple_store::tree::Entry {
451                name: entry.name().as_internal_str().to_owned(),
452                value: Some(tree_value_to_proto(entry.value())),
453            });
454    }
455    proto
456}
457
458fn tree_from_proto(proto: crate::protos::simple_store::Tree) -> Tree {
459    // Serialized data should be sorted
460    let entries = proto
461        .entries
462        .into_iter()
463        .map(|proto_entry| {
464            let value = tree_value_from_proto(proto_entry.value.unwrap());
465            (RepoPathComponentBuf::new(proto_entry.name).unwrap(), value)
466        })
467        .collect();
468    Tree::from_sorted_entries(entries)
469}
470
471fn tree_value_to_proto(value: &TreeValue) -> crate::protos::simple_store::TreeValue {
472    let mut proto = crate::protos::simple_store::TreeValue::default();
473    match value {
474        TreeValue::File {
475            id,
476            executable,
477            copy_id,
478        } => {
479            proto.value = Some(crate::protos::simple_store::tree_value::Value::File(
480                crate::protos::simple_store::tree_value::File {
481                    id: id.to_bytes(),
482                    executable: *executable,
483                    copy_id: copy_id.to_bytes(),
484                },
485            ));
486        }
487        TreeValue::Symlink(id) => {
488            proto.value = Some(crate::protos::simple_store::tree_value::Value::SymlinkId(
489                id.to_bytes(),
490            ));
491        }
492        TreeValue::GitSubmodule(_id) => {
493            panic!("cannot store git submodules");
494        }
495        TreeValue::Tree(id) => {
496            proto.value = Some(crate::protos::simple_store::tree_value::Value::TreeId(
497                id.to_bytes(),
498            ));
499        }
500        TreeValue::Conflict(id) => {
501            proto.value = Some(crate::protos::simple_store::tree_value::Value::ConflictId(
502                id.to_bytes(),
503            ));
504        }
505    }
506    proto
507}
508
509fn tree_value_from_proto(proto: crate::protos::simple_store::TreeValue) -> TreeValue {
510    match proto.value.unwrap() {
511        crate::protos::simple_store::tree_value::Value::TreeId(id) => {
512            TreeValue::Tree(TreeId::new(id))
513        }
514        crate::protos::simple_store::tree_value::Value::File(
515            crate::protos::simple_store::tree_value::File {
516                id,
517                executable,
518                copy_id,
519            },
520        ) => TreeValue::File {
521            id: FileId::new(id),
522            executable,
523            copy_id: CopyId::new(copy_id),
524        },
525        crate::protos::simple_store::tree_value::Value::SymlinkId(id) => {
526            TreeValue::Symlink(SymlinkId::new(id))
527        }
528        crate::protos::simple_store::tree_value::Value::ConflictId(id) => {
529            TreeValue::Conflict(ConflictId::new(id))
530        }
531    }
532}
533
534fn signature_to_proto(signature: &Signature) -> crate::protos::simple_store::commit::Signature {
535    crate::protos::simple_store::commit::Signature {
536        name: signature.name.clone(),
537        email: signature.email.clone(),
538        timestamp: Some(crate::protos::simple_store::commit::Timestamp {
539            millis_since_epoch: signature.timestamp.timestamp.0,
540            tz_offset: signature.timestamp.tz_offset,
541        }),
542    }
543}
544
545fn signature_from_proto(proto: crate::protos::simple_store::commit::Signature) -> Signature {
546    let timestamp = proto.timestamp.unwrap_or_default();
547    Signature {
548        name: proto.name,
549        email: proto.email,
550        timestamp: Timestamp {
551            timestamp: MillisSinceEpoch(timestamp.millis_since_epoch),
552            tz_offset: timestamp.tz_offset,
553        },
554    }
555}
556
557fn conflict_to_proto(conflict: &Conflict) -> crate::protos::simple_store::Conflict {
558    let mut proto = crate::protos::simple_store::Conflict::default();
559    for term in &conflict.removes {
560        proto.removes.push(conflict_term_to_proto(term));
561    }
562    for term in &conflict.adds {
563        proto.adds.push(conflict_term_to_proto(term));
564    }
565    proto
566}
567
568fn conflict_from_proto(proto: crate::protos::simple_store::Conflict) -> Conflict {
569    let removes = proto
570        .removes
571        .into_iter()
572        .map(conflict_term_from_proto)
573        .collect();
574    let adds = proto
575        .adds
576        .into_iter()
577        .map(conflict_term_from_proto)
578        .collect();
579    Conflict { removes, adds }
580}
581
582fn conflict_term_from_proto(proto: crate::protos::simple_store::conflict::Term) -> ConflictTerm {
583    ConflictTerm {
584        value: tree_value_from_proto(proto.content.unwrap()),
585    }
586}
587
588fn conflict_term_to_proto(part: &ConflictTerm) -> crate::protos::simple_store::conflict::Term {
589    crate::protos::simple_store::conflict::Term {
590        content: Some(tree_value_to_proto(&part.value)),
591    }
592}
593
#[cfg(test)]
mod tests {
    use assert_matches::assert_matches;
    use pollster::FutureExt as _;

    use super::*;
    use crate::tests::new_temp_dir;

    /// Fixed signature used for both author and committer in the tests.
    fn create_signature() -> Signature {
        let timestamp = Timestamp {
            timestamp: MillisSinceEpoch(0),
            tz_offset: 0,
        };
        Signature {
            name: "Someone".to_string(),
            email: "someone@example.com".to_string(),
            timestamp,
        }
    }

    /// Test that parents get written correctly
    #[test]
    fn write_commit_parents() {
        let temp_dir = new_temp_dir();
        let backend = SimpleBackend::init(temp_dir.path());
        let root_id = backend.root_commit_id().clone();

        let mut commit = Commit {
            parents: vec![],
            predecessors: vec![],
            root_tree: MergedTreeId::resolved(backend.empty_tree_id().clone()),
            change_id: ChangeId::from_hex("abc123"),
            description: "".to_string(),
            author: create_signature(),
            committer: create_signature(),
            secure_sig: None,
        };

        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
            backend.write_commit(commit, None).block_on()
        };
        // Writes the commit, reads it back, checks that it is unchanged, and
        // returns the id it was stored under.
        let assert_round_trip = |expected: &Commit| -> CommitId {
            let (id, _) = write_commit(expected.clone()).unwrap();
            let stored = backend.read_commit(&id).block_on().unwrap();
            assert_eq!(&stored, expected);
            id
        };

        // No parents
        commit.parents = vec![];
        assert_matches!(
            write_commit(commit.clone()),
            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
        );

        // Only root commit as parent
        commit.parents = vec![root_id.clone()];
        let first_id = assert_round_trip(&commit);

        // Only non-root commit as parent
        commit.parents = vec![first_id.clone()];
        let second_id = assert_round_trip(&commit);

        // Merge commit
        commit.parents = vec![first_id.clone(), second_id.clone()];
        assert_round_trip(&commit);

        // Merge commit with root as one parent
        commit.parents = vec![first_id, root_id];
        assert_round_trip(&commit);
    }
}