libojo/
lib.rs

1// Copyright 2018-2019 Joe Neeman.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8//
9// See the LICENSE-APACHE or LICENSE-MIT files at the top-level directory
10// of this distribution.
11
12#![deny(missing_docs)]
13
14//! A library for creating, reading, and manipulating `ojo` repositories.
15//!
16//! `ojo` is a toy implementation of a version control system inspired by the same ideas as
17//! [`pijul`](https://pijul.com). These ideas, and eventually the implementation of `ojo`,
18//! are documented in some [`blog posts`](https://jneem.github.io). This crate itself is not so
19//! well documented, but doing so is one of my goals.
20
21#[macro_use]
22extern crate serde_derive;
23
24#[macro_use]
25extern crate log;
26
27#[cfg(test)]
28#[macro_use]
29extern crate proptest;
30
31#[cfg(test)]
32#[macro_use]
33extern crate pretty_assertions;
34
35use ojo_graph::Graph;
36use std::collections::HashSet;
37use std::fs;
38use std::path::{Path, PathBuf};
39
40// This module needs to go first, because it supplies some macros (for testing) that the other
41// modules use.
42#[macro_use]
43mod storage;
44
45mod chain_graggle;
46mod error;
47mod patch;
48pub mod resolver;
49
50pub use crate::chain_graggle::ChainGraggle;
51pub use crate::error::{Error, PatchIdError};
52pub use crate::patch::{Change, Changes, Patch, PatchId, UnidentifiedPatch};
53pub use crate::storage::graggle::{Edge, EdgeKind};
54pub use crate::storage::{File, FullGraph, Graggle, LiveGraph};
55pub use ojo_diff::LineDiff;
56
57/// A globally unique ID for identifying a node.
58#[derive(Clone, Copy, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
59pub struct NodeId {
60    /// The ID of the patch that first introduced this node.
61    pub patch: PatchId,
62    /// The index of this node within the patch.
63    ///
64    /// If a patch introduces `n` nodes, they are given `node` values of `0` through `n-1`.
65    pub node: u64,
66}
67
68impl std::fmt::Debug for NodeId {
69    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
70        fmt.debug_tuple("NodeId")
71            .field(&format!("{}/{:?}", self.patch.to_base64(), self.node))
72            .finish()
73    }
74}
75
76impl NodeId {
77    fn set_patch_id(&mut self, id: &PatchId) {
78        if self.patch.is_cur() {
79            self.patch = *id;
80        }
81    }
82
83    /// Creates a new `NodeId` for referring to a node that is being introduced in the current
84    /// patch.
85    ///
86    /// See [`PatchId`] for more information on the current patch and its ID.
87    pub fn cur(node: u64) -> NodeId {
88        NodeId {
89            patch: PatchId::cur(),
90            node,
91        }
92    }
93}
94
95/// This is the main interface to a `ojo` repository.
96///
97/// Be aware that any modifications made to a repository will not be saved unless [`Repo::write`]
98/// is called.
99#[derive(Debug)]
100pub struct Repo {
101    /// The path to the root directory of the repository.
102    pub root_dir: PathBuf,
103    /// The path to the directory where all of ojo's data is stored.
104    pub repo_dir: PathBuf,
105    /// The path to the database containing all the history, and so on.
106    pub db_path: PathBuf,
107    /// The path to the directory where patches are stored.
108    /// The name of the current branch.
109    pub current_branch: String,
110
111    storage: storage::Storage,
112}
113
114impl Repo {
115    /// Given the path of the root directory of a repository, returns the directory where ojo's data
116    /// is stored.
117    fn repo_dir(dir: &Path) -> Result<PathBuf, Error> {
118        let mut ret = dir.to_path_buf();
119        ret.push(".ojo");
120        Ok(ret)
121    }
122
123    /// Given the path of the root directory of a repository, returns the path containing ojo's
124    /// serialized data.
125    fn db_path(dir: &Path) -> Result<PathBuf, Error> {
126        let mut ret = Repo::repo_dir(dir)?;
127        ret.push("db");
128        Ok(ret)
129    }
130
131    /// Opens the existing repository with the given root directory.
132    pub fn open<P: AsRef<Path>>(dir: P) -> Result<Repo, Error> {
133        let db_path = Repo::db_path(dir.as_ref())?;
134        let db_file = fs::File::open(&db_path)?;
135        let db: Db = serde_yaml::from_reader(db_file)?;
136        Ok(Repo {
137            root_dir: dir.as_ref().to_owned(),
138            repo_dir: Repo::repo_dir(dir.as_ref())?,
139            db_path,
140            current_branch: db.current_branch,
141            storage: db.storage,
142        })
143    }
144
145    /// Creates a repo at the given path (which should point to a directory).
146    pub fn init<P: AsRef<Path>>(path: P) -> Result<Repo, Error> {
147        let root_dir = path.as_ref().to_owned();
148        let repo_dir = Repo::repo_dir(&root_dir)?;
149        let db_path = Repo::db_path(&root_dir)?;
150        if db_path.exists() {
151            return Err(Error::RepoExists(repo_dir.clone()));
152        }
153
154        let mut storage = storage::Storage::new();
155        let master_inode = storage.allocate_inode();
156        storage.set_inode("master", master_inode);
157        Ok(Repo {
158            root_dir,
159            repo_dir,
160            db_path,
161            current_branch: "master".to_owned(),
162            storage,
163        })
164    }
165
166    /// Creates a temporary in-memory repo that cannot be stored.
167    pub fn init_tmp() -> Repo {
168        let mut storage = storage::Storage::new();
169        let master_inode = storage.allocate_inode();
170        storage.set_inode("master", master_inode);
171
172        Repo {
173            root_dir: PathBuf::new(),
174            repo_dir: PathBuf::new(),
175            db_path: PathBuf::new(),
176            current_branch: "master".to_owned(),
177            storage,
178        }
179    }
180
181    /// Clears a branch, removing all of its patches.
182    pub fn clear(&mut self, branch: &str) -> Result<(), Error> {
183        let inode = self.inode(branch)?;
184        self.storage.branch_patches.remove_all(branch);
185        self.storage.remove_graggle(inode);
186        self.storage
187            .set_graggle(inode, storage::graggle::GraggleData::new());
188        Ok(())
189    }
190
191    /// Persists the repository to disk.
192    ///
193    /// Any modifications that were previously made become permanent.
194    pub fn write(&self) -> Result<(), Error> {
195        let db = DbRef {
196            current_branch: &self.current_branch,
197            storage: &self.storage,
198        };
199        self.try_create_dir(&self.repo_dir)?;
200        let db_file = fs::File::create(&self.db_path)?;
201        serde_yaml::to_writer(db_file, &db)?;
202        Ok(())
203    }
204
205    fn inode(&self, branch: &str) -> Result<storage::INode, Error> {
206        Ok(self
207            .storage
208            .inode(branch)
209            .ok_or_else(|| Error::UnknownBranch(branch.to_owned()))?)
210    }
211
212    /// Returns a read-only view to the data associated with a branch.
213    pub fn graggle<'a>(&'a self, branch: &str) -> Result<storage::Graggle<'a>, Error> {
214        let inode = self
215            .storage
216            .inode(branch)
217            .ok_or_else(|| Error::UnknownBranch(branch.to_owned()))?;
218        Ok(self.storage.graggle(inode))
219    }
220
221    /// Retrieves the data associated with a branch, assuming that it represents a totally ordered
222    /// file.
223    pub fn file(&self, branch: &str) -> Result<File, Error> {
224        let inode = self.inode(branch)?;
225        self.storage
226            .graggle(inode)
227            .as_live_graph()
228            .linear_order()
229            .map(|ref order| File::from_ids(order, &self.storage))
230            .ok_or(Error::NotOrdered)
231    }
232
233    /// Retrieves the contents associated with a node.
234    pub fn contents(&self, id: &NodeId) -> &[u8] {
235        self.storage.contents(id)
236    }
237
238    /// Opens a patch.
239    ///
240    /// The patch must already be known to the repository, either because it was created locally
241    /// (i.e. with [`Repo::create_patch`]) or because it was (possibly created elsewhere but)
242    /// registered locally with [`Repo::register_patch`].
243    pub fn open_patch(&self, id: &PatchId) -> Result<Patch, Error> {
244        let patch_data = self.open_patch_data(id)?;
245        let ret = Patch::from_reader(patch_data)?;
246        if ret.id() != id {
247            Err(Error::IdMismatch(*ret.id(), *id))
248        } else {
249            Ok(ret)
250        }
251    }
252
253    /// Returns the data associated with a patch.
254    ///
255    /// Currently, this data consists of the patch's contents serialized as YAML, but that isn't
256    /// guaranteed. What is guaranteed is that the return value of this function is of the same
257    /// format as the argument to [`Repo::register_patch`].
258    pub fn open_patch_data(&self, id: &PatchId) -> Result<&[u8], Error> {
259        self.storage
260            .patches
261            .get(id)
262            .map(|s| s.as_bytes())
263            .ok_or_else(|| Error::UnknownPatch(*id))
264    }
265
266    /// Introduces a patch to the repository.
267    ///
268    /// After registering a patch, its data will be stored in the repository and you will be able
269    /// to access it by its ID.
270    pub fn register_patch(&mut self, patch_data: &[u8]) -> Result<PatchId, Error> {
271        let patch = Patch::from_reader(patch_data)?;
272        let data = String::from_utf8(patch_data.to_owned())?;
273        self.register_patch_with_data(&patch, data)?;
274        Ok(*patch.id())
275    }
276
277    // Before making any modifications, check the patch for consistency. That means:
278    // - all dependencies must already be known
279    // - every node that we refer to must already be present
280    // - every node that we refer to must be either new, or we must depend on its patch
281    // This part is *IMPORTANT*, because it contains all the validation for patches. After
282    // this, they go from being treated as untrusted input to being internal data.
283    fn check_patch_validity(&self, patch: &Patch) -> Result<(), Error> {
284        for dep in patch.deps() {
285            if !self.storage.patches.contains_key(dep) {
286                return Err(Error::MissingDep(*dep));
287            }
288        }
289        let dep_set = patch.deps().iter().cloned().collect::<HashSet<_>>();
290        let new_nodes = patch
291            .changes()
292            .changes
293            .iter()
294            .filter_map(|ch| {
295                if let Change::NewNode { ref id, .. } = ch {
296                    Some(id)
297                } else {
298                    None
299                }
300            })
301            .collect::<HashSet<_>>();
302        for ch in &patch.changes().changes {
303            use crate::patch::Change::*;
304            let has_node = |id| {
305                new_nodes.contains(id)
306                    || (self.storage.contains_node(id) && dep_set.contains(&id.patch))
307            };
308            match ch {
309                NewNode { ref id, .. } => {
310                    if !has_node(id) {
311                        return Err(Error::UnknownNode(*id));
312                    }
313                }
314                NewEdge { ref src, ref dest } => {
315                    if !has_node(src) {
316                        return Err(Error::UnknownNode(*src));
317                    }
318                    if !has_node(dest) {
319                        return Err(Error::UnknownNode(*dest));
320                    }
321                }
322                DeleteNode { ref id } => {
323                    if !has_node(id) {
324                        return Err(Error::UnknownNode(*id));
325                    }
326                }
327            }
328        }
329        Ok(())
330    }
331
332    fn register_patch_with_data(&mut self, patch: &Patch, data: String) -> Result<(), Error> {
333        // If the patch already exists in our repository then there's nothing to do. But if there's
334        // a file there with the same hash but different contents then something's really wrong.
335        if self.storage.patches.contains_key(patch.id()) {
336            let old_patch = self.open_patch(patch.id())?;
337            if &old_patch == patch {
338                return Ok(());
339            } else {
340                return Err(PatchIdError::Collision(*patch.id()).into());
341            }
342        }
343
344        self.check_patch_validity(patch)?;
345
346        // Record the deps and reverse-deps.
347        for dep in patch.deps() {
348            self.storage
349                .patch_deps
350                .insert(patch.id().clone(), dep.clone());
351            self.storage
352                .patch_rev_deps
353                .insert(dep.clone(), patch.id().clone());
354        }
355
356        self.storage.patches.insert(patch.id().clone(), data);
357        Ok(())
358    }
359
360    // Applies a single patch to a branch.
361    //
362    // Panics if not all of the dependencies are already present.
363    fn apply_one_patch(&mut self, branch: &str, patch_id: &PatchId) -> Result<(), Error> {
364        let patch = self.open_patch(patch_id)?;
365        for dep in patch.deps() {
366            debug_assert!(
367                self.storage.branch_patches.contains(branch, dep),
368                "tried to apply a patch while it was missing a dependency"
369            );
370        }
371        let inode = self.storage.inode(branch).unwrap();
372        self.storage
373            .apply_changes(inode, patch.changes(), *patch_id);
374        self.storage
375            .branch_patches
376            .insert(branch.to_owned(), patch.id().clone());
377        Ok(())
378    }
379
380    /// Applies a patch (and all its dependencies) to a branch.
381    ///
382    /// Returns a list of all the patches that were applied.
383    pub fn apply_patch(&mut self, branch: &str, patch_id: &PatchId) -> Result<Vec<PatchId>, Error> {
384        // If the branch already contains the patch, this is a no-op.
385        if self.storage.branch_patches.contains(branch, patch_id) {
386            return Ok(vec![]);
387        }
388
389        let mut patch_stack = vec![*patch_id];
390        let mut applied = Vec::new();
391        while !patch_stack.is_empty() {
392            // The unwrap is ok because the stack is non-empty inside the loop.
393            let cur = patch_stack.last().unwrap();
394            let unapplied_deps = self
395                .storage
396                .patch_deps
397                .get(&cur)
398                .filter(|dep| !self.storage.branch_patches.contains(branch, dep))
399                .cloned()
400                .collect::<Vec<_>>();
401            if unapplied_deps.is_empty() {
402                // It's possible that this patch was already applied, because it was a dep of
403                // multiple other patches.
404                if !self.storage.branch_patches.contains(branch, &cur) {
405                    self.apply_one_patch(branch, &cur)?;
406                    applied.push(cur.clone());
407                }
408                patch_stack.pop();
409            } else {
410                patch_stack.extend_from_slice(&unapplied_deps[..]);
411            }
412        }
413
414        // Having applied all the patches, resolve the cache.
415        let inode = self.storage.inode(branch).unwrap();
416        self.storage.update_cache(inode);
417        Ok(applied)
418    }
419
420    fn unapply_one_patch(&mut self, branch: &str, patch_id: &PatchId) -> Result<(), Error> {
421        debug!("unapplying patch {:?} from branch {:?}", patch_id, branch);
422
423        let patch = self.open_patch(patch_id)?;
424        let inode = self.inode(branch)?;
425        self.storage
426            .unapply_changes(inode, patch.changes(), *patch_id);
427        self.storage.branch_patches.remove(branch, patch.id());
428        Ok(())
429    }
430
431    /// Unapplies a patch (and everything that depends on it) to a branch.
432    ///
433    /// Returns a list of all the patches that were unapplied.
434    pub fn unapply_patch(
435        &mut self,
436        branch: &str,
437        patch_id: &PatchId,
438    ) -> Result<Vec<PatchId>, Error> {
439        // If the branch doesn't contain the patch, this is a no-op.
440        if !self.storage.branch_patches.contains(branch, patch_id) {
441            return Ok(vec![]);
442        }
443
444        let mut patch_stack = vec![*patch_id];
445        let mut unapplied = Vec::new();
446        while !patch_stack.is_empty() {
447            // The unwrap is ok because the stack is non-empty inside the loop.
448            let cur = patch_stack.last().unwrap();
449            let applied_rev_deps = self
450                .storage
451                .patch_rev_deps
452                .get(&cur)
453                .filter(|dep| self.storage.branch_patches.contains(branch, dep))
454                .cloned()
455                .collect::<Vec<_>>();
456            if applied_rev_deps.is_empty() {
457                // It's possible that this patch was already unapplied, because it was a revdep of
458                // multiple other patches.
459                if self.storage.branch_patches.contains(branch, &cur) {
460                    self.unapply_one_patch(branch, &cur)?;
461                    unapplied.push(cur.clone());
462                }
463                patch_stack.pop();
464            } else {
465                patch_stack.extend_from_slice(&applied_rev_deps[..]);
466            }
467        }
468
469        // Having unapplied all the patches, resolve the cache.
470        let inode = self.storage.inode(branch).unwrap();
471        self.storage.update_cache(inode);
472        Ok(unapplied)
473    }
474
475    /// Returns an iterator over all known patches, applied or otherwise.
476    pub fn all_patches(&self) -> impl Iterator<Item = &PatchId> {
477        self.storage.patches.keys()
478    }
479
480    /// Returns an iterator over all of the patches being used in a branch.
481    // TODO: maybe a way to check whether a patch is applied to a branch?
482    pub fn patches(&self, branch: &str) -> impl Iterator<Item = &PatchId> {
483        self.storage.branch_patches.get(branch)
484    }
485
486    /// Returns an iterator over all direct dependencies of the given patch.
487    pub fn patch_deps(&self, patch: &PatchId) -> impl Iterator<Item = &PatchId> {
488        self.storage.patch_deps.get(patch)
489    }
490
491    /// Returns an iterator over all direct dependents of the given patch.
492    pub fn patch_rev_deps(&self, patch: &PatchId) -> impl Iterator<Item = &PatchId> {
493        self.storage.patch_rev_deps.get(patch)
494    }
495
496    /// Creates a new patch with the given changes and metadata and returns its ID.
497    ///
498    /// The newly created patch will be automatically registered in the current repository, so
499    /// there is no need to call [`Repo::register_patch`] on it.
500    pub fn create_patch(
501        &mut self,
502        author: &str,
503        msg: &str,
504        changes: Changes,
505    ) -> Result<PatchId, Error> {
506        let patch = UnidentifiedPatch::new(author.to_owned(), msg.to_owned(), changes);
507
508        // Serialize the patch to a buffer, and get back the identified patch.
509        let mut patch_data = Vec::new();
510        let patch = patch.write_out(&mut patch_data)?;
511        let patch_data =
512            String::from_utf8(patch_data).expect("YAML serializer failed to produce UTF-8");
513
514        // Now that we know the patch's id, store it in the patches map.
515        self.register_patch_with_data(&patch, patch_data)?;
516
517        Ok(*patch.id())
518    }
519
520    fn try_create_dir(&self, dir: &Path) -> Result<(), Error> {
521        if let Err(e) = std::fs::create_dir(dir) {
522            // If the directory already exists, just swallow the error.
523            if e.kind() != std::io::ErrorKind::AlreadyExists {
524                return Err(e)?;
525            }
526        }
527        Ok(())
528    }
529
530    /// Returns an iterator over the names of all branches.
531    pub fn branches(&self) -> impl Iterator<Item = &str> {
532        self.storage.branches()
533    }
534
535    /// Creates a new, empty branch.
536    pub fn create_branch(&mut self, branch: &str) -> Result<(), Error> {
537        if self.storage.inode(branch).is_some() {
538            Err(Error::BranchExists(branch.to_owned()))
539        } else {
540            let inode = self.storage.allocate_inode();
541            self.storage.set_inode(branch, inode);
542            Ok(())
543        }
544    }
545
546    /// Copies data to a new branch (which must not already exist).
547    pub fn clone_branch(&mut self, from: &str, to: &str) -> Result<(), Error> {
548        if self.storage.inode(to).is_some() {
549            Err(Error::BranchExists(to.to_owned()))
550        } else {
551            let from_inode = self
552                .storage
553                .inode(from)
554                .ok_or_else(|| Error::UnknownBranch(from.to_owned()))?;
555            let to_inode = self.storage.clone_inode(from_inode);
556            self.storage.set_inode(to, to_inode);
557
558            // Record the fact that all the patches in the old branch are also present in the new
559            // branch.
560            let from_patches = self
561                .storage
562                .branch_patches
563                .get(from)
564                .cloned()
565                .collect::<Vec<_>>();
566            for p in from_patches {
567                self.storage.branch_patches.insert(to.to_owned(), p);
568            }
569            Ok(())
570        }
571    }
572
573    /// Deletes the branch named `branch`.
574    pub fn delete_branch(&mut self, branch: &str) -> Result<(), Error> {
575        if branch == self.current_branch {
576            return Err(Error::CurrentBranch(branch.to_owned()));
577        }
578        let inode = self
579            .storage
580            .inode(branch)
581            .ok_or_else(|| Error::UnknownBranch(branch.to_owned()))?;
582        self.storage.remove_graggle(inode);
583        self.storage.remove_inode(branch);
584        self.storage.branch_patches.remove_all(branch);
585        Ok(())
586    }
587
588    /// Changes the current branch to the one named `branch` (which must already exist).
589    pub fn switch_branch(&mut self, branch: &str) -> Result<(), Error> {
590        if self.storage.inode(branch).is_none() {
591            Err(Error::UnknownBranch(branch.to_owned()))
592        } else {
593            self.current_branch = branch.to_owned();
594            Ok(())
595        }
596    }
597
598    /// If the given branch represents a totally ordered file (i.e. if [`Repo::file`] returns
599    /// something), returns the result of diffing the given branch against `file`.
600    pub fn diff(&self, branch: &str, file: &[u8]) -> Result<Diff, Error> {
601        let file_a = self.file(branch)?;
602        let lines_a = (0..file_a.num_nodes())
603            .map(|i| file_a.node(i))
604            .collect::<Vec<_>>();
605
606        let file_b = File::from_bytes(file);
607        let lines_b = (0..file_b.num_nodes())
608            .map(|i| file_b.node(i))
609            .collect::<Vec<_>>();
610
611        let diff = ojo_diff::diff(&lines_a, &lines_b);
612        Ok(Diff {
613            diff,
614            file_a,
615            file_b,
616        })
617    }
618}
619
620/// This struct, serialized, is the contents of the database.
621#[derive(Debug, Deserialize, Serialize)]
622struct Db {
623    current_branch: String,
624    storage: storage::Storage,
625}
626
627// The auto-generated Serialize implementation here should be compatible with the auto-generated
628// Seserialize implementation for Db.
629#[derive(Debug, Serialize)]
630struct DbRef<'a> {
631    current_branch: &'a str,
632    storage: &'a storage::Storage,
633}
634
635/// Represents a diff between two [`File`](crate::File)s.
636#[derive(Debug, Clone, Eq, PartialEq)]
637pub struct Diff {
638    /// The first file.
639    pub file_a: File,
640    /// The second file.
641    pub file_b: File,
642    /// The diff going from `file_a` to `file_b`.
643    pub diff: Vec<LineDiff>,
644}