cola/
lib.rs

1//! cola is a Conflict-free Replicated Data Type ([CRDT]) for real-time
2//! collaborative editing of plain text documents.
3//!
4//! CRDTs can be roughly divided into two categories: state-based and
5//! operation-based. cola falls in the latter category.
6//!
7//! The basic idea behind an Operation-based CRDT -- also known as a
8//! *Commutative* Replicated Data Type or C*m*RDT -- is to design the core data
9//! structure and the operations applied to it in such a way that they
10//! *commute*, so that the order in which they're applied at each peer doesn't
11//! matter.
12//!
13//! Commutativity makes the final state of the data structure only a function
14//! of its initial state and the *set* of operations applied to it, but *not*
15//! of the order in which they were applied. This ensures that once all peers
16//! have received all operations from all other peers they're guaranteed to
17//! converge to the same final state.
18//!
19//! In cola, the core data structure which represents the state of the document
20//! at each peer is the [`Replica`], and the operations which the peers
21//! exchange to communicate their local edits are [`Insertion`]s and
22//! [`Deletion`]s.
23//!
24//! If you're new to this crate, reading the documentations of the
25//! [`Replica`] struct and its methods would be a good place to start.
26//!
27//! For a deeper dive into cola's design and implementation you can check out
28//! [this blog post][cola].
29//!
30//! # Example usage
31//!
32//! ```rust
33//! use std::ops::Range;
34//!
35//! use cola::{Deletion, Replica, ReplicaId};
36//!
37//! struct Document {
38//!     buffer: String,
39//!     crdt: Replica,
40//! }
41//!
42//! struct Insertion {
43//!     text: String,
44//!     crdt: cola::Insertion,
45//! }
46//!
47//! impl Document {
48//!     fn new<S: Into<String>>(text: S, replica_id: ReplicaId) -> Self {
49//!         let buffer = text.into();
50//!         let crdt = Replica::new(replica_id, buffer.len());
51//!         Document { buffer, crdt }
52//!     }
53//!
54//!     fn fork(&self, new_replica_id: ReplicaId) -> Self {
55//!         let crdt = self.crdt.fork(new_replica_id);
56//!         Document { buffer: self.buffer.clone(), crdt }
57//!     }
58//!
59//!     fn insert<S: Into<String>>(
60//!         &mut self,
61//!         insert_at: usize,
62//!         text: S,
63//!     ) -> Insertion {
64//!         let text = text.into();
65//!         self.buffer.insert_str(insert_at, &text);
66//!         let insertion = self.crdt.inserted(insert_at, text.len());
67//!         Insertion { text, crdt: insertion }
68//!     }
69//!
70//!     fn delete(&mut self, range: Range<usize>) -> Deletion {
71//!         self.buffer.replace_range(range.clone(), "");
72//!         self.crdt.deleted(range)
73//!     }
74//!
75//!     fn integrate_insertion(&mut self, insertion: Insertion) {
76//!         if let Some(offset) =
77//!             self.crdt.integrate_insertion(&insertion.crdt)
78//!         {
79//!             self.buffer.insert_str(offset, &insertion.text);
80//!         }
81//!     }
82//!
83//!     fn integrate_deletion(&mut self, deletion: Deletion) {
84//!         let ranges = self.crdt.integrate_deletion(&deletion);
85//!         for range in ranges.into_iter().rev() {
86//!             self.buffer.replace_range(range, "");
87//!         }
88//!     }
89//! }
90//!
91//! fn main() {
92//!     let mut peer_1 = Document::new("Hello, world", 1);
93//!     let mut peer_2 = peer_1.fork(2);
94//!
95//!     let delete_comma = peer_1.delete(5..6);
96//!     let insert_exclamation = peer_2.insert(12, "!");
97//!
98//!     peer_1.integrate_insertion(insert_exclamation);
99//!     peer_2.integrate_deletion(delete_comma);
100//!
101//!     assert_eq!(peer_1.buffer, "Hello world!");
102//!     assert_eq!(peer_2.buffer, "Hello world!");
103//! }
104//! ```
105//!
106//! # Feature flags
107//!
108//! - `encode`: enables the [`encode`](Replica::encode) and
109//! [`decode`](Replica::decode) methods on [`Replica`] (disabled by default);
110//!
111//! - `serde`: enables the [`Serialize`] and [`Deserialize`] impls for
112//! [`Insertion`], [`Deletion`] and [`EncodedReplica`] (disabled by default).
113//!
114//! [CRDT]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type
115//! [cola]: https://nomad.foo/blog/cola
116//! [`Serialize`]: https://docs.rs/serde/latest/serde/trait.Serialize.html
117//! [`Deserialize`]: https://docs.rs/serde/latest/serde/trait.Deserialize.html
118
119#![allow(clippy::explicit_auto_deref)]
120#![allow(clippy::module_inception)]
121#![allow(clippy::needless_doctest_main)]
122#![cfg_attr(docsrs, feature(doc_cfg))]
123#![deny(missing_docs)]
124#![deny(rustdoc::broken_intra_doc_links)]
125#![deny(rustdoc::private_intra_doc_links)]
126
127extern crate alloc;
128
129mod backlog;
130mod crdt_edit;
131mod gtree;
132mod replica;
133mod replica_id;
134mod run_indices;
135mod run_tree;
136mod text_edit;
137mod utils;
138mod version_map;
139
140use backlog::Backlog;
141pub use backlog::{BackloggedDeletions, BackloggedInsertions};
142pub use crdt_edit::{Deletion, Insertion};
143use gtree::{Gtree, LeafIdx};
144pub use replica::Replica;
145use replica::*;
146pub use replica_id::ReplicaId;
147use replica_id::{ReplicaIdMap, ReplicaIdMapValuesMut};
148use run_indices::{AnchorBias, RunIndices};
149use run_tree::*;
150pub use text_edit::Text;
151use utils::*;
152use version_map::{DeletionMap, VersionMap};
153
154#[cfg(feature = "encode")]
155mod encoded_replica;
156#[cfg(feature = "encode")]
157use encoded_replica::checksum;
158#[cfg(feature = "encode")]
159pub use encoded_replica::{DecodeError, EncodedReplica};
160
161/// The version of the protocol cola uses to represent `EncodedReplica`s and
162/// `CrdtEdit`s.
163///
164/// You can think of this as a version number for cola's internal
165/// representation of the subset of its data structures that are exchanged
166/// between peers.
167///
168/// If different peers are using versions of cola with the same protocol number
169/// they're compatible. If not, decoding `EncodedReplica`s and deserializing
170/// `CrdtEdit`s will fail.
171///
172/// # Protocol stability
173///
174/// cola is still far away from reaching stability, and until that happens its
175/// internal `ProtocolVersion` might change very frequently. After the 1.0
176/// release the protocol version will only be allowed to be incremented in
177/// major releases (if at all).
178pub type ProtocolVersion = u64;
179
180/// The length of a piece of text according to some user-defined metric.
181///
182/// The meaning of a unit of length is decided by you, the user of this
183/// library, depending on the kind of buffer you're using cola with. This
184/// allows cola to work with buffers using a variety of encodings (UTF-8,
185/// UTF-16, etc.) and indexing metrics (bytes, codepoints, graphemes, etc.).
186///
187/// While the particular meaning of a unit of length is up to the user, it is
188/// important that it is consistent across all peers. For example, if one peer
189/// uses bytes as its unit of length, all other peers must also use bytes or
190/// the contents of their buffers will diverge.
191///
192/// # Examples
193///
194/// In this example all peers use the same metric (codepoints) and everything
195/// works as expected:
196///
197/// ```
198/// # use cola::Replica;
199/// fn insert_at_codepoint(s: &mut String, offset: usize, insert: &str) {
200///     let byte_offset = s.chars().take(offset).map(char::len_utf8).sum();
201///     s.insert_str(byte_offset, insert);
202/// }
203///
204/// // Peer 1 uses a String as its buffer and codepoints as its unit of
205/// // length.
206/// let mut buf1 = String::from("àc");
207/// let mut replica1 = Replica::new(1, 2); // "àc" has 2 codepoints.
208///
209/// let mut buf2 = buf1.clone();
210/// let mut replica2 = replica1.fork(2);
211///
212/// // Peer 1 inserts a 'b' between 'à' and 'c' and sends the edit over to the
213/// // other peer.
214/// let b = "b";
215/// insert_at_codepoint(&mut buf1, 1, b);
216/// let insert_b = replica1.inserted(1, 1);
217///
218/// // Peer 2 receives the edit.
219/// let offset = replica2.integrate_insertion(&insert_b).unwrap();
220///
221/// assert_eq!(offset, 1);
222///
223/// // Peer 2 also uses codepoints as its unit of length, so it inserts the
224/// // 'b' after the 'à' as expected.
225/// insert_at_codepoint(&mut buf2, offset, b);
226///
227/// // If all the peers use the same metric they'll always converge to the
228/// // same state.
229/// assert_eq!(buf1, "àbc");
230/// assert_eq!(buf2, "àbc");
231/// ```
232///
233/// If different peers use different metrics, however, their buffers can
234/// diverge or even cause the program to crash, like in the following example:
235///
236/// ```should_panic
237/// # use cola::Replica;
238/// # let b = "b";
239/// # let mut buf2 = String::from("àc");
240/// # let mut replica1 = Replica::new(1, 2);
241/// # let mut replica2 = replica1.fork(2);
242/// # let insert_b = replica1.inserted(1, 1);
243/// // ..same as before.
244///
245/// assert_eq!(buf2, "àc");
246///
247/// // Peer 2 receives the edit.
248/// let offset = replica2.integrate_insertion(&insert_b).unwrap();
249///
250/// assert_eq!(offset, 1);
251///
252/// // Now let's say peer 2 interprets `offset` as a byte offset even though
253/// // the insertion of the 'b' was done using codepoint offsets on peer 1.
254/// //
255/// // In this case the program just panics because a byte offset of 1 is not a
256/// // valid insertion point in the string "àc" since it falls in the middle of
257/// // the 'à' codepoint, which is 2 bytes long.
258/// //
259/// // In other cases the program might not panic but instead cause the peers
260/// // to silently diverge, which is arguably worse.
261///
262/// buf2.insert_str(offset, b); // 💥 panics!
263/// ```
264pub type Length = usize;
265
266/// The protocol version of the current cola release.
267///
268/// See [`ProtocolVersion`] for more infos.
269#[cfg(feature = "encode")]
270const PROTOCOL_VERSION: ProtocolVersion = 0;