Skip to main content

crdt_kit/
text.rs

1use alloc::collections::BTreeMap;
2use alloc::string::String;
3use alloc::vec::Vec;
4
5use crate::Crdt;
6
7/// A collaborative text CRDT based on RGA (Replicated Growable Array) principles.
8///
9/// Each character is assigned a unique identifier `(actor, counter)` and is
10/// stored in an internal sequence. Deletions use tombstones: characters are
11/// marked as deleted but remain in the internal list so that concurrent
12/// operations from different replicas can be merged deterministically.
13///
14/// Ordering of concurrent inserts at the same position is resolved by
15/// comparing `(counter, actor)` tuples — higher counters come first, and
16/// ties are broken lexicographically by actor ID.
17///
18/// # Example
19///
20/// ```
21/// use crdt_kit::prelude::*;
22///
23/// let mut t1 = TextCrdt::new("alice");
24/// t1.insert_str(0, "hello");
25///
26/// let mut t2 = TextCrdt::new("bob");
27/// t2.insert_str(0, "world");
28///
29/// t1.merge(&t2);
30/// t2.merge(&t1);
31///
32/// // Both replicas converge to the same text.
33/// assert_eq!(t1.to_string(), t2.to_string());
34/// ```
35#[derive(Debug, Clone, PartialEq, Eq)]
36#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
37pub struct TextCrdt {
38    actor: String,
39    counter: u64,
40    /// The ordered sequence of elements (including tombstones).
41    elements: Vec<Element>,
42    /// Tracks the maximum counter observed per actor, used during merge to
43    /// avoid re-inserting elements that are already present.
44    version: BTreeMap<String, u64>,
45}
46
47/// A single element in the text sequence.
48#[derive(Debug, Clone, PartialEq, Eq)]
49#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
50struct Element {
51    /// Unique identifier: (actor, counter).
52    id: (String, u64),
53    /// The character value.
54    value: char,
55    /// Whether this element has been tombstoned (logically deleted).
56    deleted: bool,
57}
58
59impl TextCrdt {
60    /// Create a new empty text CRDT for the given actor.
61    pub fn new(actor: impl Into<String>) -> Self {
62        Self {
63            actor: actor.into(),
64            counter: 0,
65            elements: Vec::new(),
66            version: BTreeMap::new(),
67        }
68    }
69
70    /// Create a fork of this replica with a different actor ID.
71    ///
72    /// The returned replica contains an identical copy of the current content
73    /// and version state, but subsequent inserts will use the new actor,
74    /// preventing ID collisions between the two replicas.
75    pub fn fork(&self, new_actor: impl Into<String>) -> Self {
76        Self {
77            actor: new_actor.into(),
78            counter: self.counter,
79            elements: self.elements.clone(),
80            version: self.version.clone(),
81        }
82    }
83
84    /// Insert a character at the given visible index.
85    ///
86    /// # Panics
87    ///
88    /// Panics if `index` is greater than `self.len()`.
89    pub fn insert(&mut self, index: usize, ch: char) {
90        assert!(
91            index <= self.len(),
92            "index {index} out of bounds for text of length {}",
93            self.len()
94        );
95
96        self.counter += 1;
97        let id = (self.actor.clone(), self.counter);
98        self.version
99            .entry(self.actor.clone())
100            .and_modify(|c| *c = (*c).max(self.counter))
101            .or_insert(self.counter);
102
103        let elem = Element {
104            id,
105            value: ch,
106            deleted: false,
107        };
108
109        let raw_index = self.raw_index_for_insert(index);
110        self.elements.insert(raw_index, elem);
111    }
112
113    /// Insert a string at the given visible index.
114    ///
115    /// Characters are inserted left-to-right so that the resulting visible
116    /// text contains the string starting at `index`.
117    ///
118    /// # Panics
119    ///
120    /// Panics if `index` is greater than `self.len()`.
121    pub fn insert_str(&mut self, index: usize, s: &str) {
122        assert!(
123            index <= self.len(),
124            "index {index} out of bounds for text of length {}",
125            self.len()
126        );
127
128        for (i, ch) in s.chars().enumerate() {
129            self.insert(index + i, ch);
130        }
131    }
132
133    /// Remove (tombstone) the character at the given visible index.
134    ///
135    /// # Panics
136    ///
137    /// Panics if `index` is greater than or equal to `self.len()`.
138    pub fn remove(&mut self, index: usize) {
139        assert!(
140            index < self.len(),
141            "index {index} out of bounds for text of length {}",
142            self.len()
143        );
144
145        let raw = self.visible_to_raw(index);
146        self.elements[raw].deleted = true;
147    }
148
149    /// Remove a range of characters starting at `start` with the given `len`.
150    ///
151    /// # Panics
152    ///
153    /// Panics if `start + len` is greater than `self.len()`.
154    pub fn remove_range(&mut self, start: usize, len: usize) {
155        assert!(
156            start + len <= self.len(),
157            "range {}..{} out of bounds for text of length {}",
158            start,
159            start + len,
160            self.len()
161        );
162
163        // Remove from right to left so that indices remain valid.
164        for i in (0..len).rev() {
165            self.remove(start + i);
166        }
167    }
168
169    /// Return the number of visible (non-deleted) characters.
170    #[must_use]
171    pub fn len(&self) -> usize {
172        self.elements.iter().filter(|e| !e.deleted).count()
173    }
174
175    /// Check whether the visible text is empty.
176    #[must_use]
177    pub fn is_empty(&self) -> bool {
178        self.len() == 0
179    }
180
181    /// Get this replica's actor ID.
182    #[must_use]
183    pub fn actor(&self) -> &str {
184        &self.actor
185    }
186
187    // ---- internal helpers ----
188
189    /// Convert a visible index to a raw index in `self.elements`.
190    ///
191    /// Returns the raw index of the `n`-th visible element.
192    fn visible_to_raw(&self, visible: usize) -> usize {
193        let mut seen = 0;
194        for (raw, elem) in self.elements.iter().enumerate() {
195            if !elem.deleted {
196                if seen == visible {
197                    return raw;
198                }
199                seen += 1;
200            }
201        }
202        panic!(
203            "visible index {} not found (only {} visible elements)",
204            visible, seen
205        );
206    }
207
208    /// Determine the raw position at which to insert a new element so that it
209    /// appears at the given visible index.
210    ///
211    /// If `index == self.len()` (append), the new element goes at the end.
212    /// Otherwise it is placed just before the element currently at `index`.
213    fn raw_index_for_insert(&self, visible_index: usize) -> usize {
214        if visible_index == 0 {
215            return 0;
216        }
217
218        let visible_count = self.len();
219        if visible_index >= visible_count {
220            // For append, go past all existing elements (including trailing
221            // tombstones that belong after the last visible character).
222            return self.elements.len();
223        }
224
225        // Insert just before the element that is currently at visible_index.
226        self.visible_to_raw(visible_index)
227    }
228
229    /// Find the raw index of an element with the given `id`, or `None`.
230    fn find_by_id(&self, id: &(String, u64)) -> Option<usize> {
231        self.elements.iter().position(|e| &e.id == id)
232    }
233
234    /// Determine where an element from a remote replica should be inserted
235    /// based on RGA ordering.
236    ///
237    /// We scan from `start` looking for the correct position. Among
238    /// consecutive elements with higher or equal `(counter, actor)` we skip
239    /// forward; the new element is placed before the first element that is
240    /// strictly less.
241    fn find_insert_position(&self, elem: &Element, after_raw: Option<usize>) -> usize {
242        let start = match after_raw {
243            Some(idx) => idx + 1,
244            None => 0,
245        };
246
247        let new_key = (elem.id.1, &elem.id.0); // (counter, actor)
248
249        for i in start..self.elements.len() {
250            let existing = &self.elements[i];
251            let existing_key = (existing.id.1, &existing.id.0);
252            // The new element goes before any element that has a strictly
253            // smaller ordering key. We use reverse ordering: larger counter
254            // first, then larger actor first, so the new element is inserted
255            // *before* the first element whose key is strictly less.
256            if existing_key < new_key {
257                return i;
258            }
259        }
260
261        self.elements.len()
262    }
263
264    /// Find the raw index in `self` of the causal predecessor of `elem` from
265    /// `other`. The predecessor is the element that directly precedes `elem`
266    /// in `other.elements`.
267    fn find_predecessor_raw(&self, other: &Self, elem: &Element) -> Option<usize> {
268        // Find the position of `elem` inside `other`.
269        let other_pos = other.elements.iter().position(|e| e.id == elem.id)?;
270
271        if other_pos == 0 {
272            return None;
273        }
274
275        // Walk backwards in `other` to find the first predecessor that exists
276        // in `self`.
277        for i in (0..other_pos).rev() {
278            let pred_id = &other.elements[i].id;
279            if let Some(raw) = self.find_by_id(pred_id) {
280                return Some(raw);
281            }
282        }
283
284        None
285    }
286}
287
288impl Crdt for TextCrdt {
289    fn merge(&mut self, other: &Self) {
290        // We integrate the remote elements one by one, in the order they
291        // appear in `other.elements`. For each remote element we either:
292        //   - update the tombstone flag if the element already exists locally,
293        //   - or insert it at the correct RGA position if it is new.
294
295        for other_elem in &other.elements {
296            if let Some(raw) = self.find_by_id(&other_elem.id) {
297                // Element already present — propagate tombstones (delete wins).
298                if other_elem.deleted {
299                    self.elements[raw].deleted = true;
300                }
301            } else {
302                // New element — figure out where to place it.
303                //
304                // In RGA the element is positioned *after* its causal
305                // predecessor (the element that was directly before it in the
306                // originating replica). We approximate this by looking at the
307                // element that precedes `other_elem` in `other.elements`.
308                let predecessor_raw = self.find_predecessor_raw(other, other_elem);
309                let pos = self.find_insert_position(other_elem, predecessor_raw);
310                self.elements.insert(pos, other_elem.clone());
311            }
312        }
313
314        // Merge version vectors.
315        for (actor, &cnt) in &other.version {
316            let entry = self.version.entry(actor.clone()).or_insert(0);
317            *entry = (*entry).max(cnt);
318        }
319
320        // Advance local counter past everything we have seen.
321        if let Some(&max_cnt) = self.version.values().max() {
322            self.counter = self.counter.max(max_cnt);
323        }
324    }
325}
326
327impl core::fmt::Display for TextCrdt {
328    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
329        for elem in &self.elements {
330            if !elem.deleted {
331                write!(f, "{}", elem.value)?;
332            }
333        }
334        Ok(())
335    }
336}
337
338#[cfg(test)]
339mod tests {
340    use super::*;
341
342    #[test]
343    fn new_text_is_empty() {
344        let t = TextCrdt::new("a");
345        assert!(t.is_empty());
346        assert_eq!(t.len(), 0);
347        assert_eq!(t.to_string(), "");
348    }
349
350    #[test]
351    fn insert_single_char() {
352        let mut t = TextCrdt::new("a");
353        t.insert(0, 'x');
354        assert_eq!(t.to_string(), "x");
355        assert_eq!(t.len(), 1);
356    }
357
358    #[test]
359    fn insert_at_beginning_middle_end() {
360        let mut t = TextCrdt::new("a");
361        t.insert(0, 'a'); // "a"
362        t.insert(1, 'c'); // "ac"
363        t.insert(1, 'b'); // "abc"
364        t.insert(0, 'z'); // "zabc"
365        assert_eq!(t.to_string(), "zabc");
366    }
367
368    #[test]
369    fn delete_char() {
370        let mut t = TextCrdt::new("a");
371        t.insert_str(0, "hello");
372        assert_eq!(t.to_string(), "hello");
373
374        t.remove(1); // remove 'e'
375        assert_eq!(t.to_string(), "hllo");
376        assert_eq!(t.len(), 4);
377    }
378
379    #[test]
380    fn insert_str_basic() {
381        let mut t = TextCrdt::new("a");
382        t.insert_str(0, "hello");
383        assert_eq!(t.to_string(), "hello");
384        assert_eq!(t.len(), 5);
385    }
386
387    #[test]
388    fn insert_str_at_middle() {
389        let mut t = TextCrdt::new("a");
390        t.insert_str(0, "hd");
391        t.insert_str(1, "ello worl");
392        assert_eq!(t.to_string(), "hello world");
393    }
394
395    #[test]
396    fn remove_range_basic() {
397        let mut t = TextCrdt::new("a");
398        t.insert_str(0, "hello world");
399        t.remove_range(5, 6); // remove " world"
400        assert_eq!(t.to_string(), "hello");
401    }
402
403    #[test]
404    fn remove_range_from_start() {
405        let mut t = TextCrdt::new("a");
406        t.insert_str(0, "hello");
407        t.remove_range(0, 3); // remove "hel"
408        assert_eq!(t.to_string(), "lo");
409    }
410
411    #[test]
412    fn remove_all() {
413        let mut t = TextCrdt::new("a");
414        t.insert_str(0, "abc");
415        t.remove_range(0, 3);
416        assert!(t.is_empty());
417        assert_eq!(t.to_string(), "");
418    }
419
420    #[test]
421    fn merge_disjoint_inserts() {
422        let mut t1 = TextCrdt::new("alice");
423        t1.insert_str(0, "hello");
424
425        let mut t2 = TextCrdt::new("bob");
426        t2.insert_str(0, "world");
427
428        t1.merge(&t2);
429
430        // Both sets of characters should be present.
431        let result = t1.to_string();
432        assert!(result.contains("hello") || result.contains("world"));
433        assert_eq!(t1.len(), 10);
434    }
435
436    #[test]
437    fn merge_propagates_tombstones() {
438        let mut t1 = TextCrdt::new("alice");
439        t1.insert_str(0, "abc");
440
441        // Fork with a different actor to simulate a second replica that
442        // received the same state. Only deletes here, so no new IDs needed,
443        // but fork is still the safe pattern.
444        let mut t2 = t1.fork("bob");
445        t2.remove(1); // delete 'b' on t2
446
447        t1.merge(&t2);
448        assert_eq!(t1.to_string(), "ac");
449    }
450
451    #[test]
452    fn merge_commutativity() {
453        let mut t1 = TextCrdt::new("alice");
454        t1.insert_str(0, "hello");
455
456        let mut t2 = TextCrdt::new("bob");
457        t2.insert_str(0, "world");
458
459        let mut left = t1.clone();
460        left.merge(&t2);
461
462        let mut right = t2.clone();
463        right.merge(&t1);
464
465        assert_eq!(left.to_string(), right.to_string());
466    }
467
468    #[test]
469    fn merge_idempotency() {
470        let mut t1 = TextCrdt::new("alice");
471        t1.insert_str(0, "hello");
472
473        let mut t2 = TextCrdt::new("bob");
474        t2.insert_str(0, "world");
475
476        t1.merge(&t2);
477        let after_first = t1.clone();
478        t1.merge(&t2);
479
480        assert_eq!(t1.to_string(), after_first.to_string());
481        assert_eq!(t1.len(), after_first.len());
482    }
483
484    #[test]
485    fn concurrent_inserts_at_same_position() {
486        // Both replicas start empty and insert at position 0.
487        let mut t1 = TextCrdt::new("alice");
488        t1.insert(0, 'a');
489
490        let mut t2 = TextCrdt::new("bob");
491        t2.insert(0, 'b');
492
493        let mut left = t1.clone();
494        left.merge(&t2);
495
496        let mut right = t2.clone();
497        right.merge(&t1);
498
499        // The order must be deterministic and identical on both replicas.
500        assert_eq!(left.to_string(), right.to_string());
501        assert_eq!(left.len(), 2);
502        // Both characters must be present.
503        let s = left.to_string();
504        assert!(s.contains('a'));
505        assert!(s.contains('b'));
506    }
507
508    #[test]
509    fn concurrent_inserts_at_same_position_in_existing_text() {
510        // Both replicas share the same base text and insert at the same index.
511        let mut t1 = TextCrdt::new("alice");
512        t1.insert_str(0, "ac");
513
514        // Fork to a different actor so new inserts get unique IDs.
515        let mut t2 = t1.fork("bob");
516
517        // Both insert at position 1 (between 'a' and 'c').
518        t1.insert(1, 'X');
519        t2.insert(1, 'Y');
520
521        let mut left = t1.clone();
522        left.merge(&t2);
523
524        let mut right = t2.clone();
525        right.merge(&t1);
526
527        assert_eq!(left.to_string(), right.to_string());
528        let s = left.to_string();
529        assert!(s.starts_with('a'));
530        assert!(s.ends_with('c'));
531        assert!(s.contains('X'));
532        assert!(s.contains('Y'));
533    }
534
535    #[test]
536    fn concurrent_insert_and_delete() {
537        let mut t1 = TextCrdt::new("alice");
538        t1.insert_str(0, "abc");
539
540        let mut t2 = t1.fork("bob");
541
542        // alice deletes 'b'
543        t1.remove(1);
544        // bob inserts 'X' at position 1
545        t2.insert(1, 'X');
546
547        let mut left = t1.clone();
548        left.merge(&t2);
549
550        let mut right = t2.clone();
551        right.merge(&t1);
552
553        // Both should converge.
554        assert_eq!(left.to_string(), right.to_string());
555        // 'b' should be deleted but 'X' should be present.
556        let s = left.to_string();
557        assert!(!s.contains('b'));
558        assert!(s.contains('X'));
559        assert!(s.contains('a'));
560        assert!(s.contains('c'));
561    }
562
563    #[test]
564    fn merge_after_local_edits_on_both_sides() {
565        let mut t1 = TextCrdt::new("alice");
566        t1.insert_str(0, "hello");
567
568        let mut t2 = t1.fork("bob");
569
570        // alice appends " world"
571        t1.insert_str(5, " world");
572        // bob deletes "llo" and inserts "p"
573        t2.remove_range(2, 3);
574        t2.insert(2, 'p');
575
576        let mut left = t1.clone();
577        left.merge(&t2);
578
579        let mut right = t2.clone();
580        right.merge(&t1);
581
582        assert_eq!(left.to_string(), right.to_string());
583    }
584
585    #[test]
586    fn display_trait() {
587        let mut t = TextCrdt::new("a");
588        t.insert_str(0, "hello");
589        assert_eq!(format!("{t}"), "hello");
590    }
591
592    #[test]
593    fn actor_getter() {
594        let t = TextCrdt::new("my-node");
595        assert_eq!(t.actor(), "my-node");
596    }
597
598    #[test]
599    #[should_panic(expected = "out of bounds")]
600    fn insert_out_of_bounds_panics() {
601        let mut t = TextCrdt::new("a");
602        t.insert(1, 'x'); // only index 0 is valid for empty text
603    }
604
605    #[test]
606    #[should_panic(expected = "out of bounds")]
607    fn remove_out_of_bounds_panics() {
608        let mut t = TextCrdt::new("a");
609        t.remove(0);
610    }
611
612    #[test]
613    #[should_panic(expected = "out of bounds")]
614    fn remove_range_out_of_bounds_panics() {
615        let mut t = TextCrdt::new("a");
616        t.insert_str(0, "abc");
617        t.remove_range(1, 5);
618    }
619
620    #[test]
621    fn triple_merge_convergence() {
622        let mut t1 = TextCrdt::new("alice");
623        t1.insert_str(0, "base");
624
625        let mut t2 = t1.fork("bob");
626        let mut t3 = t1.fork("carol");
627
628        t1.insert(4, '!');
629        t2.insert(0, '>');
630        t3.remove(2); // remove 's'
631
632        // Merge in different orders and verify convergence.
633        let mut r1 = t1.clone();
634        r1.merge(&t2);
635        r1.merge(&t3);
636
637        let mut r2 = t2.clone();
638        r2.merge(&t3);
639        r2.merge(&t1);
640
641        let mut r3 = t3.clone();
642        r3.merge(&t1);
643        r3.merge(&t2);
644
645        assert_eq!(r1.to_string(), r2.to_string());
646        assert_eq!(r2.to_string(), r3.to_string());
647    }
648}