Skip to main content

mdcs_db/
rga_text.rs

1//! RGA Text - Collaborative text CRDT based on Replicated Growable Array.
2//!
3//! Provides character-level collaborative text editing with:
4//! - Insert at any position
5//! - Delete ranges
6//! - Stable position anchors for cursor sync
7//!
8//! Based on the RGA algorithm but optimized for text.
9
10use mdcs_core::lattice::Lattice;
11use serde::{Deserialize, Serialize};
12use std::collections::{HashMap, HashSet};
13
14/// Unique identifier for a character in the text.
15#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
16pub struct TextId {
17    /// The replica that created this character.
18    pub replica: String,
19    /// Sequence number within that replica.
20    pub seq: u64,
21}
22
23impl TextId {
24    pub fn new(replica: impl Into<String>, seq: u64) -> Self {
25        Self {
26            replica: replica.into(),
27            seq,
28        }
29    }
30
31    /// Create a genesis ID (for the virtual start of text).
32    pub fn genesis() -> Self {
33        Self {
34            replica: "".to_string(),
35            seq: 0,
36        }
37    }
38
39    /// Create an end marker ID.
40    pub fn end() -> Self {
41        Self {
42            replica: "".to_string(),
43            seq: u64::MAX,
44        }
45    }
46}
47
48impl PartialOrd for TextId {
49    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
50        Some(self.cmp(other))
51    }
52}
53
54impl Ord for TextId {
55    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
56        // Higher sequence = later in causal order
57        // Tie-break on replica ID for determinism
58        self.seq
59            .cmp(&other.seq)
60            .then_with(|| self.replica.cmp(&other.replica))
61    }
62}
63
64/// A character node in the RGA text.
65#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
66struct TextNode {
67    /// The unique ID of this character.
68    id: TextId,
69    /// The character (or None if deleted).
70    char: Option<char>,
71    /// The ID of the character this was inserted after.
72    origin: TextId,
73    /// Whether this node is deleted (tombstone).
74    deleted: bool,
75}
76
77impl TextNode {
78    fn new(id: TextId, ch: char, origin: TextId) -> Self {
79        Self {
80            id,
81            char: Some(ch),
82            origin,
83            deleted: false,
84        }
85    }
86}
87
88/// Delta for text operations.
89#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
90pub struct RGATextDelta {
91    /// Characters to insert.
92    pub inserts: Vec<(TextId, char, TextId)>, // (id, char, origin)
93    /// IDs of characters to delete.
94    pub deletes: Vec<TextId>,
95}
96
97impl RGATextDelta {
98    pub fn new() -> Self {
99        Self {
100            inserts: Vec::new(),
101            deletes: Vec::new(),
102        }
103    }
104
105    pub fn is_empty(&self) -> bool {
106        self.inserts.is_empty() && self.deletes.is_empty()
107    }
108}
109
110impl Default for RGATextDelta {
111    fn default() -> Self {
112        Self::new()
113    }
114}
115
116/// Collaborative text CRDT using RGA algorithm.
117///
118/// Supports character-level insert and delete with
119/// deterministic conflict resolution.
120#[derive(Clone, Debug, Serialize, Deserialize)]
121pub struct RGAText {
122    /// All nodes indexed by their ID.
123    nodes: HashMap<TextId, TextNode>,
124    /// Children of each node (characters inserted after it).
125    /// Maps origin -> list of children sorted by ID (descending for RGA).
126    children: HashMap<TextId, Vec<TextId>>,
127    /// The replica ID for this instance.
128    replica_id: String,
129    /// Sequence counter for generating IDs.
130    seq: u64,
131    /// Pending delta for replication.
132    #[serde(skip)]
133    pending_delta: Option<RGATextDelta>,
134}
135
136impl RGAText {
137    /// Create a new empty text.
138    pub fn new(replica_id: impl Into<String>) -> Self {
139        let replica_id = replica_id.into();
140        let mut text = Self {
141            nodes: HashMap::new(),
142            children: HashMap::new(),
143            replica_id,
144            seq: 0,
145            pending_delta: None,
146        };
147
148        // Initialize with genesis node's children list
149        text.children.insert(TextId::genesis(), Vec::new());
150
151        text
152    }
153
154    /// Get the replica ID.
155    pub fn replica_id(&self) -> &str {
156        &self.replica_id
157    }
158
159    /// Generate a new unique ID.
160    fn next_id(&mut self) -> TextId {
161        self.seq += 1;
162        TextId::new(&self.replica_id, self.seq)
163    }
164
165    /// Insert a string at the given position.
166    pub fn insert(&mut self, position: usize, text: &str) {
167        let mut origin = self
168            .id_at_index(position.saturating_sub(1))
169            .unwrap_or(TextId::genesis());
170
171        for ch in text.chars() {
172            let id = self.next_id();
173            let node = TextNode::new(id.clone(), ch, origin.clone());
174
175            self.integrate_node(node.clone());
176
177            // Record in delta
178            let delta = self.pending_delta.get_or_insert_with(RGATextDelta::new);
179            delta.inserts.push((id.clone(), ch, origin));
180
181            origin = id;
182        }
183    }
184
185    /// Delete characters from start to start+length.
186    pub fn delete(&mut self, start: usize, length: usize) {
187        let ids: Vec<_> = self
188            .visible_ids()
189            .skip(start)
190            .take(length)
191            .cloned()
192            .collect();
193
194        for id in ids {
195            self.delete_by_id(&id);
196        }
197    }
198
199    /// Delete a character by its ID.
200    fn delete_by_id(&mut self, id: &TextId) -> Option<char> {
201        if let Some(node) = self.nodes.get_mut(id) {
202            if !node.deleted {
203                node.deleted = true;
204                let ch = node.char.take();
205
206                // Record delta
207                let delta = self.pending_delta.get_or_insert_with(RGATextDelta::new);
208                delta.deletes.push(id.clone());
209
210                return ch;
211            }
212        }
213        None
214    }
215
216    /// Replace a range with new text.
217    pub fn replace(&mut self, start: usize, end: usize, text: &str) {
218        self.delete(start, end - start);
219        self.insert(start, text);
220    }
221
222    /// Splice: delete some characters and insert new ones.
223    pub fn splice(&mut self, position: usize, delete_count: usize, insert: &str) {
224        self.delete(position, delete_count);
225        self.insert(position, insert);
226    }
227
228    /// Get the length (number of visible characters).
229    pub fn len(&self) -> usize {
230        self.nodes.values().filter(|n| !n.deleted).count()
231    }
232
233    /// Check if empty.
234    pub fn is_empty(&self) -> bool {
235        self.len() == 0
236    }
237
238    /// Get character at position.
239    pub fn char_at(&self, position: usize) -> Option<char> {
240        self.iter().nth(position)
241    }
242
243    /// Get a substring.
244    pub fn slice(&self, start: usize, end: usize) -> String {
245        self.iter().skip(start).take(end - start).collect()
246    }
247
248    /// Iterate over visible characters.
249    pub fn iter(&self) -> impl Iterator<Item = char> + '_ {
250        self.iter_nodes()
251            .filter(|n| !n.deleted)
252            .filter_map(|n| n.char)
253    }
254
255    /// Get the ID at a visible index.
256    fn id_at_index(&self, index: usize) -> Option<TextId> {
257        self.visible_ids().nth(index).cloned()
258    }
259
260    /// Iterate over visible IDs.
261    fn visible_ids(&self) -> impl Iterator<Item = &TextId> + '_ {
262        self.iter_nodes().filter(|n| !n.deleted).map(|n| &n.id)
263    }
264
265    /// Convert a TextId to a visible position.
266    pub fn id_to_position(&self, id: &TextId) -> Option<usize> {
267        self.visible_ids().position(|i| i == id)
268    }
269
270    /// Convert a visible position to a TextId.
271    pub fn position_to_id(&self, position: usize) -> Option<TextId> {
272        self.id_at_index(position)
273    }
274
275    /// Iterate over all nodes in order.
276    fn iter_nodes(&self) -> impl Iterator<Item = &TextNode> + '_ {
277        TextIterator {
278            text: self,
279            stack: vec![TextId::genesis()],
280            visited: HashSet::new(),
281        }
282    }
283
284    /// Integrate a node into the text.
285    fn integrate_node(&mut self, node: TextNode) {
286        let id = node.id.clone();
287        let origin = node.origin.clone();
288
289        // Add to nodes map
290        self.nodes.insert(id.clone(), node);
291
292        // Add to children of origin, maintaining sort order (descending by ID for RGA)
293        let children = self.children.entry(origin).or_default();
294        let pos = children
295            .iter()
296            .position(|c| c < &id)
297            .unwrap_or(children.len());
298        children.insert(pos, id.clone());
299
300        // Ensure this node has a children entry
301        self.children.entry(id).or_default();
302    }
303
304    /// Take the pending delta.
305    pub fn take_delta(&mut self) -> Option<RGATextDelta> {
306        self.pending_delta.take()
307    }
308
309    /// Apply a delta from another replica.
310    pub fn apply_delta(&mut self, delta: &RGATextDelta) {
311        // Apply inserts
312        for (id, ch, origin) in &delta.inserts {
313            if !self.nodes.contains_key(id) {
314                let node = TextNode::new(id.clone(), *ch, origin.clone());
315                self.integrate_node(node);
316            }
317        }
318
319        // Apply deletes
320        for id in &delta.deletes {
321            if let Some(node) = self.nodes.get_mut(id) {
322                node.deleted = true;
323                node.char = None;
324            }
325        }
326    }
327}
328
329/// Iterator for traversing text nodes in order.
330struct TextIterator<'a> {
331    text: &'a RGAText,
332    stack: Vec<TextId>,
333    visited: HashSet<TextId>,
334}
335
336impl<'a> Iterator for TextIterator<'a> {
337    type Item = &'a TextNode;
338
339    fn next(&mut self) -> Option<Self::Item> {
340        while let Some(id) = self.stack.pop() {
341            if self.visited.contains(&id) {
342                continue;
343            }
344            self.visited.insert(id.clone());
345
346            // Push children in reverse order
347            if let Some(children) = self.text.children.get(&id) {
348                for child in children.iter().rev() {
349                    if !self.visited.contains(child) {
350                        self.stack.push(child.clone());
351                    }
352                }
353            }
354
355            // Return the node (skip genesis)
356            if id != TextId::genesis() {
357                if let Some(node) = self.text.nodes.get(&id) {
358                    return Some(node);
359                }
360            }
361        }
362        None
363    }
364}
365
366impl std::fmt::Display for RGAText {
367    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
368        for ch in self.iter() {
369            write!(f, "{}", ch)?;
370        }
371        Ok(())
372    }
373}
374
375impl PartialEq for RGAText {
376    fn eq(&self, other: &Self) -> bool {
377        self.to_string() == other.to_string()
378    }
379}
380
381impl Eq for RGAText {}
382
383impl Lattice for RGAText {
384    fn bottom() -> Self {
385        Self::new("")
386    }
387
388    fn join(&self, other: &Self) -> Self {
389        let mut result = self.clone();
390
391        // Merge all nodes from other
392        for (id, node) in &other.nodes {
393            if let Some(existing) = result.nodes.get_mut(id) {
394                if node.deleted {
395                    existing.deleted = true;
396                    existing.char = None;
397                }
398            } else {
399                result.integrate_node(node.clone());
400            }
401        }
402
403        result
404    }
405}
406
407impl Default for RGAText {
408    fn default() -> Self {
409        Self::new("")
410    }
411}
412
413#[cfg(test)]
414mod tests {
415    use super::*;
416
417    #[test]
418    fn test_basic_insert() {
419        let mut text = RGAText::new("r1");
420        text.insert(0, "Hello");
421        assert_eq!(text.to_string(), "Hello");
422        assert_eq!(text.len(), 5);
423    }
424
425    #[test]
426    fn test_insert_at_position() {
427        let mut text = RGAText::new("r1");
428        text.insert(0, "Hello");
429        text.insert(5, " World");
430        assert_eq!(text.to_string(), "Hello World");
431    }
432
433    #[test]
434    fn test_insert_in_middle() {
435        let mut text = RGAText::new("r1");
436        text.insert(0, "Helo");
437        text.insert(2, "l");
438        assert_eq!(text.to_string(), "Hello");
439    }
440
441    #[test]
442    fn test_delete() {
443        let mut text = RGAText::new("r1");
444        text.insert(0, "Hello World");
445        text.delete(5, 6); // Delete " World"
446        assert_eq!(text.to_string(), "Hello");
447    }
448
449    #[test]
450    fn test_replace() {
451        let mut text = RGAText::new("r1");
452        text.insert(0, "Hello World");
453        text.replace(6, 11, "Rust");
454        assert_eq!(text.to_string(), "Hello Rust");
455    }
456
457    #[test]
458    fn test_concurrent_inserts() {
459        let mut text1 = RGAText::new("r1");
460        let mut text2 = RGAText::new("r2");
461
462        // Both start with "Hello"
463        text1.insert(0, "Hello");
464        text2.apply_delta(&text1.take_delta().unwrap());
465
466        // Concurrent inserts at the end
467        text1.insert(5, " World");
468        text2.insert(5, " Rust");
469
470        // Exchange deltas
471        let delta1 = text1.take_delta().unwrap();
472        let delta2 = text2.take_delta().unwrap();
473
474        text1.apply_delta(&delta2);
475        text2.apply_delta(&delta1);
476
477        // Should converge to same text
478        assert_eq!(text1.to_string(), text2.to_string());
479        // Both additions should be present
480        assert!(text1.to_string().contains("World") || text1.to_string().contains("Rust"));
481    }
482
483    #[test]
484    fn test_concurrent_insert_delete() {
485        let mut text1 = RGAText::new("r1");
486        let mut text2 = RGAText::new("r2");
487
488        // Both start with "Hello"
489        text1.insert(0, "Hello");
490        text2.apply_delta(&text1.take_delta().unwrap());
491
492        // r1 deletes "llo", r2 inserts "x" after "He"
493        text1.delete(2, 3);
494        text2.insert(2, "x");
495
496        // Exchange deltas
497        let delta1 = text1.take_delta().unwrap();
498        let delta2 = text2.take_delta().unwrap();
499
500        text1.apply_delta(&delta2);
501        text2.apply_delta(&delta1);
502
503        // Should converge
504        assert_eq!(text1.to_string(), text2.to_string());
505        // x should survive, llo should be deleted
506        assert!(text1.to_string().contains("x"));
507        assert!(!text1.to_string().contains("llo"));
508    }
509
510    #[test]
511    fn test_char_at() {
512        let mut text = RGAText::new("r1");
513        text.insert(0, "Hello");
514
515        assert_eq!(text.char_at(0), Some('H'));
516        assert_eq!(text.char_at(4), Some('o'));
517        assert_eq!(text.char_at(5), None);
518    }
519
520    #[test]
521    fn test_slice() {
522        let mut text = RGAText::new("r1");
523        text.insert(0, "Hello World");
524
525        assert_eq!(text.slice(0, 5), "Hello");
526        assert_eq!(text.slice(6, 11), "World");
527    }
528
529    #[test]
530    fn test_position_id_conversion() {
531        let mut text = RGAText::new("r1");
532        text.insert(0, "Hello");
533
534        let id = text.position_to_id(2).unwrap();
535        let pos = text.id_to_position(&id).unwrap();
536
537        assert_eq!(pos, 2);
538    }
539
540    #[test]
541    fn test_lattice_join() {
542        let mut text1 = RGAText::new("r1");
543        let mut text2 = RGAText::new("r2");
544
545        text1.insert(0, "Hello");
546        text2.insert(0, "World");
547
548        let merged = text1.join(&text2);
549
550        // Both texts should be somehow combined
551        assert!(merged.len() >= 5);
552    }
553}