Skip to main content

lb_rs/model/text/
buffer.rs

1use super::offset_types::{DocByteOffset, DocCharOffset, RangeExt, RelCharOffset};
2use super::operation_types::{InverseOperation, Operation, Replace};
3use super::unicode_segs::UnicodeSegs;
4use super::{diff, unicode_segs};
5use std::ops::Index;
6use unicode_segmentation::UnicodeSegmentation;
7use web_time::{Duration, Instant};
8
9/// Long-lived state of the editor's text buffer. Factored into sub-structs for borrow-checking.
10/// # Operation algebra
11/// Operations are created based on a version of the buffer. This version is called the operation's base and is
12/// identified with a sequence number. When the base of an operation is equal to the buffer's current sequence number,
13/// the operation can be applied and increments the buffer's sequence number.
14///
15/// When multiple operations are created based on the same version of the buffer, such as when a user types a few
16/// keystrokes in one frame or issues a command like indenting multiple list items, the operations all have the same
17/// base. Once the first operation is applied and the buffer's sequence number is incremented, the base of the
18/// remaining operations must be incremented by using the first operation to transform them before they can be applied.
19/// This corresponds to the reality that the buffer state has changed since the operation was created and the operation
20/// must be re-interpreted. For example, if text is typed at the beginning then end of a buffer in one frame, the
21/// position of the text typed at the end of the buffer is greater when it is applied than it was when it was typed.
22///
23/// External changes are merged into the buffer by creating a set of operations that would transform the buffer from
24/// the last external state to the current state. These operations, based on the version of the buffer at the last
25/// successful save or load, must be transformed by all operations that have been applied since (this means we must
26/// preserve the undo history for at least that long; if this presents performance issues, we can always save). Each
27/// operation that is transforming the new operations will match the base of the new operations at the time of
28/// transformation. Finally, the operations will need to transform each other just like any other set of operations
29/// made in a single frame/made based on the same version of the buffer.
30///
31/// # Undo (work in progress)
32/// Undo should revert local changes only, leaving external changes in-tact, so that when all local changes are undone,
33/// the buffer is in a state reflecting external changes only. This is complicated by the fact that external changes
34/// may have been based on local changes that were synced to another client. To undo an operation that had an external
35/// change based on it, we have to interpret the external change in the absence of local changes that were present when
36/// it was created. This is the opposite of interpreting the external change in the presence of local changes that were
37/// not present when it was created i.e. the normal flow of merging external changes. Here, we are removing a local
38/// operation from the middle of the chain of operations that led to the current state of the buffer.
39///
40/// To do this, we perform the dance of transforming operations in reverse, taking a chain of operations each based on
41/// the prior and transforming them into a set of operations based on the same base as the operation to be undone. Then
42/// we remove the operation to be undone and apply the remaining operations with the forward transformation flow.
43///
44/// Operations are not invertible i.e. you cannot construct an inverse operation that will perfectly cancel out the
45/// effect of another operation regardless of the time of interpretation. For example, with a text replacement, you can
46/// construct an inverse text replacement that replaces the new range with the original text, but when operations are
47/// undone from the middle of the chain, it may affect the original text. The operation will be re-interpreted based on
48/// a new state of the buffer at its time of application. The replaced text has no fixed value by design.
49///
50/// However, it is possible to undo the specific application of an operation in the context of the state of the buffer
51/// when it was applied. We store information necessary to undo applied operations alongside the operations themselves
52/// i.e. the text replaced in the application. When the operation is transformed for any reason, this undo information
53/// is invalidated.
54#[derive(Default)]
55pub struct Buffer {
56    /// Current contents of the buffer (what should be displayed in the editor). Todo: hide behind a read-only accessor
57    pub current: Snapshot,
58
59    /// Snapshot of the buffer at the earliest undoable state. Operations are compacted into this as they overflow the
60    /// undo limit.
61    base: Snapshot,
62
63    /// Operations received by the buffer. Used for undo/redo and for merging external changes.
64    ops: Ops,
65
66    /// State for tracking out-of-editor changes
67    external: External,
68}
69
70#[derive(Debug, Default)]
71pub struct Snapshot {
72    pub text: String,
73    pub segs: UnicodeSegs,
74    pub selection: (DocCharOffset, DocCharOffset),
75    pub seq: usize,
76}
77
78impl Snapshot {
79    fn apply_select(&mut self, range: (DocCharOffset, DocCharOffset)) -> Response {
80        self.selection = range;
81        Response::default()
82    }
83
84    fn apply_replace(&mut self, replace: &Replace) -> Response {
85        let Replace { range, text } = replace;
86        let byte_range = self.segs.range_to_byte(*range);
87
88        self.text
89            .replace_range(byte_range.start().0..byte_range.end().0, text);
90        self.segs = unicode_segs::calc(&self.text);
91        adjust_subsequent_range(
92            *range,
93            text.graphemes(true).count().into(),
94            false,
95            &mut self.selection,
96        );
97
98        Response { text_updated: true, ..Default::default() }
99    }
100
101    fn invert(&self, op: &Operation) -> InverseOperation {
102        let mut inverse = InverseOperation { replace: None, select: self.selection };
103        if let Operation::Replace(replace) = op {
104            inverse.replace = Some(self.invert_replace(replace));
105        }
106        inverse
107    }
108
109    fn invert_replace(&self, replace: &Replace) -> Replace {
110        let Replace { range, text } = replace;
111        let byte_range = self.segs.range_to_byte(*range);
112        let replaced_text = self[byte_range].into();
113        let replacement_range = (range.start(), range.start() + text.graphemes(true).count());
114        Replace { range: replacement_range, text: replaced_text }
115    }
116}
117
118#[derive(Default)]
119struct Ops {
120    /// Operations that have been received by the buffer
121    all: Vec<Operation>,
122    meta: Vec<OpMeta>,
123
124    /// Sequence number of the first unapplied operation. Operations starting with this one are queued for processing.
125    processed_seq: usize,
126
127    /// Operations that have been applied to the buffer and already transformed, in order of application. Each of these
128    /// operations is based on the previous operation in this list, with the first based on the history base. Derived
129    /// from other data and invalidated by some undo/redo flows.
130    transformed: Vec<Operation>,
131
132    /// Operations representing the inverse of the operations in `transformed_ops`, in order of application. Useful for
133    /// undoing operations. The data model differs because an operation that replaces text containing the cursor needs
134    /// two operations to revert the text and cursor. Derived from other data and invalidated by some undo/redo flows.
135    transformed_inverted: Vec<InverseOperation>,
136}
137
138impl Ops {
139    fn len(&self) -> usize {
140        self.all.len()
141    }
142
143    fn is_undo_checkpoint(&self, idx: usize) -> bool {
144        // start and end of undo history are checkpoints
145        if idx == 0 {
146            return true;
147        }
148        if idx == self.len() {
149            return true;
150        }
151
152        // events separated by enough time are checkpoints
153        let meta = &self.meta[idx];
154        let prev_meta = &self.meta[idx - 1];
155        if meta.timestamp - prev_meta.timestamp > Duration::from_millis(500) {
156            return true;
157        }
158
159        // immediately after a standalone selection is a checkpoint
160        let mut prev_op_standalone = meta.base != prev_meta.base;
161        if idx > 1 {
162            let prev_prev_meta = &self.meta[idx - 2];
163            prev_op_standalone &= prev_meta.base != prev_prev_meta.base;
164        }
165        let prev_op_selection = matches!(&self.all[idx - 1], Operation::Select(..));
166        if prev_op_standalone && prev_op_selection {
167            return true;
168        }
169
170        false
171    }
172}
173
174#[derive(Default)]
175struct External {
176    /// Text last loaded into the editor. Used as a reference point for merging out-of-editor changes with in-editor
177    /// changes, similar to a base in a 3-way merge. May be a state that never appears in the buffer's history.
178    text: String,
179
180    /// Index of the last external operation referenced when merging changes. May be ahead of current.seq if there has
181    /// not been a call to `update()` (updates current.seq) since the last call to `reload()` (assigns new greatest seq
182    /// to `external_seq`).
183    seq: usize,
184}
185
186#[derive(Default)]
187pub struct Response {
188    pub text_updated: bool,
189    pub open_camera: bool,
190    /// Sequence range of operations applied this frame. Use
191    /// `buffer.replacements_since(seq_before)` to get the edits.
192    pub seq_before: usize,
193    pub seq_after: usize,
194}
195
196impl std::ops::BitOrAssign for Response {
197    fn bitor_assign(&mut self, other: Response) {
198        self.text_updated |= other.text_updated;
199        self.open_camera |= other.open_camera;
200        // keep the earliest seq_before and latest seq_after
201        if self.seq_before == self.seq_after {
202            self.seq_before = other.seq_before;
203        }
204        self.seq_after = other.seq_after;
205    }
206}
207
208/// Additional metadata tracked alongside operations internally.
209#[derive(Clone, Debug)]
210struct OpMeta {
211    /// At what time was this operation applied? Affects undo units.
212    pub timestamp: Instant,
213
214    /// What version of the buffer was the modifier looking at when they made this operation? Used for operational
215    /// transformation, both when applying multiple operations in one frame and when merging out-of-editor changes.
216    /// The magic happens here.
217    pub base: usize,
218}
219
220impl Buffer {
221    /// Push a series of operations onto the buffer's input queue; operations will be undone/redone atomically. Useful
222    /// for batches of internal operations produced from a single input event e.g. multi-line list identation.
223    pub fn queue(&mut self, mut ops: Vec<Operation>) {
224        let timestamp = Instant::now();
225        let base = self.current.seq;
226
227        // combine adjacent replacements
228        let mut combined_ops = Vec::new();
229        ops.sort_by_key(|op| match op {
230            Operation::Select(range) | Operation::Replace(Replace { range, .. }) => range.start(),
231        });
232        for op in ops.into_iter() {
233            match &op {
234                Operation::Replace(Replace { range: op_range, text: op_text }) => {
235                    if let Some(Operation::Replace(Replace {
236                        range: last_op_range,
237                        text: last_op_text,
238                    })) = combined_ops.last_mut()
239                    {
240                        if last_op_range.end() == op_range.start() {
241                            last_op_range.1 = op_range.1;
242                            last_op_text.push_str(op_text);
243                        } else {
244                            combined_ops.push(op);
245                        }
246                    } else {
247                        combined_ops.push(op);
248                    }
249                }
250                Operation::Select(_) => combined_ops.push(op),
251            }
252        }
253
254        self.ops
255            .meta
256            .extend(combined_ops.iter().map(|_| OpMeta { timestamp, base }));
257        self.ops.all.extend(combined_ops);
258    }
259
260    /// Loads a new string into the buffer, merging out-of-editor changes made since last load with in-editor changes
261    /// made since last load. The buffer's undo history is preserved; undo'ing will revert in-editor changes only.
262    /// Exercising undo's may put the buffer in never-before-seen states and exercising all undo's will revert the
263    /// buffer to the most recently loaded state (undo limit permitting).
264    /// Note: undo behavior described here is aspirational and not yet implemented.
265    pub fn reload(&mut self, text: String) {
266        let timestamp = Instant::now();
267        let base = self.external.seq;
268        let ops = diff(&self.external.text, &text);
269
270        self.ops
271            .meta
272            .extend(ops.iter().map(|_| OpMeta { timestamp, base }));
273        self.ops.all.extend(ops.into_iter().map(Operation::Replace));
274
275        self.external.text = text;
276        self.external.seq = self.base.seq + self.ops.all.len();
277    }
278
279    /// Indicates to the buffer the changes that have been saved outside the editor. This will serve as the new base
280    /// for merging external changes. The sequence number should be taken from `current.seq` of the buffer when the
281    /// buffer's contents are read for saving.
282    pub fn saved(&mut self, external_seq: usize, external_text: String) {
283        self.external.text = external_text;
284        self.external.seq = external_seq;
285    }
286
287    pub fn merge(mut self, external_text_a: String, external_text_b: String) -> String {
288        let ops_a = diff(&self.external.text, &external_text_a);
289        let ops_b = diff(&self.external.text, &external_text_b);
290
291        let timestamp = Instant::now();
292        let base = self.external.seq;
293        self.ops
294            .meta
295            .extend(ops_a.iter().map(|_| OpMeta { timestamp, base }));
296        self.ops
297            .meta
298            .extend(ops_b.iter().map(|_| OpMeta { timestamp, base }));
299
300        self.ops
301            .all
302            .extend(ops_a.into_iter().map(Operation::Replace));
303        self.ops
304            .all
305            .extend(ops_b.into_iter().map(Operation::Replace));
306
307        self.update();
308        self.current.text
309    }
310
311    /// Applies all operations in the buffer's input queue
312    pub fn update(&mut self) -> Response {
313        // clear redo stack
314        //             v base        v current    v processed
315        // ops before: |<- applied ->|<- undone ->|<- queued ->|
316        // ops after:  |<- applied ->|<- queued ->|
317        let queue_len = self.base.seq + self.ops.len() - self.ops.processed_seq;
318        if queue_len > 0 {
319            let drain_range = self.current.seq..self.ops.processed_seq;
320            self.ops.all.drain(drain_range.clone());
321            self.ops.meta.drain(drain_range.clone());
322            self.ops.transformed.drain(drain_range.clone());
323            self.ops.transformed_inverted.drain(drain_range.clone());
324            self.ops.processed_seq = self.current.seq;
325        } else {
326            return Response::default();
327        }
328
329        // transform & apply
330        let mut result = Response { seq_before: self.current.seq, ..Default::default() };
331        for idx in self.current_idx()..self.current_idx() + queue_len {
332            let mut op = self.ops.all[idx].clone();
333            let meta = &self.ops.meta[idx];
334            self.transform(&mut op, meta);
335            self.ops.transformed_inverted.push(self.current.invert(&op));
336            self.ops.transformed.push(op.clone());
337            self.ops.processed_seq += 1;
338
339            result |= self.redo();
340        }
341
342        result.seq_after = self.current.seq;
343        result
344    }
345
346    fn transform(&self, op: &mut Operation, meta: &OpMeta) {
347        let base_idx = meta.base - self.base.seq;
348        for transforming_idx in base_idx..self.ops.processed_seq {
349            let preceding_op = &self.ops.transformed[transforming_idx];
350            if let Operation::Replace(Replace {
351                range: preceding_replaced_range,
352                text: preceding_replacement_text,
353            }) = preceding_op
354            {
355                if let Operation::Replace(Replace { range: transformed_range, text }) = op {
356                    if preceding_replaced_range.intersects(transformed_range, false)
357                        && !(preceding_replaced_range.is_empty() && transformed_range.is_empty())
358                    {
359                        // concurrent replacements to intersecting ranges choose the first/local edit as the winner
360                        // this doesn't create self-conflicts during merge because merge combines adjacent replacements
361                        // this doesn't create self-conflicts for same-frame editor changes because our final condition
362                        // is that we don't simultaneously insert text for both operations, which creates un-ideal
363                        // behavior (see test buffer_merge_insert)
364                        *text = "".into();
365                        transformed_range.1 = transformed_range.0;
366                    }
367                }
368
369                match op {
370                    Operation::Replace(Replace { range: transformed_range, .. })
371                    | Operation::Select(transformed_range) => {
372                        adjust_subsequent_range(
373                            *preceding_replaced_range,
374                            preceding_replacement_text.graphemes(true).count().into(),
375                            true,
376                            transformed_range,
377                        );
378                    }
379                }
380            }
381        }
382    }
383
384    pub fn can_redo(&self) -> bool {
385        self.current.seq < self.ops.processed_seq
386    }
387
388    pub fn can_undo(&self) -> bool {
389        self.current.seq > self.base.seq
390    }
391
392    pub fn redo(&mut self) -> Response {
393        let mut response = Response::default();
394        while self.can_redo() {
395            let op = &self.ops.transformed[self.current_idx()];
396
397            self.current.seq += 1;
398
399            response |= match op {
400                Operation::Replace(replace) => self.current.apply_replace(replace),
401                Operation::Select(range) => self.current.apply_select(*range),
402            };
403
404            if self.ops.is_undo_checkpoint(self.current_idx()) {
405                break;
406            }
407        }
408        response
409    }
410
411    pub fn undo(&mut self) -> Response {
412        let mut response = Response::default();
413        while self.can_undo() {
414            self.current.seq -= 1;
415            let op = &self.ops.transformed_inverted[self.current_idx()];
416
417            if let Some(replace) = &op.replace {
418                response |= self.current.apply_replace(replace);
419            }
420            response |= self.current.apply_select(op.select);
421
422            if self.ops.is_undo_checkpoint(self.current_idx()) {
423                break;
424            }
425        }
426        response
427    }
428
429    fn current_idx(&self) -> usize {
430        self.current.seq - self.base.seq
431    }
432
433    /// Transforms a range through all replacements applied between
434    /// `since_seq` and the current sequence. Returns `None` if the range
435    /// intersected a replacement (i.e. the content it referred to was
436    /// modified).
437    pub fn transform_range(
438        &self, since_seq: usize, range: &mut (DocCharOffset, DocCharOffset),
439    ) -> bool {
440        let start = since_seq.saturating_sub(self.base.seq);
441        let end = self.current_idx();
442        for op in &self.ops.transformed[start..end] {
443            if let Operation::Replace(replace) = op {
444                if range.intersects(&replace.range, true)
445                    && !(range.is_empty() && replace.range.is_empty())
446                {
447                    return false;
448                }
449                let replacement_len = replace.text.graphemes(true).count().into();
450                adjust_subsequent_range(replace.range, replacement_len, false, range);
451            }
452        }
453        true
454    }
455
456    /// Reports whether the buffer's current text is empty.
457    pub fn is_empty(&self) -> bool {
458        self.current.text.is_empty()
459    }
460
461    pub fn selection_text(&self) -> String {
462        self[self.current.selection].to_string()
463    }
464}
465
466impl From<&str> for Buffer {
467    fn from(value: &str) -> Self {
468        let mut result = Self::default();
469        result.current.text = value.to_string();
470        result.current.segs = unicode_segs::calc(value);
471        result.external.text = value.to_string();
472        result
473    }
474}
475
476/// Adjust a range based on a text replacement. Positions before the replacement generally are not adjusted,
477/// positions after the replacement generally are, and positions within the replacement are adjusted to the end of
478/// the replacement if `prefer_advance` is true or are adjusted to the start of the replacement otherwise.
479pub fn adjust_subsequent_range(
480    replaced_range: (DocCharOffset, DocCharOffset), replacement_len: RelCharOffset,
481    prefer_advance: bool, range: &mut (DocCharOffset, DocCharOffset),
482) {
483    for position in [&mut range.0, &mut range.1] {
484        adjust_subsequent_position(replaced_range, replacement_len, prefer_advance, position);
485    }
486}
487
488/// Adjust a position based on a text replacement. Positions before the replacement generally are not adjusted,
489/// positions after the replacement generally are, and positions within the replacement are adjusted to the end of
490/// the replacement if `prefer_advance` is true or are adjusted to the start of the replacement otherwise.
491fn adjust_subsequent_position(
492    replaced_range: (DocCharOffset, DocCharOffset), replacement_len: RelCharOffset,
493    prefer_advance: bool, position: &mut DocCharOffset,
494) {
495    let replaced_len = replaced_range.len();
496    let replacement_start = replaced_range.start();
497    let replacement_end = replacement_start + replacement_len;
498
499    enum Mode {
500        Insert,
501        Replace,
502    }
503    let mode = if replaced_range.is_empty() { Mode::Insert } else { Mode::Replace };
504
505    let sorted_bounds = {
506        let mut bounds = vec![replaced_range.start(), replaced_range.end(), *position];
507        bounds.sort();
508        bounds
509    };
510    let bind = |start: &DocCharOffset, end: &DocCharOffset, pos: &DocCharOffset| {
511        start == &replaced_range.start() && end == &replaced_range.end() && pos == &*position
512    };
513
514    *position = match (mode, &sorted_bounds[..]) {
515        // case 1: position at point of text insertion
516        //                       text before replacement: * *
517        //                        range of replaced text:  |
518        //          range of subsequent cursor selection:  |
519        //                        text after replacement: * X *
520        // advance:
521        // adjusted range of subsequent cursor selection:    |
522        // don't advance:
523        // adjusted range of subsequent cursor selection:  |
524        (Mode::Insert, [start, end, pos]) if bind(start, end, pos) && end == pos => {
525            if prefer_advance {
526                replacement_end
527            } else {
528                replacement_start
529            }
530        }
531
532        // case 2: position at start of text replacement
533        //                       text before replacement: * * * *
534        //                        range of replaced text:  |<->|
535        //          range of subsequent cursor selection:  |
536        //                        text after replacement: * X *
537        // adjusted range of subsequent cursor selection:  |
538        (Mode::Replace, [start, pos, end]) if bind(start, end, pos) && start == pos => {
539            if prefer_advance {
540                replacement_end
541            } else {
542                replacement_start
543            }
544        }
545
546        // case 3: position at end of text replacement
547        //                       text before replacement: * * * *
548        //                        range of replaced text:  |<->|
549        //          range of subsequent cursor selection:      |
550        //                        text after replacement: * X *
551        // adjusted range of subsequent cursor selection:    |
552        (Mode::Replace, [start, end, pos]) if bind(start, end, pos) && end == pos => {
553            if prefer_advance {
554                replacement_end
555            } else {
556                replacement_start
557            }
558        }
559
560        // case 4: position before point/start of text insertion/replacement
561        //                       text before replacement: * * * * *
562        //       (possibly empty) range of replaced text:    |<->|
563        //          range of subsequent cursor selection:  |
564        //                        text after replacement: * * X *
565        // adjusted range of subsequent cursor selection:  |
566        (_, [pos, start, end]) if bind(start, end, pos) => *position,
567
568        // case 5: position within text replacement
569        //                       text before replacement: * * * *
570        //                        range of replaced text:  |<->|
571        //          range of subsequent cursor selection:    |
572        //                        text after replacement: * X *
573        // advance:
574        // adjusted range of subsequent cursor selection:    |
575        // don't advance:
576        // adjusted range of subsequent cursor selection:  |
577        (Mode::Replace, [start, pos, end]) if bind(start, end, pos) => {
578            if prefer_advance {
579                replacement_end
580            } else {
581                replacement_start
582            }
583        }
584
585        // case 6: position after point/end of text insertion/replacement
586        //                       text before replacement: * * * * *
587        //       (possibly empty) range of replaced text:  |<->|
588        //          range of subsequent cursor selection:        |
589        //                        text after replacement: * X * *
590        // adjusted range of subsequent cursor selection:      |
591        (_, [start, end, pos]) if bind(start, end, pos) => {
592            *position + replacement_len - replaced_len
593        }
594        _ => unreachable!(),
595    }
596}
597
598impl Index<(DocByteOffset, DocByteOffset)> for Snapshot {
599    type Output = str;
600
601    fn index(&self, index: (DocByteOffset, DocByteOffset)) -> &Self::Output {
602        &self.text[index.start().0..index.end().0]
603    }
604}
605
606impl Index<(DocCharOffset, DocCharOffset)> for Snapshot {
607    type Output = str;
608
609    fn index(&self, index: (DocCharOffset, DocCharOffset)) -> &Self::Output {
610        let index = self.segs.range_to_byte(index);
611        &self.text[index.start().0..index.end().0]
612    }
613}
614
615impl Index<(DocByteOffset, DocByteOffset)> for Buffer {
616    type Output = str;
617
618    fn index(&self, index: (DocByteOffset, DocByteOffset)) -> &Self::Output {
619        &self.current[index]
620    }
621}
622
623impl Index<(DocCharOffset, DocCharOffset)> for Buffer {
624    type Output = str;
625
626    fn index(&self, index: (DocCharOffset, DocCharOffset)) -> &Self::Output {
627        &self.current[index]
628    }
629}
630
631#[cfg(test)]
632mod test {
633    use super::Buffer;
634    use unicode_segmentation::UnicodeSegmentation;
635
636    #[test]
637    fn buffer_merge_nonintersecting_replace() {
638        let base_content = "base content base";
639        let local_content = "local content base";
640        let remote_content = "base content remote";
641
642        assert_eq!(
643            Buffer::from(base_content).merge(local_content.into(), remote_content.into()),
644            "local content remote"
645        );
646        assert_eq!(
647            Buffer::from(base_content).merge(remote_content.into(), local_content.into()),
648            "local content remote"
649        );
650    }
651
652    #[test]
653    fn buffer_merge_prefix_replace() {
654        let base_content = "base content";
655        let local_content = "local content";
656        let remote_content = "remote content";
657
658        assert_eq!(
659            Buffer::from(base_content).merge(local_content.into(), remote_content.into()),
660            "local content"
661        );
662    }
663
664    #[test]
665    fn buffer_merge_infix_replace() {
666        let base_content = "con base tent";
667        let local_content = "con local tent";
668        let remote_content = "con remote tent";
669
670        assert_eq!(
671            Buffer::from(base_content).merge(local_content.into(), remote_content.into()),
672            "con local tent"
673        );
674        assert_eq!(
675            Buffer::from(base_content).merge(remote_content.into(), local_content.into()),
676            "con remote tent"
677        );
678    }
679
680    #[test]
681    fn buffer_merge_postfix_replace() {
682        let base_content = "content base";
683        let local_content = "content local";
684        let remote_content = "content remote";
685
686        assert_eq!(
687            Buffer::from(base_content).merge(local_content.into(), remote_content.into()),
688            "content local"
689        );
690        assert_eq!(
691            Buffer::from(base_content).merge(remote_content.into(), local_content.into()),
692            "content remote"
693        );
694    }
695
696    #[test]
697    fn buffer_merge_insert() {
698        let base_content = "content";
699        let local_content = "content local";
700        let remote_content = "content remote";
701
702        assert_eq!(
703            Buffer::from(base_content).merge(local_content.into(), remote_content.into()),
704            "content local remote"
705        );
706        assert_eq!(
707            Buffer::from(base_content).merge(remote_content.into(), local_content.into()),
708            "content remote local"
709        );
710    }
711
712    #[test]
713    // this test case documents behavior moreso than asserting target state
714    fn buffer_merge_insert_replace() {
715        let base_content = "content";
716        let local_content = "content local";
717        let remote_content = "remote";
718
719        assert_eq!(
720            Buffer::from(base_content).merge(local_content.into(), remote_content.into()),
721            "remote"
722        );
723        assert_eq!(
724            Buffer::from(base_content).merge(remote_content.into(), local_content.into()),
725            "remote local"
726        );
727    }
728
729    #[test]
730    // this test case used to crash `merge`
731    fn buffer_merge_crash() {
732        let base_content = "con tent";
733        let local_content = "cont tent locallocal";
734        let remote_content = "cont remote tent";
735
736        let _ = Buffer::from(base_content).merge(local_content.into(), remote_content.into());
737        let _ = Buffer::from(base_content).merge(remote_content.into(), local_content.into());
738    }
739
740    // ── Fuzz ──────────────────────────────────────────────────────────────
741
742    use rand::prelude::*;
743
744    /// Grapheme clusters for fuzz testing. Includes ASCII, multi-byte, multi-codepoint emoji
745    /// (ZWJ sequences, skin tones, flags), and combining characters.
746    const POOL: &[&str] = &[
747        "a",
748        "b",
749        "z",
750        " ",
751        "\n",
752        "\t",
753        "é",
754        "ñ",
755        "ü",
756        "日",
757        "本",
758        "語",
759        "👋",
760        "🎉",
761        "🔥",
762        "❤️",
763        "👨‍👩‍👧‍👦",
764        "🏳️‍🌈",
765        "👍🏽",
766        "🇺🇸",
767        "🇯🇵",
768        "e\u{0301}",
769        "a\u{0308}", // combining sequences: é, ä
770    ];
771
772    /// Generate a random grapheme-level edit of a document. Picks uniformly from:
773    /// - 0: Delete 1-5 consecutive graphemes at a random position
774    /// - 1: Insert 1-5 random graphemes from POOL at a random position
775    /// - 2: Replace 1-5 consecutive graphemes with 1-3 random graphemes from POOL
776    /// - 3: Clear everything
777    ///
778    /// When the document is empty, cases 0/2/3 fall through to insert (the _ arm).
779    fn random_edit(rng: &mut StdRng, doc: &str) -> String {
780        let graphemes: Vec<&str> = UnicodeSegmentation::graphemes(doc, true).collect();
781        let len = graphemes.len();
782
783        let mut g: Vec<String> = graphemes.iter().map(|s| s.to_string()).collect();
784
785        match rng.gen_range(0..4) {
786            0 if len > 0 => {
787                let pos = rng.gen_range(0..len);
788                let del = rng.gen_range(1..=(len - pos).min(5));
789                g.drain(pos..pos + del);
790            }
791            1 => {
792                let pos = rng.gen_range(0..=len);
793                let n = rng.gen_range(1..=5);
794                for j in 0..n {
795                    g.insert(pos + j, POOL[rng.gen_range(0..POOL.len())].into());
796                }
797            }
798            2 if len > 0 => {
799                let pos = rng.gen_range(0..len);
800                let del = rng.gen_range(1..=(len - pos).min(5));
801                let ins: Vec<String> = (0..rng.gen_range(1..=3))
802                    .map(|_| POOL[rng.gen_range(0..POOL.len())].into())
803                    .collect();
804                g.splice(pos..pos + del, ins);
805            }
806            3 if len > 0 => {
807                g.clear();
808            }
809            _ => {
810                let n = rng.gen_range(1..=5);
811                for _ in 0..n {
812                    g.push(POOL[rng.gen_range(0..POOL.len())].into());
813                }
814            }
815        }
816        g.concat()
817    }
818
819    #[test]
820    fn buffer_merge_fuzz() {
821        let mut rng = StdRng::seed_from_u64(42);
822        let bases = ["hello world", "👨‍👩‍👧‍👦🇺🇸🔥", "café ñoño 日本語", ""];
823        for _ in 0..10_000 {
824            let base = bases[rng.gen_range(0..bases.len())];
825            let a = random_edit(&mut rng, base);
826            let b = random_edit(&mut rng, base);
827
828            // must not panic
829            let _ = Buffer::from(base).merge(a.clone(), b.clone());
830            let _ = Buffer::from(base).merge(b, a);
831        }
832    }
833
834    // ── Chain convergence ──
835
836    /// Simulates a sync channel between two adjacent nodes. Holds the last-agreed-upon
837    /// document text, which serves as the 3-way merge base. When two nodes sync, their
838    /// documents are merged against this base, both nodes adopt the result, and the base
839    /// advances. This mirrors how lockbook's sync works: each client keeps a base (last
840    /// synced state) and merges local vs remote changes against it.
841    struct SyncLink {
842        base: String,
843    }
844
845    impl SyncLink {
846        fn new(base: &str) -> Self {
847            Self { base: base.into() }
848        }
849
850        fn sync(&mut self, left: &mut String, right: &mut String) {
851            let merged = Buffer::from(self.base.as_str()).merge(left.clone(), right.clone());
852            *left = merged.clone();
853            *right = merged.clone();
854            self.base = merged;
855        }
856    }
857
858    /// Sync all adjacent pairs in both directions until the chain stabilizes.
859    /// With N nodes and N-1 links, 2*N passes ensures changes propagate end-to-end.
860    fn full_sync(nodes: &mut [String], links: &mut [SyncLink]) {
861        for _ in 0..nodes.len() * 2 {
862            for i in 0..links.len() {
863                let (left, right) = nodes.split_at_mut(i + 1);
864                links[i].sync(&mut left[i], &mut right[0]);
865            }
866            for i in (0..links.len()).rev() {
867                let (left, right) = nodes.split_at_mut(i + 1);
868                links[i].sync(&mut left[i], &mut right[0]);
869            }
870        }
871    }
872
873    fn partial_sync(nodes: &mut [String], links: &mut [SyncLink], rng: &mut StdRng) {
874        for _ in 0..3 {
875            for i in 0..links.len() {
876                if rng.gen_bool(0.5) {
877                    let (left, right) = nodes.split_at_mut(i + 1);
878                    links[i].sync(&mut left[i], &mut right[0]);
879                }
880            }
881        }
882    }
883
884    fn assert_converged(nodes: &[String]) {
885        for (i, node) in nodes.iter().enumerate().skip(1) {
886            assert_eq!(
887                &nodes[0], node,
888                "node 0 and node {} diverged: {:?} vs {:?}",
889                i, nodes[0], node
890            );
891        }
892    }
893
894    #[test]
895    fn buffer_merge_fuzz_chain_2() {
896        let mut rng = StdRng::seed_from_u64(42);
897        for _ in 0..10_000 {
898            let init = if rng.gen_bool(0.5) { "hello 👋🏽" } else { "" };
899            let mut nodes: Vec<String> = (0..2).map(|_| init.into()).collect();
900            let mut links: Vec<SyncLink> = (0..1).map(|_| SyncLink::new(init)).collect();
901
902            for _ in 0..rng.gen_range(1..=4) {
903                for _ in 0..rng.gen_range(1..=3) {
904                    let i = rng.gen_range(0..2);
905                    nodes[i] = random_edit(&mut rng, &nodes[i]);
906                }
907                if rng.gen_bool(0.5) {
908                    partial_sync(&mut nodes, &mut links, &mut rng);
909                }
910            }
911
912            full_sync(&mut nodes, &mut links);
913            assert_converged(&nodes);
914        }
915    }
916
917    #[test]
918    fn buffer_merge_fuzz_chain_5() {
919        let mut rng = StdRng::seed_from_u64(77);
920        for _ in 0..5_000 {
921            let init = if rng.gen_bool(0.5) { "café 日本語 🇯🇵" } else { "abc" };
922            let mut nodes: Vec<String> = (0..5).map(|_| init.into()).collect();
923            let mut links: Vec<SyncLink> = (0..4).map(|_| SyncLink::new(init)).collect();
924
925            for _ in 0..rng.gen_range(1..=3) {
926                for _ in 0..rng.gen_range(1..=5) {
927                    let i = rng.gen_range(0..5);
928                    nodes[i] = random_edit(&mut rng, &nodes[i]);
929                }
930                if rng.gen_bool(0.5) {
931                    partial_sync(&mut nodes, &mut links, &mut rng);
932                }
933            }
934
935            full_sync(&mut nodes, &mut links);
936            assert_converged(&nodes);
937        }
938    }
939}