lb_rs/model/text/
buffer.rs

1use super::offset_types::{DocByteOffset, DocCharOffset, RangeExt, RelCharOffset};
2use super::operation_types::{InverseOperation, Operation, Replace};
3use super::unicode_segs::UnicodeSegs;
4use super::{diff, unicode_segs};
5use std::ops::Index;
6use std::time::{Duration, Instant};
7use unicode_segmentation::UnicodeSegmentation;
8
9/// Long-lived state of the editor's text buffer. Factored into sub-structs for borrow-checking.
10/// # Operation algebra
11/// Operations are created based on a version of the buffer. This version is called the operation's base and is
12/// identified with a sequence number. When the base of an operation is equal to the buffer's current sequence number,
13/// the operation can be applied and increments the buffer's sequence number.
14///
15/// When multiple operations are created based on the same version of the buffer, such as when a user types a few
16/// keystrokes in one frame or issues a command like indenting multiple list items, the operations all have the same
17/// base. Once the first operation is applied and the buffer's sequence number is incremented, the base of the
18/// remaining operations must be incremented by using the first operation to transform them before they can be applied.
19/// This corresponds to the reality that the buffer state has changed since the operation was created and the operation
20/// must be re-interpreted. For example, if text is typed at the beginning then end of a buffer in one frame, the
21/// position of the text typed at the end of the buffer is greater when it is applied than it was when it was typed.
22///
23/// External changes are merged into the buffer by creating a set of operations that would transform the buffer from
24/// the last external state to the current state. These operations, based on the version of the buffer at the last
25/// successful save or load, must be transformed by all operations that have been applied since (this means we must
26/// preserve the undo history for at least that long; if this presents performance issues, we can always save). Each
27/// operation that is transforming the new operations will match the base of the new operations at the time of
28/// transformation. Finally, the operations will need to transform each other just like any other set of operations
29/// made in a single frame/made based on the same version of the buffer.
30///
31/// # Undo (work in progress)
/// Undo should revert local changes only, leaving external changes intact, so that when all local changes are undone,
33/// the buffer is in a state reflecting external changes only. This is complicated by the fact that external changes
34/// may have been based on local changes that were synced to another client. To undo an operation that had an external
35/// change based on it, we have to interpret the external change in the absence of local changes that were present when
36/// it was created. This is the opposite of interpreting the external change in the presence of local changes that were
37/// not present when it was created i.e. the normal flow of merging external changes. Here, we are removing a local
38/// operation from the middle of the chain of operations that led to the current state of the buffer.
39///
40/// To do this, we perform the dance of transforming operations in reverse, taking a chain of operations each based on
41/// the prior and transforming them into a set of operations based on the same base as the operation to be undone. Then
42/// we remove the operation to be undone and apply the remaining operations with the forward transformation flow.
43///
44/// Operations are not invertible i.e. you cannot construct an inverse operation that will perfectly cancel out the
45/// effect of another operation regardless of the time of interpretation. For example, with a text replacement, you can
46/// construct an inverse text replacement that replaces the new range with the original text, but when operations are
47/// undone from the middle of the chain, it may affect the original text. The operation will be re-interpreted based on
48/// a new state of the buffer at its time of application. The replaced text has no fixed value by design.
49///
50/// However, it is possible to undo the specific application of an operation in the context of the state of the buffer
51/// when it was applied. We store information necessary to undo applied operations alongside the operations themselves
52/// i.e. the text replaced in the application. When the operation is transformed for any reason, this undo information
53/// is invalidated.
#[derive(Default)]
pub struct Buffer {
    /// Current contents of the buffer (what should be displayed in the editor). Todo: hide behind a read-only accessor
    pub current: Snapshot,

    /// Snapshot of the buffer at the earliest undoable state. Operations are compacted into this as they overflow the
    /// undo limit. Its `seq` is the offset subtracted from sequence numbers to obtain indices into the operation
    /// lists held by `ops`.
    base: Snapshot,

    /// Operations received by the buffer. Used for undo/redo and for merging external changes.
    ops: Ops,

    /// State for tracking out-of-editor changes
    external: External,
}
69
/// The state of the buffer at one point in its operation history.
#[derive(Debug, Default)]
pub struct Snapshot {
    /// Full text of the buffer at this point in history.
    pub text: String,
    /// Segmentation of `text`, recomputed with `unicode_segs::calc` whenever the text is replaced; used to convert
    /// char-offset ranges into byte ranges.
    pub segs: UnicodeSegs,
    /// Selection as a pair of `DocCharOffset`s.
    pub selection: (DocCharOffset, DocCharOffset),
    /// Sequence number of this snapshot: incremented for every operation applied by redo and decremented for every
    /// operation reverted by undo.
    pub seq: usize,
}
77
78impl Snapshot {
79    fn apply_select(&mut self, range: (DocCharOffset, DocCharOffset)) -> Response {
80        self.selection = range;
81        Response { text_updated: false }
82    }
83
84    fn apply_replace(&mut self, replace: &Replace) -> Response {
85        let Replace { range, text } = replace;
86        let byte_range = self.segs.range_to_byte(*range);
87
88        self.text
89            .replace_range(byte_range.start().0..byte_range.end().0, text);
90        self.segs = unicode_segs::calc(&self.text);
91        adjust_subsequent_range(
92            *range,
93            text.graphemes(true).count().into(),
94            false,
95            &mut self.selection,
96        );
97
98        Response { text_updated: true }
99    }
100
101    fn invert(&self, op: &Operation) -> InverseOperation {
102        let mut inverse = InverseOperation { replace: None, select: self.selection };
103        if let Operation::Replace(replace) = op {
104            inverse.replace = Some(self.invert_replace(replace));
105        }
106        inverse
107    }
108
109    fn invert_replace(&self, replace: &Replace) -> Replace {
110        let Replace { range, text } = replace;
111        let byte_range = self.segs.range_to_byte(*range);
112        let replaced_text = self[byte_range].into();
113        let replacement_range = (range.start(), range.start() + text.graphemes(true).count());
114        Replace { range: replacement_range, text: replaced_text }
115    }
116}
117
#[derive(Default)]
struct Ops {
    /// Operations that have been received by the buffer
    all: Vec<Operation>,
    /// Metadata for the operations in `all`; the two lists are extended and drained together and are indexed with
    /// the same index.
    meta: Vec<OpMeta>,

    /// Sequence number of the first unapplied operation. Operations starting with this one are queued for processing.
    processed_seq: usize,

    /// Operations that have been applied to the buffer and already transformed, in order of application. Each of these
    /// operations is based on the previous operation in this list, with the first based on the history base. Derived
    /// from other data and invalidated by some undo/redo flows.
    transformed: Vec<Operation>,

    /// Operations representing the inverse of the operations in `transformed`, in order of application. Useful for
    /// undoing operations. The data model differs because an operation that replaces text containing the cursor needs
    /// two operations to revert the text and cursor. Derived from other data and invalidated by some undo/redo flows.
    transformed_inverted: Vec<InverseOperation>,
}
137
138impl Ops {
139    fn len(&self) -> usize {
140        self.all.len()
141    }
142
143    fn is_undo_checkpoint(&self, idx: usize) -> bool {
144        // start and end of undo history are checkpoints
145        if idx == 0 {
146            return true;
147        }
148        if idx == self.len() {
149            return true;
150        }
151
152        // events separated by enough time are checkpoints
153        let meta = &self.meta[idx];
154        let prev_meta = &self.meta[idx - 1];
155        if meta.timestamp - prev_meta.timestamp > Duration::from_millis(500) {
156            return true;
157        }
158
159        // immediately after a standalone selection is a checkpoint
160        let mut prev_op_standalone = meta.base != prev_meta.base;
161        if idx > 1 {
162            let prev_prev_meta = &self.meta[idx - 2];
163            prev_op_standalone &= prev_meta.base != prev_prev_meta.base;
164        }
165        let prev_op_selection = matches!(&self.all[idx - 1], Operation::Select(..));
166        if prev_op_standalone && prev_op_selection {
167            return true;
168        }
169
170        false
171    }
172}
173
#[derive(Default)]
struct External {
    /// Text last loaded into the editor. Used as a reference point for merging out-of-editor changes with in-editor
    /// changes, similar to a base in a 3-way merge. May be a state that never appears in the buffer's history.
    text: String,

    /// Index of the last external operation referenced when merging changes. May be ahead of current.seq if there has
    /// not been a call to `update()` (updates current.seq) since the last call to `reload()` (assigns the new greatest
    /// seq to this field). Also overwritten by `saved()` with the sequence number supplied by the caller.
    seq: usize,
}
185
/// Outcome of applying operations to the buffer.
///
/// Responses accumulate with `|=` and convert into `bool` via `From`, yielding whether the text was updated.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct Response {
    /// Whether any applied operation changed the buffer's text (as opposed to only its selection).
    text_updated: bool,
}
190
191impl std::ops::BitOrAssign for Response {
192    fn bitor_assign(&mut self, other: Response) {
193        self.text_updated |= other.text_updated;
194    }
195}
196
197impl From<Response> for bool {
198    fn from(value: Response) -> Self {
199        value.text_updated
200    }
201}
202
/// Additional metadata tracked alongside operations internally.
#[derive(Clone, Debug)]
struct OpMeta {
    /// At what time was this operation received by the buffer (queued, reloaded, or merged)? Affects undo units:
    /// operations more than 500ms apart are separated by an undo checkpoint.
    pub timestamp: Instant,

    /// What version of the buffer was the modifier looking at when they made this operation? Used for operational
    /// transformation, both when applying multiple operations in one frame and when merging out-of-editor changes.
    /// The magic happens here.
    pub base: usize,
}
214
215impl Buffer {
216    /// Push a series of operations onto the buffer's input queue; operations will be undone/redone atomically. Useful
217    /// for batches of internal operations produced from a single input event e.g. multi-line list identation.
218    pub fn queue(&mut self, ops: Vec<Operation>) {
219        let timestamp = Instant::now();
220        let base = self.current.seq;
221
222        self.ops
223            .meta
224            .extend(ops.iter().map(|_| OpMeta { timestamp, base }));
225        self.ops.all.extend(ops);
226    }
227
228    /// Loads a new string into the buffer, merging out-of-editor changes made since last load with in-editor changes
229    /// made since last load. The buffer's undo history is preserved; undo'ing will revert in-editor changes only.
230    /// Exercising undo's may put the buffer in never-before-seen states and exercising all undo's will revert the
231    /// buffer to the most recently loaded state (undo limit permitting).
232    /// Note: undo behavior described here is aspirational and not yet implemented.
233    pub fn reload(&mut self, text: String) {
234        let timestamp = Instant::now();
235        let base = self.external.seq;
236        let ops = diff(&self.external.text, &text);
237
238        self.ops
239            .meta
240            .extend(ops.iter().map(|_| OpMeta { timestamp, base }));
241        self.ops.all.extend(ops.into_iter().map(Operation::Replace));
242
243        self.external.text = text;
244        self.external.seq = self.base.seq + self.ops.all.len();
245    }
246
247    /// Indicates to the buffer the changes that have been saved outside the editor. This will serve as the new base
248    /// for merging external changes. The sequence number should be taken from `current.seq` of the buffer when the
249    /// buffer's contents are read for saving.
250    pub fn saved(&mut self, external_seq: usize, external_text: String) {
251        self.external.text = external_text;
252        self.external.seq = external_seq;
253    }
254
255    pub fn merge(mut self, external_text_a: String, external_text_b: String) -> String {
256        let ops_a = diff(&self.external.text, &external_text_a);
257        let ops_b = diff(&self.external.text, &external_text_b);
258
259        let timestamp = Instant::now();
260        let base = self.external.seq;
261        self.ops
262            .meta
263            .extend(ops_a.iter().map(|_| OpMeta { timestamp, base }));
264        self.ops
265            .meta
266            .extend(ops_b.iter().map(|_| OpMeta { timestamp, base }));
267
268        self.ops
269            .all
270            .extend(ops_a.into_iter().map(Operation::Replace));
271        self.ops
272            .all
273            .extend(ops_b.into_iter().map(Operation::Replace));
274
275        self.update();
276        self.current.text
277    }
278
279    /// Applies all operations in the buffer's input queue
280    pub fn update(&mut self) -> Response {
281        // clear redo stack
282        //             v base        v current    v processed
283        // ops before: |<- applied ->|<- undone ->|<- queued ->|
284        // ops after:  |<- applied ->|<- queued ->|
285        let queue_len = self.base.seq + self.ops.len() - self.ops.processed_seq;
286        if queue_len > 0 {
287            let drain_range = self.current.seq..self.ops.processed_seq;
288            self.ops.all.drain(drain_range.clone());
289            self.ops.meta.drain(drain_range.clone());
290            self.ops.transformed.drain(drain_range.clone());
291            self.ops.transformed_inverted.drain(drain_range.clone());
292            self.ops.processed_seq = self.current.seq;
293        } else {
294            return Response::default();
295        }
296
297        // transform & apply
298        let mut result = Response::default();
299        for idx in self.current_idx()..self.current_idx() + queue_len {
300            let mut op = self.ops.all[idx].clone();
301            let meta = &self.ops.meta[idx];
302            self.transform(&mut op, meta);
303            self.ops.transformed_inverted.push(self.current.invert(&op));
304            self.ops.transformed.push(op.clone());
305            self.ops.processed_seq += 1;
306
307            result |= self.redo();
308        }
309
310        result
311    }
312
313    fn transform(&self, op: &mut Operation, meta: &OpMeta) {
314        let base_idx = meta.base - self.base.seq;
315        for transforming_idx in base_idx..self.ops.processed_seq {
316            let preceding_op = &self.ops.transformed[transforming_idx];
317            if let Operation::Replace(Replace {
318                range: preceding_replaced_range,
319                text: preceding_replacement_text,
320            }) = preceding_op
321            {
322                if let Operation::Replace(Replace { range: transformed_range, text }) = op {
323                    if preceding_replaced_range.intersects(transformed_range, true)
324                        && !(preceding_replaced_range.is_empty() && transformed_range.is_empty())
325                    {
326                        // concurrent replacements to intersecting ranges choose the first/local edit as the winner
327                        // this doesn't create self-conflicts during merge because merge combines adjacent replacements
328                        // this doesn't create self-conflicts for same-frame editor changes because our final condition
329                        // is that we don't simultaneously insert text for both operations, which creates un-ideal
330                        // behavior (see test buffer_merge_insert)
331                        *text = "".into();
332                        transformed_range.1 = transformed_range.0;
333                    }
334                }
335
336                match op {
337                    Operation::Replace(Replace { range: transformed_range, .. })
338                    | Operation::Select(transformed_range) => {
339                        adjust_subsequent_range(
340                            *preceding_replaced_range,
341                            preceding_replacement_text.graphemes(true).count().into(),
342                            true,
343                            transformed_range,
344                        );
345                    }
346                }
347            }
348        }
349    }
350
351    pub fn can_redo(&self) -> bool {
352        self.current.seq < self.ops.processed_seq
353    }
354
355    pub fn can_undo(&self) -> bool {
356        self.current.seq > self.base.seq
357    }
358
359    pub fn redo(&mut self) -> Response {
360        let mut response = Response::default();
361        while self.can_redo() {
362            let op = &self.ops.transformed[self.current_idx()];
363
364            self.current.seq += 1;
365
366            response |= match op {
367                Operation::Replace(replace) => self.current.apply_replace(replace),
368                Operation::Select(range) => self.current.apply_select(*range),
369            };
370
371            if self.ops.is_undo_checkpoint(self.current_idx()) {
372                break;
373            }
374        }
375        response
376    }
377
378    pub fn undo(&mut self) -> Response {
379        let mut response = Response::default();
380        while self.can_undo() {
381            self.current.seq -= 1;
382            let op = &self.ops.transformed_inverted[self.current_idx()];
383
384            if let Some(replace) = &op.replace {
385                response |= self.current.apply_replace(replace);
386            }
387            response |= self.current.apply_select(op.select);
388
389            if self.ops.is_undo_checkpoint(self.current_idx()) {
390                break;
391            }
392        }
393        response
394    }
395
396    fn current_idx(&self) -> usize {
397        self.current.seq - self.base.seq
398    }
399
400    /// Reports whether the buffer's current text is empty.
401    pub fn is_empty(&self) -> bool {
402        self.current.text.is_empty()
403    }
404
405    pub fn selection_text(&self) -> String {
406        self[self.current.selection].to_string()
407    }
408}
409
410impl From<&str> for Buffer {
411    fn from(value: &str) -> Self {
412        let mut result = Self::default();
413        result.current.text = value.to_string();
414        result.current.segs = unicode_segs::calc(value);
415        result.external.text = value.to_string();
416        result
417    }
418}
419
420/// Adjust a range based on a text replacement. Positions before the replacement generally are not adjusted,
421/// positions after the replacement generally are, and positions within the replacement are adjusted to the end of
422/// the replacement if `prefer_advance` is true or are adjusted to the start of the replacement otherwise.
423pub fn adjust_subsequent_range(
424    replaced_range: (DocCharOffset, DocCharOffset), replacement_len: RelCharOffset,
425    prefer_advance: bool, range: &mut (DocCharOffset, DocCharOffset),
426) {
427    for position in [&mut range.0, &mut range.1] {
428        adjust_subsequent_position(replaced_range, replacement_len, prefer_advance, position);
429    }
430}
431
432/// Adjust a position based on a text replacement. Positions before the replacement generally are not adjusted,
433/// positions after the replacement generally are, and positions within the replacement are adjusted to the end of
434/// the replacement if `prefer_advance` is true or are adjusted to the start of the replacement otherwise.
435fn adjust_subsequent_position(
436    replaced_range: (DocCharOffset, DocCharOffset), replacement_len: RelCharOffset,
437    prefer_advance: bool, position: &mut DocCharOffset,
438) {
439    let replaced_len = replaced_range.len();
440    let replacement_start = replaced_range.start();
441    let replacement_end = replacement_start + replacement_len;
442
443    enum Mode {
444        Insert,
445        Replace,
446    }
447    let mode = if replaced_range.is_empty() { Mode::Insert } else { Mode::Replace };
448
449    let sorted_bounds = {
450        let mut bounds = vec![replaced_range.start(), replaced_range.end(), *position];
451        bounds.sort();
452        bounds
453    };
454    let bind = |start: &DocCharOffset, end: &DocCharOffset, pos: &DocCharOffset| {
455        start == &replaced_range.start() && end == &replaced_range.end() && pos == &*position
456    };
457
458    *position = match (mode, &sorted_bounds[..]) {
459        // case 1: position at point of text insertion
460        //                       text before replacement: * *
461        //                        range of replaced text:  |
462        //          range of subsequent cursor selection:  |
463        //                        text after replacement: * X *
464        // advance:
465        // adjusted range of subsequent cursor selection:    |
466        // don't advance:
467        // adjusted range of subsequent cursor selection:  |
468        (Mode::Insert, [start, end, pos]) if bind(start, end, pos) && end == pos => {
469            if prefer_advance {
470                replacement_end
471            } else {
472                replacement_start
473            }
474        }
475
476        // case 2: position at start of text replacement
477        //                       text before replacement: * * * *
478        //                        range of replaced text:  |<->|
479        //          range of subsequent cursor selection:  |
480        //                        text after replacement: * X *
481        // adjusted range of subsequent cursor selection:  |
482        (Mode::Replace, [start, pos, end]) if bind(start, end, pos) && start == pos => {
483            if prefer_advance {
484                replacement_end
485            } else {
486                replacement_start
487            }
488        }
489
490        // case 3: position at end of text replacement
491        //                       text before replacement: * * * *
492        //                        range of replaced text:  |<->|
493        //          range of subsequent cursor selection:      |
494        //                        text after replacement: * X *
495        // adjusted range of subsequent cursor selection:    |
496        (Mode::Replace, [start, end, pos]) if bind(start, end, pos) && end == pos => {
497            if prefer_advance {
498                replacement_end
499            } else {
500                replacement_start
501            }
502        }
503
504        // case 4: position before point/start of text insertion/replacement
505        //                       text before replacement: * * * * *
506        //       (possibly empty) range of replaced text:    |<->|
507        //          range of subsequent cursor selection:  |
508        //                        text after replacement: * * X *
509        // adjusted range of subsequent cursor selection:  |
510        (_, [pos, start, end]) if bind(start, end, pos) => *position,
511
512        // case 5: position within text replacement
513        //                       text before replacement: * * * *
514        //                        range of replaced text:  |<->|
515        //          range of subsequent cursor selection:    |
516        //                        text after replacement: * X *
517        // advance:
518        // adjusted range of subsequent cursor selection:    |
519        // don't advance:
520        // adjusted range of subsequent cursor selection:  |
521        (Mode::Replace, [start, pos, end]) if bind(start, end, pos) => {
522            if prefer_advance {
523                replacement_end
524            } else {
525                replacement_start
526            }
527        }
528
529        // case 6: position after point/end of text insertion/replacement
530        //                       text before replacement: * * * * *
531        //       (possibly empty) range of replaced text:  |<->|
532        //          range of subsequent cursor selection:        |
533        //                        text after replacement: * X * *
534        // adjusted range of subsequent cursor selection:      |
535        (_, [start, end, pos]) if bind(start, end, pos) => {
536            *position + replacement_len - replaced_len
537        }
538        _ => unreachable!(),
539    }
540}
541
542impl Index<(DocByteOffset, DocByteOffset)> for Snapshot {
543    type Output = str;
544
545    fn index(&self, index: (DocByteOffset, DocByteOffset)) -> &Self::Output {
546        &self.text[index.start().0..index.end().0]
547    }
548}
549
550impl Index<(DocCharOffset, DocCharOffset)> for Snapshot {
551    type Output = str;
552
553    fn index(&self, index: (DocCharOffset, DocCharOffset)) -> &Self::Output {
554        let index = self.segs.range_to_byte(index);
555        &self.text[index.start().0..index.end().0]
556    }
557}
558
559impl Index<(DocByteOffset, DocByteOffset)> for Buffer {
560    type Output = str;
561
562    fn index(&self, index: (DocByteOffset, DocByteOffset)) -> &Self::Output {
563        &self.current[index]
564    }
565}
566
567impl Index<(DocCharOffset, DocCharOffset)> for Buffer {
568    type Output = str;
569
570    fn index(&self, index: (DocCharOffset, DocCharOffset)) -> &Self::Output {
571        &self.current[index]
572    }
573}
574
#[cfg(test)]
mod test {
    use super::Buffer;

    /// Seeds a fresh buffer with `base` and merges `first` then `second` into it, returning the merged text.
    fn merged(base: &str, first: &str, second: &str) -> String {
        Buffer::from(base).merge(first.into(), second.into())
    }

    #[test]
    fn buffer_merge_nonintersecting_replace() {
        let (base, local, remote) = ("base content base", "local content base", "base content remote");

        assert_eq!(merged(base, local, remote), "local content remote");
        assert_eq!(merged(base, remote, local), "local content remote");
    }

    #[test]
    fn buffer_merge_prefix_replace() {
        let (base, local, remote) = ("base content", "local content", "remote content");

        assert_eq!(merged(base, local, remote), "local content");
    }

    #[test]
    fn buffer_merge_infix_replace() {
        let (base, local, remote) = ("con base tent", "con local tent", "con remote tent");

        assert_eq!(merged(base, local, remote), "con local tent");
        assert_eq!(merged(base, remote, local), "con remote tent");
    }

    #[test]
    fn buffer_merge_postfix_replace() {
        let (base, local, remote) = ("content base", "content local", "content remote");

        assert_eq!(merged(base, local, remote), "content local");
        assert_eq!(merged(base, remote, local), "content remote");
    }

    #[test]
    fn buffer_merge_insert() {
        let (base, local, remote) = ("content", "content local", "content remote");

        assert_eq!(merged(base, local, remote), "content local remote");
        assert_eq!(merged(base, remote, local), "content remote local");
    }

    // this test case documents behavior moreso than asserting target state
    #[test]
    fn buffer_merge_insert_replace() {
        let (base, local, remote) = ("content", "content local", "remote");

        assert_eq!(merged(base, local, remote), "content local");
        assert_eq!(merged(base, remote, local), "remote");
    }

    // this test case used to crash `merge`
    #[test]
    fn buffer_merge_crash() {
        let (base, local, remote) = ("con tent", "cont tent locallocal", "cont remote tent");

        let _ = merged(base, local, remote);
        let _ = merged(base, remote, local);
    }
}