lb_rs/model/text/buffer.rs
1use super::offset_types::{DocByteOffset, DocCharOffset, RangeExt, RelCharOffset};
2use super::operation_types::{InverseOperation, Operation, Replace};
3use super::unicode_segs::UnicodeSegs;
4use super::{diff, unicode_segs};
5use std::ops::Index;
6use std::time::{Duration, Instant};
7use unicode_segmentation::UnicodeSegmentation;
8
9/// Long-lived state of the editor's text buffer. Factored into sub-structs for borrow-checking.
10/// # Operation algebra
11/// Operations are created based on a version of the buffer. This version is called the operation's base and is
12/// identified with a sequence number. When the base of an operation is equal to the buffer's current sequence number,
13/// the operation can be applied and increments the buffer's sequence number.
14///
15/// When multiple operations are created based on the same version of the buffer, such as when a user types a few
16/// keystrokes in one frame or issues a command like indenting multiple list items, the operations all have the same
17/// base. Once the first operation is applied and the buffer's sequence number is incremented, the base of the
18/// remaining operations must be incremented by using the first operation to transform them before they can be applied.
19/// This corresponds to the reality that the buffer state has changed since the operation was created and the operation
20/// must be re-interpreted. For example, if text is typed at the beginning then end of a buffer in one frame, the
21/// position of the text typed at the end of the buffer is greater when it is applied than it was when it was typed.
22///
23/// External changes are merged into the buffer by creating a set of operations that would transform the buffer from
24/// the last external state to the current state. These operations, based on the version of the buffer at the last
25/// successful save or load, must be transformed by all operations that have been applied since (this means we must
26/// preserve the undo history for at least that long; if this presents performance issues, we can always save). Each
27/// operation that is transforming the new operations will match the base of the new operations at the time of
28/// transformation. Finally, the operations will need to transform each other just like any other set of operations
29/// made in a single frame/made based on the same version of the buffer.
30///
31/// # Undo (work in progress)
/// Undo should revert local changes only, leaving external changes intact, so that when all local changes are undone,
33/// the buffer is in a state reflecting external changes only. This is complicated by the fact that external changes
34/// may have been based on local changes that were synced to another client. To undo an operation that had an external
35/// change based on it, we have to interpret the external change in the absence of local changes that were present when
36/// it was created. This is the opposite of interpreting the external change in the presence of local changes that were
37/// not present when it was created i.e. the normal flow of merging external changes. Here, we are removing a local
38/// operation from the middle of the chain of operations that led to the current state of the buffer.
39///
40/// To do this, we perform the dance of transforming operations in reverse, taking a chain of operations each based on
41/// the prior and transforming them into a set of operations based on the same base as the operation to be undone. Then
42/// we remove the operation to be undone and apply the remaining operations with the forward transformation flow.
43///
44/// Operations are not invertible i.e. you cannot construct an inverse operation that will perfectly cancel out the
45/// effect of another operation regardless of the time of interpretation. For example, with a text replacement, you can
46/// construct an inverse text replacement that replaces the new range with the original text, but when operations are
47/// undone from the middle of the chain, it may affect the original text. The operation will be re-interpreted based on
48/// a new state of the buffer at its time of application. The replaced text has no fixed value by design.
49///
50/// However, it is possible to undo the specific application of an operation in the context of the state of the buffer
51/// when it was applied. We store information necessary to undo applied operations alongside the operations themselves
52/// i.e. the text replaced in the application. When the operation is transformed for any reason, this undo information
53/// is invalidated.
#[derive(Default)]
pub struct Buffer {
    /// Current contents of the buffer (what should be displayed in the editor). Its `seq` is incremented each time
    /// an operation is applied or redone and decremented each time one is undone.
    /// Todo: hide behind a read-only accessor
    pub current: Snapshot,

    /// Snapshot of the buffer at the earliest undoable state. Operations are compacted into this as they overflow the
    /// undo limit. Its `seq` is the offset subtracted from a sequence number to obtain an index into `ops`.
    base: Snapshot,

    /// Operations received by the buffer. Used for undo/redo and for merging external changes.
    ops: Ops,

    /// State for tracking out-of-editor changes
    external: External,
}
69
/// One version of the buffer: its text, the segmentation derived from that text, the selection, and the sequence
/// number identifying the version.
#[derive(Debug, Default)]
pub struct Snapshot {
    /// Full text of this version of the buffer.
    pub text: String,
    /// Unicode segmentation of `text`; recalculated from `text` whenever a replacement is applied.
    pub segs: UnicodeSegs,
    /// Selection as a pair of character offsets; replacements adjust it so it keeps pointing at the same content.
    pub selection: (DocCharOffset, DocCharOffset),
    /// Sequence number of this version of the buffer.
    pub seq: usize,
}
77
78impl Snapshot {
79 fn apply_select(&mut self, range: (DocCharOffset, DocCharOffset)) -> Response {
80 self.selection = range;
81 Response { text_updated: false }
82 }
83
84 fn apply_replace(&mut self, replace: &Replace) -> Response {
85 let Replace { range, text } = replace;
86 let byte_range = self.segs.range_to_byte(*range);
87
88 self.text
89 .replace_range(byte_range.start().0..byte_range.end().0, text);
90 self.segs = unicode_segs::calc(&self.text);
91 adjust_subsequent_range(
92 *range,
93 text.graphemes(true).count().into(),
94 false,
95 &mut self.selection,
96 );
97
98 Response { text_updated: true }
99 }
100
101 fn invert(&self, op: &Operation) -> InverseOperation {
102 let mut inverse = InverseOperation { replace: None, select: self.selection };
103 if let Operation::Replace(replace) = op {
104 inverse.replace = Some(self.invert_replace(replace));
105 }
106 inverse
107 }
108
109 fn invert_replace(&self, replace: &Replace) -> Replace {
110 let Replace { range, text } = replace;
111 let byte_range = self.segs.range_to_byte(*range);
112 let replaced_text = self[byte_range].into();
113 let replacement_range = (range.start(), range.start() + text.graphemes(true).count());
114 Replace { range: replacement_range, text: replaced_text }
115 }
116}
117
/// Operation log of the buffer together with the data derived from it.
#[derive(Default)]
struct Ops {
    /// Operations that have been received by the buffer
    all: Vec<Operation>,
    /// Metadata for the operations in `all`; the two vectors are extended together and share indexes.
    meta: Vec<OpMeta>,

    /// Sequence number of the first unapplied operation. Operations starting with this one are queued for processing.
    processed_seq: usize,

    /// Operations that have been applied to the buffer and already transformed, in order of application. Each of these
    /// operations is based on the previous operation in this list, with the first based on the history base. Derived
    /// from other data and invalidated by some undo/redo flows.
    transformed: Vec<Operation>,

    /// Operations representing the inverse of the operations in `transformed`, in order of application. Useful for
    /// undoing operations. The data model differs because an operation that replaces text containing the cursor needs
    /// two operations to revert the text and cursor. Derived from other data and invalidated by some undo/redo flows.
    transformed_inverted: Vec<InverseOperation>,
}
137
138impl Ops {
139 fn len(&self) -> usize {
140 self.all.len()
141 }
142
143 fn is_undo_checkpoint(&self, idx: usize) -> bool {
144 // start and end of undo history are checkpoints
145 if idx == 0 {
146 return true;
147 }
148 if idx == self.len() {
149 return true;
150 }
151
152 // events separated by enough time are checkpoints
153 let meta = &self.meta[idx];
154 let prev_meta = &self.meta[idx - 1];
155 if meta.timestamp - prev_meta.timestamp > Duration::from_millis(500) {
156 return true;
157 }
158
159 // immediately after a standalone selection is a checkpoint
160 let mut prev_op_standalone = meta.base != prev_meta.base;
161 if idx > 1 {
162 let prev_prev_meta = &self.meta[idx - 2];
163 prev_op_standalone &= prev_meta.base != prev_prev_meta.base;
164 }
165 let prev_op_selection = matches!(&self.all[idx - 1], Operation::Select(..));
166 if prev_op_standalone && prev_op_selection {
167 return true;
168 }
169
170 false
171 }
172}
173
/// Reference point used when merging changes made outside the editor.
#[derive(Default)]
struct External {
    /// Text last loaded into the editor. Used as a reference point for merging out-of-editor changes with in-editor
    /// changes, similar to a base in a 3-way merge. May be a state that never appears in the buffer's history.
    text: String,

    /// Index of the last external operation referenced when merging changes. May be ahead of current.seq if there has
    /// not been a call to `update()` (updates current.seq) since the last call to `reload()` (assigns new greatest seq
    /// to this field).
    seq: usize,
}
185
/// Outcome of applying operations to the buffer. Responses are combined with `|=` and convert into a `bool`.
#[derive(Default)]
pub struct Response {
    /// True when at least one applied operation changed the text.
    text_updated: bool,
}

impl std::ops::BitOrAssign for Response {
    /// Folds `other` into `self`: the text counts as updated if either response says so.
    fn bitor_assign(&mut self, other: Response) {
        let Response { text_updated } = other;
        self.text_updated = self.text_updated | text_updated;
    }
}

impl From<Response> for bool {
    /// Collapses the response into its `text_updated` flag.
    fn from(value: Response) -> Self {
        let Response { text_updated } = value;
        text_updated
    }
}
202
/// Additional metadata tracked alongside operations internally. One entry is stored per operation, at the same
/// index as the operation itself.
#[derive(Clone, Debug)]
struct OpMeta {
    /// At what time was this operation applied? Affects undo units: operations more than 500ms apart are separated
    /// by an undo checkpoint.
    pub timestamp: Instant,

    /// What version of the buffer was the modifier looking at when they made this operation? Used for operational
    /// transformation, both when applying multiple operations in one frame and when merging out-of-editor changes.
    /// The magic happens here.
    pub base: usize,
}
214
215impl Buffer {
216 /// Push a series of operations onto the buffer's input queue; operations will be undone/redone atomically. Useful
217 /// for batches of internal operations produced from a single input event e.g. multi-line list identation.
218 pub fn queue(&mut self, ops: Vec<Operation>) {
219 let timestamp = Instant::now();
220 let base = self.current.seq;
221
222 self.ops
223 .meta
224 .extend(ops.iter().map(|_| OpMeta { timestamp, base }));
225 self.ops.all.extend(ops);
226 }
227
228 /// Loads a new string into the buffer, merging out-of-editor changes made since last load with in-editor changes
229 /// made since last load. The buffer's undo history is preserved; undo'ing will revert in-editor changes only.
230 /// Exercising undo's may put the buffer in never-before-seen states and exercising all undo's will revert the
231 /// buffer to the most recently loaded state (undo limit permitting).
232 /// Note: undo behavior described here is aspirational and not yet implemented.
233 pub fn reload(&mut self, text: String) {
234 let timestamp = Instant::now();
235 let base = self.external.seq;
236 let ops = diff(&self.external.text, &text);
237
238 self.ops
239 .meta
240 .extend(ops.iter().map(|_| OpMeta { timestamp, base }));
241 self.ops.all.extend(ops.into_iter().map(Operation::Replace));
242
243 self.external.text = text;
244 self.external.seq = self.base.seq + self.ops.all.len();
245 }
246
247 /// Indicates to the buffer the changes that have been saved outside the editor. This will serve as the new base
248 /// for merging external changes. The sequence number should be taken from `current.seq` of the buffer when the
249 /// buffer's contents are read for saving.
250 pub fn saved(&mut self, external_seq: usize, external_text: String) {
251 self.external.text = external_text;
252 self.external.seq = external_seq;
253 }
254
255 pub fn merge(mut self, external_text_a: String, external_text_b: String) -> String {
256 let ops_a = diff(&self.external.text, &external_text_a);
257 let ops_b = diff(&self.external.text, &external_text_b);
258
259 let timestamp = Instant::now();
260 let base = self.external.seq;
261 self.ops
262 .meta
263 .extend(ops_a.iter().map(|_| OpMeta { timestamp, base }));
264 self.ops
265 .meta
266 .extend(ops_b.iter().map(|_| OpMeta { timestamp, base }));
267
268 self.ops
269 .all
270 .extend(ops_a.into_iter().map(Operation::Replace));
271 self.ops
272 .all
273 .extend(ops_b.into_iter().map(Operation::Replace));
274
275 self.update();
276 self.current.text
277 }
278
279 /// Applies all operations in the buffer's input queue
280 pub fn update(&mut self) -> Response {
281 // clear redo stack
282 // v base v current v processed
283 // ops before: |<- applied ->|<- undone ->|<- queued ->|
284 // ops after: |<- applied ->|<- queued ->|
285 let queue_len = self.base.seq + self.ops.len() - self.ops.processed_seq;
286 if queue_len > 0 {
287 let drain_range = self.current.seq..self.ops.processed_seq;
288 self.ops.all.drain(drain_range.clone());
289 self.ops.meta.drain(drain_range.clone());
290 self.ops.transformed.drain(drain_range.clone());
291 self.ops.transformed_inverted.drain(drain_range.clone());
292 self.ops.processed_seq = self.current.seq;
293 } else {
294 return Response::default();
295 }
296
297 // transform & apply
298 let mut result = Response::default();
299 for idx in self.current_idx()..self.current_idx() + queue_len {
300 let mut op = self.ops.all[idx].clone();
301 let meta = &self.ops.meta[idx];
302 self.transform(&mut op, meta);
303 self.ops.transformed_inverted.push(self.current.invert(&op));
304 self.ops.transformed.push(op.clone());
305 self.ops.processed_seq += 1;
306
307 result |= self.redo();
308 }
309
310 result
311 }
312
313 fn transform(&self, op: &mut Operation, meta: &OpMeta) {
314 let base_idx = meta.base - self.base.seq;
315 for transforming_idx in base_idx..self.ops.processed_seq {
316 let preceding_op = &self.ops.transformed[transforming_idx];
317 if let Operation::Replace(Replace {
318 range: preceding_replaced_range,
319 text: preceding_replacement_text,
320 }) = preceding_op
321 {
322 if let Operation::Replace(Replace { range: transformed_range, text }) = op {
323 if preceding_replaced_range.intersects(transformed_range, true)
324 && !(preceding_replaced_range.is_empty() && transformed_range.is_empty())
325 {
326 // concurrent replacements to intersecting ranges choose the first/local edit as the winner
327 // this doesn't create self-conflicts during merge because merge combines adjacent replacements
328 // this doesn't create self-conflicts for same-frame editor changes because our final condition
329 // is that we don't simultaneously insert text for both operations, which creates un-ideal
330 // behavior (see test buffer_merge_insert)
331 *text = "".into();
332 transformed_range.1 = transformed_range.0;
333 }
334 }
335
336 match op {
337 Operation::Replace(Replace { range: transformed_range, .. })
338 | Operation::Select(transformed_range) => {
339 adjust_subsequent_range(
340 *preceding_replaced_range,
341 preceding_replacement_text.graphemes(true).count().into(),
342 true,
343 transformed_range,
344 );
345 }
346 }
347 }
348 }
349 }
350
351 pub fn can_redo(&self) -> bool {
352 self.current.seq < self.ops.processed_seq
353 }
354
355 pub fn can_undo(&self) -> bool {
356 self.current.seq > self.base.seq
357 }
358
359 pub fn redo(&mut self) -> Response {
360 let mut response = Response::default();
361 while self.can_redo() {
362 let op = &self.ops.transformed[self.current_idx()];
363
364 self.current.seq += 1;
365
366 response |= match op {
367 Operation::Replace(replace) => self.current.apply_replace(replace),
368 Operation::Select(range) => self.current.apply_select(*range),
369 };
370
371 if self.ops.is_undo_checkpoint(self.current_idx()) {
372 break;
373 }
374 }
375 response
376 }
377
378 pub fn undo(&mut self) -> Response {
379 let mut response = Response::default();
380 while self.can_undo() {
381 self.current.seq -= 1;
382 let op = &self.ops.transformed_inverted[self.current_idx()];
383
384 if let Some(replace) = &op.replace {
385 response |= self.current.apply_replace(replace);
386 }
387 response |= self.current.apply_select(op.select);
388
389 if self.ops.is_undo_checkpoint(self.current_idx()) {
390 break;
391 }
392 }
393 response
394 }
395
396 fn current_idx(&self) -> usize {
397 self.current.seq - self.base.seq
398 }
399
400 /// Reports whether the buffer's current text is empty.
401 pub fn is_empty(&self) -> bool {
402 self.current.text.is_empty()
403 }
404
405 pub fn selection_text(&self) -> String {
406 self[self.current.selection].to_string()
407 }
408}
409
410impl From<&str> for Buffer {
411 fn from(value: &str) -> Self {
412 let mut result = Self::default();
413 result.current.text = value.to_string();
414 result.current.segs = unicode_segs::calc(value);
415 result.external.text = value.to_string();
416 result
417 }
418}
419
420/// Adjust a range based on a text replacement. Positions before the replacement generally are not adjusted,
421/// positions after the replacement generally are, and positions within the replacement are adjusted to the end of
422/// the replacement if `prefer_advance` is true or are adjusted to the start of the replacement otherwise.
423pub fn adjust_subsequent_range(
424 replaced_range: (DocCharOffset, DocCharOffset), replacement_len: RelCharOffset,
425 prefer_advance: bool, range: &mut (DocCharOffset, DocCharOffset),
426) {
427 for position in [&mut range.0, &mut range.1] {
428 adjust_subsequent_position(replaced_range, replacement_len, prefer_advance, position);
429 }
430}
431
432/// Adjust a position based on a text replacement. Positions before the replacement generally are not adjusted,
433/// positions after the replacement generally are, and positions within the replacement are adjusted to the end of
434/// the replacement if `prefer_advance` is true or are adjusted to the start of the replacement otherwise.
435fn adjust_subsequent_position(
436 replaced_range: (DocCharOffset, DocCharOffset), replacement_len: RelCharOffset,
437 prefer_advance: bool, position: &mut DocCharOffset,
438) {
439 let replaced_len = replaced_range.len();
440 let replacement_start = replaced_range.start();
441 let replacement_end = replacement_start + replacement_len;
442
443 enum Mode {
444 Insert,
445 Replace,
446 }
447 let mode = if replaced_range.is_empty() { Mode::Insert } else { Mode::Replace };
448
449 let sorted_bounds = {
450 let mut bounds = vec![replaced_range.start(), replaced_range.end(), *position];
451 bounds.sort();
452 bounds
453 };
454 let bind = |start: &DocCharOffset, end: &DocCharOffset, pos: &DocCharOffset| {
455 start == &replaced_range.start() && end == &replaced_range.end() && pos == &*position
456 };
457
458 *position = match (mode, &sorted_bounds[..]) {
459 // case 1: position at point of text insertion
460 // text before replacement: * *
461 // range of replaced text: |
462 // range of subsequent cursor selection: |
463 // text after replacement: * X *
464 // advance:
465 // adjusted range of subsequent cursor selection: |
466 // don't advance:
467 // adjusted range of subsequent cursor selection: |
468 (Mode::Insert, [start, end, pos]) if bind(start, end, pos) && end == pos => {
469 if prefer_advance {
470 replacement_end
471 } else {
472 replacement_start
473 }
474 }
475
476 // case 2: position at start of text replacement
477 // text before replacement: * * * *
478 // range of replaced text: |<->|
479 // range of subsequent cursor selection: |
480 // text after replacement: * X *
481 // adjusted range of subsequent cursor selection: |
482 (Mode::Replace, [start, pos, end]) if bind(start, end, pos) && start == pos => {
483 if prefer_advance {
484 replacement_end
485 } else {
486 replacement_start
487 }
488 }
489
490 // case 3: position at end of text replacement
491 // text before replacement: * * * *
492 // range of replaced text: |<->|
493 // range of subsequent cursor selection: |
494 // text after replacement: * X *
495 // adjusted range of subsequent cursor selection: |
496 (Mode::Replace, [start, end, pos]) if bind(start, end, pos) && end == pos => {
497 if prefer_advance {
498 replacement_end
499 } else {
500 replacement_start
501 }
502 }
503
504 // case 4: position before point/start of text insertion/replacement
505 // text before replacement: * * * * *
506 // (possibly empty) range of replaced text: |<->|
507 // range of subsequent cursor selection: |
508 // text after replacement: * * X *
509 // adjusted range of subsequent cursor selection: |
510 (_, [pos, start, end]) if bind(start, end, pos) => *position,
511
512 // case 5: position within text replacement
513 // text before replacement: * * * *
514 // range of replaced text: |<->|
515 // range of subsequent cursor selection: |
516 // text after replacement: * X *
517 // advance:
518 // adjusted range of subsequent cursor selection: |
519 // don't advance:
520 // adjusted range of subsequent cursor selection: |
521 (Mode::Replace, [start, pos, end]) if bind(start, end, pos) => {
522 if prefer_advance {
523 replacement_end
524 } else {
525 replacement_start
526 }
527 }
528
529 // case 6: position after point/end of text insertion/replacement
530 // text before replacement: * * * * *
531 // (possibly empty) range of replaced text: |<->|
532 // range of subsequent cursor selection: |
533 // text after replacement: * X * *
534 // adjusted range of subsequent cursor selection: |
535 (_, [start, end, pos]) if bind(start, end, pos) => {
536 *position + replacement_len - replaced_len
537 }
538 _ => unreachable!(),
539 }
540}
541
542impl Index<(DocByteOffset, DocByteOffset)> for Snapshot {
543 type Output = str;
544
545 fn index(&self, index: (DocByteOffset, DocByteOffset)) -> &Self::Output {
546 &self.text[index.start().0..index.end().0]
547 }
548}
549
550impl Index<(DocCharOffset, DocCharOffset)> for Snapshot {
551 type Output = str;
552
553 fn index(&self, index: (DocCharOffset, DocCharOffset)) -> &Self::Output {
554 let index = self.segs.range_to_byte(index);
555 &self.text[index.start().0..index.end().0]
556 }
557}
558
559impl Index<(DocByteOffset, DocByteOffset)> for Buffer {
560 type Output = str;
561
562 fn index(&self, index: (DocByteOffset, DocByteOffset)) -> &Self::Output {
563 &self.current[index]
564 }
565}
566
567impl Index<(DocCharOffset, DocCharOffset)> for Buffer {
568 type Output = str;
569
570 fn index(&self, index: (DocCharOffset, DocCharOffset)) -> &Self::Output {
571 &self.current[index]
572 }
573}
574
/// Tests for `Buffer::merge`. Each test starts from a base text and merges two texts derived from it; most tests
/// run the merge in both argument orders because the first argument wins conflicts.
#[cfg(test)]
mod test {
    use super::Buffer;

    // edits to different parts of the text are both kept, regardless of argument order
    #[test]
    fn buffer_merge_nonintersecting_replace() {
        let base_content = "base content base";
        let local_content = "local content base";
        let remote_content = "base content remote";

        assert_eq!(
            Buffer::from(base_content).merge(local_content.into(), remote_content.into()),
            "local content remote"
        );
        assert_eq!(
            Buffer::from(base_content).merge(remote_content.into(), local_content.into()),
            "local content remote"
        );
    }

    // both sides replace the same prefix: the first argument's replacement is kept
    #[test]
    fn buffer_merge_prefix_replace() {
        let base_content = "base content";
        let local_content = "local content";
        let remote_content = "remote content";

        assert_eq!(
            Buffer::from(base_content).merge(local_content.into(), remote_content.into()),
            "local content"
        );
    }

    // both sides replace the same word in the middle: the first argument's replacement is kept
    #[test]
    fn buffer_merge_infix_replace() {
        let base_content = "con base tent";
        let local_content = "con local tent";
        let remote_content = "con remote tent";

        assert_eq!(
            Buffer::from(base_content).merge(local_content.into(), remote_content.into()),
            "con local tent"
        );
        assert_eq!(
            Buffer::from(base_content).merge(remote_content.into(), local_content.into()),
            "con remote tent"
        );
    }

    // both sides replace the same suffix: the first argument's replacement is kept
    #[test]
    fn buffer_merge_postfix_replace() {
        let base_content = "content base";
        let local_content = "content local";
        let remote_content = "content remote";

        assert_eq!(
            Buffer::from(base_content).merge(local_content.into(), remote_content.into()),
            "content local"
        );
        assert_eq!(
            Buffer::from(base_content).merge(remote_content.into(), local_content.into()),
            "content remote"
        );
    }

    // both sides insert at the same point: both insertions are kept, in argument order
    #[test]
    fn buffer_merge_insert() {
        let base_content = "content";
        let local_content = "content local";
        let remote_content = "content remote";

        assert_eq!(
            Buffer::from(base_content).merge(local_content.into(), remote_content.into()),
            "content local remote"
        );
        assert_eq!(
            Buffer::from(base_content).merge(remote_content.into(), local_content.into()),
            "content remote local"
        );
    }

    #[test]
    // this test case documents behavior moreso than asserting target state: one side appends while the other
    // replaces the whole text, and only the first argument's edit survives
    fn buffer_merge_insert_replace() {
        let base_content = "content";
        let local_content = "content local";
        let remote_content = "remote";

        assert_eq!(
            Buffer::from(base_content).merge(local_content.into(), remote_content.into()),
            "content local"
        );
        assert_eq!(
            Buffer::from(base_content).merge(remote_content.into(), local_content.into()),
            "remote"
        );
    }

    #[test]
    // this test case used to crash `merge`; results are discarded because only the absence of a panic is checked
    fn buffer_merge_crash() {
        let base_content = "con tent";
        let local_content = "cont tent locallocal";
        let remote_content = "cont remote tent";

        let _ = Buffer::from(base_content).merge(local_content.into(), remote_content.into());
        let _ = Buffer::from(base_content).merge(remote_content.into(), local_content.into());
    }
}