Skip to main content

reconcile_text/operation_transformation/
edited_text.rs

1use std::fmt::Debug;
2
3#[cfg(feature = "serde")]
4use serde::{Deserialize, Serialize};
5
6use crate::{
7    BuiltinTokenizer, CursorPosition, TextWithCursors, Token,
8    operation_transformation::{
9        DiffError, Operation,
10        utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
11    },
12    raw_operation::RawOperation,
13    tokenizer::Tokenizer,
14    types::{
15        history::History, number_or_text::NumberOrText, side::Side,
16        span_with_history::SpanWithHistory,
17    },
18    utils::string_builder::StringBuilder,
19};
20
21/// A text document with a sequence of operations derived from diffing it
22/// against an updated version. Supports merging two `EditedText` instances
23/// (from the same original) via Operational Transformation.
24///
25/// Created via `from_strings`, `from_strings_with_tokenizer`, or `from_diff`,
26/// then merged with another `EditedText` and applied to get the reconciled
27/// text.
28///
29/// Also tracks cursor positions from the updated text, repositioning them
30/// when operations are applied.
31#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
32#[derive(Debug, Clone, PartialEq, Default)]
33pub struct EditedText<'a, T>
34where
35    T: PartialEq + Clone + Debug,
36{
37    text: &'a str,
38    operations: Vec<Operation<T>>,
39    operation_sides: Vec<Side>,
40    cursors: Vec<CursorPosition>,
41}
42
43impl<'a> EditedText<'a, String> {
44    /// Create an `EditedText` from the given original and updated strings.
45    /// Uses the default word tokenizer (splits on word boundaries).
46    #[must_use]
47    pub fn from_strings(original: &'a str, updated: &TextWithCursors) -> Self {
48        Self::from_strings_with_tokenizer(original, updated, &*BuiltinTokenizer::Word)
49    }
50}
51
52impl<'a, T> EditedText<'a, T>
53where
54    T: PartialEq + Clone + Debug,
55{
56    /// Create an `EditedText` from the given original and updated strings
57    /// using the provided tokenizer
58    #[must_use]
59    pub fn from_strings_with_tokenizer(
60        original: &'a str,
61        updated: &TextWithCursors,
62        tokenizer: &Tokenizer<T>,
63    ) -> Self {
64        let original_tokens = (tokenizer)(original);
65        let updated_tokens = (tokenizer)(&updated.text());
66
67        let diff: Vec<RawOperation<T>> = RawOperation::vec_from(&original_tokens, &updated_tokens);
68        let operations: Vec<Operation<T>> = cook_operations(elongate_operations(diff)).collect();
69        let operation_count = operations.len();
70
71        Self::new(
72            original,
73            operations,
74            vec![Side::Left; operation_count],
75            updated.cursors(),
76        )
77    }
78
79    /// Create a new `EditedText` with the given operations.
80    /// The operations must be in the order in which they are meant to be
81    /// applied. The operations must not overlap.
82    fn new(
83        text: &'a str,
84        operations: Vec<Operation<T>>,
85        operation_sides: Vec<Side>,
86        mut cursors: Vec<CursorPosition>,
87    ) -> Self {
88        cursors.sort_by_key(|cursor| cursor.char_index);
89
90        Self {
91            text,
92            operations,
93            operation_sides,
94            cursors,
95        }
96    }
97
98    /// Merge two `EditedText` instances. The two instances must be derived
99    /// from the same original text. The operations are merged using the
100    /// principles of Operational Transformation. The cursors are updated
101    /// accordingly to reflect the changes made by the merged operations.
102    ///
103    /// # Panics
104    ///
105    /// Panics if there's an integer overflow (in isize) when calculating new
106    /// cursor positions.
107    #[must_use]
108    #[allow(clippy::too_many_lines)]
109    pub fn merge(self, other: Self) -> Self {
110        debug_assert_eq!(
111            self.text, other.text,
112            "`EditedText`-s must be derived from the same text to be mergable"
113        );
114
115        let mut merged_cursors = Vec::with_capacity(self.cursors.len() + other.cursors.len());
116        let mut left_cursors = self.cursors.into_iter().peekable();
117        let mut right_cursors = other.cursors.into_iter().peekable();
118
119        let mut merged_operations: Vec<Operation<T>> =
120            Vec::with_capacity(self.operations.len() + other.operations.len());
121        let mut merged_operation_sides: Vec<Side> =
122            Vec::with_capacity(self.operations.len() + other.operations.len());
123
124        let mut left_iter = self.operations.into_iter();
125        let mut right_iter = other.operations.into_iter();
126
127        let mut maybe_left_op = left_iter.next();
128        let mut maybe_right_op = right_iter.next();
129
130        let mut seen_left_length: usize = 0;
131        let mut seen_right_length: usize = 0;
132        let mut merged_length: usize = 0;
133
134        let mut last_left_op = None;
135        let mut last_right_op = None;
136
137        loop {
138            let (side, operation) = match (maybe_left_op.as_ref(), maybe_right_op.as_ref()) {
139                (Some(left_op), Some(right_op)) => {
140                    if left_op.cmp_priority(seen_left_length, right_op, seen_right_length)
141                        == std::cmp::Ordering::Less
142                    {
143                        (Side::Left, maybe_left_op.take().unwrap())
144                    } else {
145                        (Side::Right, maybe_right_op.take().unwrap())
146                    }
147                }
148
149                (Some(_), None) => (Side::Left, maybe_left_op.take().unwrap()),
150                (None, Some(_)) => (Side::Right, maybe_right_op.take().unwrap()),
151                (None, None) => break,
152            };
153
154            let is_advancing_operation = matches!(
155                operation,
156                Operation::Insert { .. } | Operation::Equal { .. }
157            );
158
159            let original_length = operation.len();
160            let (side, result) = match side {
161                Side::Left => {
162                    let result = operation.merge_operations(last_right_op.as_ref());
163
164                    if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
165                        let merged_length_signed = isize::try_from(merged_length)
166                            .expect("merged_length must fit in isize");
167                        let seen_left_length_signed = isize::try_from(seen_left_length)
168                            .expect("seen_left_length must fit in isize");
169                        let op_len_signed =
170                            isize::try_from(op.len()).expect("op.len() must fit in isize");
171                        let original_length_signed = isize::try_from(original_length)
172                            .expect("original_length must fit in isize");
173
174                        let shift = merged_length_signed - seen_left_length_signed + op_len_signed
175                            - original_length_signed;
176
177                        while let Some(cursor) = left_cursors.next_if(|cursor| {
178                            cursor.char_index <= seen_left_length + original_length
179                        }) {
180                            merged_cursors.push(
181                                cursor.with_index(cursor.char_index.saturating_add_signed(shift)),
182                            );
183                        }
184                    }
185
186                    if is_advancing_operation {
187                        seen_left_length += original_length;
188                    }
189
190                    maybe_left_op = left_iter.next();
191                    last_left_op = Some(result.clone());
192
193                    (Side::Left, result)
194                }
195                Side::Right => {
196                    let result = operation.merge_operations(last_left_op.as_ref());
197
198                    if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
199                        let merged_length_signed = isize::try_from(merged_length)
200                            .expect("merged_length must fit in isize");
201                        let seen_right_length_signed = isize::try_from(seen_right_length)
202                            .expect("seen_right_length must fit in isize");
203                        let op_len_signed =
204                            isize::try_from(op.len()).expect("op.len() must fit in isize");
205                        let original_length_signed = isize::try_from(original_length)
206                            .expect("original_length must fit in isize");
207
208                        let shift = merged_length_signed - seen_right_length_signed + op_len_signed
209                            - original_length_signed;
210
211                        while let Some(cursor) = right_cursors.next_if(|cursor| {
212                            cursor.char_index <= seen_right_length + original_length
213                        }) {
214                            merged_cursors.push(
215                                cursor.with_index(cursor.char_index.saturating_add_signed(shift)),
216                            );
217                        }
218                    }
219
220                    if is_advancing_operation {
221                        seen_right_length += original_length;
222                    }
223
224                    maybe_right_op = right_iter.next();
225                    last_right_op = Some(result.clone());
226
227                    (Side::Right, result)
228                }
229            };
230
231            if result.len() == 0 {
232                continue;
233            }
234
235            if is_advancing_operation {
236                merged_length += result.len();
237            }
238
239            merged_operations.push(result);
240            merged_operation_sides.push(side);
241        }
242
243        for cursor in left_cursors.chain(right_cursors) {
244            merged_cursors.push(cursor.with_index(merged_length));
245        }
246
247        debug_assert_eq!(merged_operations.len(), merged_operation_sides.len());
248
249        Self::new(
250            self.text,
251            merged_operations,
252            merged_operation_sides,
253            merged_cursors,
254        )
255    }
256
257    /// Apply the operations to the text and return the resulting text
258    #[must_use]
259    pub fn apply(&self) -> TextWithCursors {
260        let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
261
262        for operation in &self.operations {
263            builder = operation.apply(builder);
264        }
265
266        TextWithCursors::new(builder.take(), self.cursors.clone())
267    }
268
269    /// Apply the operations to the text and return the resulting text in chunks
270    /// together with the provenance describing where each chunk came from.
271    ///
272    /// Returns all spans including deletions (not present in the merged text).
273    ///
274    /// ```
275    ///  use reconcile_text::{History, SpanWithHistory, BuiltinTokenizer, reconcile};
276    ///
277    ///  let parent = "Merging text is hard!";
278    ///  let left = "Merging text is easy!"; // Changed "hard" to "easy"
279    ///  let right = "With reconcile, merging documents is hard!"; // Added prefix and changed word
280    ///
281    ///  let result = reconcile(
282    ///      parent,
283    ///      &left.into(),
284    ///      &right.into(),
285    ///      &*BuiltinTokenizer::Word,
286    ///  );
287    ///
288    ///  assert_eq!(
289    ///      result.apply_with_history(),
290    ///      vec![
291    ///          SpanWithHistory::new("Merging text".to_string(), History::RemovedFromRight,),
292    ///          SpanWithHistory::new(
293    ///              "With reconcile, merging documents".to_string(),
294    ///              History::AddedFromRight,
295    ///          ),
296    ///          SpanWithHistory::new(" ".to_string(), History::Unchanged,),
297    ///          SpanWithHistory::new("is".to_string(), History::Unchanged,),
298    ///          SpanWithHistory::new(" hard!".to_string(), History::RemovedFromLeft,),
299    ///          SpanWithHistory::new(" easy!".to_string(), History::AddedFromLeft,),
300    ///      ]
301    ///  );
302    /// ```
303    #[must_use]
304    pub fn apply_with_history(&self) -> Vec<SpanWithHistory> {
305        let chars: Vec<char> = self.text.chars().collect();
306        let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
307
308        let mut history = Vec::with_capacity(self.operations.len());
309
310        for (operation, side) in self.operations.iter().zip(self.operation_sides.iter()) {
311            builder = operation.apply(builder);
312
313            match operation {
314                Operation::Equal { .. } => {
315                    history.push(SpanWithHistory::new(builder.take(), History::Unchanged));
316                }
317                Operation::Insert { .. } => {
318                    let h = match side {
319                        Side::Left => History::AddedFromLeft,
320                        Side::Right => History::AddedFromRight,
321                    };
322                    history.push(SpanWithHistory::new(builder.take(), h));
323                }
324                Operation::Delete {
325                    deleted_character_count,
326                    order,
327                    ..
328                } => {
329                    let deleted: String = chars[*order..*order + *deleted_character_count]
330                        .iter()
331                        .collect();
332                    let h = match side {
333                        Side::Left => History::RemovedFromLeft,
334                        Side::Right => History::RemovedFromRight,
335                    };
336                    history.push(SpanWithHistory::new(deleted, h));
337                }
338            }
339        }
340
341        history
342    }
343
344    /// Apply the operations and return both the merged text with cursors and
345    /// the provenance history in a single pass
346    #[must_use]
347    pub fn apply_with_all(&self) -> (TextWithCursors, Vec<SpanWithHistory>) {
348        let chars: Vec<char> = self.text.chars().collect();
349        let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
350        let mut history = Vec::with_capacity(self.operations.len());
351        let mut full_text = String::new();
352
353        for (operation, side) in self.operations.iter().zip(self.operation_sides.iter()) {
354            builder = operation.apply(builder);
355
356            match operation {
357                Operation::Equal { .. } => {
358                    let span = builder.take();
359                    full_text.push_str(&span);
360                    history.push(SpanWithHistory::new(span, History::Unchanged));
361                }
362                Operation::Insert { .. } => {
363                    let span = builder.take();
364                    full_text.push_str(&span);
365                    let h = match side {
366                        Side::Left => History::AddedFromLeft,
367                        Side::Right => History::AddedFromRight,
368                    };
369                    history.push(SpanWithHistory::new(span, h));
370                }
371                Operation::Delete {
372                    deleted_character_count,
373                    order,
374                    ..
375                } => {
376                    let deleted: String = chars[*order..*order + *deleted_character_count]
377                        .iter()
378                        .collect();
379                    let h = match side {
380                        Side::Left => History::RemovedFromLeft,
381                        Side::Right => History::RemovedFromRight,
382                    };
383                    history.push(SpanWithHistory::new(deleted, h));
384                }
385            }
386        }
387
388        (
389            TextWithCursors::new(full_text, self.cursors.clone()),
390            history,
391        )
392    }
393
394    /// Convert the `EditedText` into a terse representation ready for
395    /// serialization. The result omits cursor positions and the original text.
396    /// This is useful for sending text diffs over the network if there's a
397    /// clear consensus on the original text.
398    ///
399    /// Inserts are strings, deletes are negative integers (character count),
400    /// and retained spans are positive integers (character count).
401    ///
402    /// # Errors
403    ///
404    /// Returns `DiffError::IntegerOverflow` if a character count exceeds
405    /// `i64::MAX`.
406    pub fn to_diff(&self) -> Result<Vec<NumberOrText>, DiffError> {
407        let mut result: Vec<NumberOrText> = Vec::with_capacity(self.operations.len());
408        let mut previous_equal: Option<usize> = None;
409
410        for operation in &self.operations {
411            match operation {
412                Operation::Equal { length, .. } => {
413                    if let Some(prev_length) = previous_equal {
414                        previous_equal = Some(prev_length + *length);
415                    } else {
416                        previous_equal = Some(*length);
417                    }
418                }
419
420                Operation::Insert { text, .. } => {
421                    if let Some(prev_length) = previous_equal {
422                        result
423                            .push(NumberOrText::Number(i64::try_from(prev_length).map_err(
424                                |_| DiffError::IntegerOverflow { value: prev_length },
425                            )?));
426                        previous_equal = None;
427                    }
428
429                    let text: String = text.iter().map(Token::original).collect();
430                    result.push(NumberOrText::Text(text));
431                }
432
433                Operation::Delete {
434                    deleted_character_count,
435                    ..
436                } => {
437                    if let Some(prev_length) = previous_equal {
438                        result
439                            .push(NumberOrText::Number(i64::try_from(prev_length).map_err(
440                                |_| DiffError::IntegerOverflow { value: prev_length },
441                            )?));
442                        previous_equal = None;
443                    }
444
445                    let count = i64::try_from(*deleted_character_count).map_err(|_| {
446                        DiffError::IntegerOverflow {
447                            value: *deleted_character_count,
448                        }
449                    })?;
450                    result.push(NumberOrText::Number(-count));
451                }
452            }
453        }
454
455        if let Some(prev_length) = previous_equal {
456            result
457                .push(NumberOrText::Number(i64::try_from(prev_length).map_err(
458                    |_| DiffError::IntegerOverflow { value: prev_length },
459                )?));
460        }
461
462        Ok(result)
463    }
464
465    /// Reconstruct an `EditedText` from a diff and the original text.
466    ///
467    /// # Errors
468    ///
469    /// Returns `DiffError::LengthExceedsOriginal` if the diff references a
470    /// range that exceeds the original text length.
471    ///
472    /// # Panics
473    ///
474    /// Panics if there's an integer overflow in i64.
475    pub fn from_diff(
476        original_text: &'a str,
477        diff: Vec<NumberOrText>,
478        tokenizer: &Tokenizer<T>,
479    ) -> Result<EditedText<'a, T>, DiffError> {
480        let mut operations: Vec<Operation<T>> = Vec::with_capacity(diff.len());
481        let mut order = 0;
482        let chars: Vec<char> = original_text.chars().collect();
483        let text_length = chars.len();
484
485        for item in diff {
486            match item {
487                NumberOrText::Number(length) => {
488                    if length >= 0 {
489                        let length = usize::try_from(length).expect("length must fit in usize");
490
491                        // Validate that the range doesn't exceed the original text
492                        if order + length > text_length {
493                            return Err(DiffError::LengthExceedsOriginal {
494                                position: order,
495                                requested: length,
496                                available: text_length.saturating_sub(order),
497                            });
498                        }
499
500                        let original_characters: String =
501                            chars[order..order + length].iter().collect();
502
503                        let original_tokens = tokenizer(&original_characters);
504                        for token in original_tokens {
505                            operations
506                                .push(Operation::create_equal(order, token.get_original_length()));
507                            order += token.get_original_length();
508                        }
509                    } else {
510                        let length =
511                            usize::try_from(-length).expect("negative length must fit in usize");
512
513                        // Validate that the delete range doesn't exceed the original text
514                        if order + length > text_length {
515                            return Err(DiffError::LengthExceedsOriginal {
516                                position: order,
517                                requested: length,
518                                available: text_length.saturating_sub(order),
519                            });
520                        }
521
522                        operations.push(Operation::create_delete(order, length));
523                        order += length;
524                    }
525                }
526                NumberOrText::Text(text) => {
527                    let tokens = tokenizer(&text);
528                    operations.push(Operation::create_insert(order, tokens));
529                }
530            }
531        }
532
533        let operation_count = operations.len();
534        Ok(EditedText::new(
535            original_text,
536            operations,
537            vec![Side::Left; operation_count],
538            vec![],
539        ))
540    }
541}
542
543#[cfg(test)]
544mod tests {
545    use insta::assert_debug_snapshot;
546    use pretty_assertions::assert_eq;
547
548    use super::*;
549
550    #[test]
551    fn test_calculate_operations() {
552        let left = "hello world! How are you?  Adam";
553        let right = "Hello, my friend! How are you doing? Albert";
554
555        let operations = EditedText::from_strings(left, &right.into());
556
557        insta::assert_debug_snapshot!(operations);
558
559        let new_right = operations.apply();
560        assert_eq!(new_right.text(), right);
561    }
562
563    #[test]
564    fn test_calculate_operations_with_no_diff() {
565        let text = "hello world!";
566
567        let operations = EditedText::from_strings(text, &text.into());
568
569        assert_debug_snapshot!(operations);
570
571        let new_right = operations.apply();
572        assert_eq!(new_right.text(), text);
573    }
574
575    #[test]
576    fn test_calculate_operations_with_insert() {
577        let original = "hello world! ...";
578        let left = "Hello world! I'm Andras.";
579        let right = "Hello world! How are you?";
580        let expected = "Hello world! How are you? I'm Andras.";
581
582        let operations_1 = EditedText::from_strings(original, &left.into());
583        let operations_2 = EditedText::from_strings(original, &right.into());
584
585        let operations = operations_1.merge(operations_2);
586        assert_eq!(operations.apply().text(), expected);
587    }
588
589    #[test]
590    fn test_from_diff_length_exceeds_original() {
591        let result = EditedText::from_diff(
592            "hello",
593            vec![
594                10.into(), // too large equal span - should error
595                " world".into(),
596            ],
597            &*BuiltinTokenizer::Word,
598        );
599
600        assert!(result.is_err());
601        match result {
602            Err(DiffError::LengthExceedsOriginal {
603                position,
604                requested,
605                available,
606            }) => {
607                assert_eq!(position, 0);
608                assert_eq!(requested, 10);
609                assert_eq!(available, 5);
610            }
611            _ => panic!("Expected LengthExceedsOriginal error"),
612        }
613    }
614
615    #[test]
616    fn test_from_diff_valid() {
617        let edited_text = EditedText::from_diff(
618            "hello",
619            vec![
620                5.into(), // exact length
621                " world".into(),
622            ],
623            &*BuiltinTokenizer::Word,
624        )
625        .unwrap();
626
627        let content = edited_text.apply().text();
628
629        assert_eq!(content, "hello world");
630    }
631
632    #[cfg(feature = "serde")]
633    #[test]
634    fn test_changes_deserialisation() {
635        let original = "Merging text is hard!";
636        let changes = "Merging text is easy with reconcile!";
637        let result = EditedText::from_strings(original, &changes.into());
638        let serialized = serde_yaml::to_string(&result.to_diff().unwrap()).unwrap();
639
640        let expected = concat!("- 15\n", "- -6\n", "- ' easy with reconcile!'\n",);
641        assert_eq!(serialized, expected);
642    }
643
644    #[test]
645    fn test_apply_with_history_utf8() {
646        let parent = "こんにちは世界"; // "Hello World" in Japanese (7 chars, 21 bytes)
647        let left = "こんにちは宇宙"; // Changed 世界 to 宇宙
648        let right = parent;
649
650        let result = crate::reconcile(
651            parent,
652            &left.into(),
653            &right.into(),
654            &*BuiltinTokenizer::Word,
655        );
656
657        let history = result.apply_with_history();
658        assert!(!history.is_empty());
659        assert_eq!(result.apply().text(), "こんにちは宇宙");
660    }
661
662    #[cfg(feature = "serde")]
663    #[test]
664    fn test_changes_serialization() {
665        let original = "The quick brown fox jumps over the lazy dog.";
666        let updated = "The quick red fox jumped over the very lazy dog!";
667
668        let edited_text = EditedText::from_strings(original, &updated.into());
669
670        let changes = edited_text.to_diff().unwrap();
671        let deserialized_edited_text =
672            EditedText::from_diff(original, changes, &*BuiltinTokenizer::Word).unwrap();
673
674        assert_eq!(deserialized_edited_text.apply().text(), updated);
675    }
676}