1use std::fmt::Debug;
2
3#[cfg(feature = "serde")]
4use serde::{Deserialize, Serialize};
5
6use crate::{
7 BuiltinTokenizer, CursorPosition, TextWithCursors, Token,
8 operation_transformation::{
9 DiffError, Operation,
10 utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
11 },
12 raw_operation::RawOperation,
13 tokenizer::Tokenizer,
14 types::{
15 history::History, number_or_text::NumberOrText, side::Side,
16 span_with_history::SpanWithHistory,
17 },
18 utils::string_builder::StringBuilder,
19};
20
21#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
32#[derive(Debug, Clone, PartialEq, Default)]
33pub struct EditedText<'a, T>
34where
35 T: PartialEq + Clone + Debug,
36{
37 text: &'a str,
38 operations: Vec<Operation<T>>,
39 operation_sides: Vec<Side>,
40 cursors: Vec<CursorPosition>,
41}
42
43impl<'a> EditedText<'a, String> {
44 #[must_use]
47 pub fn from_strings(original: &'a str, updated: &TextWithCursors) -> Self {
48 Self::from_strings_with_tokenizer(original, updated, &*BuiltinTokenizer::Word)
49 }
50}
51
52impl<'a, T> EditedText<'a, T>
53where
54 T: PartialEq + Clone + Debug,
55{
56 #[must_use]
59 pub fn from_strings_with_tokenizer(
60 original: &'a str,
61 updated: &TextWithCursors,
62 tokenizer: &Tokenizer<T>,
63 ) -> Self {
64 let original_tokens = (tokenizer)(original);
65 let updated_tokens = (tokenizer)(&updated.text());
66
67 let diff: Vec<RawOperation<T>> = RawOperation::vec_from(&original_tokens, &updated_tokens);
68 let operations: Vec<Operation<T>> = cook_operations(elongate_operations(diff)).collect();
69 let operation_count = operations.len();
70
71 Self::new(
72 original,
73 operations,
74 vec![Side::Left; operation_count],
75 updated.cursors(),
76 )
77 }
78
79 fn new(
83 text: &'a str,
84 operations: Vec<Operation<T>>,
85 operation_sides: Vec<Side>,
86 mut cursors: Vec<CursorPosition>,
87 ) -> Self {
88 cursors.sort_by_key(|cursor| cursor.char_index);
89
90 Self {
91 text,
92 operations,
93 operation_sides,
94 cursors,
95 }
96 }
97
98 #[must_use]
108 #[allow(clippy::too_many_lines)]
109 pub fn merge(self, other: Self) -> Self {
110 debug_assert_eq!(
111 self.text, other.text,
112 "`EditedText`-s must be derived from the same text to be mergable"
113 );
114
115 let mut merged_cursors = Vec::with_capacity(self.cursors.len() + other.cursors.len());
116 let mut left_cursors = self.cursors.into_iter().peekable();
117 let mut right_cursors = other.cursors.into_iter().peekable();
118
119 let mut merged_operations: Vec<Operation<T>> =
120 Vec::with_capacity(self.operations.len() + other.operations.len());
121 let mut merged_operation_sides: Vec<Side> =
122 Vec::with_capacity(self.operations.len() + other.operations.len());
123
124 let mut left_iter = self.operations.into_iter();
125 let mut right_iter = other.operations.into_iter();
126
127 let mut maybe_left_op = left_iter.next();
128 let mut maybe_right_op = right_iter.next();
129
130 let mut seen_left_length: usize = 0;
131 let mut seen_right_length: usize = 0;
132 let mut merged_length: usize = 0;
133
134 let mut last_left_op = None;
135 let mut last_right_op = None;
136
137 loop {
138 let (side, operation) = match (maybe_left_op.as_ref(), maybe_right_op.as_ref()) {
139 (Some(left_op), Some(right_op)) => {
140 if left_op.cmp_priority(seen_left_length, right_op, seen_right_length)
141 == std::cmp::Ordering::Less
142 {
143 (Side::Left, maybe_left_op.take().unwrap())
144 } else {
145 (Side::Right, maybe_right_op.take().unwrap())
146 }
147 }
148
149 (Some(_), None) => (Side::Left, maybe_left_op.take().unwrap()),
150 (None, Some(_)) => (Side::Right, maybe_right_op.take().unwrap()),
151 (None, None) => break,
152 };
153
154 let is_advancing_operation = matches!(
155 operation,
156 Operation::Insert { .. } | Operation::Equal { .. }
157 );
158
159 let original_length = operation.len();
160 let (side, result) = match side {
161 Side::Left => {
162 let result = operation.merge_operations(last_right_op.as_ref());
163
164 if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
165 let merged_length_signed = isize::try_from(merged_length)
166 .expect("merged_length must fit in isize");
167 let seen_left_length_signed = isize::try_from(seen_left_length)
168 .expect("seen_left_length must fit in isize");
169 let op_len_signed =
170 isize::try_from(op.len()).expect("op.len() must fit in isize");
171 let original_length_signed = isize::try_from(original_length)
172 .expect("original_length must fit in isize");
173
174 let shift = merged_length_signed - seen_left_length_signed + op_len_signed
175 - original_length_signed;
176
177 while let Some(cursor) = left_cursors.next_if(|cursor| {
178 cursor.char_index <= seen_left_length + original_length
179 }) {
180 merged_cursors.push(
181 cursor.with_index(cursor.char_index.saturating_add_signed(shift)),
182 );
183 }
184 }
185
186 if is_advancing_operation {
187 seen_left_length += original_length;
188 }
189
190 maybe_left_op = left_iter.next();
191 last_left_op = Some(result.clone());
192
193 (Side::Left, result)
194 }
195 Side::Right => {
196 let result = operation.merge_operations(last_left_op.as_ref());
197
198 if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
199 let merged_length_signed = isize::try_from(merged_length)
200 .expect("merged_length must fit in isize");
201 let seen_right_length_signed = isize::try_from(seen_right_length)
202 .expect("seen_right_length must fit in isize");
203 let op_len_signed =
204 isize::try_from(op.len()).expect("op.len() must fit in isize");
205 let original_length_signed = isize::try_from(original_length)
206 .expect("original_length must fit in isize");
207
208 let shift = merged_length_signed - seen_right_length_signed + op_len_signed
209 - original_length_signed;
210
211 while let Some(cursor) = right_cursors.next_if(|cursor| {
212 cursor.char_index <= seen_right_length + original_length
213 }) {
214 merged_cursors.push(
215 cursor.with_index(cursor.char_index.saturating_add_signed(shift)),
216 );
217 }
218 }
219
220 if is_advancing_operation {
221 seen_right_length += original_length;
222 }
223
224 maybe_right_op = right_iter.next();
225 last_right_op = Some(result.clone());
226
227 (Side::Right, result)
228 }
229 };
230
231 if result.len() == 0 {
232 continue;
233 }
234
235 if is_advancing_operation {
236 merged_length += result.len();
237 }
238
239 merged_operations.push(result);
240 merged_operation_sides.push(side);
241 }
242
243 for cursor in left_cursors.chain(right_cursors) {
244 merged_cursors.push(cursor.with_index(merged_length));
245 }
246
247 debug_assert_eq!(merged_operations.len(), merged_operation_sides.len());
248
249 Self::new(
250 self.text,
251 merged_operations,
252 merged_operation_sides,
253 merged_cursors,
254 )
255 }
256
257 #[must_use]
259 pub fn apply(&self) -> TextWithCursors {
260 let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
261
262 for operation in &self.operations {
263 builder = operation.apply(builder);
264 }
265
266 TextWithCursors::new(builder.take(), self.cursors.clone())
267 }
268
269 #[must_use]
304 pub fn apply_with_history(&self) -> Vec<SpanWithHistory> {
305 let chars: Vec<char> = self.text.chars().collect();
306 let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
307
308 let mut history = Vec::with_capacity(self.operations.len());
309
310 for (operation, side) in self.operations.iter().zip(self.operation_sides.iter()) {
311 builder = operation.apply(builder);
312
313 match operation {
314 Operation::Equal { .. } => {
315 history.push(SpanWithHistory::new(builder.take(), History::Unchanged));
316 }
317 Operation::Insert { .. } => {
318 let h = match side {
319 Side::Left => History::AddedFromLeft,
320 Side::Right => History::AddedFromRight,
321 };
322 history.push(SpanWithHistory::new(builder.take(), h));
323 }
324 Operation::Delete {
325 deleted_character_count,
326 order,
327 ..
328 } => {
329 let deleted: String = chars[*order..*order + *deleted_character_count]
330 .iter()
331 .collect();
332 let h = match side {
333 Side::Left => History::RemovedFromLeft,
334 Side::Right => History::RemovedFromRight,
335 };
336 history.push(SpanWithHistory::new(deleted, h));
337 }
338 }
339 }
340
341 history
342 }
343
344 #[must_use]
347 pub fn apply_with_all(&self) -> (TextWithCursors, Vec<SpanWithHistory>) {
348 let chars: Vec<char> = self.text.chars().collect();
349 let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
350 let mut history = Vec::with_capacity(self.operations.len());
351 let mut full_text = String::new();
352
353 for (operation, side) in self.operations.iter().zip(self.operation_sides.iter()) {
354 builder = operation.apply(builder);
355
356 match operation {
357 Operation::Equal { .. } => {
358 let span = builder.take();
359 full_text.push_str(&span);
360 history.push(SpanWithHistory::new(span, History::Unchanged));
361 }
362 Operation::Insert { .. } => {
363 let span = builder.take();
364 full_text.push_str(&span);
365 let h = match side {
366 Side::Left => History::AddedFromLeft,
367 Side::Right => History::AddedFromRight,
368 };
369 history.push(SpanWithHistory::new(span, h));
370 }
371 Operation::Delete {
372 deleted_character_count,
373 order,
374 ..
375 } => {
376 let deleted: String = chars[*order..*order + *deleted_character_count]
377 .iter()
378 .collect();
379 let h = match side {
380 Side::Left => History::RemovedFromLeft,
381 Side::Right => History::RemovedFromRight,
382 };
383 history.push(SpanWithHistory::new(deleted, h));
384 }
385 }
386 }
387
388 (
389 TextWithCursors::new(full_text, self.cursors.clone()),
390 history,
391 )
392 }
393
394 pub fn to_diff(&self) -> Result<Vec<NumberOrText>, DiffError> {
407 let mut result: Vec<NumberOrText> = Vec::with_capacity(self.operations.len());
408 let mut previous_equal: Option<usize> = None;
409
410 for operation in &self.operations {
411 match operation {
412 Operation::Equal { length, .. } => {
413 if let Some(prev_length) = previous_equal {
414 previous_equal = Some(prev_length + *length);
415 } else {
416 previous_equal = Some(*length);
417 }
418 }
419
420 Operation::Insert { text, .. } => {
421 if let Some(prev_length) = previous_equal {
422 result
423 .push(NumberOrText::Number(i64::try_from(prev_length).map_err(
424 |_| DiffError::IntegerOverflow { value: prev_length },
425 )?));
426 previous_equal = None;
427 }
428
429 let text: String = text.iter().map(Token::original).collect();
430 result.push(NumberOrText::Text(text));
431 }
432
433 Operation::Delete {
434 deleted_character_count,
435 ..
436 } => {
437 if let Some(prev_length) = previous_equal {
438 result
439 .push(NumberOrText::Number(i64::try_from(prev_length).map_err(
440 |_| DiffError::IntegerOverflow { value: prev_length },
441 )?));
442 previous_equal = None;
443 }
444
445 let count = i64::try_from(*deleted_character_count).map_err(|_| {
446 DiffError::IntegerOverflow {
447 value: *deleted_character_count,
448 }
449 })?;
450 result.push(NumberOrText::Number(-count));
451 }
452 }
453 }
454
455 if let Some(prev_length) = previous_equal {
456 result
457 .push(NumberOrText::Number(i64::try_from(prev_length).map_err(
458 |_| DiffError::IntegerOverflow { value: prev_length },
459 )?));
460 }
461
462 Ok(result)
463 }
464
465 pub fn from_diff(
476 original_text: &'a str,
477 diff: Vec<NumberOrText>,
478 tokenizer: &Tokenizer<T>,
479 ) -> Result<EditedText<'a, T>, DiffError> {
480 let mut operations: Vec<Operation<T>> = Vec::with_capacity(diff.len());
481 let mut order = 0;
482 let chars: Vec<char> = original_text.chars().collect();
483 let text_length = chars.len();
484
485 for item in diff {
486 match item {
487 NumberOrText::Number(length) => {
488 if length >= 0 {
489 let length = usize::try_from(length).expect("length must fit in usize");
490
491 if order + length > text_length {
493 return Err(DiffError::LengthExceedsOriginal {
494 position: order,
495 requested: length,
496 available: text_length.saturating_sub(order),
497 });
498 }
499
500 let original_characters: String =
501 chars[order..order + length].iter().collect();
502
503 let original_tokens = tokenizer(&original_characters);
504 for token in original_tokens {
505 operations
506 .push(Operation::create_equal(order, token.get_original_length()));
507 order += token.get_original_length();
508 }
509 } else {
510 let length =
511 usize::try_from(-length).expect("negative length must fit in usize");
512
513 if order + length > text_length {
515 return Err(DiffError::LengthExceedsOriginal {
516 position: order,
517 requested: length,
518 available: text_length.saturating_sub(order),
519 });
520 }
521
522 operations.push(Operation::create_delete(order, length));
523 order += length;
524 }
525 }
526 NumberOrText::Text(text) => {
527 let tokens = tokenizer(&text);
528 operations.push(Operation::create_insert(order, tokens));
529 }
530 }
531 }
532
533 let operation_count = operations.len();
534 Ok(EditedText::new(
535 original_text,
536 operations,
537 vec![Side::Left; operation_count],
538 vec![],
539 ))
540 }
541}
542
543#[cfg(test)]
544mod tests {
545 use insta::assert_debug_snapshot;
546 use pretty_assertions::assert_eq;
547
548 use super::*;
549
550 #[test]
551 fn test_calculate_operations() {
552 let left = "hello world! How are you? Adam";
553 let right = "Hello, my friend! How are you doing? Albert";
554
555 let operations = EditedText::from_strings(left, &right.into());
556
557 insta::assert_debug_snapshot!(operations);
558
559 let new_right = operations.apply();
560 assert_eq!(new_right.text(), right);
561 }
562
563 #[test]
564 fn test_calculate_operations_with_no_diff() {
565 let text = "hello world!";
566
567 let operations = EditedText::from_strings(text, &text.into());
568
569 assert_debug_snapshot!(operations);
570
571 let new_right = operations.apply();
572 assert_eq!(new_right.text(), text);
573 }
574
575 #[test]
576 fn test_calculate_operations_with_insert() {
577 let original = "hello world! ...";
578 let left = "Hello world! I'm Andras.";
579 let right = "Hello world! How are you?";
580 let expected = "Hello world! How are you? I'm Andras.";
581
582 let operations_1 = EditedText::from_strings(original, &left.into());
583 let operations_2 = EditedText::from_strings(original, &right.into());
584
585 let operations = operations_1.merge(operations_2);
586 assert_eq!(operations.apply().text(), expected);
587 }
588
589 #[test]
590 fn test_from_diff_length_exceeds_original() {
591 let result = EditedText::from_diff(
592 "hello",
593 vec![
594 10.into(), " world".into(),
596 ],
597 &*BuiltinTokenizer::Word,
598 );
599
600 assert!(result.is_err());
601 match result {
602 Err(DiffError::LengthExceedsOriginal {
603 position,
604 requested,
605 available,
606 }) => {
607 assert_eq!(position, 0);
608 assert_eq!(requested, 10);
609 assert_eq!(available, 5);
610 }
611 _ => panic!("Expected LengthExceedsOriginal error"),
612 }
613 }
614
615 #[test]
616 fn test_from_diff_valid() {
617 let edited_text = EditedText::from_diff(
618 "hello",
619 vec![
620 5.into(), " world".into(),
622 ],
623 &*BuiltinTokenizer::Word,
624 )
625 .unwrap();
626
627 let content = edited_text.apply().text();
628
629 assert_eq!(content, "hello world");
630 }
631
632 #[cfg(feature = "serde")]
633 #[test]
634 fn test_changes_deserialisation() {
635 let original = "Merging text is hard!";
636 let changes = "Merging text is easy with reconcile!";
637 let result = EditedText::from_strings(original, &changes.into());
638 let serialized = serde_yaml::to_string(&result.to_diff().unwrap()).unwrap();
639
640 let expected = concat!("- 15\n", "- -6\n", "- ' easy with reconcile!'\n",);
641 assert_eq!(serialized, expected);
642 }
643
644 #[test]
645 fn test_apply_with_history_utf8() {
646 let parent = "こんにちは世界"; let left = "こんにちは宇宙"; let right = parent;
649
650 let result = crate::reconcile(
651 parent,
652 &left.into(),
653 &right.into(),
654 &*BuiltinTokenizer::Word,
655 );
656
657 let history = result.apply_with_history();
658 assert!(!history.is_empty());
659 assert_eq!(result.apply().text(), "こんにちは宇宙");
660 }
661
662 #[cfg(feature = "serde")]
663 #[test]
664 fn test_changes_serialization() {
665 let original = "The quick brown fox jumps over the lazy dog.";
666 let updated = "The quick red fox jumped over the very lazy dog!";
667
668 let edited_text = EditedText::from_strings(original, &updated.into());
669
670 let changes = edited_text.to_diff().unwrap();
671 let deserialized_edited_text =
672 EditedText::from_diff(original, changes, &*BuiltinTokenizer::Word).unwrap();
673
674 assert_eq!(deserialized_edited_text.apply().text(), updated);
675 }
676}