1pub mod buffer;
2pub mod offset_types;
3pub mod operation_types;
4pub mod unicode_segs;
5
6use offset_types::{DocByteOffset, RangeExt as _};
7use operation_types::Replace;
8
9use similar::DiffableStrRef as _;
10use unicode_segmentation::UnicodeSegmentation as _;
11
12pub fn diff(from: &str, to: &str) -> Vec<Replace> {
13 let mut result = Vec::new();
14
15 let from_segs = unicode_segs::calc(from);
16 let to_segs = unicode_segs::calc(to);
17
18 let mut from_words: Vec<_> = from
19 .split_word_bound_indices()
20 .map(|(idx, _)| DocByteOffset(idx))
21 .collect();
22 from_words.push(DocByteOffset(from.len()));
23
24 let mut to_words: Vec<_> = to
25 .split_word_bound_indices()
26 .map(|(idx, _)| DocByteOffset(idx))
27 .collect();
28 to_words.push(DocByteOffset(to.len()));
29
30 let diff = similar::TextDiff::configure()
31 .algorithm(similar::Algorithm::Myers)
32 .diff_unicode_words(from.as_diffable_str(), to.as_diffable_str());
33
34 for diff_op in diff.ops().iter().cloned() {
35 match diff_op {
36 similar::DiffOp::Equal { .. } => {}
37 similar::DiffOp::Delete { old_index, old_len, .. } => {
38 let old_len = from_segs.offset_to_char(from_words[old_index + old_len])
39 - from_segs.offset_to_char(from_words[old_index]);
40 let old_index = from_segs.offset_to_char(from_words[old_index]);
41
42 let mut extended = false;
43 if let Some(op) = result.last_mut() {
44 let Replace { range, .. } = op;
45 if range.1 == old_index {
46 range.1 = old_index + old_len;
47 extended = true;
48 }
49 }
50
51 if !extended {
52 let op =
53 Replace { range: (old_index, old_index + old_len), text: String::new() };
54 result.push(op);
55 }
56 }
57 similar::DiffOp::Insert { old_index, new_index, new_len } => {
58 let old_index = from_segs.offset_to_char(from_words[old_index]);
59 let new_len = to_segs.offset_to_char(to_words[new_index + new_len])
60 - to_segs.offset_to_char(to_words[new_index]);
61 let new_index = to_segs.offset_to_char(to_words[new_index]);
62
63 let new_text_range = to_segs.range_to_byte((new_index, new_index + new_len));
64 let new_text = to[new_text_range.start().0..new_text_range.end().0].to_string();
65
66 let mut extended = false;
67 if let Some(op) = result.last_mut() {
68 let Replace { range, text } = op;
69 if range.1 == old_index {
70 text.push_str(&new_text);
71 extended = true;
72 }
73 }
74
75 if !extended {
76 let op = Replace { range: (old_index, old_index), text: new_text };
77 result.push(op);
78 }
79 }
80 similar::DiffOp::Replace { old_index, old_len, new_index, new_len } => {
81 let old_len = from_segs.offset_to_char(from_words[old_index + old_len])
82 - from_segs.offset_to_char(from_words[old_index]);
83 let old_index = from_segs.offset_to_char(from_words[old_index]);
84 let new_len = to_segs.offset_to_char(to_words[new_index + new_len])
85 - to_segs.offset_to_char(to_words[new_index]);
86 let new_index = to_segs.offset_to_char(to_words[new_index]);
87
88 let new_text_range = to_segs.range_to_byte((new_index, new_index + new_len));
89 let new_text = to[new_text_range.start().0..new_text_range.end().0].to_string();
90
91 let mut extended = false;
92 if let Some(op) = result.last_mut() {
93 let Replace { range, text } = op;
94 if range.1 == old_index {
95 range.1 = old_index + old_len;
96 text.push_str(&new_text);
97 extended = true;
98 }
99 }
100
101 if !extended {
102 let op = Replace { range: (old_index, old_index + old_len), text: new_text };
103 result.push(op);
104 }
105 }
106 }
107 }
108 result
109}
110
111#[cfg(test)]
112mod test {
113 use rand::{rngs::StdRng, Rng as _, SeedableRng as _};
114
115 #[test]
116 fn diff_full_replace() {
117 let from = "Hello";
118 let to = "Goodbye";
119
120 let result = super::diff(from, to);
121 assert_eq!(result.len(), 1);
122 assert_eq!(result[0].range, (0.into(), 5.into()));
123 assert_eq!(result[0].text, "Goodbye");
124 }
125
126 #[test]
127 fn diff_partial_replace() {
128 let from = "Hello, world!";
129 let to = "Hello, Rust!";
130
131 let result = super::diff(from, to);
132 assert_eq!(result.len(), 1);
133 assert_eq!(result[0].range, (7.into(), 12.into()));
134 assert_eq!(result[0].text, "Rust");
135 }
136
137 #[test]
138 fn diff_fuzz() {
139 let mut count = 0;
140 let mut rng = StdRng::seed_from_u64(0);
141 loop {
142 let from: String = rand_str(&mut rng, rand::random::<usize>() % 10);
143 let to: String = rand_str(&mut rng, rand::random::<usize>() % 10);
144 let _ = super::diff(&from, &to);
145 count += 1;
146 if count == 1000 {
147 break;
148 }
149 }
150 }
151
152 fn rand_str(rng: &mut StdRng, length: usize) -> String {
153 let unicode_string: String = (0..length)
154 .map(|_| {
155 let code_point = rng.gen_range(0x0020..=0xD7FF);
156 std::char::from_u32(code_point).unwrap_or('?')
157 })
158 .collect();
159 unicode_string
160 }
161}