1pub mod buffer;
2pub mod offset_types;
3pub mod operation_types;
4pub mod unicode_segs;
5
6use offset_types::{DocByteOffset, RangeExt as _};
7use operation_types::Replace;
8
9use similar::DiffableStrRef as _;
10use unicode_segmentation::UnicodeSegmentation as _;
11
12pub fn diff(from: &str, to: &str) -> Vec<Replace> {
13 let mut result = Vec::new();
14
15 let from_segs = unicode_segs::calc(from);
16 let to_segs = unicode_segs::calc(to);
17
18 let mut from_words: Vec<_> = from
19 .split_word_bound_indices()
20 .map(|(idx, _)| DocByteOffset(idx))
21 .collect();
22 from_words.push(DocByteOffset(from.len()));
23
24 let mut to_words: Vec<_> = to
25 .split_word_bound_indices()
26 .map(|(idx, _)| DocByteOffset(idx))
27 .collect();
28 to_words.push(DocByteOffset(to.len()));
29
30 let diff = similar::TextDiff::configure()
31 .algorithm(similar::Algorithm::Myers)
32 .diff_unicode_words(from.as_diffable_str(), to.as_diffable_str());
33
34 for diff_op in diff.ops().iter().cloned() {
35 match diff_op {
36 similar::DiffOp::Equal { .. } => {}
37 similar::DiffOp::Delete { old_index, old_len, .. } => {
38 let old_len = from_segs.offset_to_char(from_words[old_index + old_len])
39 - from_segs.offset_to_char(from_words[old_index]);
40 let old_index = from_segs.offset_to_char(from_words[old_index]);
41
42 let mut extended = false;
43 if let Some(op) = result.last_mut() {
44 let Replace { range, .. } = op;
45 if range.1 == old_index {
46 range.1 = old_index + old_len;
47 extended = true;
48 }
49 }
50
51 if !extended {
52 let op =
53 Replace { range: (old_index, old_index + old_len), text: String::new() };
54 result.push(op);
55 }
56 }
57 similar::DiffOp::Insert { old_index, new_index, new_len } => {
58 let old_index = from_segs.offset_to_char(from_words[old_index]);
59 let new_len = to_segs.offset_to_char(to_words[new_index + new_len])
60 - to_segs.offset_to_char(to_words[new_index]);
61 let new_index = to_segs.offset_to_char(to_words[new_index]);
62
63 let new_text_range = to_segs.range_to_byte((new_index, new_index + new_len));
64 let new_text = to[new_text_range.start().0..new_text_range.end().0].to_string();
65
66 let mut extended = false;
67 if let Some(op) = result.last_mut() {
68 let Replace { range, text } = op;
69 if range.1 == old_index {
70 text.push_str(&new_text);
71 extended = true;
72 }
73 }
74
75 if !extended {
76 let op = Replace { range: (old_index, old_index), text: new_text };
77 result.push(op);
78 }
79 }
80 similar::DiffOp::Replace { old_index, old_len, new_index, new_len } => {
81 let old_len = from_segs.offset_to_char(from_words[old_index + old_len])
82 - from_segs.offset_to_char(from_words[old_index]);
83 let old_index = from_segs.offset_to_char(from_words[old_index]);
84 let new_len = to_segs.offset_to_char(to_words[new_index + new_len])
85 - to_segs.offset_to_char(to_words[new_index]);
86 let new_index = to_segs.offset_to_char(to_words[new_index]);
87
88 let new_text_range = to_segs.range_to_byte((new_index, new_index + new_len));
89 let new_text = to[new_text_range.start().0..new_text_range.end().0].to_string();
90
91 let mut extended = false;
92 if let Some(op) = result.last_mut() {
93 let Replace { range, text } = op;
94 if range.1 == old_index {
95 range.1 = old_index + old_len;
96 text.push_str(&new_text);
97 extended = true;
98 }
99 }
100
101 if !extended {
102 let op = Replace { range: (old_index, old_index + old_len), text: new_text };
103 result.push(op);
104 }
105 }
106 }
107 }
108 result
109}
110
111#[cfg(test)]
112mod test {
113 use rand::rngs::StdRng;
114 use rand::{Rng as _, SeedableRng as _};
115
116 #[test]
117 fn diff_full_replace() {
118 let from = "Hello";
119 let to = "Goodbye";
120
121 let result = super::diff(from, to);
122 assert_eq!(result.len(), 1);
123 assert_eq!(result[0].range, (0.into(), 5.into()));
124 assert_eq!(result[0].text, "Goodbye");
125 }
126
127 #[test]
128 fn diff_partial_replace() {
129 let from = "Hello, world!";
130 let to = "Hello, Rust!";
131
132 let result = super::diff(from, to);
133 assert_eq!(result.len(), 1);
134 assert_eq!(result[0].range, (7.into(), 12.into()));
135 assert_eq!(result[0].text, "Rust");
136 }
137
138 #[test]
139 fn diff_fuzz() {
140 let mut count = 0;
141 let mut rng = StdRng::seed_from_u64(0);
142 loop {
143 let from: String = rand_str(&mut rng, rand::random::<usize>() % 10);
144 let to: String = rand_str(&mut rng, rand::random::<usize>() % 10);
145 let _ = super::diff(&from, &to);
146 count += 1;
147 if count == 1000 {
148 break;
149 }
150 }
151 }
152
153 fn rand_str(rng: &mut StdRng, length: usize) -> String {
154 let unicode_string: String = (0..length)
155 .map(|_| {
156 let code_point = rng.gen_range(0x0020..=0xD7FF);
157 std::char::from_u32(code_point).unwrap_or('?')
158 })
159 .collect();
160 unicode_string
161 }
162}