1pub mod buffer;
2pub mod offset_types;
3pub mod operation_types;
4pub mod unicode_segs;
5pub mod units;
6
7use offset_types::{Byte, RangeExt as _};
8use operation_types::Replace;
9
10use similar::DiffableStrRef as _;
11use unicode_segmentation::UnicodeSegmentation as _;
12
13pub fn diff(from: &str, to: &str) -> Vec<Replace> {
14 let mut result = Vec::new();
15
16 let from_segs = unicode_segs::calc(from);
17 let to_segs = unicode_segs::calc(to);
18
19 let mut from_words: Vec<_> = from
20 .split_word_bound_indices()
21 .map(|(idx, _)| Byte(idx))
22 .collect();
23 from_words.push(Byte(from.len()));
24
25 let mut to_words: Vec<_> = to
26 .split_word_bound_indices()
27 .map(|(idx, _)| Byte(idx))
28 .collect();
29 to_words.push(Byte(to.len()));
30
31 let diff = similar::TextDiff::configure()
32 .algorithm(similar::Algorithm::Myers)
33 .diff_unicode_words(from.as_diffable_str(), to.as_diffable_str());
34
35 for diff_op in diff.ops().iter().cloned() {
36 match diff_op {
37 similar::DiffOp::Equal { .. } => {}
38 similar::DiffOp::Delete { old_index, old_len, .. } => {
39 let old_len = from_segs.offset_to_char(from_words[old_index + old_len])
40 - from_segs.offset_to_char(from_words[old_index]);
41 let old_index = from_segs.offset_to_char(from_words[old_index]);
42
43 let mut extended = false;
44 if let Some(op) = result.last_mut() {
45 let Replace { range, .. } = op;
46 if range.1 == old_index {
47 range.1 = old_index + old_len;
48 extended = true;
49 }
50 }
51
52 if !extended {
53 let op =
54 Replace { range: (old_index, old_index + old_len), text: String::new() };
55 result.push(op);
56 }
57 }
58 similar::DiffOp::Insert { old_index, new_index, new_len } => {
59 let old_index = from_segs.offset_to_char(from_words[old_index]);
60 let new_len = to_segs.offset_to_char(to_words[new_index + new_len])
61 - to_segs.offset_to_char(to_words[new_index]);
62 let new_index = to_segs.offset_to_char(to_words[new_index]);
63
64 let new_text_range = to_segs.range_to_byte((new_index, new_index + new_len));
65 let new_text = to[new_text_range.start().0..new_text_range.end().0].to_string();
66
67 let mut extended = false;
68 if let Some(op) = result.last_mut() {
69 let Replace { range, text } = op;
70 if range.1 == old_index {
71 text.push_str(&new_text);
72 extended = true;
73 }
74 }
75
76 if !extended {
77 let op = Replace { range: (old_index, old_index), text: new_text };
78 result.push(op);
79 }
80 }
81 similar::DiffOp::Replace { old_index, old_len, new_index, new_len } => {
82 let old_len = from_segs.offset_to_char(from_words[old_index + old_len])
83 - from_segs.offset_to_char(from_words[old_index]);
84 let old_index = from_segs.offset_to_char(from_words[old_index]);
85 let new_len = to_segs.offset_to_char(to_words[new_index + new_len])
86 - to_segs.offset_to_char(to_words[new_index]);
87 let new_index = to_segs.offset_to_char(to_words[new_index]);
88
89 let new_text_range = to_segs.range_to_byte((new_index, new_index + new_len));
90 let new_text = to[new_text_range.start().0..new_text_range.end().0].to_string();
91
92 let mut extended = false;
93 if let Some(op) = result.last_mut() {
94 let Replace { range, text } = op;
95 if range.1 == old_index {
96 range.1 = old_index + old_len;
97 text.push_str(&new_text);
98 extended = true;
99 }
100 }
101
102 if !extended {
103 let op = Replace { range: (old_index, old_index + old_len), text: new_text };
104 result.push(op);
105 }
106 }
107 }
108 }
109 result
110}
111
#[cfg(test)]
mod test {
    use rand::rngs::StdRng;
    use rand::{Rng as _, SeedableRng as _};

    /// Two completely different single words collapse into one edit that
    /// covers the whole of `from`.
    #[test]
    fn diff_full_replace() {
        let from = "Hello";
        let to = "Goodbye";

        let result = super::diff(from, to);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].range, (0.into(), 5.into()));
        assert_eq!(result[0].text, "Goodbye");
    }

    /// Only the differing word is reported; the shared prefix and suffix are
    /// left untouched.
    #[test]
    fn diff_partial_replace() {
        let from = "Hello, world!";
        let to = "Hello, Rust!";

        let result = super::diff(from, to);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].range, (7.into(), 12.into()));
        assert_eq!(result[0].text, "Rust");
    }

    /// Smoke test: `diff` must not panic on short random Unicode strings.
    ///
    /// Every random choice, including the string lengths, is drawn from the
    /// seeded generator so that a failing input can be reproduced.
    #[test]
    fn diff_fuzz() {
        let mut rng = StdRng::seed_from_u64(0);
        for _ in 0..1000 {
            // Lengths are drawn before the call because `rand_str` needs the
            // only mutable borrow of `rng`.
            let from_len = rng.gen_range(0..10);
            let from = rand_str(&mut rng, from_len);
            let to_len = rng.gen_range(0..10);
            let to = rand_str(&mut rng, to_len);

            let _ = super::diff(&from, &to);
        }
    }

    /// Builds a string of `length` random scalar values in U+0020..=U+D7FF.
    fn rand_str(rng: &mut StdRng, length: usize) -> String {
        (0..length)
            .map(|_| {
                let code_point = rng.gen_range(0x0020..=0xD7FF);
                // The range stops below the surrogate block, so the fallback
                // is only a safety net.
                std::char::from_u32(code_point).unwrap_or('?')
            })
            .collect()
    }
}