lb_rs/model/text/
mod.rs

1pub mod buffer;
2pub mod offset_types;
3pub mod operation_types;
4pub mod unicode_segs;
5
6use offset_types::{DocByteOffset, RangeExt as _};
7use operation_types::Replace;
8
9use similar::DiffableStrRef as _;
10use unicode_segmentation::UnicodeSegmentation as _;
11
12pub fn diff(from: &str, to: &str) -> Vec<Replace> {
13    let mut result = Vec::new();
14
15    let from_segs = unicode_segs::calc(from);
16    let to_segs = unicode_segs::calc(to);
17
18    let mut from_words: Vec<_> = from
19        .split_word_bound_indices()
20        .map(|(idx, _)| DocByteOffset(idx))
21        .collect();
22    from_words.push(DocByteOffset(from.len()));
23
24    let mut to_words: Vec<_> = to
25        .split_word_bound_indices()
26        .map(|(idx, _)| DocByteOffset(idx))
27        .collect();
28    to_words.push(DocByteOffset(to.len()));
29
30    let diff = similar::TextDiff::configure()
31        .algorithm(similar::Algorithm::Myers)
32        .diff_unicode_words(from.as_diffable_str(), to.as_diffable_str());
33
34    for diff_op in diff.ops().iter().cloned() {
35        match diff_op {
36            similar::DiffOp::Equal { .. } => {}
37            similar::DiffOp::Delete { old_index, old_len, .. } => {
38                let old_len = from_segs.offset_to_char(from_words[old_index + old_len])
39                    - from_segs.offset_to_char(from_words[old_index]);
40                let old_index = from_segs.offset_to_char(from_words[old_index]);
41
42                let mut extended = false;
43                if let Some(op) = result.last_mut() {
44                    let Replace { range, .. } = op;
45                    if range.1 == old_index {
46                        range.1 = old_index + old_len;
47                        extended = true;
48                    }
49                }
50
51                if !extended {
52                    let op =
53                        Replace { range: (old_index, old_index + old_len), text: String::new() };
54                    result.push(op);
55                }
56            }
57            similar::DiffOp::Insert { old_index, new_index, new_len } => {
58                let old_index = from_segs.offset_to_char(from_words[old_index]);
59                let new_len = to_segs.offset_to_char(to_words[new_index + new_len])
60                    - to_segs.offset_to_char(to_words[new_index]);
61                let new_index = to_segs.offset_to_char(to_words[new_index]);
62
63                let new_text_range = to_segs.range_to_byte((new_index, new_index + new_len));
64                let new_text = to[new_text_range.start().0..new_text_range.end().0].to_string();
65
66                let mut extended = false;
67                if let Some(op) = result.last_mut() {
68                    let Replace { range, text } = op;
69                    if range.1 == old_index {
70                        text.push_str(&new_text);
71                        extended = true;
72                    }
73                }
74
75                if !extended {
76                    let op = Replace { range: (old_index, old_index), text: new_text };
77                    result.push(op);
78                }
79            }
80            similar::DiffOp::Replace { old_index, old_len, new_index, new_len } => {
81                let old_len = from_segs.offset_to_char(from_words[old_index + old_len])
82                    - from_segs.offset_to_char(from_words[old_index]);
83                let old_index = from_segs.offset_to_char(from_words[old_index]);
84                let new_len = to_segs.offset_to_char(to_words[new_index + new_len])
85                    - to_segs.offset_to_char(to_words[new_index]);
86                let new_index = to_segs.offset_to_char(to_words[new_index]);
87
88                let new_text_range = to_segs.range_to_byte((new_index, new_index + new_len));
89                let new_text = to[new_text_range.start().0..new_text_range.end().0].to_string();
90
91                let mut extended = false;
92                if let Some(op) = result.last_mut() {
93                    let Replace { range, text } = op;
94                    if range.1 == old_index {
95                        range.1 = old_index + old_len;
96                        text.push_str(&new_text);
97                        extended = true;
98                    }
99                }
100
101                if !extended {
102                    let op = Replace { range: (old_index, old_index + old_len), text: new_text };
103                    result.push(op);
104                }
105            }
106        }
107    }
108    result
109}
110
111#[cfg(test)]
112mod test {
113    use rand::rngs::StdRng;
114    use rand::{Rng as _, SeedableRng as _};
115
116    #[test]
117    fn diff_full_replace() {
118        let from = "Hello";
119        let to = "Goodbye";
120
121        let result = super::diff(from, to);
122        assert_eq!(result.len(), 1);
123        assert_eq!(result[0].range, (0.into(), 5.into()));
124        assert_eq!(result[0].text, "Goodbye");
125    }
126
127    #[test]
128    fn diff_partial_replace() {
129        let from = "Hello, world!";
130        let to = "Hello, Rust!";
131
132        let result = super::diff(from, to);
133        assert_eq!(result.len(), 1);
134        assert_eq!(result[0].range, (7.into(), 12.into()));
135        assert_eq!(result[0].text, "Rust");
136    }
137
138    #[test]
139    fn diff_fuzz() {
140        let mut count = 0;
141        let mut rng = StdRng::seed_from_u64(0);
142        loop {
143            let from: String = rand_str(&mut rng, rand::random::<usize>() % 10);
144            let to: String = rand_str(&mut rng, rand::random::<usize>() % 10);
145            let _ = super::diff(&from, &to);
146            count += 1;
147            if count == 1000 {
148                break;
149            }
150        }
151    }
152
153    fn rand_str(rng: &mut StdRng, length: usize) -> String {
154        let unicode_string: String = (0..length)
155            .map(|_| {
156                let code_point = rng.gen_range(0x0020..=0xD7FF);
157                std::char::from_u32(code_point).unwrap_or('?')
158            })
159            .collect();
160        unicode_string
161    }
162}