Skip to main content

lb_rs/model/text/
mod.rs

1pub mod buffer;
2pub mod offset_types;
3pub mod operation_types;
4pub mod unicode_segs;
5pub mod units;
6
7use offset_types::{Byte, RangeExt as _};
8use operation_types::Replace;
9
10use similar::DiffableStrRef as _;
11use unicode_segmentation::UnicodeSegmentation as _;
12
13pub fn diff(from: &str, to: &str) -> Vec<Replace> {
14    let mut result = Vec::new();
15
16    let from_segs = unicode_segs::calc(from);
17    let to_segs = unicode_segs::calc(to);
18
19    let mut from_words: Vec<_> = from
20        .split_word_bound_indices()
21        .map(|(idx, _)| Byte(idx))
22        .collect();
23    from_words.push(Byte(from.len()));
24
25    let mut to_words: Vec<_> = to
26        .split_word_bound_indices()
27        .map(|(idx, _)| Byte(idx))
28        .collect();
29    to_words.push(Byte(to.len()));
30
31    let diff = similar::TextDiff::configure()
32        .algorithm(similar::Algorithm::Myers)
33        .diff_unicode_words(from.as_diffable_str(), to.as_diffable_str());
34
35    for diff_op in diff.ops().iter().cloned() {
36        match diff_op {
37            similar::DiffOp::Equal { .. } => {}
38            similar::DiffOp::Delete { old_index, old_len, .. } => {
39                let old_len = from_segs.offset_to_char(from_words[old_index + old_len])
40                    - from_segs.offset_to_char(from_words[old_index]);
41                let old_index = from_segs.offset_to_char(from_words[old_index]);
42
43                let mut extended = false;
44                if let Some(op) = result.last_mut() {
45                    let Replace { range, .. } = op;
46                    if range.1 == old_index {
47                        range.1 = old_index + old_len;
48                        extended = true;
49                    }
50                }
51
52                if !extended {
53                    let op =
54                        Replace { range: (old_index, old_index + old_len), text: String::new() };
55                    result.push(op);
56                }
57            }
58            similar::DiffOp::Insert { old_index, new_index, new_len } => {
59                let old_index = from_segs.offset_to_char(from_words[old_index]);
60                let new_len = to_segs.offset_to_char(to_words[new_index + new_len])
61                    - to_segs.offset_to_char(to_words[new_index]);
62                let new_index = to_segs.offset_to_char(to_words[new_index]);
63
64                let new_text_range = to_segs.range_to_byte((new_index, new_index + new_len));
65                let new_text = to[new_text_range.start().0..new_text_range.end().0].to_string();
66
67                let mut extended = false;
68                if let Some(op) = result.last_mut() {
69                    let Replace { range, text } = op;
70                    if range.1 == old_index {
71                        text.push_str(&new_text);
72                        extended = true;
73                    }
74                }
75
76                if !extended {
77                    let op = Replace { range: (old_index, old_index), text: new_text };
78                    result.push(op);
79                }
80            }
81            similar::DiffOp::Replace { old_index, old_len, new_index, new_len } => {
82                let old_len = from_segs.offset_to_char(from_words[old_index + old_len])
83                    - from_segs.offset_to_char(from_words[old_index]);
84                let old_index = from_segs.offset_to_char(from_words[old_index]);
85                let new_len = to_segs.offset_to_char(to_words[new_index + new_len])
86                    - to_segs.offset_to_char(to_words[new_index]);
87                let new_index = to_segs.offset_to_char(to_words[new_index]);
88
89                let new_text_range = to_segs.range_to_byte((new_index, new_index + new_len));
90                let new_text = to[new_text_range.start().0..new_text_range.end().0].to_string();
91
92                let mut extended = false;
93                if let Some(op) = result.last_mut() {
94                    let Replace { range, text } = op;
95                    if range.1 == old_index {
96                        range.1 = old_index + old_len;
97                        text.push_str(&new_text);
98                        extended = true;
99                    }
100                }
101
102                if !extended {
103                    let op = Replace { range: (old_index, old_index + old_len), text: new_text };
104                    result.push(op);
105                }
106            }
107        }
108    }
109    result
110}
111
#[cfg(test)]
mod test {
    use rand::rngs::StdRng;
    use rand::{Rng as _, SeedableRng as _};

    /// Replacing the only word yields one operation that covers all of `from`.
    #[test]
    fn diff_full_replace() {
        let from = "Hello";
        let to = "Goodbye";

        let result = super::diff(from, to);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].range, (0.into(), 5.into()));
        assert_eq!(result[0].text, "Goodbye");
    }

    /// Only the changed word is reported; the unchanged prefix and suffix are left alone.
    #[test]
    fn diff_partial_replace() {
        let from = "Hello, world!";
        let to = "Hello, Rust!";

        let result = super::diff(from, to);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].range, (7.into(), 12.into()));
        assert_eq!(result[0].text, "Rust");
    }

    /// Smoke test: `diff` must not panic on random short Unicode strings.
    ///
    /// All randomness, including the string lengths, comes from the seeded RNG so that a
    /// failing input is reproduced on every run.
    #[test]
    fn diff_fuzz() {
        let mut rng = StdRng::seed_from_u64(0);
        for _ in 0..1000 {
            let from_len: usize = rng.gen_range(0..10);
            let from = rand_str(&mut rng, from_len);
            let to_len: usize = rng.gen_range(0..10);
            let to = rand_str(&mut rng, to_len);
            let _ = super::diff(&from, &to);
        }
    }

    /// Builds a string of `length` random characters drawn from U+0020..=U+D7FF.
    fn rand_str(rng: &mut StdRng, length: usize) -> String {
        (0..length)
            .map(|_| {
                let code_point = rng.gen_range(0x0020..=0xD7FF);
                // The range stops just below the surrogate block (U+D800), so the fallback
                // is only a safeguard.
                std::char::from_u32(code_point).unwrap_or('?')
            })
            .collect()
    }
}