delta/algorithm/
onepass.rs1use crate::hash::{next_prime, RollingHash};
2use crate::splay::SplayTree;
3use crate::types::{Command, DiffOptions};
4
5#[derive(Clone, Copy)]
8struct Slot {
9 fp: u64,
10 offset: usize,
11 version: u64,
12}
13
14const EMPTY_SLOT: Slot = Slot {
15 fp: 0,
16 offset: 0,
17 version: u64::MAX,
18};
19
20pub fn diff_onepass(r: &[u8], v: &[u8], opts: &DiffOptions) -> Vec<Command> {
35 let p = opts.p;
36 let q = opts.q;
37 let verbose = opts.verbose;
38 let use_splay = opts.use_splay;
39
40 let mut commands = Vec::new();
41 if v.is_empty() {
42 return commands;
43 }
44
45 let num_seeds = if r.len() >= p { r.len() - p + 1 } else { 0 };
47 let q = next_prime(q.max(num_seeds / p));
48
49 if verbose {
50 eprintln!(
51 "onepass: {}, q={}, |R|={}, |V|={}, seed_len={}",
52 if use_splay { "splay tree" } else { "hash table" },
53 q, r.len(), v.len(), p
54 );
55 }
56
57 let mut h_v_ht: Vec<Slot> = if !use_splay { vec![EMPTY_SLOT; q] } else { Vec::new() };
60 let mut h_r_ht: Vec<Slot> = if !use_splay { vec![EMPTY_SLOT; q] } else { Vec::new() };
61
62 let mut h_v_sp: SplayTree<(usize, u64)> = SplayTree::new();
64 let mut h_r_sp: SplayTree<(usize, u64)> = SplayTree::new();
65
66 let mut ver: u64 = 0;
67
68 let mut dbg_positions: usize = 0;
70 let mut dbg_lookups: usize = 0;
71 let mut dbg_matches: usize = 0;
72
73 let mut r_c: usize = 0;
75 let mut v_c: usize = 0;
76 let mut v_s: usize = 0;
77
78 let mut rh_v: Option<RollingHash> = if v.len() >= p { Some(RollingHash::new(v, 0, p)) } else { None };
81 let mut rh_r: Option<RollingHash> = if r.len() >= p { Some(RollingHash::new(r, 0, p)) } else { None };
82 let mut rh_v_pos: usize = 0; let mut rh_r_pos: usize = 0;
84
85 loop {
86 let can_v = v_c + p <= v.len();
88 let can_r = r_c + p <= r.len();
89
90 if !can_v && !can_r {
91 break;
92 }
93 dbg_positions += 1;
94
95 let fp_v = if can_v {
96 if let Some(ref mut rh) = rh_v {
97 if v_c == rh_v_pos {
98 } else if v_c == rh_v_pos + 1 {
100 rh.roll(v[v_c - 1], v[v_c + p - 1]);
101 rh_v_pos = v_c;
102 } else {
103 *rh = RollingHash::new(v, v_c, p);
105 rh_v_pos = v_c;
106 }
107 Some(rh.value())
108 } else {
109 None
110 }
111 } else {
112 None
113 };
114 let fp_r = if can_r {
115 if let Some(ref mut rh) = rh_r {
116 if r_c == rh_r_pos {
117 } else if r_c == rh_r_pos + 1 {
119 rh.roll(r[r_c - 1], r[r_c + p - 1]);
120 rh_r_pos = r_c;
121 } else {
122 *rh = RollingHash::new(r, r_c, p);
123 rh_r_pos = r_c;
124 }
125 Some(rh.value())
126 } else {
127 None
128 }
129 } else {
130 None
131 };
132
133 if let Some(fp) = fp_v {
135 if use_splay {
136 let existing = h_v_sp.find(fp);
137 if existing.is_none() || existing.unwrap().1 != ver {
138 h_v_sp.insert(fp, (v_c, ver));
139 }
140 } else {
141 let idx = (fp % q as u64) as usize;
142 let slot = &mut h_v_ht[idx];
143 if slot.version != ver {
144 *slot = Slot { fp, offset: v_c, version: ver };
145 }
146 }
147 }
148 if let Some(fp) = fp_r {
149 if use_splay {
150 let existing = h_r_sp.find(fp);
151 if existing.is_none() || existing.unwrap().1 != ver {
152 h_r_sp.insert(fp, (r_c, ver));
153 }
154 } else {
155 let idx = (fp % q as u64) as usize;
156 let slot = &mut h_r_ht[idx];
157 if slot.version != ver {
158 *slot = Slot { fp, offset: r_c, version: ver };
159 }
160 }
161 }
162
163 let mut match_found = false;
165 let mut r_m: usize = 0;
166 let mut v_m: usize = 0;
167
168 if let Some(fp) = fp_r {
169 let v_cand = if use_splay {
170 h_v_sp.find(fp).and_then(|&mut (off, v)| if v == ver { Some(off) } else { None })
171 } else {
172 let idx = (fp % q as u64) as usize;
173 let slot = &h_v_ht[idx];
174 if slot.version == ver && slot.fp == fp { Some(slot.offset) } else { None }
175 };
176 if let Some(v_cand) = v_cand {
177 dbg_lookups += 1;
178 if r[r_c..r_c + p] == v[v_cand..v_cand + p] {
179 r_m = r_c;
180 v_m = v_cand;
181 match_found = true;
182 }
183 }
184 }
185
186 if !match_found {
187 if let Some(fp) = fp_v {
188 let r_cand = if use_splay {
189 h_r_sp.find(fp).and_then(|&mut (off, v)| if v == ver { Some(off) } else { None })
190 } else {
191 let idx = (fp % q as u64) as usize;
192 let slot = &h_r_ht[idx];
193 if slot.version == ver && slot.fp == fp { Some(slot.offset) } else { None }
194 };
195 if let Some(r_cand) = r_cand {
196 dbg_lookups += 1;
197 if v[v_c..v_c + p] == r[r_cand..r_cand + p] {
198 v_m = v_c;
199 r_m = r_cand;
200 match_found = true;
201 }
202 }
203 }
204 }
205
206 if !match_found {
207 v_c += 1;
208 r_c += 1;
209 continue;
210 }
211 dbg_matches += 1;
212
213 let max_ext = (v.len() - v_m).min(r.len() - r_m);
217 let ml = v[v_m..v_m + max_ext]
218 .iter()
219 .zip(&r[r_m..r_m + max_ext])
220 .position(|(a, b)| a != b)
221 .unwrap_or(max_ext);
222
223 if v_s < v_m {
225 commands.push(Command::Add {
226 data: v[v_s..v_m].to_vec(),
227 });
228 }
229 commands.push(Command::Copy {
230 offset: r_m,
231 length: ml,
232 });
233 v_s = v_m + ml;
234
235 v_c = v_m + ml;
237 r_c = r_m + ml;
238 ver += 1;
239 }
240
241 if v_s < v.len() {
243 commands.push(Command::Add {
244 data: v[v_s..].to_vec(),
245 });
246 }
247
248 if verbose {
249 let hit_pct = if dbg_lookups > 0 { dbg_matches as f64 / dbg_lookups as f64 * 100.0 } else { 0.0 };
250 eprintln!(
251 " scan: {} positions, {} lookups, {} matches (flushes)\n \
252 scan: hit rate {:.1}% (of lookups)",
253 dbg_positions, dbg_lookups, dbg_matches, hit_pct
254 );
255 super::print_command_stats(&commands);
256 }
257
258 commands
259}
260
261pub fn diff_onepass_default(r: &[u8], v: &[u8]) -> Vec<Command> {
263 diff_onepass(r, v, &DiffOptions::default())
264}