1use log::*;
2use rayon::prelude::*;
3use sacabase::StringIndex;
4use sacapart::PartitionedSuffixArray;
5use std::{
6 cmp::min,
7 error::Error,
8 io::{self, Write},
9 time::Instant,
10};
11
12#[cfg(feature = "enc")]
13pub mod enc;
14
15#[cfg(any(test, feature = "instructions"))]
16pub mod instructions;
17
18#[derive(Debug)]
19pub struct Match {
20 pub add_old_start: usize,
21 pub add_new_start: usize,
22 pub add_length: usize,
23 pub copy_end: usize,
24}
25
26impl Match {
27 #[inline(always)]
28 pub fn copy_start(&self) -> usize {
29 self.add_new_start + self.add_length
30 }
31}
32
33#[derive(Debug, Clone)]
34pub struct Control<'a> {
35 pub add: &'a [u8],
36 pub copy: &'a [u8],
37 pub seek: i64,
38}
39
40pub struct Translator<'a, F, E>
41where
42 F: FnMut(&Control) -> Result<(), E>,
43 E: Error,
44{
45 obuf: &'a [u8],
46 nbuf: &'a [u8],
47 prev_match: Option<Match>,
48 buf: Vec<u8>,
49 on_control: F,
50 closed: bool,
51}
52
53impl<'a, F, E> Translator<'a, F, E>
54where
55 F: FnMut(&Control) -> Result<(), E>,
56 E: Error,
57{
58 pub fn new(obuf: &'a [u8], nbuf: &'a [u8], on_control: F) -> Self {
59 Self {
60 obuf,
61 nbuf,
62 buf: Vec::with_capacity(16 * 1024),
63 prev_match: None,
64 on_control,
65 closed: false,
66 }
67 }
68
69 fn send_control(&mut self, m: Option<&Match>) -> Result<(), E> {
70 if let Some(pm) = self.prev_match.take() {
71 (self.on_control)(&Control {
72 add: &self.buf[..pm.add_length],
73 copy: &self.nbuf[pm.copy_start()..pm.copy_end],
74 seek: if let Some(m) = m {
75 m.add_old_start as i64 - (pm.add_old_start + pm.add_length) as i64
76 } else {
77 0
78 },
79 })?;
80 }
81 Ok(())
82 }
83
84 pub fn translate(&mut self, m: Match) -> Result<(), E> {
85 self.send_control(Some(&m))?;
86
87 self.buf.clear();
88
89 let nbuf = &self.nbuf;
95 let obuf = &self.obuf;
96 self.buf.extend(
97 (0..m.add_length)
98 .map(|i| nbuf[m.add_new_start + i].wrapping_sub(obuf[m.add_old_start + i])),
99 );
100
101 self.prev_match = Some(m);
102 Ok(())
103 }
104
105 pub fn close(mut self) -> Result<(), E> {
106 self.do_close()
107 }
108
109 fn do_close(&mut self) -> Result<(), E> {
110 if !self.closed {
111 self.send_control(None)?;
112 self.closed = true;
113 }
114 Ok(())
115 }
116}
117
118impl<'a, F, E> Drop for Translator<'a, F, E>
119where
120 F: FnMut(&Control) -> Result<(), E>,
121 E: Error,
122{
123 fn drop(&mut self) {
124 self.do_close().unwrap_or(());
127 }
128}
129
130struct BsdiffIterator<'a> {
131 scan: usize,
132 pos: usize,
133 length: usize,
134 lastscan: usize,
135 lastpos: usize,
136 lastoffset: isize,
137
138 obuf: &'a [u8],
139 nbuf: &'a [u8],
140 sa: &'a dyn StringIndex<'a>,
141}
142
143impl<'a> BsdiffIterator<'a> {
144 pub fn new(obuf: &'a [u8], nbuf: &'a [u8], sa: &'a dyn StringIndex<'a>) -> Self {
145 Self {
146 scan: 0,
147 pos: 0,
148 length: 0,
149 lastscan: 0,
150 lastpos: 0,
151 lastoffset: 0,
152 obuf,
153 nbuf,
154 sa,
155 }
156 }
157}
158
159impl<'a> Iterator for BsdiffIterator<'a> {
160 type Item = Match;
161 fn next(&mut self) -> Option<Self::Item> {
162 let obuflen = self.obuf.len();
163 let nbuflen = self.nbuf.len();
164
165 while self.scan < nbuflen {
166 let mut oldscore = 0_usize;
167 self.scan += self.length;
168
169 let mut scsc = self.scan;
170 'inner: while self.scan < nbuflen {
171 let res = self.sa.longest_substring_match(&self.nbuf[self.scan..]);
172 self.pos = res.start;
173 self.length = res.len;
174
175 {
176 while scsc < self.scan + self.length {
177 let oi = (scsc as isize + self.lastoffset) as usize;
178 if oi < obuflen && self.obuf[oi] == self.nbuf[scsc] {
179 oldscore += 1;
180 }
181 scsc += 1;
182 }
183 }
184
185 let significantly_better = self.length > oldscore + 8;
186 let same_length = self.length == oldscore && self.length != 0;
187
188 if same_length || significantly_better {
189 break 'inner;
190 }
191
192 {
193 let oi = (self.scan as isize + self.lastoffset) as usize;
194 if oi < obuflen && self.obuf[oi] == self.nbuf[self.scan] {
195 oldscore -= 1;
196 }
197 }
198
199 self.scan += 1;
200 } let done_scanning = self.scan == nbuflen;
203 if self.length != oldscore || done_scanning {
204 let mut lenf = {
206 let (mut s, mut sf, mut lenf) = (0_isize, 0_isize, 0_isize);
207
208 for i in 0..min(self.scan - self.lastscan, obuflen - self.lastpos) {
209 if self.obuf[self.lastpos + i] == self.nbuf[self.lastscan + i] {
210 s += 1;
211 }
212
213 {
214 let i = i + 1;
217 if s * 2 - i as isize > sf * 2 - lenf {
218 sf = s;
219 lenf = i as isize;
220 }
221 }
222 }
223 lenf as usize
224 };
225
226 let mut lenb = if self.scan >= nbuflen {
228 0
229 } else {
230 let (mut s, mut sb, mut lenb) = (0_isize, 0_isize, 0_isize);
231
232 for i in 1..=min(self.scan - self.lastscan, self.pos) {
233 if self.obuf[self.pos - i] == self.nbuf[self.scan - i] {
234 s += 1;
235 }
236
237 if (s * 2 - i as isize) > (sb * 2 - lenb) {
238 sb = s;
239 lenb = i as isize;
240 }
241 }
242 lenb as usize
243 };
244
245 let lastscan_was_better = self.lastscan + lenf > self.scan - lenb;
246 if lastscan_was_better {
247 let overlap = (self.lastscan + lenf) - (self.scan - lenb);
251
252 let lens = {
253 let (mut s, mut ss, mut lens) = (0, 0, 0);
254 for i in 0..overlap {
255 if self.nbuf[self.lastscan + lenf - overlap + i]
256 == self.obuf[self.lastpos + lenf - overlap + i]
257 {
258 s += 1;
260 }
261 if self.nbuf[self.scan - lenb + i] == self.obuf[self.pos - lenb + i] {
262 s -= 1;
264 }
265
266 if s > ss {
268 ss = s;
269 lens = i + 1;
270 }
271 }
272 lens
273 };
274 lenf += lens;
276 lenf -= overlap;
277
278 lenb -= lens;
279 } let m = Match {
282 add_old_start: self.lastpos,
283 add_new_start: self.lastscan,
284 add_length: lenf,
285 copy_end: self.scan - lenb,
286 };
287
288 self.lastscan = self.scan - lenb;
289 self.lastpos = self.pos - lenb;
290 self.lastoffset = self.pos as isize - self.scan as isize;
291
292 return Some(m);
293 } } None
297 }
298}
299
300pub struct DiffParams {
302 sort_partitions: usize,
303 scan_chunk_size: Option<usize>,
304}
305
306impl DiffParams {
307 pub fn new(
318 sort_partitions: usize,
319 scan_chunk_size: Option<usize>,
320 ) -> Result<Self, Box<dyn Error + Send + Sync + 'static>> {
321 if sort_partitions < 1 {
322 return Err("number of sort partitions cannot be less than 1".into());
323 }
324 if scan_chunk_size.filter(|s| *s < 1).is_some() {
325 return Err("scan chunk size cannot be less than 1".into());
326 }
327
328 Ok(Self {
329 sort_partitions,
330 scan_chunk_size,
331 })
332 }
333}
334
335impl Default for DiffParams {
336 fn default() -> Self {
337 Self {
338 sort_partitions: 1,
339 scan_chunk_size: None,
340 }
341 }
342}
343
344pub fn diff<F, E>(obuf: &[u8], nbuf: &[u8], params: &DiffParams, mut on_match: F) -> Result<(), E>
346where
347 F: FnMut(Match) -> Result<(), E>,
348{
349 info!("building suffix array...");
350 let before_suffix = Instant::now();
351 let sa = PartitionedSuffixArray::new(&obuf[..], params.sort_partitions, divsufsort::sort);
352 info!(
353 "sorting took {}",
354 DurationSpeed(obuf.len() as u64, before_suffix.elapsed())
355 );
356
357 let before_scan = Instant::now();
358 if let Some(chunk_size) = params.scan_chunk_size {
359 let num_chunks = (nbuf.len() + chunk_size - 1) / chunk_size;
361
362 info!(
363 "scanning with {}B chunks... ({} chunks total)",
364 chunk_size, num_chunks
365 );
366
367 let mut txs = Vec::with_capacity(num_chunks);
368 let mut rxs = Vec::with_capacity(num_chunks);
369 for _ in 0..num_chunks {
370 let (tx, rx) = std::sync::mpsc::channel::<Vec<Match>>();
371 txs.push(tx);
372 rxs.push(rx);
373 }
374
375 nbuf.par_chunks(chunk_size).zip(txs).for_each(|(nbuf, tx)| {
376 let iter = BsdiffIterator::new(obuf, nbuf, &sa);
377 tx.send(iter.collect()).expect("should send results");
378 });
379
380 for (i, rx) in rxs.into_iter().enumerate() {
381 let offset = i * chunk_size;
382 let v = rx.recv().expect("should receive results");
383 for mut m in v {
384 m.add_new_start += offset;
389 m.copy_end += offset;
390 on_match(m)?;
391 }
392 }
393 } else {
394 for m in BsdiffIterator::new(obuf, nbuf, &sa) {
395 on_match(m)?
396 }
397 }
398
399 info!(
400 "scanning took {}",
401 DurationSpeed(obuf.len() as u64, before_scan.elapsed())
402 );
403
404 Ok(())
405}
406
407use std::fmt;
408
409struct DurationSpeed(u64, std::time::Duration);
410
411impl fmt::Display for DurationSpeed {
412 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
413 let (size, duration) = (self.0, self.1);
414 write!(f, "{:?} ({})", duration, Speed(size, duration))
415 }
416}
417
418struct Speed(u64, std::time::Duration);
419
420impl fmt::Display for Speed {
421 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
422 let (size, duration) = (self.0, self.1);
423 let per_sec = size as f64 / duration.as_secs_f64();
424 write!(f, "{} / s", Size(per_sec as u64))
425 }
426}
427
428struct Size(u64);
429
430impl fmt::Display for Size {
431 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
432 let x = self.0;
433
434 if x > 1024 * 1024 {
435 write!(f, "{:.2} MiB", x as f64 / (1024.0 * 1024.0))
436 } else if x > 1024 {
437 write!(f, "{:.1} KiB", x as f64 / (1024.0))
438 } else {
439 write!(f, "{} B", x)
440 }
441 }
442}
443
444#[cfg(feature = "enc")]
445pub fn simple_diff(older: &[u8], newer: &[u8], out: &mut dyn Write) -> Result<(), io::Error> {
446 simple_diff_with_params(older, newer, out, &Default::default())
447}
448
449#[cfg(feature = "enc")]
450pub fn simple_diff_with_params(
451 older: &[u8],
452 newer: &[u8],
453 out: &mut dyn Write,
454 diff_params: &DiffParams,
455) -> Result<(), io::Error> {
456 let mut w = enc::Writer::new(out)?;
457
458 let mut translator = Translator::new(older, newer, |control| w.write(control));
459 diff(older, newer, diff_params, |m| translator.translate(m))?;
460 translator.close()?;
461
462 Ok(())
463}
464
465pub fn assert_cycle(older: &[u8], newer: &[u8]) {
466 let mut older_pos = 0_usize;
467 let mut newer_pos = 0_usize;
468
469 let mut translator = Translator::new(older, newer, |control| -> Result<(), std::io::Error> {
470 for &ab in control.add {
471 let fb = ab.wrapping_add(older[older_pos]);
472 older_pos += 1;
473
474 let nb = newer[newer_pos];
475 newer_pos += 1;
476
477 assert_eq!(fb, nb);
478 }
479
480 for &cb in control.copy {
481 let nb = newer[newer_pos];
482 newer_pos += 1;
483
484 assert_eq!(cb, nb);
485 }
486
487 older_pos = (older_pos as i64 + control.seek) as usize;
488
489 Ok(())
490 });
491
492 diff(older, newer, &Default::default(), |m| {
493 translator.translate(m)
494 })
495 .unwrap();
496
497 translator.close().unwrap();
498
499 assert_eq!(
500 newer_pos,
501 newer.len(),
502 "fresh should have same length as newer"
503 );
504}
505
506#[cfg(test)]
507mod tests {
508 use super::instructions::apply_instructions;
509 use proptest::prelude::*;
510
511 #[test]
512 fn short_patch() {
513 let older = [
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 1, 2, 0,
516 ];
517 let instructions = [
518 12, 16, 5, 40, 132, 1, 47, 43, 20, 86, 150, 0, 150, 0, 150, 0, 115, 31, 0, 0, 0, 0, 0,
519 0, 0, 1, 38, 188, 128, 0, 150, 0,
520 ];
521 let newer = apply_instructions(&older[..], &instructions[..]);
522
523 super::assert_cycle(&older[..], &newer[..]);
524 }
525
526 proptest! {
527 #[test]
528 fn cycle(older: [u8; 32], instructions: [u8; 32]) {
529 let newer = apply_instructions(&older[..], &instructions[..]);
530 println!("{} => {}", older.len(), newer.len());
531 super::assert_cycle(&older[..], &newer[..]);
532 }
533 }
534}