use super::utils::*;
use bzip2::write::BzEncoder;
use std::io::{Cursor, Result, Write};
use suffix_array::SuffixArray;
pub use bzip2::Compression;
pub use suffix_array::MAX_LENGTH;
pub const DISMATCH_COUNT: usize = 8;
pub const SMALL_MATCH: usize = 12;
pub const BUFFER_SIZE: usize = 4096;
pub const LEVEL: Compression = Compression::Default;
pub struct Bsdiff<'s, 't> {
s: &'s [u8],
t: &'t [u8],
sa: SuffixArray<'s>,
dismat: usize,
small: usize,
bsize: usize,
level: Compression,
}
impl<'s, 't> Bsdiff<'s, 't> {
pub fn new(source: &'s [u8], target: &'t [u8]) -> Self {
if source.len() > MAX_LENGTH {
panic!("source data is too large to be indexed");
}
Bsdiff {
s: source,
t: target,
sa: SuffixArray::new(source),
dismat: DISMATCH_COUNT,
small: SMALL_MATCH,
level: Compression::Default,
bsize: BUFFER_SIZE,
}
}
pub fn target(mut self, t: &'t [u8]) -> Self {
self.t = t;
self
}
pub fn dismatch_count(mut self, mut dis: usize) -> Self {
if dis < 1 {
dis = 1;
}
self.dismat = dis;
self
}
pub fn small_match(mut self, sm: usize) -> Self {
self.small = sm;
self
}
pub fn compression_level(mut self, lv: Compression) -> Self {
self.level = lv;
self
}
pub fn buffer_size(mut self, mut bs: usize) -> Self {
if bs < 128 {
bs = 128;
}
self.bsize = bs;
self
}
pub fn compare<P: Write>(&self, patch: P) -> Result<u64> {
let ctx = Context::new(self.s, self.t, &self.sa, self.dismat, self.small);
ctx.compare(patch, self.level, self.bsize)
}
}
struct Context<'s, 't, 'sa> {
s: &'s [u8],
t: &'t [u8],
sa: &'sa SuffixArray<'s>,
dismat: usize,
small: usize,
i0: usize,
j0: usize,
n0: usize,
b0: usize,
}
impl<'s, 't, 'sa> Context<'s, 't, 'sa> {
pub fn new(
s: &'s [u8],
t: &'t [u8],
sa: &'sa SuffixArray<'s>,
dismat: usize,
small: usize,
) -> Self {
Context {
s,
t,
sa,
dismat,
small,
i0: 0,
j0: 0,
n0: 0,
b0: 0,
}
}
pub fn compare<P: Write>(self, mut patch: P, level: Compression, bsize: usize) -> Result<u64> {
let s = self.s;
let t = self.t;
let mut bz_ctrls = Vec::new();
let mut bz_delta = Vec::new();
let mut bz_extra = Vec::new();
{
let mut ctrls = BzEncoder::new(Cursor::new(&mut bz_ctrls), level);
let mut delta = BzEncoder::new(Cursor::new(&mut bz_delta), level);
let mut extra = BzEncoder::new(Cursor::new(&mut bz_extra), level);
let mut spos = 0;
let mut tpos = 0;
let mut ctl = [0; 24];
let mut dlt = Vec::with_capacity(bsize);
unsafe {
dlt.set_len(bsize);
}
for c in self {
encode_int(c.add as i64, &mut ctl[0..8]);
encode_int(c.copy as i64, &mut ctl[8..16]);
encode_int(c.seek, &mut ctl[16..24]);
ctrls.write_all(&ctl[..])?;
if c.add > 0 {
let mut n = c.add;
while n > 0 {
let k = Ord::min(n, bsize as u64) as usize;
let dat =
Iterator::zip(s[spos as usize..].iter(), t[tpos as usize..].iter());
for (d, (&x, &y)) in Iterator::zip(dlt[..k].iter_mut(), dat) {
*d = y.wrapping_sub(x)
}
delta.write_all(&dlt[..k])?;
spos += k as u64;
tpos += k as u64;
n -= k as u64;
}
}
if c.copy > 0 {
extra.write_all(&t[tpos as usize..(tpos + c.copy) as usize])?;
tpos += c.copy;
}
spos = spos.wrapping_add(c.seek as u64);
}
ctrls.flush()?;
delta.flush()?;
extra.flush()?;
}
let mut header = [0; 32];
let csize = bz_ctrls.len() as u64;
let dsize = bz_delta.len() as u64;
let esize = bz_extra.len() as u64;
let tsize = t.len() as u64;
header[0..8].copy_from_slice(b"BSDIFF40");
encode_int(csize as i64, &mut header[8..16]);
encode_int(dsize as i64, &mut header[16..24]);
encode_int(tsize as i64, &mut header[24..32]);
patch.write_all(&header[..])?;
patch.write_all(&bz_ctrls[..])?;
patch.write_all(&bz_delta[..])?;
patch.write_all(&bz_extra[..])?;
patch.flush()?;
Ok(32 + csize + dsize + esize)
}
#[inline]
fn previous_state(&self) -> (usize, usize, usize, usize) {
(self.i0, self.j0, self.n0, self.b0)
}
#[inline]
fn update_state(&mut self, i0: usize, j0: usize, n0: usize, b0: usize) {
self.i0 = i0;
self.j0 = j0;
self.n0 = n0;
self.b0 = b0;
}
#[inline]
fn search_next(&self) -> Option<(usize, usize, usize)> {
if self.j0 == self.t.len() && self.b0 == 0 {
return None;
}
let mut j = self.j0 + self.n0;
let mut k = j;
let mut m = 0;
while j < self.t.len() {
let (i, n) = range_to_extent(self.sa.search_lcp(&self.t[j..]));
while k < j + n {
let i = self.i0.saturating_add(k - self.j0);
if i < self.s.len() && self.s[i] == self.t[k] {
m += 1;
}
k += 1;
}
if n == 0 {
j += 1;
m = 0;
} else if m == n {
j += n;
m = 0;
} else if n <= self.small {
j += n;
m = 0;
} else if n <= m + self.dismat {
let i = self.i0.saturating_add(j - self.j0);
if i < self.s.len() && self.s[i] == self.t[j] {
m -= 1;
}
j += 1;
} else {
return Some((i, j, n));
}
}
Some((self.s.len(), self.t.len(), 0))
}
#[inline]
fn shrink_gap(&self, i: usize, j: usize) -> (usize, usize) {
let gap = &self.t[self.j0 + self.n0..j];
let suffix = &self.s[self.i0 + self.n0..];
let prefix = &self.s[..i];
let mut a0 = scan_similar(gap.iter(), suffix.iter());
let mut b = scan_similar(gap.iter().rev(), prefix.iter().rev());
if a0 + b > gap.len() {
let n = a0 + b - gap.len();
let xs = gap[gap.len() - b..a0].iter();
let ys = suffix[gap.len() - b..a0].iter();
let zs = prefix[prefix.len() - b..prefix.len() - b + n].iter();
let i = scan_divide(xs, ys, zs);
a0 = a0 - n + i;
b = b - i;
}
(a0, b)
}
}
#[inline]
fn scan_similar<T: Eq, I: Iterator<Item = T>>(xs: I, ys: I) -> usize {
let mut i = 0;
let mut matched = 0;
let mut max_score = 0;
for (n, eq) in (1..).zip(xs.zip(ys).map(|(x, y)| x == y)) {
matched += usize::from(eq);
let dismatched = n - matched;
let score = matched.wrapping_sub(dismatched) as isize;
if score > max_score {
i = n;
max_score = score;
}
}
i
}
#[inline]
fn scan_divide<T: Eq, I: Iterator<Item = T>>(xs: I, ys: I, zs: I) -> usize {
let mut i = 0;
let mut y_matched = 0;
let mut z_matched = 0;
let mut max_score = 0;
let eqs = xs.zip(ys).zip(zs).map(|((x, y), z)| (x == y, x == z));
for (n, (y_eq, z_eq)) in (1..).zip(eqs) {
y_matched += usize::from(y_eq);
z_matched += usize::from(z_eq);
let score = y_matched.wrapping_sub(z_matched) as isize;
if score > max_score {
i = n;
max_score = score;
}
}
i
}
impl<'s, 't, 'sa> Iterator for Context<'s, 't, 'sa> {
type Item = Control;
fn next(&mut self) -> Option<Self::Item> {
if let Some((i, j, n)) = self.search_next() {
let (i0, j0, n0, b0) = self.previous_state();
let (a0, b) = self.shrink_gap(i, j);
let add = (b0 + n0 + a0) as u64;
let copy = ((j - b) - (j0 + n0 + a0)) as u64;
let seek = (i - b).wrapping_sub(i0 + n0 + a0) as isize as i64;
self.update_state(i, j, n, b);
Some(Control { add, copy, seek })
} else {
None
}
}
}