use super::utils::*;
use bzip2::write::BzEncoder;
use std::io::{Cursor, Result, Write};
use std::ops::Range;
use suffix_array::SuffixArray;
pub use bzip2::Compression;
pub use suffix_array::MAX_LENGTH;
pub const DISMATCH_COUNT: usize = 8;
pub const SMALL_MATCH: usize = 12;
const LONG_REPEATITION: usize = 128;
pub const BUFFER_SIZE: usize = 4096;
pub const LEVEL: Compression = Compression::Default;
pub struct Bsdiff<'s, 't> {
s: &'s [u8],
t: &'t [u8],
dismat: usize,
small: usize,
lrep: usize,
bsize: usize,
level: Compression,
}
impl<'s, 't> Bsdiff<'s, 't> {
pub fn new(source: &'s [u8], target: &'t [u8]) -> Self {
if source.len() > MAX_LENGTH {
panic!("source data is too large to be indexed");
}
Bsdiff {
s: source,
t: target,
dismat: DISMATCH_COUNT,
small: SMALL_MATCH,
lrep: LONG_REPEATITION,
level: Compression::Default,
bsize: BUFFER_SIZE,
}
}
pub fn source(mut self, s: &'s [u8]) -> Self {
self.s = s;
self
}
pub fn target(mut self, t: &'t [u8]) -> Self {
self.t = t;
self
}
pub fn dismatch_count(mut self, mut dis: usize) -> Self {
if dis < 1 {
dis = 1;
}
self.dismat = dis;
self
}
pub fn small_match(mut self, sm: usize) -> Self {
self.small = sm;
self
}
#[allow(unused)]
fn long_repeation(mut self, mut lr: usize) -> Self {
if lr < 64 {
lr = 64;
}
self.lrep = lr;
self
}
pub fn compression_level(mut self, lv: Compression) -> Self {
self.level = lv;
self
}
pub fn buffer_size(mut self, mut bs: usize) -> Self {
if bs < 128 {
bs = 128;
}
self.bsize = bs;
self
}
pub fn compare<P: Write>(&self, patch: P) -> Result<u64> {
let diff = SaDiff::new(self.s, self.t, self.dismat, self.small, self.lrep);
pack(self.s, self.t, diff, patch, self.level, self.bsize)
}
}
fn pack<D, P>(s: &[u8], t: &[u8], diff: D, mut p: P, lv: Compression, bsize: usize) -> Result<u64>
where
D: Iterator<Item = Control>,
P: Write,
{
let mut bz_ctrls = Vec::new();
let mut bz_delta = Vec::new();
let mut bz_extra = Vec::new();
{
let mut ctrls = BzEncoder::new(Cursor::new(&mut bz_ctrls), lv);
let mut delta = BzEncoder::new(Cursor::new(&mut bz_delta), lv);
let mut extra = BzEncoder::new(Cursor::new(&mut bz_extra), lv);
let mut spos = 0;
let mut tpos = 0;
let mut cbuf = [0; 24];
let mut dbuf = Vec::with_capacity(bsize);
unsafe {
dbuf.set_len(bsize);
}
for ctl in diff {
encode_int(ctl.add as i64, &mut cbuf[0..8]);
encode_int(ctl.copy as i64, &mut cbuf[8..16]);
encode_int(ctl.seek, &mut cbuf[16..24]);
ctrls.write_all(&cbuf[..])?;
if ctl.add > 0 {
let mut n = ctl.add;
while n > 0 {
let k = Ord::min(n, bsize as u64) as usize;
let dat = Iterator::zip(s[spos as usize..].iter(), t[tpos as usize..].iter());
for (d, (&x, &y)) in Iterator::zip(dbuf[..k].iter_mut(), dat) {
*d = y.wrapping_sub(x)
}
delta.write_all(&dbuf[..k])?;
spos += k as u64;
tpos += k as u64;
n -= k as u64;
}
}
if ctl.copy > 0 {
extra.write_all(&t[tpos as usize..(tpos + ctl.copy) as usize])?;
tpos += ctl.copy;
}
spos = spos.wrapping_add(ctl.seek as u64);
}
ctrls.flush()?;
delta.flush()?;
extra.flush()?;
}
let mut header = [0; 32];
let csize = bz_ctrls.len() as u64;
let dsize = bz_delta.len() as u64;
let esize = bz_extra.len() as u64;
let tsize = t.len() as u64;
header[0..8].copy_from_slice(b"BSDIFF40");
encode_int(csize as i64, &mut header[8..16]);
encode_int(dsize as i64, &mut header[16..24]);
encode_int(tsize as i64, &mut header[24..32]);
p.write_all(&header[..])?;
p.write_all(&bz_ctrls[..])?;
p.write_all(&bz_delta[..])?;
p.write_all(&bz_extra[..])?;
p.flush()?;
Ok(32 + csize + dsize + esize)
}
struct SaDiff<'s, 't> {
s: &'s [u8],
t: &'t [u8],
sa: SuffixArray<'s>,
st: SkipTable,
dismat: usize,
small: usize,
i0: usize,
j0: usize,
n0: usize,
b0: usize,
}
impl<'s, 't> SaDiff<'s, 't> {
pub fn new(s: &'s [u8], t: &'t [u8], dismat: usize, small: usize, lrep: usize) -> Self {
let sa = SuffixArray::new(s);
let st = SkipTable::new(t, lrep);
SaDiff {
s,
t,
sa,
st,
dismat,
small,
i0: 0,
j0: 0,
n0: 0,
b0: 0,
}
}
#[inline]
fn previous_state(&self) -> (usize, usize, usize, usize) {
(self.i0, self.j0, self.n0, self.b0)
}
#[inline]
fn update_state(&mut self, i0: usize, j0: usize, n0: usize, b0: usize) {
self.i0 = i0;
self.j0 = j0;
self.n0 = n0;
self.b0 = b0;
}
#[inline]
fn search_next(&mut self) -> Option<(usize, usize, usize)> {
if self.j0 == self.t.len() && self.b0 == 0 {
return None;
}
let mut j = self.j0 + self.n0;
let mut k = j;
let mut m = 0;
while j < self.t.len().saturating_sub(self.small) {
if let Some(skip) = self.st.pop(j) {
j += skip;
k = j;
m = 0;
}
let (i, n) = range_to_extent(self.sa.search_lcp(&self.t[j..]));
while k < j + n {
let i = self.i0.saturating_add(k - self.j0);
if i < self.s.len() && self.s[i] == self.t[k] {
m += 1;
}
k += 1;
}
if n == 0 {
j += 1;
m = 0;
} else if m == n || n <= self.small {
j += n;
m = 0;
} else if n <= m + self.dismat {
let i = self.i0.saturating_add(j - self.j0);
if i < self.s.len() && self.s[i] == self.t[j] {
m -= 1;
}
j += 1;
} else {
return Some((i, j, n));
}
}
Some((self.s.len(), self.t.len(), 0))
}
#[inline]
fn shrink_gap(&self, i: usize, j: usize) -> (usize, usize) {
let gap = &self.t[self.j0 + self.n0..j];
let suffix = &self.s[self.i0 + self.n0..];
let prefix = &self.s[..i];
let mut a0 = scan_similar(gap.iter(), suffix.iter());
let mut b = scan_similar(gap.iter().rev(), prefix.iter().rev());
if a0 + b > gap.len() {
let n = a0 + b - gap.len();
let xs = gap[gap.len() - b..a0].iter();
let ys = suffix[gap.len() - b..a0].iter();
let zs = prefix[prefix.len() - b..prefix.len() - b + n].iter();
let i = scan_divide(xs, ys, zs);
a0 = a0 - n + i;
b = b - i;
}
(a0, b)
}
}
impl<'s, 't> Iterator for SaDiff<'s, 't> {
type Item = Control;
fn next(&mut self) -> Option<Self::Item> {
if let Some((i, j, n)) = self.search_next() {
let (i0, j0, n0, b0) = self.previous_state();
let (a0, b) = self.shrink_gap(i, j);
let add = (b0 + n0 + a0) as u64;
let copy = ((j - b) - (j0 + n0 + a0)) as u64;
let seek = (i - b).wrapping_sub(i0 + n0 + a0) as isize as i64;
self.update_state(i, j, n, b);
Some(Control { add, copy, seek })
} else {
None
}
}
}
struct SkipTable {
tab: Vec<(u32, u32)>,
}
impl SkipTable {
pub fn new(s: &[u8], large: usize) -> Self {
let mut tab = Vec::new();
let mut i = 0;
let n = s.len().saturating_sub(1);
while i < n {
let x = s[i];
let c = 1 + s[i + 1..].iter().take_while(|&&y| y == x).count();
if c >= large {
tab.push((i as u32, c as u32));
}
i += c;
}
tab.shrink_to_fit();
tab.reverse();
SkipTable { tab }
}
#[inline]
pub fn pop(&mut self, i: usize) -> Option<usize> {
while self.tab.len() > 0 {
let (off, len) = self.tab[self.tab.len() - 1];
let j = off as usize;
let k = j + len as usize;
if i < j {
break;
}
if i < k {
return Some(k - i);
}
self.tab.pop();
}
None
}
}
#[inline]
fn range_to_extent(range: Range<usize>) -> (usize, usize) {
let Range { start, end } = range;
(start, end.saturating_sub(start))
}
#[inline]
fn scan_similar<T: Eq, I: Iterator<Item = T>>(xs: I, ys: I) -> usize {
let mut i = 0;
let mut matched = 0;
let mut max_score = 0;
for (n, eq) in (1..).zip(xs.zip(ys).map(|(x, y)| x == y)) {
matched += usize::from(eq);
let dismatched = n - matched;
let score = matched.wrapping_sub(dismatched) as isize;
if score > max_score {
i = n;
max_score = score;
}
}
i
}
#[inline]
fn scan_divide<T: Eq, I: Iterator<Item = T>>(xs: I, ys: I, zs: I) -> usize {
let mut i = 0;
let mut y_matched = 0;
let mut z_matched = 0;
let mut max_score = 0;
let eqs = xs.zip(ys).zip(zs).map(|((x, y), z)| (x == y, x == z));
for (n, (y_eq, z_eq)) in (1..).zip(eqs) {
y_matched += usize::from(y_eq);
z_matched += usize::from(z_eq);
let score = y_matched.wrapping_sub(z_matched) as isize;
if score > max_score {
i = n;
max_score = score;
}
}
i
}