extern crate bytepack;
pub mod hashmatch;
pub mod treematch;
#[cfg(test)]
mod tests;
use std::iter::Iterator;
use hashmatch::HashMatchIterator;
use treematch::TreeMatchIterator;
#[derive(Clone,Copy,Debug,PartialEq, Eq)]
pub struct Match {
pub first_pos: usize,
pub second_pos: usize,
pub length: usize,
}
impl Match {
pub fn new(first_pos: usize, second_pos: usize, length: usize) -> Match {
Match {
first_pos: first_pos,
second_pos: second_pos,
length: length,
}
}
pub fn first_end(&self) -> usize {
self.first_pos + self.length
}
pub fn second_end(&self) -> usize {
self.second_pos + self.length
}
}
#[derive(Clone,Copy,Debug)]
pub enum AlgoSpec {
HashMatch(usize),
TreeMatch(usize)
}
pub struct MatchIterator<'a> {
iter: Box<Iterator<Item=Match> + 'a>
}
impl<'a> MatchIterator<'a> {
pub fn new(first: &'a [u8], second: &'a [u8], algo_spec: AlgoSpec) -> MatchIterator<'a> {
MatchIterator {
iter: match algo_spec {
AlgoSpec::TreeMatch(mml) => Box::new(TreeMatchIterator::new(first, second, mml)),
AlgoSpec::HashMatch(1) => Box::new(HashMatchIterator::<u8>::new(first, second)),
AlgoSpec::HashMatch(2) => Box::new(HashMatchIterator::<u16>::new(first, second)),
AlgoSpec::HashMatch(3) => Box::new(HashMatchIterator::<[u8;3]>::new(first, second)),
AlgoSpec::HashMatch(4) => Box::new(HashMatchIterator::<u32>::new(first, second)),
AlgoSpec::HashMatch(5) => Box::new(HashMatchIterator::<[u8;5]>::new(first, second)),
AlgoSpec::HashMatch(6) => Box::new(HashMatchIterator::<[u16;3]>::new(first, second)),
AlgoSpec::HashMatch(7) => Box::new(HashMatchIterator::<[u8;7]>::new(first, second)),
AlgoSpec::HashMatch(8) => Box::new(HashMatchIterator::<u64>::new(first, second)),
AlgoSpec::HashMatch(10) => Box::new(HashMatchIterator::<[u16;5]>::new(first, second)),
AlgoSpec::HashMatch(12) => Box::new(HashMatchIterator::<[u32;3]>::new(first, second)),
AlgoSpec::HashMatch(14) => Box::new(HashMatchIterator::<[u16;7]>::new(first, second)),
AlgoSpec::HashMatch(16) => Box::new(HashMatchIterator::<[u64;2]>::new(first, second)),
AlgoSpec::HashMatch(20) => Box::new(HashMatchIterator::<[u32;5]>::new(first, second)),
AlgoSpec::HashMatch(24) => Box::new(HashMatchIterator::<[u64;3]>::new(first, second)),
AlgoSpec::HashMatch(28) => Box::new(HashMatchIterator::<[u32;7]>::new(first, second)),
AlgoSpec::HashMatch(32) => Box::new(HashMatchIterator::<[u64;4]>::new(first, second)),
AlgoSpec::HashMatch(40) => Box::new(HashMatchIterator::<[u64;5]>::new(first, second)),
AlgoSpec::HashMatch(48) => Box::new(HashMatchIterator::<[u64;6]>::new(first, second)),
AlgoSpec::HashMatch(56) => Box::new(HashMatchIterator::<[u64;7]>::new(first, second)),
AlgoSpec::HashMatch(64) => Box::new(HashMatchIterator::<[u64;8]>::new(first, second)),
_ => panic!("Unsupported AlgoSpec")
}
}
}
}
impl<'a> Iterator for MatchIterator<'a> {
type Item = Match;
#[inline]
fn next(&mut self) -> Option<Match> {
self.iter.next()
}
}
pub fn longest_common_substring(first: &[u8], second: &[u8], algo_spec: AlgoSpec) -> Match {
let mut longest = Match::new(0,0,0);
let match_iter = MatchIterator::new(first, second, algo_spec);
for m in match_iter {
if m.length > longest.length {
longest = m;
}
}
return longest;
}
pub fn longest_common_substrings(first: &[u8], second: &[u8], algo_spec: AlgoSpec, number: usize) -> Vec<Match> {
let match_iter = MatchIterator::new(first, second, algo_spec);
let mut top = Vec::<Match>::with_capacity(number + 1);
let mut threshold = 0;
for m in match_iter {
if m.length > threshold {
let mut insert_pos = 0;
while insert_pos < top.len() && top[insert_pos].length > m.length {
insert_pos += 1;
}
top.insert(insert_pos, m);
if top.len() > number {
top.truncate(number);
threshold = top.last().unwrap().length;
}
}
}
return top;
}
pub fn patch_set(first: &[u8], second: &[u8], algo_spec: AlgoSpec) -> Vec<Match> {
let mut match_iter = MatchIterator::new(first, second, algo_spec);
let mut patches = Vec::<Match>::new();
if let Some(m) = match_iter.next() {
patches.push(m);
}
for mut m in match_iter {
let last = patches.len() - 1;
if m.second_end() > patches[last].second_end() {
if m.second_pos == patches[last].second_pos {
patches[last] = m;
}
else if m.second_pos < patches[last].second_pos {
let overlap = patches[last].second_pos - m.second_pos;
m.first_pos += overlap;
m.second_pos += overlap;
m.length -= overlap;
patches[last] = m;
}
else if m.second_pos > patches[last].second_pos && m.second_pos < patches[last].second_end() {
let overlap = patches[last].second_end() - m.second_pos;
m.first_pos += overlap;
m.second_pos += overlap;
m.length -= overlap;
patches.push(m);
}
else {
patches.push(m);
}
}
}
return patches;
}
pub fn unique_strings(first: &[u8], second: &[u8], algo_spec: AlgoSpec) -> Vec<(usize,usize)> {
let match_iter = MatchIterator::new(first, second, algo_spec);
let mut uniques = Vec::<(usize,usize)>::new();
let mut covered = 0;
for m in match_iter {
if m.second_pos > covered {
uniques.push((covered, m.second_pos));
}
if m.second_end() > covered {
covered = m.second_end();
}
}
if covered < second.len() {
uniques.push((covered, second.len()));
}
return uniques;
}