1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#![allow(exceeding_bitshifts)]
use std::fs::{
File
};
use std::io::{
Read,
Result
};
pub type Fingerprint = u32;
#[derive(Clone)]
pub struct AddressAndFingerprint {
address: Option<usize>,
fingerprint: Fingerprint
}
pub type Fingerprints = Vec<AddressAndFingerprint>;
pub type FingerprintsLite = Vec<Fingerprint>;
pub type LargestIntType = u64;
pub const DEFAULT_PRIME: LargestIntType = 982451653;
pub const DEFAULT_WINDOW_SIZE: usize = 50;
pub const DEFAULT_SELECTION_BITS_LENGTH: usize = 8;
pub trait ManberFingerprints: Read {
fn manber_fingerprints(&mut self, opt_prime: Option<LargestIntType>, opt_window_size: Option<usize>, opt_selection_bits_length: Option<usize>) -> Result<Fingerprints> {
let mut fingerprints = Fingerprints::new();
let prime = opt_prime.or(Some(DEFAULT_PRIME)).unwrap();
let window_size = opt_window_size.or(Some(DEFAULT_WINDOW_SIZE)).unwrap();
let selection_bits_length = opt_selection_bits_length.or(Some(DEFAULT_SELECTION_BITS_LENGTH)).unwrap();
let fingerprint_table:Vec<LargestIntType> = (0..256).map(|e| e as LargestIntType * prime).collect();
let mut file_bytes:Vec<u8> = Vec::new();
self.read_to_end(&mut file_bytes)?;
let window = &file_bytes[0..window_size];
fingerprints.push(
AddressAndFingerprint {
address: Some(0),
fingerprint: Self::calculate_fingerprint(&window, &fingerprint_table)
}
);
Self::calculate_fingerprints(&file_bytes, &mut fingerprints, prime, window_size, selection_bits_length);
Ok(fingerprints)
}
fn calculate_fingerprints(
file_bytes: &[u8],
fingerprints: &mut Fingerprints,
prime: LargestIntType,
window_size: usize,
selection_bits_length: usize
) -> () {
let mut current_byte_index = 1;
let mut current_fingerprint = 0;
let file_bytes_length:usize = file_bytes.len();
while current_byte_index < file_bytes_length {
let head_byte = file_bytes[current_byte_index - window_size] as LargestIntType;
let tail_byte = file_bytes[current_byte_index] as LargestIntType;
let new_part = (fingerprints[current_fingerprint].clone().fingerprint as LargestIntType)
+ (prime * tail_byte);
let old_part = head_byte * prime;
let fingerprint = ((new_part - old_part) % (
LargestIntType::pow(2, (::std::mem::size_of::<Fingerprint>() * 8) as u32)
)
) as Fingerprint;
let selection_bits = fingerprint >>
((::std::mem::size_of::<Fingerprint>() * 8) - selection_bits_length) as LargestIntType;
if selection_bits == 0 || selection_bits_length == 0 {
fingerprints.push(
AddressAndFingerprint {
address: Some(current_byte_index),
fingerprint
}
);
current_fingerprint = current_fingerprint + 1;
}
current_byte_index = current_byte_index + 1;
}
}
fn calculate_fingerprint(window: &[u8], fingerprint_table: &[LargestIntType]) -> Fingerprint {
(window.iter().map(|&byte| fingerprint_table[byte as usize]).sum::<u64>() % (
LargestIntType::pow(2, (::std::mem::size_of::<Fingerprint>() * 8) as u32)
)) as Fingerprint
}
}
impl ManberFingerprints for File { }