speck_core/delta/
builder.rs1use alloc::vec::Vec;
4use crate::delta::{Delta, Op, MAX_HUNK_SIZE};
5use crate::error::Result;
6
7#[derive(Debug)]
9pub struct DeltaBuilder {
10 min_match_len: usize,
11 max_search_depth: usize,
12}
13
14impl Default for DeltaBuilder {
15 fn default() -> Self {
16 Self {
17 min_match_len: 8,
18 max_search_depth: 100,
19 }
20 }
21}
22
23impl DeltaBuilder {
24 pub fn new() -> Self {
26 Self::default()
27 }
28
29 pub fn min_match_len(mut self, len: usize) -> Self {
31 self.min_match_len = len.max(4);
32 self
33 }
34
35 pub fn build(&self, old: &[u8], new: &[u8]) -> Result<Delta> {
37 if old.is_empty() {
38 return Ok(Delta {
40 source_size: 0,
41 target_size: new.len() as u64,
42 ops: vec![Op::Insert { data: new.to_vec() }],
43 });
44 }
45
46 let mut ops = Vec::new();
47 let mut new_pos: usize = 0;
48
49 let sa = SuffixArray::new(old);
51
52 while new_pos < new.len() {
53 let best = self.find_best_match(&sa, old, &new[new_pos..], new_pos);
55
56 if let Some((src_off, match_len)) = best {
57 if match_len >= self.min_match_len {
58 ops.push(Op::Copy {
60 src_offset: src_off as u64,
61 length: match_len as u64,
62 });
63 new_pos += match_len;
64 continue;
65 }
66 }
67
68 let literal_start = new_pos;
70 let mut literal_len = 1;
71
72 while literal_len < MAX_HUNK_SIZE
73 && new_pos + literal_len < new.len() {
74 if literal_len >= self.min_match_len {
76 let lookahead = &new[new_pos + literal_len..];
77 if self.find_best_match(&sa, old, lookahead, new_pos + literal_len)
78 .map_or(false, |(_, len)| len >= self.min_match_len) {
79 break;
80 }
81 }
82 literal_len += 1;
83 }
84
85 ops.push(Op::Insert {
86 data: new[literal_start..literal_start + literal_len].to_vec(),
87 });
88 new_pos += literal_len;
89 }
90
91 let ops = self.coalesce_copies(ops);
93
94 Ok(Delta {
95 source_size: old.len() as u64,
96 target_size: new.len() as u64,
97 ops,
98 })
99 }
100
101 fn find_best_match(&self, sa: &SuffixArray, old: &[u8], new_suffix: &[u8], _new_pos: usize) -> Option<(usize, usize)> {
102 if new_suffix.len() < self.min_match_len {
103 return None;
104 }
105
106 let mut best_len = 0;
108 let mut best_pos = 0;
109
110 let step = (old.len() / self.max_search_depth).max(1);
112 for i in (0..old.len()).step_by(step) {
113 let len = common_prefix_length(&old[i..], new_suffix);
114 if len > best_len && len >= self.min_match_len {
115 best_len = len;
116 best_pos = i;
117 if best_len == new_suffix.len() {
118 break;
119 }
120 }
121 }
122
123 if best_len >= self.min_match_len {
124 Some((best_pos, best_len))
125 } else {
126 None
127 }
128 }
129
130 fn coalesce_copies(&self, ops: Vec<Op>) -> Vec<Op> {
131 let mut result = Vec::with_capacity(ops.len());
132
133 for op in ops {
134 if let Some(last) = result.last_mut() {
135 match (last, &op) {
136 (Op::Copy { src_offset, length }, Op::Copy {
137 src_offset: next_src,
138 length: next_len,
139 }) => {
140 if *src_offset + *length == *next_src {
142 *length += *next_len;
143 continue;
144 }
145 }
146 (Op::Insert { data }, Op::Insert { data: next_data }) => {
147 if data.len() + next_data.len() <= MAX_HUNK_SIZE {
148 data.extend_from_slice(next_data);
149 continue;
150 }
151 }
152 _ => {}
153 }
154 }
155 result.push(op);
156 }
157
158 result
159 }
160}
161
162struct SuffixArray<'a> {
164 text: &'a [u8],
165 sa: Vec<usize>,
166}
167
168impl<'a> SuffixArray<'a> {
169 fn new(text: &'a [u8]) -> Self {
170 let mut sa: Vec<usize> = (0..text.len()).collect();
171 sa.sort_by(|&a, &b| text[a..].cmp(&text[b..]));
172 Self { text, sa }
173 }
174}
175
176fn common_prefix_length(a: &[u8], b: &[u8]) -> usize {
177 a.iter().zip(b.iter()).take_while(|(x, y)| x == y).count()
178}