lz4_compression/compress.rs
//! The compression algorithm.
//!
//! We make use of a hash table to find duplicates. This gives a reasonable compression ratio
//! with high performance. Memory usage is fixed, which, in contrast to other approaches, makes
//! it less memory hungry.

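//! # Example
//!
//! A usage sketch; the `lz4_compression::compress` module path here is an assumption based on
//! this file's location in the crate:
//!
//! ```text
//! use lz4_compression::compress::compress;
//!
//! let compressed = compress(b"Hello, hello, hello!");
//! ```
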
use std::convert::TryInto;

/// Duplication dictionary size.
///
/// Every four-byte batch is assigned an entry. When this number is lower, fewer entries
/// exist, and thus collisions are more likely, hurting the compression ratio.
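///
/// With the default of 4096 entries and 8-byte `usize` slots, the table occupies 32 KiB on a
/// 64-bit target.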
const DICTIONARY_SIZE: usize = 4096;
/// An LZ4 block.
///
/// This defines a single compression "unit", consisting of two parts: a number of raw
/// literals, and possibly a pointer to the already encoded buffer from which to copy.
#[derive(Debug)]
struct Block {
    /// The length (in bytes) of the literals section.
    lit_len: usize,

    /// The duplicates section, if any.
    ///
    /// Only the last block in a stream may lack the duplicates section.
    dup: Option<Duplicate>,
}

/// A consecutive sequence of bytes found in the already encoded part of the input.
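///
/// For example, a four-byte match of `abcd` found four bytes back is described by
/// `Duplicate { offset: 4, extra_bytes: 0 }`, while a six-byte match of `abcdef` found six
/// bytes back would be `Duplicate { offset: 6, extra_bytes: 2 }`.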
#[derive(Copy, Clone, Debug)]
struct Duplicate {
    /// The number of bytes before our cursor, where the duplicate starts.
    offset: u16,

    /// The length beyond the first four bytes.
    ///
    /// Adding four to this number yields the actual length.
    extra_bytes: usize,
}

/// An LZ4 encoder.
struct Encoder<'a> {
    /// The raw uncompressed input.
    input: &'a [u8],

    /// The compressed output.
    output: &'a mut Vec<u8>,

    /// The number of input bytes that have been encoded so far.
    cur: usize,

    /// The dictionary of previously encoded sequences.
    ///
    /// This is used to find duplicates in the stream so they are not written multiple times.
    ///
    /// Every four-byte batch is hashed, and its position in the input buffer is placed in the
    /// resulting slot. This way we can easily look up a candidate for back-references.
    dict: [usize; DICTIONARY_SIZE],
}

impl<'a> Encoder<'a> {
    /// Go forward by some number of bytes.
    ///
    /// This will update the cursor and dictionary to reflect the now processed bytes.
    ///
    /// This returns `false` once the cursor has moved past the end of the input.
    fn go_forward(&mut self, steps: usize) -> bool {
        // Go over all the bytes we are skipping and update the cursor and dictionary.
        for _ in 0..steps {
            // Insert the cursor position into the dictionary.
            self.insert_cursor();

            // Increment the cursor.
            self.cur += 1;
        }

        // Return `true` if the last byte stepped over was still within the input.
        self.cur <= self.input.len()
    }

    /// Insert the batch under the cursor into the dictionary.
    fn insert_cursor(&mut self) {
        // Make sure that there is at least one batch remaining.
        if self.remaining_batch() {
            // Insert the cursor into the table.
            self.dict[self.get_cur_hash()] = self.cur;
        }
    }

    /// Check if there are any remaining batches.
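    ///
    /// The comparison is strict, so no new match can begin in the last four bytes of the
    /// input, though an earlier match may still extend into them.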
    fn remaining_batch(&self) -> bool {
        self.cur + 4 < self.input.len()
    }

    /// Get the hash of the four bytes at the cursor.
    ///
    /// This is guaranteed to be below `DICTIONARY_SIZE`.
    fn get_cur_hash(&self) -> usize {
        // Use a PCG-style transform to generate a relatively good hash of the four-byte batch
        // at the cursor.
        let mut x = self.get_batch_at_cursor().wrapping_mul(0xa4d94a4f);
        let a = x >> 16;
        let b = x >> 30;
        x ^= a >> b;
        x = x.wrapping_mul(0xa4d94a4f);

        x as usize % DICTIONARY_SIZE
    }

    /// Read a 4-byte "batch" from some position.
    ///
    /// This will read a native-endian 4-byte integer from some position.
    fn get_batch(&self, n: usize) -> u32 {
        debug_assert!(n + 4 <= self.input.len(), "Reading a partial batch.");

        u32::from_ne_bytes(self.input[n..n + 4].try_into().unwrap())
    }

    /// Read the batch at the cursor.
    fn get_batch_at_cursor(&self) -> u32 {
        self.get_batch(self.cur)
    }

    /// Find a duplicate of the current batch.
    ///
    /// If a duplicate is found, a `Duplicate` is returned, holding the offset back to the
    /// match and the match length minus four.
    fn find_duplicate(&self) -> Option<Duplicate> {
        // If there is no remaining batch, we return none.
        if !self.remaining_batch() {
            return None;
        }

        // Find a candidate in the dictionary by hashing the current four bytes.
        let candidate = self.dict[self.get_cur_hash()];

        // Three requirements must hold for the candidate:
        // - The candidate is not the trap value (`!0`), which represents an empty bucket.
        // - We should not return a position which is merely a hash collision, so we check that
        //   the candidate actually matches what we search for.
        // - We can address offsets of up to 16 bits, hence we are only able to use the
        //   candidate if its offset is less than or equal to 0xFFFF.
        if candidate != !0
            && self.get_batch(candidate) == self.get_batch_at_cursor()
            && self.cur - candidate <= 0xFFFF {
            // Calculate the "extension bytes", i.e. the duplicate bytes beyond the batch:
            // the number of bytes after the first four that the match and the needle share.
            let ext = self.input[self.cur + 4..]
                .iter()
                .zip(&self.input[candidate + 4..])
                .take_while(|&(a, b)| a == b)
                .count();

            Some(Duplicate {
                offset: (self.cur - candidate) as u16,
                extra_bytes: ext,
            })
        } else {
            None
        }
    }

    /// Write an integer to the output in LSIC format.
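    ///
    /// The encoding writes `n / 255` leading `0xFF` bytes followed by a single byte holding
    /// the remainder. For example, `n = 15` encodes as `[0x0F]`, `n = 255` as `[0xFF, 0x00]`,
    /// and `n = 270` as `[0xFF, 0x0F]`.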
    fn write_integer(&mut self, mut n: usize) {
        // Write 0xFF bytes as long as the integer is at least that value.
        while n >= 0xFF {
            n -= 0xFF;
            self.output.push(0xFF);
        }

        // Write the remaining byte.
        self.output.push(n as u8);
    }

    /// Read the block at the top of the stream.
    fn pop_block(&mut self) -> Block {
        // The length of the literals section.
        let mut lit = 0;

        loop {
            // Search for a duplicate.
            if let Some(dup) = self.find_duplicate() {
                // We found a duplicate, so the literals section is over...

                // Move forward. Note that `extra_bytes` is the number of steps minus four,
                // because of the minimum match length, so we need to add four.
                self.go_forward(dup.extra_bytes + 4);

                return Block {
                    lit_len: lit,
                    dup: Some(dup),
                };
            }

            // Try to move forward.
            if !self.go_forward(1) {
                // We reached the end of the stream, and no duplicates section follows.
                return Block {
                    lit_len: lit,
                    dup: None,
                };
            }

            // No duplicate found yet, so extend the literals section.
            lit += 1;
        }
    }

    /// Complete the encoding into `self.output`.
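    ///
    /// Each iteration emits one sequence: a token byte (high nibble: literal length, low
    /// nibble: extra duplicate length), an optional LSIC extension of the literal length, the
    /// literals themselves, a two-byte little-endian offset, and an optional LSIC extension
    /// of the duplicate length. The final sequence lacks the offset and duplicate parts.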
    fn complete(&mut self) {
        // Construct one block at a time.
        loop {
            // The start of the literals section.
            let start = self.cur;

            // Read the next block into two sections, the literals and the duplicates.
            let block = self.pop_block();

            // Generate the higher half of the token.
            let mut token = if block.lit_len < 0xF {
                // Since we can fit the literals length into it, there is no need for
                // saturation.
                (block.lit_len as u8) << 4
            } else {
                // We were unable to fit the literals into it, so we saturate to 0xF. We will
                // later write the extensional value through LSIC encoding.
                0xF0
            };
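
            // For example, `lit_len = 3` gives a high nibble of 0x30, while `lit_len = 200`
            // saturates the nibble to 0xF0 and the remainder 200 - 15 = 185 is later written
            // through LSIC.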

            // Generate the lower half of the token, the duplicates length.
            let dup_extra_len = block.dup.map_or(0, |x| x.extra_bytes);
            token |= if dup_extra_len < 0xF {
                // We could fit it in.
                dup_extra_len as u8
            } else {
                // We were unable to fit it in, so we default to 0xF, which will later be
                // extended by LSIC encoding.
                0xF
            };

            // Push the token to the output stream.
            self.output.push(token);

            // If we were unable to fit the literals length into the token, write the
            // extensional part through LSIC.
            if block.lit_len >= 0xF {
                self.write_integer(block.lit_len - 0xF);
            }

            // Now, write the actual literals.
            self.output.extend_from_slice(&self.input[start..start + block.lit_len]);

            if let Some(Duplicate { offset, .. }) = block.dup {
                // Wait! There's more. Now, we encode the duplicates section.

                // Push the offset in little endian.
                self.output.push(offset as u8);
                self.output.push((offset >> 8) as u8);

                // If we were unable to fit the duplicates length into the token, write the
                // extensional part through LSIC.
                if dup_extra_len >= 0xF {
                    self.write_integer(dup_extra_len - 0xF);
                }
            } else {
                break;
            }
        }
    }
}

/// Compress all bytes of `input` into `output`.
pub fn compress_into(input: &[u8], output: &mut Vec<u8>) {
    Encoder {
        input,
        output,
        cur: 0,
        dict: [!0; DICTIONARY_SIZE],
    }.complete();
}

/// Compress all bytes of `input`.
pub fn compress(input: &[u8]) -> Vec<u8> {
    // In most cases, the compression won't expand the size, so we set the input size as the
    // capacity.
    let mut vec = Vec::with_capacity(input.len());

    compress_into(input, &mut vec);

    vec
}
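
// A couple of sanity tests; the expected byte sequences are derived by tracing the encoder
// above (a final block is a token byte plus literals, with no duplicates part).
#[cfg(test)]
mod tests {
    use super::compress;

    #[test]
    fn empty_input() {
        // The empty stream is a single block: a zero token with no literals or duplicates.
        assert_eq!(compress(b""), vec![0x00]);
    }

    #[test]
    fn single_literal() {
        // One literal: token 0x10 (literal length 1, no duplicate), followed by the byte.
        assert_eq!(compress(b"a"), vec![0x10, b'a']);
    }
}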