prs_rs/impls/comp/
compress.rs

1use super::lz77_matcher::{
2    lz77_get_longest_match_fast, lz77_get_longest_match_slow, Lz77Match, Lz77Parameters,
3};
4use crate::impls::comp::comp_dict::CompDict;
5use crate::prelude::Allocator;
6use core::{ptr::write_unaligned, slice};
7
8/// Size of a CompDict window.
9///
10/// This is the size for the look behind buffer (sized MAX_OFFSET) and the following lookahead buffer
11/// (sized WINDOW_SIZE - MAX_OFFSET)
12///
13/// We process the data in smaller windows, to reduce RAM usage and improve L2 cache hit rate on modern CPUs.
14/// This must be at least `MAX_OFFSET + COPY_MAX_LENGTH`.
15///
16/// This should be set based on available amount of L2 cache.
17///
18/// ----------------
19///
20/// As per CompDict Layout
21/// - [CompDictEntry; MAX_U16] (dict), constant size.
22/// - [MaxOffset; WINDOW_SIZE] (offsets), variable size. This buffer stores offsets of all items of 2 byte combinations.
23///
24/// Which is:
25/// - 12/24 (CompDictEntry) * 64K = 768K/1.5M
26/// - 4 (MaxOffset) * WINDOW_SIZE = 4 * 64K = 256K
27///
28/// During init we also use up:
29/// - 4/8 (InsertPointer) * 64K = 256K/512K
30/// - 2 (FreqTableEntry) * 64K = 128K
31const WINDOW_SIZE: usize = u16::MAX as usize;
32
33const MAX_OFFSET: usize = 0x1FFF;
34const COPY_MAX_LENGTH: isize = 0x100;
35const SHORT_COPY_MAX_OFFSET: isize = 0x100;
36const SHORT_COPY_MAX_LEN: usize = 5;
37const SHORT_COPY_MIN_LEN: usize = 2;
38
39/// Parameters
40///
41/// - `source`: A pointer to the decompressed data.
42/// - `destination`: A pointer to where to put the compressed data.
43/// - `source_len`: Length of the compressed data.
44/// - `long_lived_allocator`: The allocator to use for long-lived memory allocation.
45/// - `short_lived_allocator`: The allocator to use for short-lived memory allocation.
46///
47/// # Returns
48/// Number of bytes written to `destination`.
49///
50/// # Safety
51///
52/// It's safe as long as `dest` has sufficient length (max length: [`crate::util::prs_calculate_max_compressed_size`])
53/// and the remaining parameters are valid.
54pub unsafe fn prs_compress<L: Allocator + Copy, S: Allocator + Copy>(
55    source: *const u8,
56    mut dest: *mut u8,
57    source_len: usize,
58    long_lived_allocator: L,
59    short_lived_allocator: S,
60) -> usize {
61    let orig_dest = dest as usize;
62
63    // Write first control byte.
64    let mut last_init_covered_all = false;
65    let mut control_byte_ptr = reserve_control_byte(&mut dest);
66    let mut control_bit_position = 0;
67    let mut source_ofs = 0;
68    let mut dict = CompDict::new_in(WINDOW_SIZE, long_lived_allocator, short_lived_allocator);
69
70    // First byte is always a direct encode, so we can encode it before looping,
71    // doing this here saves a branch in lz77_get_longest_match, improving perf.
72    if source_len > 0 {
73        append_control_bit(
74            1,
75            &mut dest,
76            &mut control_bit_position,
77            &mut control_byte_ptr,
78        );
79        append_byte(*source, &mut dest);
80        source_ofs += 1;
81    }
82
83    // Loop through all the bytes, as long as there are less than COPY_MAX_LENGTH bytes left.
84    // We eliminate a branch inside lz77_get_longest_match by doing this, saving a bit of perf.
85    let fast_processing_end = source_len.saturating_sub(COPY_MAX_LENGTH as usize);
86    while source_ofs < fast_processing_end {
87        let window_start = source_ofs.saturating_sub(MAX_OFFSET);
88        let window_end = window_start + WINDOW_SIZE;
89        let window_end = if window_end >= source_len {
90            last_init_covered_all = true;
91            source_len
92        } else {
93            window_end
94        };
95        let window_slice =
96            slice::from_raw_parts(source.add(window_start), window_end - window_start);
97        dict.init(window_slice, window_start);
98
99        // Process the current window.
100        while source_ofs < window_end.min(fast_processing_end) {
101            let result = lz77_get_longest_match_fast::<CompressParameters, L, S>(
102                &mut dict, source, source_ofs,
103            );
104
105            encode_lz77_match(
106                result,
107                &mut dest,
108                &mut control_bit_position,
109                &mut control_byte_ptr,
110                &mut source_ofs,
111                source,
112            );
113        }
114    }
115
116    // Handle the remaining bytes.
117    // We sub 1 because `lz77_get_longest_match` reads the next 2 bytes.
118    // If our file happens to be 1 byte from the end, we can't read 2 bytes.
119    if !last_init_covered_all {
120        // Only reinitialize the dictionary if we haven't already covered the entire file
121        let window_start = source_ofs.saturating_sub(MAX_OFFSET);
122        let window_slice =
123            slice::from_raw_parts(source.add(window_start), source_len - window_start);
124        dict.init(window_slice, window_start);
125    }
126
127    while source_ofs < source_len.saturating_sub(1) {
128        let result = lz77_get_longest_match_slow::<CompressParameters, L, S>(
129            &mut dict, source, source_len, source_ofs,
130        );
131
132        encode_lz77_match(
133            result,
134            &mut dest,
135            &mut control_bit_position,
136            &mut control_byte_ptr,
137            &mut source_ofs,
138            source,
139        );
140    }
141
142    // There is potentially one last remaining byte.
143    if source_ofs == source_len.wrapping_sub(1) {
144        append_control_bit(
145            1,
146            &mut dest,
147            &mut control_bit_position,
148            &mut control_byte_ptr,
149        );
150        append_byte(*source.add(source_ofs), &mut dest);
151    }
152
153    // Finish the PRS file
154    append_control_bit(
155        0,
156        &mut dest,
157        &mut control_bit_position,
158        &mut control_byte_ptr,
159    );
160    append_control_bit(
161        1,
162        &mut dest,
163        &mut control_bit_position,
164        &mut control_byte_ptr,
165    );
166
167    append_byte(0x00, &mut dest);
168    append_byte(0x00, &mut dest);
169
170    dest as usize - orig_dest
171}
172
173#[inline(always)]
174unsafe fn encode_lz77_match(
175    result: Lz77Match,
176    dest: &mut *mut u8,
177    control_bit_position: &mut usize,
178    control_byte_ptr: &mut *mut u8,
179    source_ofs: &mut usize,
180    source: *const u8,
181) {
182    // Check for short copy.
183    if result.offset >= -SHORT_COPY_MAX_OFFSET
184        && result.length >= SHORT_COPY_MIN_LEN
185        && result.length <= SHORT_COPY_MAX_LEN
186    {
187        write_short_copy(dest, &result, control_bit_position, control_byte_ptr);
188        *source_ofs += result.length;
189    } else if result.length <= 2 {
190        // Otherwise write a direct byte if we can't compress.
191        append_control_bit(1, dest, control_bit_position, control_byte_ptr);
192        append_byte(*source.add(*source_ofs), dest);
193        *source_ofs += 1;
194    } else {
195        // Otherwise encode a long copy
196        if result.length <= 9 {
197            write_long_copy_small(dest, &result, control_bit_position, control_byte_ptr);
198            *source_ofs += result.length;
199        } else {
200            write_long_copy_large(dest, &result, control_bit_position, control_byte_ptr);
201            *source_ofs += result.length;
202        }
203    }
204}
205
206/// Writes a short copy (00 opcode), size 2-5, offset 1-256
207#[inline(always)]
208unsafe fn write_short_copy(
209    dest: &mut *mut u8,
210    result: &Lz77Match,
211    control_bit_position: &mut usize,
212    control_byte_ptr: &mut *mut u8,
213) {
214    let encoded_len = result.length - 2;
215
216    // Write 00 opcode.
217    append_control_bit(0, dest, control_bit_position, control_byte_ptr);
218    append_control_bit(0, dest, control_bit_position, control_byte_ptr);
219
220    // Pack the size with the second byte first.
221    append_control_bit(
222        ((encoded_len >> 1) & 1) as u8,
223        dest,
224        control_bit_position,
225        control_byte_ptr,
226    );
227
228    append_control_bit(
229        (encoded_len & 1) as u8,
230        dest,
231        control_bit_position,
232        control_byte_ptr,
233    );
234
235    // Write the offset into the destination. As negative int, truncated to byte
236    append_byte((result.offset & 0xFF) as u8, dest);
237}
238
239/// Writes a long copy (01 opcode), with small length. size 3-9, offset 1-8191
240#[inline(always)]
241unsafe fn write_long_copy_small(
242    dest: &mut *mut u8,
243    result: &Lz77Match,
244    control_bit_position: &mut usize,
245    control_byte_ptr: &mut *mut u8,
246) {
247    // Write 01 opcode.
248    append_control_bit(0, dest, control_bit_position, control_byte_ptr);
249    append_control_bit(1, dest, control_bit_position, control_byte_ptr);
250
251    // Pack the value.
252    let len = (result.length - 2) as isize;
253    let ofs = result.offset << 3 & 0xFFF8;
254    let packed = (ofs | len) as u16;
255
256    // Write the packed value.
257    append_u16_le(packed, dest);
258}
259
260/// Writes a long copy (01 opcode), with large length. size 1-256, offset 1-8191
261#[inline(always)]
262unsafe fn write_long_copy_large(
263    dest: &mut *mut u8,
264    result: &Lz77Match,
265    control_bit_position: &mut usize,
266    control_byte_ptr: &mut *mut u8,
267) {
268    // Write 01 opcode.
269    append_control_bit(0, dest, control_bit_position, control_byte_ptr);
270    append_control_bit(1, dest, control_bit_position, control_byte_ptr);
271
272    // Pack the value.
273    let packed = (result.offset << 3 & 0xFFF8) as u16;
274
275    // Write the packed value.
276    append_u16_le(packed, dest);
277    append_byte((result.length - 1) as u8, dest);
278}
279
280/// Appends a control 'bit' to the current control byte.
281/// If the control byte is full, it's appended to the destination.
282///
283/// # Parameters
284///
285/// - `bit`: The either 0 or 1 bit to be appended onto the control byte.
286/// - `dest`: The destination where the compressed data goes.
287/// - `control_bit_position`: The current bit position in the control byte.
288/// - `control_byte`: The current control byte.
289#[inline]
290unsafe fn append_control_bit(
291    bit: u8,
292    dest: &mut *mut u8,
293    control_bit_position: &mut usize,
294    control_byte_ptr: &mut *mut u8,
295) {
296    // Reserve next control byte if necessary.
297    if *control_bit_position >= 8 {
298        *control_byte_ptr = reserve_control_byte(dest);
299        *control_bit_position = 0;
300    }
301
302    // Append the current bit position and go to next position.
303    **control_byte_ptr |= bit << *control_bit_position;
304    *control_bit_position += 1;
305}
306
307/// Advances by one, and returns address of previous byte.
308#[inline]
309fn reserve_control_byte(dest: &mut *mut u8) -> *mut u8 {
310    unsafe {
311        **dest = 0; // Make suer it's zeroed in case user passes non-zeroed buffer.
312        let result = *dest;
313        *dest = dest.add(1);
314        result
315    }
316}
317
318/// Appends single byte to destination.
319#[inline]
320unsafe fn append_byte(value: u8, dest: &mut *mut u8) {
321    **dest = value;
322    *dest = dest.add(1);
323}
324
325/// Appends two bytes to the destination, little endian
326#[inline]
327unsafe fn append_u16_le(value: u16, dest: &mut *mut u8) {
328    write_unaligned((*dest) as *mut u16, value.to_le());
329    *dest = dest.add(2);
330}
331
332struct CompressParameters;
333impl Lz77Parameters for CompressParameters {
334    const MAX_OFFSET: usize = MAX_OFFSET;
335    const MAX_LENGTH: usize = COPY_MAX_LENGTH as usize;
336}