// prs_rs/impls/comp/comp_dict.rs

1use crate::prelude::{Allocator, Box, Global, Layout};
2use core::ptr::{write, NonNull};
3use core::slice;
4use core::{mem::size_of, ptr::read_unaligned};
5
/// Width of file offsets stored in the dictionary; caps the supported input size
/// at `u32::MAX` bytes.
pub(crate) type MaxOffset = u32;
/// Counter type for the frequency table (one count per 2-byte key).
type FreqCountType = u32;
/// Number of possible 2-byte keys (`u16::MAX + 1`); also the number of
/// [`CompDictEntry`] slots in the dictionary.
const MAX_U16: usize = 65536;
const ALLOC_ALIGNMENT: usize = 64; // x86 cache line

// Round up to next multiple of ALLOC_ALIGNMENT.
// Padding inserted after the entry table so the offsets section starts on an
// ALLOC_ALIGNMENT boundary.
const DICTIONARY_PADDING: usize =
    (ALLOC_ALIGNMENT - (size_of::<[CompDictEntry; MAX_U16]>() % ALLOC_ALIGNMENT)) % ALLOC_ALIGNMENT;

/// Byte length of the fixed-size `[CompDictEntry; MAX_U16]` table at the start of the buffer.
const ENTRY_SECTION_LEN: usize = size_of::<[CompDictEntry; MAX_U16]>();
16
/// Dictionary for PRS compression.
///
/// This dictionary stores the locations of every single possible place that a specified 2-byte sequence
/// can be found, with the 2 byte combination being the dictionary 'key'. The values (locations) are
/// stored inside a shared buffer, where [`CompDictEntry`] dictates the file offsets of the locations
/// which start with this 2 byte combination. The items are stored in ascending order.
///
/// When the compressor is looking for longest match at given address, it will read the 2 bytes at the
/// address and use that as key [`CompDict::get_item`]. Then the offsets inside the returned entry
/// will be used to greatly speed up search.
pub struct CompDict<L: Allocator + Copy = Global, S: Allocator + Copy = Global> {
    /// Our memory allocation is here.
    /// Layout:
    /// - [CompDictEntry; MAX_U16] (dict), constant size
    /// - DICTIONARY_PADDING bytes so the next section is ALLOC_ALIGNMENT-aligned
    /// - [MaxOffset; data_len_num_bytes] (offsets), variable size. This buffer stores offsets of all items of 2 byte combinations.
    buf: NonNull<u8>,
    alloc_length: usize, // length of data that 'dict' and 'offsets' were made with
    /// Allocator that owns `buf`; also used to free it in `Drop`.
    long_lived_allocator: L,
    /// Allocator for transient scratch buffers used during [`CompDict::init`].
    short_lived_allocator: S,
}
37
impl<L: Allocator + Copy, S: Allocator + Copy> Drop for CompDict<L, S> {
    fn drop(&mut self) {
        unsafe {
            // dealloc buffer and box
            // SAFETY: `buf` was allocated by `long_lived_allocator` in `new_in`
            // with exactly this size (`alloc_length`) and alignment
            // (`ALLOC_ALIGNMENT`), so the layout round-trips and the
            // deallocation is valid.
            let layout = Layout::from_size_align_unchecked(self.alloc_length, ALLOC_ALIGNMENT);
            self.long_lived_allocator.deallocate(self.buf, layout);
        }
    }
}
47
/// An entry in [Compression Dictionary][`CompDict`].
///
/// This has pointer to current 'last min offset' [`CompDictEntry::last_read_item`] in [`CompDict::buf`]
/// allocation, and pointer to last offset for the current 2 byte key.
///
/// Last min offset [`CompDictEntry::last_read_item`] is advanced as items are sequentially read,
/// i.e. when [`CompDict::get_item`] is called. This offset corresponds to the first item which had
/// offset greater than `min_offset` parameter of last [`CompDict::get_item`] call.
///
/// When compressing, this means we can find next matching offset in LZ77 search window
/// in (effectively) O(1) time.
#[derive(Clone)]
#[allow(rustdoc::private_intra_doc_links)]
pub struct CompDictEntry {
    /// Address of the last minimum offset from previous call to [`CompDict::get_item`].
    last_read_item: *mut MaxOffset,
    /// Address of the last maximum offset from previous call to [`CompDict::get_item`].
    last_read_item_max: *mut MaxOffset,
    /// Item after last item within the offsets section of the [`CompDict::buf`].
    /// Acts as the exclusive end bound for this key's offset run.
    last_item: *mut MaxOffset,
}
69
70impl<L: Allocator + Copy, S: Allocator + Copy> CompDict<L, S> {
71    /// Create a new [`CompDict`] without initializing it.
72    ///
73    /// # Parameters
74    ///
75    /// - `data_len`: The length of the data that will be used to initialize the dictionary.
76    /// - `long_lived_allocator`: The allocator to use for long-lived memory allocation.
77    /// - `short_lived_allocator`: The allocator to use for short-lived memory allocation.
78    #[inline(always)]
79    pub fn new_in(data_len: usize, long_lived_allocator: L, short_lived_allocator: S) -> Self {
80        unsafe {
81            // constant
82            let offset_section_len = size_of::<MaxOffset>() * data_len;
83            let alloc_size = ENTRY_SECTION_LEN + DICTIONARY_PADDING + offset_section_len;
84
85            let layout = Layout::from_size_align_unchecked(alloc_size, ALLOC_ALIGNMENT);
86            let buf = long_lived_allocator.allocate(layout).unwrap();
87
88            CompDict {
89                buf: NonNull::new_unchecked(buf.as_ptr() as *mut u8),
90                alloc_length: alloc_size,
91                long_lived_allocator,
92                short_lived_allocator,
93            }
94        }
95    }
96
97    /// Initialize the [`CompDict`] with the given data and offset.
98    ///
99    /// # Parameters
100    ///
101    /// - `data`: The data to create the dictionary from.
102    /// - `offset`: The offset to add to the offsets in the dictionary.
103    ///
104    /// # Safety
105    ///
106    /// This function is unsafe as it operates on raw pointers and assumes that
107    /// the `CompDict` has been properly allocated with enough space for `data`.
108    #[inline(always)]
109    pub unsafe fn init(&mut self, data: &[u8], offset: usize) {
110        let dict_entry_ptr = self.buf.as_ptr() as *mut CompDictEntry;
111        let max_ofs_ptr =
112            self.buf
113                .as_ptr()
114                .add(ENTRY_SECTION_LEN + DICTIONARY_PADDING) as *mut MaxOffset;
115
116        // We will use this later to populate the dictionary.
117        // The `dict_insert_entry_ptrs` is a buffer which stores the pointer to the current location
118        // where we need to insert the offset for a given 2 byte sequence (hence length MAX_U16).
119        let alloc = self
120            .short_lived_allocator
121            .allocate(Layout::new::<[*mut MaxOffset; MAX_U16]>())
122            .unwrap()
123            .as_ptr() as *mut [*mut MaxOffset; MAX_U16];
124
125        let mut dict_insert_entry_ptrs =
126            Box::<[*mut MaxOffset; MAX_U16], S>::from_raw_in(alloc, self.short_lived_allocator);
127
128        // dict_insert_entry_ptrs is now a Box, so it will be deallocated when it goes out of scope.
129
130        // Initialize all CompDictEntries
131        let freq_table = self.create_frequency_table(data);
132        let mut cur_ofs_addr = max_ofs_ptr;
133        let mut cur_dict_entry = dict_entry_ptr;
134        let mut cur_freq_tbl_entry = freq_table.as_ptr();
135        let mut cur_ofs_insert_ptr = dict_insert_entry_ptrs.as_mut_ptr();
136        let max_dict_entry = cur_dict_entry.add(MAX_U16);
137
138        // This loop initializes each CompDictEntry (ies) based on the frequency table.
139        // It sets up the pointers for where the offsets for each 2-byte sequence will be stored.
140        // This also populates `dict_insert_entry_ptrs` (via `cur_ofs_insert_ptr`) setting each
141        // entry to the value of `cur_ofs_addr` (the current offset address).
142        while cur_dict_entry < max_dict_entry {
143            let num_items = *cur_freq_tbl_entry;
144            *cur_ofs_insert_ptr = cur_ofs_addr;
145
146            write(
147                cur_dict_entry,
148                CompDictEntry {
149                    last_read_item: cur_ofs_addr,
150                    last_read_item_max: cur_ofs_addr,
151                    last_item: cur_ofs_addr.add(num_items as usize),
152                },
153            );
154
155            cur_ofs_addr = cur_ofs_addr.add(num_items as usize);
156            cur_freq_tbl_entry = cur_freq_tbl_entry.add(1);
157            cur_dict_entry = cur_dict_entry.add(1);
158            cur_ofs_insert_ptr = cur_ofs_insert_ptr.add(1);
159        }
160
161        // The rest of the function is dedicated to actually populating the dictionary with offsets.
162        // Here we do the following:
163        // - Read Each 2 Byte Sequence
164        // - Use 2 Byte Sequence as Key
165        // - Gets insert location via `dict_insert_entry_ptrs` (**insert_entry_ptr)
166        // - Advance insert location for given key (*insert_entry_ptr)
167
168        // Iterate over the data, and add each 2-byte sequence to the dictionary.
169        #[cfg(not(target_pointer_width = "64"))]
170        {
171            let data_ptr_start = data.as_ptr();
172            let mut data_ptr = data.as_ptr();
173            let data_ptr_max = data.as_ptr().add(data.len().saturating_sub(1));
174            debug_assert!(data.len() as MaxOffset <= MaxOffset::MAX);
175
176            while data_ptr < data_ptr_max {
177                let key = read_unaligned(data_ptr as *const u16);
178                let insert_entry_ptr = dict_insert_entry_ptrs.as_mut_ptr().add(key as usize);
179
180                // Insert the offset into the dictionary
181                **insert_entry_ptr = (data_ptr.sub(data_ptr_start as usize) as MaxOffset)
182                    .wrapping_add(offset as MaxOffset);
183
184                *insert_entry_ptr = (*insert_entry_ptr).add(1); // advance to next entry
185
186                data_ptr = data_ptr.add(1);
187            }
188        }
189
190        #[cfg(target_pointer_width = "64")]
191        {
192            let mut data_ofs = 0;
193            let data_len = data.len();
194
195            while data_ofs < data_len.saturating_sub(16) {
196                // Doing a lot of the `data.as_ptr().add()` is ugly, but it makes LLVM do a better job.
197                let chunk = read_unaligned(data.as_ptr().add(data_ofs) as *const u64);
198
199                // Process every 16-bit sequence starting at each byte within the 64-bit chunk
200                for shift in 0..7 {
201                    // Successfully unrolled by LLVM
202                    let key = ((chunk >> (shift * 8)) & 0xFFFF) as u16;
203                    let insert_entry_ptr = dict_insert_entry_ptrs.as_mut_ptr().add(key as usize);
204
205                    **insert_entry_ptr = ((data.as_ptr().add(data_ofs + shift) as usize
206                        - data.as_ptr() as usize)
207                        as MaxOffset)
208                        .wrapping_add(offset as MaxOffset);
209
210                    *insert_entry_ptr = (*insert_entry_ptr).add(1);
211                }
212
213                // Handle the 16-bit number that spans the boundary between this chunk and the next
214                // Note: LLVM puts next_chunk in register and reuses it for next loop iteration (under x64), nothing special to do here.
215                let next_chunk = read_unaligned(data.as_ptr().add(data_ofs + 8) as *const u64);
216                let next_chunk_byte = (next_chunk & 0xFF) << 8;
217                let key = ((chunk >> 56) | next_chunk_byte) as u16;
218                let insert_entry_ptr = dict_insert_entry_ptrs.as_mut_ptr().add(key as usize);
219
220                **insert_entry_ptr = ((data.as_ptr().add(data_ofs + 7) as usize
221                    - data.as_ptr() as usize) as MaxOffset)
222                    .wrapping_add(offset as MaxOffset);
223                *insert_entry_ptr = (*insert_entry_ptr).add(1);
224
225                data_ofs += 8;
226            }
227
228            // Process any remaining bytes in the data.
229            while data_ofs < data_len.saturating_sub(1) {
230                let key = read_unaligned(data.as_ptr().add(data_ofs) as *const u16);
231                let insert_entry_ptr = dict_insert_entry_ptrs.as_mut_ptr().add(key as usize);
232
233                **insert_entry_ptr = ((data.as_ptr().add(data_ofs) as usize
234                    - data.as_ptr() as usize) as MaxOffset)
235                    .wrapping_add(offset as MaxOffset);
236                *insert_entry_ptr = (*insert_entry_ptr).add(1);
237                data_ofs += 1;
238            }
239        }
240    }
241
    /// Creates a frequency table for the given data.
    ///
    /// Counts how many times each 2-byte sequence (read as a native-endian
    /// `u16` at every byte position) occurs in `data`. The result has
    /// `MAX_U16` counters, indexed by the 2-byte key.
    ///
    /// # Parameters
    /// - `data`: The data to create the frequency table from.
    ///
    /// # Safety
    ///
    /// Performs unaligned raw reads within `data` (always in-bounds, as the
    /// loops stop before the final byte). `assume_init` on the zeroed slice is
    /// sound because all-zero bytes are a valid `FreqCountType` value.
    pub(crate) unsafe fn create_frequency_table(&self, data: &[u8]) -> Box<[FreqCountType], S> {
        // This actually has no overhead.

        let result =
            Box::<[FreqCountType], S>::new_zeroed_slice_in(MAX_U16, self.short_lived_allocator);
        let mut result = result.assume_init();

        #[cfg(not(target_pointer_width = "64"))]
        {
            // Iterate over the data, and add each 2-byte sequence to the dictionary.
            let data_ptr = data.as_ptr();
            let data_ofs_max = data.len().saturating_sub(1);
            let mut data_ofs = 0;
            while data_ofs < data_ofs_max {
                // LLVM successfully unrolls this
                let index = read_unaligned(data_ptr.add(data_ofs) as *const u16);
                result[index as usize] += 1;
                data_ofs += 1;
            }

            result
        }

        #[cfg(target_pointer_width = "64")]
        {
            let data_len = data.len();
            let mut data_ofs = 0;

            // Main loop: read 8 bytes at a time and count the 7 keys fully
            // contained in the chunk, plus the one straddling the chunk boundary.
            while data_ofs < data_len.saturating_sub(16) {
                let chunk = read_unaligned(data.as_ptr().add(data_ofs) as *const u64);

                // Process every 16-bit sequence starting at each byte within the 64-bit chunk
                for shift in 0..7 {
                    let index = ((chunk >> (shift * 8)) & 0xFFFF) as u16;
                    result[index as usize] += 1;
                }

                // Handle the 16-bit number that spans the boundary between this chunk and the next
                // Note: LLVM puts next_chunk in register and reuses it for next loop iteration (under x64), nothing special to do here.
                let next_chunk = read_unaligned(data.as_ptr().add(data_ofs + 8) as *const u64);
                let key = ((chunk >> 56) | ((next_chunk & 0xFF) << 8)) as u16;
                result[key as usize] += 1;

                data_ofs += 8;
            }

            // Process any remaining bytes in the data.
            while data_ofs < data_len.saturating_sub(1) {
                let index = read_unaligned(data.as_ptr().add(data_ofs) as *const u16);
                result[index as usize] += 1;
                data_ofs += 1;
            }

            result
        }
    }
302
303    /// Returns a slice of offsets for the given key which are greater than or equal to `min_ofs`
304    /// and less than or equal to `max_ofs`.
305    ///
306    /// # Parameters
307    ///
308    /// - `key`: The key to search for.
309    /// - `min_ofs`: The minimum offset returned in the slice.
310    /// - `max_ofs`: The maximum offset returned in the slice.
311    ///
312    /// # Safety
313    ///
314    /// This function is unsafe as it operates on raw pointers.
315    #[inline(always)]
316    pub unsafe fn get_item(&mut self, key: u16, min_ofs: usize, max_ofs: usize) -> &[MaxOffset] {
317        // Ensure that the key is within the bounds of the dictionary.
318        debug_assert!(key as usize <= MAX_U16, "Key is out of range!");
319
320        let entry = &mut self.get_dict_mut()[key as usize];
321        let mut cur_last_read_item = entry.last_read_item;
322
323        // Advance the 'last_read_item' pointer to the first offset greater than or equal to min_ofs
324        while cur_last_read_item < entry.last_item && *cur_last_read_item < min_ofs as MaxOffset {
325            cur_last_read_item = cur_last_read_item.add(1);
326        }
327        entry.last_read_item = cur_last_read_item;
328
329        // Find the end of the range - the first offset greater than max_ofs
330        // TODO: Try last read max item.
331        let mut end = entry.last_read_item_max;
332        while end < entry.last_item && *end <= max_ofs as MaxOffset {
333            end = end.add(1);
334        }
335        entry.last_read_item_max = end;
336
337        // Create a slice from the updated range
338        slice::from_raw_parts(
339            cur_last_read_item,
340            end.offset_from(cur_last_read_item) as usize,
341        )
342    }
343
344    /// Retrieves the dictionary entries section of this [`CompDict`].
345    pub fn get_dict_mut(&mut self) -> &mut [CompDictEntry; MAX_U16] {
346        unsafe {
347            let first_item = self.buf.as_ptr() as *mut CompDictEntry;
348            &mut *(first_item as *mut [CompDictEntry; MAX_U16])
349        }
350    }
351}
352
impl CompDict {
    /// Create a new [`CompDict`] without initializing it.
    ///
    /// Convenience constructor using the [`Global`] allocator for both the
    /// long-lived buffer and the short-lived scratch allocations.
    ///
    /// # Parameters
    ///
    /// - `data_len`: The length of the data that will be used to initialize the dictionary.
    pub fn new(data_len: usize) -> Self {
        Self::new_in(data_len, Global, Global)
    }
}
363
364impl CompDictEntry {
365    /// Returns a slice of offsets between `last_read_item` and `last_item`.
366    ///
367    /// # Safety
368    ///
369    /// This function is unsafe as it operates on raw pointers.
370    /// The caller must ensure that `last_read_item` and `last_item` are valid.
371    #[cfg(test)]
372    pub unsafe fn get_items(&mut self) -> &[MaxOffset] {
373        // Calculate the length of the slice by finding the distance between the pointers.
374        let length = self.last_item.offset_from(self.last_read_item) as usize;
375
376        // Create and return a slice from the raw pointers.
377        slice::from_raw_parts(self.last_read_item, length)
378    }
379}
380
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn can_create_dict() {
        unsafe {
            let data = &[0x41, 0x42, 0x43, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41];
            let mut comp_dict = CompDict::new(data.len());
            comp_dict.init(data, 0);

            // Assert that the items were correctly inserted.
            // Keys are read from the data as native-endian u16s; `.to_le()`
            // byte-swaps these literals on big-endian targets so the same
            // expectations hold on any platform.
            assert_eq!(
                comp_dict.get_dict_mut()[0x4241_u16.to_le() as usize].get_items(),
                &[0]
            );
            assert_eq!(
                comp_dict.get_dict_mut()[0x4342_u16.to_le() as usize].get_items(),
                &[1]
            );

            // Ensure we can get a slice. (0x4141 is byte-symmetric, so no swap needed.)
            let result = comp_dict.get_item(0x4141, 3, 4);
            assert_eq!(&[3, 4], result);

            // Access the next in sequence, and ensure it was correctly advanced.
            let result = comp_dict.get_item(0x4141, 4, 5);
            assert_eq!(&[4, 5], result);
            assert_eq!(*comp_dict.get_dict_mut()[0x4141].last_read_item, 4);

            // Access beyond end of sequence
            let result = comp_dict.get_item(0x4141, 5, 99);
            assert_eq!(&[5, 6, 7], result);
        }
    }

    #[test]
    fn can_create_dict_with_offset() {
        unsafe {
            let data = &[0x41, 0x42, 0x43, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41];
            let offset = 1000;
            let mut comp_dict = CompDict::new(data.len());
            comp_dict.init(data, offset);

            // Assert that the items were correctly inserted with the offset.
            // All stored offsets should be shifted by `offset` (1000).
            assert_eq!(
                comp_dict.get_dict_mut()[0x4241_u16.to_le() as usize].get_items(),
                &[1000]
            );
            assert_eq!(
                comp_dict.get_dict_mut()[0x4342_u16.to_le() as usize].get_items(),
                &[1001]
            );

            // Ensure we can get a slice with offsets.
            let result = comp_dict.get_item(0x4141, 1003, 1004);
            assert_eq!(&[1003, 1004], result);

            // Access the next in sequence, and ensure it was correctly advanced.
            let result = comp_dict.get_item(0x4141, 1004, 1005);
            assert_eq!(&[1004, 1005], result);
            assert_eq!(*comp_dict.get_dict_mut()[0x4141].last_read_item, 1004);

            // Access beyond end of sequence
            let result = comp_dict.get_item(0x4141, 1005, 1099);
            assert_eq!(&[1005, 1006, 1007], result);
        }
    }
}