// zenflate 0.3.0
//
// Pure Rust DEFLATE/zlib/gzip compression and decompression.
// (See the crate-level documentation for an overview.)
//! Hash Table (ht) matchfinder for the fastest compression level.
//!
//! Ported from libdeflate's `ht_matchfinder.h`.
//!
//! Stores hash chains inline in the hash table (2 entries per bucket).
//! Optimized for speed over compression ratio. Does not support length-3 matches.

use super::{MATCHFINDER_WINDOW_SIZE, lz_extend, lz_hash, matchfinder_init, matchfinder_rebase};

/// Hash table order (log2 of number of buckets).
const HT_MATCHFINDER_HASH_ORDER: u32 = 15;

/// Number of entries per hash bucket.
/// The match/skip loops are hand-unrolled for exactly 2 entries.
const HT_MATCHFINDER_BUCKET_SIZE: usize = 2;

/// Minimum match length for the ht_matchfinder.
/// Length-3 matches are not supported (see module docs); matching starts
/// from a 4-byte `seq` comparison.
#[allow(dead_code)] // part of the matchfinder's public contract; may be unused by some callers
pub(crate) const HT_MATCHFINDER_MIN_MATCH_LEN: u32 = 4;

/// Minimum value of max_len for longest_match().
/// `longest_match` debug-asserts this, and `skip_bytes` bails out unless
/// `count + 5` bytes remain past `in_next`.
pub(crate) const HT_MATCHFINDER_REQUIRED_NBYTES: u32 = 5;

/// Number of buckets in the hash table (2^15 = 32768).
const HT_NUM_BUCKETS: usize = 1 << HT_MATCHFINDER_HASH_ORDER;

/// Hash Table matchfinder: 32K buckets × 2 entries (i16 positions).
///
/// Each bucket is a tiny inline 2-entry hash chain: slot 0 holds the most
/// recently inserted window-relative position for a hash, slot 1 the previous
/// one. Positions at or below the slide cutoff (including the initial fill
/// value) are treated as empty.
#[derive(Clone)]
pub(crate) struct HtMatchfinder {
    // 2^15 buckets × 2 slots × 2 bytes = 128 KiB.
    hash_tab: [[i16; HT_MATCHFINDER_BUCKET_SIZE]; HT_NUM_BUCKETS],
}

impl HtMatchfinder {
    /// Create and initialize a new `HtMatchfinder`.
    ///
    /// Every slot starts at `super::MATCHFINDER_INITVAL`, which compares
    /// `<= cutoff` in the match loops, so a fresh table yields no matches.
    pub fn new() -> Self {
        Self {
            hash_tab: [[super::MATCHFINDER_INITVAL; HT_MATCHFINDER_BUCKET_SIZE]; HT_NUM_BUCKETS],
        }
    }

    /// Initialize (reset) the matchfinder for a new stream.
    pub fn init(&mut self) {
        matchfinder_init(self.hash_tab.as_flattened_mut());
    }

    /// Slide the window by `MATCHFINDER_WINDOW_SIZE`, rebasing every stored
    /// position so entries older than one window fall below the cutoff.
    fn slide_window(&mut self) {
        matchfinder_rebase(self.hash_tab.as_flattened_mut());
    }

    /// Find the longest match at position `in_next` within the input buffer.
    ///
    /// * `in_base_offset` — current base offset into the input; advanced here
    ///   whenever the window slides.
    /// * `in_next` — absolute position in `input` to match at.
    /// * `max_len` — maximum match length allowed
    ///   (must be >= `HT_MATCHFINDER_REQUIRED_NBYTES`).
    /// * `nice_len` — stop searching once a match at least this long is found.
    /// * `next_hash` — on entry, the precomputed hash for `in_next`; on return
    ///   (when at least 5 bytes remain) it holds the hash for `in_next + 1`.
    ///
    /// Returns `(best_len, offset)`, where `best_len` is 0 if no match was
    /// found and `offset` is the backward distance to the match otherwise.
    #[inline(always)]
    pub fn longest_match(
        &mut self,
        input: &[u8],
        in_base_offset: &mut usize,
        in_next: usize,
        max_len: u32,
        nice_len: u32,
        next_hash: &mut u32,
    ) -> (u32, u32) {
        use crate::fast_bytes::{load_u32_le, prefetch};

        debug_assert!(max_len >= HT_MATCHFINDER_REQUIRED_NBYTES);

        let mut cur_pos = (in_next - *in_base_offset) as i32;

        // Slide window if we've reached the boundary.
        if cur_pos as u32 >= MATCHFINDER_WINDOW_SIZE {
            self.slide_window();
            *in_base_offset += MATCHFINDER_WINDOW_SIZE as usize;
            cur_pos -= MATCHFINDER_WINDOW_SIZE as i32;
        }

        let in_base = *in_base_offset;
        // Positions at or below this value are too old (or empty slots).
        let cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE as i32;

        let hash = *next_hash as usize;

        // Precompute the hash for in_next+1 and prefetch its bucket so the
        // table access on the next call is likely a cache hit.
        if in_next + 5 <= input.len() {
            *next_hash = lz_hash(load_u32_le(input, in_next + 1), HT_MATCHFINDER_HASH_ORDER);
            prefetch(&self.hash_tab[*next_hash as usize]);
        }

        // Load 4 bytes at the current position for the quick first comparison.
        let seq = load_u32_le(input, in_next);

        // BUCKET_SIZE == 2 hand-unrolled path (matches the C code): read the
        // newest entry, then immediately replace it with the current position.
        let cur_node = self.hash_tab[hash][0] as i32;
        self.hash_tab[hash][0] = cur_pos as i16;

        if cur_node <= cutoff {
            return (0, 0);
        }

        let match_pos = (in_base as isize + cur_node as isize) as usize;
        let matchptr_seq = load_u32_le(input, match_pos);

        // Demote the old newest entry into slot 1, evicting the oldest.
        let to_insert = cur_node as i16;
        let cur_node2 = self.hash_tab[hash][1] as i32;
        self.hash_tab[hash][1] = to_insert;

        let mut best_len = 0u32;
        let mut best_offset = 0u32;

        if matchptr_seq == seq {
            best_len = lz_extend(&input[in_next..], &input[match_pos..], 4, max_len);
            best_offset = (in_next - match_pos) as u32;

            if cur_node2 <= cutoff || best_len >= nice_len {
                return (best_len, best_offset);
            }

            let match_pos2 = (in_base as isize + cur_node2 as isize) as usize;
            let matchptr2_seq = load_u32_le(input, match_pos2);

            // The second entry can only beat the first if it matches the first
            // 4 bytes AND the 4 bytes straddling best_len (at best_len - 3) —
            // a cheap filter before paying for a full lz_extend.
            if matchptr2_seq == seq && best_len >= 4 {
                let tail_off = (best_len - 3) as usize;
                let s_tail = load_u32_le(input, in_next + tail_off);
                let m_tail = load_u32_le(input, match_pos2 + tail_off);
                if s_tail == m_tail {
                    let len2 = lz_extend(&input[in_next..], &input[match_pos2..], 4, max_len);
                    if len2 > best_len {
                        best_len = len2;
                        best_offset = (in_next - match_pos2) as u32;
                    }
                }
            }
        } else {
            // First entry didn't match the 4-byte sequence; try the second.
            if cur_node2 <= cutoff {
                return (0, 0);
            }
            let match_pos2 = (in_base as isize + cur_node2 as isize) as usize;
            let matchptr2_seq = load_u32_le(input, match_pos2);
            if matchptr2_seq == seq {
                best_len = lz_extend(&input[in_next..], &input[match_pos2..], 4, max_len);
                best_offset = (in_next - match_pos2) as u32;
            }
        }

        (best_len, best_offset)
    }

    /// Find the longest match using raw pointers (unchecked mode).
    ///
    /// Equivalent to [`longest_match`] but takes `*const u8` instead of `&[u8]`
    /// to eliminate fat-pointer register pressure in the hot loop.
    ///
    /// # Safety
    ///
    /// `input_ptr` must be valid for reads of `in_end` bytes from the start of the input.
    /// `in_next + 4` must not exceed `in_end`.
    #[cfg(feature = "unchecked")]
    #[inline(always)]
    #[allow(clippy::too_many_arguments, dead_code)]
    pub unsafe fn longest_match_raw(
        &mut self,
        input_ptr: *const u8,
        in_end: usize,
        in_base_offset: &mut usize,
        in_next: usize,
        max_len: u32,
        nice_len: u32,
        next_hash: &mut u32,
    ) -> (u32, u32) {
        use super::raw::lz_extend_raw;
        use crate::fast_bytes::{load_u32_le_ptr, prefetch_ptr};

        unsafe {
            debug_assert!(max_len >= HT_MATCHFINDER_REQUIRED_NBYTES);

            let mut cur_pos = (in_next - *in_base_offset) as i32;

            // Slide window if we've reached the boundary.
            if cur_pos as u32 >= MATCHFINDER_WINDOW_SIZE {
                self.slide_window();
                *in_base_offset += MATCHFINDER_WINDOW_SIZE as usize;
                cur_pos -= MATCHFINDER_WINDOW_SIZE as i32;
            }

            let in_base = *in_base_offset;
            // Positions at or below this value are too old (or empty slots).
            let cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE as i32;

            let hash = *next_hash as usize;

            // Precompute the hash for in_next+1 and prefetch its bucket,
            // mirroring the safe path in `longest_match`.
            //
            // FIX: the previous code prefetched the table *base* address
            // (useless — the bucket we want is elsewhere) and then executed
            // `let _ = *bucket`, an actual blocking load that defeats the
            // purpose of a prefetch hint. A single prefetch of the target
            // bucket is what the safe path does and what was intended.
            if in_next + 5 <= in_end {
                *next_hash = lz_hash(
                    load_u32_le_ptr(input_ptr, in_next + 1),
                    HT_MATCHFINDER_HASH_ORDER,
                );
                // SAFETY: the index is a value produced by lz_hash with
                // HT_MATCHFINDER_HASH_ORDER; the safe path indexes hash_tab
                // with the same value, relying on it being < HT_NUM_BUCKETS.
                prefetch_ptr(
                    self.hash_tab.get_unchecked(*next_hash as usize).as_ptr() as *const u8,
                );
            }

            // Load 4 bytes at the current position for the quick comparison.
            let seq = load_u32_le_ptr(input_ptr, in_next);

            // BUCKET_SIZE == 2 hand-unrolled path (matches the C code).
            // SAFETY: `hash` was produced by lz_hash (see above), so the
            // unchecked bucket accesses are in bounds.
            let cur_node = *self.hash_tab.get_unchecked(hash).get_unchecked(0) as i32;
            *self.hash_tab.get_unchecked_mut(hash).get_unchecked_mut(0) = cur_pos as i16;

            if cur_node <= cutoff {
                return (0, 0);
            }

            let match_pos = (in_base as isize + cur_node as isize) as usize;
            let matchptr_seq = load_u32_le_ptr(input_ptr, match_pos);

            // Demote the old newest entry into slot 1, evicting the oldest.
            let to_insert = cur_node as i16;
            let cur_node2 = *self.hash_tab.get_unchecked(hash).get_unchecked(1) as i32;
            *self.hash_tab.get_unchecked_mut(hash).get_unchecked_mut(1) = to_insert;

            let mut best_len = 0u32;
            let mut best_offset = 0u32;

            if matchptr_seq == seq {
                best_len =
                    lz_extend_raw(input_ptr.add(in_next), input_ptr.add(match_pos), 4, max_len);
                best_offset = (in_next - match_pos) as u32;

                if cur_node2 <= cutoff || best_len >= nice_len {
                    return (best_len, best_offset);
                }

                let match_pos2 = (in_base as isize + cur_node2 as isize) as usize;
                let matchptr2_seq = load_u32_le_ptr(input_ptr, match_pos2);

                // Second entry must match the first 4 bytes AND the 4 bytes
                // straddling best_len before we pay for a full lz_extend_raw.
                if matchptr2_seq == seq && best_len >= 4 {
                    let tail_off = (best_len - 3) as usize;
                    let s_tail = load_u32_le_ptr(input_ptr, in_next + tail_off);
                    let m_tail = load_u32_le_ptr(input_ptr, match_pos2 + tail_off);
                    if s_tail == m_tail {
                        let len2 = lz_extend_raw(
                            input_ptr.add(in_next),
                            input_ptr.add(match_pos2),
                            4,
                            max_len,
                        );
                        if len2 > best_len {
                            best_len = len2;
                            best_offset = (in_next - match_pos2) as u32;
                        }
                    }
                }
            } else {
                // First entry didn't match; try the second.
                if cur_node2 <= cutoff {
                    return (0, 0);
                }
                let match_pos2 = (in_base as isize + cur_node2 as isize) as usize;
                let matchptr2_seq = load_u32_le_ptr(input_ptr, match_pos2);
                if matchptr2_seq == seq {
                    best_len = lz_extend_raw(
                        input_ptr.add(in_next),
                        input_ptr.add(match_pos2),
                        4,
                        max_len,
                    );
                    best_offset = (in_next - match_pos2) as u32;
                }
            }

            (best_len, best_offset)
        }
    }

    /// Skip `count` bytes using raw pointers (unchecked mode).
    ///
    /// Inserts every skipped position into the hash table without searching
    /// for matches, then leaves `*next_hash` primed for the resume position.
    ///
    /// # Safety
    ///
    /// `input_ptr` must be valid for reads of `in_end` bytes.
    #[cfg(feature = "unchecked")]
    #[inline(always)]
    #[allow(dead_code)]
    pub unsafe fn skip_bytes_raw(
        &mut self,
        input_ptr: *const u8,
        in_end: usize,
        in_base_offset: &mut usize,
        in_next: usize,
        count: u32,
        next_hash: &mut u32,
    ) {
        use crate::fast_bytes::{load_u32_le_ptr, prefetch_ptr};

        unsafe {
            if count == 0 {
                return;
            }

            // Too close to the end for further matching — skip the table
            // updates entirely, matching the C code's early-out.
            if (count + HT_MATCHFINDER_REQUIRED_NBYTES) as usize > in_end - in_next {
                return;
            }

            let mut cur_pos = (in_next - *in_base_offset) as i32;

            // Slide once up front if any skipped position would cross the
            // window boundary.
            if (cur_pos + count as i32 - 1) >= MATCHFINDER_WINDOW_SIZE as i32 {
                self.slide_window();
                *in_base_offset += MATCHFINDER_WINDOW_SIZE as usize;
                cur_pos -= MATCHFINDER_WINDOW_SIZE as i32;
            }

            let mut hash = *next_hash as usize;
            let mut pos = in_next;
            let mut remaining = count;

            while remaining > 0 {
                // Shift bucket: demote [0] to [1], insert current position.
                // SAFETY: `hash` comes from lz_hash (or the caller's
                // precomputed value of the same form), so the unchecked
                // bucket accesses are in bounds.
                *self.hash_tab.get_unchecked_mut(hash).get_unchecked_mut(1) =
                    *self.hash_tab.get_unchecked(hash).get_unchecked(0);
                *self.hash_tab.get_unchecked_mut(hash).get_unchecked_mut(0) = cur_pos as i16;

                pos += 1;
                cur_pos += 1;
                remaining -= 1;

                // Guard is always true given the entry check above
                // (count + 5 <= in_end - in_next), kept for defensiveness.
                if pos + 4 <= in_end {
                    hash = lz_hash(load_u32_le_ptr(input_ptr, pos), HT_MATCHFINDER_HASH_ORDER)
                        as usize;
                }
            }

            prefetch_ptr(self.hash_tab.get_unchecked(hash).as_ptr() as *const u8);
            *next_hash = hash as u32;
        }
    }

    /// Skip `count` bytes in the matchfinder (update hash table without
    /// finding matches). `*next_hash` is left primed for the resume position.
    #[inline(always)]
    pub fn skip_bytes(
        &mut self,
        input: &[u8],
        in_base_offset: &mut usize,
        in_next: usize,
        count: u32,
        next_hash: &mut u32,
    ) {
        use crate::fast_bytes::{load_u32_le, prefetch};

        if count == 0 {
            return;
        }

        // Too close to the end for further matching — skip the table updates.
        // NOTE(review): assumes in_next <= in_end (subtraction would
        // underflow otherwise); callers uphold this.
        let in_end = input.len();
        if (count + HT_MATCHFINDER_REQUIRED_NBYTES) as usize > in_end - in_next {
            return;
        }

        let mut cur_pos = (in_next - *in_base_offset) as i32;

        // Slide once up front if any skipped position would cross the
        // window boundary.
        if (cur_pos + count as i32 - 1) >= MATCHFINDER_WINDOW_SIZE as i32 {
            self.slide_window();
            *in_base_offset += MATCHFINDER_WINDOW_SIZE as usize;
            cur_pos -= MATCHFINDER_WINDOW_SIZE as i32;
        }

        let mut hash = *next_hash as usize;
        let mut pos = in_next;
        let mut remaining = count;

        while remaining > 0 {
            // Shift bucket: demote [0] to [1], insert current position.
            self.hash_tab[hash][1] = self.hash_tab[hash][0];
            self.hash_tab[hash][0] = cur_pos as i16;

            pos += 1;
            cur_pos += 1;
            remaining -= 1;

            // Guard is always true given the entry check above
            // (count + 5 <= in_end - in_next), kept for defensiveness.
            if pos + 4 <= in_end {
                hash = lz_hash(load_u32_le(input, pos), HT_MATCHFINDER_HASH_ORDER) as usize;
            }
        }

        prefetch(&self.hash_tab[hash]);
        *next_hash = hash as u32;
    }
}