// haagenti_zstd/compress/match_finder.rs
//! LZ77 match finding using optimized hash tables.
//!
//! This module implements high-performance match finding for Zstd compression.
//! Key optimizations:
//! - Fixed-size power-of-2 hash table (no HashMap allocation)
//! - Fast multiplicative hash function
//! - SIMD-accelerated match length comparison
//! - Cache-friendly hash chain structure
//! - Cache-line aligned structures (64-byte alignment)

use core::ops::{Deref, DerefMut};

/// Minimum match length for Zstd.
pub const MIN_MATCH_LENGTH: usize = 3;

/// Maximum match length.
pub const MAX_MATCH_LENGTH: usize = 131074; // Per RFC 8878

/// Hash table size (power of 2 for fast modulo via AND).
/// 64K entries is a good balance between memory and hit rate.
const HASH_LOG: usize = 16;
const HASH_SIZE: usize = 1 << HASH_LOG;
#[allow(dead_code)]
const HASH_MASK: u32 = (HASH_SIZE - 1) as u32;

/// Maximum chain depth per hash bucket.
/// Increased from 8 to allow deeper searches for better text compression.
/// The actual search depth is min(this, search_depth from config).
#[allow(dead_code)]
const MAX_CHAIN_DEPTH: usize = 256;

/// Primary hash multiplier (golden ratio derived, excellent distribution).
/// 0x9E3779B9 is floor(2^32 / phi), the classic Fibonacci-hashing constant.
const HASH_PRIME: u32 = 0x9E3779B9;

/// Secondary hash multiplier for mixing (from MurmurHash3).
/// NOTE(review): not referenced by hash4/hash3 in this section — confirm it
/// is used elsewhere in the file, or mark #[allow(dead_code)].
const HASH_PRIME2: u32 = 0x85EBCA6B;

// =============================================================================
// Cache-Aligned Structures
// =============================================================================
41
/// Wrapper that forces 64-byte (cache-line) alignment on its contents.
///
/// Aligning hot structures to the cache-line size of typical x86_64 parts
/// reduces cache misses for random access and prevents false sharing when
/// multiple threads touch adjacent data.
#[repr(C, align(64))]
pub struct CacheAligned<T>(T);

impl<T> CacheAligned<T> {
    /// Wrap `value`, forcing cache-line alignment.
    #[inline]
    pub const fn new(value: T) -> Self {
        CacheAligned(value)
    }

    /// Mutable access to the wrapped value.
    #[inline]
    pub fn inner_mut(&mut self) -> &mut T {
        &mut self.0
    }
}

// Transparent read access: `*wrapper` yields the inner value.
impl<T> Deref for CacheAligned<T> {
    type Target = T;

    #[inline]
    fn deref(&self) -> &T {
        &self.0
    }
}

// Transparent write access through `*wrapper` / `&mut *wrapper`.
impl<T> DerefMut for CacheAligned<T> {
    #[inline]
    fn deref_mut(&mut self) -> &mut T {
        &mut self.0
    }
}

/// Cache-aligned hash table for the match finder.
///
/// This structure ensures the hash table data starts at a 64-byte boundary,
/// improving cache efficiency for random access patterns typical in hash lookups.
#[repr(C, align(64))]
struct AlignedHashTable {
    // 64K u32 entries (256KB). A zero entry doubles as the "empty slot"
    // sentinel because positions are stored as (pos + 1).
    data: [u32; HASH_SIZE],
}

impl AlignedHashTable {
    /// Create a new zeroed hash table via heap allocation.
    ///
    /// The 256KB table is allocated zeroed directly on the heap, which avoids
    /// building it on the stack and avoids initializing the memory twice.
    fn new_boxed() -> Box<Self> {
        // SAFETY: the struct is #[repr(C)] and contains only u32s, so the
        // all-zero bit pattern from `alloc_zeroed` is a valid value. The
        // pointer comes from the global allocator with this type's exact
        // Layout (size and 64-byte alignment), which is the same layout
        // `Box` uses to deallocate, so `Box::from_raw` is sound. A null
        // return (OOM) is diverted to `handle_alloc_error` before use.
        unsafe {
            let layout = core::alloc::Layout::new::<Self>();
            let ptr = std::alloc::alloc_zeroed(layout) as *mut Self;
            if ptr.is_null() {
                std::alloc::handle_alloc_error(layout);
            }
            Box::from_raw(ptr)
        }
    }

    /// Reset all entries to zero using optimized memset.
    #[inline]
    #[allow(dead_code)]
    fn reset(&mut self) {
        self.data.fill(0);
    }

    /// Get the entry at the given index (by value; u32 is Copy).
    /// Panics if `index >= HASH_SIZE` (slice bounds check).
    #[inline(always)]
    fn get(&self, index: usize) -> u32 {
        self.data[index]
    }

    /// Set entry at the given index.
    /// Panics if `index >= HASH_SIZE` (slice bounds check).
    #[inline(always)]
    fn set(&mut self, index: usize, value: u32) {
        self.data[index] = value;
    }
}

/// A single LZ77 match located in the input stream.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Match {
    /// Absolute input position where the match begins.
    pub position: usize,
    /// Backward distance to the earlier copy of the data.
    pub offset: usize,
    /// Number of matching bytes.
    pub length: usize,
}

impl Match {
    /// Construct a match from its three components.
    #[inline]
    pub fn new(position: usize, offset: usize, length: usize) -> Self {
        Match {
            position,
            offset,
            length,
        }
    }
}

/// Optimized hash chain-based match finder.
///
/// Uses a fixed-size hash table with direct indexing for O(1) lookup.
/// SIMD-accelerated match length comparison when available.
/// Hash table is cache-line aligned (64 bytes) for optimal memory access.
///
/// ## Novel Optimization: Generation Counter
///
/// Instead of zeroing the 256KB hash table on every reset (O(n)), we use
/// a generation counter. Hash entries store (generation << 28) | (pos + 1).
/// On reset, we just increment generation (O(1)). Entries from old generations
/// are treated as empty.
///
/// ## Novel Optimization: Match Prediction
///
/// When a match is found with offset O, the next occurrence of that pattern
/// is very likely to be at position P + O (for repeating patterns like text).
/// We track the last successful offset and check it first at the next position.
pub struct MatchFinder {
    /// Maximum search depth in the hash chain (clamped to [1, 128] in `new`).
    search_depth: usize,
    /// Hash table: hash -> (generation << 28) | (pos + 1).
    /// Entries from old generations are treated as empty.
    /// Cache-aligned for optimal memory access patterns.
    hash_table: Box<AlignedHashTable>,
    /// Current generation counter (4 bits, wraps at 16).
    generation: u32,
    /// Chain table: for each position, stores previous position with same hash,
    /// generation-encoded the same way as hash table entries.
    chain_table: Vec<u32>,
    /// Input length (for bounds checking and adaptive search depth).
    input_len: usize,
    /// Last successful match offset (for prediction).
    /// If the same offset produces matches repeatedly, we check it first.
    predicted_offset: u32,
    /// Second-to-last offset for alternating patterns.
    predicted_offset2: u32,
    /// Count of successful predictions. In the visible code it is only
    /// incremented and surfaced via Debug (diagnostics).
    prediction_hits: u32,
}

// Manual Debug impl since AlignedHashTable doesn't derive Debug.
// The hash table is summarized (type name + entry count) rather than dumped:
// printing 64K entries would be useless noise.
impl core::fmt::Debug for MatchFinder {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("MatchFinder")
            .field("search_depth", &self.search_depth)
            .field(
                "hash_table",
                &format_args!("[AlignedHashTable; {}]", HASH_SIZE),
            )
            .field("chain_table_len", &self.chain_table.len())
            .field("input_len", &self.input_len)
            .field("predicted_offset", &self.predicted_offset)
            .field("predicted_offset2", &self.predicted_offset2)
            .field("prediction_hits", &self.prediction_hits)
            .finish()
    }
}

/// Mask for extracting generation from hash entry: top 4 bits.
const GEN_MASK: u32 = 0xF0000000;
/// Shift for generation in hash entry.
const GEN_SHIFT: u32 = 28;
/// Mask for extracting position from hash entry: bottom 28 bits.
/// Positions are stored as (pos + 1) so an all-zero entry means "empty",
/// which caps addressable input positions below 2^28 - 1.
const POS_MASK: u32 = 0x0FFFFFFF;

impl MatchFinder {
    /// Create a new match finder.
    ///
    /// `search_depth` bounds how many hash-chain candidates are examined per
    /// position; it is clamped to [1, 128] and further scaled down for large
    /// inputs by `effective_depth`. The 256KB hash table is heap-allocated
    /// once here and reused across inputs via `reset`.
    pub fn new(search_depth: usize) -> Self {
        Self {
            search_depth: search_depth.clamp(1, 128),
            hash_table: AlignedHashTable::new_boxed(),
            generation: 0,
            chain_table: Vec::new(),
            input_len: 0,
            predicted_offset: 0,
            predicted_offset2: 0,
            prediction_hits: 0,
        }
    }

226    /// Calculate early exit threshold based on position in file.
227    ///
228    /// Early in the file, we want longer matches before exiting search
229    /// (more exploration). Later in the file, shorter matches are acceptable
230    /// for early exit (faster throughput).
231    ///
232    /// The rationale:
233    /// - Position 0-1KB: Still building hash chains, need full search (threshold=48)
234    /// - Position 1-8KB: Chains are populated, moderate search (threshold=32)
235    /// - Position 8-32KB: Well-populated chains, balanced (threshold=24)
236    /// - Position 32KB+: Mature chains, early exit is safe (threshold=16)
237    ///
238    /// This optimization is particularly effective for repetitive data where
239    /// excellent matches are common.
240    #[inline]
241    pub fn early_exit_threshold(&self, position: usize) -> usize {
242        match position {
243            0..=1024 => 32,     // Very early: good match to exit (was 48)
244            1025..=8192 => 24,  // Early: moderate threshold (was 32)
245            8193..=32768 => 16, // Mid-file: lower threshold (was 24)
246            _ => 12,            // Late in file: aggressive exit (was 16)
247        }
248    }
249
250    /// Calculate effective search depth based on input size.
251    ///
252    /// Larger inputs benefit from reduced search depth:
253    /// - Reduces time spent in hash chain traversal
254    /// - Cache pressure is higher with large data
255    /// - Prediction often finds good matches anyway
256    ///
257    /// The scaling is designed to maintain compression quality while
258    /// improving throughput on large inputs.
259    #[inline]
260    pub fn effective_depth(&self, input_len: usize) -> usize {
261        let base = self.search_depth;
262
263        let scaled = match input_len {
264            // Small inputs: full depth for best compression
265            0..=4096 => base,
266            // Medium inputs: 90% depth (was 75%)
267            4097..=16384 => (base * 9 / 10).max(4),
268            // Large inputs: 75% depth (was 33%)
269            16385..=65536 => (base * 3 / 4).max(4),
270            // Very large inputs: 50% depth (was 25%)
271            65537..=262144 => (base / 2).max(3),
272            // Huge inputs: 33% depth (was 12.5%)
273            _ => (base / 3).max(2),
274        };
275
276        scaled.min(base) // Never exceed configured depth
277    }
278
279    /// Reset the hash table for new input.
280    ///
281    /// Uses generation counter to avoid zeroing tables (O(1) vs O(n)).
282    /// Hash table and chain table both use generation encoding.
283    /// Entries from old generations are treated as invalid/empty.
284    #[inline]
285    pub(super) fn reset(&mut self, input_len: usize) {
286        // Increment generation instead of zeroing (O(1) vs O(256KB+))
287        // Generation uses 4 bits, wrapping at 16. Old entries become invalid.
288        self.generation = (self.generation + 1) & 0xF;
289
290        // Chain table uses generation encoding too - just resize if needed
291        // No need to zero since we check generation on read
292        if self.chain_table.len() < input_len {
293            self.chain_table.resize(input_len, 0);
294        }
295        // If shrinking, leave as is - positions beyond input_len won't be accessed
296
297        self.input_len = input_len;
298
299        // Reset prediction state
300        self.predicted_offset = 0;
301        self.predicted_offset2 = 0;
302        self.prediction_hits = 0;
303    }
304
305    /// Compute hash for 4 bytes using fast multiplicative hash.
306    ///
307    /// Uses single multiplication for maximum speed. The golden ratio prime
308    /// provides good distribution even with just one multiply.
309    #[inline(always)]
310    pub(super) fn hash4(&self, data: &[u8], pos: usize) -> u32 {
311        debug_assert!(pos + 4 <= data.len());
312
313        // Load 4 bytes as u32 (little-endian)
314        let bytes = unsafe { std::ptr::read_unaligned(data.as_ptr().add(pos) as *const u32) };
315
316        // Single multiply hash - faster than double-mixing
317        // Golden ratio prime provides good distribution
318        bytes.wrapping_mul(HASH_PRIME) >> (32 - HASH_LOG as u32)
319    }
320
321    /// Alternative hash for 3-byte minimum matches.
322    #[inline(always)]
323    pub(super) fn hash3(&self, data: &[u8], pos: usize) -> u32 {
324        debug_assert!(pos + 3 <= data.len());
325
326        let b0 = data[pos] as u32;
327        let b1 = data[pos + 1] as u32;
328        let b2 = data[pos + 2] as u32;
329
330        // Combine bytes with shifts
331        let value = b0 | (b1 << 8) | (b2 << 16);
332        value.wrapping_mul(HASH_PRIME) >> (32 - HASH_LOG as u32)
333    }
334
335    /// Find all matches in the input data.
336    ///
337    /// Returns matches sorted by position.
338    pub fn find_matches(&mut self, input: &[u8]) -> Vec<Match> {
339        if input.len() < MIN_MATCH_LENGTH {
340            return Vec::new();
341        }
342
343        self.reset(input.len());
344        let mut matches = Vec::with_capacity(input.len() / 16);
345
346        let mut pos = 0;
347        let end = input.len().saturating_sub(MIN_MATCH_LENGTH);
348
349        while pos <= end {
350            // Prefetch ahead for better cache behavior
351            #[cfg(target_arch = "x86_64")]
352            if pos + 64 < input.len() {
353                unsafe {
354                    use std::arch::x86_64::_mm_prefetch;
355                    _mm_prefetch(
356                        input.as_ptr().add(pos + 64) as *const i8,
357                        std::arch::x86_64::_MM_HINT_T0,
358                    );
359                }
360            }
361
362            // Hash must be computed anyway for chain updates
363            let hash = if pos + 4 <= input.len() {
364                self.hash4(input, pos)
365            } else {
366                self.hash3(input, pos)
367            };
368
369            // Try predicted offsets first (fast path for repetitive patterns)
370            let cur_prefix =
371                unsafe { std::ptr::read_unaligned(input.as_ptr().add(pos) as *const u32) };
372
373            let mut best_match = None;
374
375            // Check primary prediction
376            if self.predicted_offset > 0 && pos >= self.predicted_offset as usize {
377                let match_pos = pos - self.predicted_offset as usize;
378                let match_prefix = unsafe {
379                    std::ptr::read_unaligned(input.as_ptr().add(match_pos) as *const u32)
380                };
381                if cur_prefix == match_prefix {
382                    let length = 4 + self.match_length_from(input, match_pos + 4, pos + 4);
383                    if length >= MIN_MATCH_LENGTH {
384                        self.prediction_hits += 1;
385                        best_match = Some(Match::new(pos, self.predicted_offset as usize, length));
386                    }
387                }
388            }
389
390            // Check secondary prediction if primary failed
391            if best_match.is_none()
392                && self.predicted_offset2 > 0
393                && self.predicted_offset2 != self.predicted_offset
394                && pos >= self.predicted_offset2 as usize
395            {
396                let match_pos = pos - self.predicted_offset2 as usize;
397                let match_prefix = unsafe {
398                    std::ptr::read_unaligned(input.as_ptr().add(match_pos) as *const u32)
399                };
400                if cur_prefix == match_prefix {
401                    let length = 4 + self.match_length_from(input, match_pos + 4, pos + 4);
402                    if length >= MIN_MATCH_LENGTH {
403                        self.prediction_hits += 1;
404                        best_match = Some(Match::new(pos, self.predicted_offset2 as usize, length));
405                    }
406                }
407            }
408
409            // Fall back to hash chain search
410            if best_match.is_none() {
411                best_match = self.find_best_match(input, pos, hash as usize);
412            }
413
414            if let Some(m) = best_match {
415                matches.push(m);
416
417                // Update predictions for next iteration (shift if different)
418                let new_offset = m.offset as u32;
419                if new_offset != self.predicted_offset {
420                    self.predicted_offset2 = self.predicted_offset;
421                    self.predicted_offset = new_offset;
422                }
423
424                // Update hash table for current position before skipping
425                self.update_hash(input, pos, hash as usize);
426
427                // Aggressive skip for RLE-like patterns (offset 1-4)
428                // These patterns are very common in text and benefit from minimal hash updates
429                if m.offset <= 4 && m.length >= 32 {
430                    // RLE-like: update only at 16-byte intervals
431                    let skip_end = (pos + m.length).min(end);
432                    let mut update_pos = pos + 16;
433                    while update_pos < skip_end {
434                        if update_pos + 4 <= input.len() {
435                            let h = self.hash4(input, update_pos);
436                            self.update_hash(input, update_pos, h as usize);
437                        }
438                        update_pos += 16;
439                    }
440                } else if m.length >= 64 {
441                    // Very long matches: sparse updates every 8th position
442                    let skip_end = (pos + m.length).min(end);
443                    let mut update_pos = pos + 8;
444                    while update_pos < skip_end {
445                        if update_pos + 4 <= input.len() {
446                            let h = self.hash4(input, update_pos);
447                            self.update_hash(input, update_pos, h as usize);
448                        }
449                        update_pos += 8;
450                    }
451                } else if m.length >= 8 {
452                    // Medium matches: update every 4th position
453                    let skip_end = (pos + m.length).min(end);
454                    let mut update_pos = pos + 4;
455                    while update_pos < skip_end {
456                        if update_pos + 4 <= input.len() {
457                            let h = self.hash4(input, update_pos);
458                            self.update_hash(input, update_pos, h as usize);
459                        }
460                        update_pos += 4;
461                    }
462                }
463                // Short matches (3-7): skip updates entirely for speed
464
465                pos += m.length;
466            } else {
467                self.update_hash(input, pos, hash as usize);
468                pos += 1;
469            }
470        }
471
472        matches
473    }
474
475    /// Update hash table with new position.
476    ///
477    /// Both hash and chain entries store (generation << 28) | (pos + 1).
478    /// This allows O(1) reset by just incrementing generation.
479    #[inline(always)]
480    pub(super) fn update_hash(&mut self, _input: &[u8], pos: usize, hash: usize) {
481        let prev = self.hash_table.get(hash);
482        // Check if prev is from current generation
483        let prev_gen = (prev & GEN_MASK) >> GEN_SHIFT;
484        // Store generation-encoded chain entry: (gen << 28) | prev_pos
485        // If prev was from old generation, prev_pos = 0 (end of chain)
486        let chain_entry = if prev_gen == self.generation {
487            // Valid prev - keep same generation encoding
488            prev
489        } else {
490            // Old generation - set to 0 (end of chain marker)
491            0
492        };
493        if pos < self.chain_table.len() {
494            self.chain_table[pos] = chain_entry;
495        }
496        // Store (generation << 28) | (pos + 1) in hash table
497        let encoded = (self.generation << GEN_SHIFT) | ((pos + 1) as u32);
498        self.hash_table.set(hash, encoded);
499    }
500
    /// Find the best match at the current position.
    ///
    /// Walks the hash chain for `hash`, newest candidate first, examining up
    /// to `effective_depth` candidates and keeping the longest match found.
    ///
    /// Optimized with:
    /// - Early rejection using first 4 bytes comparison
    /// - Aggressive prefetching of next chain entry and match data
    /// - Minimal branching in hot loop
    #[inline]
    pub(super) fn find_best_match(&self, input: &[u8], pos: usize, hash: usize) -> Option<Match> {
        let hash_entry = self.hash_table.get(hash);

        // Check generation - entries from old generations are treated as empty.
        let entry_gen = (hash_entry & GEN_MASK) >> GEN_SHIFT;
        if entry_gen != self.generation {
            return None;
        }

        // Extract position (mask out generation, subtract 1; 0 means empty).
        let entry_pos = hash_entry & POS_MASK;
        if entry_pos == 0 {
            return None;
        }
        let mut match_pos = (entry_pos - 1) as usize;

        // Early bounds check: the candidate must be strictly earlier, and a
        // full 4-byte prefix must exist at `pos` for the read just below.
        if match_pos >= pos || pos + 4 > input.len() {
            return None;
        }

        // Load first 4 bytes at current position for fast rejection.
        // SAFETY: pos + 4 <= input.len() was verified above.
        let cur_prefix = unsafe { std::ptr::read_unaligned(input.as_ptr().add(pos) as *const u32) };

        let mut best_match: Option<Match> = None;
        let mut best_length = MIN_MATCH_LENGTH - 1;
        let mut depth = 0;
        let max_offset = 1 << 28; // Zstd max offset

        // Use adaptive depth based on input size.
        let effective_depth = self.effective_depth(self.input_len);

        while depth < effective_depth && match_pos < pos {
            let offset = pos - match_pos;

            if offset > max_offset {
                break;
            }

            // Resolve the next chain link up front (and prefetch its bytes)
            // so the CPU can overlap that load with the current comparison.
            // Chain entries are generation-encoded: (gen << 28) | (pos + 1).
            let next_chain = if match_pos < self.chain_table.len() {
                let chain_entry = self.chain_table[match_pos];
                // Check generation - if wrong, treat as end of chain.
                let chain_gen = (chain_entry & GEN_MASK) >> GEN_SHIFT;
                if chain_gen != self.generation {
                    0 // Wrong generation = end of chain
                } else {
                    let next_pos_enc = chain_entry & POS_MASK;
                    if next_pos_enc > 0 {
                        let next_pos = (next_pos_enc - 1) as usize;
                        #[cfg(target_arch = "x86_64")]
                        if next_pos < input.len() {
                            // SAFETY: next_pos < input.len(); prefetch is
                            // also non-faulting by definition.
                            unsafe {
                                use std::arch::x86_64::_mm_prefetch;
                                _mm_prefetch(
                                    input.as_ptr().add(next_pos) as *const i8,
                                    std::arch::x86_64::_MM_HINT_T0,
                                );
                            }
                        }
                    }
                    next_pos_enc
                }
            } else {
                0
            };

            // Fast rejection: check first 4 bytes before full comparison.
            let match_prefix = if match_pos + 4 <= input.len() {
                // SAFETY: match_pos + 4 <= input.len() in this branch.
                unsafe { std::ptr::read_unaligned(input.as_ptr().add(match_pos) as *const u32) }
            } else {
                // Can't compare 4 bytes, follow chain
                if next_chain == 0 {
                    break;
                }
                match_pos = (next_chain - 1) as usize;
                depth += 1;
                continue;
            };

            // Quick check: if first 4 bytes don't match, skip this candidate.
            if match_prefix != cur_prefix {
                if next_chain == 0 {
                    break;
                }
                match_pos = (next_chain - 1) as usize;
                depth += 1;
                continue;
            }

            // First 4 bytes match - do full comparison (already have 4 matching).
            let length = 4 + self.match_length_from(input, match_pos + 4, pos + 4);

            // Prefer predicted offset on close matches: it gets a virtual
            // +2 length bonus used for the comparison only.
            let is_predicted = offset as u32 == self.predicted_offset;
            let effective_length = if is_predicted && length >= MIN_MATCH_LENGTH {
                length + 2
            } else {
                length
            };

            if effective_length > best_length {
                // NOTE(review): best_length records the raw length while the
                // comparison uses the boosted effective_length, so a
                // predicted-offset candidate can displace a raw match up to
                // 2 bytes longer. Looks intentional (repeat offsets encode
                // cheaper in zstd) — confirm.
                best_length = length;
                best_match = Some(Match::new(pos, offset, length));

                // Early exit using position-adaptive threshold.
                // Early in file: require longer matches (more thorough search).
                // Later in file: shorter matches are good enough (faster).
                let exit_threshold = self.early_exit_threshold(pos);
                if length >= exit_threshold {
                    break;
                }
            }

            // Follow chain to previous position with same hash.
            // Chain entries are generation-encoded: (gen << 28) | (pos + 1).
            if next_chain == 0 {
                break; // End of chain
            }
            match_pos = (next_chain - 1) as usize;

            depth += 1;
        }

        best_match
    }

    /// Calculate match length starting from given offsets.
    ///
    /// Used when we already know the first N bytes match (e.g., from prefix
    /// comparison); comparison starts at `pos1`/`pos2` and the caller adds
    /// back the bytes it already verified.
    ///
    /// Returns the number of equal bytes, capped at MAX_MATCH_LENGTH and at
    /// the shorter remaining span, or 0 if either position is out of bounds.
    #[inline(always)]
    pub fn match_length_from(&self, input: &[u8], pos1: usize, pos2: usize) -> usize {
        // Bounds check
        if pos1 >= input.len() || pos2 >= input.len() {
            return 0;
        }

        // Cap so both pos1 + max_len and pos2 + max_len stay within input.
        let max_len = (input.len() - pos2)
            .min(input.len() - pos1)
            .min(MAX_MATCH_LENGTH);

        if max_len == 0 {
            return 0;
        }

        // Use SIMD-accelerated comparison when feature is enabled.
        // Exactly one of the two cfg blocks below is compiled in, and it
        // forms the function's tail expression.
        #[cfg(feature = "simd")]
        {
            let src = &input[pos1..pos1 + max_len];
            let cur = &input[pos2..pos2 + max_len];
            haagenti_simd::find_match_length(src, cur, max_len)
        }

        // Optimized scalar fallback - compare 8 bytes at a time.
        #[cfg(not(feature = "simd"))]
        {
            let mut len = 0;

            // Compare 8 bytes at a time using u64.
            // SAFETY: len + 8 <= max_len, and max_len was capped above so
            // pos1 + max_len <= input.len() and pos2 + max_len <= input.len();
            // both unaligned reads stay inside `input`.
            while len + 8 <= max_len {
                let word1 = unsafe {
                    std::ptr::read_unaligned(input.as_ptr().add(pos1 + len) as *const u64)
                };
                let word2 = unsafe {
                    std::ptr::read_unaligned(input.as_ptr().add(pos2 + len) as *const u64)
                };

                let diff = word1 ^ word2;
                if diff != 0 {
                    // Find first differing byte using trailing zeros.
                    // NOTE(review): this maps trailing bits to the lowest-
                    // addressed byte only on little-endian targets; on a
                    // big-endian target the native-endian reads would make
                    // this pick the wrong byte and overstate the length —
                    // confirm LE-only targets, or use from_le_bytes.
                    len += (diff.trailing_zeros() / 8) as usize;
                    return len;
                }
                len += 8;
            }

            // Compare remaining bytes (up to 7).
            while len < max_len && input[pos1 + len] == input[pos2 + len] {
                len += 1;
            }

            len
        }
    }

    /// Find matches using speculative parallel lookahead.
    ///
    /// This method speculatively computes hashes for multiple positions at once,
    /// exploiting instruction-level parallelism in modern CPUs. The key insight
    /// is that hash computation and match lookup are independent operations that
    /// can be pipelined.
    ///
    /// Algorithm:
    /// 1. Compute hashes for next LOOKAHEAD positions in parallel
    /// 2. Look up potential matches for all positions
    /// 3. Select the best match among candidates
    /// 4. Skip to end of selected match
    ///
    /// Benefits:
    /// - Better instruction-level parallelism (ILP)
    /// - May find better matches by considering multiple positions
    /// - Reduces branch mispredictions by batching work
    ///
    /// Expected impact: +15-25% throughput on large data with varied content.
    ///
    /// NOTE(review): a hash value of 0 doubles as the "not computed" sentinel
    /// below, so a position whose real hash happens to be 0 is skipped in the
    /// candidate loop. That costs at most a missed candidate, never a wrong
    /// match — confirm the trade-off is intended.
    #[inline]
    pub fn find_matches_speculative(&mut self, input: &[u8]) -> Vec<Match> {
        const LOOKAHEAD: usize = 4;

        if input.len() < MIN_MATCH_LENGTH {
            return Vec::new();
        }

        self.reset(input.len());
        let mut matches = Vec::with_capacity(input.len() / 16);
        let mut pos = 0;
        // Main loop stops LOOKAHEAD early so the tail loop below can run
        // without speculative positions overrunning the buffer.
        let end = input.len().saturating_sub(MIN_MATCH_LENGTH + LOOKAHEAD);

        while pos <= end && pos + 4 <= input.len() {
            // Speculatively compute hashes for LOOKAHEAD positions.
            // Independent computations let the CPU pipeline them; 0 marks
            // positions too close to the end to hash.
            let hashes: [u32; LOOKAHEAD] = [
                self.hash4(input, pos),
                if pos + 5 <= input.len() {
                    self.hash4(input, pos + 1)
                } else {
                    0
                },
                if pos + 6 <= input.len() {
                    self.hash4(input, pos + 2)
                } else {
                    0
                },
                if pos + 7 <= input.len() {
                    self.hash4(input, pos + 3)
                } else {
                    0
                },
            ];

            // Find matches at all speculative positions.
            let mut best_match: Option<Match> = None;
            let mut best_score: usize = 0;

            for (i, &hash) in hashes.iter().enumerate() {
                if hash == 0 {
                    continue;
                }

                let check_pos = pos + i;
                if let Some(m) = self.find_best_match(input, check_pos, hash as usize) {
                    // Score: length dominates (x8); subtracting i breaks
                    // ties in favor of earlier positions.
                    let score = m.length * 8 - i;
                    if score > best_score {
                        best_score = score;
                        best_match = Some(m);
                    }
                }
            }

            if let Some(m) = best_match {
                // Update hash table for the skipped positions before the
                // chosen match (i < m.position - pos).
                for (i, &hash) in hashes
                    .iter()
                    .enumerate()
                    .take(LOOKAHEAD.min(m.position - pos))
                {
                    if pos + i + 4 <= input.len() {
                        self.update_hash(input, pos + i, hash as usize);
                    }
                }

                // Update hash at match position.
                self.update_hash(input, m.position, hashes[m.position - pos] as usize);

                matches.push(m);

                // Sparse updates during skip for long matches.
                if m.length >= 8 {
                    let skip_end = (m.position + m.length).min(end);
                    let mut update_pos = m.position + 4;
                    while update_pos < skip_end {
                        if update_pos + 4 <= input.len() {
                            let h = self.hash4(input, update_pos);
                            self.update_hash(input, update_pos, h as usize);
                        }
                        update_pos += 4;
                    }
                }

                pos = m.position + m.length;
            } else {
                // No match found - update hash and advance.
                self.update_hash(input, pos, hashes[0] as usize);
                pos += 1;
            }
        }

        // Handle remaining positions without lookahead.
        let final_end = input.len().saturating_sub(MIN_MATCH_LENGTH);
        while pos <= final_end && pos + 4 <= input.len() {
            let hash = self.hash4(input, pos);
            if let Some(m) = self.find_best_match(input, pos, hash as usize) {
                self.update_hash(input, pos, hash as usize);
                matches.push(m);
                pos += m.length;
            } else {
                self.update_hash(input, pos, hash as usize);
                pos += 1;
            }
        }

        matches
    }
}

825
/// Greedy-Lazy Hybrid Match Finder.
///
/// This match finder uses a smarter strategy than pure lazy matching:
/// - Commits immediately to long matches (>= `lazy_threshold`, default 24 bytes)
///   or predicted-offset matches
/// - Only performs lazy (1-position) lookahead for short "questionable" matches
/// - Resolves pending-vs-current conflicts by simple length comparison with a
///   +1 hysteresis (see `find_matches`)
///
/// This reduces overhead compared to always-lazy matching while maintaining
/// compression ratio improvements for cases where lazy evaluation helps.
#[derive(Debug)]
pub struct LazyMatchFinder {
    /// Inner greedy match finder (exposed for repeat offset finder)
    pub(super) inner: MatchFinder,
    /// Threshold for immediate commit (no lazy check). Matches at least this
    /// long are emitted without probing the next position; tuned per input
    /// size by `configure_for_size`.
    lazy_threshold: usize,
}
842
impl LazyMatchFinder {
    /// Create a new lazy match finder.
    ///
    /// `search_depth` bounds the number of hash-chain candidates the inner
    /// greedy finder examines per position.
    pub fn new(search_depth: usize) -> Self {
        Self {
            inner: MatchFinder::new(search_depth),
            lazy_threshold: 24, // Matches >= 24 bytes commit immediately
        }
    }

    /// Configure the lazy threshold based on input size.
    ///
    /// For larger inputs, we lower the threshold to commit earlier, improving
    /// throughput at a small cost to compression ratio.
    ///
    /// Scaling rationale:
    /// - Small inputs (<= 4KB): Full lazy evaluation (threshold = 24)
    /// - Medium inputs (4-16KB): Slight reduction (threshold = 20)
    /// - Large inputs (16-64KB): Moderate reduction (threshold = 16)
    /// - Very large inputs (64KB+): Aggressive (threshold = 12)
    /// - Huge inputs (256KB+): Most aggressive (threshold = 8)
    ///
    /// The minimum threshold is MIN_MATCH_LENGTH + 1 (4) to ensure lazy
    /// evaluation is still meaningful.
    #[inline]
    pub fn configure_for_size(&mut self, input_len: usize) {
        self.lazy_threshold = match input_len {
            0..=4096 => 24,       // Small: full lazy for best ratio
            4097..=16384 => 20,   // Medium: slight speedup
            16385..=65536 => 16,  // Large: balance speed and ratio
            65537..=262144 => 12, // Very large: favor throughput
            _ => 8,               // Huge: aggressive early commit
        };

        // Never go below minimum viable threshold
        self.lazy_threshold = self.lazy_threshold.max(MIN_MATCH_LENGTH + 1);
    }

    /// Find matches with automatic size-based configuration.
    ///
    /// This is the recommended method for general use. It automatically
    /// adjusts the lazy threshold based on input size for optimal
    /// throughput/ratio balance.
    ///
    /// For very large inputs (>= 128KB), uses chunked matching with a smaller
    /// L1-cache-friendly hash table for improved cache locality.
    #[inline]
    pub fn find_matches_auto(&mut self, input: &[u8]) -> Vec<Match> {
        self.configure_for_size(input.len());

        // Use chunked matching only for very large inputs (>= 128KB)
        // The 65KB block boundary is avoided since block sizes are max 128KB
        if input.len() >= 131072 {
            self.find_matches_chunked(input, 16384)
        } else {
            self.find_matches(input)
        }
    }

    /// Find matches using greedy-lazy hybrid strategy with offset prediction.
    ///
    /// Key optimizations:
    /// 1. Offset prediction: Try last successful offset before hash lookup
    /// 2. Immediate commit for long matches (>= lazy_threshold)
    /// 3. Only 1-position lookahead for short matches
    /// 4. Simple length comparison (no complex quality scoring)
    #[inline]
    pub fn find_matches(&mut self, input: &[u8]) -> Vec<Match> {
        if input.len() < MIN_MATCH_LENGTH {
            return Vec::new();
        }

        self.inner.reset(input.len());
        let mut matches = Vec::with_capacity(input.len() / 16);

        let mut pos = 0;
        let end = input.len().saturating_sub(MIN_MATCH_LENGTH);
        // A short match found at the previous position, awaiting comparison
        // against the match at the current position.
        let mut pending_match: Option<Match> = None;
        // Offset of the last emitted match; 0 means "no prediction yet".
        let mut predicted_offset: usize = 0;

        while pos <= end {
            // NOTE(review): the tail fallback hashes only 3 bytes into the
            // same table populated by 4-byte hashes; this assumes
            // find_best_match verifies candidates byte-wise so collisions
            // only cost time — confirm against MatchFinder's implementation.
            let hash = if pos + 4 <= input.len() {
                self.inner.hash4(input, pos)
            } else {
                self.inner.hash3(input, pos)
            };

            // Try prediction first (very fast for repetitive patterns)
            let current_match =
                if predicted_offset > 0 && pos >= predicted_offset && pos + 4 <= input.len() {
                    let match_pos = pos - predicted_offset;
                    // Load prefixes for comparison
                    // SAFETY: pos + 4 <= input.len() was checked above, so a
                    // 4-byte read at `pos` stays in bounds.
                    let cur_prefix =
                        unsafe { std::ptr::read_unaligned(input.as_ptr().add(pos) as *const u32) };
                    // SAFETY: match_pos < pos (predicted_offset > 0), hence
                    // match_pos + 4 <= input.len() as well.
                    let match_prefix = unsafe {
                        std::ptr::read_unaligned(input.as_ptr().add(match_pos) as *const u32)
                    };

                    if cur_prefix == match_prefix {
                        // Prediction hit - compute full match length
                        let length =
                            4 + self.inner.match_length_from(input, match_pos + 4, pos + 4);
                        if length >= MIN_MATCH_LENGTH {
                            Some(Match::new(pos, predicted_offset, length))
                        } else {
                            self.inner.find_best_match(input, pos, hash as usize)
                        }
                    } else {
                        self.inner.find_best_match(input, pos, hash as usize)
                    }
                } else {
                    self.inner.find_best_match(input, pos, hash as usize)
                };

            if let Some(curr) = current_match {
                // Update prediction with current match offset
                predicted_offset = curr.offset;

                if let Some(pending) = pending_match.take() {
                    // Compare: current must be significantly better to replace pending
                    // (+1 hysteresis avoids ping-ponging on equal-length matches)
                    if curr.length > pending.length + 1 {
                        // Current is better - use it
                        matches.push(curr);
                        self.inner.update_hash(input, pos, hash as usize);
                        pos += curr.length;
                    } else {
                        // Pending is good enough - use pending
                        matches.push(pending);
                        pos = pending.position + pending.length;
                    }
                } else {
                    // No pending - decide to commit or defer
                    if curr.length >= self.lazy_threshold || pos + 1 > end {
                        // Long match or near end - commit immediately
                        matches.push(curr);
                        self.inner.update_hash(input, pos, hash as usize);
                        pos += curr.length;
                    } else {
                        // Short match - defer and check next position
                        pending_match = Some(curr);
                        self.inner.update_hash(input, pos, hash as usize);
                        pos += 1;
                    }
                }
            } else {
                // No match at current position
                if let Some(pending) = pending_match.take() {
                    matches.push(pending);
                    pos = pending.position + pending.length;
                } else {
                    self.inner.update_hash(input, pos, hash as usize);
                    pos += 1;
                }
            }
        }

        // Emit any remaining pending match
        if let Some(pending) = pending_match {
            matches.push(pending);
        }

        matches
    }

    /// Find matches using block chunking for improved cache locality.
    ///
    /// For large inputs (> chunk_size), this processes the input in independent
    /// chunks using a small, cache-friendly hash table. This dramatically improves
    /// cache hit rates since the hash table and chain table fit in L1/L2 cache.
    ///
    /// Key benefits:
    /// - Small hash table (16KB: 4096 entries x 4 bytes) fits in L1 cache
    /// - Chain table per chunk is small (~chunk_size * 4 bytes)
    /// - Reduced TLB pressure from smaller working set
    /// - Fresh hash table per chunk = zero collision chains initially
    ///
    /// The tradeoff is that matches cannot span chunk boundaries, which may
    /// slightly reduce compression ratio. In practice, the throughput gain
    /// significantly outweighs this cost for large inputs.
    ///
    /// # Arguments
    ///
    /// * `input` - The data to find matches in
    /// * `chunk_size` - Size of each chunk (16384 recommended for L1 cache fit)
    ///
    /// # Returns
    ///
    /// Matches with positions relative to the original input (not chunks).
    #[inline]
    pub fn find_matches_chunked(&mut self, input: &[u8], chunk_size: usize) -> Vec<Match> {
        // For small inputs, just use standard matching
        if input.len() <= chunk_size {
            return self.find_matches(input);
        }

        let chunk_size = chunk_size.max(1024); // Minimum reasonable chunk
        let mut all_matches = Vec::with_capacity(input.len() / 16);
        let mut chunk_start = 0;

        // Use a small, cache-friendly hash table for chunked processing
        // 12-bit = 4096 entries = 16KB, fits in L1 cache
        const CHUNK_HASH_LOG: usize = 12;
        const CHUNK_HASH_SIZE: usize = 1 << CHUNK_HASH_LOG;
        const CHUNK_HASH_MASK: u32 = (CHUNK_HASH_SIZE - 1) as u32;

        let mut chunk_hash = vec![0u32; CHUNK_HASH_SIZE];
        let mut chunk_chain = vec![0u32; chunk_size];
        let search_depth = self.inner.search_depth;

        while chunk_start < input.len() {
            let chunk_end = (chunk_start + chunk_size).min(input.len());
            let chunk = &input[chunk_start..chunk_end];

            if chunk.len() >= MIN_MATCH_LENGTH {
                // Reset the small hash table (fast - only 16KB)
                chunk_hash.fill(0);
                if chunk_chain.len() < chunk.len() {
                    chunk_chain.resize(chunk.len(), 0);
                }
                chunk_chain[..chunk.len()].fill(0);

                // Process this chunk using the small hash table
                let chunk_matches = Self::find_matches_in_chunk(
                    chunk,
                    &mut chunk_hash,
                    &mut chunk_chain,
                    CHUNK_HASH_LOG,
                    CHUNK_HASH_MASK,
                    search_depth,
                    self.lazy_threshold,
                );

                // Adjust positions to be relative to original input
                for m in chunk_matches {
                    all_matches.push(Match::new(chunk_start + m.position, m.offset, m.length));
                }
            }

            chunk_start = chunk_end;
        }

        all_matches
    }

    /// Process a single chunk with a small hash table.
    /// This is a simplified version of find_matches optimized for cache locality.
    /// Positions in the returned matches are relative to `chunk`; the caller
    /// rebases them onto the full input. No offset prediction is performed here.
    #[inline]
    fn find_matches_in_chunk(
        chunk: &[u8],
        hash_table: &mut [u32],
        chain_table: &mut [u32],
        hash_log: usize,
        hash_mask: u32,
        search_depth: usize,
        lazy_threshold: usize,
    ) -> Vec<Match> {
        if chunk.len() < MIN_MATCH_LENGTH {
            return Vec::new();
        }

        let mut matches = Vec::with_capacity(chunk.len() / 32);
        let end = chunk.len().saturating_sub(MIN_MATCH_LENGTH);
        let mut pos = 0;
        let mut pending: Option<Match> = None;

        while pos <= end {
            // Compute hash (simplified, using same algorithm as main finder)
            let hash = if pos + 4 <= chunk.len() {
                // SAFETY: pos + 4 <= chunk.len() checked just above, so the
                // 4-byte read at `pos` stays in bounds.
                let bytes =
                    unsafe { std::ptr::read_unaligned(chunk.as_ptr().add(pos) as *const u32) };
                let h = bytes.wrapping_mul(HASH_PRIME);
                let h = h ^ (h >> 15);
                let h = h.wrapping_mul(HASH_PRIME2);
                (h >> (32 - hash_log as u32)) & hash_mask
            } else {
                // Tail: hash the last 3 bytes (pos <= end guarantees
                // pos + MIN_MATCH_LENGTH <= chunk.len()).
                let b0 = chunk[pos] as u32;
                let b1 = chunk[pos + 1] as u32;
                let b2 = chunk[pos + 2] as u32;
                let value = b0 | (b1 << 8) | (b2 << 16);
                (value.wrapping_mul(HASH_PRIME) >> (32 - hash_log as u32)) & hash_mask
            };

            // Find best match at this position
            let current_match = Self::find_best_match_in_chunk(
                chunk,
                pos,
                hash as usize,
                hash_table,
                chain_table,
                search_depth,
            );

            // Lazy matching logic (same pending/commit scheme as find_matches)
            if let Some(curr) = current_match {
                if let Some(pend) = pending.take() {
                    if curr.length > pend.length + 1 {
                        matches.push(curr);
                        Self::update_chunk_hash(pos, hash as usize, hash_table, chain_table);
                        pos += curr.length;
                    } else {
                        matches.push(pend);
                        pos = pend.position + pend.length;
                    }
                } else if curr.length >= lazy_threshold || pos + 1 > end {
                    matches.push(curr);
                    Self::update_chunk_hash(pos, hash as usize, hash_table, chain_table);
                    pos += curr.length;
                } else {
                    pending = Some(curr);
                    Self::update_chunk_hash(pos, hash as usize, hash_table, chain_table);
                    pos += 1;
                }
            } else if let Some(pend) = pending.take() {
                matches.push(pend);
                pos = pend.position + pend.length;
            } else {
                Self::update_chunk_hash(pos, hash as usize, hash_table, chain_table);
                pos += 1;
            }
        }

        if let Some(pend) = pending {
            matches.push(pend);
        }

        matches
    }

    /// Insert `pos` at the head of the hash bucket, linking the previous head
    /// into the chain table. Entries are stored as `pos + 1` so that 0 can
    /// mean "empty".
    #[inline(always)]
    fn update_chunk_hash(pos: usize, hash: usize, hash_table: &mut [u32], chain_table: &mut [u32]) {
        let prev = hash_table[hash];
        if pos < chain_table.len() {
            chain_table[pos] = prev;
        }
        hash_table[hash] = (pos + 1) as u32;
    }

    /// Walk the hash chain for `hash` and return the longest verified match
    /// at `pos`, or `None` if the bucket is empty or nothing of length
    /// >= MIN_MATCH_LENGTH is found. Bounded by `search_depth` chain steps.
    #[inline]
    fn find_best_match_in_chunk(
        chunk: &[u8],
        pos: usize,
        hash: usize,
        hash_table: &[u32],
        chain_table: &[u32],
        search_depth: usize,
    ) -> Option<Match> {
        let hash_entry = hash_table[hash];
        if hash_entry == 0 {
            return None;
        }

        // Entries store pos + 1; decode and reject self/forward references
        // and tail positions with fewer than 4 readable bytes.
        let mut match_pos = (hash_entry - 1) as usize;
        if match_pos >= pos || pos + 4 > chunk.len() {
            return None;
        }

        // SAFETY: pos + 4 <= chunk.len() was verified above.
        let cur_prefix = unsafe { std::ptr::read_unaligned(chunk.as_ptr().add(pos) as *const u32) };

        let mut best_match: Option<Match> = None;
        let mut best_length = MIN_MATCH_LENGTH - 1;
        let mut depth = 0;

        while depth < search_depth && match_pos < pos {
            let offset = pos - match_pos;

            // Fast prefix check
            if match_pos + 4 <= chunk.len() {
                // SAFETY: match_pos + 4 <= chunk.len() checked just above.
                let match_prefix = unsafe {
                    std::ptr::read_unaligned(chunk.as_ptr().add(match_pos) as *const u32)
                };

                if match_prefix == cur_prefix {
                    // Count matching bytes after the first 4
                    let max_len = (chunk.len() - pos).min(chunk.len() - match_pos);
                    let mut length = 4;
                    while length < max_len && chunk[match_pos + length] == chunk[pos + length] {
                        length += 1;
                    }

                    if length > best_length {
                        best_length = length;
                        best_match = Some(Match::new(pos, offset, length));

                        if length >= 64 {
                            break; // Good enough
                        }
                    }
                }
            }

            // Follow chain
            if match_pos < chain_table.len() {
                let next = chain_table[match_pos];
                if next == 0 {
                    break;
                }
                match_pos = (next - 1) as usize;
            } else {
                break;
            }

            depth += 1;
        }

        best_match
    }
}
1249
1250// =============================================================================
1251// Two-Tier Hash Match Finder (Phase 2.2)
1252// =============================================================================
1253
1254/// Hash table size for the long (8-byte) hash table.
1255/// Smaller than the short table since 8-byte matches are less common.
1256#[allow(dead_code)]
1257const LONG_HASH_LOG: usize = 14;
1258#[allow(dead_code)]
1259const LONG_HASH_SIZE: usize = 1 << LONG_HASH_LOG;
1260
/// Two-Tier Hash Table Match Finder.
///
/// Uses separate hash tables for 4-byte and 8-byte prefixes to improve
/// both match quality and search speed:
///
/// - **Short hash (4-byte)**: Standard hash table for finding any match >= 4 bytes
/// - **Long hash (8-byte)**: Optimized for finding long matches quickly
///
/// When searching, we first check the 8-byte hash table. If we find a match
/// there, it's likely to be long (since 8 bytes matched). This reduces the
/// time spent in hash chain traversal for repetitive data.
///
/// Key benefits:
/// - 8-byte hash has fewer collisions → shorter chains
/// - Long matches are found faster → earlier exit
/// - Maintains full match finding capability via 4-byte fallback
#[allow(dead_code)]
pub struct TwoTierMatchFinder {
    /// Standard 4-byte hash table and chain
    short_hash: Box<AlignedHashTable>,
    /// Chain links for the short table; `short_chain[p]` holds the previous
    /// bucket head (as position + 1, 0 = end of chain).
    short_chain: Vec<u32>,
    /// 8-byte hash table for long match candidates
    long_hash: Vec<u32>,
    /// Chain links for the long table, same encoding as `short_chain`.
    long_chain: Vec<u32>,
    /// Configuration
    /// Maximum chain entries examined per lookup (halved for the long table).
    search_depth: usize,
    /// Length of the input the tables are currently sized for.
    input_len: usize,
}
1289
1290// Manual Debug impl since AlignedHashTable doesn't derive Debug
1291impl core::fmt::Debug for TwoTierMatchFinder {
1292    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1293        f.debug_struct("TwoTierMatchFinder")
1294            .field(
1295                "short_hash",
1296                &format_args!("[AlignedHashTable; {}]", HASH_SIZE),
1297            )
1298            .field("short_chain_len", &self.short_chain.len())
1299            .field("long_hash_len", &self.long_hash.len())
1300            .field("long_chain_len", &self.long_chain.len())
1301            .field("search_depth", &self.search_depth)
1302            .field("input_len", &self.input_len)
1303            .finish()
1304    }
1305}
1306
#[allow(dead_code)]
impl TwoTierMatchFinder {
    /// Create a new two-tier match finder.
    ///
    /// `search_depth` is clamped to [1, 128]; the long-table search uses
    /// half of it (see `find_long_match`).
    pub fn new(search_depth: usize) -> Self {
        Self {
            short_hash: AlignedHashTable::new_boxed(),
            short_chain: Vec::new(),
            long_hash: vec![0u32; LONG_HASH_SIZE],
            long_chain: Vec::new(),
            search_depth: search_depth.clamp(1, 128),
            input_len: 0,
        }
    }

    /// Reset for new input.
    ///
    /// Clears both hash tables and resizes/zeroes both chain tables to
    /// `input_len` entries, reusing existing capacity where possible.
    fn reset(&mut self, input_len: usize) {
        self.short_hash.reset();
        // Avoid resize + fill double-zero
        if self.short_chain.len() < input_len {
            self.short_chain.clear();
            self.short_chain.resize(input_len, 0);
        } else {
            self.short_chain.truncate(input_len);
            self.short_chain.fill(0);
        }

        self.long_hash.fill(0);
        // Avoid resize + fill double-zero
        if self.long_chain.len() < input_len {
            self.long_chain.clear();
            self.long_chain.resize(input_len, 0);
        } else {
            self.long_chain.truncate(input_len);
            self.long_chain.fill(0);
        }

        self.input_len = input_len;
    }

    /// Compute 4-byte hash (same as MatchFinder).
    ///
    /// Caller must guarantee `pos + 4 <= data.len()` (debug-asserted only;
    /// release builds rely on the caller's bounds check).
    #[inline(always)]
    fn hash4(&self, data: &[u8], pos: usize) -> u32 {
        debug_assert!(pos + 4 <= data.len());
        // SAFETY: caller guarantees pos + 4 <= data.len().
        let bytes = unsafe { std::ptr::read_unaligned(data.as_ptr().add(pos) as *const u32) };
        let h = bytes.wrapping_mul(HASH_PRIME);
        let h = h ^ (h >> 15);
        let h = h.wrapping_mul(HASH_PRIME2);
        h >> (32 - HASH_LOG as u32)
    }

    /// Compute 8-byte hash for long match candidates.
    ///
    /// Caller must guarantee `pos + 8 <= data.len()` (debug-asserted only).
    #[inline(always)]
    fn hash8(&self, data: &[u8], pos: usize) -> u32 {
        debug_assert!(pos + 8 <= data.len());
        // SAFETY: caller guarantees pos + 8 <= data.len().
        let bytes = unsafe { std::ptr::read_unaligned(data.as_ptr().add(pos) as *const u64) };
        // Use a different mixing strategy for 8 bytes:
        // fold the two 32-bit halves together, then mix.
        let h = (bytes as u32) ^ ((bytes >> 32) as u32);
        let h = h.wrapping_mul(HASH_PRIME);
        let h = h ^ (h >> 17);
        let h = h.wrapping_mul(HASH_PRIME2);
        h >> (32 - LONG_HASH_LOG as u32)
    }

    /// Update both hash tables.
    ///
    /// Entries are stored as `pos + 1` so that 0 means "empty bucket";
    /// the displaced head is linked into the corresponding chain table.
    #[inline(always)]
    fn update_hashes(&mut self, data: &[u8], pos: usize) {
        // Update 4-byte hash
        if pos + 4 <= data.len() {
            let h4 = self.hash4(data, pos) as usize;
            let prev = self.short_hash.get(h4);
            if pos < self.short_chain.len() {
                self.short_chain[pos] = prev;
            }
            self.short_hash.set(h4, (pos + 1) as u32);
        }

        // Update 8-byte hash
        if pos + 8 <= data.len() {
            let h8 = self.hash8(data, pos) as usize;
            let prev = self.long_hash[h8];
            if pos < self.long_chain.len() {
                self.long_chain[pos] = prev;
            }
            self.long_hash[h8] = (pos + 1) as u32;
        }
    }

    /// Find all matches using the two-tier approach.
    ///
    /// Greedy strategy: at each position try the long (8-byte) table first,
    /// fall back to the short (4-byte) table, then skip past any match found.
    pub fn find_matches(&mut self, input: &[u8]) -> Vec<Match> {
        if input.len() < MIN_MATCH_LENGTH {
            return Vec::new();
        }

        self.reset(input.len());
        let mut matches = Vec::with_capacity(input.len() / 16);
        let mut pos = 0;
        let end = input.len().saturating_sub(MIN_MATCH_LENGTH);

        while pos <= end {
            let mut best_match: Option<Match> = None;

            // Try 8-byte hash first (if we have enough bytes)
            if pos + 8 <= input.len() {
                best_match = self.find_long_match(input, pos);
            }

            // Fall back to 4-byte hash if no long match found
            if best_match.is_none() && pos + 4 <= input.len() {
                best_match = self.find_short_match(input, pos);
            }

            // Update hash tables
            self.update_hashes(input, pos);

            if let Some(m) = best_match {
                matches.push(m);
                // Sparse updates during skip: index every 4th position of a
                // long match so later data can still find it.
                // NOTE(review): skip_end is capped at `end` (last matchable
                // start), not input.len() — presumably intentional since
                // positions past `end` are never looked up; confirm.
                if m.length >= 8 {
                    let skip_end = (pos + m.length).min(end);
                    let mut update_pos = pos + 4;
                    while update_pos < skip_end {
                        self.update_hashes(input, update_pos);
                        update_pos += 4;
                    }
                }
                pos += m.length;
            } else {
                pos += 1;
            }
        }

        matches
    }

    /// Find match using 8-byte hash table.
    ///
    /// Only returns matches of length >= 8; uses half the configured search
    /// depth since candidates here are already high-quality.
    /// Caller must guarantee `pos + 8 <= input.len()` (checked in
    /// `find_matches` before calling).
    #[inline]
    fn find_long_match(&self, input: &[u8], pos: usize) -> Option<Match> {
        let h8 = self.hash8(input, pos) as usize;
        let hash_entry = self.long_hash[h8];

        if hash_entry == 0 {
            return None;
        }

        // Entries store pos + 1; decode and reject self/forward references.
        let mut match_pos = (hash_entry - 1) as usize;
        if match_pos >= pos {
            return None;
        }

        // Load 8-byte prefix for comparison
        // SAFETY: caller guarantees pos + 8 <= input.len().
        let cur_prefix = unsafe { std::ptr::read_unaligned(input.as_ptr().add(pos) as *const u64) };

        let mut best_match: Option<Match> = None;
        let mut best_length = 7; // Only accept matches >= 8 from this table
        let mut depth = 0;

        while depth < self.search_depth / 2 && match_pos < pos {
            let offset = pos - match_pos;

            if match_pos + 8 <= input.len() {
                // SAFETY: match_pos + 8 <= input.len() checked just above.
                let match_prefix = unsafe {
                    std::ptr::read_unaligned(input.as_ptr().add(match_pos) as *const u64)
                };

                if match_prefix == cur_prefix {
                    // 8 bytes match - extend
                    let mut length = 8;
                    let max_len = (input.len() - pos).min(input.len() - match_pos);
                    while length < max_len && input[match_pos + length] == input[pos + length] {
                        length += 1;
                    }

                    if length > best_length {
                        best_length = length;
                        best_match = Some(Match::new(pos, offset, length));

                        // Early exit for excellent matches
                        if length >= 32 {
                            return best_match;
                        }
                    }
                }
            }

            // Follow chain
            if match_pos < self.long_chain.len() {
                let next = self.long_chain[match_pos];
                if next == 0 {
                    break;
                }
                match_pos = (next - 1) as usize;
            } else {
                break;
            }

            depth += 1;
        }

        best_match
    }

    /// Find match using 4-byte hash table.
    ///
    /// Caller must guarantee `pos + 4 <= input.len()` (checked in
    /// `find_matches` before calling).
    #[inline]
    fn find_short_match(&self, input: &[u8], pos: usize) -> Option<Match> {
        let h4 = self.hash4(input, pos) as usize;
        let hash_entry = self.short_hash.get(h4);

        if hash_entry == 0 {
            return None;
        }

        // Entries store pos + 1; decode and reject self/forward references.
        let mut match_pos = (hash_entry - 1) as usize;
        if match_pos >= pos {
            return None;
        }

        // SAFETY: caller guarantees pos + 4 <= input.len().
        let cur_prefix = unsafe { std::ptr::read_unaligned(input.as_ptr().add(pos) as *const u32) };

        let mut best_match: Option<Match> = None;
        let mut best_length = MIN_MATCH_LENGTH - 1;
        let mut depth = 0;

        while depth < self.search_depth && match_pos < pos {
            let offset = pos - match_pos;

            if match_pos + 4 <= input.len() {
                // SAFETY: match_pos + 4 <= input.len() checked just above.
                let match_prefix = unsafe {
                    std::ptr::read_unaligned(input.as_ptr().add(match_pos) as *const u32)
                };

                if match_prefix == cur_prefix {
                    let mut length = 4;
                    let max_len = (input.len() - pos).min(input.len() - match_pos);
                    while length < max_len && input[match_pos + length] == input[pos + length] {
                        length += 1;
                    }

                    if length > best_length {
                        best_length = length;
                        best_match = Some(Match::new(pos, offset, length));

                        if length >= 24 {
                            return best_match;
                        }
                    }
                }
            }

            // Follow chain
            if match_pos < self.short_chain.len() {
                let next = self.short_chain[match_pos];
                if next == 0 {
                    break;
                }
                match_pos = (next - 1) as usize;
            } else {
                break;
            }

            depth += 1;
        }

        best_match
    }
}
1572
1573// =============================================================================
1574// Tests
1575// =============================================================================
1576
1577#[cfg(test)]
1578mod tests {
1579    use super::*;
1580
1581    #[test]
1582    fn test_match_creation() {
1583        let m = Match::new(10, 5, 4);
1584        assert_eq!(m.position, 10);
1585        assert_eq!(m.offset, 5);
1586        assert_eq!(m.length, 4);
1587    }
1588
1589    #[test]
1590    fn test_match_finder_creation() {
1591        let mf = MatchFinder::new(16);
1592        assert_eq!(mf.search_depth, 16);
1593    }
1594
1595    #[test]
1596    fn test_no_matches_short_input() {
1597        let mut mf = MatchFinder::new(16);
1598        let matches = mf.find_matches(b"ab");
1599        assert!(matches.is_empty());
1600    }
1601
1602    #[test]
1603    fn test_no_matches_unique() {
1604        let mut mf = MatchFinder::new(16);
1605        let matches = mf.find_matches(b"abcdefghij");
1606        assert!(matches.is_empty());
1607    }
1608
1609    #[test]
1610    fn test_simple_repeat() {
1611        let mut mf = MatchFinder::new(16);
1612        // "abcdabcd" - "abcd" repeats at offset 4
1613        let matches = mf.find_matches(b"abcdabcd");
1614
1615        assert!(!matches.is_empty());
1616        let m = &matches[0];
1617        assert_eq!(m.position, 4);
1618        assert_eq!(m.offset, 4);
1619        assert_eq!(m.length, 4);
1620    }
1621
1622    #[test]
1623    fn test_overlapping_repeat() {
1624        let mut mf = MatchFinder::new(16);
1625        // "aaaaaa" - RLE-like pattern
1626        let matches = mf.find_matches(b"aaaaaaaaa");
1627
1628        assert!(!matches.is_empty());
1629        // Should find long match with small offset
1630        let has_rle = matches.iter().any(|m| m.offset <= 4 && m.length >= 3);
1631        assert!(has_rle);
1632    }
1633
1634    #[test]
1635    fn test_multiple_matches() {
1636        let mut mf = MatchFinder::new(16);
1637        // Multiple repeated patterns
1638        let input = b"abcdXabcdYabcdZ";
1639        let matches = mf.find_matches(input);
1640
1641        // Should find matches (abcd repeats)
1642        assert!(
1643            matches.len() >= 1,
1644            "Expected at least one match, got {:?}",
1645            matches
1646        );
1647    }
1648
1649    #[test]
1650    fn test_long_match() {
1651        let mut mf = MatchFinder::new(16);
1652        // Long repeated sequence
1653        let input = b"0123456789ABCDEF0123456789ABCDEF";
1654        let matches = mf.find_matches(input);
1655
1656        assert!(!matches.is_empty());
1657        let m = &matches[0];
1658        assert_eq!(m.length, 16); // Full repeat
1659        assert_eq!(m.offset, 16);
1660    }
1661
1662    #[test]
1663    fn test_hash_distribution() {
1664        let mf = MatchFinder::new(16);
1665        let input = b"testdatatestdata";
1666
1667        // Different positions should produce different hashes
1668        let h1 = mf.hash4(input, 0);
1669        let h2 = mf.hash4(input, 4);
1670        let h3 = mf.hash4(input, 8);
1671
1672        // Same data at different positions should hash the same
1673        assert_eq!(h1, h3); // "test" at 0 and 8
1674        assert_ne!(h1, h2); // "test" vs "data"
1675    }
1676
1677    #[test]
1678    fn test_search_depth_limit() {
1679        let mut mf = MatchFinder::new(1);
1680        let input = b"abcXabcYabcZ";
1681        let matches = mf.find_matches(input);
1682
1683        // Should still find matches, but may not be optimal
1684        assert!(matches.len() <= 3);
1685    }
1686
1687    #[test]
1688    fn test_match_length_calculation() {
1689        let mut mf = MatchFinder::new(16);
1690        let input = b"hellohello";
1691        mf.reset(input.len());
1692
1693        // match_length_from compares from pos1 and pos2 onwards
1694        let len = mf.match_length_from(input, 0, 5);
1695        assert_eq!(len, 5); // "hello" matches
1696    }
1697
1698    #[test]
1699    fn test_lazy_match_finder() {
1700        let mut mf = LazyMatchFinder::new(16);
1701        let input = b"abcdefabcdefXabcdefabcdef";
1702        let matches = mf.find_matches(input);
1703
1704        // Should find good matches
1705        assert!(!matches.is_empty());
1706    }
1707
1708    #[test]
1709    fn test_large_input() {
1710        let mut mf = MatchFinder::new(32);
1711
1712        // Generate input with repeating patterns
1713        let mut input = Vec::with_capacity(100_000);
1714        let pattern = b"The quick brown fox jumps over the lazy dog. ";
1715        while input.len() < 100_000 {
1716            input.extend_from_slice(pattern);
1717        }
1718
1719        let matches = mf.find_matches(&input);
1720
1721        // Should find some matches in repetitive data
1722        // (count depends on match lengths - longer matches = fewer count)
1723        assert!(
1724            !matches.is_empty(),
1725            "Expected to find matches in repetitive data"
1726        );
1727
1728        // Verify compression potential - this is the key metric
1729        let total_match_len: usize = matches.iter().map(|m| m.length).sum();
1730        assert!(
1731            total_match_len > input.len() / 4,
1732            "Expected matches to cover at least 25% of input, got {} / {}",
1733            total_match_len,
1734            input.len()
1735        );
1736    }
1737
1738    #[test]
1739    fn test_aligned_hash_table_alignment() {
1740        // Verify the AlignedHashTable is properly 64-byte aligned
1741        let table = AlignedHashTable::new_boxed();
1742        let addr = table.data.as_ptr() as usize;
1743
1744        // Address should be 64-byte aligned
1745        assert_eq!(
1746            addr % 64,
1747            0,
1748            "Hash table data should be 64-byte aligned, got address {:x}",
1749            addr
1750        );
1751
1752        // Also verify the struct itself starts at aligned boundary
1753        let struct_addr = &*table as *const _ as usize;
1754        assert_eq!(
1755            struct_addr % 64,
1756            0,
1757            "AlignedHashTable struct should be 64-byte aligned, got address {:x}",
1758            struct_addr
1759        );
1760    }
1761
1762    #[test]
1763    fn test_aligned_hash_table_operations() {
1764        let mut table = AlignedHashTable::new_boxed();
1765
1766        // Test that table is initially zeroed
1767        for i in 0..HASH_SIZE {
1768            assert_eq!(table.get(i), 0);
1769        }
1770
1771        // Test set/get
1772        table.set(0, 42);
1773        table.set(HASH_SIZE - 1, 123);
1774        assert_eq!(table.get(0), 42);
1775        assert_eq!(table.get(HASH_SIZE - 1), 123);
1776
1777        // Test reset
1778        table.reset();
1779        assert_eq!(table.get(0), 0);
1780        assert_eq!(table.get(HASH_SIZE - 1), 0);
1781    }
1782
1783    // =========================================================================
1784    // Adaptive Search Depth Tests
1785    // =========================================================================
1786
1787    #[test]
1788    fn test_adaptive_depth_scales_with_size() {
1789        let finder = MatchFinder::new(16);
1790
1791        // Small inputs: full depth
1792        assert_eq!(finder.effective_depth(1024), 16);
1793        assert_eq!(finder.effective_depth(4096), 16);
1794
1795        // Medium inputs: reduced depth (90%)
1796        assert_eq!(finder.effective_depth(8192), 14);
1797        assert_eq!(finder.effective_depth(16384), 14);
1798
1799        // Large inputs: 75% depth (less aggressive for better ratio)
1800        assert_eq!(finder.effective_depth(65536), 12);
1801
1802        // Very large inputs: 50% depth
1803        assert_eq!(finder.effective_depth(262144), 8);
1804    }
1805
1806    #[test]
1807    fn test_adaptive_depth_respects_minimum() {
1808        let finder = MatchFinder::new(4);
1809
1810        // Even with adaptive scaling, never go below reasonable minimum
1811        assert!(finder.effective_depth(262144) >= 2);
1812        assert!(finder.effective_depth(1_000_000) >= 2);
1813    }
1814
1815    #[test]
1816    fn test_adaptive_depth_respects_configured_max() {
1817        let finder_shallow = MatchFinder::new(4);
1818        let finder_deep = MatchFinder::new(64);
1819
1820        // Adaptive depth should scale from configured value
1821        assert!(finder_shallow.effective_depth(1024) <= 4);
1822        assert!(finder_deep.effective_depth(1024) <= 64);
1823
1824        // Large input scaling maintains proportions
1825        let shallow_large = finder_shallow.effective_depth(65536);
1826        let deep_large = finder_deep.effective_depth(65536);
1827        assert!(deep_large > shallow_large);
1828    }
1829
1830    #[test]
1831    fn test_adaptive_finder_large_input_throughput() {
1832        use std::time::Instant;
1833
1834        // Generate large repetitive data
1835        let mut data = Vec::with_capacity(65536);
1836        let pattern = b"The quick brown fox jumps over the lazy dog. ";
1837        while data.len() < 65536 {
1838            data.extend_from_slice(pattern);
1839        }
1840
1841        let mut finder = MatchFinder::new(16);
1842
1843        // Warm up
1844        for _ in 0..3 {
1845            let _ = finder.find_matches(&data);
1846        }
1847
1848        // Measure throughput
1849        let iterations = 20;
1850        let start = Instant::now();
1851        for _ in 0..iterations {
1852            let _ = finder.find_matches(&data);
1853        }
1854        let elapsed = start.elapsed();
1855
1856        let total_bytes = data.len() * iterations;
1857        let throughput_mib = total_bytes as f64 / elapsed.as_secs_f64() / (1024.0 * 1024.0);
1858
1859        // With adaptive depth, should achieve reasonable throughput on large data
1860        // Note: threshold is conservative for CI environments
1861        assert!(
1862            throughput_mib >= 30.0,
1863            "Large input throughput {:.1} MiB/s below target 30 MiB/s",
1864            throughput_mib
1865        );
1866    }
1867
1868    #[test]
1869    fn test_adaptive_finder_maintains_compression_quality() {
1870        // Generate compressible data
1871        let mut data = Vec::with_capacity(65536);
1872        let pattern = b"compression test data with repeating patterns ";
1873        while data.len() < 65536 {
1874            data.extend_from_slice(pattern);
1875        }
1876
1877        let mut finder = MatchFinder::new(16);
1878        let matches = finder.find_matches(&data);
1879
1880        // Calculate match coverage
1881        let total_match_bytes: usize = matches.iter().map(|m| m.length).sum();
1882        let coverage = total_match_bytes as f64 / data.len() as f64;
1883
1884        // Even with adaptive depth, should find good matches in repetitive data
1885        assert!(
1886            coverage >= 0.70,
1887            "Match coverage {:.1}% below target 70%",
1888            coverage * 100.0
1889        );
1890    }
1891
1892    // =========================================================================
1893    // Block Chunking Tests (Phase 2.1)
1894    // =========================================================================
1895
1896    #[test]
1897    fn test_chunked_matching_correctness() {
1898        let mut data = Vec::with_capacity(65536);
1899        let pattern = b"The quick brown fox jumps over the lazy dog. ";
1900        while data.len() < 65536 {
1901            data.extend_from_slice(pattern);
1902        }
1903
1904        let mut finder = LazyMatchFinder::new(16);
1905
1906        // Standard matching
1907        let standard_matches = finder.find_matches(&data);
1908
1909        // Chunked matching (resets finder internally per chunk)
1910        let chunked_matches = finder.find_matches_chunked(&data, 16384);
1911
1912        // Both should produce valid matches (positions within bounds)
1913        for m in &standard_matches {
1914            assert!(m.position + m.length <= data.len());
1915            assert!(m.position >= m.offset);
1916        }
1917        for m in &chunked_matches {
1918            assert!(m.position + m.length <= data.len());
1919            assert!(m.position >= m.offset);
1920        }
1921
1922        // Chunked should cover reasonable portion of input
1923        let chunked_coverage: usize = chunked_matches.iter().map(|m| m.length).sum();
1924        let min_coverage = data.len() / 2; // At least 50%
1925        assert!(
1926            chunked_coverage >= min_coverage,
1927            "Chunked coverage {} below minimum {}",
1928            chunked_coverage,
1929            min_coverage
1930        );
1931    }
1932
1933    #[test]
1934    fn test_chunked_matching_performance_reasonable() {
1935        use std::time::Instant;
1936
1937        // Generate large data (256KB) with varied content
1938        // Mix of repetitive and varied patterns to simulate real data
1939        let mut data = Vec::with_capacity(262144);
1940        let pattern = b"The quick brown fox jumps over the lazy dog. ";
1941        for i in 0..262144 {
1942            // Mix of patterns: some repetitive, some varied
1943            if i % 1024 < 512 {
1944                data.push(pattern[i % pattern.len()]);
1945            } else {
1946                data.push((i as u8).wrapping_mul(17).wrapping_add(i as u8 >> 4));
1947            }
1948        }
1949
1950        let mut finder = LazyMatchFinder::new(16);
1951
1952        // Warm up
1953        for _ in 0..2 {
1954            let _ = finder.find_matches(&data);
1955            let _ = finder.find_matches_chunked(&data, 16384);
1956        }
1957
1958        // Measure standard matching
1959        let iterations = 10;
1960        let start = Instant::now();
1961        for _ in 0..iterations {
1962            let _ = finder.find_matches(&data);
1963        }
1964        let standard_time = start.elapsed();
1965
1966        // Measure chunked matching
1967        let start = Instant::now();
1968        for _ in 0..iterations {
1969            let _ = finder.find_matches_chunked(&data, 16384);
1970        }
1971        let chunked_time = start.elapsed();
1972
1973        // Chunked matching lacks several optimizations from the main path:
1974        // 1. No offset prediction (main path predicts based on recent matches)
1975        // 2. No generation counters (zeros tables each chunk)
1976        // 3. Simpler match length comparison
1977        //
1978        // As a result, chunked is expected to be significantly slower than
1979        // the optimized standard path. The threshold is set high because:
1980        // - Standard path has O(1) reset + prediction = very fast
1981        // - Chunked has O(n) reset per chunk + no prediction = slow
1982        let ratio = chunked_time.as_secs_f64() / standard_time.as_secs_f64();
1983        assert!(
1984            ratio < 20.0,
1985            "Chunked ({:?}) is too slow compared to standard ({:?}), ratio: {:.2}x",
1986            chunked_time,
1987            standard_time,
1988            ratio
1989        );
1990    }
1991
1992    #[test]
1993    fn test_chunked_small_input_fallback() {
1994        let mut finder = LazyMatchFinder::new(16);
1995        let small_data = b"small input that fits in one chunk";
1996
1997        // Chunked should work for small input (becomes single chunk)
1998        let matches = finder.find_matches_chunked(small_data, 16384);
1999
2000        // Should not panic, may or may not find matches
2001        assert!(matches.len() <= small_data.len());
2002    }
2003
2004    #[test]
2005    fn test_chunked_boundary_handling() {
2006        // Create data where a pattern spans chunk boundary
2007        let chunk_size = 1024;
2008        let mut data = vec![b'A'; chunk_size - 8];
2009        // Pattern at boundary
2010        data.extend_from_slice(b"PATTERN!");
2011        data.extend_from_slice(b"PATTERN!"); // Repeat in next chunk
2012        data.extend_from_slice(&vec![b'B'; chunk_size - 16]);
2013
2014        let mut finder = LazyMatchFinder::new(16);
2015        let matches = finder.find_matches_chunked(&data, chunk_size);
2016
2017        // Matches in second chunk should have correct absolute positions
2018        for m in &matches {
2019            assert!(m.position + m.length <= data.len());
2020            // Verify match is valid by checking data
2021            let src_start = m.position - m.offset;
2022            for i in 0..m.length {
2023                assert_eq!(
2024                    data[src_start + i],
2025                    data[m.position + i],
2026                    "Match at {} offset {} length {} invalid at byte {}",
2027                    m.position,
2028                    m.offset,
2029                    m.length,
2030                    i
2031                );
2032            }
2033        }
2034    }
2035
2036    #[test]
2037    fn test_chunked_maintains_compression_ratio() {
2038        let mut data = Vec::with_capacity(65536);
2039        let pattern = b"repeating content for compression ratio test ";
2040        while data.len() < 65536 {
2041            data.extend_from_slice(pattern);
2042        }
2043
2044        let mut finder = LazyMatchFinder::new(16);
2045
2046        let standard = finder.find_matches(&data);
2047        let chunked = finder.find_matches_chunked(&data, 16384);
2048
2049        let standard_coverage: usize = standard.iter().map(|m| m.length).sum();
2050        let chunked_coverage: usize = chunked.iter().map(|m| m.length).sum();
2051
2052        // Chunked may have slightly less coverage due to chunk boundaries,
2053        // but should be within 15% of standard
2054        let min_acceptable = (standard_coverage as f64 * 0.85) as usize;
2055        assert!(
2056            chunked_coverage >= min_acceptable,
2057            "Chunked coverage {} below 85% of standard {}",
2058            chunked_coverage,
2059            standard_coverage
2060        );
2061    }
2062
2063    // =========================================================================
2064    // Position-Adaptive Early Exit Tests (Phase 1.3)
2065    // =========================================================================
2066
2067    #[test]
2068    fn test_early_exit_threshold_by_position() {
2069        let finder = MatchFinder::new(16);
2070
2071        // Early in file: need longer match to exit early (higher threshold)
2072        let threshold_early = finder.early_exit_threshold(100);
2073        assert!(
2074            threshold_early >= 24,
2075            "Early position should have threshold >= 24, got {}",
2076            threshold_early
2077        );
2078
2079        // Mid-file: moderate threshold
2080        let threshold_mid = finder.early_exit_threshold(10000);
2081        assert!(
2082            threshold_mid >= 12 && threshold_mid < 24,
2083            "Mid position should have threshold 12-23, got {}",
2084            threshold_mid
2085        );
2086
2087        // Late in file: shorter threshold acceptable
2088        let threshold_late = finder.early_exit_threshold(50000);
2089        assert!(
2090            threshold_late >= 8 && threshold_late < 16,
2091            "Late position should have threshold 8-15, got {}",
2092            threshold_late
2093        );
2094
2095        // Monotonic: threshold should decrease (or stay same) as position increases
2096        assert!(
2097            threshold_early >= threshold_mid,
2098            "Threshold should decrease with position"
2099        );
2100        assert!(
2101            threshold_mid >= threshold_late,
2102            "Threshold should decrease with position"
2103        );
2104    }
2105
2106    #[test]
2107    fn test_early_exit_excellent_match() {
2108        let mut finder = MatchFinder::new(16);
2109
2110        // Create data with a long match (36+ bytes)
2111        let mut data = vec![b'X'; 10]; // Prefix
2112        let pattern = b"abcdefghijklmnopqrstuvwxyz0123456789ABCD";
2113        data.extend_from_slice(pattern);
2114        data.extend_from_slice(pattern); // 40-byte repeat
2115
2116        let matches = finder.find_matches(&data);
2117
2118        // Should find the excellent match
2119        let long_match = matches.iter().any(|m| m.length >= 36);
2120        assert!(
2121            long_match,
2122            "Should find the long match >= 36 bytes, got {:?}",
2123            matches
2124        );
2125    }
2126
2127    #[test]
2128    fn test_early_exit_improves_throughput_on_repetitive() {
2129        use std::time::Instant;
2130
2131        // Generate highly repetitive data with many long matches
2132        let mut data = Vec::with_capacity(65536);
2133        let pattern = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; // 72 bytes
2134        while data.len() < 65536 {
2135            data.extend_from_slice(pattern);
2136        }
2137
2138        let mut finder = MatchFinder::new(16);
2139
2140        // Warm up
2141        for _ in 0..3 {
2142            let _ = finder.find_matches(&data);
2143        }
2144
2145        // Measure
2146        let iterations = 20;
2147        let start = Instant::now();
2148        for _ in 0..iterations {
2149            let _ = finder.find_matches(&data);
2150        }
2151        let elapsed = start.elapsed();
2152
2153        let total_bytes = data.len() * iterations;
2154        let throughput_mib = total_bytes as f64 / elapsed.as_secs_f64() / (1024.0 * 1024.0);
2155
2156        // With early exit, should achieve good throughput on repetitive data
2157        assert!(
2158            throughput_mib >= 25.0,
2159            "Repetitive data throughput {:.1} MiB/s below target 25 MiB/s",
2160            throughput_mib
2161        );
2162    }
2163
2164    #[test]
2165    fn test_early_exit_maintains_match_quality() {
2166        let mut data = Vec::with_capacity(65536);
2167        let pattern = b"The quick brown fox jumps over the lazy dog repeatedly. ";
2168        while data.len() < 65536 {
2169            data.extend_from_slice(pattern);
2170        }
2171
2172        let mut finder = MatchFinder::new(16);
2173        let matches = finder.find_matches(&data);
2174
2175        // Even with early exit, should find good matches
2176        let total_match_bytes: usize = matches.iter().map(|m| m.length).sum();
2177        let coverage = total_match_bytes as f64 / data.len() as f64;
2178
2179        assert!(
2180            coverage >= 0.65,
2181            "Match coverage {:.1}% below target 65%",
2182            coverage * 100.0
2183        );
2184    }
2185
2186    // =========================================================================
2187    // Lazy Threshold Scaling Tests (Phase 1.2)
2188    // =========================================================================
2189
2190    #[test]
2191    fn test_lazy_threshold_scaling_by_size() {
2192        // Small input: default threshold
2193        let mut finder = LazyMatchFinder::new(16);
2194        finder.configure_for_size(1024);
2195        assert_eq!(
2196            finder.lazy_threshold, 24,
2197            "Small input should use default threshold"
2198        );
2199
2200        // Medium input: slightly lower threshold
2201        let mut finder = LazyMatchFinder::new(16);
2202        finder.configure_for_size(16384);
2203        assert!(
2204            finder.lazy_threshold <= 20,
2205            "Medium input should lower threshold"
2206        );
2207
2208        // Large input: commit earlier
2209        let mut finder = LazyMatchFinder::new(16);
2210        finder.configure_for_size(65536);
2211        assert!(
2212            finder.lazy_threshold <= 16,
2213            "Large input should commit earlier"
2214        );
2215
2216        // Very large input: aggressive early commit
2217        let mut finder = LazyMatchFinder::new(16);
2218        finder.configure_for_size(262144);
2219        assert!(
2220            finder.lazy_threshold <= 12,
2221            "Very large input should be aggressive"
2222        );
2223    }
2224
2225    #[test]
2226    fn test_adaptive_lazy_threshold_minimum() {
2227        let mut finder = LazyMatchFinder::new(16);
2228        finder.configure_for_size(1_000_000);
2229
2230        // Should never go below minimum match length + 1
2231        assert!(finder.lazy_threshold >= MIN_MATCH_LENGTH + 1);
2232    }
2233
2234    #[test]
2235    fn test_adaptive_lazy_maintains_quality() {
2236        let mut data = Vec::with_capacity(65536);
2237        let pattern = b"The quick brown fox jumps over the lazy dog. ";
2238        while data.len() < 65536 {
2239            data.extend_from_slice(pattern);
2240        }
2241
2242        // Fixed threshold
2243        let mut fixed_finder = LazyMatchFinder::new(16);
2244        let fixed_matches = fixed_finder.find_matches(&data);
2245
2246        // Adaptive threshold
2247        let mut adaptive_finder = LazyMatchFinder::new(16);
2248        adaptive_finder.configure_for_size(data.len());
2249        let adaptive_matches = adaptive_finder.find_matches(&data);
2250
2251        let fixed_coverage: usize = fixed_matches.iter().map(|m| m.length).sum();
2252        let adaptive_coverage: usize = adaptive_matches.iter().map(|m| m.length).sum();
2253
2254        // Adaptive should maintain at least 90% of fixed coverage
2255        let min_coverage = (fixed_coverage as f64 * 0.90) as usize;
2256        assert!(
2257            adaptive_coverage >= min_coverage,
2258            "Adaptive coverage {} below 90% of fixed {}",
2259            adaptive_coverage,
2260            fixed_coverage
2261        );
2262    }
2263
2264    #[test]
2265    fn test_find_matches_auto_uses_adaptive() {
2266        let mut data = Vec::with_capacity(65536);
2267        let pattern = b"repeating patterns for auto adaptive test. ";
2268        while data.len() < 65536 {
2269            data.extend_from_slice(pattern);
2270        }
2271
2272        let mut finder = LazyMatchFinder::new(16);
2273
2274        // find_matches_auto should auto-configure for size
2275        let matches = finder.find_matches_auto(&data);
2276
2277        // Should produce valid matches
2278        for m in &matches {
2279            assert!(m.position + m.length <= data.len());
2280            assert!(m.position >= m.offset);
2281        }
2282
2283        // Should have reasonable coverage
2284        let coverage: usize = matches.iter().map(|m| m.length).sum();
2285        assert!(
2286            coverage > data.len() / 2,
2287            "Should cover at least 50% of input"
2288        );
2289    }
2290
2291    // =========================================================================
2292    // Two-Tier Hash Match Finder Tests (Phase 2.2)
2293    // =========================================================================
2294
2295    #[test]
2296    fn test_two_tier_finder_creation() {
2297        let finder = TwoTierMatchFinder::new(16);
2298        assert_eq!(finder.search_depth, 16);
2299    }
2300
2301    #[test]
2302    fn test_two_tier_finds_matches() {
2303        let mut finder = TwoTierMatchFinder::new(16);
2304        let input = b"abcdefghijklmnopabcdefghijklmnop";
2305        let matches = finder.find_matches(input);
2306
2307        // Should find the 16-byte repeat
2308        assert!(!matches.is_empty(), "Should find matches");
2309        let m = &matches[0];
2310        assert_eq!(m.length, 16);
2311        assert_eq!(m.offset, 16);
2312    }
2313
2314    #[test]
2315    fn test_two_tier_finds_long_matches_via_8byte_hash() {
2316        let mut finder = TwoTierMatchFinder::new(16);
2317
2318        // Create pattern with 8+ byte repeat
2319        let mut data = vec![b'X'; 10];
2320        let pattern = b"LONGPATTERN12345678901234567890AB";
2321        data.extend_from_slice(pattern);
2322        data.extend_from_slice(pattern);
2323
2324        let matches = finder.find_matches(&data);
2325
2326        // Should find the long match
2327        let long_match = matches.iter().any(|m| m.length >= 30);
2328        assert!(
2329            long_match,
2330            "Should find long match via 8-byte hash, got {:?}",
2331            matches
2332        );
2333    }
2334
2335    #[test]
2336    fn test_two_tier_finds_short_matches_fallback() {
2337        let mut finder = TwoTierMatchFinder::new(16);
2338
2339        // Pattern that's exactly 4-7 bytes (too short for 8-byte hash)
2340        let input = b"ABCDxxABCD"; // 4-byte match
2341        let matches = finder.find_matches(input);
2342
2343        // Should find the short match via 4-byte fallback
2344        let short_match = matches.iter().any(|m| m.length >= 4);
2345        assert!(
2346            short_match,
2347            "Should find short match via 4-byte fallback, got {:?}",
2348            matches
2349        );
2350    }
2351
2352    #[test]
2353    fn test_two_tier_coverage_comparable_to_single() {
2354        let mut data = Vec::with_capacity(16384);
2355        let pattern = b"The quick brown fox jumps over the lazy dog. ";
2356        while data.len() < 16384 {
2357            data.extend_from_slice(pattern);
2358        }
2359
2360        let mut single = MatchFinder::new(16);
2361        let mut two_tier = TwoTierMatchFinder::new(16);
2362
2363        let single_matches = single.find_matches(&data);
2364        let two_tier_matches = two_tier.find_matches(&data);
2365
2366        let single_coverage: usize = single_matches.iter().map(|m| m.length).sum();
2367        let two_tier_coverage: usize = two_tier_matches.iter().map(|m| m.length).sum();
2368
2369        // Two-tier should achieve at least 90% of single-tier coverage
2370        let min_coverage = (single_coverage as f64 * 0.90) as usize;
2371        assert!(
2372            two_tier_coverage >= min_coverage,
2373            "Two-tier coverage {} below 90% of single {}",
2374            two_tier_coverage,
2375            single_coverage
2376        );
2377    }
2378
2379    #[test]
2380    fn test_two_tier_performance_reasonable() {
2381        use std::time::Instant;
2382
2383        let mut data = Vec::with_capacity(65536);
2384        let pattern = b"repeating pattern for performance test data here. ";
2385        while data.len() < 65536 {
2386            data.extend_from_slice(pattern);
2387        }
2388
2389        let mut single = MatchFinder::new(16);
2390        let mut two_tier = TwoTierMatchFinder::new(16);
2391
2392        // Warm up
2393        for _ in 0..3 {
2394            let _ = single.find_matches(&data);
2395            let _ = two_tier.find_matches(&data);
2396        }
2397
2398        // Measure single tier
2399        let iterations = 15;
2400        let start = Instant::now();
2401        for _ in 0..iterations {
2402            let _ = single.find_matches(&data);
2403        }
2404        let single_time = start.elapsed();
2405
2406        // Measure two tier
2407        let start = Instant::now();
2408        for _ in 0..iterations {
2409            let _ = two_tier.find_matches(&data);
2410        }
2411        let two_tier_time = start.elapsed();
2412
2413        // Two-tier has significant overhead from maintaining two hash tables and
2414        // two chain tables. Additionally, the main MatchFinder now uses O(1) reset
2415        // via generation counters while TwoTier still zeros its tables. Combined
2416        // with double updates per position, TwoTier is expected to be much slower.
2417        // With sparse skip optimization on repetitive data, standard path is even
2418        // faster, increasing the relative gap further.
2419        // Note: TwoTier is not used in the main compression path.
2420        let ratio = two_tier_time.as_secs_f64() / single_time.as_secs_f64();
2421        assert!(
2422            ratio < 30.0,
2423            "Two-tier ({:?}) too slow compared to single ({:?}), ratio: {:.2}x",
2424            two_tier_time,
2425            single_time,
2426            ratio
2427        );
2428    }
2429
2430    #[test]
2431    fn test_two_tier_8byte_hash_distribution() {
2432        let finder = TwoTierMatchFinder::new(16);
2433        let data = b"AABBCCDDAABBCCDD"; // 16 bytes with 8-byte repeat
2434
2435        // Different 8-byte sequences should hash differently
2436        let h1 = finder.hash8(data, 0); // "AABBCCDD"
2437        let h2 = finder.hash8(data, 8); // "AABBCCDD" (same)
2438
2439        // Same data should hash the same
2440        assert_eq!(h1, h2, "Same 8-byte sequence should have same hash");
2441
2442        // Different data should (usually) hash differently
2443        let data2 = b"XYZ12345XYZ12345";
2444        let h3 = finder.hash8(data2, 0);
2445        // Not guaranteed different but should be due to good hash function
2446        // Just verify it runs without panic
2447        assert!(h3 < LONG_HASH_SIZE as u32);
2448    }
2449
2450    // =========================================================================
2451    // Speculative Parallel Match Finding Tests (Phase 3.2)
2452    // =========================================================================
2453
2454    #[test]
2455    fn test_speculative_finds_matches() {
2456        let mut finder = MatchFinder::new(16);
2457        let input = b"abcdefghijklmnopabcdefghijklmnop";
2458        let matches = finder.find_matches_speculative(input);
2459
2460        assert!(!matches.is_empty(), "Should find matches");
2461        let m = &matches[0];
2462        assert_eq!(m.length, 16);
2463    }
2464
2465    #[test]
2466    fn test_speculative_correctness() {
2467        // Generate text-like data
2468        let mut data = Vec::with_capacity(16384);
2469        let pattern = b"The quick brown fox jumps over the lazy dog. ";
2470        while data.len() < 16384 {
2471            data.extend_from_slice(pattern);
2472        }
2473
2474        let mut standard = MatchFinder::new(16);
2475        let mut speculative = MatchFinder::new(16);
2476
2477        let std_matches = standard.find_matches(&data);
2478        let spec_matches = speculative.find_matches_speculative(&data);
2479
2480        // Both should find matches
2481        assert!(!std_matches.is_empty());
2482        assert!(!spec_matches.is_empty());
2483
2484        // Coverage should be comparable (within 20%)
2485        let std_coverage: usize = std_matches.iter().map(|m| m.length).sum();
2486        let spec_coverage: usize = spec_matches.iter().map(|m| m.length).sum();
2487
2488        let min_coverage = (std_coverage as f64 * 0.80) as usize;
2489        assert!(
2490            spec_coverage >= min_coverage,
2491            "Speculative coverage {} below 80% of standard {}",
2492            spec_coverage,
2493            std_coverage
2494        );
2495    }
2496
2497    #[test]
2498    fn test_speculative_no_overlapping_matches() {
2499        let mut finder = MatchFinder::new(16);
2500        // Data with overlapping match opportunities
2501        let input = b"ABCABCABCABCABCABCABCABCABCABCABCABC";
2502        let matches = finder.find_matches_speculative(input);
2503
2504        // Verify no overlapping matches in output
2505        for i in 1..matches.len() {
2506            let prev_end = matches[i - 1].position + matches[i - 1].length;
2507            assert!(
2508                matches[i].position >= prev_end,
2509                "Match {} at pos {} overlaps with previous ending at {}",
2510                i,
2511                matches[i].position,
2512                prev_end
2513            );
2514        }
2515    }
2516
2517    #[test]
2518    fn test_speculative_handles_short_input() {
2519        let mut finder = MatchFinder::new(16);
2520
2521        // Very short input
2522        let matches = finder.find_matches_speculative(b"abc");
2523        assert!(matches.is_empty());
2524
2525        // Input just at boundary
2526        let matches = finder.find_matches_speculative(b"abcdabcd");
2527        // Should handle gracefully
2528        assert!(matches.len() <= 1);
2529    }
2530
2531    #[test]
2532    fn test_speculative_performance_reasonable() {
2533        use std::time::Instant;
2534
2535        let mut data = Vec::with_capacity(65536);
2536        let pattern = b"repeating pattern for speculative matching test. ";
2537        while data.len() < 65536 {
2538            data.extend_from_slice(pattern);
2539        }
2540
2541        let mut standard = MatchFinder::new(16);
2542        let mut speculative = MatchFinder::new(16);
2543
2544        // Warm up
2545        for _ in 0..3 {
2546            let _ = standard.find_matches(&data);
2547            let _ = speculative.find_matches_speculative(&data);
2548        }
2549
2550        // Measure standard
2551        let iterations = 15;
2552        let start = Instant::now();
2553        for _ in 0..iterations {
2554            let _ = standard.find_matches(&data);
2555        }
2556        let std_time = start.elapsed();
2557
2558        // Measure speculative
2559        let start = Instant::now();
2560        for _ in 0..iterations {
2561            let _ = speculative.find_matches_speculative(&data);
2562        }
2563        let spec_time = start.elapsed();
2564
2565        // Speculative does extra work (lookahead) for potentially better matches.
2566        // With the optimized standard path (prediction + O(1) reset + sparse skip),
2567        // speculative may be significantly slower as it explores more candidates
2568        // without the sparse skip optimization for repetitive patterns.
2569        let ratio = spec_time.as_secs_f64() / std_time.as_secs_f64();
2570        assert!(
2571            ratio < 8.0,
2572            "Speculative ({:?}) too slow compared to standard ({:?}), ratio: {:.2}x",
2573            spec_time,
2574            std_time,
2575            ratio
2576        );
2577    }
2578
2579    #[test]
2580    fn test_speculative_finds_better_matches() {
2581        let mut finder = MatchFinder::new(16);
2582
2583        // Data where position +1 has a longer match than position 0
2584        // Position 0: "XXXAB..." matches 5 bytes
2585        // Position 1: "XXABC..." might match longer pattern later
2586        let mut data = vec![b'X'; 5];
2587        data.extend_from_slice(b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"); // 26 bytes
2588        data.extend_from_slice(b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"); // repeat
2589
2590        let matches = finder.find_matches_speculative(&data);
2591
2592        // Should find the long match
2593        let has_long = matches.iter().any(|m| m.length >= 20);
2594        assert!(
2595            has_long,
2596            "Speculative should find long matches: {:?}",
2597            matches
2598        );
2599    }
2600}