hermes_core/structures/
opt_p4d.rs

1//! OptP4D (Optimized Patched Frame-of-Reference Delta) posting list compression
2//!
3//! OptP4D is an improvement over PForDelta that finds the optimal bit width for each block
4//! by trying all possible bit widths and selecting the one that minimizes total storage.
5//!
6//! Key features:
7//! - Block-based compression (128 integers per block for SIMD alignment)
8//! - Delta encoding for doc IDs
9//! - Optimal bit-width selection per block
10//! - Patched coding: exceptions (values that don't fit) stored separately
11//! - Fast SIMD-friendly decoding with NEON (ARM) and SSE (x86) support
12//!
13//! Format per block:
14//! - Header: bit_width (5 bits) + num_exceptions (7 bits) + first_doc_id (32 bits)
15//! - Main array: 128 values packed at `bit_width` bits each
16//! - Exceptions: [position (7 bits), high_bits (32 - bit_width bits)] for each exception
17
18use super::simd;
19use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
20use std::io::{self, Read, Write};
21
22/// Block size for OptP4D (128 integers for SIMD alignment)
23pub const OPT_P4D_BLOCK_SIZE: usize = 128;
24
25/// Maximum number of exceptions before we increase bit width
26/// (keeping exceptions under ~10% of block for good compression)
27const MAX_EXCEPTIONS_RATIO: f32 = 0.10;
28
29/// Find the optimal bit width for a block of values
30/// Returns (bit_width, exception_count, total_bits)
31fn find_optimal_bit_width(values: &[u32]) -> (u8, usize, usize) {
32    if values.is_empty() {
33        return (0, 0, 0);
34    }
35
36    let n = values.len();
37    let max_exceptions = ((n as f32) * MAX_EXCEPTIONS_RATIO).ceil() as usize;
38
39    // Count how many values need each bit width
40    let mut bit_counts = [0usize; 33]; // bit_counts[b] = count of values needing exactly b bits
41    for &v in values {
42        let bits = simd::bits_needed(v) as usize;
43        bit_counts[bits] += 1;
44    }
45
46    // Compute cumulative counts: values that fit in b bits or less
47    let mut cumulative = [0usize; 33];
48    cumulative[0] = bit_counts[0];
49    for b in 1..=32 {
50        cumulative[b] = cumulative[b - 1] + bit_counts[b];
51    }
52
53    let mut best_bits = 32u8;
54    let mut best_total = usize::MAX;
55    let mut best_exceptions = 0usize;
56
57    // Try each bit width and compute total storage
58    for b in 0..=32u8 {
59        let fitting = if b == 0 {
60            bit_counts[0]
61        } else {
62            cumulative[b as usize]
63        };
64        let exceptions = n - fitting;
65
66        // Skip if too many exceptions
67        if exceptions > max_exceptions && b < 32 {
68            continue;
69        }
70
71        // Calculate total bits:
72        // - Main array: n * b bits
73        // - Exceptions: exceptions * (7 bits position + (32 - b) bits high value)
74        let main_bits = n * (b as usize);
75        let exception_bits = if b < 32 {
76            exceptions * (7 + (32 - b as usize))
77        } else {
78            0
79        };
80        let total = main_bits + exception_bits;
81
82        if total < best_total {
83            best_total = total;
84            best_bits = b;
85            best_exceptions = exceptions;
86        }
87    }
88
89    (best_bits, best_exceptions, best_total)
90}
91
92/// Pack values into a bitpacked array with the given bit width (NewPFD/OptPFD style)
93///
94/// Following the paper "Decoding billions of integers per second through vectorization":
95/// - Store the first b bits (low bits) of ALL values in the main array
96/// - For exceptions (values >= 2^b), store only the HIGH (32-b) bits separately with positions
97///
98/// Returns the packed bytes and a list of exceptions (position, high_bits)
99fn pack_with_exceptions(values: &[u32], bit_width: u8) -> (Vec<u8>, Vec<(u8, u32)>) {
100    if bit_width == 0 {
101        // All values must be 0, exceptions store full value
102        let exceptions: Vec<(u8, u32)> = values
103            .iter()
104            .enumerate()
105            .filter(|&(_, &v)| v != 0)
106            .map(|(i, &v)| (i as u8, v)) // For b=0, high bits = full value
107            .collect();
108        return (Vec::new(), exceptions);
109    }
110
111    if bit_width >= 32 {
112        // No exceptions possible, just pack all 32 bits
113        let bytes_needed = values.len() * 4;
114        let mut packed = vec![0u8; bytes_needed];
115        for (i, &value) in values.iter().enumerate() {
116            let bytes = value.to_le_bytes();
117            packed[i * 4..i * 4 + 4].copy_from_slice(&bytes);
118        }
119        return (packed, Vec::new());
120    }
121
122    let mask = (1u64 << bit_width) - 1;
123    let bytes_needed = (values.len() * bit_width as usize).div_ceil(8);
124    let mut packed = vec![0u8; bytes_needed];
125    let mut exceptions = Vec::new();
126
127    let mut bit_pos = 0usize;
128    for (i, &value) in values.iter().enumerate() {
129        // Store lower b bits in main array (for ALL values, including exceptions)
130        let low_bits = (value as u64) & mask;
131
132        // Write low bits to packed array
133        let byte_idx = bit_pos / 8;
134        let bit_offset = bit_pos % 8;
135
136        let mut remaining_bits = bit_width as usize;
137        let mut val = low_bits;
138        let mut current_byte_idx = byte_idx;
139        let mut current_bit_offset = bit_offset;
140
141        while remaining_bits > 0 {
142            let bits_in_byte = (8 - current_bit_offset).min(remaining_bits);
143            let byte_mask = ((1u64 << bits_in_byte) - 1) as u8;
144            packed[current_byte_idx] |= ((val as u8) & byte_mask) << current_bit_offset;
145            val >>= bits_in_byte;
146            remaining_bits -= bits_in_byte;
147            current_byte_idx += 1;
148            current_bit_offset = 0;
149        }
150
151        bit_pos += bit_width as usize;
152
153        // Record exception: store only the HIGH (32-b) bits
154        let fits = value <= mask as u32;
155        if !fits {
156            let high_bits = value >> bit_width;
157            exceptions.push((i as u8, high_bits));
158        }
159    }
160
161    (packed, exceptions)
162}
163
164/// Unpack values from a bitpacked array and apply exceptions (NewPFD/OptPFD style)
165///
166/// Following the paper "Decoding billions of integers per second through vectorization":
167/// - Low b bits are stored in the main array for ALL values
168/// - Exceptions store only the HIGH (32-b) bits
169/// - Reconstruct: value = (high_bits << b) | low_bits
170///
171/// Uses SIMD acceleration for common bit widths (8, 16, 32)
172fn unpack_with_exceptions(
173    packed: &[u8],
174    bit_width: u8,
175    exceptions: &[(u8, u32)],
176    count: usize,
177    output: &mut [u32],
178) {
179    if bit_width == 0 {
180        output[..count].fill(0);
181    } else if bit_width == 8 {
182        // SIMD-accelerated 8-bit unpacking
183        simd::unpack_8bit(packed, output, count);
184    } else if bit_width == 16 {
185        // SIMD-accelerated 16-bit unpacking
186        simd::unpack_16bit(packed, output, count);
187    } else if bit_width >= 32 {
188        // SIMD-accelerated 32-bit unpacking
189        simd::unpack_32bit(packed, output, count);
190        return; // No exceptions for 32-bit
191    } else {
192        // Generic bit unpacking for other bit widths
193        let mask = (1u64 << bit_width) - 1;
194        let mut bit_pos = 0usize;
195        let input_ptr = packed.as_ptr();
196
197        for out in output[..count].iter_mut() {
198            let byte_idx = bit_pos >> 3;
199            let bit_offset = bit_pos & 7;
200
201            // Read 8 bytes at once for efficiency
202            let word = if byte_idx + 8 <= packed.len() {
203                unsafe { (input_ptr.add(byte_idx) as *const u64).read_unaligned() }
204            } else {
205                // Handle edge case near end of buffer
206                let mut word = 0u64;
207                for (i, &b) in packed[byte_idx..].iter().enumerate() {
208                    word |= (b as u64) << (i * 8);
209                }
210                word
211            };
212
213            *out = ((word >> bit_offset) & mask) as u32;
214            bit_pos += bit_width as usize;
215        }
216    }
217
218    // Apply exceptions: combine high bits with low bits already in output
219    // value = (high_bits << bit_width) | low_bits
220    for &(pos, high_bits) in exceptions {
221        if (pos as usize) < count {
222            let low_bits = output[pos as usize];
223            output[pos as usize] = (high_bits << bit_width) | low_bits;
224        }
225    }
226}
227
228/// A single OptP4D block
229#[derive(Debug, Clone)]
230pub struct OptP4DBlock {
231    /// First doc_id in this block (absolute)
232    pub first_doc_id: u32,
233    /// Last doc_id in this block (absolute)
234    pub last_doc_id: u32,
235    /// Number of documents in this block
236    pub num_docs: u16,
237    /// Bit width for delta encoding
238    pub doc_bit_width: u8,
239    /// Bit width for term frequencies
240    pub tf_bit_width: u8,
241    /// Maximum term frequency in this block
242    pub max_tf: u32,
243    /// Maximum block score for WAND/MaxScore
244    pub max_block_score: f32,
245    /// Packed doc deltas
246    pub doc_deltas: Vec<u8>,
247    /// Doc delta exceptions: (position, full_delta)
248    pub doc_exceptions: Vec<(u8, u32)>,
249    /// Packed term frequencies
250    pub term_freqs: Vec<u8>,
251    /// TF exceptions: (position, full_tf)
252    pub tf_exceptions: Vec<(u8, u32)>,
253}
254
255impl OptP4DBlock {
256    /// Serialize the block
257    pub fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
258        writer.write_u32::<LittleEndian>(self.first_doc_id)?;
259        writer.write_u32::<LittleEndian>(self.last_doc_id)?;
260        writer.write_u16::<LittleEndian>(self.num_docs)?;
261        writer.write_u8(self.doc_bit_width)?;
262        writer.write_u8(self.tf_bit_width)?;
263        writer.write_u32::<LittleEndian>(self.max_tf)?;
264        writer.write_f32::<LittleEndian>(self.max_block_score)?;
265
266        // Write doc deltas
267        writer.write_u16::<LittleEndian>(self.doc_deltas.len() as u16)?;
268        writer.write_all(&self.doc_deltas)?;
269
270        // Write doc exceptions
271        writer.write_u8(self.doc_exceptions.len() as u8)?;
272        for &(pos, val) in &self.doc_exceptions {
273            writer.write_u8(pos)?;
274            writer.write_u32::<LittleEndian>(val)?;
275        }
276
277        // Write term freqs
278        writer.write_u16::<LittleEndian>(self.term_freqs.len() as u16)?;
279        writer.write_all(&self.term_freqs)?;
280
281        // Write tf exceptions
282        writer.write_u8(self.tf_exceptions.len() as u8)?;
283        for &(pos, val) in &self.tf_exceptions {
284            writer.write_u8(pos)?;
285            writer.write_u32::<LittleEndian>(val)?;
286        }
287
288        Ok(())
289    }
290
291    /// Deserialize a block
292    pub fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
293        let first_doc_id = reader.read_u32::<LittleEndian>()?;
294        let last_doc_id = reader.read_u32::<LittleEndian>()?;
295        let num_docs = reader.read_u16::<LittleEndian>()?;
296        let doc_bit_width = reader.read_u8()?;
297        let tf_bit_width = reader.read_u8()?;
298        let max_tf = reader.read_u32::<LittleEndian>()?;
299        let max_block_score = reader.read_f32::<LittleEndian>()?;
300
301        // Read doc deltas
302        let doc_deltas_len = reader.read_u16::<LittleEndian>()? as usize;
303        let mut doc_deltas = vec![0u8; doc_deltas_len];
304        reader.read_exact(&mut doc_deltas)?;
305
306        // Read doc exceptions
307        let num_doc_exceptions = reader.read_u8()? as usize;
308        let mut doc_exceptions = Vec::with_capacity(num_doc_exceptions);
309        for _ in 0..num_doc_exceptions {
310            let pos = reader.read_u8()?;
311            let val = reader.read_u32::<LittleEndian>()?;
312            doc_exceptions.push((pos, val));
313        }
314
315        // Read term freqs
316        let term_freqs_len = reader.read_u16::<LittleEndian>()? as usize;
317        let mut term_freqs = vec![0u8; term_freqs_len];
318        reader.read_exact(&mut term_freqs)?;
319
320        // Read tf exceptions
321        let num_tf_exceptions = reader.read_u8()? as usize;
322        let mut tf_exceptions = Vec::with_capacity(num_tf_exceptions);
323        for _ in 0..num_tf_exceptions {
324            let pos = reader.read_u8()?;
325            let val = reader.read_u32::<LittleEndian>()?;
326            tf_exceptions.push((pos, val));
327        }
328
329        Ok(Self {
330            first_doc_id,
331            last_doc_id,
332            num_docs,
333            doc_bit_width,
334            tf_bit_width,
335            max_tf,
336            max_block_score,
337            doc_deltas,
338            doc_exceptions,
339            term_freqs,
340            tf_exceptions,
341        })
342    }
343
344    /// Decode doc_ids from this block using SIMD-accelerated delta decoding
345    pub fn decode_doc_ids(&self) -> Vec<u32> {
346        if self.num_docs == 0 {
347            return Vec::new();
348        }
349
350        let count = self.num_docs as usize;
351        let mut deltas = vec![0u32; count];
352
353        // Unpack deltas with exceptions (SIMD-accelerated for 8/16/32-bit)
354        if count > 1 {
355            unpack_with_exceptions(
356                &self.doc_deltas,
357                self.doc_bit_width,
358                &self.doc_exceptions,
359                count - 1,
360                &mut deltas,
361            );
362        }
363
364        // Convert deltas to absolute doc_ids using SIMD-accelerated prefix sum
365        let mut doc_ids = vec![0u32; count];
366        simd::delta_decode(&mut doc_ids, &deltas, self.first_doc_id, count);
367
368        doc_ids
369    }
370
371    /// Decode term frequencies from this block using SIMD acceleration
372    pub fn decode_term_freqs(&self) -> Vec<u32> {
373        if self.num_docs == 0 {
374            return Vec::new();
375        }
376
377        let count = self.num_docs as usize;
378        let mut tfs = vec![0u32; count];
379
380        // Unpack TFs with exceptions (SIMD-accelerated for 8/16/32-bit)
381        unpack_with_exceptions(
382            &self.term_freqs,
383            self.tf_bit_width,
384            &self.tf_exceptions,
385            count,
386            &mut tfs,
387        );
388
389        // TF is stored as tf-1, so add 1 back using SIMD
390        simd::add_one(&mut tfs, count);
391
392        tfs
393    }
394}
395
396/// OptP4D posting list
397#[derive(Debug, Clone)]
398pub struct OptP4DPostingList {
399    /// Blocks of postings
400    pub blocks: Vec<OptP4DBlock>,
401    /// Total document count
402    pub doc_count: u32,
403    /// Maximum score across all blocks
404    pub max_score: f32,
405}
406
407impl OptP4DPostingList {
408    /// BM25F parameters for block-max score calculation
409    const K1: f32 = 1.2;
410    const B: f32 = 0.75;
411
412    /// Compute BM25F upper bound score for a given max_tf and IDF
413    #[inline]
414    fn compute_bm25f_upper_bound(max_tf: u32, idf: f32) -> f32 {
415        let tf = max_tf as f32;
416        let min_length_norm = 1.0 - Self::B;
417        let tf_norm = (tf * (Self::K1 + 1.0)) / (tf + Self::K1 * min_length_norm);
418        idf * tf_norm
419    }
420
421    /// Create from raw doc_ids and term frequencies
422    pub fn from_postings(doc_ids: &[u32], term_freqs: &[u32], idf: f32) -> Self {
423        assert_eq!(doc_ids.len(), term_freqs.len());
424
425        if doc_ids.is_empty() {
426            return Self {
427                blocks: Vec::new(),
428                doc_count: 0,
429                max_score: 0.0,
430            };
431        }
432
433        let mut blocks = Vec::new();
434        let mut max_score = 0.0f32;
435        let mut i = 0;
436
437        while i < doc_ids.len() {
438            let block_end = (i + OPT_P4D_BLOCK_SIZE).min(doc_ids.len());
439            let block_docs = &doc_ids[i..block_end];
440            let block_tfs = &term_freqs[i..block_end];
441
442            let block = Self::create_block(block_docs, block_tfs, idf);
443            max_score = max_score.max(block.max_block_score);
444            blocks.push(block);
445
446            i = block_end;
447        }
448
449        Self {
450            blocks,
451            doc_count: doc_ids.len() as u32,
452            max_score,
453        }
454    }
455
456    fn create_block(doc_ids: &[u32], term_freqs: &[u32], idf: f32) -> OptP4DBlock {
457        let num_docs = doc_ids.len();
458        let first_doc_id = doc_ids[0];
459        let last_doc_id = *doc_ids.last().unwrap();
460
461        // Compute deltas (delta - 1 to save one bit since deltas are always >= 1)
462        let mut deltas = Vec::with_capacity(num_docs.saturating_sub(1));
463        for j in 1..num_docs {
464            let delta = doc_ids[j] - doc_ids[j - 1] - 1;
465            deltas.push(delta);
466        }
467
468        // Find optimal bit width for deltas
469        let (doc_bit_width, _, _) = find_optimal_bit_width(&deltas);
470        let (doc_deltas, doc_exceptions) = pack_with_exceptions(&deltas, doc_bit_width);
471
472        // Compute max TF and prepare TF array (store tf-1)
473        let mut tfs = Vec::with_capacity(num_docs);
474        let mut max_tf = 0u32;
475
476        for &tf in term_freqs {
477            tfs.push(tf - 1); // Store tf-1
478            max_tf = max_tf.max(tf);
479        }
480
481        // Find optimal bit width for TFs
482        let (tf_bit_width, _, _) = find_optimal_bit_width(&tfs);
483        let (term_freqs_packed, tf_exceptions) = pack_with_exceptions(&tfs, tf_bit_width);
484
485        // BM25F upper bound score
486        let max_block_score = Self::compute_bm25f_upper_bound(max_tf, idf);
487
488        OptP4DBlock {
489            first_doc_id,
490            last_doc_id,
491            num_docs: num_docs as u16,
492            doc_bit_width,
493            tf_bit_width,
494            max_tf,
495            max_block_score,
496            doc_deltas,
497            doc_exceptions,
498            term_freqs: term_freqs_packed,
499            tf_exceptions,
500        }
501    }
502
503    /// Serialize the posting list
504    pub fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
505        writer.write_u32::<LittleEndian>(self.doc_count)?;
506        writer.write_f32::<LittleEndian>(self.max_score)?;
507        writer.write_u32::<LittleEndian>(self.blocks.len() as u32)?;
508
509        for block in &self.blocks {
510            block.serialize(writer)?;
511        }
512
513        Ok(())
514    }
515
516    /// Deserialize a posting list
517    pub fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
518        let doc_count = reader.read_u32::<LittleEndian>()?;
519        let max_score = reader.read_f32::<LittleEndian>()?;
520        let num_blocks = reader.read_u32::<LittleEndian>()? as usize;
521
522        let mut blocks = Vec::with_capacity(num_blocks);
523        for _ in 0..num_blocks {
524            blocks.push(OptP4DBlock::deserialize(reader)?);
525        }
526
527        Ok(Self {
528            blocks,
529            doc_count,
530            max_score,
531        })
532    }
533
534    /// Get document count
535    pub fn len(&self) -> u32 {
536        self.doc_count
537    }
538
539    /// Check if empty
540    pub fn is_empty(&self) -> bool {
541        self.doc_count == 0
542    }
543
544    /// Create an iterator
545    pub fn iterator(&self) -> OptP4DIterator<'_> {
546        OptP4DIterator::new(self)
547    }
548}
549
550/// Iterator over OptP4D posting list
551pub struct OptP4DIterator<'a> {
552    posting_list: &'a OptP4DPostingList,
553    current_block: usize,
554    block_doc_ids: Vec<u32>,
555    block_term_freqs: Vec<u32>,
556    pos_in_block: usize,
557    exhausted: bool,
558}
559
560impl<'a> OptP4DIterator<'a> {
561    pub fn new(posting_list: &'a OptP4DPostingList) -> Self {
562        let mut iter = Self {
563            posting_list,
564            current_block: 0,
565            block_doc_ids: Vec::new(),
566            block_term_freqs: Vec::new(),
567            pos_in_block: 0,
568            exhausted: posting_list.blocks.is_empty(),
569        };
570
571        if !iter.exhausted {
572            iter.decode_current_block();
573        }
574
575        iter
576    }
577
578    fn decode_current_block(&mut self) {
579        let block = &self.posting_list.blocks[self.current_block];
580        self.block_doc_ids = block.decode_doc_ids();
581        self.block_term_freqs = block.decode_term_freqs();
582        self.pos_in_block = 0;
583    }
584
585    /// Current document ID
586    pub fn doc(&self) -> u32 {
587        if self.exhausted {
588            u32::MAX
589        } else {
590            self.block_doc_ids[self.pos_in_block]
591        }
592    }
593
594    /// Current term frequency
595    pub fn term_freq(&self) -> u32 {
596        if self.exhausted {
597            0
598        } else {
599            self.block_term_freqs[self.pos_in_block]
600        }
601    }
602
603    /// Advance to next document
604    pub fn advance(&mut self) -> u32 {
605        if self.exhausted {
606            return u32::MAX;
607        }
608
609        self.pos_in_block += 1;
610
611        if self.pos_in_block >= self.block_doc_ids.len() {
612            self.current_block += 1;
613            if self.current_block >= self.posting_list.blocks.len() {
614                self.exhausted = true;
615                return u32::MAX;
616            }
617            self.decode_current_block();
618        }
619
620        self.doc()
621    }
622
623    /// Seek to first doc >= target
624    pub fn seek(&mut self, target: u32) -> u32 {
625        if self.exhausted {
626            return u32::MAX;
627        }
628
629        // Skip blocks where last_doc_id < target
630        while self.current_block < self.posting_list.blocks.len() {
631            let block = &self.posting_list.blocks[self.current_block];
632            if block.last_doc_id >= target {
633                break;
634            }
635            self.current_block += 1;
636        }
637
638        if self.current_block >= self.posting_list.blocks.len() {
639            self.exhausted = true;
640            return u32::MAX;
641        }
642
643        // Decode block if needed
644        if self.block_doc_ids.is_empty() || self.current_block != self.posting_list.blocks.len() - 1
645        {
646            self.decode_current_block();
647        }
648
649        // Binary search within block
650        match self.block_doc_ids[self.pos_in_block..].binary_search(&target) {
651            Ok(idx) => {
652                self.pos_in_block += idx;
653            }
654            Err(idx) => {
655                self.pos_in_block += idx;
656                if self.pos_in_block >= self.block_doc_ids.len() {
657                    // Move to next block
658                    self.current_block += 1;
659                    if self.current_block >= self.posting_list.blocks.len() {
660                        self.exhausted = true;
661                        return u32::MAX;
662                    }
663                    self.decode_current_block();
664                }
665            }
666        }
667
668        self.doc()
669    }
670}
671
672#[cfg(test)]
673mod tests {
674    use super::*;
675
676    #[test]
677    fn test_bits_needed() {
678        assert_eq!(simd::bits_needed(0), 0);
679        assert_eq!(simd::bits_needed(1), 1);
680        assert_eq!(simd::bits_needed(2), 2);
681        assert_eq!(simd::bits_needed(3), 2);
682        assert_eq!(simd::bits_needed(4), 3);
683        assert_eq!(simd::bits_needed(255), 8);
684        assert_eq!(simd::bits_needed(256), 9);
685        assert_eq!(simd::bits_needed(u32::MAX), 32);
686    }
687
688    #[test]
689    fn test_find_optimal_bit_width() {
690        // All zeros
691        let values = vec![0u32; 100];
692        let (bits, exceptions, _) = find_optimal_bit_width(&values);
693        assert_eq!(bits, 0);
694        assert_eq!(exceptions, 0);
695
696        // All small values
697        let values: Vec<u32> = (0..100).map(|i| i % 16).collect();
698        let (bits, _, _) = find_optimal_bit_width(&values);
699        assert!(bits <= 4);
700
701        // Mix with outliers
702        let mut values: Vec<u32> = (0..100).map(|i| i % 16).collect();
703        values[50] = 1_000_000; // outlier
704        let (bits, exceptions, _) = find_optimal_bit_width(&values);
705        assert!(bits < 20); // Should use small bit width with exception
706        assert!(exceptions >= 1);
707    }
708
709    #[test]
710    fn test_pack_unpack_with_exceptions() {
711        let values = vec![1, 2, 3, 255, 4, 5, 1000, 6, 7, 8];
712        let (packed, exceptions) = pack_with_exceptions(&values, 4);
713
714        let mut output = vec![0u32; values.len()];
715        unpack_with_exceptions(&packed, 4, &exceptions, values.len(), &mut output);
716
717        assert_eq!(output, values);
718    }
719
720    #[test]
721    fn test_opt_p4d_posting_list_small() {
722        let doc_ids: Vec<u32> = (0..100).map(|i| i * 2).collect();
723        let term_freqs: Vec<u32> = vec![1; 100];
724
725        let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
726
727        assert_eq!(list.len(), 100);
728        assert_eq!(list.blocks.len(), 1);
729
730        // Verify iteration
731        let mut iter = list.iterator();
732        for (i, &expected) in doc_ids.iter().enumerate() {
733            assert_eq!(iter.doc(), expected, "Mismatch at {}", i);
734            assert_eq!(iter.term_freq(), 1);
735            iter.advance();
736        }
737        assert_eq!(iter.doc(), u32::MAX);
738    }
739
740    #[test]
741    fn test_opt_p4d_posting_list_large() {
742        let doc_ids: Vec<u32> = (0..500).map(|i| i * 3).collect();
743        let term_freqs: Vec<u32> = (0..500).map(|i| (i % 10) + 1).collect();
744
745        let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
746
747        assert_eq!(list.len(), 500);
748        assert_eq!(list.blocks.len(), 4); // 500 / 128 = 3.9 -> 4 blocks
749
750        // Verify iteration
751        let mut iter = list.iterator();
752        for (i, &expected) in doc_ids.iter().enumerate() {
753            assert_eq!(iter.doc(), expected, "Mismatch at {}", i);
754            assert_eq!(iter.term_freq(), term_freqs[i]);
755            iter.advance();
756        }
757    }
758
759    #[test]
760    fn test_opt_p4d_seek() {
761        let doc_ids: Vec<u32> = vec![10, 20, 30, 100, 200, 300, 1000, 2000];
762        let term_freqs: Vec<u32> = vec![1; 8];
763
764        let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
765        let mut iter = list.iterator();
766
767        assert_eq!(iter.seek(25), 30);
768        assert_eq!(iter.seek(100), 100);
769        assert_eq!(iter.seek(500), 1000);
770        assert_eq!(iter.seek(3000), u32::MAX);
771    }
772
773    #[test]
774    fn test_opt_p4d_serialization() {
775        let doc_ids: Vec<u32> = (0..200).map(|i| i * 5).collect();
776        let term_freqs: Vec<u32> = (0..200).map(|i| (i % 5) + 1).collect();
777
778        let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
779
780        let mut buffer = Vec::new();
781        list.serialize(&mut buffer).unwrap();
782
783        let restored = OptP4DPostingList::deserialize(&mut &buffer[..]).unwrap();
784
785        assert_eq!(restored.len(), list.len());
786        assert_eq!(restored.blocks.len(), list.blocks.len());
787
788        // Verify iteration matches
789        let mut iter1 = list.iterator();
790        let mut iter2 = restored.iterator();
791
792        while iter1.doc() != u32::MAX {
793            assert_eq!(iter1.doc(), iter2.doc());
794            assert_eq!(iter1.term_freq(), iter2.term_freq());
795            iter1.advance();
796            iter2.advance();
797        }
798    }
799
800    #[test]
801    fn test_opt_p4d_with_outliers() {
802        // Create data with some outliers to test exception handling
803        let mut doc_ids: Vec<u32> = (0..128).map(|i| i * 2).collect();
804        doc_ids[64] = 1_000_000; // Large outlier
805
806        // Fix: ensure doc_ids are sorted
807        doc_ids.sort();
808
809        let term_freqs: Vec<u32> = vec![1; 128];
810
811        let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
812
813        // Verify the outlier is handled correctly
814        let mut iter = list.iterator();
815        let mut found_outlier = false;
816        while iter.doc() != u32::MAX {
817            if iter.doc() == 1_000_000 {
818                found_outlier = true;
819            }
820            iter.advance();
821        }
822        assert!(found_outlier, "Outlier value should be preserved");
823    }
824
825    #[test]
826    fn test_opt_p4d_simd_full_blocks() {
827        // Test with multiple full 128-integer blocks to exercise SIMD paths
828        let doc_ids: Vec<u32> = (0..1024).map(|i| i * 2).collect();
829        let term_freqs: Vec<u32> = (0..1024).map(|i| (i % 20) + 1).collect();
830
831        let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
832
833        assert_eq!(list.len(), 1024);
834        assert_eq!(list.blocks.len(), 8); // 1024 / 128 = 8 full blocks
835
836        // Verify all values are decoded correctly
837        let mut iter = list.iterator();
838        for (i, &expected_doc) in doc_ids.iter().enumerate() {
839            assert_eq!(iter.doc(), expected_doc, "Doc mismatch at {}", i);
840            assert_eq!(iter.term_freq(), term_freqs[i], "TF mismatch at {}", i);
841            iter.advance();
842        }
843        assert_eq!(iter.doc(), u32::MAX);
844    }
845
846    #[test]
847    fn test_opt_p4d_simd_8bit_values() {
848        // Test with values that fit in 8 bits to exercise SIMD 8-bit unpack
849        let doc_ids: Vec<u32> = (0..256).collect();
850        let term_freqs: Vec<u32> = (0..256).map(|i| (i % 100) + 1).collect();
851
852        let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
853
854        // Verify all values
855        let mut iter = list.iterator();
856        for (i, &expected_doc) in doc_ids.iter().enumerate() {
857            assert_eq!(iter.doc(), expected_doc, "Doc mismatch at {}", i);
858            assert_eq!(iter.term_freq(), term_freqs[i], "TF mismatch at {}", i);
859            iter.advance();
860        }
861    }
862
863    #[test]
864    fn test_opt_p4d_simd_delta_decode() {
865        // Test SIMD delta decoding with various gap sizes
866        let mut doc_ids = Vec::with_capacity(512);
867        let mut current = 0u32;
868        for i in 0..512 {
869            current += (i % 10) + 1; // Variable gaps
870            doc_ids.push(current);
871        }
872        let term_freqs: Vec<u32> = vec![1; 512];
873
874        let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
875
876        // Verify delta decoding is correct
877        let mut iter = list.iterator();
878        for (i, &expected_doc) in doc_ids.iter().enumerate() {
879            assert_eq!(
880                iter.doc(),
881                expected_doc,
882                "Doc mismatch at {} (expected {}, got {})",
883                i,
884                expected_doc,
885                iter.doc()
886            );
887            iter.advance();
888        }
889    }
890}