1use super::simd;
19use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
20use std::io::{self, Read, Write};
21
22pub const OPT_P4D_BLOCK_SIZE: usize = 128;
24
25const MAX_EXCEPTIONS_RATIO: f32 = 0.10;
28
29fn find_optimal_bit_width(values: &[u32]) -> (u8, usize, usize) {
32 if values.is_empty() {
33 return (0, 0, 0);
34 }
35
36 let n = values.len();
37 let max_exceptions = ((n as f32) * MAX_EXCEPTIONS_RATIO).ceil() as usize;
38
39 let mut bit_counts = [0usize; 33]; for &v in values {
42 let bits = simd::bits_needed(v) as usize;
43 bit_counts[bits] += 1;
44 }
45
46 let mut cumulative = [0usize; 33];
48 cumulative[0] = bit_counts[0];
49 for b in 1..=32 {
50 cumulative[b] = cumulative[b - 1] + bit_counts[b];
51 }
52
53 let mut best_bits = 32u8;
54 let mut best_total = usize::MAX;
55 let mut best_exceptions = 0usize;
56
57 for b in 0..=32u8 {
59 let fitting = if b == 0 {
60 bit_counts[0]
61 } else {
62 cumulative[b as usize]
63 };
64 let exceptions = n - fitting;
65
66 if exceptions > max_exceptions && b < 32 {
68 continue;
69 }
70
71 let main_bits = n * (b as usize);
75 let exception_bits = if b < 32 {
76 exceptions * (7 + (32 - b as usize))
77 } else {
78 0
79 };
80 let total = main_bits + exception_bits;
81
82 if total < best_total {
83 best_total = total;
84 best_bits = b;
85 best_exceptions = exceptions;
86 }
87 }
88
89 (best_bits, best_exceptions, best_total)
90}
91
92fn pack_with_exceptions(values: &[u32], bit_width: u8) -> (Vec<u8>, Vec<(u8, u32)>) {
100 if bit_width == 0 {
101 let exceptions: Vec<(u8, u32)> = values
103 .iter()
104 .enumerate()
105 .filter(|&(_, &v)| v != 0)
106 .map(|(i, &v)| (i as u8, v)) .collect();
108 return (Vec::new(), exceptions);
109 }
110
111 if bit_width >= 32 {
112 let bytes_needed = values.len() * 4;
114 let mut packed = vec![0u8; bytes_needed];
115 for (i, &value) in values.iter().enumerate() {
116 let bytes = value.to_le_bytes();
117 packed[i * 4..i * 4 + 4].copy_from_slice(&bytes);
118 }
119 return (packed, Vec::new());
120 }
121
122 let mask = (1u64 << bit_width) - 1;
123 let bytes_needed = (values.len() * bit_width as usize).div_ceil(8);
124 let mut packed = vec![0u8; bytes_needed];
125 let mut exceptions = Vec::new();
126
127 let mut bit_pos = 0usize;
128 for (i, &value) in values.iter().enumerate() {
129 let low_bits = (value as u64) & mask;
131
132 let byte_idx = bit_pos / 8;
134 let bit_offset = bit_pos % 8;
135
136 let mut remaining_bits = bit_width as usize;
137 let mut val = low_bits;
138 let mut current_byte_idx = byte_idx;
139 let mut current_bit_offset = bit_offset;
140
141 while remaining_bits > 0 {
142 let bits_in_byte = (8 - current_bit_offset).min(remaining_bits);
143 let byte_mask = ((1u64 << bits_in_byte) - 1) as u8;
144 packed[current_byte_idx] |= ((val as u8) & byte_mask) << current_bit_offset;
145 val >>= bits_in_byte;
146 remaining_bits -= bits_in_byte;
147 current_byte_idx += 1;
148 current_bit_offset = 0;
149 }
150
151 bit_pos += bit_width as usize;
152
153 let fits = value <= mask as u32;
155 if !fits {
156 let high_bits = value >> bit_width;
157 exceptions.push((i as u8, high_bits));
158 }
159 }
160
161 (packed, exceptions)
162}
163
164fn unpack_with_exceptions(
173 packed: &[u8],
174 bit_width: u8,
175 exceptions: &[(u8, u32)],
176 count: usize,
177 output: &mut [u32],
178) {
179 if bit_width == 0 {
180 output[..count].fill(0);
181 } else if bit_width == 8 {
182 simd::unpack_8bit(packed, output, count);
184 } else if bit_width == 16 {
185 simd::unpack_16bit(packed, output, count);
187 } else if bit_width >= 32 {
188 simd::unpack_32bit(packed, output, count);
190 return; } else {
192 let mask = (1u64 << bit_width) - 1;
194 let mut bit_pos = 0usize;
195 let input_ptr = packed.as_ptr();
196
197 for out in output[..count].iter_mut() {
198 let byte_idx = bit_pos >> 3;
199 let bit_offset = bit_pos & 7;
200
201 let word = if byte_idx + 8 <= packed.len() {
203 unsafe { (input_ptr.add(byte_idx) as *const u64).read_unaligned() }
204 } else {
205 let mut word = 0u64;
207 for (i, &b) in packed[byte_idx..].iter().enumerate() {
208 word |= (b as u64) << (i * 8);
209 }
210 word
211 };
212
213 *out = ((word >> bit_offset) & mask) as u32;
214 bit_pos += bit_width as usize;
215 }
216 }
217
218 for &(pos, high_bits) in exceptions {
221 if (pos as usize) < count {
222 let low_bits = output[pos as usize];
223 output[pos as usize] = (high_bits << bit_width) | low_bits;
224 }
225 }
226}
227
228#[derive(Debug, Clone)]
230pub struct OptP4DBlock {
231 pub first_doc_id: u32,
233 pub last_doc_id: u32,
235 pub num_docs: u16,
237 pub doc_bit_width: u8,
239 pub tf_bit_width: u8,
241 pub max_tf: u32,
243 pub max_block_score: f32,
245 pub doc_deltas: Vec<u8>,
247 pub doc_exceptions: Vec<(u8, u32)>,
249 pub term_freqs: Vec<u8>,
251 pub tf_exceptions: Vec<(u8, u32)>,
253}
254
255impl OptP4DBlock {
256 pub fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
258 writer.write_u32::<LittleEndian>(self.first_doc_id)?;
259 writer.write_u32::<LittleEndian>(self.last_doc_id)?;
260 writer.write_u16::<LittleEndian>(self.num_docs)?;
261 writer.write_u8(self.doc_bit_width)?;
262 writer.write_u8(self.tf_bit_width)?;
263 writer.write_u32::<LittleEndian>(self.max_tf)?;
264 writer.write_f32::<LittleEndian>(self.max_block_score)?;
265
266 writer.write_u16::<LittleEndian>(self.doc_deltas.len() as u16)?;
268 writer.write_all(&self.doc_deltas)?;
269
270 writer.write_u8(self.doc_exceptions.len() as u8)?;
272 for &(pos, val) in &self.doc_exceptions {
273 writer.write_u8(pos)?;
274 writer.write_u32::<LittleEndian>(val)?;
275 }
276
277 writer.write_u16::<LittleEndian>(self.term_freqs.len() as u16)?;
279 writer.write_all(&self.term_freqs)?;
280
281 writer.write_u8(self.tf_exceptions.len() as u8)?;
283 for &(pos, val) in &self.tf_exceptions {
284 writer.write_u8(pos)?;
285 writer.write_u32::<LittleEndian>(val)?;
286 }
287
288 Ok(())
289 }
290
291 pub fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
293 let first_doc_id = reader.read_u32::<LittleEndian>()?;
294 let last_doc_id = reader.read_u32::<LittleEndian>()?;
295 let num_docs = reader.read_u16::<LittleEndian>()?;
296 let doc_bit_width = reader.read_u8()?;
297 let tf_bit_width = reader.read_u8()?;
298 let max_tf = reader.read_u32::<LittleEndian>()?;
299 let max_block_score = reader.read_f32::<LittleEndian>()?;
300
301 let doc_deltas_len = reader.read_u16::<LittleEndian>()? as usize;
303 let mut doc_deltas = vec![0u8; doc_deltas_len];
304 reader.read_exact(&mut doc_deltas)?;
305
306 let num_doc_exceptions = reader.read_u8()? as usize;
308 let mut doc_exceptions = Vec::with_capacity(num_doc_exceptions);
309 for _ in 0..num_doc_exceptions {
310 let pos = reader.read_u8()?;
311 let val = reader.read_u32::<LittleEndian>()?;
312 doc_exceptions.push((pos, val));
313 }
314
315 let term_freqs_len = reader.read_u16::<LittleEndian>()? as usize;
317 let mut term_freqs = vec![0u8; term_freqs_len];
318 reader.read_exact(&mut term_freqs)?;
319
320 let num_tf_exceptions = reader.read_u8()? as usize;
322 let mut tf_exceptions = Vec::with_capacity(num_tf_exceptions);
323 for _ in 0..num_tf_exceptions {
324 let pos = reader.read_u8()?;
325 let val = reader.read_u32::<LittleEndian>()?;
326 tf_exceptions.push((pos, val));
327 }
328
329 Ok(Self {
330 first_doc_id,
331 last_doc_id,
332 num_docs,
333 doc_bit_width,
334 tf_bit_width,
335 max_tf,
336 max_block_score,
337 doc_deltas,
338 doc_exceptions,
339 term_freqs,
340 tf_exceptions,
341 })
342 }
343
344 pub fn decode_doc_ids(&self) -> Vec<u32> {
346 if self.num_docs == 0 {
347 return Vec::new();
348 }
349
350 let count = self.num_docs as usize;
351 let mut deltas = vec![0u32; count];
352
353 if count > 1 {
355 unpack_with_exceptions(
356 &self.doc_deltas,
357 self.doc_bit_width,
358 &self.doc_exceptions,
359 count - 1,
360 &mut deltas,
361 );
362 }
363
364 let mut doc_ids = vec![0u32; count];
366 simd::delta_decode(&mut doc_ids, &deltas, self.first_doc_id, count);
367
368 doc_ids
369 }
370
371 pub fn decode_term_freqs(&self) -> Vec<u32> {
373 if self.num_docs == 0 {
374 return Vec::new();
375 }
376
377 let count = self.num_docs as usize;
378 let mut tfs = vec![0u32; count];
379
380 unpack_with_exceptions(
382 &self.term_freqs,
383 self.tf_bit_width,
384 &self.tf_exceptions,
385 count,
386 &mut tfs,
387 );
388
389 simd::add_one(&mut tfs, count);
391
392 tfs
393 }
394}
395
396#[derive(Debug, Clone)]
398pub struct OptP4DPostingList {
399 pub blocks: Vec<OptP4DBlock>,
401 pub doc_count: u32,
403 pub max_score: f32,
405}
406
407impl OptP4DPostingList {
408 const K1: f32 = 1.2;
410 const B: f32 = 0.75;
411
412 #[inline]
414 fn compute_bm25f_upper_bound(max_tf: u32, idf: f32) -> f32 {
415 let tf = max_tf as f32;
416 let min_length_norm = 1.0 - Self::B;
417 let tf_norm = (tf * (Self::K1 + 1.0)) / (tf + Self::K1 * min_length_norm);
418 idf * tf_norm
419 }
420
421 pub fn from_postings(doc_ids: &[u32], term_freqs: &[u32], idf: f32) -> Self {
423 assert_eq!(doc_ids.len(), term_freqs.len());
424
425 if doc_ids.is_empty() {
426 return Self {
427 blocks: Vec::new(),
428 doc_count: 0,
429 max_score: 0.0,
430 };
431 }
432
433 let mut blocks = Vec::new();
434 let mut max_score = 0.0f32;
435 let mut i = 0;
436
437 while i < doc_ids.len() {
438 let block_end = (i + OPT_P4D_BLOCK_SIZE).min(doc_ids.len());
439 let block_docs = &doc_ids[i..block_end];
440 let block_tfs = &term_freqs[i..block_end];
441
442 let block = Self::create_block(block_docs, block_tfs, idf);
443 max_score = max_score.max(block.max_block_score);
444 blocks.push(block);
445
446 i = block_end;
447 }
448
449 Self {
450 blocks,
451 doc_count: doc_ids.len() as u32,
452 max_score,
453 }
454 }
455
456 fn create_block(doc_ids: &[u32], term_freqs: &[u32], idf: f32) -> OptP4DBlock {
457 let num_docs = doc_ids.len();
458 let first_doc_id = doc_ids[0];
459 let last_doc_id = *doc_ids.last().unwrap();
460
461 let mut deltas = Vec::with_capacity(num_docs.saturating_sub(1));
463 for j in 1..num_docs {
464 let delta = doc_ids[j] - doc_ids[j - 1] - 1;
465 deltas.push(delta);
466 }
467
468 let (doc_bit_width, _, _) = find_optimal_bit_width(&deltas);
470 let (doc_deltas, doc_exceptions) = pack_with_exceptions(&deltas, doc_bit_width);
471
472 let mut tfs = Vec::with_capacity(num_docs);
474 let mut max_tf = 0u32;
475
476 for &tf in term_freqs {
477 tfs.push(tf - 1); max_tf = max_tf.max(tf);
479 }
480
481 let (tf_bit_width, _, _) = find_optimal_bit_width(&tfs);
483 let (term_freqs_packed, tf_exceptions) = pack_with_exceptions(&tfs, tf_bit_width);
484
485 let max_block_score = Self::compute_bm25f_upper_bound(max_tf, idf);
487
488 OptP4DBlock {
489 first_doc_id,
490 last_doc_id,
491 num_docs: num_docs as u16,
492 doc_bit_width,
493 tf_bit_width,
494 max_tf,
495 max_block_score,
496 doc_deltas,
497 doc_exceptions,
498 term_freqs: term_freqs_packed,
499 tf_exceptions,
500 }
501 }
502
503 pub fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
505 writer.write_u32::<LittleEndian>(self.doc_count)?;
506 writer.write_f32::<LittleEndian>(self.max_score)?;
507 writer.write_u32::<LittleEndian>(self.blocks.len() as u32)?;
508
509 for block in &self.blocks {
510 block.serialize(writer)?;
511 }
512
513 Ok(())
514 }
515
516 pub fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
518 let doc_count = reader.read_u32::<LittleEndian>()?;
519 let max_score = reader.read_f32::<LittleEndian>()?;
520 let num_blocks = reader.read_u32::<LittleEndian>()? as usize;
521
522 let mut blocks = Vec::with_capacity(num_blocks);
523 for _ in 0..num_blocks {
524 blocks.push(OptP4DBlock::deserialize(reader)?);
525 }
526
527 Ok(Self {
528 blocks,
529 doc_count,
530 max_score,
531 })
532 }
533
534 pub fn len(&self) -> u32 {
536 self.doc_count
537 }
538
539 pub fn is_empty(&self) -> bool {
541 self.doc_count == 0
542 }
543
544 pub fn iterator(&self) -> OptP4DIterator<'_> {
546 OptP4DIterator::new(self)
547 }
548}
549
550pub struct OptP4DIterator<'a> {
552 posting_list: &'a OptP4DPostingList,
553 current_block: usize,
554 block_doc_ids: Vec<u32>,
555 block_term_freqs: Vec<u32>,
556 pos_in_block: usize,
557 exhausted: bool,
558}
559
560impl<'a> OptP4DIterator<'a> {
561 pub fn new(posting_list: &'a OptP4DPostingList) -> Self {
562 let mut iter = Self {
563 posting_list,
564 current_block: 0,
565 block_doc_ids: Vec::new(),
566 block_term_freqs: Vec::new(),
567 pos_in_block: 0,
568 exhausted: posting_list.blocks.is_empty(),
569 };
570
571 if !iter.exhausted {
572 iter.decode_current_block();
573 }
574
575 iter
576 }
577
578 fn decode_current_block(&mut self) {
579 let block = &self.posting_list.blocks[self.current_block];
580 self.block_doc_ids = block.decode_doc_ids();
581 self.block_term_freqs = block.decode_term_freqs();
582 self.pos_in_block = 0;
583 }
584
585 pub fn doc(&self) -> u32 {
587 if self.exhausted {
588 u32::MAX
589 } else {
590 self.block_doc_ids[self.pos_in_block]
591 }
592 }
593
594 pub fn term_freq(&self) -> u32 {
596 if self.exhausted {
597 0
598 } else {
599 self.block_term_freqs[self.pos_in_block]
600 }
601 }
602
603 pub fn advance(&mut self) -> u32 {
605 if self.exhausted {
606 return u32::MAX;
607 }
608
609 self.pos_in_block += 1;
610
611 if self.pos_in_block >= self.block_doc_ids.len() {
612 self.current_block += 1;
613 if self.current_block >= self.posting_list.blocks.len() {
614 self.exhausted = true;
615 return u32::MAX;
616 }
617 self.decode_current_block();
618 }
619
620 self.doc()
621 }
622
623 pub fn seek(&mut self, target: u32) -> u32 {
625 if self.exhausted {
626 return u32::MAX;
627 }
628
629 while self.current_block < self.posting_list.blocks.len() {
631 let block = &self.posting_list.blocks[self.current_block];
632 if block.last_doc_id >= target {
633 break;
634 }
635 self.current_block += 1;
636 }
637
638 if self.current_block >= self.posting_list.blocks.len() {
639 self.exhausted = true;
640 return u32::MAX;
641 }
642
643 if self.block_doc_ids.is_empty() || self.current_block != self.posting_list.blocks.len() - 1
645 {
646 self.decode_current_block();
647 }
648
649 match self.block_doc_ids[self.pos_in_block..].binary_search(&target) {
651 Ok(idx) => {
652 self.pos_in_block += idx;
653 }
654 Err(idx) => {
655 self.pos_in_block += idx;
656 if self.pos_in_block >= self.block_doc_ids.len() {
657 self.current_block += 1;
659 if self.current_block >= self.posting_list.blocks.len() {
660 self.exhausted = true;
661 return u32::MAX;
662 }
663 self.decode_current_block();
664 }
665 }
666 }
667
668 self.doc()
669 }
670}
671
672#[cfg(test)]
673mod tests {
674 use super::*;
675
676 #[test]
677 fn test_bits_needed() {
678 assert_eq!(simd::bits_needed(0), 0);
679 assert_eq!(simd::bits_needed(1), 1);
680 assert_eq!(simd::bits_needed(2), 2);
681 assert_eq!(simd::bits_needed(3), 2);
682 assert_eq!(simd::bits_needed(4), 3);
683 assert_eq!(simd::bits_needed(255), 8);
684 assert_eq!(simd::bits_needed(256), 9);
685 assert_eq!(simd::bits_needed(u32::MAX), 32);
686 }
687
688 #[test]
689 fn test_find_optimal_bit_width() {
690 let values = vec![0u32; 100];
692 let (bits, exceptions, _) = find_optimal_bit_width(&values);
693 assert_eq!(bits, 0);
694 assert_eq!(exceptions, 0);
695
696 let values: Vec<u32> = (0..100).map(|i| i % 16).collect();
698 let (bits, _, _) = find_optimal_bit_width(&values);
699 assert!(bits <= 4);
700
701 let mut values: Vec<u32> = (0..100).map(|i| i % 16).collect();
703 values[50] = 1_000_000; let (bits, exceptions, _) = find_optimal_bit_width(&values);
705 assert!(bits < 20); assert!(exceptions >= 1);
707 }
708
709 #[test]
710 fn test_pack_unpack_with_exceptions() {
711 let values = vec![1, 2, 3, 255, 4, 5, 1000, 6, 7, 8];
712 let (packed, exceptions) = pack_with_exceptions(&values, 4);
713
714 let mut output = vec![0u32; values.len()];
715 unpack_with_exceptions(&packed, 4, &exceptions, values.len(), &mut output);
716
717 assert_eq!(output, values);
718 }
719
720 #[test]
721 fn test_opt_p4d_posting_list_small() {
722 let doc_ids: Vec<u32> = (0..100).map(|i| i * 2).collect();
723 let term_freqs: Vec<u32> = vec![1; 100];
724
725 let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
726
727 assert_eq!(list.len(), 100);
728 assert_eq!(list.blocks.len(), 1);
729
730 let mut iter = list.iterator();
732 for (i, &expected) in doc_ids.iter().enumerate() {
733 assert_eq!(iter.doc(), expected, "Mismatch at {}", i);
734 assert_eq!(iter.term_freq(), 1);
735 iter.advance();
736 }
737 assert_eq!(iter.doc(), u32::MAX);
738 }
739
740 #[test]
741 fn test_opt_p4d_posting_list_large() {
742 let doc_ids: Vec<u32> = (0..500).map(|i| i * 3).collect();
743 let term_freqs: Vec<u32> = (0..500).map(|i| (i % 10) + 1).collect();
744
745 let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
746
747 assert_eq!(list.len(), 500);
748 assert_eq!(list.blocks.len(), 4); let mut iter = list.iterator();
752 for (i, &expected) in doc_ids.iter().enumerate() {
753 assert_eq!(iter.doc(), expected, "Mismatch at {}", i);
754 assert_eq!(iter.term_freq(), term_freqs[i]);
755 iter.advance();
756 }
757 }
758
759 #[test]
760 fn test_opt_p4d_seek() {
761 let doc_ids: Vec<u32> = vec![10, 20, 30, 100, 200, 300, 1000, 2000];
762 let term_freqs: Vec<u32> = vec![1; 8];
763
764 let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
765 let mut iter = list.iterator();
766
767 assert_eq!(iter.seek(25), 30);
768 assert_eq!(iter.seek(100), 100);
769 assert_eq!(iter.seek(500), 1000);
770 assert_eq!(iter.seek(3000), u32::MAX);
771 }
772
773 #[test]
774 fn test_opt_p4d_serialization() {
775 let doc_ids: Vec<u32> = (0..200).map(|i| i * 5).collect();
776 let term_freqs: Vec<u32> = (0..200).map(|i| (i % 5) + 1).collect();
777
778 let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
779
780 let mut buffer = Vec::new();
781 list.serialize(&mut buffer).unwrap();
782
783 let restored = OptP4DPostingList::deserialize(&mut &buffer[..]).unwrap();
784
785 assert_eq!(restored.len(), list.len());
786 assert_eq!(restored.blocks.len(), list.blocks.len());
787
788 let mut iter1 = list.iterator();
790 let mut iter2 = restored.iterator();
791
792 while iter1.doc() != u32::MAX {
793 assert_eq!(iter1.doc(), iter2.doc());
794 assert_eq!(iter1.term_freq(), iter2.term_freq());
795 iter1.advance();
796 iter2.advance();
797 }
798 }
799
800 #[test]
801 fn test_opt_p4d_with_outliers() {
802 let mut doc_ids: Vec<u32> = (0..128).map(|i| i * 2).collect();
804 doc_ids[64] = 1_000_000; doc_ids.sort();
808
809 let term_freqs: Vec<u32> = vec![1; 128];
810
811 let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
812
813 let mut iter = list.iterator();
815 let mut found_outlier = false;
816 while iter.doc() != u32::MAX {
817 if iter.doc() == 1_000_000 {
818 found_outlier = true;
819 }
820 iter.advance();
821 }
822 assert!(found_outlier, "Outlier value should be preserved");
823 }
824
825 #[test]
826 fn test_opt_p4d_simd_full_blocks() {
827 let doc_ids: Vec<u32> = (0..1024).map(|i| i * 2).collect();
829 let term_freqs: Vec<u32> = (0..1024).map(|i| (i % 20) + 1).collect();
830
831 let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
832
833 assert_eq!(list.len(), 1024);
834 assert_eq!(list.blocks.len(), 8); let mut iter = list.iterator();
838 for (i, &expected_doc) in doc_ids.iter().enumerate() {
839 assert_eq!(iter.doc(), expected_doc, "Doc mismatch at {}", i);
840 assert_eq!(iter.term_freq(), term_freqs[i], "TF mismatch at {}", i);
841 iter.advance();
842 }
843 assert_eq!(iter.doc(), u32::MAX);
844 }
845
846 #[test]
847 fn test_opt_p4d_simd_8bit_values() {
848 let doc_ids: Vec<u32> = (0..256).collect();
850 let term_freqs: Vec<u32> = (0..256).map(|i| (i % 100) + 1).collect();
851
852 let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
853
854 let mut iter = list.iterator();
856 for (i, &expected_doc) in doc_ids.iter().enumerate() {
857 assert_eq!(iter.doc(), expected_doc, "Doc mismatch at {}", i);
858 assert_eq!(iter.term_freq(), term_freqs[i], "TF mismatch at {}", i);
859 iter.advance();
860 }
861 }
862
863 #[test]
864 fn test_opt_p4d_simd_delta_decode() {
865 let mut doc_ids = Vec::with_capacity(512);
867 let mut current = 0u32;
868 for i in 0..512 {
869 current += (i % 10) + 1; doc_ids.push(current);
871 }
872 let term_freqs: Vec<u32> = vec![1; 512];
873
874 let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
875
876 let mut iter = list.iterator();
878 for (i, &expected_doc) in doc_ids.iter().enumerate() {
879 assert_eq!(
880 iter.doc(),
881 expected_doc,
882 "Doc mismatch at {} (expected {}, got {})",
883 i,
884 expected_doc,
885 iter.doc()
886 );
887 iter.advance();
888 }
889 }
890}