1use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
11use std::io::{self, Read, Write};
12
13#[derive(Debug, Clone)]
15pub struct EliasFano {
16 lower_bits: Vec<u64>,
18 upper_bits: Vec<u64>,
20 len: u32,
22 universe: u64,
24 lower_bit_width: u8,
26}
27
28impl EliasFano {
29 pub fn from_sorted_slice(values: &[u32]) -> Self {
31 if values.is_empty() {
32 return Self {
33 lower_bits: Vec::new(),
34 upper_bits: Vec::new(),
35 len: 0,
36 universe: 0,
37 lower_bit_width: 0,
38 };
39 }
40
41 let n = values.len() as u64;
42 let max_val = *values.last().unwrap() as u64;
43 let universe = max_val + 1;
44
45 let lower_bit_width = if n == 0 {
47 0
48 } else {
49 let ratio = universe.max(1) / n.max(1);
50 if ratio <= 1 {
51 0
52 } else {
53 (64 - ratio.leading_zeros() - 1) as u8
54 }
55 };
56
57 let lower_bits_total = (n as usize) * (lower_bit_width as usize);
59 let lower_words = lower_bits_total.div_ceil(64);
60 let mut lower_bits = vec![0u64; lower_words];
61
62 let upper_bound = n + (max_val >> lower_bit_width) + 1;
65 let upper_words = (upper_bound as usize).div_ceil(64);
66 let mut upper_bits = vec![0u64; upper_words];
67
68 let lower_mask = if lower_bit_width == 0 {
70 0
71 } else {
72 (1u64 << lower_bit_width) - 1
73 };
74
75 for (i, &val) in values.iter().enumerate() {
76 let val = val as u64;
77
78 if lower_bit_width > 0 {
80 let lower = val & lower_mask;
81 let bit_pos = i * (lower_bit_width as usize);
82 let word_idx = bit_pos / 64;
83 let bit_offset = bit_pos % 64;
84
85 lower_bits[word_idx] |= lower << bit_offset;
86 if bit_offset + (lower_bit_width as usize) > 64 && word_idx + 1 < lower_bits.len() {
87 lower_bits[word_idx + 1] |= lower >> (64 - bit_offset);
88 }
89 }
90
91 let upper = val >> lower_bit_width;
93 let upper_pos = (i as u64) + upper;
94 let word_idx = (upper_pos / 64) as usize;
95 let bit_offset = upper_pos % 64;
96 if word_idx < upper_bits.len() {
97 upper_bits[word_idx] |= 1u64 << bit_offset;
98 }
99 }
100
101 Self {
102 lower_bits,
103 upper_bits,
104 len: values.len() as u32,
105 universe,
106 lower_bit_width,
107 }
108 }
109
110 #[inline]
112 pub fn len(&self) -> u32 {
113 self.len
114 }
115
116 #[inline]
118 pub fn is_empty(&self) -> bool {
119 self.len == 0
120 }
121
122 #[inline]
124 pub fn get(&self, i: u32) -> Option<u32> {
125 if i >= self.len {
126 return None;
127 }
128
129 let i = i as usize;
130
131 let lower = if self.lower_bit_width == 0 {
133 0u64
134 } else {
135 let bit_pos = i * (self.lower_bit_width as usize);
136 let word_idx = bit_pos / 64;
137 let bit_offset = bit_pos % 64;
138 let lower_mask = (1u64 << self.lower_bit_width) - 1;
139
140 let mut val = (self.lower_bits[word_idx] >> bit_offset) & lower_mask;
141 if bit_offset + (self.lower_bit_width as usize) > 64
142 && word_idx + 1 < self.lower_bits.len()
143 {
144 val |= (self.lower_bits[word_idx + 1] << (64 - bit_offset)) & lower_mask;
145 }
146 val
147 };
148
149 let select_pos = self.select1(i as u32)?;
151 let upper = (select_pos as u64) - (i as u64);
152
153 Some(((upper << self.lower_bit_width) | lower) as u32)
154 }
155
156 fn select1(&self, i: u32) -> Option<u32> {
159 if i >= self.len {
160 return None;
161 }
162
163 let mut remaining = i + 1;
164 let mut pos = 0u32;
165
166 for &word in self.upper_bits.iter() {
168 let popcount = Self::popcount64(word);
169 if popcount >= remaining {
170 let bit_pos = Self::select_in_word(word, remaining);
172 return Some(pos + bit_pos);
173 }
174 remaining -= popcount;
175 pos += 64;
176 }
177
178 None
179 }
180
181 #[inline]
183 fn popcount64(word: u64) -> u32 {
184 word.count_ones()
186 }
187
188 #[inline]
190 fn select_in_word(word: u64, k: u32) -> u32 {
191 #[cfg(all(target_arch = "x86_64", target_feature = "bmi2"))]
192 {
193 use std::arch::x86_64::_pdep_u64;
195 let mask = 1u64 << (k - 1);
196 let selected = unsafe { _pdep_u64(mask, word) };
197 selected.trailing_zeros()
198 }
199
200 #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2")))]
201 {
202 let mut w = word;
204 for _ in 0..k - 1 {
205 w &= w - 1; }
207 w.trailing_zeros()
208 }
209 }
210
211 pub fn next_geq(&self, target: u32) -> Option<(u32, u32)> {
214 if self.len == 0 {
215 return None;
216 }
217
218 let target = target as u64;
219
220 let target_upper = target >> self.lower_bit_width;
222 let _target_lower = if self.lower_bit_width == 0 {
223 0
224 } else {
225 target & ((1u64 << self.lower_bit_width) - 1)
226 };
227
228 let bucket_start = self.select0(target_upper as u32);
230
231 for pos in bucket_start..self.len {
233 if let Some(val) = self.get(pos)
234 && val as u64 >= target
235 {
236 return Some((pos, val));
237 }
238 }
239
240 None
241 }
242
243 fn select0(&self, i: u32) -> u32 {
246 if i == 0 {
247 return 0;
248 }
249
250 let mut zeros_seen = 0u32;
251 let mut ones_seen = 0u32;
252
253 for &word in &self.upper_bits {
254 let zeros_in_word = 64 - word.count_ones();
255 let ones_in_word = word.count_ones();
256
257 if zeros_seen + zeros_in_word >= i {
258 let mut w = word;
260 let mut bit_idx = 0u32;
261 while bit_idx < 64 {
262 if (w & 1) == 0 {
263 zeros_seen += 1;
264 if zeros_seen == i {
265 return ones_seen;
267 }
268 } else {
269 ones_seen += 1;
270 }
271 w >>= 1;
272 bit_idx += 1;
273 }
274 }
275 zeros_seen += zeros_in_word;
276 ones_seen += ones_in_word;
277 }
278
279 self.len
280 }
281
282 pub fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
284 writer.write_u32::<LittleEndian>(self.len)?;
285 writer.write_u64::<LittleEndian>(self.universe)?;
286 writer.write_u8(self.lower_bit_width)?;
287
288 writer.write_u32::<LittleEndian>(self.lower_bits.len() as u32)?;
290 for &word in &self.lower_bits {
291 writer.write_u64::<LittleEndian>(word)?;
292 }
293
294 writer.write_u32::<LittleEndian>(self.upper_bits.len() as u32)?;
296 for &word in &self.upper_bits {
297 writer.write_u64::<LittleEndian>(word)?;
298 }
299
300 Ok(())
301 }
302
303 pub fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
305 let len = reader.read_u32::<LittleEndian>()?;
306 let universe = reader.read_u64::<LittleEndian>()?;
307 let lower_bit_width = reader.read_u8()?;
308
309 let lower_len = reader.read_u32::<LittleEndian>()? as usize;
310 let mut lower_bits = Vec::with_capacity(lower_len);
311 for _ in 0..lower_len {
312 lower_bits.push(reader.read_u64::<LittleEndian>()?);
313 }
314
315 let upper_len = reader.read_u32::<LittleEndian>()? as usize;
316 let mut upper_bits = Vec::with_capacity(upper_len);
317 for _ in 0..upper_len {
318 upper_bits.push(reader.read_u64::<LittleEndian>()?);
319 }
320
321 Ok(Self {
322 lower_bits,
323 upper_bits,
324 len,
325 universe,
326 lower_bit_width,
327 })
328 }
329
330 pub fn size_bytes(&self) -> usize {
332 self.lower_bits.len() * 8 + self.upper_bits.len() * 8 + 16
333 }
334
335 pub fn iter(&self) -> EliasFanoIterator<'_> {
337 EliasFanoIterator { ef: self, pos: 0 }
338 }
339}
340
341pub struct EliasFanoIterator<'a> {
343 ef: &'a EliasFano,
344 pos: u32,
345}
346
347impl<'a> Iterator for EliasFanoIterator<'a> {
348 type Item = u32;
349
350 fn next(&mut self) -> Option<Self::Item> {
351 if self.pos >= self.ef.len {
352 return None;
353 }
354
355 let val = self.ef.get(self.pos)?;
357 self.pos += 1;
358 Some(val)
359 }
360
361 fn size_hint(&self) -> (usize, Option<usize>) {
362 let remaining = (self.ef.len - self.pos) as usize;
363 (remaining, Some(remaining))
364 }
365}
366
367impl<'a> ExactSizeIterator for EliasFanoIterator<'a> {}
368
369pub const EF_BLOCK_SIZE: usize = 128;
371
372#[derive(Debug, Clone)]
374pub struct EFBlockInfo {
375 pub first_doc_id: u32,
377 pub last_doc_id: u32,
379 pub max_tf: u32,
381 pub max_block_score: f32,
383 pub start_pos: u32,
385 pub num_docs: u16,
387}
388
389impl EFBlockInfo {
390 pub fn serialize<W: std::io::Write>(&self, writer: &mut W) -> std::io::Result<()> {
392 use byteorder::WriteBytesExt;
393 writer.write_u32::<LittleEndian>(self.first_doc_id)?;
394 writer.write_u32::<LittleEndian>(self.last_doc_id)?;
395 writer.write_u32::<LittleEndian>(self.max_tf)?;
396 writer.write_f32::<LittleEndian>(self.max_block_score)?;
397 writer.write_u32::<LittleEndian>(self.start_pos)?;
398 writer.write_u16::<LittleEndian>(self.num_docs)?;
399 Ok(())
400 }
401
402 pub fn deserialize<R: std::io::Read>(reader: &mut R) -> std::io::Result<Self> {
404 use byteorder::ReadBytesExt;
405 Ok(Self {
406 first_doc_id: reader.read_u32::<LittleEndian>()?,
407 last_doc_id: reader.read_u32::<LittleEndian>()?,
408 max_tf: reader.read_u32::<LittleEndian>()?,
409 max_block_score: reader.read_f32::<LittleEndian>()?,
410 start_pos: reader.read_u32::<LittleEndian>()?,
411 num_docs: reader.read_u16::<LittleEndian>()?,
412 })
413 }
414}
415
416#[derive(Debug, Clone)]
418pub struct EliasFanoPostingList {
419 pub doc_ids: EliasFano,
421 pub term_freqs: Vec<u8>,
423 pub tf_bits: u8,
425 pub max_tf: u32,
427 pub blocks: Vec<EFBlockInfo>,
429 pub max_score: f32,
431}
432
433impl EliasFanoPostingList {
434 const K1: f32 = 1.2;
436 const B: f32 = 0.75;
437
438 #[inline]
440 pub fn compute_bm25_upper_bound(max_tf: u32, idf: f32) -> f32 {
441 let tf = max_tf as f32;
442 let min_length_norm = 1.0 - Self::B;
444 let tf_norm = (tf * (Self::K1 + 1.0)) / (tf + Self::K1 * min_length_norm);
445 idf * tf_norm
446 }
447
448 pub fn from_postings(doc_ids: &[u32], term_freqs: &[u32]) -> Self {
450 Self::from_postings_with_idf(doc_ids, term_freqs, 1.0)
451 }
452
453 pub fn from_postings_with_idf(doc_ids: &[u32], term_freqs: &[u32], idf: f32) -> Self {
455 assert_eq!(doc_ids.len(), term_freqs.len());
456
457 if doc_ids.is_empty() {
458 return Self {
459 doc_ids: EliasFano::from_sorted_slice(&[]),
460 term_freqs: Vec::new(),
461 tf_bits: 0,
462 max_tf: 0,
463 blocks: Vec::new(),
464 max_score: 0.0,
465 };
466 }
467
468 let ef_doc_ids = EliasFano::from_sorted_slice(doc_ids);
469
470 let max_tf = *term_freqs.iter().max().unwrap();
472 let tf_bits = if max_tf == 0 {
473 0
474 } else {
475 (32 - max_tf.leading_zeros()) as u8
476 };
477
478 let total_bits = doc_ids.len() * (tf_bits as usize);
480 let total_bytes = total_bits.div_ceil(8);
481 let mut packed_tfs = vec![0u8; total_bytes];
482
483 if tf_bits > 0 {
484 for (i, &tf) in term_freqs.iter().enumerate() {
485 let bit_pos = i * (tf_bits as usize);
486 let byte_idx = bit_pos / 8;
487 let bit_offset = bit_pos % 8;
488
489 let val = tf.saturating_sub(1);
491
492 packed_tfs[byte_idx] |= (val as u8) << bit_offset;
493 if bit_offset + (tf_bits as usize) > 8 && byte_idx + 1 < packed_tfs.len() {
494 packed_tfs[byte_idx + 1] |= (val >> (8 - bit_offset)) as u8;
495 }
496 if bit_offset + (tf_bits as usize) > 16 && byte_idx + 2 < packed_tfs.len() {
497 packed_tfs[byte_idx + 2] |= (val >> (16 - bit_offset)) as u8;
498 }
499 }
500 }
501
502 let mut blocks = Vec::new();
504 let mut max_score = 0.0f32;
505 let mut i = 0;
506
507 while i < doc_ids.len() {
508 let block_end = (i + EF_BLOCK_SIZE).min(doc_ids.len());
509 let block_doc_ids = &doc_ids[i..block_end];
510 let block_tfs = &term_freqs[i..block_end];
511
512 let block_max_tf = *block_tfs.iter().max().unwrap_or(&1);
513 let block_score = Self::compute_bm25_upper_bound(block_max_tf, idf);
514 max_score = max_score.max(block_score);
515
516 blocks.push(EFBlockInfo {
517 first_doc_id: block_doc_ids[0],
518 last_doc_id: *block_doc_ids.last().unwrap(),
519 max_tf: block_max_tf,
520 max_block_score: block_score,
521 start_pos: i as u32,
522 num_docs: (block_end - i) as u16,
523 });
524
525 i = block_end;
526 }
527
528 Self {
529 doc_ids: ef_doc_ids,
530 term_freqs: packed_tfs,
531 tf_bits,
532 max_tf,
533 blocks,
534 max_score,
535 }
536 }
537
538 pub fn get_tf(&self, pos: u32) -> u32 {
540 if self.tf_bits == 0 || pos >= self.doc_ids.len() {
541 return 1;
542 }
543
544 let bit_pos = (pos as usize) * (self.tf_bits as usize);
545 let byte_idx = bit_pos / 8;
546 let bit_offset = bit_pos % 8;
547 let mask = (1u32 << self.tf_bits) - 1;
548
549 let mut val = (self.term_freqs[byte_idx] >> bit_offset) as u32;
550 if bit_offset + (self.tf_bits as usize) > 8 && byte_idx + 1 < self.term_freqs.len() {
551 val |= (self.term_freqs[byte_idx + 1] as u32) << (8 - bit_offset);
552 }
553 if bit_offset + (self.tf_bits as usize) > 16 && byte_idx + 2 < self.term_freqs.len() {
554 val |= (self.term_freqs[byte_idx + 2] as u32) << (16 - bit_offset);
555 }
556
557 (val & mask) + 1 }
559
560 pub fn len(&self) -> u32 {
562 self.doc_ids.len()
563 }
564
565 pub fn is_empty(&self) -> bool {
567 self.doc_ids.is_empty()
568 }
569
570 pub fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
572 self.doc_ids.serialize(writer)?;
573 writer.write_u8(self.tf_bits)?;
574 writer.write_u32::<LittleEndian>(self.max_tf)?;
575 writer.write_f32::<LittleEndian>(self.max_score)?;
576 writer.write_u32::<LittleEndian>(self.term_freqs.len() as u32)?;
577 writer.write_all(&self.term_freqs)?;
578
579 writer.write_u32::<LittleEndian>(self.blocks.len() as u32)?;
581 for block in &self.blocks {
582 block.serialize(writer)?;
583 }
584
585 Ok(())
586 }
587
588 pub fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
590 let doc_ids = EliasFano::deserialize(reader)?;
591 let tf_bits = reader.read_u8()?;
592 let max_tf = reader.read_u32::<LittleEndian>()?;
593 let max_score = reader.read_f32::<LittleEndian>()?;
594 let tf_len = reader.read_u32::<LittleEndian>()? as usize;
595 let mut term_freqs = vec![0u8; tf_len];
596 reader.read_exact(&mut term_freqs)?;
597
598 let num_blocks = reader.read_u32::<LittleEndian>()? as usize;
600 let mut blocks = Vec::with_capacity(num_blocks);
601 for _ in 0..num_blocks {
602 blocks.push(EFBlockInfo::deserialize(reader)?);
603 }
604
605 Ok(Self {
606 doc_ids,
607 term_freqs,
608 tf_bits,
609 max_tf,
610 blocks,
611 max_score,
612 })
613 }
614
615 pub fn num_blocks(&self) -> usize {
617 self.blocks.len()
618 }
619
620 pub fn block_for_pos(&self, pos: u32) -> usize {
622 (pos as usize) / EF_BLOCK_SIZE
623 }
624
625 pub fn iterator(&self) -> EliasFanoPostingIterator<'_> {
627 EliasFanoPostingIterator {
628 list: self,
629 pos: 0,
630 current_block: 0,
631 }
632 }
633}
634
635pub struct EliasFanoPostingIterator<'a> {
637 list: &'a EliasFanoPostingList,
638 pos: u32,
639 current_block: usize,
640}
641
642impl<'a> EliasFanoPostingIterator<'a> {
643 pub fn doc(&self) -> u32 {
645 self.list.doc_ids.get(self.pos).unwrap_or(u32::MAX)
646 }
647
648 pub fn term_freq(&self) -> u32 {
650 self.list.get_tf(self.pos)
651 }
652
653 pub fn advance(&mut self) -> u32 {
655 self.pos += 1;
656 if !self.list.blocks.is_empty() {
658 let new_block = self.list.block_for_pos(self.pos);
659 if new_block < self.list.blocks.len() {
660 self.current_block = new_block;
661 }
662 }
663 self.doc()
664 }
665
666 pub fn seek(&mut self, target: u32) -> u32 {
668 if !self.list.blocks.is_empty() {
670 let block_idx = self.list.blocks[self.current_block..].binary_search_by(|block| {
672 if block.last_doc_id < target {
673 std::cmp::Ordering::Less
674 } else if block.first_doc_id > target {
675 std::cmp::Ordering::Greater
676 } else {
677 std::cmp::Ordering::Equal
678 }
679 });
680
681 let target_block = match block_idx {
682 Ok(idx) => self.current_block + idx,
683 Err(idx) => {
684 let abs_idx = self.current_block + idx;
685 if abs_idx >= self.list.blocks.len() {
686 self.pos = self.list.len();
687 return u32::MAX;
688 }
689 abs_idx
690 }
691 };
692
693 if target_block > self.current_block {
695 self.current_block = target_block;
696 self.pos = self.list.blocks[target_block].start_pos;
697 }
698 }
699
700 if let Some((pos, val)) = self.list.doc_ids.next_geq(target)
702 && pos >= self.pos
703 {
704 self.pos = pos;
705 if !self.list.blocks.is_empty() {
706 self.current_block = self.list.block_for_pos(pos);
707 }
708 return val;
709 }
710
711 while self.pos < self.list.len() {
713 let doc = self.doc();
714 if doc >= target {
715 return doc;
716 }
717 self.pos += 1;
718 }
719
720 u32::MAX
721 }
722
723 pub fn is_exhausted(&self) -> bool {
725 self.pos >= self.list.len()
726 }
727
728 pub fn current_block_max_score(&self) -> f32 {
730 if self.is_exhausted() || self.list.blocks.is_empty() {
731 return 0.0;
732 }
733 if self.current_block < self.list.blocks.len() {
734 self.list.blocks[self.current_block].max_block_score
735 } else {
736 0.0
737 }
738 }
739
740 pub fn current_block_max_tf(&self) -> u32 {
742 if self.is_exhausted() || self.list.blocks.is_empty() {
743 return 0;
744 }
745 if self.current_block < self.list.blocks.len() {
746 self.list.blocks[self.current_block].max_tf
747 } else {
748 0
749 }
750 }
751
752 pub fn max_remaining_score(&self) -> f32 {
754 if self.is_exhausted() || self.list.blocks.is_empty() {
755 return 0.0;
756 }
757 self.list.blocks[self.current_block..]
758 .iter()
759 .map(|b| b.max_block_score)
760 .fold(0.0f32, |a, b| a.max(b))
761 }
762
763 pub fn skip_to_block_with_doc(&mut self, target: u32) -> Option<(u32, f32)> {
766 if self.list.blocks.is_empty() {
767 return None;
768 }
769
770 while self.current_block < self.list.blocks.len() {
771 let block = &self.list.blocks[self.current_block];
772 if block.last_doc_id >= target {
773 self.pos = block.start_pos;
774 return Some((block.first_doc_id, block.max_block_score));
775 }
776 self.current_block += 1;
777 }
778
779 self.pos = self.list.len();
780 None
781 }
782}
783
784#[cfg(test)]
785mod tests {
786 use super::*;
787
788 #[test]
789 fn test_elias_fano_basic() {
790 let values = vec![2, 3, 5, 7, 11, 13, 24];
791 let ef = EliasFano::from_sorted_slice(&values);
792
793 assert_eq!(ef.len(), 7);
794
795 for (i, &expected) in values.iter().enumerate() {
796 assert_eq!(ef.get(i as u32), Some(expected), "Mismatch at index {}", i);
797 }
798 }
799
800 #[test]
801 fn test_elias_fano_next_geq() {
802 let values = vec![10, 20, 30, 100, 200, 300, 1000];
803 let ef = EliasFano::from_sorted_slice(&values);
804
805 assert_eq!(ef.next_geq(5), Some((0, 10)));
806 assert_eq!(ef.next_geq(10), Some((0, 10)));
807 assert_eq!(ef.next_geq(15), Some((1, 20)));
808 assert_eq!(ef.next_geq(100), Some((3, 100)));
809 assert_eq!(ef.next_geq(500), Some((6, 1000)));
810 assert_eq!(ef.next_geq(2000), None);
811 }
812
813 #[test]
814 fn test_elias_fano_serialization() {
815 let values: Vec<u32> = (0..1000).map(|i| i * 3).collect();
816 let ef = EliasFano::from_sorted_slice(&values);
817
818 let mut buffer = Vec::new();
819 ef.serialize(&mut buffer).unwrap();
820
821 let restored = EliasFano::deserialize(&mut &buffer[..]).unwrap();
822
823 assert_eq!(restored.len(), ef.len());
824 for i in 0..ef.len() {
825 assert_eq!(restored.get(i), ef.get(i));
826 }
827 }
828
829 #[test]
830 fn test_elias_fano_posting_list() {
831 let doc_ids: Vec<u32> = vec![1, 5, 10, 50, 100, 500, 1000];
832 let term_freqs: Vec<u32> = vec![1, 2, 3, 4, 5, 6, 7];
833
834 let list = EliasFanoPostingList::from_postings(&doc_ids, &term_freqs);
835
836 assert_eq!(list.len(), 7);
837 assert_eq!(list.max_tf, 7);
838
839 let mut iter = list.iterator();
840 for (i, (&expected_doc, &expected_tf)) in doc_ids.iter().zip(term_freqs.iter()).enumerate()
841 {
842 assert_eq!(iter.doc(), expected_doc, "Doc mismatch at {}", i);
843 assert_eq!(iter.term_freq(), expected_tf, "TF mismatch at {}", i);
844 iter.advance();
845 }
846 }
847
848 #[test]
849 fn test_elias_fano_iterator_seek() {
850 let doc_ids: Vec<u32> = (0..100).map(|i| i * 10).collect();
851 let term_freqs: Vec<u32> = vec![1; 100];
852
853 let list = EliasFanoPostingList::from_postings(&doc_ids, &term_freqs);
854 let mut iter = list.iterator();
855
856 assert_eq!(iter.seek(55), 60);
857 assert_eq!(iter.seek(100), 100);
858 assert_eq!(iter.seek(999), u32::MAX);
859 }
860
861 #[test]
862 fn test_elias_fano_block_max() {
863 let doc_ids: Vec<u32> = (0..500).map(|i| i * 2).collect();
865 let term_freqs: Vec<u32> = (0..500)
867 .map(|i| {
868 if i < 128 {
869 1 } else if i < 256 {
871 5 } else if i < 384 {
873 10 } else {
875 3 }
877 })
878 .collect();
879
880 let list = EliasFanoPostingList::from_postings_with_idf(&doc_ids, &term_freqs, 2.0);
881
882 assert_eq!(list.num_blocks(), 4);
884 assert_eq!(list.blocks[0].max_tf, 1);
885 assert_eq!(list.blocks[1].max_tf, 5);
886 assert_eq!(list.blocks[2].max_tf, 10);
887 assert_eq!(list.blocks[3].max_tf, 3);
888
889 assert!(list.blocks[2].max_block_score > list.blocks[0].max_block_score);
891 assert!(list.blocks[2].max_block_score > list.blocks[1].max_block_score);
892 assert!(list.blocks[2].max_block_score > list.blocks[3].max_block_score);
893
894 assert_eq!(list.max_score, list.blocks[2].max_block_score);
896
897 let (pos, val) = list.doc_ids.next_geq(256).unwrap();
899 assert_eq!(val, 256, "next_geq(256) should return 256, got {}", val);
900 assert_eq!(pos, 128, "position of 256 should be 128, got {}", pos);
901
902 let mut iter = list.iterator();
904 assert_eq!(iter.current_block_max_tf(), 1); assert_eq!(list.blocks[0].first_doc_id, 0);
912 assert_eq!(list.blocks[0].last_doc_id, 254);
913 assert_eq!(list.blocks[1].first_doc_id, 256);
914 assert_eq!(list.blocks[1].last_doc_id, 510);
915
916 let doc = iter.seek(256); assert_eq!(doc, 256, "seek(256) should return 256, got {}", doc);
919 let block_tf = iter.current_block_max_tf();
921 assert_eq!(block_tf, 5, "block 1 max_tf should be 5, got {}", block_tf);
922
923 let doc = iter.seek(512); assert_eq!(doc, 512, "seek(512) should return 512, got {}", doc);
926 assert_eq!(iter.current_block_max_tf(), 10);
927 }
928
929 #[test]
930 fn test_elias_fano_block_max_serialization() {
931 let doc_ids: Vec<u32> = (0..300).map(|i| i * 3).collect();
932 let term_freqs: Vec<u32> = (0..300).map(|i| (i % 10) + 1).collect();
933
934 let list = EliasFanoPostingList::from_postings_with_idf(&doc_ids, &term_freqs, 1.5);
935
936 let mut buffer = Vec::new();
937 list.serialize(&mut buffer).unwrap();
938
939 let restored = EliasFanoPostingList::deserialize(&mut &buffer[..]).unwrap();
940
941 assert_eq!(restored.len(), list.len());
942 assert_eq!(restored.max_tf, list.max_tf);
943 assert_eq!(restored.max_score, list.max_score);
944 assert_eq!(restored.num_blocks(), list.num_blocks());
945
946 for (orig, rest) in list.blocks.iter().zip(restored.blocks.iter()) {
948 assert_eq!(orig.first_doc_id, rest.first_doc_id);
949 assert_eq!(orig.last_doc_id, rest.last_doc_id);
950 assert_eq!(orig.max_tf, rest.max_tf);
951 assert_eq!(orig.max_block_score, rest.max_block_score);
952 }
953
954 let mut iter1 = list.iterator();
956 let mut iter2 = restored.iterator();
957
958 while !iter1.is_exhausted() {
959 assert_eq!(iter1.doc(), iter2.doc());
960 assert_eq!(iter1.term_freq(), iter2.term_freq());
961 iter1.advance();
962 iter2.advance();
963 }
964 assert!(iter2.is_exhausted());
965 }
966}