1use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
11use std::io::{self, Read, Write};
12
/// Elias-Fano encoding of a monotone (sorted, non-decreasing) sequence of `u32` values.
///
/// Each value is split into `lower_bit_width` low bits (stored packed in `lower_bits`)
/// and its remaining high bits (stored in unary in `upper_bits`).
#[derive(Debug, Clone)]
pub struct EliasFano {
    /// Packed low bits, `lower_bit_width` bits per element, LSB-first within each u64 word.
    lower_bits: Vec<u64>,
    /// Unary-coded high parts: element `i` sets the bit at position `i + (value_i >> lower_bit_width)`.
    upper_bits: Vec<u64>,
    /// Number of encoded values.
    len: u32,
    /// Exclusive upper bound on stored values (`max + 1`); 0 when empty.
    universe: u64,
    /// Number of low bits stored per element.
    lower_bit_width: u8,
}
27
28impl EliasFano {
29 pub fn from_sorted_slice(values: &[u32]) -> Self {
31 if values.is_empty() {
32 return Self {
33 lower_bits: Vec::new(),
34 upper_bits: Vec::new(),
35 len: 0,
36 universe: 0,
37 lower_bit_width: 0,
38 };
39 }
40
41 let n = values.len() as u64;
42 let max_val = *values.last().unwrap() as u64;
43 let universe = max_val + 1;
44
45 let lower_bit_width = if n == 0 {
47 0
48 } else {
49 let ratio = universe.max(1) / n.max(1);
50 if ratio <= 1 {
51 0
52 } else {
53 (64 - ratio.leading_zeros() - 1) as u8
54 }
55 };
56
57 let lower_bits_total = (n as usize) * (lower_bit_width as usize);
59 let lower_words = lower_bits_total.div_ceil(64);
60 let mut lower_bits = vec![0u64; lower_words];
61
62 let upper_bound = n + (max_val >> lower_bit_width) + 1;
65 let upper_words = (upper_bound as usize).div_ceil(64);
66 let mut upper_bits = vec![0u64; upper_words];
67
68 let lower_mask = if lower_bit_width == 0 {
70 0
71 } else {
72 (1u64 << lower_bit_width) - 1
73 };
74
75 for (i, &val) in values.iter().enumerate() {
76 let val = val as u64;
77
78 if lower_bit_width > 0 {
80 let lower = val & lower_mask;
81 let bit_pos = i * (lower_bit_width as usize);
82 let word_idx = bit_pos / 64;
83 let bit_offset = bit_pos % 64;
84
85 lower_bits[word_idx] |= lower << bit_offset;
86 if bit_offset + (lower_bit_width as usize) > 64 && word_idx + 1 < lower_bits.len() {
87 lower_bits[word_idx + 1] |= lower >> (64 - bit_offset);
88 }
89 }
90
91 let upper = val >> lower_bit_width;
93 let upper_pos = (i as u64) + upper;
94 let word_idx = (upper_pos / 64) as usize;
95 let bit_offset = upper_pos % 64;
96 if word_idx < upper_bits.len() {
97 upper_bits[word_idx] |= 1u64 << bit_offset;
98 }
99 }
100
101 Self {
102 lower_bits,
103 upper_bits,
104 len: values.len() as u32,
105 universe,
106 lower_bit_width,
107 }
108 }
109
    /// Number of encoded values.
    #[inline]
    pub fn len(&self) -> u32 {
        self.len
    }
115
116 #[inline]
118 pub fn is_empty(&self) -> bool {
119 self.len == 0
120 }
121
    /// Returns the `i`-th value (0-indexed), or `None` when `i >= len`.
    ///
    /// Recombines the packed low bits with the unary-coded high bits:
    /// `value = (upper << lower_bit_width) | lower`.
    #[inline]
    pub fn get(&self, i: u32) -> Option<u32> {
        if i >= self.len {
            return None;
        }

        let i = i as usize;

        // Extract the `lower_bit_width`-bit slice starting at bit `i * width`.
        let lower = if self.lower_bit_width == 0 {
            0u64
        } else {
            let bit_pos = i * (self.lower_bit_width as usize);
            let word_idx = bit_pos / 64;
            let bit_offset = bit_pos % 64;
            let lower_mask = (1u64 << self.lower_bit_width) - 1;

            let mut val = (self.lower_bits[word_idx] >> bit_offset) & lower_mask;
            // Slice straddles a word boundary: merge in the low bits of the next word.
            if bit_offset + (self.lower_bit_width as usize) > 64
                && word_idx + 1 < self.lower_bits.len()
            {
                val |= (self.lower_bits[word_idx + 1] << (64 - bit_offset)) & lower_mask;
            }
            val
        };

        // The i-th set bit sits at position `i + upper`, so `upper = position - i`.
        let select_pos = self.select1(i as u32)?;
        let upper = (select_pos as u64) - (i as u64);

        Some(((upper << self.lower_bit_width) | lower) as u32)
    }
155
156 fn select1(&self, i: u32) -> Option<u32> {
159 if i >= self.len {
160 return None;
161 }
162
163 let mut remaining = i + 1;
164 let mut pos = 0u32;
165
166 for &word in self.upper_bits.iter() {
168 let popcount = Self::popcount64(word);
169 if popcount >= remaining {
170 let bit_pos = Self::select_in_word(word, remaining);
172 return Some(pos + bit_pos);
173 }
174 remaining -= popcount;
175 pos += 64;
176 }
177
178 None
179 }
180
181 #[inline]
183 fn popcount64(word: u64) -> u32 {
184 word.count_ones()
186 }
187
    /// Returns the bit position (0-63) of the `k`-th set bit of `word`, with
    /// `k` 1-indexed. Callers must guarantee `word` has at least `k` set bits;
    /// otherwise `trailing_zeros()` of 0 yields 64.
    #[inline]
    fn select_in_word(word: u64, k: u32) -> u32 {
        #[cfg(all(target_arch = "x86_64", target_feature = "bmi2"))]
        {
            use std::arch::x86_64::_pdep_u64;
            // PDEP deposits the single mask bit (rank k-1) into the position
            // of the k-th set bit of `word`.
            let mask = 1u64 << (k - 1);
            // SAFETY: this branch is only compiled when the target has the
            // bmi2 feature, so the intrinsic is available.
            let selected = unsafe { _pdep_u64(mask, word) };
            selected.trailing_zeros()
        }

        #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2")))]
        {
            // Portable fallback: clear the k-1 lowest set bits; the lowest
            // remaining set bit is the answer.
            let mut w = word;
            for _ in 0..k - 1 {
                w &= w - 1;
            }
            w.trailing_zeros()
        }
    }
210
211 pub fn next_geq(&self, target: u32) -> Option<(u32, u32)> {
214 if self.len == 0 {
215 return None;
216 }
217
218 let target = target as u64;
219
220 let target_upper = target >> self.lower_bit_width;
222 let _target_lower = if self.lower_bit_width == 0 {
223 0
224 } else {
225 target & ((1u64 << self.lower_bit_width) - 1)
226 };
227
228 let bucket_start = self.select0(target_upper as u32);
230
231 for pos in bucket_start..self.len {
233 if let Some(val) = self.get(pos)
234 && val as u64 >= target
235 {
236 return Some((pos, val));
237 }
238 }
239
240 None
241 }
242
    /// Returns the number of set bits (i.e. elements) that appear before the
    /// `i`-th zero (1-indexed) in `upper_bits`; `i == 0` maps to position 0.
    /// Falls back to `self.len` when fewer than `i` zeros exist.
    ///
    /// Used by `next_geq`: the count of ones before the `i`-th zero equals the
    /// number of elements whose upper part is strictly less than `i`.
    fn select0(&self, i: u32) -> u32 {
        if i == 0 {
            return 0;
        }

        let mut zeros_seen = 0u32;
        let mut ones_seen = 0u32;

        for &word in &self.upper_bits {
            let zeros_in_word = 64 - word.count_ones();
            let ones_in_word = word.count_ones();

            // The i-th zero lies within this word: scan it bit by bit.
            if zeros_seen + zeros_in_word >= i {
                let mut w = word;
                let mut bit_idx = 0u32;
                while bit_idx < 64 {
                    if (w & 1) == 0 {
                        zeros_seen += 1;
                        if zeros_seen == i {
                            return ones_seen;
                        }
                    } else {
                        ones_seen += 1;
                    }
                    w >>= 1;
                    bit_idx += 1;
                }
            }
            // When the branch above is taken the inner scan always returns
            // (the guard proves the i-th zero is in this word), so these
            // updates only run for words fully before the i-th zero.
            zeros_seen += zeros_in_word;
            ones_seen += ones_in_word;
        }

        self.len
    }
281
282 pub fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
284 writer.write_u32::<LittleEndian>(self.len)?;
285 writer.write_u64::<LittleEndian>(self.universe)?;
286 writer.write_u8(self.lower_bit_width)?;
287
288 writer.write_u32::<LittleEndian>(self.lower_bits.len() as u32)?;
290 for &word in &self.lower_bits {
291 writer.write_u64::<LittleEndian>(word)?;
292 }
293
294 writer.write_u32::<LittleEndian>(self.upper_bits.len() as u32)?;
296 for &word in &self.upper_bits {
297 writer.write_u64::<LittleEndian>(word)?;
298 }
299
300 Ok(())
301 }
302
303 pub fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
305 let len = reader.read_u32::<LittleEndian>()?;
306 let universe = reader.read_u64::<LittleEndian>()?;
307 let lower_bit_width = reader.read_u8()?;
308
309 let lower_len = reader.read_u32::<LittleEndian>()? as usize;
310 let mut lower_bits = Vec::with_capacity(lower_len);
311 for _ in 0..lower_len {
312 lower_bits.push(reader.read_u64::<LittleEndian>()?);
313 }
314
315 let upper_len = reader.read_u32::<LittleEndian>()? as usize;
316 let mut upper_bits = Vec::with_capacity(upper_len);
317 for _ in 0..upper_len {
318 upper_bits.push(reader.read_u64::<LittleEndian>()?);
319 }
320
321 Ok(Self {
322 lower_bits,
323 upper_bits,
324 len,
325 universe,
326 lower_bit_width,
327 })
328 }
329
    /// Approximate in-memory footprint in bytes: both word arrays plus ~16
    /// bytes for the scalar fields.
    /// NOTE(review): the `+ 16` is an estimate (actual scalars are 4+8+1 bytes
    /// plus padding) and does not equal the serialized size — confirm callers
    /// only use this for rough accounting.
    pub fn size_bytes(&self) -> usize {
        self.lower_bits.len() * 8 + self.upper_bits.len() * 8 + 16
    }
334
335 pub fn iter(&self) -> EliasFanoIterator<'_> {
337 EliasFanoIterator { ef: self, pos: 0 }
338 }
339}
340
/// Sequential iterator over the values of an [`EliasFano`] structure.
pub struct EliasFanoIterator<'a> {
    /// The encoded sequence being traversed.
    ef: &'a EliasFano,
    /// Index of the next value to yield.
    pos: u32,
}
346
347impl<'a> Iterator for EliasFanoIterator<'a> {
348 type Item = u32;
349
350 fn next(&mut self) -> Option<Self::Item> {
351 if self.pos >= self.ef.len {
352 return None;
353 }
354
355 let val = self.ef.get(self.pos)?;
357 self.pos += 1;
358 Some(val)
359 }
360
361 fn size_hint(&self) -> (usize, Option<usize>) {
362 let remaining = (self.ef.len - self.pos) as usize;
363 (remaining, Some(remaining))
364 }
365}
366
367impl<'a> ExactSizeIterator for EliasFanoIterator<'a> {}
368
/// Number of postings grouped into each block-max (skip/pruning) block.
pub const EF_BLOCK_SIZE: usize = 128;
371
/// Block-max metadata for one `EF_BLOCK_SIZE` chunk of a posting list,
/// used for skipping and score-based pruning.
#[derive(Debug, Clone)]
pub struct EFBlockInfo {
    /// Smallest doc id in the block.
    pub first_doc_id: u32,
    /// Largest doc id in the block.
    pub last_doc_id: u32,
    /// Maximum term frequency within the block.
    pub max_tf: u32,
    /// Precomputed score upper bound for any posting in the block.
    pub max_block_score: f32,
    /// Index of the block's first posting within the list.
    pub start_pos: u32,
    /// Number of postings in the block (at most `EF_BLOCK_SIZE`).
    pub num_docs: u16,
}
388
389impl EFBlockInfo {
390 pub fn serialize<W: std::io::Write>(&self, writer: &mut W) -> std::io::Result<()> {
392 use byteorder::WriteBytesExt;
393 writer.write_u32::<LittleEndian>(self.first_doc_id)?;
394 writer.write_u32::<LittleEndian>(self.last_doc_id)?;
395 writer.write_u32::<LittleEndian>(self.max_tf)?;
396 writer.write_f32::<LittleEndian>(self.max_block_score)?;
397 writer.write_u32::<LittleEndian>(self.start_pos)?;
398 writer.write_u16::<LittleEndian>(self.num_docs)?;
399 Ok(())
400 }
401
402 pub fn deserialize<R: std::io::Read>(reader: &mut R) -> std::io::Result<Self> {
404 use byteorder::ReadBytesExt;
405 Ok(Self {
406 first_doc_id: reader.read_u32::<LittleEndian>()?,
407 last_doc_id: reader.read_u32::<LittleEndian>()?,
408 max_tf: reader.read_u32::<LittleEndian>()?,
409 max_block_score: reader.read_f32::<LittleEndian>()?,
410 start_pos: reader.read_u32::<LittleEndian>()?,
411 num_docs: reader.read_u16::<LittleEndian>()?,
412 })
413 }
414}
415
/// A posting list with Elias-Fano compressed doc ids, bit-packed term
/// frequencies, and per-block metadata for skipping and pruning.
#[derive(Debug, Clone)]
pub struct EliasFanoPostingList {
    /// Sorted doc ids, Elias-Fano encoded.
    pub doc_ids: EliasFano,
    /// Term frequencies, stored biased by -1 and packed `tf_bits` wide.
    pub term_freqs: Vec<u8>,
    /// Bits per packed term frequency.
    pub tf_bits: u8,
    /// Maximum term frequency across the whole list.
    pub max_tf: u32,
    /// Block metadata, one entry per `EF_BLOCK_SIZE` postings.
    pub blocks: Vec<EFBlockInfo>,
    /// Maximum block score across the whole list.
    pub max_score: f32,
}
432
433impl EliasFanoPostingList {
    /// Builds a posting list with a neutral IDF of 1.0; see
    /// [`Self::from_postings_with_idf`] for the full constructor.
    pub fn from_postings(doc_ids: &[u32], term_freqs: &[u32]) -> Self {
        Self::from_postings_with_idf(doc_ids, term_freqs, 1.0)
    }
438
439 pub fn from_postings_with_idf(doc_ids: &[u32], term_freqs: &[u32], idf: f32) -> Self {
441 assert_eq!(doc_ids.len(), term_freqs.len());
442
443 if doc_ids.is_empty() {
444 return Self {
445 doc_ids: EliasFano::from_sorted_slice(&[]),
446 term_freqs: Vec::new(),
447 tf_bits: 0,
448 max_tf: 0,
449 blocks: Vec::new(),
450 max_score: 0.0,
451 };
452 }
453
454 let ef_doc_ids = EliasFano::from_sorted_slice(doc_ids);
455
456 let max_tf = *term_freqs.iter().max().unwrap();
458 let tf_bits = if max_tf == 0 {
459 0
460 } else {
461 (32 - max_tf.leading_zeros()) as u8
462 };
463
464 let total_bits = doc_ids.len() * (tf_bits as usize);
466 let total_bytes = total_bits.div_ceil(8);
467 let mut packed_tfs = vec![0u8; total_bytes];
468
469 if tf_bits > 0 {
470 for (i, &tf) in term_freqs.iter().enumerate() {
471 let bit_pos = i * (tf_bits as usize);
472 let byte_idx = bit_pos / 8;
473 let bit_offset = bit_pos % 8;
474
475 let val = tf.saturating_sub(1);
477
478 packed_tfs[byte_idx] |= (val as u8) << bit_offset;
479 if bit_offset + (tf_bits as usize) > 8 && byte_idx + 1 < packed_tfs.len() {
480 packed_tfs[byte_idx + 1] |= (val >> (8 - bit_offset)) as u8;
481 }
482 if bit_offset + (tf_bits as usize) > 16 && byte_idx + 2 < packed_tfs.len() {
483 packed_tfs[byte_idx + 2] |= (val >> (16 - bit_offset)) as u8;
484 }
485 }
486 }
487
488 let mut blocks = Vec::new();
490 let mut max_score = 0.0f32;
491 let mut i = 0;
492
493 while i < doc_ids.len() {
494 let block_end = (i + EF_BLOCK_SIZE).min(doc_ids.len());
495 let block_doc_ids = &doc_ids[i..block_end];
496 let block_tfs = &term_freqs[i..block_end];
497
498 let block_max_tf = *block_tfs.iter().max().unwrap_or(&1);
499 let block_score = crate::query::bm25_upper_bound(block_max_tf as f32, idf);
500 max_score = max_score.max(block_score);
501
502 blocks.push(EFBlockInfo {
503 first_doc_id: block_doc_ids[0],
504 last_doc_id: *block_doc_ids.last().unwrap(),
505 max_tf: block_max_tf,
506 max_block_score: block_score,
507 start_pos: i as u32,
508 num_docs: (block_end - i) as u16,
509 });
510
511 i = block_end;
512 }
513
514 Self {
515 doc_ids: ef_doc_ids,
516 term_freqs: packed_tfs,
517 tf_bits,
518 max_tf,
519 blocks,
520 max_score,
521 }
522 }
523
    /// Decodes the term frequency stored for posting `pos`.
    ///
    /// Values are packed `tf_bits` wide and biased by -1 at encode time, so the
    /// result is always >= 1. Out-of-range positions (or `tf_bits == 0`) return 1.
    /// Reads spill across at most 3 bytes, mirroring the packer's limit.
    pub fn get_tf(&self, pos: u32) -> u32 {
        if self.tf_bits == 0 || pos >= self.doc_ids.len() {
            return 1;
        }

        let bit_pos = (pos as usize) * (self.tf_bits as usize);
        let byte_idx = bit_pos / 8;
        let bit_offset = bit_pos % 8;
        let mask = (1u32 << self.tf_bits) - 1;

        let mut val = (self.term_freqs[byte_idx] >> bit_offset) as u32;
        // Merge spill-over bits from up to two following bytes.
        if bit_offset + (self.tf_bits as usize) > 8 && byte_idx + 1 < self.term_freqs.len() {
            val |= (self.term_freqs[byte_idx + 1] as u32) << (8 - bit_offset);
        }
        if bit_offset + (self.tf_bits as usize) > 16 && byte_idx + 2 < self.term_freqs.len() {
            val |= (self.term_freqs[byte_idx + 2] as u32) << (16 - bit_offset);
        }

        // Undo the encode-time -1 bias.
        (val & mask) + 1
    }
545
    /// Number of postings in the list.
    pub fn len(&self) -> u32 {
        self.doc_ids.len()
    }
550
551 pub fn is_empty(&self) -> bool {
553 self.doc_ids.is_empty()
554 }
555
556 pub fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
558 self.doc_ids.serialize(writer)?;
559 writer.write_u8(self.tf_bits)?;
560 writer.write_u32::<LittleEndian>(self.max_tf)?;
561 writer.write_f32::<LittleEndian>(self.max_score)?;
562 writer.write_u32::<LittleEndian>(self.term_freqs.len() as u32)?;
563 writer.write_all(&self.term_freqs)?;
564
565 writer.write_u32::<LittleEndian>(self.blocks.len() as u32)?;
567 for block in &self.blocks {
568 block.serialize(writer)?;
569 }
570
571 Ok(())
572 }
573
574 pub fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
576 let doc_ids = EliasFano::deserialize(reader)?;
577 let tf_bits = reader.read_u8()?;
578 let max_tf = reader.read_u32::<LittleEndian>()?;
579 let max_score = reader.read_f32::<LittleEndian>()?;
580 let tf_len = reader.read_u32::<LittleEndian>()? as usize;
581 let mut term_freqs = vec![0u8; tf_len];
582 reader.read_exact(&mut term_freqs)?;
583
584 let num_blocks = reader.read_u32::<LittleEndian>()? as usize;
586 let mut blocks = Vec::with_capacity(num_blocks);
587 for _ in 0..num_blocks {
588 blocks.push(EFBlockInfo::deserialize(reader)?);
589 }
590
591 Ok(Self {
592 doc_ids,
593 term_freqs,
594 tf_bits,
595 max_tf,
596 blocks,
597 max_score,
598 })
599 }
600
    /// Number of block-max entries in the list.
    pub fn num_blocks(&self) -> usize {
        self.blocks.len()
    }
605
    /// Index of the block containing posting `pos`. Relies on blocks being
    /// built as fixed `EF_BLOCK_SIZE`-sized chunks (see
    /// `from_postings_with_idf`), so plain division suffices.
    pub fn block_for_pos(&self, pos: u32) -> usize {
        (pos as usize) / EF_BLOCK_SIZE
    }
610
611 pub fn iterator(&self) -> EliasFanoPostingIterator<'_> {
613 EliasFanoPostingIterator {
614 list: self,
615 pos: 0,
616 current_block: 0,
617 }
618 }
619}
620
/// Cursor over an [`EliasFanoPostingList`] supporting advance/seek and
/// block-max queries for dynamic pruning.
pub struct EliasFanoPostingIterator<'a> {
    /// The posting list being traversed.
    list: &'a EliasFanoPostingList,
    /// Index of the current posting; `>= list.len()` means exhausted.
    pos: u32,
    /// Index of the block containing `pos` (kept in bounds while advancing).
    current_block: usize,
}
627
628impl<'a> EliasFanoPostingIterator<'a> {
629 pub fn doc(&self) -> u32 {
631 self.list.doc_ids.get(self.pos).unwrap_or(u32::MAX)
632 }
633
    /// Term frequency at the current position (1 when exhausted, per
    /// `get_tf`'s out-of-range convention).
    pub fn term_freq(&self) -> u32 {
        self.list.get_tf(self.pos)
    }
638
639 pub fn advance(&mut self) -> u32 {
641 self.pos += 1;
642 if !self.list.blocks.is_empty() {
644 let new_block = self.list.block_for_pos(self.pos);
645 if new_block < self.list.blocks.len() {
646 self.current_block = new_block;
647 }
648 }
649 self.doc()
650 }
651
    /// Positions the cursor on the first posting with doc id `>= target`
    /// (at or after the current position) and returns its doc id, or
    /// `u32::MAX` when no such posting exists.
    pub fn seek(&mut self, target: u32) -> u32 {
        // Phase 1: block skipping — binary-search the remaining block-max
        // entries for the block whose [first_doc_id, last_doc_id] range could
        // contain `target`, and jump the cursor to its start.
        if !self.list.blocks.is_empty() {
            let block_idx = self.list.blocks[self.current_block..].binary_search_by(|block| {
                if block.last_doc_id < target {
                    std::cmp::Ordering::Less
                } else if block.first_doc_id > target {
                    std::cmp::Ordering::Greater
                } else {
                    std::cmp::Ordering::Equal
                }
            });

            let target_block = match block_idx {
                // `target` falls inside a block's doc-id range.
                Ok(idx) => self.current_block + idx,
                // `target` falls in a gap between blocks; `idx` is the first
                // block starting after it. Past the last block => exhausted.
                Err(idx) => {
                    let abs_idx = self.current_block + idx;
                    if abs_idx >= self.list.blocks.len() {
                        self.pos = self.list.len();
                        return u32::MAX;
                    }
                    abs_idx
                }
            };

            // Only move forward; never rewind behind the current position.
            if target_block > self.current_block {
                self.current_block = target_block;
                self.pos = self.list.blocks[target_block].start_pos;
            }
        }

        // Phase 2: Elias-Fano next_geq over the whole doc-id sequence; accept
        // the result only when it does not move the cursor backwards.
        if let Some((pos, val)) = self.list.doc_ids.next_geq(target)
            && pos >= self.pos
        {
            self.pos = pos;
            if !self.list.blocks.is_empty() {
                self.current_block = self.list.block_for_pos(pos);
            }
            return val;
        }

        // Phase 3: fallback linear scan from the current position.
        while self.pos < self.list.len() {
            let doc = self.doc();
            if doc >= target {
                return doc;
            }
            self.pos += 1;
        }

        u32::MAX
    }
708
    /// True once the cursor has moved past the last posting.
    pub fn is_exhausted(&self) -> bool {
        self.pos >= self.list.len()
    }
713
714 pub fn current_block_max_score(&self) -> f32 {
716 if self.is_exhausted() || self.list.blocks.is_empty() {
717 return 0.0;
718 }
719 if self.current_block < self.list.blocks.len() {
720 self.list.blocks[self.current_block].max_block_score
721 } else {
722 0.0
723 }
724 }
725
726 pub fn current_block_max_tf(&self) -> u32 {
728 if self.is_exhausted() || self.list.blocks.is_empty() {
729 return 0;
730 }
731 if self.current_block < self.list.blocks.len() {
732 self.list.blocks[self.current_block].max_tf
733 } else {
734 0
735 }
736 }
737
738 pub fn max_remaining_score(&self) -> f32 {
740 if self.is_exhausted() || self.list.blocks.is_empty() {
741 return 0.0;
742 }
743 self.list.blocks[self.current_block..]
744 .iter()
745 .map(|b| b.max_block_score)
746 .fold(0.0f32, |a, b| a.max(b))
747 }
748
749 pub fn skip_to_block_with_doc(&mut self, target: u32) -> Option<(u32, f32)> {
752 if self.list.blocks.is_empty() {
753 return None;
754 }
755
756 while self.current_block < self.list.blocks.len() {
757 let block = &self.list.blocks[self.current_block];
758 if block.last_doc_id >= target {
759 self.pos = block.start_pos;
760 return Some((block.first_doc_id, block.max_block_score));
761 }
762 self.current_block += 1;
763 }
764
765 self.pos = self.list.len();
766 None
767 }
768}
769
#[cfg(test)]
mod tests {
    use super::*;

    // Round-trip: every value must decode back at its original index.
    #[test]
    fn test_elias_fano_basic() {
        let values = vec![2, 3, 5, 7, 11, 13, 24];
        let ef = EliasFano::from_sorted_slice(&values);

        assert_eq!(ef.len(), 7);

        for (i, &expected) in values.iter().enumerate() {
            assert_eq!(ef.get(i as u32), Some(expected), "Mismatch at index {}", i);
        }
    }

    // next_geq must return the first (index, value) pair at or above the target.
    #[test]
    fn test_elias_fano_next_geq() {
        let values = vec![10, 20, 30, 100, 200, 300, 1000];
        let ef = EliasFano::from_sorted_slice(&values);

        assert_eq!(ef.next_geq(5), Some((0, 10)));
        assert_eq!(ef.next_geq(10), Some((0, 10)));
        assert_eq!(ef.next_geq(15), Some((1, 20)));
        assert_eq!(ef.next_geq(100), Some((3, 100)));
        assert_eq!(ef.next_geq(500), Some((6, 1000)));
        assert_eq!(ef.next_geq(2000), None);
    }

    // Serialize + deserialize must reproduce the exact same sequence.
    #[test]
    fn test_elias_fano_serialization() {
        let values: Vec<u32> = (0..1000).map(|i| i * 3).collect();
        let ef = EliasFano::from_sorted_slice(&values);

        let mut buffer = Vec::new();
        ef.serialize(&mut buffer).unwrap();

        let restored = EliasFano::deserialize(&mut &buffer[..]).unwrap();

        assert_eq!(restored.len(), ef.len());
        for i in 0..ef.len() {
            assert_eq!(restored.get(i), ef.get(i));
        }
    }

    // Doc ids and term frequencies must survive the posting-list round trip.
    #[test]
    fn test_elias_fano_posting_list() {
        let doc_ids: Vec<u32> = vec![1, 5, 10, 50, 100, 500, 1000];
        let term_freqs: Vec<u32> = vec![1, 2, 3, 4, 5, 6, 7];

        let list = EliasFanoPostingList::from_postings(&doc_ids, &term_freqs);

        assert_eq!(list.len(), 7);
        assert_eq!(list.max_tf, 7);

        let mut iter = list.iterator();
        for (i, (&expected_doc, &expected_tf)) in doc_ids.iter().zip(term_freqs.iter()).enumerate()
        {
            assert_eq!(iter.doc(), expected_doc, "Doc mismatch at {}", i);
            assert_eq!(iter.term_freq(), expected_tf, "TF mismatch at {}", i);
            iter.advance();
        }
    }

    // seek() must land on the first doc id >= target and report exhaustion.
    #[test]
    fn test_elias_fano_iterator_seek() {
        let doc_ids: Vec<u32> = (0..100).map(|i| i * 10).collect();
        let term_freqs: Vec<u32> = vec![1; 100];

        let list = EliasFanoPostingList::from_postings(&doc_ids, &term_freqs);
        let mut iter = list.iterator();

        assert_eq!(iter.seek(55), 60);
        assert_eq!(iter.seek(100), 100);
        assert_eq!(iter.seek(999), u32::MAX);
    }

    // End-to-end block-max checks: per-block max_tf, score ordering, block
    // boundaries, and seek() landing in the correct block.
    #[test]
    fn test_elias_fano_block_max() {
        // 500 postings => 4 blocks (128/128/128/116) with distinct tf plateaus.
        let doc_ids: Vec<u32> = (0..500).map(|i| i * 2).collect();
        let term_freqs: Vec<u32> = (0..500)
            .map(|i| {
                if i < 128 {
                    1
                } else if i < 256 {
                    5
                } else if i < 384 {
                    10
                } else {
                    3
                }
            })
            .collect();

        let list = EliasFanoPostingList::from_postings_with_idf(&doc_ids, &term_freqs, 2.0);

        assert_eq!(list.num_blocks(), 4);
        assert_eq!(list.blocks[0].max_tf, 1);
        assert_eq!(list.blocks[1].max_tf, 5);
        assert_eq!(list.blocks[2].max_tf, 10);
        assert_eq!(list.blocks[3].max_tf, 3);

        // Block 2 has the highest tf, so it must dominate every other block.
        assert!(list.blocks[2].max_block_score > list.blocks[0].max_block_score);
        assert!(list.blocks[2].max_block_score > list.blocks[1].max_block_score);
        assert!(list.blocks[2].max_block_score > list.blocks[3].max_block_score);

        assert_eq!(list.max_score, list.blocks[2].max_block_score);

        // Doc 256 is the first posting of block 1 (index 128).
        let (pos, val) = list.doc_ids.next_geq(256).unwrap();
        assert_eq!(val, 256, "next_geq(256) should return 256, got {}", val);
        assert_eq!(pos, 128, "position of 256 should be 128, got {}", pos);

        let mut iter = list.iterator();
        assert_eq!(iter.current_block_max_tf(), 1);
        assert_eq!(list.blocks[0].first_doc_id, 0);
        assert_eq!(list.blocks[0].last_doc_id, 254);
        assert_eq!(list.blocks[1].first_doc_id, 256);
        assert_eq!(list.blocks[1].last_doc_id, 510);

        let doc = iter.seek(256);
        assert_eq!(doc, 256, "seek(256) should return 256, got {}", doc);
        let block_tf = iter.current_block_max_tf();
        assert_eq!(block_tf, 5, "block 1 max_tf should be 5, got {}", block_tf);

        let doc = iter.seek(512);
        assert_eq!(doc, 512, "seek(512) should return 512, got {}", doc);
        assert_eq!(iter.current_block_max_tf(), 10);
    }

    // The serialization round trip must preserve block metadata and postings.
    #[test]
    fn test_elias_fano_block_max_serialization() {
        let doc_ids: Vec<u32> = (0..300).map(|i| i * 3).collect();
        let term_freqs: Vec<u32> = (0..300).map(|i| (i % 10) + 1).collect();

        let list = EliasFanoPostingList::from_postings_with_idf(&doc_ids, &term_freqs, 1.5);

        let mut buffer = Vec::new();
        list.serialize(&mut buffer).unwrap();

        let restored = EliasFanoPostingList::deserialize(&mut &buffer[..]).unwrap();

        assert_eq!(restored.len(), list.len());
        assert_eq!(restored.max_tf, list.max_tf);
        assert_eq!(restored.max_score, list.max_score);
        assert_eq!(restored.num_blocks(), list.num_blocks());

        for (orig, rest) in list.blocks.iter().zip(restored.blocks.iter()) {
            assert_eq!(orig.first_doc_id, rest.first_doc_id);
            assert_eq!(orig.last_doc_id, rest.last_doc_id);
            assert_eq!(orig.max_tf, rest.max_tf);
            assert_eq!(orig.max_block_score, rest.max_block_score);
        }

        // Both lists must iterate identically, posting by posting.
        let mut iter1 = list.iterator();
        let mut iter2 = restored.iterator();

        while !iter1.is_exhausted() {
            assert_eq!(iter1.doc(), iter2.doc());
            assert_eq!(iter1.term_freq(), iter2.term_freq());
            iter1.advance();
            iter2.advance();
        }
        assert!(iter2.is_exhausted());
    }
}