1use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
11use std::io::{self, Read, Write};
12
13#[derive(Debug, Clone)]
15pub struct EliasFano {
16 lower_bits: Vec<u64>,
18 upper_bits: Vec<u64>,
20 len: u32,
22 universe: u64,
24 lower_bit_width: u8,
26}
27
28impl EliasFano {
29 pub fn from_sorted_slice(values: &[u32]) -> Self {
31 if values.is_empty() {
32 return Self {
33 lower_bits: Vec::new(),
34 upper_bits: Vec::new(),
35 len: 0,
36 universe: 0,
37 lower_bit_width: 0,
38 };
39 }
40
41 let n = values.len() as u64;
42 let max_val = *values.last().unwrap() as u64;
43 let universe = max_val + 1;
44
45 let lower_bit_width = if n == 0 {
47 0
48 } else {
49 let ratio = universe.max(1) / n.max(1);
50 if ratio <= 1 {
51 0
52 } else {
53 (64 - ratio.leading_zeros() - 1) as u8
54 }
55 };
56
57 let lower_bits_total = (n as usize) * (lower_bit_width as usize);
59 let lower_words = lower_bits_total.div_ceil(64);
60 let mut lower_bits = vec![0u64; lower_words];
61
62 let upper_bound = n + (max_val >> lower_bit_width) + 1;
65 let upper_words = (upper_bound as usize).div_ceil(64);
66 let mut upper_bits = vec![0u64; upper_words];
67
68 let lower_mask = if lower_bit_width == 0 {
70 0
71 } else {
72 (1u64 << lower_bit_width) - 1
73 };
74
75 for (i, &val) in values.iter().enumerate() {
76 let val = val as u64;
77
78 if lower_bit_width > 0 {
80 let lower = val & lower_mask;
81 let bit_pos = i * (lower_bit_width as usize);
82 let word_idx = bit_pos / 64;
83 let bit_offset = bit_pos % 64;
84
85 lower_bits[word_idx] |= lower << bit_offset;
86 if bit_offset + (lower_bit_width as usize) > 64 && word_idx + 1 < lower_bits.len() {
87 lower_bits[word_idx + 1] |= lower >> (64 - bit_offset);
88 }
89 }
90
91 let upper = val >> lower_bit_width;
93 let upper_pos = (i as u64) + upper;
94 let word_idx = (upper_pos / 64) as usize;
95 let bit_offset = upper_pos % 64;
96 if word_idx < upper_bits.len() {
97 upper_bits[word_idx] |= 1u64 << bit_offset;
98 }
99 }
100
101 Self {
102 lower_bits,
103 upper_bits,
104 len: values.len() as u32,
105 universe,
106 lower_bit_width,
107 }
108 }
109
110 #[inline]
112 pub fn len(&self) -> u32 {
113 self.len
114 }
115
116 #[inline]
118 pub fn is_empty(&self) -> bool {
119 self.len == 0
120 }
121
122 #[inline]
124 pub fn get(&self, i: u32) -> Option<u32> {
125 if i >= self.len {
126 return None;
127 }
128
129 let i = i as usize;
130
131 let lower = if self.lower_bit_width == 0 {
133 0u64
134 } else {
135 let bit_pos = i * (self.lower_bit_width as usize);
136 let word_idx = bit_pos / 64;
137 let bit_offset = bit_pos % 64;
138 let lower_mask = (1u64 << self.lower_bit_width) - 1;
139
140 let mut val = (self.lower_bits[word_idx] >> bit_offset) & lower_mask;
141 if bit_offset + (self.lower_bit_width as usize) > 64
142 && word_idx + 1 < self.lower_bits.len()
143 {
144 val |= (self.lower_bits[word_idx + 1] << (64 - bit_offset)) & lower_mask;
145 }
146 val
147 };
148
149 let select_pos = self.select1(i as u32)?;
151 let upper = (select_pos as u64) - (i as u64);
152
153 Some(((upper << self.lower_bit_width) | lower) as u32)
154 }
155
156 fn select1(&self, i: u32) -> Option<u32> {
158 if i >= self.len {
159 return None;
160 }
161
162 let mut remaining = i + 1;
163 let mut pos = 0u32;
164
165 for &word in self.upper_bits.iter() {
166 let popcount = word.count_ones();
167 if popcount >= remaining {
168 let mut w = word;
170 for _ in 0..remaining - 1 {
171 w &= w - 1; }
173 let bit_pos = w.trailing_zeros();
174 return Some(pos + bit_pos);
175 }
176 remaining -= popcount;
177 pos += 64;
178 }
179
180 None
181 }
182
183 pub fn next_geq(&self, target: u32) -> Option<(u32, u32)> {
186 if self.len == 0 {
187 return None;
188 }
189
190 let target = target as u64;
191
192 let target_upper = target >> self.lower_bit_width;
194 let _target_lower = if self.lower_bit_width == 0 {
195 0
196 } else {
197 target & ((1u64 << self.lower_bit_width) - 1)
198 };
199
200 let bucket_start = self.select0(target_upper as u32);
202
203 for pos in bucket_start..self.len {
205 if let Some(val) = self.get(pos)
206 && val as u64 >= target
207 {
208 return Some((pos, val));
209 }
210 }
211
212 None
213 }
214
215 fn select0(&self, i: u32) -> u32 {
218 if i == 0 {
219 return 0;
220 }
221
222 let mut zeros_seen = 0u32;
223 let mut ones_seen = 0u32;
224
225 for &word in &self.upper_bits {
226 let zeros_in_word = 64 - word.count_ones();
227 let ones_in_word = word.count_ones();
228
229 if zeros_seen + zeros_in_word >= i {
230 let mut w = word;
232 let mut bit_idx = 0u32;
233 while bit_idx < 64 {
234 if (w & 1) == 0 {
235 zeros_seen += 1;
236 if zeros_seen == i {
237 return ones_seen;
239 }
240 } else {
241 ones_seen += 1;
242 }
243 w >>= 1;
244 bit_idx += 1;
245 }
246 }
247 zeros_seen += zeros_in_word;
248 ones_seen += ones_in_word;
249 }
250
251 self.len
252 }
253
254 pub fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
256 writer.write_u32::<LittleEndian>(self.len)?;
257 writer.write_u64::<LittleEndian>(self.universe)?;
258 writer.write_u8(self.lower_bit_width)?;
259
260 writer.write_u32::<LittleEndian>(self.lower_bits.len() as u32)?;
262 for &word in &self.lower_bits {
263 writer.write_u64::<LittleEndian>(word)?;
264 }
265
266 writer.write_u32::<LittleEndian>(self.upper_bits.len() as u32)?;
268 for &word in &self.upper_bits {
269 writer.write_u64::<LittleEndian>(word)?;
270 }
271
272 Ok(())
273 }
274
275 pub fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
277 let len = reader.read_u32::<LittleEndian>()?;
278 let universe = reader.read_u64::<LittleEndian>()?;
279 let lower_bit_width = reader.read_u8()?;
280
281 let lower_len = reader.read_u32::<LittleEndian>()? as usize;
282 let mut lower_bits = Vec::with_capacity(lower_len);
283 for _ in 0..lower_len {
284 lower_bits.push(reader.read_u64::<LittleEndian>()?);
285 }
286
287 let upper_len = reader.read_u32::<LittleEndian>()? as usize;
288 let mut upper_bits = Vec::with_capacity(upper_len);
289 for _ in 0..upper_len {
290 upper_bits.push(reader.read_u64::<LittleEndian>()?);
291 }
292
293 Ok(Self {
294 lower_bits,
295 upper_bits,
296 len,
297 universe,
298 lower_bit_width,
299 })
300 }
301
302 pub fn size_bytes(&self) -> usize {
304 self.lower_bits.len() * 8 + self.upper_bits.len() * 8 + 16
305 }
306
307 pub fn iter(&self) -> EliasFanoIterator<'_> {
309 EliasFanoIterator { ef: self, pos: 0 }
310 }
311}
312
313pub struct EliasFanoIterator<'a> {
315 ef: &'a EliasFano,
316 pos: u32,
317}
318
319impl<'a> Iterator for EliasFanoIterator<'a> {
320 type Item = u32;
321
322 fn next(&mut self) -> Option<Self::Item> {
323 if self.pos >= self.ef.len {
324 return None;
325 }
326
327 let val = self.ef.get(self.pos)?;
329 self.pos += 1;
330 Some(val)
331 }
332
333 fn size_hint(&self) -> (usize, Option<usize>) {
334 let remaining = (self.ef.len - self.pos) as usize;
335 (remaining, Some(remaining))
336 }
337}
338
339impl<'a> ExactSizeIterator for EliasFanoIterator<'a> {}
340
341pub const EF_BLOCK_SIZE: usize = 128;
343
344#[derive(Debug, Clone)]
346pub struct EFBlockInfo {
347 pub first_doc_id: u32,
349 pub last_doc_id: u32,
351 pub max_tf: u32,
353 pub max_block_score: f32,
355 pub start_pos: u32,
357 pub num_docs: u16,
359}
360
361impl EFBlockInfo {
362 pub fn serialize<W: std::io::Write>(&self, writer: &mut W) -> std::io::Result<()> {
364 use byteorder::WriteBytesExt;
365 writer.write_u32::<LittleEndian>(self.first_doc_id)?;
366 writer.write_u32::<LittleEndian>(self.last_doc_id)?;
367 writer.write_u32::<LittleEndian>(self.max_tf)?;
368 writer.write_f32::<LittleEndian>(self.max_block_score)?;
369 writer.write_u32::<LittleEndian>(self.start_pos)?;
370 writer.write_u16::<LittleEndian>(self.num_docs)?;
371 Ok(())
372 }
373
374 pub fn deserialize<R: std::io::Read>(reader: &mut R) -> std::io::Result<Self> {
376 use byteorder::ReadBytesExt;
377 Ok(Self {
378 first_doc_id: reader.read_u32::<LittleEndian>()?,
379 last_doc_id: reader.read_u32::<LittleEndian>()?,
380 max_tf: reader.read_u32::<LittleEndian>()?,
381 max_block_score: reader.read_f32::<LittleEndian>()?,
382 start_pos: reader.read_u32::<LittleEndian>()?,
383 num_docs: reader.read_u16::<LittleEndian>()?,
384 })
385 }
386}
387
388#[derive(Debug, Clone)]
390pub struct EliasFanoPostingList {
391 pub doc_ids: EliasFano,
393 pub term_freqs: Vec<u8>,
395 pub tf_bits: u8,
397 pub max_tf: u32,
399 pub blocks: Vec<EFBlockInfo>,
401 pub max_score: f32,
403}
404
405impl EliasFanoPostingList {
406 const K1: f32 = 1.2;
408 const B: f32 = 0.75;
409
410 #[inline]
412 pub fn compute_bm25_upper_bound(max_tf: u32, idf: f32) -> f32 {
413 let tf = max_tf as f32;
414 let min_length_norm = 1.0 - Self::B;
416 let tf_norm = (tf * (Self::K1 + 1.0)) / (tf + Self::K1 * min_length_norm);
417 idf * tf_norm
418 }
419
420 pub fn from_postings(doc_ids: &[u32], term_freqs: &[u32]) -> Self {
422 Self::from_postings_with_idf(doc_ids, term_freqs, 1.0)
423 }
424
425 pub fn from_postings_with_idf(doc_ids: &[u32], term_freqs: &[u32], idf: f32) -> Self {
427 assert_eq!(doc_ids.len(), term_freqs.len());
428
429 if doc_ids.is_empty() {
430 return Self {
431 doc_ids: EliasFano::from_sorted_slice(&[]),
432 term_freqs: Vec::new(),
433 tf_bits: 0,
434 max_tf: 0,
435 blocks: Vec::new(),
436 max_score: 0.0,
437 };
438 }
439
440 let ef_doc_ids = EliasFano::from_sorted_slice(doc_ids);
441
442 let max_tf = *term_freqs.iter().max().unwrap();
444 let tf_bits = if max_tf == 0 {
445 0
446 } else {
447 (32 - max_tf.leading_zeros()) as u8
448 };
449
450 let total_bits = doc_ids.len() * (tf_bits as usize);
452 let total_bytes = total_bits.div_ceil(8);
453 let mut packed_tfs = vec![0u8; total_bytes];
454
455 if tf_bits > 0 {
456 for (i, &tf) in term_freqs.iter().enumerate() {
457 let bit_pos = i * (tf_bits as usize);
458 let byte_idx = bit_pos / 8;
459 let bit_offset = bit_pos % 8;
460
461 let val = tf.saturating_sub(1);
463
464 packed_tfs[byte_idx] |= (val as u8) << bit_offset;
465 if bit_offset + (tf_bits as usize) > 8 && byte_idx + 1 < packed_tfs.len() {
466 packed_tfs[byte_idx + 1] |= (val >> (8 - bit_offset)) as u8;
467 }
468 if bit_offset + (tf_bits as usize) > 16 && byte_idx + 2 < packed_tfs.len() {
469 packed_tfs[byte_idx + 2] |= (val >> (16 - bit_offset)) as u8;
470 }
471 }
472 }
473
474 let mut blocks = Vec::new();
476 let mut max_score = 0.0f32;
477 let mut i = 0;
478
479 while i < doc_ids.len() {
480 let block_end = (i + EF_BLOCK_SIZE).min(doc_ids.len());
481 let block_doc_ids = &doc_ids[i..block_end];
482 let block_tfs = &term_freqs[i..block_end];
483
484 let block_max_tf = *block_tfs.iter().max().unwrap_or(&1);
485 let block_score = Self::compute_bm25_upper_bound(block_max_tf, idf);
486 max_score = max_score.max(block_score);
487
488 blocks.push(EFBlockInfo {
489 first_doc_id: block_doc_ids[0],
490 last_doc_id: *block_doc_ids.last().unwrap(),
491 max_tf: block_max_tf,
492 max_block_score: block_score,
493 start_pos: i as u32,
494 num_docs: (block_end - i) as u16,
495 });
496
497 i = block_end;
498 }
499
500 Self {
501 doc_ids: ef_doc_ids,
502 term_freqs: packed_tfs,
503 tf_bits,
504 max_tf,
505 blocks,
506 max_score,
507 }
508 }
509
510 pub fn get_tf(&self, pos: u32) -> u32 {
512 if self.tf_bits == 0 || pos >= self.doc_ids.len() {
513 return 1;
514 }
515
516 let bit_pos = (pos as usize) * (self.tf_bits as usize);
517 let byte_idx = bit_pos / 8;
518 let bit_offset = bit_pos % 8;
519 let mask = (1u32 << self.tf_bits) - 1;
520
521 let mut val = (self.term_freqs[byte_idx] >> bit_offset) as u32;
522 if bit_offset + (self.tf_bits as usize) > 8 && byte_idx + 1 < self.term_freqs.len() {
523 val |= (self.term_freqs[byte_idx + 1] as u32) << (8 - bit_offset);
524 }
525 if bit_offset + (self.tf_bits as usize) > 16 && byte_idx + 2 < self.term_freqs.len() {
526 val |= (self.term_freqs[byte_idx + 2] as u32) << (16 - bit_offset);
527 }
528
529 (val & mask) + 1 }
531
532 pub fn len(&self) -> u32 {
534 self.doc_ids.len()
535 }
536
537 pub fn is_empty(&self) -> bool {
539 self.doc_ids.is_empty()
540 }
541
542 pub fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
544 self.doc_ids.serialize(writer)?;
545 writer.write_u8(self.tf_bits)?;
546 writer.write_u32::<LittleEndian>(self.max_tf)?;
547 writer.write_f32::<LittleEndian>(self.max_score)?;
548 writer.write_u32::<LittleEndian>(self.term_freqs.len() as u32)?;
549 writer.write_all(&self.term_freqs)?;
550
551 writer.write_u32::<LittleEndian>(self.blocks.len() as u32)?;
553 for block in &self.blocks {
554 block.serialize(writer)?;
555 }
556
557 Ok(())
558 }
559
560 pub fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
562 let doc_ids = EliasFano::deserialize(reader)?;
563 let tf_bits = reader.read_u8()?;
564 let max_tf = reader.read_u32::<LittleEndian>()?;
565 let max_score = reader.read_f32::<LittleEndian>()?;
566 let tf_len = reader.read_u32::<LittleEndian>()? as usize;
567 let mut term_freqs = vec![0u8; tf_len];
568 reader.read_exact(&mut term_freqs)?;
569
570 let num_blocks = reader.read_u32::<LittleEndian>()? as usize;
572 let mut blocks = Vec::with_capacity(num_blocks);
573 for _ in 0..num_blocks {
574 blocks.push(EFBlockInfo::deserialize(reader)?);
575 }
576
577 Ok(Self {
578 doc_ids,
579 term_freqs,
580 tf_bits,
581 max_tf,
582 blocks,
583 max_score,
584 })
585 }
586
587 pub fn num_blocks(&self) -> usize {
589 self.blocks.len()
590 }
591
592 pub fn block_for_pos(&self, pos: u32) -> usize {
594 (pos as usize) / EF_BLOCK_SIZE
595 }
596
597 pub fn iterator(&self) -> EliasFanoPostingIterator<'_> {
599 EliasFanoPostingIterator {
600 list: self,
601 pos: 0,
602 current_block: 0,
603 }
604 }
605}
606
607pub struct EliasFanoPostingIterator<'a> {
609 list: &'a EliasFanoPostingList,
610 pos: u32,
611 current_block: usize,
612}
613
614impl<'a> EliasFanoPostingIterator<'a> {
615 pub fn doc(&self) -> u32 {
617 self.list.doc_ids.get(self.pos).unwrap_or(u32::MAX)
618 }
619
620 pub fn term_freq(&self) -> u32 {
622 self.list.get_tf(self.pos)
623 }
624
625 pub fn advance(&mut self) -> u32 {
627 self.pos += 1;
628 if !self.list.blocks.is_empty() {
630 let new_block = self.list.block_for_pos(self.pos);
631 if new_block < self.list.blocks.len() {
632 self.current_block = new_block;
633 }
634 }
635 self.doc()
636 }
637
638 pub fn seek(&mut self, target: u32) -> u32 {
640 if !self.list.blocks.is_empty() {
642 let block_idx = self.list.blocks[self.current_block..].binary_search_by(|block| {
644 if block.last_doc_id < target {
645 std::cmp::Ordering::Less
646 } else if block.first_doc_id > target {
647 std::cmp::Ordering::Greater
648 } else {
649 std::cmp::Ordering::Equal
650 }
651 });
652
653 let target_block = match block_idx {
654 Ok(idx) => self.current_block + idx,
655 Err(idx) => {
656 let abs_idx = self.current_block + idx;
657 if abs_idx >= self.list.blocks.len() {
658 self.pos = self.list.len();
659 return u32::MAX;
660 }
661 abs_idx
662 }
663 };
664
665 if target_block > self.current_block {
667 self.current_block = target_block;
668 self.pos = self.list.blocks[target_block].start_pos;
669 }
670 }
671
672 if let Some((pos, val)) = self.list.doc_ids.next_geq(target)
674 && pos >= self.pos
675 {
676 self.pos = pos;
677 if !self.list.blocks.is_empty() {
678 self.current_block = self.list.block_for_pos(pos);
679 }
680 return val;
681 }
682
683 while self.pos < self.list.len() {
685 let doc = self.doc();
686 if doc >= target {
687 return doc;
688 }
689 self.pos += 1;
690 }
691
692 u32::MAX
693 }
694
695 pub fn is_exhausted(&self) -> bool {
697 self.pos >= self.list.len()
698 }
699
700 pub fn current_block_max_score(&self) -> f32 {
702 if self.is_exhausted() || self.list.blocks.is_empty() {
703 return 0.0;
704 }
705 if self.current_block < self.list.blocks.len() {
706 self.list.blocks[self.current_block].max_block_score
707 } else {
708 0.0
709 }
710 }
711
712 pub fn current_block_max_tf(&self) -> u32 {
714 if self.is_exhausted() || self.list.blocks.is_empty() {
715 return 0;
716 }
717 if self.current_block < self.list.blocks.len() {
718 self.list.blocks[self.current_block].max_tf
719 } else {
720 0
721 }
722 }
723
724 pub fn max_remaining_score(&self) -> f32 {
726 if self.is_exhausted() || self.list.blocks.is_empty() {
727 return 0.0;
728 }
729 self.list.blocks[self.current_block..]
730 .iter()
731 .map(|b| b.max_block_score)
732 .fold(0.0f32, |a, b| a.max(b))
733 }
734
735 pub fn skip_to_block_with_doc(&mut self, target: u32) -> Option<(u32, f32)> {
738 if self.list.blocks.is_empty() {
739 return None;
740 }
741
742 while self.current_block < self.list.blocks.len() {
743 let block = &self.list.blocks[self.current_block];
744 if block.last_doc_id >= target {
745 self.pos = block.start_pos;
746 return Some((block.first_doc_id, block.max_block_score));
747 }
748 self.current_block += 1;
749 }
750
751 self.pos = self.list.len();
752 None
753 }
754}
755
756#[cfg(test)]
757mod tests {
758 use super::*;
759
760 #[test]
761 fn test_elias_fano_basic() {
762 let values = vec![2, 3, 5, 7, 11, 13, 24];
763 let ef = EliasFano::from_sorted_slice(&values);
764
765 assert_eq!(ef.len(), 7);
766
767 for (i, &expected) in values.iter().enumerate() {
768 assert_eq!(ef.get(i as u32), Some(expected), "Mismatch at index {}", i);
769 }
770 }
771
772 #[test]
773 fn test_elias_fano_next_geq() {
774 let values = vec![10, 20, 30, 100, 200, 300, 1000];
775 let ef = EliasFano::from_sorted_slice(&values);
776
777 assert_eq!(ef.next_geq(5), Some((0, 10)));
778 assert_eq!(ef.next_geq(10), Some((0, 10)));
779 assert_eq!(ef.next_geq(15), Some((1, 20)));
780 assert_eq!(ef.next_geq(100), Some((3, 100)));
781 assert_eq!(ef.next_geq(500), Some((6, 1000)));
782 assert_eq!(ef.next_geq(2000), None);
783 }
784
785 #[test]
786 fn test_elias_fano_serialization() {
787 let values: Vec<u32> = (0..1000).map(|i| i * 3).collect();
788 let ef = EliasFano::from_sorted_slice(&values);
789
790 let mut buffer = Vec::new();
791 ef.serialize(&mut buffer).unwrap();
792
793 let restored = EliasFano::deserialize(&mut &buffer[..]).unwrap();
794
795 assert_eq!(restored.len(), ef.len());
796 for i in 0..ef.len() {
797 assert_eq!(restored.get(i), ef.get(i));
798 }
799 }
800
801 #[test]
802 fn test_elias_fano_posting_list() {
803 let doc_ids: Vec<u32> = vec![1, 5, 10, 50, 100, 500, 1000];
804 let term_freqs: Vec<u32> = vec![1, 2, 3, 4, 5, 6, 7];
805
806 let list = EliasFanoPostingList::from_postings(&doc_ids, &term_freqs);
807
808 assert_eq!(list.len(), 7);
809 assert_eq!(list.max_tf, 7);
810
811 let mut iter = list.iterator();
812 for (i, (&expected_doc, &expected_tf)) in doc_ids.iter().zip(term_freqs.iter()).enumerate()
813 {
814 assert_eq!(iter.doc(), expected_doc, "Doc mismatch at {}", i);
815 assert_eq!(iter.term_freq(), expected_tf, "TF mismatch at {}", i);
816 iter.advance();
817 }
818 }
819
820 #[test]
821 fn test_elias_fano_iterator_seek() {
822 let doc_ids: Vec<u32> = (0..100).map(|i| i * 10).collect();
823 let term_freqs: Vec<u32> = vec![1; 100];
824
825 let list = EliasFanoPostingList::from_postings(&doc_ids, &term_freqs);
826 let mut iter = list.iterator();
827
828 assert_eq!(iter.seek(55), 60);
829 assert_eq!(iter.seek(100), 100);
830 assert_eq!(iter.seek(999), u32::MAX);
831 }
832
833 #[test]
834 fn test_elias_fano_block_max() {
835 let doc_ids: Vec<u32> = (0..500).map(|i| i * 2).collect();
837 let term_freqs: Vec<u32> = (0..500)
839 .map(|i| {
840 if i < 128 {
841 1 } else if i < 256 {
843 5 } else if i < 384 {
845 10 } else {
847 3 }
849 })
850 .collect();
851
852 let list = EliasFanoPostingList::from_postings_with_idf(&doc_ids, &term_freqs, 2.0);
853
854 assert_eq!(list.num_blocks(), 4);
856 assert_eq!(list.blocks[0].max_tf, 1);
857 assert_eq!(list.blocks[1].max_tf, 5);
858 assert_eq!(list.blocks[2].max_tf, 10);
859 assert_eq!(list.blocks[3].max_tf, 3);
860
861 assert!(list.blocks[2].max_block_score > list.blocks[0].max_block_score);
863 assert!(list.blocks[2].max_block_score > list.blocks[1].max_block_score);
864 assert!(list.blocks[2].max_block_score > list.blocks[3].max_block_score);
865
866 assert_eq!(list.max_score, list.blocks[2].max_block_score);
868
869 let (pos, val) = list.doc_ids.next_geq(256).unwrap();
871 assert_eq!(val, 256, "next_geq(256) should return 256, got {}", val);
872 assert_eq!(pos, 128, "position of 256 should be 128, got {}", pos);
873
874 let mut iter = list.iterator();
876 assert_eq!(iter.current_block_max_tf(), 1); assert_eq!(list.blocks[0].first_doc_id, 0);
884 assert_eq!(list.blocks[0].last_doc_id, 254);
885 assert_eq!(list.blocks[1].first_doc_id, 256);
886 assert_eq!(list.blocks[1].last_doc_id, 510);
887
888 let doc = iter.seek(256); assert_eq!(doc, 256, "seek(256) should return 256, got {}", doc);
891 let block_tf = iter.current_block_max_tf();
893 assert_eq!(block_tf, 5, "block 1 max_tf should be 5, got {}", block_tf);
894
895 let doc = iter.seek(512); assert_eq!(doc, 512, "seek(512) should return 512, got {}", doc);
898 assert_eq!(iter.current_block_max_tf(), 10);
899 }
900
901 #[test]
902 fn test_elias_fano_block_max_serialization() {
903 let doc_ids: Vec<u32> = (0..300).map(|i| i * 3).collect();
904 let term_freqs: Vec<u32> = (0..300).map(|i| (i % 10) + 1).collect();
905
906 let list = EliasFanoPostingList::from_postings_with_idf(&doc_ids, &term_freqs, 1.5);
907
908 let mut buffer = Vec::new();
909 list.serialize(&mut buffer).unwrap();
910
911 let restored = EliasFanoPostingList::deserialize(&mut &buffer[..]).unwrap();
912
913 assert_eq!(restored.len(), list.len());
914 assert_eq!(restored.max_tf, list.max_tf);
915 assert_eq!(restored.max_score, list.max_score);
916 assert_eq!(restored.num_blocks(), list.num_blocks());
917
918 for (orig, rest) in list.blocks.iter().zip(restored.blocks.iter()) {
920 assert_eq!(orig.first_doc_id, rest.first_doc_id);
921 assert_eq!(orig.last_doc_id, rest.last_doc_id);
922 assert_eq!(orig.max_tf, rest.max_tf);
923 assert_eq!(orig.max_block_score, rest.max_block_score);
924 }
925
926 let mut iter1 = list.iterator();
928 let mut iter2 = restored.iterator();
929
930 while !iter1.is_exhausted() {
931 assert_eq!(iter1.doc(), iter2.doc());
932 assert_eq!(iter1.term_freq(), iter2.term_freq());
933 iter1.advance();
934 iter2.advance();
935 }
936 assert!(iter2.is_exhausted());
937 }
938}