1use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
9use std::io::{self, Cursor, Read, Write};
10
11use super::config::WeightQuantization;
12use crate::DocId;
13use crate::structures::postings::TERMINATED;
14use crate::structures::simd;
15
/// Maximum number of postings stored in a single compressed block.
pub const BLOCK_SIZE: usize = 128;
17
/// Fixed-size (16-byte on disk, see [`BlockHeader::SIZE`]) header describing
/// one compressed posting block.
#[derive(Debug, Clone, Copy)]
pub struct BlockHeader {
    /// Number of postings stored in the block.
    pub count: u16,
    /// Bit width used to pack each doc-id delta (0 when `count <= 1`).
    pub doc_id_bits: u8,
    /// Bit width used to pack ordinals; 0 means every ordinal is zero.
    pub ordinal_bits: u8,
    /// Quantization scheme applied to the weight section.
    pub weight_quant: WeightQuantization,
    /// First doc id of the block; remaining ids are stored as deltas.
    pub first_doc_id: DocId,
    /// Maximum weight in the block (used for block-max pruning).
    pub max_weight: f32,
}
27
impl BlockHeader {
    /// Serialized size in bytes; `write` always emits exactly this many.
    pub const SIZE: usize = 16;

    /// Writes the header as 16 little-endian bytes:
    /// `count(u16) | doc_id_bits(u8) | ordinal_bits(u8) | weight_quant(u8) |
    /// pad(u8) | pad(u16) | first_doc_id(u32) | max_weight(f32)`.
    pub fn write<W: Write>(&self, w: &mut W) -> io::Result<()> {
        w.write_u16::<LittleEndian>(self.count)?;
        w.write_u8(self.doc_id_bits)?;
        w.write_u8(self.ordinal_bits)?;
        w.write_u8(self.weight_quant as u8)?;
        // Reserved padding (1 + 2 bytes) keeps the header at exactly SIZE bytes.
        w.write_u8(0)?;
        w.write_u16::<LittleEndian>(0)?;
        w.write_u32::<LittleEndian>(self.first_doc_id)?;
        w.write_f32::<LittleEndian>(self.max_weight)?;
        Ok(())
    }

    /// Reads a header previously produced by [`BlockHeader::write`].
    ///
    /// Returns `InvalidData` if the stored quantization byte is not a known
    /// `WeightQuantization` variant; underlying I/O errors are propagated.
    pub fn read<R: Read>(r: &mut R) -> io::Result<Self> {
        let count = r.read_u16::<LittleEndian>()?;
        let doc_id_bits = r.read_u8()?;
        let ordinal_bits = r.read_u8()?;
        let weight_quant_byte = r.read_u8()?;
        // Skip the reserved padding bytes written by `write`.
        let _ = r.read_u8()?;
        let _ = r.read_u16::<LittleEndian>()?;
        let first_doc_id = r.read_u32::<LittleEndian>()?;
        let max_weight = r.read_f32::<LittleEndian>()?;

        let weight_quant = WeightQuantization::from_u8(weight_quant_byte)
            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Invalid weight quant"))?;

        Ok(Self {
            count,
            doc_id_bits,
            ordinal_bits,
            weight_quant,
            first_doc_id,
            max_weight,
        })
    }
}
66
/// One compressed run of postings: a fixed header plus three packed sections
/// (doc-id deltas, ordinals, quantized weights).
#[derive(Debug, Clone)]
pub struct SparseBlock {
    // Metadata needed to decode the sections below.
    pub header: BlockHeader,
    // Bit-packed doc-id deltas (`count - 1` entries; the first id is in the header).
    pub doc_ids_data: Vec<u8>,
    // Bit-packed ordinals; empty when every ordinal is zero.
    pub ordinals_data: Vec<u8>,
    // Weights encoded according to `header.weight_quant`.
    pub weights_data: Vec<u8>,
}
74
75impl SparseBlock {
76 pub fn from_postings(
77 postings: &[(DocId, u16, f32)],
78 weight_quant: WeightQuantization,
79 ) -> io::Result<Self> {
80 assert!(!postings.is_empty() && postings.len() <= BLOCK_SIZE);
81
82 let count = postings.len();
83 let first_doc_id = postings[0].0;
84
85 let mut deltas = Vec::with_capacity(count);
87 let mut prev = first_doc_id;
88 for &(doc_id, _, _) in postings {
89 deltas.push(doc_id.saturating_sub(prev));
90 prev = doc_id;
91 }
92 deltas[0] = 0;
93
94 let doc_id_bits = find_optimal_bit_width(&deltas[1..]);
95 let ordinals: Vec<u16> = postings.iter().map(|(_, o, _)| *o).collect();
96 let max_ordinal = ordinals.iter().copied().max().unwrap_or(0);
97 let ordinal_bits = if max_ordinal == 0 {
98 0
99 } else {
100 bits_needed_u16(max_ordinal)
101 };
102
103 let weights: Vec<f32> = postings.iter().map(|(_, _, w)| *w).collect();
104 let max_weight = weights.iter().copied().fold(0.0f32, f32::max);
105
106 let doc_ids_data = pack_bit_array(&deltas[1..], doc_id_bits);
107 let ordinals_data = if ordinal_bits > 0 {
108 pack_bit_array_u16(&ordinals, ordinal_bits)
109 } else {
110 Vec::new()
111 };
112 let weights_data = encode_weights(&weights, weight_quant)?;
113
114 Ok(Self {
115 header: BlockHeader {
116 count: count as u16,
117 doc_id_bits,
118 ordinal_bits,
119 weight_quant,
120 first_doc_id,
121 max_weight,
122 },
123 doc_ids_data,
124 ordinals_data,
125 weights_data,
126 })
127 }
128
129 pub fn decode_doc_ids(&self) -> Vec<DocId> {
130 let mut out = Vec::with_capacity(self.header.count as usize);
131 self.decode_doc_ids_into(&mut out);
132 out
133 }
134
135 pub fn decode_doc_ids_into(&self, out: &mut Vec<DocId>) {
137 let count = self.header.count as usize;
138 out.clear();
139 out.push(self.header.first_doc_id);
140
141 if count > 1 {
142 let deltas = unpack_bit_array(&self.doc_ids_data, self.header.doc_id_bits, count - 1);
143 let mut prev = self.header.first_doc_id;
144 for delta in deltas {
145 prev += delta;
146 out.push(prev);
147 }
148 }
149 }
150
151 pub fn decode_ordinals(&self) -> Vec<u16> {
152 let mut out = Vec::with_capacity(self.header.count as usize);
153 self.decode_ordinals_into(&mut out);
154 out
155 }
156
157 pub fn decode_ordinals_into(&self, out: &mut Vec<u16>) {
159 let count = self.header.count as usize;
160 out.clear();
161 if self.header.ordinal_bits == 0 {
162 out.resize(count, 0u16);
163 } else {
164 unpack_bit_array_u16_into(&self.ordinals_data, self.header.ordinal_bits, count, out);
165 }
166 }
167
168 pub fn decode_weights(&self) -> Vec<f32> {
169 let mut out = Vec::with_capacity(self.header.count as usize);
170 self.decode_weights_into(&mut out);
171 out
172 }
173
174 pub fn decode_weights_into(&self, out: &mut Vec<f32>) {
176 out.clear();
177 decode_weights_into(
178 &self.weights_data,
179 self.header.weight_quant,
180 self.header.count as usize,
181 out,
182 );
183 }
184
185 pub fn write<W: Write>(&self, w: &mut W) -> io::Result<()> {
186 self.header.write(w)?;
187 w.write_u16::<LittleEndian>(self.doc_ids_data.len() as u16)?;
188 w.write_u16::<LittleEndian>(self.ordinals_data.len() as u16)?;
189 w.write_u16::<LittleEndian>(self.weights_data.len() as u16)?;
190 w.write_u16::<LittleEndian>(0)?;
191 w.write_all(&self.doc_ids_data)?;
192 w.write_all(&self.ordinals_data)?;
193 w.write_all(&self.weights_data)?;
194 Ok(())
195 }
196
197 pub fn read<R: Read>(r: &mut R) -> io::Result<Self> {
198 let header = BlockHeader::read(r)?;
199 let doc_ids_len = r.read_u16::<LittleEndian>()? as usize;
200 let ordinals_len = r.read_u16::<LittleEndian>()? as usize;
201 let weights_len = r.read_u16::<LittleEndian>()? as usize;
202 let _ = r.read_u16::<LittleEndian>()?;
203
204 let mut doc_ids_data = vec![0u8; doc_ids_len];
205 r.read_exact(&mut doc_ids_data)?;
206 let mut ordinals_data = vec![0u8; ordinals_len];
207 r.read_exact(&mut ordinals_data)?;
208 let mut weights_data = vec![0u8; weights_len];
209 r.read_exact(&mut weights_data)?;
210
211 Ok(Self {
212 header,
213 doc_ids_data,
214 ordinals_data,
215 weights_data,
216 })
217 }
218
219 pub fn with_doc_offset(&self, doc_offset: u32) -> Self {
225 Self {
226 header: BlockHeader {
227 first_doc_id: self.header.first_doc_id + doc_offset,
228 ..self.header
229 },
230 doc_ids_data: self.doc_ids_data.clone(),
231 ordinals_data: self.ordinals_data.clone(),
232 weights_data: self.weights_data.clone(),
233 }
234 }
235}
236
/// A posting list stored as a sequence of independently decodable
/// `SparseBlock`s, enabling block-level skipping during query evaluation.
#[derive(Debug, Clone)]
pub struct BlockSparsePostingList {
    // Number of unique documents (postings may repeat a doc id across ordinals).
    pub doc_count: u32,
    // Blocks in ascending doc-id order.
    pub blocks: Vec<SparseBlock>,
}
246
247impl BlockSparsePostingList {
248 pub fn from_postings_with_block_size(
250 postings: &[(DocId, u16, f32)],
251 weight_quant: WeightQuantization,
252 block_size: usize,
253 ) -> io::Result<Self> {
254 if postings.is_empty() {
255 return Ok(Self {
256 doc_count: 0,
257 blocks: Vec::new(),
258 });
259 }
260
261 let block_size = block_size.max(16); let mut blocks = Vec::new();
263 for chunk in postings.chunks(block_size) {
264 blocks.push(SparseBlock::from_postings(chunk, weight_quant)?);
265 }
266
267 let mut unique_docs = 1u32;
272 for i in 1..postings.len() {
273 if postings[i].0 != postings[i - 1].0 {
274 unique_docs += 1;
275 }
276 }
277
278 Ok(Self {
279 doc_count: unique_docs,
280 blocks,
281 })
282 }
283
284 pub fn from_postings(
286 postings: &[(DocId, u16, f32)],
287 weight_quant: WeightQuantization,
288 ) -> io::Result<Self> {
289 Self::from_postings_with_block_size(postings, weight_quant, BLOCK_SIZE)
290 }
291
292 pub fn doc_count(&self) -> u32 {
293 self.doc_count
294 }
295
296 pub fn num_blocks(&self) -> usize {
297 self.blocks.len()
298 }
299
300 pub fn global_max_weight(&self) -> f32 {
301 self.blocks
302 .iter()
303 .map(|b| b.header.max_weight)
304 .fold(0.0f32, f32::max)
305 }
306
307 pub fn block_max_weight(&self, block_idx: usize) -> Option<f32> {
308 self.blocks.get(block_idx).map(|b| b.header.max_weight)
309 }
310
311 pub fn size_bytes(&self) -> usize {
313 use std::mem::size_of;
314
315 let header_size = size_of::<u32>() * 2; let blocks_size: usize = self
317 .blocks
318 .iter()
319 .map(|b| {
320 size_of::<BlockHeader>()
321 + b.doc_ids_data.len()
322 + b.ordinals_data.len()
323 + b.weights_data.len()
324 })
325 .sum();
326 header_size + blocks_size
327 }
328
329 pub fn iterator(&self) -> BlockSparsePostingIterator<'_> {
330 BlockSparsePostingIterator::new(self)
331 }
332
333 pub fn serialize<W: Write>(&self, w: &mut W) -> io::Result<()> {
342 use super::SparseSkipEntry;
343
344 w.write_u32::<LittleEndian>(self.doc_count)?;
345 w.write_f32::<LittleEndian>(self.global_max_weight())?;
346 w.write_u32::<LittleEndian>(self.blocks.len() as u32)?;
347
348 let mut block_bytes: Vec<Vec<u8>> = Vec::with_capacity(self.blocks.len());
350 for block in &self.blocks {
351 let mut buf = Vec::new();
352 block.write(&mut buf)?;
353 block_bytes.push(buf);
354 }
355
356 let mut offset = 0u32;
358 for (block, bytes) in self.blocks.iter().zip(block_bytes.iter()) {
359 let first_doc = block.header.first_doc_id;
360 let doc_ids = block.decode_doc_ids();
361 let last_doc = doc_ids.last().copied().unwrap_or(first_doc);
362 let length = bytes.len() as u32;
363
364 let entry =
365 SparseSkipEntry::new(first_doc, last_doc, offset, length, block.header.max_weight);
366 entry.write(w)?;
367 offset += length;
368 }
369
370 for bytes in block_bytes {
372 w.write_all(&bytes)?;
373 }
374
375 Ok(())
376 }
377
378 pub fn deserialize<R: Read>(r: &mut R) -> io::Result<Self> {
381 use super::SparseSkipEntry;
382
383 let doc_count = r.read_u32::<LittleEndian>()?;
384 let _global_max_weight = r.read_f32::<LittleEndian>()?;
385 let num_blocks = r.read_u32::<LittleEndian>()? as usize;
386
387 for _ in 0..num_blocks {
389 let _ = SparseSkipEntry::read(r)?;
390 }
391
392 let mut blocks = Vec::with_capacity(num_blocks);
394 for _ in 0..num_blocks {
395 blocks.push(SparseBlock::read(r)?);
396 }
397 Ok(Self { doc_count, blocks })
398 }
399
400 pub fn deserialize_header<R: Read>(
403 r: &mut R,
404 ) -> io::Result<(u32, f32, Vec<super::SparseSkipEntry>, usize)> {
405 use super::SparseSkipEntry;
406
407 let doc_count = r.read_u32::<LittleEndian>()?;
408 let global_max_weight = r.read_f32::<LittleEndian>()?;
409 let num_blocks = r.read_u32::<LittleEndian>()? as usize;
410
411 let mut entries = Vec::with_capacity(num_blocks);
412 for _ in 0..num_blocks {
413 entries.push(SparseSkipEntry::read(r)?);
414 }
415
416 let header_size = 4 + 4 + 4 + num_blocks * SparseSkipEntry::SIZE;
418
419 Ok((doc_count, global_max_weight, entries, header_size))
420 }
421
422 pub fn decode_all(&self) -> Vec<(DocId, u16, f32)> {
423 let total_postings: usize = self.blocks.iter().map(|b| b.header.count as usize).sum();
424 let mut result = Vec::with_capacity(total_postings);
425 for block in &self.blocks {
426 let doc_ids = block.decode_doc_ids();
427 let ordinals = block.decode_ordinals();
428 let weights = block.decode_weights();
429 for i in 0..block.header.count as usize {
430 result.push((doc_ids[i], ordinals[i], weights[i]));
431 }
432 }
433 result
434 }
435
436 pub fn merge_with_offsets(lists: &[(&BlockSparsePostingList, u32)]) -> Self {
447 if lists.is_empty() {
448 return Self {
449 doc_count: 0,
450 blocks: Vec::new(),
451 };
452 }
453
454 let total_blocks: usize = lists.iter().map(|(pl, _)| pl.blocks.len()).sum();
456 let total_docs: u32 = lists.iter().map(|(pl, _)| pl.doc_count).sum();
457
458 let mut merged_blocks = Vec::with_capacity(total_blocks);
459
460 for (posting_list, doc_offset) in lists {
462 for block in &posting_list.blocks {
463 merged_blocks.push(block.with_doc_offset(*doc_offset));
464 }
465 }
466
467 Self {
468 doc_count: total_docs,
469 blocks: merged_blocks,
470 }
471 }
472
473 fn find_block(&self, target: DocId) -> Option<usize> {
474 if self.blocks.is_empty() {
475 return None;
476 }
477 let idx = self
480 .blocks
481 .partition_point(|b| b.header.first_doc_id <= target);
482 if idx == 0 {
483 Some(0)
485 } else {
486 Some(idx - 1)
487 }
488 }
489}
490
/// Streaming cursor over a `BlockSparsePostingList`.
///
/// Decodes one block at a time into reusable buffers; `doc()` returns
/// `TERMINATED` once the cursor has moved past the last posting.
pub struct BlockSparsePostingIterator<'a> {
    // The list being iterated; blocks are decoded lazily on demand.
    posting_list: &'a BlockSparsePostingList,
    // Index of the currently decoded block.
    block_idx: usize,
    // Cursor position inside the decoded block.
    in_block_idx: usize,
    // Decoded contents of the current block (buffers reused across blocks).
    current_doc_ids: Vec<DocId>,
    current_ordinals: Vec<u16>,
    current_weights: Vec<f32>,
    // Set once the cursor has moved past the last block.
    exhausted: bool,
}
504
505impl<'a> BlockSparsePostingIterator<'a> {
506 fn new(posting_list: &'a BlockSparsePostingList) -> Self {
507 let mut iter = Self {
508 posting_list,
509 block_idx: 0,
510 in_block_idx: 0,
511 current_doc_ids: Vec::new(),
512 current_ordinals: Vec::new(),
513 current_weights: Vec::new(),
514 exhausted: posting_list.blocks.is_empty(),
515 };
516 if !iter.exhausted {
517 iter.load_block(0);
518 }
519 iter
520 }
521
522 fn load_block(&mut self, block_idx: usize) {
523 if let Some(block) = self.posting_list.blocks.get(block_idx) {
524 block.decode_doc_ids_into(&mut self.current_doc_ids);
525 block.decode_ordinals_into(&mut self.current_ordinals);
526 block.decode_weights_into(&mut self.current_weights);
527 self.block_idx = block_idx;
528 self.in_block_idx = 0;
529 }
530 }
531
532 pub fn doc(&self) -> DocId {
533 if self.exhausted {
534 TERMINATED
535 } else {
536 self.current_doc_ids
537 .get(self.in_block_idx)
538 .copied()
539 .unwrap_or(TERMINATED)
540 }
541 }
542
543 pub fn weight(&self) -> f32 {
544 self.current_weights
545 .get(self.in_block_idx)
546 .copied()
547 .unwrap_or(0.0)
548 }
549
550 pub fn ordinal(&self) -> u16 {
551 self.current_ordinals
552 .get(self.in_block_idx)
553 .copied()
554 .unwrap_or(0)
555 }
556
557 pub fn advance(&mut self) -> DocId {
558 if self.exhausted {
559 return TERMINATED;
560 }
561 self.in_block_idx += 1;
562 if self.in_block_idx >= self.current_doc_ids.len() {
563 self.block_idx += 1;
564 if self.block_idx >= self.posting_list.blocks.len() {
565 self.exhausted = true;
566 } else {
567 self.load_block(self.block_idx);
568 }
569 }
570 self.doc()
571 }
572
573 pub fn seek(&mut self, target: DocId) -> DocId {
574 if self.exhausted {
575 return TERMINATED;
576 }
577 if self.doc() >= target {
578 return self.doc();
579 }
580
581 if let Some(&last_doc) = self.current_doc_ids.last()
583 && last_doc >= target
584 {
585 let remaining = &self.current_doc_ids[self.in_block_idx..];
586 let pos = crate::structures::simd::find_first_ge_u32(remaining, target);
587 self.in_block_idx += pos;
588 if self.in_block_idx >= self.current_doc_ids.len() {
589 self.block_idx += 1;
590 if self.block_idx >= self.posting_list.blocks.len() {
591 self.exhausted = true;
592 } else {
593 self.load_block(self.block_idx);
594 }
595 }
596 return self.doc();
597 }
598
599 if let Some(block_idx) = self.posting_list.find_block(target) {
601 self.load_block(block_idx);
602 let pos = crate::structures::simd::find_first_ge_u32(&self.current_doc_ids, target);
603 self.in_block_idx = pos;
604 if self.in_block_idx >= self.current_doc_ids.len() {
605 self.block_idx += 1;
606 if self.block_idx >= self.posting_list.blocks.len() {
607 self.exhausted = true;
608 } else {
609 self.load_block(self.block_idx);
610 }
611 }
612 } else {
613 self.exhausted = true;
614 }
615 self.doc()
616 }
617
618 pub fn skip_to_next_block(&mut self) -> DocId {
621 if self.exhausted {
622 return TERMINATED;
623 }
624 let next = self.block_idx + 1;
625 if next >= self.posting_list.blocks.len() {
626 self.exhausted = true;
627 return TERMINATED;
628 }
629 self.load_block(next);
630 self.doc()
631 }
632
633 pub fn is_exhausted(&self) -> bool {
634 self.exhausted
635 }
636
637 pub fn current_block_max_weight(&self) -> f32 {
638 self.posting_list
639 .blocks
640 .get(self.block_idx)
641 .map(|b| b.header.max_weight)
642 .unwrap_or(0.0)
643 }
644
645 pub fn current_block_max_contribution(&self, query_weight: f32) -> f32 {
646 query_weight * self.current_block_max_weight()
647 }
648}
649
650fn find_optimal_bit_width(values: &[u32]) -> u8 {
655 if values.is_empty() {
656 return 0;
657 }
658 let max_val = values.iter().copied().max().unwrap_or(0);
659 simd::bits_needed(max_val)
660}
661
/// Number of bits required to represent `val`; zero needs zero bits.
fn bits_needed_u16(val: u16) -> u8 {
    // leading_zeros(0) == 16, so the zero case falls out of the subtraction.
    (16 - val.leading_zeros()) as u8
}
669
670fn pack_bit_array(values: &[u32], bits: u8) -> Vec<u8> {
671 if bits == 0 || values.is_empty() {
672 return Vec::new();
673 }
674 let total_bytes = (values.len() * bits as usize).div_ceil(8);
675 let mut result = vec![0u8; total_bytes];
676 let mut bit_pos = 0usize;
677 for &val in values {
678 pack_value(&mut result, bit_pos, val & ((1u32 << bits) - 1), bits);
679 bit_pos += bits as usize;
680 }
681 result
682}
683
684fn pack_bit_array_u16(values: &[u16], bits: u8) -> Vec<u8> {
685 if bits == 0 || values.is_empty() {
686 return Vec::new();
687 }
688 let total_bytes = (values.len() * bits as usize).div_ceil(8);
689 let mut result = vec![0u8; total_bytes];
690 let mut bit_pos = 0usize;
691 for &val in values {
692 pack_value(
693 &mut result,
694 bit_pos,
695 (val as u32) & ((1u32 << bits) - 1),
696 bits,
697 );
698 bit_pos += bits as usize;
699 }
700 result
701}
702
/// Writes the low `bits` bits of `val` into `data` starting at bit `bit_pos`,
/// least-significant bits first, OR-ing into bytes (callers supply zeroed space).
#[inline]
fn pack_value(data: &mut [u8], bit_pos: usize, val: u32, bits: u8) {
    let mut value = val;
    let mut bits_left = bits as usize;
    let mut byte_idx = bit_pos / 8;
    let mut bit_offset = bit_pos % 8;
    while bits_left > 0 {
        // Write as many bits as still fit in the current byte.
        let chunk = bits_left.min(8 - bit_offset);
        let chunk_mask = (1u32 << chunk) - 1;
        data[byte_idx] |= ((value & chunk_mask) as u8) << bit_offset;
        value >>= chunk;
        bits_left -= chunk;
        byte_idx += 1;
        bit_offset = 0;
    }
}
720
721fn unpack_bit_array(data: &[u8], bits: u8, count: usize) -> Vec<u32> {
722 if bits == 0 || count == 0 {
723 return vec![0; count];
724 }
725 let mut result = Vec::with_capacity(count);
726 let mut bit_pos = 0usize;
727 for _ in 0..count {
728 result.push(unpack_value(data, bit_pos, bits));
729 bit_pos += bits as usize;
730 }
731 result
732}
733
734fn unpack_bit_array_u16_into(data: &[u8], bits: u8, count: usize, out: &mut Vec<u16>) {
735 if bits == 0 || count == 0 {
736 out.resize(count, 0u16);
737 return;
738 }
739 let mut bit_pos = 0usize;
740 for _ in 0..count {
741 out.push(unpack_value(data, bit_pos, bits) as u16);
742 bit_pos += bits as usize;
743 }
744}
745
/// Reads `bits` bits from `data` starting at bit `bit_pos` (little-endian bit
/// order, mirroring `pack_value`). Reads past the end of `data` yield zeros.
#[inline]
fn unpack_value(data: &[u8], bit_pos: usize, bits: u8) -> u32 {
    let mut val = 0u32;
    let mut remaining = bits as usize;
    let mut byte = bit_pos / 8;
    let mut offset = bit_pos % 8;
    let mut shift = 0;
    while remaining > 0 {
        let space = 8 - offset;
        let to_read = remaining.min(space);
        // `to_read` reaches 8 on byte-aligned reads; `1u8 << 8` overflows
        // (panic in debug, wrapped shift -> mask 0 in release, corrupting the
        // decoded value). Build the mask in u32 instead.
        let mask = ((1u32 << to_read) - 1) as u8;
        val |= (((data.get(byte).copied().unwrap_or(0) >> offset) & mask) as u32) << shift;
        remaining -= to_read;
        shift += to_read;
        byte += 1;
        offset = 0;
    }
    val
}
765
766fn encode_weights(weights: &[f32], quant: WeightQuantization) -> io::Result<Vec<u8>> {
771 let mut data = Vec::new();
772 match quant {
773 WeightQuantization::Float32 => {
774 for &w in weights {
775 data.write_f32::<LittleEndian>(w)?;
776 }
777 }
778 WeightQuantization::Float16 => {
779 use half::f16;
780 for &w in weights {
781 data.write_u16::<LittleEndian>(f16::from_f32(w).to_bits())?;
782 }
783 }
784 WeightQuantization::UInt8 => {
785 let min = weights.iter().copied().fold(f32::INFINITY, f32::min);
786 let max = weights.iter().copied().fold(f32::NEG_INFINITY, f32::max);
787 let range = max - min;
788 let scale = if range < f32::EPSILON {
789 1.0
790 } else {
791 range / 255.0
792 };
793 data.write_f32::<LittleEndian>(scale)?;
794 data.write_f32::<LittleEndian>(min)?;
795 for &w in weights {
796 data.write_u8(((w - min) / scale).round() as u8)?;
797 }
798 }
799 WeightQuantization::UInt4 => {
800 let min = weights.iter().copied().fold(f32::INFINITY, f32::min);
801 let max = weights.iter().copied().fold(f32::NEG_INFINITY, f32::max);
802 let range = max - min;
803 let scale = if range < f32::EPSILON {
804 1.0
805 } else {
806 range / 15.0
807 };
808 data.write_f32::<LittleEndian>(scale)?;
809 data.write_f32::<LittleEndian>(min)?;
810 let mut i = 0;
811 while i < weights.len() {
812 let q1 = ((weights[i] - min) / scale).round() as u8 & 0x0F;
813 let q2 = if i + 1 < weights.len() {
814 ((weights[i + 1] - min) / scale).round() as u8 & 0x0F
815 } else {
816 0
817 };
818 data.write_u8((q2 << 4) | q1)?;
819 i += 2;
820 }
821 }
822 }
823 Ok(data)
824}
825
/// Dequantizes `count` weights from `data`, appending them to `out`.
///
/// Decoding is deliberately best-effort: a short or corrupt section falls back
/// to zeroed reads (`unwrap_or`) rather than failing, which keeps the block
/// decode path infallible.
fn decode_weights_into(data: &[u8], quant: WeightQuantization, count: usize, out: &mut Vec<f32>) {
    let mut cursor = Cursor::new(data);
    match quant {
        WeightQuantization::Float32 => {
            for _ in 0..count {
                out.push(cursor.read_f32::<LittleEndian>().unwrap_or(0.0));
            }
        }
        WeightQuantization::Float16 => {
            use half::f16;
            for _ in 0..count {
                let bits = cursor.read_u16::<LittleEndian>().unwrap_or(0);
                out.push(f16::from_bits(bits).to_f32());
            }
        }
        WeightQuantization::UInt8 => {
            // Affine dequantization: w = q * scale + min, with (scale, min)
            // prefixed to the section by the encoder.
            let scale = cursor.read_f32::<LittleEndian>().unwrap_or(1.0);
            let min = cursor.read_f32::<LittleEndian>().unwrap_or(0.0);
            for _ in 0..count {
                let q = cursor.read_u8().unwrap_or(0);
                out.push(q as f32 * scale + min);
            }
        }
        WeightQuantization::UInt4 => {
            let scale = cursor.read_f32::<LittleEndian>().unwrap_or(1.0);
            let min = cursor.read_f32::<LittleEndian>().unwrap_or(0.0);
            let mut i = 0;
            // Two 4-bit codes per byte: low nibble first, then high nibble.
            while i < count {
                let byte = cursor.read_u8().unwrap_or(0);
                out.push((byte & 0x0F) as f32 * scale + min);
                i += 1;
                if i < count {
                    out.push((byte >> 4) as f32 * scale + min);
                    i += 1;
                }
            }
        }
    }
}
865
866#[cfg(test)]
867mod tests {
868 use super::*;
869
870 #[test]
871 fn test_block_roundtrip() {
872 let postings = vec![
873 (10u32, 0u16, 1.5f32),
874 (15, 0, 2.0),
875 (20, 1, 0.5),
876 (100, 0, 3.0),
877 ];
878 let block = SparseBlock::from_postings(&postings, WeightQuantization::Float32).unwrap();
879
880 assert_eq!(block.decode_doc_ids(), vec![10, 15, 20, 100]);
881 assert_eq!(block.decode_ordinals(), vec![0, 0, 1, 0]);
882 let weights = block.decode_weights();
883 assert!((weights[0] - 1.5).abs() < 0.01);
884 }
885
886 #[test]
887 fn test_posting_list() {
888 let postings: Vec<(DocId, u16, f32)> =
889 (0..300).map(|i| (i * 2, 0, i as f32 * 0.1)).collect();
890 let list =
891 BlockSparsePostingList::from_postings(&postings, WeightQuantization::Float32).unwrap();
892
893 assert_eq!(list.doc_count(), 300);
894 assert_eq!(list.num_blocks(), 3);
895
896 let mut iter = list.iterator();
897 assert_eq!(iter.doc(), 0);
898 iter.advance();
899 assert_eq!(iter.doc(), 2);
900 }
901
902 #[test]
903 fn test_serialization() {
904 let postings = vec![(1u32, 0u16, 0.5f32), (10, 1, 1.5), (100, 0, 2.5)];
905 let list =
906 BlockSparsePostingList::from_postings(&postings, WeightQuantization::UInt8).unwrap();
907
908 let mut buf = Vec::new();
909 list.serialize(&mut buf).unwrap();
910 let list2 = BlockSparsePostingList::deserialize(&mut Cursor::new(&buf)).unwrap();
911
912 assert_eq!(list.doc_count(), list2.doc_count());
913 }
914
915 #[test]
916 fn test_seek() {
917 let postings: Vec<(DocId, u16, f32)> = (0..500).map(|i| (i * 3, 0, i as f32)).collect();
918 let list =
919 BlockSparsePostingList::from_postings(&postings, WeightQuantization::Float32).unwrap();
920
921 let mut iter = list.iterator();
922 assert_eq!(iter.seek(300), 300);
923 assert_eq!(iter.seek(301), 303);
924 assert_eq!(iter.seek(2000), TERMINATED);
925 }
926
927 #[test]
928 fn test_merge_with_offsets() {
929 let postings1: Vec<(DocId, u16, f32)> = vec![(0, 0, 1.0), (5, 0, 2.0), (10, 1, 3.0)];
931 let list1 =
932 BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
933
934 let postings2: Vec<(DocId, u16, f32)> = vec![(0, 0, 4.0), (3, 1, 5.0), (7, 0, 6.0)];
936 let list2 =
937 BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();
938
939 let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 100)]);
941
942 assert_eq!(merged.doc_count(), 6);
943
944 let decoded = merged.decode_all();
946 assert_eq!(decoded.len(), 6);
947
948 assert_eq!(decoded[0].0, 0);
950 assert_eq!(decoded[1].0, 5);
951 assert_eq!(decoded[2].0, 10);
952
953 assert_eq!(decoded[3].0, 100); assert_eq!(decoded[4].0, 103); assert_eq!(decoded[5].0, 107); assert!((decoded[0].2 - 1.0).abs() < 0.01);
960 assert!((decoded[3].2 - 4.0).abs() < 0.01);
961
962 assert_eq!(decoded[2].1, 1); assert_eq!(decoded[4].1, 1); }
966
967 #[test]
968 fn test_merge_with_offsets_multi_block() {
969 let postings1: Vec<(DocId, u16, f32)> = (0..200).map(|i| (i * 2, 0, i as f32)).collect();
971 let list1 =
972 BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
973 assert!(list1.num_blocks() > 1, "Should have multiple blocks");
974
975 let postings2: Vec<(DocId, u16, f32)> = (0..150).map(|i| (i * 3, 1, i as f32)).collect();
976 let list2 =
977 BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();
978
979 let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 1000)]);
981
982 assert_eq!(merged.doc_count(), 350);
983 assert_eq!(merged.num_blocks(), list1.num_blocks() + list2.num_blocks());
984
985 let mut iter = merged.iterator();
987
988 assert_eq!(iter.doc(), 0);
990
991 let doc = iter.seek(1000);
993 assert_eq!(doc, 1000); iter.advance();
997 assert_eq!(iter.doc(), 1003); }
999
1000 #[test]
1001 fn test_merge_with_offsets_serialize_roundtrip() {
1002 let postings1: Vec<(DocId, u16, f32)> = vec![(0, 0, 1.0), (5, 0, 2.0), (10, 1, 3.0)];
1004 let list1 =
1005 BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
1006
1007 let postings2: Vec<(DocId, u16, f32)> = vec![(0, 0, 4.0), (3, 1, 5.0), (7, 0, 6.0)];
1008 let list2 =
1009 BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();
1010
1011 let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 100)]);
1013
1014 let mut bytes = Vec::new();
1016 merged.serialize(&mut bytes).unwrap();
1017
1018 let mut cursor = std::io::Cursor::new(&bytes);
1020 let loaded = BlockSparsePostingList::deserialize(&mut cursor).unwrap();
1021
1022 let decoded = loaded.decode_all();
1024 assert_eq!(decoded.len(), 6);
1025
1026 assert_eq!(decoded[0].0, 0);
1028 assert_eq!(decoded[1].0, 5);
1029 assert_eq!(decoded[2].0, 10);
1030
1031 assert_eq!(decoded[3].0, 100, "First doc of seg2 should be 0+100=100");
1033 assert_eq!(decoded[4].0, 103, "Second doc of seg2 should be 3+100=103");
1034 assert_eq!(decoded[5].0, 107, "Third doc of seg2 should be 7+100=107");
1035
1036 let mut iter = loaded.iterator();
1038 assert_eq!(iter.doc(), 0);
1039 iter.advance();
1040 assert_eq!(iter.doc(), 5);
1041 iter.advance();
1042 assert_eq!(iter.doc(), 10);
1043 iter.advance();
1044 assert_eq!(iter.doc(), 100);
1045 iter.advance();
1046 assert_eq!(iter.doc(), 103);
1047 iter.advance();
1048 assert_eq!(iter.doc(), 107);
1049 }
1050
1051 #[test]
1052 fn test_merge_seek_after_roundtrip() {
1053 let postings1: Vec<(DocId, u16, f32)> = (0..200).map(|i| (i * 2, 0, 1.0)).collect();
1055 let list1 =
1056 BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
1057
1058 let postings2: Vec<(DocId, u16, f32)> = (0..150).map(|i| (i * 3, 0, 2.0)).collect();
1059 let list2 =
1060 BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();
1061
1062 let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 1000)]);
1064
1065 let mut bytes = Vec::new();
1067 merged.serialize(&mut bytes).unwrap();
1068 let loaded =
1069 BlockSparsePostingList::deserialize(&mut std::io::Cursor::new(&bytes)).unwrap();
1070
1071 let mut iter = loaded.iterator();
1073
1074 let doc = iter.seek(100);
1076 assert_eq!(doc, 100, "Seek to 100 in segment 1");
1077
1078 let doc = iter.seek(1000);
1080 assert_eq!(doc, 1000, "Seek to 1000 (first doc of segment 2)");
1081
1082 let doc = iter.seek(1050);
1084 assert!(
1085 doc >= 1050,
1086 "Seek to 1050 should find doc >= 1050, got {}",
1087 doc
1088 );
1089
1090 let doc = iter.seek(500);
1092 assert!(
1093 doc >= 1050,
1094 "Seek backwards should not go back, got {}",
1095 doc
1096 );
1097
1098 let mut iter2 = loaded.iterator();
1100
1101 let mut count = 0;
1103 let mut prev_doc = 0;
1104 while iter2.doc() != super::TERMINATED {
1105 let current = iter2.doc();
1106 if count > 0 {
1107 assert!(
1108 current > prev_doc,
1109 "Docs should be monotonically increasing: {} vs {}",
1110 prev_doc,
1111 current
1112 );
1113 }
1114 prev_doc = current;
1115 iter2.advance();
1116 count += 1;
1117 }
1118 assert_eq!(count, 350, "Should have 350 total docs");
1119 }
1120
1121 #[test]
1122 fn test_doc_count_multi_value() {
1123 let postings: Vec<(DocId, u16, f32)> = vec![
1126 (0, 0, 1.0),
1127 (0, 1, 1.5),
1128 (0, 2, 2.0),
1129 (5, 0, 3.0),
1130 (5, 1, 3.5),
1131 (10, 0, 4.0),
1132 ];
1133 let list =
1134 BlockSparsePostingList::from_postings(&postings, WeightQuantization::Float32).unwrap();
1135
1136 assert_eq!(list.doc_count(), 3);
1138
1139 let decoded = list.decode_all();
1141 assert_eq!(decoded.len(), 6);
1142 }
1143
1144 #[test]
1148 fn test_zero_copy_merge_patches_first_doc_id() {
1149 use crate::structures::SparseSkipEntry;
1150
1151 let postings1: Vec<(DocId, u16, f32)> = (0..200).map(|i| (i * 2, 0, i as f32)).collect();
1153 let list1 =
1154 BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
1155 assert!(list1.num_blocks() > 1);
1156
1157 let postings2: Vec<(DocId, u16, f32)> = (0..150).map(|i| (i * 3, 1, i as f32)).collect();
1158 let list2 =
1159 BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();
1160
1161 let mut bytes1 = Vec::new();
1163 list1.serialize(&mut bytes1).unwrap();
1164 let mut bytes2 = Vec::new();
1165 list2.serialize(&mut bytes2).unwrap();
1166
1167 fn parse_raw(data: &[u8]) -> (u32, f32, Vec<SparseSkipEntry>, &[u8]) {
1169 let doc_count = u32::from_le_bytes(data[0..4].try_into().unwrap());
1170 let global_max = f32::from_le_bytes(data[4..8].try_into().unwrap());
1171 let num_blocks = u32::from_le_bytes(data[8..12].try_into().unwrap()) as usize;
1172 let mut pos = 12;
1173 let mut skip = Vec::new();
1174 for _ in 0..num_blocks {
1175 let first_doc = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
1176 let last_doc = u32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap());
1177 let offset = u32::from_le_bytes(data[pos + 8..pos + 12].try_into().unwrap());
1178 let length = u32::from_le_bytes(data[pos + 12..pos + 16].try_into().unwrap());
1179 let max_w = f32::from_le_bytes(data[pos + 16..pos + 20].try_into().unwrap());
1180 skip.push(SparseSkipEntry::new(
1181 first_doc, last_doc, offset, length, max_w,
1182 ));
1183 pos += 20;
1184 }
1185 (doc_count, global_max, skip, &data[pos..])
1186 }
1187
1188 let (dc1, gm1, skip1, raw1) = parse_raw(&bytes1);
1189 let (dc2, gm2, skip2, raw2) = parse_raw(&bytes2);
1190
1191 let doc_offset: u32 = 1000; let total_docs = dc1 + dc2;
1194 let global_max = gm1.max(gm2);
1195 let total_blocks = (skip1.len() + skip2.len()) as u32;
1196
1197 let mut output = Vec::new();
1198 output.extend_from_slice(&total_docs.to_le_bytes());
1200 output.extend_from_slice(&global_max.to_le_bytes());
1201 output.extend_from_slice(&total_blocks.to_le_bytes());
1202
1203 let mut block_data_offset = 0u32;
1205 for entry in &skip1 {
1206 let adjusted = SparseSkipEntry::new(
1207 entry.first_doc,
1208 entry.last_doc,
1209 block_data_offset + entry.offset,
1210 entry.length,
1211 entry.max_weight,
1212 );
1213 adjusted.write(&mut output).unwrap();
1214 }
1215 if let Some(last) = skip1.last() {
1216 block_data_offset += last.offset + last.length;
1217 }
1218 for entry in &skip2 {
1219 let adjusted = SparseSkipEntry::new(
1220 entry.first_doc + doc_offset,
1221 entry.last_doc + doc_offset,
1222 block_data_offset + entry.offset,
1223 entry.length,
1224 entry.max_weight,
1225 );
1226 adjusted.write(&mut output).unwrap();
1227 }
1228
1229 output.extend_from_slice(raw1);
1231
1232 const FIRST_DOC_ID_OFFSET: usize = 8;
1233 let mut buf2 = raw2.to_vec();
1234 for entry in &skip2 {
1235 let off = entry.offset as usize + FIRST_DOC_ID_OFFSET;
1236 if off + 4 <= buf2.len() {
1237 let old = u32::from_le_bytes(buf2[off..off + 4].try_into().unwrap());
1238 let patched = (old + doc_offset).to_le_bytes();
1239 buf2[off..off + 4].copy_from_slice(&patched);
1240 }
1241 }
1242 output.extend_from_slice(&buf2);
1243
1244 let loaded = BlockSparsePostingList::deserialize(&mut Cursor::new(&output)).unwrap();
1246 assert_eq!(loaded.doc_count(), 350);
1247
1248 let mut iter = loaded.iterator();
1249
1250 assert_eq!(iter.doc(), 0);
1252 let doc = iter.seek(100);
1253 assert_eq!(doc, 100);
1254 let doc = iter.seek(398);
1255 assert_eq!(doc, 398);
1256
1257 let doc = iter.seek(1000);
1259 assert_eq!(doc, 1000, "First doc of segment 2 should be 1000");
1260 iter.advance();
1261 assert_eq!(iter.doc(), 1003, "Second doc of segment 2 should be 1003");
1262 let doc = iter.seek(1447);
1263 assert_eq!(doc, 1447, "Last doc of segment 2 should be 1447");
1264
1265 iter.advance();
1267 assert_eq!(iter.doc(), super::TERMINATED);
1268
1269 let reference =
1271 BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, doc_offset)]);
1272 let mut ref_iter = reference.iterator();
1273 let mut zc_iter = loaded.iterator();
1274 while ref_iter.doc() != super::TERMINATED {
1275 assert_eq!(
1276 ref_iter.doc(),
1277 zc_iter.doc(),
1278 "Zero-copy and reference merge should produce identical doc_ids"
1279 );
1280 assert!(
1281 (ref_iter.weight() - zc_iter.weight()).abs() < 0.01,
1282 "Weights should match: {} vs {}",
1283 ref_iter.weight(),
1284 zc_iter.weight()
1285 );
1286 ref_iter.advance();
1287 zc_iter.advance();
1288 }
1289 assert_eq!(zc_iter.doc(), super::TERMINATED);
1290 }
1291
1292 #[test]
1293 fn test_doc_count_single_value() {
1294 let postings: Vec<(DocId, u16, f32)> =
1296 vec![(0, 0, 1.0), (5, 0, 2.0), (10, 0, 3.0), (15, 0, 4.0)];
1297 let list =
1298 BlockSparsePostingList::from_postings(&postings, WeightQuantization::Float32).unwrap();
1299
1300 assert_eq!(list.doc_count(), 4);
1302 }
1303
1304 #[test]
1305 fn test_doc_count_multi_value_serialization_roundtrip() {
1306 let postings: Vec<(DocId, u16, f32)> =
1308 vec![(0, 0, 1.0), (0, 1, 1.5), (5, 0, 2.0), (5, 1, 2.5)];
1309 let list =
1310 BlockSparsePostingList::from_postings(&postings, WeightQuantization::Float32).unwrap();
1311 assert_eq!(list.doc_count(), 2);
1312
1313 let mut buf = Vec::new();
1314 list.serialize(&mut buf).unwrap();
1315 let loaded = BlockSparsePostingList::deserialize(&mut Cursor::new(&buf)).unwrap();
1316 assert_eq!(loaded.doc_count(), 2);
1317 }
1318
1319 #[test]
1320 fn test_merge_preserves_weights_and_ordinals() {
1321 let postings1: Vec<(DocId, u16, f32)> = vec![(0, 0, 1.5), (5, 1, 2.5), (10, 2, 3.5)];
1323 let list1 =
1324 BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
1325
1326 let postings2: Vec<(DocId, u16, f32)> = vec![(0, 0, 4.5), (3, 1, 5.5), (7, 3, 6.5)];
1327 let list2 =
1328 BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();
1329
1330 let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 100)]);
1332
1333 let mut bytes = Vec::new();
1335 merged.serialize(&mut bytes).unwrap();
1336 let loaded =
1337 BlockSparsePostingList::deserialize(&mut std::io::Cursor::new(&bytes)).unwrap();
1338
1339 let mut iter = loaded.iterator();
1341
1342 assert_eq!(iter.doc(), 0);
1344 assert!(
1345 (iter.weight() - 1.5).abs() < 0.01,
1346 "Weight should be 1.5, got {}",
1347 iter.weight()
1348 );
1349 assert_eq!(iter.ordinal(), 0);
1350
1351 iter.advance();
1352 assert_eq!(iter.doc(), 5);
1353 assert!(
1354 (iter.weight() - 2.5).abs() < 0.01,
1355 "Weight should be 2.5, got {}",
1356 iter.weight()
1357 );
1358 assert_eq!(iter.ordinal(), 1);
1359
1360 iter.advance();
1361 assert_eq!(iter.doc(), 10);
1362 assert!(
1363 (iter.weight() - 3.5).abs() < 0.01,
1364 "Weight should be 3.5, got {}",
1365 iter.weight()
1366 );
1367 assert_eq!(iter.ordinal(), 2);
1368
1369 iter.advance();
1371 assert_eq!(iter.doc(), 100);
1372 assert!(
1373 (iter.weight() - 4.5).abs() < 0.01,
1374 "Weight should be 4.5, got {}",
1375 iter.weight()
1376 );
1377 assert_eq!(iter.ordinal(), 0);
1378
1379 iter.advance();
1380 assert_eq!(iter.doc(), 103);
1381 assert!(
1382 (iter.weight() - 5.5).abs() < 0.01,
1383 "Weight should be 5.5, got {}",
1384 iter.weight()
1385 );
1386 assert_eq!(iter.ordinal(), 1);
1387
1388 iter.advance();
1389 assert_eq!(iter.doc(), 107);
1390 assert!(
1391 (iter.weight() - 6.5).abs() < 0.01,
1392 "Weight should be 6.5, got {}",
1393 iter.weight()
1394 );
1395 assert_eq!(iter.ordinal(), 3);
1396
1397 iter.advance();
1399 assert_eq!(iter.doc(), super::TERMINATED);
1400 }
1401
1402 #[test]
1403 fn test_merge_global_max_weight() {
1404 let postings1: Vec<(DocId, u16, f32)> = vec![
1406 (0, 0, 3.0),
1407 (1, 0, 7.0), (2, 0, 2.0),
1409 ];
1410 let list1 =
1411 BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
1412
1413 let postings2: Vec<(DocId, u16, f32)> = vec![
1414 (0, 0, 5.0),
1415 (1, 0, 4.0),
1416 (2, 0, 6.0), ];
1418 let list2 =
1419 BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();
1420
1421 assert!((list1.global_max_weight() - 7.0).abs() < 0.01);
1423 assert!((list2.global_max_weight() - 6.0).abs() < 0.01);
1424
1425 let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 100)]);
1427
1428 assert!(
1430 (merged.global_max_weight() - 7.0).abs() < 0.01,
1431 "Global max should be 7.0, got {}",
1432 merged.global_max_weight()
1433 );
1434
1435 let mut bytes = Vec::new();
1437 merged.serialize(&mut bytes).unwrap();
1438 let loaded =
1439 BlockSparsePostingList::deserialize(&mut std::io::Cursor::new(&bytes)).unwrap();
1440
1441 assert!(
1442 (loaded.global_max_weight() - 7.0).abs() < 0.01,
1443 "After roundtrip, global max should still be 7.0, got {}",
1444 loaded.global_max_weight()
1445 );
1446 }
1447
1448 #[test]
1449 fn test_scoring_simulation_after_merge() {
1450 let postings1: Vec<(DocId, u16, f32)> = vec![
1452 (0, 0, 0.5), (5, 0, 0.8), ];
1455 let list1 =
1456 BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
1457
1458 let postings2: Vec<(DocId, u16, f32)> = vec![
1459 (0, 0, 0.6), (3, 0, 0.9), ];
1462 let list2 =
1463 BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();
1464
1465 let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 100)]);
1467
1468 let mut bytes = Vec::new();
1470 merged.serialize(&mut bytes).unwrap();
1471 let loaded =
1472 BlockSparsePostingList::deserialize(&mut std::io::Cursor::new(&bytes)).unwrap();
1473
1474 let query_weight = 2.0f32;
1476 let mut iter = loaded.iterator();
1477
1478 assert_eq!(iter.doc(), 0);
1481 let score = query_weight * iter.weight();
1482 assert!(
1483 (score - 1.0).abs() < 0.01,
1484 "Doc 0 score should be 1.0, got {}",
1485 score
1486 );
1487
1488 iter.advance();
1489 assert_eq!(iter.doc(), 5);
1491 let score = query_weight * iter.weight();
1492 assert!(
1493 (score - 1.6).abs() < 0.01,
1494 "Doc 5 score should be 1.6, got {}",
1495 score
1496 );
1497
1498 iter.advance();
1499 assert_eq!(iter.doc(), 100);
1501 let score = query_weight * iter.weight();
1502 assert!(
1503 (score - 1.2).abs() < 0.01,
1504 "Doc 100 score should be 1.2, got {}",
1505 score
1506 );
1507
1508 iter.advance();
1509 assert_eq!(iter.doc(), 103);
1511 let score = query_weight * iter.weight();
1512 assert!(
1513 (score - 1.8).abs() < 0.01,
1514 "Doc 103 score should be 1.8, got {}",
1515 score
1516 );
1517 }
1518}