use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use std::io::{self, Cursor, Read, Write};

use super::config::WeightQuantization;
use crate::DocId;
use crate::directories::OwnedBytes;
use crate::structures::postings::TERMINATED;
use crate::structures::simd;

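/// Maximum number of postings stored in a single block.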
pub const BLOCK_SIZE: usize = 128;

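/// Fixed-size, 16-byte header describing one compressed block.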
#[derive(Debug, Clone, Copy)]
pub struct BlockHeader {
    pub count: u16,
    pub doc_id_bits: u8,
    pub ordinal_bits: u8,
    pub weight_quant: WeightQuantization,
    pub first_doc_id: DocId,
    pub max_weight: f32,
}

impl BlockHeader {
    pub const SIZE: usize = 16;

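    /// Writes the header in little-endian order, padding with three zero bytes
    /// so exactly `SIZE` bytes are emitted.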
    pub fn write<W: Write>(&self, w: &mut W) -> io::Result<()> {
        w.write_u16::<LittleEndian>(self.count)?;
        w.write_u8(self.doc_id_bits)?;
        w.write_u8(self.ordinal_bits)?;
        w.write_u8(self.weight_quant as u8)?;
        w.write_u8(0)?;
        w.write_u16::<LittleEndian>(0)?;
        w.write_u32::<LittleEndian>(self.first_doc_id)?;
        w.write_f32::<LittleEndian>(self.max_weight)?;
        Ok(())
    }

    pub fn read<R: Read>(r: &mut R) -> io::Result<Self> {
        let count = r.read_u16::<LittleEndian>()?;
        let doc_id_bits = r.read_u8()?;
        let ordinal_bits = r.read_u8()?;
        let weight_quant_byte = r.read_u8()?;
        let _ = r.read_u8()?;
        let _ = r.read_u16::<LittleEndian>()?;
        let first_doc_id = r.read_u32::<LittleEndian>()?;
        let max_weight = r.read_f32::<LittleEndian>()?;

        let weight_quant = WeightQuantization::from_u8(weight_quant_byte)
            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Invalid weight quant"))?;

        Ok(Self {
            count,
            doc_id_bits,
            ordinal_bits,
            weight_quant,
            first_doc_id,
            max_weight,
        })
    }
}

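/// One compressed block of postings: packed doc-id deltas, packed ordinals,
/// and quantized weights, each stored as an independent byte section.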
#[derive(Debug, Clone)]
pub struct SparseBlock {
    pub header: BlockHeader,
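    /// Bit-packed doc-id deltas (the first doc id is stored in the header).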
    pub doc_ids_data: OwnedBytes,
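    /// Bit-packed ordinals; empty when every ordinal is zero.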
    pub ordinals_data: OwnedBytes,
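    /// Weights encoded according to `header.weight_quant`.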
    pub weights_data: OwnedBytes,
}

impl SparseBlock {
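    /// Builds a block from up to `BLOCK_SIZE` postings of
    /// `(doc_id, ordinal, weight)`, sorted by doc id.
    /// Panics if `postings` is empty or larger than `BLOCK_SIZE`.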
    pub fn from_postings(
        postings: &[(DocId, u16, f32)],
        weight_quant: WeightQuantization,
    ) -> io::Result<Self> {
        assert!(!postings.is_empty() && postings.len() <= BLOCK_SIZE);

        let count = postings.len();
        let first_doc_id = postings[0].0;

        let mut deltas = Vec::with_capacity(count);
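        // Delta-encode doc ids; deltas[0] is a placeholder, since the first id
        // lives in the header.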
        let mut prev = first_doc_id;
        for &(doc_id, _, _) in postings {
            deltas.push(doc_id.saturating_sub(prev));
            prev = doc_id;
        }
        deltas[0] = 0;

        let doc_id_bits = simd::round_bit_width(find_optimal_bit_width(&deltas[1..]));
        let ordinals: Vec<u16> = postings.iter().map(|(_, o, _)| *o).collect();
        let max_ordinal = ordinals.iter().copied().max().unwrap_or(0);
        let ordinal_bits = if max_ordinal == 0 {
            0
        } else {
            simd::round_bit_width(bits_needed_u16(max_ordinal))
        };

        let weights: Vec<f32> = postings.iter().map(|(_, _, w)| *w).collect();
        let max_weight = weights.iter().copied().fold(0.0f32, f32::max);

        let doc_ids_data = OwnedBytes::new({
            let rounded = simd::RoundedBitWidth::from_u8(doc_id_bits);
            let num_deltas = count - 1;
            let byte_count = num_deltas * rounded.bytes_per_value();
            let mut data = vec![0u8; byte_count];
            simd::pack_rounded(&deltas[1..], rounded, &mut data);
            data
        });
        let ordinals_data = OwnedBytes::new(if ordinal_bits > 0 {
            let rounded = simd::RoundedBitWidth::from_u8(ordinal_bits);
            let byte_count = count * rounded.bytes_per_value();
            let mut data = vec![0u8; byte_count];
            let ord_u32: Vec<u32> = ordinals.iter().map(|&o| o as u32).collect();
            simd::pack_rounded(&ord_u32, rounded, &mut data);
            data
        } else {
            Vec::new()
        });
        let weights_data = OwnedBytes::new(encode_weights(&weights, weight_quant)?);

        Ok(Self {
            header: BlockHeader {
                count: count as u16,
                doc_id_bits,
                ordinal_bits,
                weight_quant,
                first_doc_id,
                max_weight,
            },
            doc_ids_data,
            ordinals_data,
            weights_data,
        })
    }

    pub fn decode_doc_ids(&self) -> Vec<DocId> {
        let mut out = Vec::with_capacity(self.header.count as usize);
        self.decode_doc_ids_into(&mut out);
        out
    }

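    /// Decodes absolute doc ids into `out`, reusing its allocation.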
    pub fn decode_doc_ids_into(&self, out: &mut Vec<DocId>) {
        let count = self.header.count as usize;
        out.clear();
        out.resize(count, 0);
        out[0] = self.header.first_doc_id;

        if count > 1 {
            let bits = self.header.doc_id_bits;
            if bits == 0 {
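                // A width of zero means every delta is zero: all postings
                // share the first doc id.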
                out[1..].fill(self.header.first_doc_id);
            } else {
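                // Unpack the packed deltas, then prefix-sum to recover
                // absolute doc ids.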
                simd::unpack_rounded(
                    &self.doc_ids_data,
                    simd::RoundedBitWidth::from_u8(bits),
                    &mut out[1..],
                    count - 1,
                );
                for i in 1..count {
                    out[i] += out[i - 1];
                }
            }
        }
    }

    pub fn decode_ordinals(&self) -> Vec<u16> {
        let mut out = Vec::with_capacity(self.header.count as usize);
        self.decode_ordinals_into(&mut out);
        out
    }

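    /// Decodes ordinals into `out`; all zeros when `ordinal_bits == 0`.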
    pub fn decode_ordinals_into(&self, out: &mut Vec<u16>) {
        let count = self.header.count as usize;
        out.clear();
        if self.header.ordinal_bits == 0 {
            out.resize(count, 0u16);
        } else {
            let mut temp = [0u32; BLOCK_SIZE];
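            // Unpack into a fixed u32 scratch buffer, then narrow each value to u16.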
            simd::unpack_rounded(
                &self.ordinals_data,
                simd::RoundedBitWidth::from_u8(self.header.ordinal_bits),
                &mut temp[..count],
                count,
            );
            out.reserve(count);
            for &v in &temp[..count] {
                out.push(v as u16);
            }
        }
    }

    pub fn decode_weights(&self) -> Vec<f32> {
        let mut out = Vec::with_capacity(self.header.count as usize);
        self.decode_weights_into(&mut out);
        out
    }

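    /// Dequantizes weights into `out`, reusing its allocation.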
    pub fn decode_weights_into(&self, out: &mut Vec<f32>) {
        out.clear();
        decode_weights_into(
            &self.weights_data,
            self.header.weight_quant,
            self.header.count as usize,
            out,
        );
    }

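    /// Decodes weights with `query_weight` already applied. For `UInt8` the
    /// query weight is folded into the dequantization scale and bias, so
    /// dequantizing and scoring happen in a single pass.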
    pub fn decode_scored_weights_into(&self, query_weight: f32, out: &mut Vec<f32>) {
        out.clear();
        let count = self.header.count as usize;
        match self.header.weight_quant {
            WeightQuantization::UInt8 if self.weights_data.len() >= 8 => {
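                // UInt8 layout: [scale: f32][min: f32][one byte per weight].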
                let scale = f32::from_le_bytes([
                    self.weights_data[0],
                    self.weights_data[1],
                    self.weights_data[2],
                    self.weights_data[3],
                ]);
                let min_val = f32::from_le_bytes([
                    self.weights_data[4],
                    self.weights_data[5],
                    self.weights_data[6],
                    self.weights_data[7],
                ]);
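                // w = q * scale + min, so:
                // query_weight * w = q * (query_weight * scale) + (query_weight * min).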
                let eff_scale = query_weight * scale;
                let eff_bias = query_weight * min_val;
                out.resize(count, 0.0);
                simd::dequantize_uint8(&self.weights_data[8..], out, eff_scale, eff_bias, count);
            }
            _ => {
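                // Other quantizations: decode first, then scale by the query weight.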
                decode_weights_into(&self.weights_data, self.header.weight_quant, count, out);
                for w in out.iter_mut() {
                    *w *= query_weight;
                }
            }
        }
    }

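    /// Serializes the block: header, three `u16` section lengths plus one
    /// `u16` of padding, then the raw doc-id, ordinal, and weight sections.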
    pub fn write<W: Write>(&self, w: &mut W) -> io::Result<()> {
        self.header.write(w)?;
        w.write_u16::<LittleEndian>(self.doc_ids_data.len() as u16)?;
        w.write_u16::<LittleEndian>(self.ordinals_data.len() as u16)?;
        w.write_u16::<LittleEndian>(self.weights_data.len() as u16)?;
        w.write_u16::<LittleEndian>(0)?;
        w.write_all(&self.doc_ids_data)?;
        w.write_all(&self.ordinals_data)?;
        w.write_all(&self.weights_data)?;
        Ok(())
    }

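    /// Reads a block written by [`SparseBlock::write`], copying each section
    /// into owned buffers.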
    pub fn read<R: Read>(r: &mut R) -> io::Result<Self> {
        let header = BlockHeader::read(r)?;
        let doc_ids_len = r.read_u16::<LittleEndian>()? as usize;
        let ordinals_len = r.read_u16::<LittleEndian>()? as usize;
        let weights_len = r.read_u16::<LittleEndian>()? as usize;
        let _ = r.read_u16::<LittleEndian>()?;

        let mut doc_ids_vec = vec![0u8; doc_ids_len];
        r.read_exact(&mut doc_ids_vec)?;
        let mut ordinals_vec = vec![0u8; ordinals_len];
        r.read_exact(&mut ordinals_vec)?;
        let mut weights_vec = vec![0u8; weights_len];
        r.read_exact(&mut weights_vec)?;

        Ok(Self {
            header,
            doc_ids_data: OwnedBytes::new(doc_ids_vec),
            ordinals_data: OwnedBytes::new(ordinals_vec),
            weights_data: OwnedBytes::new(weights_vec),
        })
    }

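    /// Zero-copy construction: parses the header and section lengths, then
    /// slices the shared buffer instead of copying the packed data.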
    pub fn from_owned_bytes(data: crate::directories::OwnedBytes) -> crate::Result<Self> {
        let b = data.as_slice();
        if b.len() < BlockHeader::SIZE + 8 {
            return Err(crate::Error::Corruption(
                "sparse block too small".to_string(),
            ));
        }
        let mut cursor = Cursor::new(&b[..BlockHeader::SIZE]);
        let header =
            BlockHeader::read(&mut cursor).map_err(|e| crate::Error::Corruption(e.to_string()))?;

        let p = BlockHeader::SIZE;
        let doc_ids_len = u16::from_le_bytes([b[p], b[p + 1]]) as usize;
        let ordinals_len = u16::from_le_bytes([b[p + 2], b[p + 3]]) as usize;
        let weights_len = u16::from_le_bytes([b[p + 4], b[p + 5]]) as usize;
        let data_start = p + 8;
        let ord_start = data_start + doc_ids_len;
        let wt_start = ord_start + ordinals_len;

        Ok(Self {
            header,
            doc_ids_data: data.slice(data_start..ord_start),
            ordinals_data: data.slice(ord_start..wt_start),
            weights_data: data.slice(wt_start..wt_start + weights_len),
        })
    }

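    /// Returns a copy of this block with `first_doc_id` shifted by
    /// `doc_offset`. The packed sections are shared, not re-encoded, which
    /// keeps segment merging cheap.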
    pub fn with_doc_offset(&self, doc_offset: u32) -> Self {
        Self {
            header: BlockHeader {
                first_doc_id: self.header.first_doc_id + doc_offset,
                ..self.header
            },
            doc_ids_data: self.doc_ids_data.clone(),
            ordinals_data: self.ordinals_data.clone(),
            weights_data: self.weights_data.clone(),
        }
    }
}

#[derive(Debug, Clone)]
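/// A posting list stored as a sequence of independently decodable blocks.
/// Per-block max weights enable block-skipping during scored evaluation.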
pub struct BlockSparsePostingList {
    pub doc_count: u32,
    pub blocks: Vec<SparseBlock>,
}

impl BlockSparsePostingList {
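    /// Builds a posting list, splitting `postings` into blocks of
    /// `block_size` entries (clamped to at least 16).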
    pub fn from_postings_with_block_size(
        postings: &[(DocId, u16, f32)],
        weight_quant: WeightQuantization,
        block_size: usize,
    ) -> io::Result<Self> {
        if postings.is_empty() {
            return Ok(Self {
                doc_count: 0,
                blocks: Vec::new(),
            });
        }

        let block_size = block_size.max(16);
        let mut blocks = Vec::new();
        for chunk in postings.chunks(block_size) {
            blocks.push(SparseBlock::from_postings(chunk, weight_quant)?);
        }

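        // Postings may repeat a doc id (one entry per ordinal); doc_count
        // tracks unique documents only.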
        let mut unique_docs = 1u32;
        for i in 1..postings.len() {
            if postings[i].0 != postings[i - 1].0 {
                unique_docs += 1;
            }
        }

        Ok(Self {
            doc_count: unique_docs,
            blocks,
        })
    }

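    /// Builds a posting list with the default `BLOCK_SIZE`.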
    pub fn from_postings(
        postings: &[(DocId, u16, f32)],
        weight_quant: WeightQuantization,
    ) -> io::Result<Self> {
        Self::from_postings_with_block_size(postings, weight_quant, BLOCK_SIZE)
    }

    pub fn doc_count(&self) -> u32 {
        self.doc_count
    }

    pub fn num_blocks(&self) -> usize {
        self.blocks.len()
    }

    pub fn global_max_weight(&self) -> f32 {
        self.blocks
            .iter()
            .map(|b| b.header.max_weight)
            .fold(0.0f32, f32::max)
    }

    pub fn block_max_weight(&self, block_idx: usize) -> Option<f32> {
        self.blocks.get(block_idx).map(|b| b.header.max_weight)
    }

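    /// Approximate in-memory size of this posting list, in bytes.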
    pub fn size_bytes(&self) -> usize {
        use std::mem::size_of;

        let header_size = size_of::<u32>() * 2;
        let blocks_size: usize = self
            .blocks
            .iter()
            .map(|b| {
                size_of::<BlockHeader>()
                    + b.doc_ids_data.len()
                    + b.ordinals_data.len()
                    + b.weights_data.len()
            })
            .sum();
        header_size + blocks_size
    }

    pub fn iterator(&self) -> BlockSparsePostingIterator<'_> {
        BlockSparsePostingIterator::new(self)
    }

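    /// Serializes the full posting list:
    /// `[doc_count: u32][global_max_weight: f32][num_blocks: u32]`, followed
    /// by one `SparseSkipEntry` per block, then the concatenated block bytes.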
    pub fn serialize<W: Write>(&self, w: &mut W) -> io::Result<()> {
        use super::SparseSkipEntry;

        w.write_u32::<LittleEndian>(self.doc_count)?;
        w.write_f32::<LittleEndian>(self.global_max_weight())?;
        w.write_u32::<LittleEndian>(self.blocks.len() as u32)?;

        let mut block_bytes: Vec<Vec<u8>> = Vec::with_capacity(self.blocks.len());
        for block in &self.blocks {
            let mut buf = Vec::new();
            block.write(&mut buf)?;
            block_bytes.push(buf);
        }

        let mut offset = 0u32;
        for (block, bytes) in self.blocks.iter().zip(block_bytes.iter()) {
            let first_doc = block.header.first_doc_id;
            let doc_ids = block.decode_doc_ids();
            let last_doc = doc_ids.last().copied().unwrap_or(first_doc);
            let length = bytes.len() as u32;

            let entry =
                SparseSkipEntry::new(first_doc, last_doc, offset, length, block.header.max_weight);
            entry.write(w)?;
            offset += length;
        }

        for bytes in block_bytes {
            w.write_all(&bytes)?;
        }

        Ok(())
    }

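    /// Like [`Self::serialize`], but returns the concatenated block bytes and
    /// the skip entries separately so the caller controls their placement.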
    pub fn serialize_v3(&self) -> io::Result<(Vec<u8>, Vec<super::SparseSkipEntry>)> {
        let mut block_data = Vec::new();
        let mut skip_entries = Vec::with_capacity(self.blocks.len());
        let mut offset = 0u32;

        for block in &self.blocks {
            let mut buf = Vec::new();
            block.write(&mut buf)?;
            let length = buf.len() as u32;

            let first_doc = block.header.first_doc_id;
            let doc_ids = block.decode_doc_ids();
            let last_doc = doc_ids.last().copied().unwrap_or(first_doc);

            skip_entries.push(super::SparseSkipEntry::new(
                first_doc,
                last_doc,
                offset,
                length,
                block.header.max_weight,
            ));

            block_data.extend_from_slice(&buf);
            offset += length;
        }

        Ok((block_data, skip_entries))
    }

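    /// Reads a posting list written by [`Self::serialize`]; skip entries are
    /// read and discarded.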
    pub fn deserialize<R: Read>(r: &mut R) -> io::Result<Self> {
        use super::SparseSkipEntry;

        let doc_count = r.read_u32::<LittleEndian>()?;
        let _global_max_weight = r.read_f32::<LittleEndian>()?;
        let num_blocks = r.read_u32::<LittleEndian>()? as usize;

        for _ in 0..num_blocks {
            let _ = SparseSkipEntry::read(r)?;
        }

        let mut blocks = Vec::with_capacity(num_blocks);
        for _ in 0..num_blocks {
            blocks.push(SparseBlock::read(r)?);
        }
        Ok(Self { doc_count, blocks })
    }

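    /// Reads only the list header and skip entries, returning
    /// `(doc_count, global_max_weight, skip_entries, header_bytes_consumed)`.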
    pub fn deserialize_header<R: Read>(
        r: &mut R,
    ) -> io::Result<(u32, f32, Vec<super::SparseSkipEntry>, usize)> {
        use super::SparseSkipEntry;

        let doc_count = r.read_u32::<LittleEndian>()?;
        let global_max_weight = r.read_f32::<LittleEndian>()?;
        let num_blocks = r.read_u32::<LittleEndian>()? as usize;

        let mut entries = Vec::with_capacity(num_blocks);
        for _ in 0..num_blocks {
            entries.push(SparseSkipEntry::read(r)?);
        }

        let header_size = 4 + 4 + 4 + num_blocks * SparseSkipEntry::SIZE;

        Ok((doc_count, global_max_weight, entries, header_size))
    }

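    /// Decodes every block into `(doc_id, ordinal, weight)` triples.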
    pub fn decode_all(&self) -> Vec<(DocId, u16, f32)> {
        let total_postings: usize = self.blocks.iter().map(|b| b.header.count as usize).sum();
        let mut result = Vec::with_capacity(total_postings);
        for block in &self.blocks {
            let doc_ids = block.decode_doc_ids();
            let ordinals = block.decode_ordinals();
            let weights = block.decode_weights();
            for i in 0..block.header.count as usize {
                result.push((doc_ids[i], ordinals[i], weights[i]));
            }
        }
        result
    }

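    /// Concatenates posting lists from multiple segments. Each list's blocks
    /// are reused as-is with `first_doc_id` shifted by the segment's doc-id
    /// offset; no block is re-encoded. Assumes callers pass lists in ascending
    /// doc-id order with non-overlapping offset ranges.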
    pub fn merge_with_offsets(lists: &[(&BlockSparsePostingList, u32)]) -> Self {
        if lists.is_empty() {
            return Self {
                doc_count: 0,
                blocks: Vec::new(),
            };
        }

        let total_blocks: usize = lists.iter().map(|(pl, _)| pl.blocks.len()).sum();
        let total_docs: u32 = lists.iter().map(|(pl, _)| pl.doc_count).sum();

        let mut merged_blocks = Vec::with_capacity(total_blocks);

        for (posting_list, doc_offset) in lists {
            for block in &posting_list.blocks {
                merged_blocks.push(block.with_doc_offset(*doc_offset));
            }
        }

        Self {
            doc_count: total_docs,
            blocks: merged_blocks,
        }
    }

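    /// Binary-searches for the block that may contain `target`: the last
    /// block with `first_doc_id <= target`, or block 0 if `target` precedes
    /// all blocks.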
    fn find_block(&self, target: DocId) -> Option<usize> {
        if self.blocks.is_empty() {
            return None;
        }
        let idx = self
            .blocks
            .partition_point(|b| b.header.first_doc_id <= target);
        if idx == 0 {
            Some(0)
        } else {
            Some(idx - 1)
        }
    }
}

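/// Cursor over a [`BlockSparsePostingList`] that decodes one block at a time.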
pub struct BlockSparsePostingIterator<'a> {
    posting_list: &'a BlockSparsePostingList,
    block_idx: usize,
    in_block_idx: usize,
    current_doc_ids: Vec<DocId>,
    current_ordinals: Vec<u16>,
    current_weights: Vec<f32>,
    exhausted: bool,
}

impl<'a> BlockSparsePostingIterator<'a> {
    fn new(posting_list: &'a BlockSparsePostingList) -> Self {
        let mut iter = Self {
            posting_list,
            block_idx: 0,
            in_block_idx: 0,
            current_doc_ids: Vec::new(),
            current_ordinals: Vec::new(),
            current_weights: Vec::new(),
            exhausted: posting_list.blocks.is_empty(),
        };
        if !iter.exhausted {
            iter.load_block(0);
        }
        iter
    }

    fn load_block(&mut self, block_idx: usize) {
        if let Some(block) = self.posting_list.blocks.get(block_idx) {
            block.decode_doc_ids_into(&mut self.current_doc_ids);
            block.decode_ordinals_into(&mut self.current_ordinals);
            block.decode_weights_into(&mut self.current_weights);
            self.block_idx = block_idx;
            self.in_block_idx = 0;
        }
    }

    pub fn doc(&self) -> DocId {
        if self.exhausted {
            TERMINATED
        } else {
            self.current_doc_ids
                .get(self.in_block_idx)
                .copied()
                .unwrap_or(TERMINATED)
        }
    }

    pub fn weight(&self) -> f32 {
        self.current_weights
            .get(self.in_block_idx)
            .copied()
            .unwrap_or(0.0)
    }

    pub fn ordinal(&self) -> u16 {
        self.current_ordinals
            .get(self.in_block_idx)
            .copied()
            .unwrap_or(0)
    }

    pub fn advance(&mut self) -> DocId {
        if self.exhausted {
            return TERMINATED;
        }
        self.in_block_idx += 1;
        if self.in_block_idx >= self.current_doc_ids.len() {
            self.block_idx += 1;
            if self.block_idx >= self.posting_list.blocks.len() {
                self.exhausted = true;
            } else {
                self.load_block(self.block_idx);
            }
        }
        self.doc()
    }

    pub fn seek(&mut self, target: DocId) -> DocId {
        if self.exhausted {
            return TERMINATED;
        }
        if self.doc() >= target {
            return self.doc();
        }

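        // Fast path: the target lies within the currently loaded block.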
        if let Some(&last_doc) = self.current_doc_ids.last()
            && last_doc >= target
        {
            let remaining = &self.current_doc_ids[self.in_block_idx..];
            let pos = simd::find_first_ge_u32(remaining, target);
            self.in_block_idx += pos;
            if self.in_block_idx >= self.current_doc_ids.len() {
                self.block_idx += 1;
                if self.block_idx >= self.posting_list.blocks.len() {
                    self.exhausted = true;
                } else {
                    self.load_block(self.block_idx);
                }
            }
            return self.doc();
        }

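        // Slow path: binary-search the block list for the candidate block,
        // then position within it.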
        if let Some(block_idx) = self.posting_list.find_block(target) {
            self.load_block(block_idx);
            let pos = simd::find_first_ge_u32(&self.current_doc_ids, target);
            self.in_block_idx = pos;
            if self.in_block_idx >= self.current_doc_ids.len() {
                self.block_idx += 1;
                if self.block_idx >= self.posting_list.blocks.len() {
                    self.exhausted = true;
                } else {
                    self.load_block(self.block_idx);
                }
            }
        } else {
            self.exhausted = true;
        }
        self.doc()
    }

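    /// Jumps to the first posting of the next block, e.g. after block-max
    /// metadata shows the current block cannot contribute a competitive score.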
    pub fn skip_to_next_block(&mut self) -> DocId {
        if self.exhausted {
            return TERMINATED;
        }
        let next = self.block_idx + 1;
        if next >= self.posting_list.blocks.len() {
            self.exhausted = true;
            return TERMINATED;
        }
        self.load_block(next);
        self.doc()
    }

    pub fn is_exhausted(&self) -> bool {
        self.exhausted
    }

    pub fn current_block_max_weight(&self) -> f32 {
        self.posting_list
            .blocks
            .get(self.block_idx)
            .map(|b| b.header.max_weight)
            .unwrap_or(0.0)
    }

    pub fn current_block_max_contribution(&self, query_weight: f32) -> f32 {
        query_weight * self.current_block_max_weight()
    }
}

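/// Bit width needed to represent the largest value, or 0 for an empty slice.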
fn find_optimal_bit_width(values: &[u32]) -> u8 {
    if values.is_empty() {
        return 0;
    }
    let max_val = values.iter().copied().max().unwrap_or(0);
    simd::bits_needed(max_val)
}

fn bits_needed_u16(val: u16) -> u8 {
    if val == 0 {
        0
    } else {
        16 - val.leading_zeros() as u8
    }
}

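/// Encodes weights per the chosen quantization. `Float32`/`Float16` store raw
/// (half-)floats; `UInt8`/`UInt4` store a `[scale: f32][min: f32]` prefix
/// followed by the quantized values (`UInt4` packs two values per byte).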
fn encode_weights(weights: &[f32], quant: WeightQuantization) -> io::Result<Vec<u8>> {
    let mut data = Vec::new();
    match quant {
        WeightQuantization::Float32 => {
            for &w in weights {
                data.write_f32::<LittleEndian>(w)?;
            }
        }
        WeightQuantization::Float16 => {
            use half::f16;
            for &w in weights {
                data.write_u16::<LittleEndian>(f16::from_f32(w).to_bits())?;
            }
        }
        WeightQuantization::UInt8 => {
            let min = weights.iter().copied().fold(f32::INFINITY, f32::min);
            let max = weights.iter().copied().fold(f32::NEG_INFINITY, f32::max);
            let range = max - min;
            let scale = if range < f32::EPSILON {
                1.0
            } else {
                range / 255.0
            };
            data.write_f32::<LittleEndian>(scale)?;
            data.write_f32::<LittleEndian>(min)?;
            for &w in weights {
                data.write_u8(((w - min) / scale).round() as u8)?;
            }
        }
        WeightQuantization::UInt4 => {
            let min = weights.iter().copied().fold(f32::INFINITY, f32::min);
            let max = weights.iter().copied().fold(f32::NEG_INFINITY, f32::max);
            let range = max - min;
            let scale = if range < f32::EPSILON {
                1.0
            } else {
                range / 15.0
            };
            data.write_f32::<LittleEndian>(scale)?;
            data.write_f32::<LittleEndian>(min)?;
            let mut i = 0;
            while i < weights.len() {
                let q1 = ((weights[i] - min) / scale).round() as u8 & 0x0F;
                let q2 = if i + 1 < weights.len() {
                    ((weights[i + 1] - min) / scale).round() as u8 & 0x0F
                } else {
                    0
                };
                data.write_u8((q2 << 4) | q1)?;
                i += 2;
            }
        }
    }
    Ok(data)
}

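/// Inverse of [`encode_weights`]: decodes `count` weights from `data` into `out`.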
fn decode_weights_into(data: &[u8], quant: WeightQuantization, count: usize, out: &mut Vec<f32>) {
    let mut cursor = Cursor::new(data);
    match quant {
        WeightQuantization::Float32 => {
            for _ in 0..count {
                out.push(cursor.read_f32::<LittleEndian>().unwrap_or(0.0));
            }
        }
        WeightQuantization::Float16 => {
            use half::f16;
            for _ in 0..count {
                let bits = cursor.read_u16::<LittleEndian>().unwrap_or(0);
                out.push(f16::from_bits(bits).to_f32());
            }
        }
        WeightQuantization::UInt8 => {
            let scale = cursor.read_f32::<LittleEndian>().unwrap_or(1.0);
            let min_val = cursor.read_f32::<LittleEndian>().unwrap_or(0.0);
            let offset = cursor.position() as usize;
            out.resize(count, 0.0);
            simd::dequantize_uint8(&data[offset..], out, scale, min_val, count);
        }
        WeightQuantization::UInt4 => {
            let scale = cursor.read_f32::<LittleEndian>().unwrap_or(1.0);
            let min = cursor.read_f32::<LittleEndian>().unwrap_or(0.0);
            let mut i = 0;
            while i < count {
                let byte = cursor.read_u8().unwrap_or(0);
                out.push((byte & 0x0F) as f32 * scale + min);
                i += 1;
                if i < count {
                    out.push((byte >> 4) as f32 * scale + min);
                    i += 1;
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_block_roundtrip() {
        let postings = vec![
            (10u32, 0u16, 1.5f32),
            (15, 0, 2.0),
            (20, 1, 0.5),
            (100, 0, 3.0),
        ];
        let block = SparseBlock::from_postings(&postings, WeightQuantization::Float32).unwrap();

        assert_eq!(block.decode_doc_ids(), vec![10, 15, 20, 100]);
        assert_eq!(block.decode_ordinals(), vec![0, 0, 1, 0]);
        let weights = block.decode_weights();
        assert!((weights[0] - 1.5).abs() < 0.01);
    }

    #[test]
    fn test_posting_list() {
        let postings: Vec<(DocId, u16, f32)> =
            (0..300).map(|i| (i * 2, 0, i as f32 * 0.1)).collect();
        let list =
            BlockSparsePostingList::from_postings(&postings, WeightQuantization::Float32).unwrap();

        assert_eq!(list.doc_count(), 300);
        assert_eq!(list.num_blocks(), 3);

        let mut iter = list.iterator();
        assert_eq!(iter.doc(), 0);
        iter.advance();
        assert_eq!(iter.doc(), 2);
    }

    #[test]
    fn test_serialization() {
        let postings = vec![(1u32, 0u16, 0.5f32), (10, 1, 1.5), (100, 0, 2.5)];
        let list =
            BlockSparsePostingList::from_postings(&postings, WeightQuantization::UInt8).unwrap();

        let mut buf = Vec::new();
        list.serialize(&mut buf).unwrap();
        let list2 = BlockSparsePostingList::deserialize(&mut Cursor::new(&buf)).unwrap();

        assert_eq!(list.doc_count(), list2.doc_count());
    }

    #[test]
    fn test_seek() {
        let postings: Vec<(DocId, u16, f32)> = (0..500).map(|i| (i * 3, 0, i as f32)).collect();
        let list =
            BlockSparsePostingList::from_postings(&postings, WeightQuantization::Float32).unwrap();

        let mut iter = list.iterator();
        assert_eq!(iter.seek(300), 300);
        assert_eq!(iter.seek(301), 303);
        assert_eq!(iter.seek(2000), TERMINATED);
    }

    #[test]
    fn test_merge_with_offsets() {
        let postings1: Vec<(DocId, u16, f32)> = vec![(0, 0, 1.0), (5, 0, 2.0), (10, 1, 3.0)];
        let list1 =
            BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();

        let postings2: Vec<(DocId, u16, f32)> = vec![(0, 0, 4.0), (3, 1, 5.0), (7, 0, 6.0)];
        let list2 =
            BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();

        let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 100)]);

        assert_eq!(merged.doc_count(), 6);

        let decoded = merged.decode_all();
        assert_eq!(decoded.len(), 6);

        assert_eq!(decoded[0].0, 0);
        assert_eq!(decoded[1].0, 5);
        assert_eq!(decoded[2].0, 10);

        assert_eq!(decoded[3].0, 100);
        assert_eq!(decoded[4].0, 103);
        assert_eq!(decoded[5].0, 107);

        assert!((decoded[0].2 - 1.0).abs() < 0.01);
        assert!((decoded[3].2 - 4.0).abs() < 0.01);

        assert_eq!(decoded[2].1, 1);
        assert_eq!(decoded[4].1, 1);
    }

    #[test]
    fn test_merge_with_offsets_multi_block() {
        let postings1: Vec<(DocId, u16, f32)> = (0..200).map(|i| (i * 2, 0, i as f32)).collect();
        let list1 =
            BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
        assert!(list1.num_blocks() > 1, "Should have multiple blocks");

        let postings2: Vec<(DocId, u16, f32)> = (0..150).map(|i| (i * 3, 1, i as f32)).collect();
        let list2 =
            BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();

        let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 1000)]);

        assert_eq!(merged.doc_count(), 350);
        assert_eq!(merged.num_blocks(), list1.num_blocks() + list2.num_blocks());

        let mut iter = merged.iterator();

        assert_eq!(iter.doc(), 0);

        let doc = iter.seek(1000);
        assert_eq!(doc, 1000);
        iter.advance();
        assert_eq!(iter.doc(), 1003);
    }

    #[test]
    fn test_merge_with_offsets_serialize_roundtrip() {
        let postings1: Vec<(DocId, u16, f32)> = vec![(0, 0, 1.0), (5, 0, 2.0), (10, 1, 3.0)];
        let list1 =
            BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();

        let postings2: Vec<(DocId, u16, f32)> = vec![(0, 0, 4.0), (3, 1, 5.0), (7, 0, 6.0)];
        let list2 =
            BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();

        let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 100)]);

        let mut bytes = Vec::new();
        merged.serialize(&mut bytes).unwrap();

        let mut cursor = std::io::Cursor::new(&bytes);
        let loaded = BlockSparsePostingList::deserialize(&mut cursor).unwrap();

        let decoded = loaded.decode_all();
        assert_eq!(decoded.len(), 6);

        assert_eq!(decoded[0].0, 0);
        assert_eq!(decoded[1].0, 5);
        assert_eq!(decoded[2].0, 10);

        assert_eq!(decoded[3].0, 100, "First doc of seg2 should be 0+100=100");
        assert_eq!(decoded[4].0, 103, "Second doc of seg2 should be 3+100=103");
        assert_eq!(decoded[5].0, 107, "Third doc of seg2 should be 7+100=107");

        let mut iter = loaded.iterator();
        assert_eq!(iter.doc(), 0);
        iter.advance();
        assert_eq!(iter.doc(), 5);
        iter.advance();
        assert_eq!(iter.doc(), 10);
        iter.advance();
        assert_eq!(iter.doc(), 100);
        iter.advance();
        assert_eq!(iter.doc(), 103);
        iter.advance();
        assert_eq!(iter.doc(), 107);
    }

    #[test]
    fn test_merge_seek_after_roundtrip() {
        let postings1: Vec<(DocId, u16, f32)> = (0..200).map(|i| (i * 2, 0, 1.0)).collect();
        let list1 =
            BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();

        let postings2: Vec<(DocId, u16, f32)> = (0..150).map(|i| (i * 3, 0, 2.0)).collect();
        let list2 =
            BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();

        let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 1000)]);

        let mut bytes = Vec::new();
        merged.serialize(&mut bytes).unwrap();
        let loaded =
            BlockSparsePostingList::deserialize(&mut std::io::Cursor::new(&bytes)).unwrap();

        let mut iter = loaded.iterator();

        let doc = iter.seek(100);
        assert_eq!(doc, 100, "Seek to 100 in segment 1");

        let doc = iter.seek(1000);
        assert_eq!(doc, 1000, "Seek to 1000 (first doc of segment 2)");

        let doc = iter.seek(1050);
        assert!(
            doc >= 1050,
            "Seek to 1050 should find doc >= 1050, got {}",
            doc
        );

        let doc = iter.seek(500);
        assert!(
            doc >= 1050,
            "Seek backwards should not go back, got {}",
            doc
        );

        let mut iter2 = loaded.iterator();

        let mut count = 0;
        let mut prev_doc = 0;
        while iter2.doc() != TERMINATED {
            let current = iter2.doc();
            if count > 0 {
                assert!(
                    current > prev_doc,
                    "Docs should be monotonically increasing: {} vs {}",
                    prev_doc,
                    current
                );
            }
            prev_doc = current;
            iter2.advance();
            count += 1;
        }
        assert_eq!(count, 350, "Should have 350 total docs");
    }

    #[test]
    fn test_doc_count_multi_value() {
        let postings: Vec<(DocId, u16, f32)> = vec![
            (0, 0, 1.0),
            (0, 1, 1.5),
            (0, 2, 2.0),
            (5, 0, 3.0),
            (5, 1, 3.5),
            (10, 0, 4.0),
        ];
        let list =
            BlockSparsePostingList::from_postings(&postings, WeightQuantization::Float32).unwrap();

        assert_eq!(list.doc_count(), 3);

        let decoded = list.decode_all();
        assert_eq!(decoded.len(), 6);
    }

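    // Simulates the zero-copy merge path: concatenate serialized segments,
    // rewrite the skip entries, and patch each block's first_doc_id in place.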
    #[test]
    fn test_zero_copy_merge_patches_first_doc_id() {
        use crate::structures::SparseSkipEntry;

        let postings1: Vec<(DocId, u16, f32)> = (0..200).map(|i| (i * 2, 0, i as f32)).collect();
        let list1 =
            BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
        assert!(list1.num_blocks() > 1);

        let postings2: Vec<(DocId, u16, f32)> = (0..150).map(|i| (i * 3, 1, i as f32)).collect();
        let list2 =
            BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();

        let mut bytes1 = Vec::new();
        list1.serialize(&mut bytes1).unwrap();
        let mut bytes2 = Vec::new();
        list2.serialize(&mut bytes2).unwrap();

        fn parse_raw(data: &[u8]) -> (u32, f32, Vec<SparseSkipEntry>, &[u8]) {
            let doc_count = u32::from_le_bytes(data[0..4].try_into().unwrap());
            let global_max = f32::from_le_bytes(data[4..8].try_into().unwrap());
            let num_blocks = u32::from_le_bytes(data[8..12].try_into().unwrap()) as usize;
            let mut pos = 12;
            let mut skip = Vec::new();
            for _ in 0..num_blocks {
                let first_doc = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
                let last_doc = u32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap());
                let offset = u32::from_le_bytes(data[pos + 8..pos + 12].try_into().unwrap());
                let length = u32::from_le_bytes(data[pos + 12..pos + 16].try_into().unwrap());
                let max_w = f32::from_le_bytes(data[pos + 16..pos + 20].try_into().unwrap());
                skip.push(SparseSkipEntry::new(
                    first_doc, last_doc, offset, length, max_w,
                ));
                pos += 20;
            }
            (doc_count, global_max, skip, &data[pos..])
        }

        let (dc1, gm1, skip1, raw1) = parse_raw(&bytes1);
        let (dc2, gm2, skip2, raw2) = parse_raw(&bytes2);

        let doc_offset: u32 = 1000;
        let total_docs = dc1 + dc2;
        let global_max = gm1.max(gm2);
        let total_blocks = (skip1.len() + skip2.len()) as u32;

        let mut output = Vec::new();
        output.extend_from_slice(&total_docs.to_le_bytes());
        output.extend_from_slice(&global_max.to_le_bytes());
        output.extend_from_slice(&total_blocks.to_le_bytes());

        let mut block_data_offset = 0u32;
        for entry in &skip1 {
            let adjusted = SparseSkipEntry::new(
                entry.first_doc,
                entry.last_doc,
                block_data_offset + entry.offset,
                entry.length,
                entry.max_weight,
            );
            adjusted.write(&mut output).unwrap();
        }
        if let Some(last) = skip1.last() {
            block_data_offset += last.offset + last.length;
        }
        for entry in &skip2 {
            let adjusted = SparseSkipEntry::new(
                entry.first_doc + doc_offset,
                entry.last_doc + doc_offset,
                block_data_offset + entry.offset,
                entry.length,
                entry.max_weight,
            );
            adjusted.write(&mut output).unwrap();
        }

        output.extend_from_slice(raw1);

        const FIRST_DOC_ID_OFFSET: usize = 8;
        let mut buf2 = raw2.to_vec();
        for entry in &skip2 {
            let off = entry.offset as usize + FIRST_DOC_ID_OFFSET;
            if off + 4 <= buf2.len() {
                let old = u32::from_le_bytes(buf2[off..off + 4].try_into().unwrap());
                let patched = (old + doc_offset).to_le_bytes();
                buf2[off..off + 4].copy_from_slice(&patched);
            }
        }
        output.extend_from_slice(&buf2);

        let loaded = BlockSparsePostingList::deserialize(&mut Cursor::new(&output)).unwrap();
        assert_eq!(loaded.doc_count(), 350);

        let mut iter = loaded.iterator();

        assert_eq!(iter.doc(), 0);
        let doc = iter.seek(100);
        assert_eq!(doc, 100);
        let doc = iter.seek(398);
        assert_eq!(doc, 398);

        let doc = iter.seek(1000);
        assert_eq!(doc, 1000, "First doc of segment 2 should be 1000");
        iter.advance();
        assert_eq!(iter.doc(), 1003, "Second doc of segment 2 should be 1003");
        let doc = iter.seek(1447);
        assert_eq!(doc, 1447, "Last doc of segment 2 should be 1447");

        iter.advance();
        assert_eq!(iter.doc(), TERMINATED);

        let reference =
            BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, doc_offset)]);
        let mut ref_iter = reference.iterator();
        let mut zc_iter = loaded.iterator();
        while ref_iter.doc() != TERMINATED {
            assert_eq!(
                ref_iter.doc(),
                zc_iter.doc(),
                "Zero-copy and reference merge should produce identical doc_ids"
            );
            assert!(
                (ref_iter.weight() - zc_iter.weight()).abs() < 0.01,
                "Weights should match: {} vs {}",
                ref_iter.weight(),
                zc_iter.weight()
            );
            ref_iter.advance();
            zc_iter.advance();
        }
        assert_eq!(zc_iter.doc(), TERMINATED);
    }

    #[test]
    fn test_doc_count_single_value() {
        let postings: Vec<(DocId, u16, f32)> =
            vec![(0, 0, 1.0), (5, 0, 2.0), (10, 0, 3.0), (15, 0, 4.0)];
        let list =
            BlockSparsePostingList::from_postings(&postings, WeightQuantization::Float32).unwrap();

        assert_eq!(list.doc_count(), 4);
    }

    #[test]
    fn test_doc_count_multi_value_serialization_roundtrip() {
        let postings: Vec<(DocId, u16, f32)> =
            vec![(0, 0, 1.0), (0, 1, 1.5), (5, 0, 2.0), (5, 1, 2.5)];
        let list =
            BlockSparsePostingList::from_postings(&postings, WeightQuantization::Float32).unwrap();
        assert_eq!(list.doc_count(), 2);

        let mut buf = Vec::new();
        list.serialize(&mut buf).unwrap();
        let loaded = BlockSparsePostingList::deserialize(&mut Cursor::new(&buf)).unwrap();
        assert_eq!(loaded.doc_count(), 2);
    }

    #[test]
    fn test_merge_preserves_weights_and_ordinals() {
        let postings1: Vec<(DocId, u16, f32)> = vec![(0, 0, 1.5), (5, 1, 2.5), (10, 2, 3.5)];
        let list1 =
            BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();

        let postings2: Vec<(DocId, u16, f32)> = vec![(0, 0, 4.5), (3, 1, 5.5), (7, 3, 6.5)];
        let list2 =
            BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();

        let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 100)]);

        let mut bytes = Vec::new();
        merged.serialize(&mut bytes).unwrap();
        let loaded =
            BlockSparsePostingList::deserialize(&mut std::io::Cursor::new(&bytes)).unwrap();

        let mut iter = loaded.iterator();

        assert_eq!(iter.doc(), 0);
        assert!(
            (iter.weight() - 1.5).abs() < 0.01,
            "Weight should be 1.5, got {}",
            iter.weight()
        );
        assert_eq!(iter.ordinal(), 0);

        iter.advance();
        assert_eq!(iter.doc(), 5);
        assert!(
            (iter.weight() - 2.5).abs() < 0.01,
            "Weight should be 2.5, got {}",
            iter.weight()
        );
        assert_eq!(iter.ordinal(), 1);

        iter.advance();
        assert_eq!(iter.doc(), 10);
        assert!(
            (iter.weight() - 3.5).abs() < 0.01,
            "Weight should be 3.5, got {}",
            iter.weight()
        );
        assert_eq!(iter.ordinal(), 2);

        iter.advance();
        assert_eq!(iter.doc(), 100);
        assert!(
            (iter.weight() - 4.5).abs() < 0.01,
            "Weight should be 4.5, got {}",
            iter.weight()
        );
        assert_eq!(iter.ordinal(), 0);

        iter.advance();
        assert_eq!(iter.doc(), 103);
        assert!(
            (iter.weight() - 5.5).abs() < 0.01,
            "Weight should be 5.5, got {}",
            iter.weight()
        );
        assert_eq!(iter.ordinal(), 1);

        iter.advance();
        assert_eq!(iter.doc(), 107);
        assert!(
            (iter.weight() - 6.5).abs() < 0.01,
            "Weight should be 6.5, got {}",
            iter.weight()
        );
        assert_eq!(iter.ordinal(), 3);

        iter.advance();
        assert_eq!(iter.doc(), TERMINATED);
    }

    #[test]
    fn test_merge_global_max_weight() {
        let postings1: Vec<(DocId, u16, f32)> = vec![(0, 0, 3.0), (1, 0, 7.0), (2, 0, 2.0)];
        let list1 =
            BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();

        let postings2: Vec<(DocId, u16, f32)> = vec![(0, 0, 5.0), (1, 0, 4.0), (2, 0, 6.0)];
        let list2 =
            BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();

        assert!((list1.global_max_weight() - 7.0).abs() < 0.01);
        assert!((list2.global_max_weight() - 6.0).abs() < 0.01);

        let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 100)]);

        assert!(
            (merged.global_max_weight() - 7.0).abs() < 0.01,
            "Global max should be 7.0, got {}",
            merged.global_max_weight()
        );

        let mut bytes = Vec::new();
        merged.serialize(&mut bytes).unwrap();
        let loaded =
            BlockSparsePostingList::deserialize(&mut std::io::Cursor::new(&bytes)).unwrap();

        assert!(
            (loaded.global_max_weight() - 7.0).abs() < 0.01,
            "After roundtrip, global max should still be 7.0, got {}",
            loaded.global_max_weight()
        );
    }

    #[test]
    fn test_scoring_simulation_after_merge() {
        let postings1: Vec<(DocId, u16, f32)> = vec![(0, 0, 0.5), (5, 0, 0.8)];
        let list1 =
            BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();

        let postings2: Vec<(DocId, u16, f32)> = vec![(0, 0, 0.6), (3, 0, 0.9)];
        let list2 =
            BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();

        let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 100)]);

        let mut bytes = Vec::new();
        merged.serialize(&mut bytes).unwrap();
        let loaded =
            BlockSparsePostingList::deserialize(&mut std::io::Cursor::new(&bytes)).unwrap();

        let query_weight = 2.0f32;
        let mut iter = loaded.iterator();

        assert_eq!(iter.doc(), 0);
        let score = query_weight * iter.weight();
        assert!(
            (score - 1.0).abs() < 0.01,
            "Doc 0 score should be 1.0, got {}",
            score
        );

        iter.advance();
        assert_eq!(iter.doc(), 5);
        let score = query_weight * iter.weight();
        assert!(
            (score - 1.6).abs() < 0.01,
            "Doc 5 score should be 1.6, got {}",
            score
        );

        iter.advance();
        assert_eq!(iter.doc(), 100);
        let score = query_weight * iter.weight();
        assert!(
            (score - 1.2).abs() < 0.01,
            "Doc 100 score should be 1.2, got {}",
            score
        );

        iter.advance();
        assert_eq!(iter.doc(), 103);
        let score = query_weight * iter.weight();
        assert!(
            (score - 1.8).abs() < 0.01,
            "Doc 103 score should be 1.8, got {}",
            score
        );
    }
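
    // Added coverage sketch for the UInt4 quantization path, which the tests
    // above do not exercise. Assumes the quantization error for this data stays
    // well under the 0.05 tolerance (scale is (1.5 - 0.0) / 15, so every weight
    // lands on an exact bucket).
    #[test]
    fn test_uint4_weights_roundtrip() {
        let postings = vec![
            (1u32, 0u16, 0.0f32),
            (2, 0, 0.5),
            (3, 0, 1.0),
            (4, 0, 1.5),
        ];
        let block = SparseBlock::from_postings(&postings, WeightQuantization::UInt4).unwrap();

        // Doc ids and ordinals are unaffected by weight quantization.
        assert_eq!(block.decode_doc_ids(), vec![1, 2, 3, 4]);

        let weights = block.decode_weights();
        assert_eq!(weights.len(), postings.len());
        for (decoded, (_, _, expected)) in weights.iter().zip(postings.iter()) {
            assert!((decoded - expected).abs() < 0.05);
        }
    }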
}