1use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
9use std::io::{self, Cursor, Read, Write};
10
11use super::config::WeightQuantization;
12use crate::DocId;
13use crate::structures::postings::TERMINATED;
14use crate::structures::simd;
15
/// Maximum number of postings a single [`SparseBlock`] may hold, and the
/// chunk size used by [`BlockSparsePostingList::from_postings`].
pub const BLOCK_SIZE: usize = 128;
17
/// Fixed-size metadata stored in front of every [`SparseBlock`].
///
/// Serialized by [`BlockHeader::write`] as [`BlockHeader::SIZE`] little-endian
/// bytes, with three reserved zero bytes following `weight_quant`.
#[derive(Debug, Clone, Copy)]
pub struct BlockHeader {
    /// Number of postings in the block.
    pub count: u16,
    /// Bit width used for each packed doc-id delta.
    pub doc_id_bits: u8,
    /// Bit width used for each packed ordinal; 0 means every ordinal is 0 and
    /// no ordinal bytes are stored.
    pub ordinal_bits: u8,
    /// How the block's weights are quantized.
    pub weight_quant: WeightQuantization,
    /// Doc id of the first posting; later doc ids are stored as deltas.
    pub first_doc_id: DocId,
    /// Largest weight in the block (never below 0.0).
    pub max_weight: f32,
}
27
28impl BlockHeader {
29 pub const SIZE: usize = 16;
30
31 pub fn write<W: Write>(&self, w: &mut W) -> io::Result<()> {
32 w.write_u16::<LittleEndian>(self.count)?;
33 w.write_u8(self.doc_id_bits)?;
34 w.write_u8(self.ordinal_bits)?;
35 w.write_u8(self.weight_quant as u8)?;
36 w.write_u8(0)?;
37 w.write_u16::<LittleEndian>(0)?;
38 w.write_u32::<LittleEndian>(self.first_doc_id)?;
39 w.write_f32::<LittleEndian>(self.max_weight)?;
40 Ok(())
41 }
42
43 pub fn read<R: Read>(r: &mut R) -> io::Result<Self> {
44 let count = r.read_u16::<LittleEndian>()?;
45 let doc_id_bits = r.read_u8()?;
46 let ordinal_bits = r.read_u8()?;
47 let weight_quant_byte = r.read_u8()?;
48 let _ = r.read_u8()?;
49 let _ = r.read_u16::<LittleEndian>()?;
50 let first_doc_id = r.read_u32::<LittleEndian>()?;
51 let max_weight = r.read_f32::<LittleEndian>()?;
52
53 let weight_quant = WeightQuantization::from_u8(weight_quant_byte)
54 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Invalid weight quant"))?;
55
56 Ok(Self {
57 count,
58 doc_id_bits,
59 ordinal_bits,
60 weight_quant,
61 first_doc_id,
62 max_weight,
63 })
64 }
65}
66
/// One compressed block of postings: a [`BlockHeader`] plus three byte
/// sections holding the packed doc-id deltas, packed ordinals and encoded
/// weights.
#[derive(Debug, Clone)]
pub struct SparseBlock {
    /// Block metadata (posting count, bit widths, quantization, first doc id).
    pub header: BlockHeader,
    /// Bit-packed deltas between consecutive doc ids (the first doc id lives
    /// in the header).
    pub doc_ids_data: Vec<u8>,
    /// Bit-packed ordinals; empty when `header.ordinal_bits` is 0.
    pub ordinals_data: Vec<u8>,
    /// Weights encoded according to `header.weight_quant`.
    pub weights_data: Vec<u8>,
}
74
75impl SparseBlock {
76 pub fn from_postings(
77 postings: &[(DocId, u16, f32)],
78 weight_quant: WeightQuantization,
79 ) -> io::Result<Self> {
80 assert!(!postings.is_empty() && postings.len() <= BLOCK_SIZE);
81
82 let count = postings.len();
83 let first_doc_id = postings[0].0;
84
85 let mut deltas = Vec::with_capacity(count);
87 let mut prev = first_doc_id;
88 for &(doc_id, _, _) in postings {
89 deltas.push(doc_id.saturating_sub(prev));
90 prev = doc_id;
91 }
92 deltas[0] = 0;
93
94 let doc_id_bits = find_optimal_bit_width(&deltas[1..]);
95 let ordinals: Vec<u16> = postings.iter().map(|(_, o, _)| *o).collect();
96 let max_ordinal = ordinals.iter().copied().max().unwrap_or(0);
97 let ordinal_bits = if max_ordinal == 0 {
98 0
99 } else {
100 bits_needed_u16(max_ordinal)
101 };
102
103 let weights: Vec<f32> = postings.iter().map(|(_, _, w)| *w).collect();
104 let max_weight = weights.iter().copied().fold(0.0f32, f32::max);
105
106 let doc_ids_data = pack_bit_array(&deltas[1..], doc_id_bits);
107 let ordinals_data = if ordinal_bits > 0 {
108 pack_bit_array_u16(&ordinals, ordinal_bits)
109 } else {
110 Vec::new()
111 };
112 let weights_data = encode_weights(&weights, weight_quant)?;
113
114 Ok(Self {
115 header: BlockHeader {
116 count: count as u16,
117 doc_id_bits,
118 ordinal_bits,
119 weight_quant,
120 first_doc_id,
121 max_weight,
122 },
123 doc_ids_data,
124 ordinals_data,
125 weights_data,
126 })
127 }
128
129 pub fn decode_doc_ids(&self) -> Vec<DocId> {
130 let count = self.header.count as usize;
131 let mut doc_ids = Vec::with_capacity(count);
132 doc_ids.push(self.header.first_doc_id);
133
134 if count > 1 {
135 let deltas = unpack_bit_array(&self.doc_ids_data, self.header.doc_id_bits, count - 1);
136 let mut prev = self.header.first_doc_id;
137 for delta in deltas {
138 prev += delta;
139 doc_ids.push(prev);
140 }
141 }
142 doc_ids
143 }
144
145 pub fn decode_ordinals(&self) -> Vec<u16> {
146 let count = self.header.count as usize;
147 if self.header.ordinal_bits == 0 {
148 vec![0u16; count]
149 } else {
150 unpack_bit_array_u16(&self.ordinals_data, self.header.ordinal_bits, count)
151 }
152 }
153
154 pub fn decode_weights(&self) -> Vec<f32> {
155 decode_weights(
156 &self.weights_data,
157 self.header.weight_quant,
158 self.header.count as usize,
159 )
160 }
161
162 pub fn write<W: Write>(&self, w: &mut W) -> io::Result<()> {
163 self.header.write(w)?;
164 w.write_u16::<LittleEndian>(self.doc_ids_data.len() as u16)?;
165 w.write_u16::<LittleEndian>(self.ordinals_data.len() as u16)?;
166 w.write_u16::<LittleEndian>(self.weights_data.len() as u16)?;
167 w.write_u16::<LittleEndian>(0)?;
168 w.write_all(&self.doc_ids_data)?;
169 w.write_all(&self.ordinals_data)?;
170 w.write_all(&self.weights_data)?;
171 Ok(())
172 }
173
174 pub fn read<R: Read>(r: &mut R) -> io::Result<Self> {
175 let header = BlockHeader::read(r)?;
176 let doc_ids_len = r.read_u16::<LittleEndian>()? as usize;
177 let ordinals_len = r.read_u16::<LittleEndian>()? as usize;
178 let weights_len = r.read_u16::<LittleEndian>()? as usize;
179 let _ = r.read_u16::<LittleEndian>()?;
180
181 let mut doc_ids_data = vec![0u8; doc_ids_len];
182 r.read_exact(&mut doc_ids_data)?;
183 let mut ordinals_data = vec![0u8; ordinals_len];
184 r.read_exact(&mut ordinals_data)?;
185 let mut weights_data = vec![0u8; weights_len];
186 r.read_exact(&mut weights_data)?;
187
188 Ok(Self {
189 header,
190 doc_ids_data,
191 ordinals_data,
192 weights_data,
193 })
194 }
195
196 pub fn with_doc_offset(&self, doc_offset: u32) -> Self {
202 Self {
203 header: BlockHeader {
204 first_doc_id: self.header.first_doc_id + doc_offset,
205 ..self.header
206 },
207 doc_ids_data: self.doc_ids_data.clone(),
208 ordinals_data: self.ordinals_data.clone(),
209 weights_data: self.weights_data.clone(),
210 }
211 }
212}
213
/// A posting list stored as a sequence of compressed [`SparseBlock`]s.
#[derive(Debug, Clone)]
pub struct BlockSparsePostingList {
    /// Number of distinct doc ids in the list (a doc with several ordinals
    /// counts once).
    pub doc_count: u32,
    /// The compressed blocks, in posting order.
    pub blocks: Vec<SparseBlock>,
}
223
224impl BlockSparsePostingList {
225 pub fn from_postings_with_block_size(
227 postings: &[(DocId, u16, f32)],
228 weight_quant: WeightQuantization,
229 block_size: usize,
230 ) -> io::Result<Self> {
231 if postings.is_empty() {
232 return Ok(Self {
233 doc_count: 0,
234 blocks: Vec::new(),
235 });
236 }
237
238 let block_size = block_size.max(16); let mut blocks = Vec::new();
240 for chunk in postings.chunks(block_size) {
241 blocks.push(SparseBlock::from_postings(chunk, weight_quant)?);
242 }
243
244 let mut unique_docs = 1u32;
249 for i in 1..postings.len() {
250 if postings[i].0 != postings[i - 1].0 {
251 unique_docs += 1;
252 }
253 }
254
255 Ok(Self {
256 doc_count: unique_docs,
257 blocks,
258 })
259 }
260
261 pub fn from_postings(
263 postings: &[(DocId, u16, f32)],
264 weight_quant: WeightQuantization,
265 ) -> io::Result<Self> {
266 Self::from_postings_with_block_size(postings, weight_quant, BLOCK_SIZE)
267 }
268
269 pub fn doc_count(&self) -> u32 {
270 self.doc_count
271 }
272
273 pub fn num_blocks(&self) -> usize {
274 self.blocks.len()
275 }
276
277 pub fn global_max_weight(&self) -> f32 {
278 self.blocks
279 .iter()
280 .map(|b| b.header.max_weight)
281 .fold(0.0f32, f32::max)
282 }
283
284 pub fn block_max_weight(&self, block_idx: usize) -> Option<f32> {
285 self.blocks.get(block_idx).map(|b| b.header.max_weight)
286 }
287
288 pub fn size_bytes(&self) -> usize {
290 use std::mem::size_of;
291
292 let header_size = size_of::<u32>() * 2; let blocks_size: usize = self
294 .blocks
295 .iter()
296 .map(|b| {
297 size_of::<BlockHeader>()
298 + b.doc_ids_data.len()
299 + b.ordinals_data.len()
300 + b.weights_data.len()
301 })
302 .sum();
303 header_size + blocks_size
304 }
305
306 pub fn iterator(&self) -> BlockSparsePostingIterator<'_> {
307 BlockSparsePostingIterator::new(self)
308 }
309
310 pub fn serialize<W: Write>(&self, w: &mut W) -> io::Result<()> {
319 use super::SparseSkipEntry;
320
321 w.write_u32::<LittleEndian>(self.doc_count)?;
322 w.write_f32::<LittleEndian>(self.global_max_weight())?;
323 w.write_u32::<LittleEndian>(self.blocks.len() as u32)?;
324
325 let mut block_bytes: Vec<Vec<u8>> = Vec::with_capacity(self.blocks.len());
327 for block in &self.blocks {
328 let mut buf = Vec::new();
329 block.write(&mut buf)?;
330 block_bytes.push(buf);
331 }
332
333 let mut offset = 0u32;
335 for (block, bytes) in self.blocks.iter().zip(block_bytes.iter()) {
336 let doc_ids = block.decode_doc_ids();
337 let first_doc = doc_ids.first().copied().unwrap_or(0);
338 let last_doc = doc_ids.last().copied().unwrap_or(0);
339 let length = bytes.len() as u32;
340
341 let entry =
342 SparseSkipEntry::new(first_doc, last_doc, offset, length, block.header.max_weight);
343 entry.write(w)?;
344 offset += length;
345 }
346
347 for bytes in block_bytes {
349 w.write_all(&bytes)?;
350 }
351
352 Ok(())
353 }
354
355 pub fn deserialize<R: Read>(r: &mut R) -> io::Result<Self> {
358 use super::SparseSkipEntry;
359
360 let doc_count = r.read_u32::<LittleEndian>()?;
361 let _global_max_weight = r.read_f32::<LittleEndian>()?;
362 let num_blocks = r.read_u32::<LittleEndian>()? as usize;
363
364 for _ in 0..num_blocks {
366 let _ = SparseSkipEntry::read(r)?;
367 }
368
369 let mut blocks = Vec::with_capacity(num_blocks);
371 for _ in 0..num_blocks {
372 blocks.push(SparseBlock::read(r)?);
373 }
374 Ok(Self { doc_count, blocks })
375 }
376
377 pub fn deserialize_header<R: Read>(
380 r: &mut R,
381 ) -> io::Result<(u32, f32, Vec<super::SparseSkipEntry>, usize)> {
382 use super::SparseSkipEntry;
383
384 let doc_count = r.read_u32::<LittleEndian>()?;
385 let global_max_weight = r.read_f32::<LittleEndian>()?;
386 let num_blocks = r.read_u32::<LittleEndian>()? as usize;
387
388 let mut entries = Vec::with_capacity(num_blocks);
389 for _ in 0..num_blocks {
390 entries.push(SparseSkipEntry::read(r)?);
391 }
392
393 let header_size = 4 + 4 + 4 + num_blocks * SparseSkipEntry::SIZE;
395
396 Ok((doc_count, global_max_weight, entries, header_size))
397 }
398
399 pub fn decode_all(&self) -> Vec<(DocId, u16, f32)> {
400 let mut result = Vec::with_capacity(self.doc_count as usize);
401 for block in &self.blocks {
402 let doc_ids = block.decode_doc_ids();
403 let ordinals = block.decode_ordinals();
404 let weights = block.decode_weights();
405 for i in 0..block.header.count as usize {
406 result.push((doc_ids[i], ordinals[i], weights[i]));
407 }
408 }
409 result
410 }
411
412 pub fn merge_with_offsets(lists: &[(&BlockSparsePostingList, u32)]) -> Self {
423 if lists.is_empty() {
424 return Self {
425 doc_count: 0,
426 blocks: Vec::new(),
427 };
428 }
429
430 let total_blocks: usize = lists.iter().map(|(pl, _)| pl.blocks.len()).sum();
432 let total_docs: u32 = lists.iter().map(|(pl, _)| pl.doc_count).sum();
433
434 let mut merged_blocks = Vec::with_capacity(total_blocks);
435
436 for (posting_list, doc_offset) in lists {
438 for block in &posting_list.blocks {
439 merged_blocks.push(block.with_doc_offset(*doc_offset));
440 }
441 }
442
443 Self {
444 doc_count: total_docs,
445 blocks: merged_blocks,
446 }
447 }
448
449 fn find_block(&self, target: DocId) -> Option<usize> {
450 let mut lo = 0;
451 let mut hi = self.blocks.len();
452 while lo < hi {
453 let mid = lo + (hi - lo) / 2;
454 let block = &self.blocks[mid];
455 let doc_ids = block.decode_doc_ids();
456 let last_doc = doc_ids.last().copied().unwrap_or(block.header.first_doc_id);
457 if last_doc < target {
458 lo = mid + 1;
459 } else {
460 hi = mid;
461 }
462 }
463 if lo < self.blocks.len() {
464 Some(lo)
465 } else {
466 None
467 }
468 }
469}
470
/// Cursor over a [`BlockSparsePostingList`] that keeps the doc ids and
/// weights of one block decoded at a time.
pub struct BlockSparsePostingIterator<'a> {
    /// The list being iterated.
    posting_list: &'a BlockSparsePostingList,
    /// Index of the currently loaded block.
    block_idx: usize,
    /// Position of the current posting within the loaded block.
    in_block_idx: usize,
    /// Decoded doc ids of the loaded block.
    current_doc_ids: Vec<DocId>,
    /// Decoded weights of the loaded block.
    current_weights: Vec<f32>,
    /// Set once the iterator has moved past the last posting.
    exhausted: bool,
}
483
484impl<'a> BlockSparsePostingIterator<'a> {
485 fn new(posting_list: &'a BlockSparsePostingList) -> Self {
486 let mut iter = Self {
487 posting_list,
488 block_idx: 0,
489 in_block_idx: 0,
490 current_doc_ids: Vec::new(),
491 current_weights: Vec::new(),
492 exhausted: posting_list.blocks.is_empty(),
493 };
494 if !iter.exhausted {
495 iter.load_block(0);
496 }
497 iter
498 }
499
500 fn load_block(&mut self, block_idx: usize) {
501 if let Some(block) = self.posting_list.blocks.get(block_idx) {
502 self.current_doc_ids = block.decode_doc_ids();
503 self.current_weights = block.decode_weights();
504 self.block_idx = block_idx;
505 self.in_block_idx = 0;
506 }
507 }
508
509 pub fn doc(&self) -> DocId {
510 if self.exhausted {
511 TERMINATED
512 } else {
513 self.current_doc_ids
514 .get(self.in_block_idx)
515 .copied()
516 .unwrap_or(TERMINATED)
517 }
518 }
519
520 pub fn weight(&self) -> f32 {
521 self.current_weights
522 .get(self.in_block_idx)
523 .copied()
524 .unwrap_or(0.0)
525 }
526
527 pub fn ordinal(&self) -> u16 {
528 if let Some(block) = self.posting_list.blocks.get(self.block_idx) {
529 let ordinals = block.decode_ordinals();
530 ordinals.get(self.in_block_idx).copied().unwrap_or(0)
531 } else {
532 0
533 }
534 }
535
536 pub fn advance(&mut self) -> DocId {
537 if self.exhausted {
538 return TERMINATED;
539 }
540 self.in_block_idx += 1;
541 if self.in_block_idx >= self.current_doc_ids.len() {
542 self.block_idx += 1;
543 if self.block_idx >= self.posting_list.blocks.len() {
544 self.exhausted = true;
545 } else {
546 self.load_block(self.block_idx);
547 }
548 }
549 self.doc()
550 }
551
552 pub fn seek(&mut self, target: DocId) -> DocId {
553 if self.exhausted {
554 return TERMINATED;
555 }
556 if self.doc() >= target {
557 return self.doc();
558 }
559
560 if let Some(&last_doc) = self.current_doc_ids.last()
562 && last_doc >= target
563 {
564 while !self.exhausted && self.doc() < target {
565 self.in_block_idx += 1;
566 if self.in_block_idx >= self.current_doc_ids.len() {
567 self.block_idx += 1;
568 if self.block_idx >= self.posting_list.blocks.len() {
569 self.exhausted = true;
570 } else {
571 self.load_block(self.block_idx);
572 }
573 }
574 }
575 return self.doc();
576 }
577
578 if let Some(block_idx) = self.posting_list.find_block(target) {
580 self.load_block(block_idx);
581 while self.in_block_idx < self.current_doc_ids.len()
582 && self.current_doc_ids[self.in_block_idx] < target
583 {
584 self.in_block_idx += 1;
585 }
586 if self.in_block_idx >= self.current_doc_ids.len() {
587 self.block_idx += 1;
588 if self.block_idx >= self.posting_list.blocks.len() {
589 self.exhausted = true;
590 } else {
591 self.load_block(self.block_idx);
592 }
593 }
594 } else {
595 self.exhausted = true;
596 }
597 self.doc()
598 }
599
600 pub fn skip_to_next_block(&mut self) -> DocId {
603 if self.exhausted {
604 return TERMINATED;
605 }
606 let next = self.block_idx + 1;
607 if next >= self.posting_list.blocks.len() {
608 self.exhausted = true;
609 return TERMINATED;
610 }
611 self.load_block(next);
612 self.doc()
613 }
614
615 pub fn is_exhausted(&self) -> bool {
616 self.exhausted
617 }
618
619 pub fn current_block_max_weight(&self) -> f32 {
620 self.posting_list
621 .blocks
622 .get(self.block_idx)
623 .map(|b| b.header.max_weight)
624 .unwrap_or(0.0)
625 }
626
627 pub fn current_block_max_contribution(&self, query_weight: f32) -> f32 {
628 query_weight * self.current_block_max_weight()
629 }
630}
631
632fn find_optimal_bit_width(values: &[u32]) -> u8 {
637 if values.is_empty() {
638 return 0;
639 }
640 let max_val = values.iter().copied().max().unwrap_or(0);
641 simd::bits_needed(max_val)
642}
643
/// Number of bits needed to represent `val`: position of its highest set bit,
/// or 0 when `val` is 0.
fn bits_needed_u16(val: u16) -> u8 {
    // For 0, `leading_zeros` is 16, so the subtraction yields 0 naturally.
    (u16::BITS - val.leading_zeros()) as u8
}
651
/// Packs `values` back to back, `bits` bits each, least-significant bit
/// first, into a byte vector of `ceil(values.len() * bits / 8)` bytes.
///
/// Bits of a value above `bits` are discarded. Returns an empty vector when
/// `bits` is 0 or `values` is empty.
fn pack_bit_array(values: &[u32], bits: u8) -> Vec<u8> {
    if bits == 0 || values.is_empty() {
        return Vec::new();
    }
    // `(1u32 << bits) - 1` overflows for `bits == 32` (panic in debug builds,
    // a zero mask in release). `checked_shl` yields `None` for shifts >= 32,
    // which maps to the all-ones mask.
    let mask = 1u32
        .checked_shl(u32::from(bits))
        .map_or(u32::MAX, |high_bit| high_bit - 1);

    let width = usize::from(bits);
    let mut packed = vec![0u8; (values.len() * width).div_ceil(8)];
    for (i, &val) in values.iter().enumerate() {
        pack_value(&mut packed, i * width, val & mask, bits);
    }
    packed
}

/// Same as [`pack_bit_array`] for `u16` inputs.
fn pack_bit_array_u16(values: &[u16], bits: u8) -> Vec<u8> {
    if bits == 0 || values.is_empty() {
        return Vec::new();
    }
    // See `pack_bit_array` for why the mask is built with `checked_shl`.
    let mask = 1u32
        .checked_shl(u32::from(bits))
        .map_or(u32::MAX, |high_bit| high_bit - 1);

    let width = usize::from(bits);
    let mut packed = vec![0u8; (values.len() * width).div_ceil(8)];
    for (i, &val) in values.iter().enumerate() {
        pack_value(&mut packed, i * width, u32::from(val) & mask, bits);
    }
    packed
}

/// ORs the low `bits` bits of `val` into `data`, starting at bit `bit_pos`
/// (bit 0 is the least-significant bit of `data[0]`).
///
/// The target bits must currently be zero, and `data` must be long enough to
/// hold them; indexing panics otherwise.
#[inline]
fn pack_value(data: &mut [u8], bit_pos: usize, val: u32, bits: u8) {
    let mut remaining = bits as usize;
    let mut val = val;
    let mut byte = bit_pos / 8;
    let mut offset = bit_pos % 8;
    while remaining > 0 {
        // Fill what is left of the current byte, then continue at the next one.
        let space = 8 - offset;
        let to_write = remaining.min(space);
        let mask = (1u32 << to_write) - 1;
        data[byte] |= ((val & mask) as u8) << offset;
        val >>= to_write;
        remaining -= to_write;
        byte += 1;
        offset = 0;
    }
}
702
703fn unpack_bit_array(data: &[u8], bits: u8, count: usize) -> Vec<u32> {
704 if bits == 0 || count == 0 {
705 return vec![0; count];
706 }
707 let mut result = Vec::with_capacity(count);
708 let mut bit_pos = 0usize;
709 for _ in 0..count {
710 result.push(unpack_value(data, bit_pos, bits));
711 bit_pos += bits as usize;
712 }
713 result
714}
715
716fn unpack_bit_array_u16(data: &[u8], bits: u8, count: usize) -> Vec<u16> {
717 if bits == 0 || count == 0 {
718 return vec![0; count];
719 }
720 let mut result = Vec::with_capacity(count);
721 let mut bit_pos = 0usize;
722 for _ in 0..count {
723 result.push(unpack_value(data, bit_pos, bits) as u16);
724 bit_pos += bits as usize;
725 }
726 result
727}
728
/// Reads a `bits`-wide value from `data`, starting at bit `bit_pos` (bit 0 is
/// the least-significant bit of `data[0]`).
///
/// Bytes past the end of `data` read as zero. Bits beyond the 32nd are
/// ignored, so an oversized `bits` cannot overflow the result.
#[inline]
fn unpack_value(data: &[u8], bit_pos: usize, bits: u8) -> u32 {
    let mut val = 0u32;
    let mut remaining = bits as usize;
    let mut byte = bit_pos / 8;
    let mut offset = bit_pos % 8;
    let mut shift = 0u32;
    while remaining > 0 {
        let space = 8 - offset;
        let to_read = remaining.min(space);
        // `to_read` is in 1..=8. `(1u8 << to_read) - 1` overflows when a whole
        // byte is read (`to_read == 8`): a panic in debug builds and a zero
        // mask in release. Shifting the all-ones byte right is safe for the
        // whole range.
        let mask = u8::MAX >> (8 - to_read);
        let chunk = u32::from((data.get(byte).copied().unwrap_or(0) >> offset) & mask);
        // `checked_shl` drops chunks that would land beyond bit 31.
        val |= chunk.checked_shl(shift).unwrap_or(0);
        remaining -= to_read;
        shift += to_read as u32;
        byte += 1;
        offset = 0;
    }
    val
}
748
749fn encode_weights(weights: &[f32], quant: WeightQuantization) -> io::Result<Vec<u8>> {
754 let mut data = Vec::new();
755 match quant {
756 WeightQuantization::Float32 => {
757 for &w in weights {
758 data.write_f32::<LittleEndian>(w)?;
759 }
760 }
761 WeightQuantization::Float16 => {
762 use half::f16;
763 for &w in weights {
764 data.write_u16::<LittleEndian>(f16::from_f32(w).to_bits())?;
765 }
766 }
767 WeightQuantization::UInt8 => {
768 let min = weights.iter().copied().fold(f32::INFINITY, f32::min);
769 let max = weights.iter().copied().fold(f32::NEG_INFINITY, f32::max);
770 let range = max - min;
771 let scale = if range < f32::EPSILON {
772 1.0
773 } else {
774 range / 255.0
775 };
776 data.write_f32::<LittleEndian>(scale)?;
777 data.write_f32::<LittleEndian>(min)?;
778 for &w in weights {
779 data.write_u8(((w - min) / scale).round() as u8)?;
780 }
781 }
782 WeightQuantization::UInt4 => {
783 let min = weights.iter().copied().fold(f32::INFINITY, f32::min);
784 let max = weights.iter().copied().fold(f32::NEG_INFINITY, f32::max);
785 let range = max - min;
786 let scale = if range < f32::EPSILON {
787 1.0
788 } else {
789 range / 15.0
790 };
791 data.write_f32::<LittleEndian>(scale)?;
792 data.write_f32::<LittleEndian>(min)?;
793 let mut i = 0;
794 while i < weights.len() {
795 let q1 = ((weights[i] - min) / scale).round() as u8 & 0x0F;
796 let q2 = if i + 1 < weights.len() {
797 ((weights[i + 1] - min) / scale).round() as u8 & 0x0F
798 } else {
799 0
800 };
801 data.write_u8((q2 << 4) | q1)?;
802 i += 2;
803 }
804 }
805 }
806 Ok(data)
807}
808
809fn decode_weights(data: &[u8], quant: WeightQuantization, count: usize) -> Vec<f32> {
810 let mut cursor = Cursor::new(data);
811 let mut weights = Vec::with_capacity(count);
812 match quant {
813 WeightQuantization::Float32 => {
814 for _ in 0..count {
815 weights.push(cursor.read_f32::<LittleEndian>().unwrap_or(0.0));
816 }
817 }
818 WeightQuantization::Float16 => {
819 use half::f16;
820 for _ in 0..count {
821 let bits = cursor.read_u16::<LittleEndian>().unwrap_or(0);
822 weights.push(f16::from_bits(bits).to_f32());
823 }
824 }
825 WeightQuantization::UInt8 => {
826 let scale = cursor.read_f32::<LittleEndian>().unwrap_or(1.0);
827 let min = cursor.read_f32::<LittleEndian>().unwrap_or(0.0);
828 for _ in 0..count {
829 let q = cursor.read_u8().unwrap_or(0);
830 weights.push(q as f32 * scale + min);
831 }
832 }
833 WeightQuantization::UInt4 => {
834 let scale = cursor.read_f32::<LittleEndian>().unwrap_or(1.0);
835 let min = cursor.read_f32::<LittleEndian>().unwrap_or(0.0);
836 let mut i = 0;
837 while i < count {
838 let byte = cursor.read_u8().unwrap_or(0);
839 weights.push((byte & 0x0F) as f32 * scale + min);
840 i += 1;
841 if i < count {
842 weights.push((byte >> 4) as f32 * scale + min);
843 i += 1;
844 }
845 }
846 }
847 }
848 weights
849}
850
851#[cfg(test)]
852mod tests {
853 use super::*;
854
855 #[test]
856 fn test_block_roundtrip() {
857 let postings = vec![
858 (10u32, 0u16, 1.5f32),
859 (15, 0, 2.0),
860 (20, 1, 0.5),
861 (100, 0, 3.0),
862 ];
863 let block = SparseBlock::from_postings(&postings, WeightQuantization::Float32).unwrap();
864
865 assert_eq!(block.decode_doc_ids(), vec![10, 15, 20, 100]);
866 assert_eq!(block.decode_ordinals(), vec![0, 0, 1, 0]);
867 let weights = block.decode_weights();
868 assert!((weights[0] - 1.5).abs() < 0.01);
869 }
870
871 #[test]
872 fn test_posting_list() {
873 let postings: Vec<(DocId, u16, f32)> =
874 (0..300).map(|i| (i * 2, 0, i as f32 * 0.1)).collect();
875 let list =
876 BlockSparsePostingList::from_postings(&postings, WeightQuantization::Float32).unwrap();
877
878 assert_eq!(list.doc_count(), 300);
879 assert_eq!(list.num_blocks(), 3);
880
881 let mut iter = list.iterator();
882 assert_eq!(iter.doc(), 0);
883 iter.advance();
884 assert_eq!(iter.doc(), 2);
885 }
886
887 #[test]
888 fn test_serialization() {
889 let postings = vec![(1u32, 0u16, 0.5f32), (10, 1, 1.5), (100, 0, 2.5)];
890 let list =
891 BlockSparsePostingList::from_postings(&postings, WeightQuantization::UInt8).unwrap();
892
893 let mut buf = Vec::new();
894 list.serialize(&mut buf).unwrap();
895 let list2 = BlockSparsePostingList::deserialize(&mut Cursor::new(&buf)).unwrap();
896
897 assert_eq!(list.doc_count(), list2.doc_count());
898 }
899
900 #[test]
901 fn test_seek() {
902 let postings: Vec<(DocId, u16, f32)> = (0..500).map(|i| (i * 3, 0, i as f32)).collect();
903 let list =
904 BlockSparsePostingList::from_postings(&postings, WeightQuantization::Float32).unwrap();
905
906 let mut iter = list.iterator();
907 assert_eq!(iter.seek(300), 300);
908 assert_eq!(iter.seek(301), 303);
909 assert_eq!(iter.seek(2000), TERMINATED);
910 }
911
912 #[test]
913 fn test_merge_with_offsets() {
914 let postings1: Vec<(DocId, u16, f32)> = vec![(0, 0, 1.0), (5, 0, 2.0), (10, 1, 3.0)];
916 let list1 =
917 BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
918
919 let postings2: Vec<(DocId, u16, f32)> = vec![(0, 0, 4.0), (3, 1, 5.0), (7, 0, 6.0)];
921 let list2 =
922 BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();
923
924 let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 100)]);
926
927 assert_eq!(merged.doc_count(), 6);
928
929 let decoded = merged.decode_all();
931 assert_eq!(decoded.len(), 6);
932
933 assert_eq!(decoded[0].0, 0);
935 assert_eq!(decoded[1].0, 5);
936 assert_eq!(decoded[2].0, 10);
937
938 assert_eq!(decoded[3].0, 100); assert_eq!(decoded[4].0, 103); assert_eq!(decoded[5].0, 107); assert!((decoded[0].2 - 1.0).abs() < 0.01);
945 assert!((decoded[3].2 - 4.0).abs() < 0.01);
946
947 assert_eq!(decoded[2].1, 1); assert_eq!(decoded[4].1, 1); }
951
952 #[test]
953 fn test_merge_with_offsets_multi_block() {
954 let postings1: Vec<(DocId, u16, f32)> = (0..200).map(|i| (i * 2, 0, i as f32)).collect();
956 let list1 =
957 BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
958 assert!(list1.num_blocks() > 1, "Should have multiple blocks");
959
960 let postings2: Vec<(DocId, u16, f32)> = (0..150).map(|i| (i * 3, 1, i as f32)).collect();
961 let list2 =
962 BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();
963
964 let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 1000)]);
966
967 assert_eq!(merged.doc_count(), 350);
968 assert_eq!(merged.num_blocks(), list1.num_blocks() + list2.num_blocks());
969
970 let mut iter = merged.iterator();
972
973 assert_eq!(iter.doc(), 0);
975
976 let doc = iter.seek(1000);
978 assert_eq!(doc, 1000); iter.advance();
982 assert_eq!(iter.doc(), 1003); }
984
985 #[test]
986 fn test_merge_with_offsets_serialize_roundtrip() {
987 let postings1: Vec<(DocId, u16, f32)> = vec![(0, 0, 1.0), (5, 0, 2.0), (10, 1, 3.0)];
989 let list1 =
990 BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
991
992 let postings2: Vec<(DocId, u16, f32)> = vec![(0, 0, 4.0), (3, 1, 5.0), (7, 0, 6.0)];
993 let list2 =
994 BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();
995
996 let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 100)]);
998
999 let mut bytes = Vec::new();
1001 merged.serialize(&mut bytes).unwrap();
1002
1003 let mut cursor = std::io::Cursor::new(&bytes);
1005 let loaded = BlockSparsePostingList::deserialize(&mut cursor).unwrap();
1006
1007 let decoded = loaded.decode_all();
1009 assert_eq!(decoded.len(), 6);
1010
1011 assert_eq!(decoded[0].0, 0);
1013 assert_eq!(decoded[1].0, 5);
1014 assert_eq!(decoded[2].0, 10);
1015
1016 assert_eq!(decoded[3].0, 100, "First doc of seg2 should be 0+100=100");
1018 assert_eq!(decoded[4].0, 103, "Second doc of seg2 should be 3+100=103");
1019 assert_eq!(decoded[5].0, 107, "Third doc of seg2 should be 7+100=107");
1020
1021 let mut iter = loaded.iterator();
1023 assert_eq!(iter.doc(), 0);
1024 iter.advance();
1025 assert_eq!(iter.doc(), 5);
1026 iter.advance();
1027 assert_eq!(iter.doc(), 10);
1028 iter.advance();
1029 assert_eq!(iter.doc(), 100);
1030 iter.advance();
1031 assert_eq!(iter.doc(), 103);
1032 iter.advance();
1033 assert_eq!(iter.doc(), 107);
1034 }
1035
1036 #[test]
1037 fn test_merge_seek_after_roundtrip() {
1038 let postings1: Vec<(DocId, u16, f32)> = (0..200).map(|i| (i * 2, 0, 1.0)).collect();
1040 let list1 =
1041 BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
1042
1043 let postings2: Vec<(DocId, u16, f32)> = (0..150).map(|i| (i * 3, 0, 2.0)).collect();
1044 let list2 =
1045 BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();
1046
1047 let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 1000)]);
1049
1050 let mut bytes = Vec::new();
1052 merged.serialize(&mut bytes).unwrap();
1053 let loaded =
1054 BlockSparsePostingList::deserialize(&mut std::io::Cursor::new(&bytes)).unwrap();
1055
1056 let mut iter = loaded.iterator();
1058
1059 let doc = iter.seek(100);
1061 assert_eq!(doc, 100, "Seek to 100 in segment 1");
1062
1063 let doc = iter.seek(1000);
1065 assert_eq!(doc, 1000, "Seek to 1000 (first doc of segment 2)");
1066
1067 let doc = iter.seek(1050);
1069 assert!(
1070 doc >= 1050,
1071 "Seek to 1050 should find doc >= 1050, got {}",
1072 doc
1073 );
1074
1075 let doc = iter.seek(500);
1077 assert!(
1078 doc >= 1050,
1079 "Seek backwards should not go back, got {}",
1080 doc
1081 );
1082
1083 let mut iter2 = loaded.iterator();
1085
1086 let mut count = 0;
1088 let mut prev_doc = 0;
1089 while iter2.doc() != super::TERMINATED {
1090 let current = iter2.doc();
1091 if count > 0 {
1092 assert!(
1093 current > prev_doc,
1094 "Docs should be monotonically increasing: {} vs {}",
1095 prev_doc,
1096 current
1097 );
1098 }
1099 prev_doc = current;
1100 iter2.advance();
1101 count += 1;
1102 }
1103 assert_eq!(count, 350, "Should have 350 total docs");
1104 }
1105
1106 #[test]
1107 fn test_doc_count_multi_value() {
1108 let postings: Vec<(DocId, u16, f32)> = vec![
1111 (0, 0, 1.0),
1112 (0, 1, 1.5),
1113 (0, 2, 2.0),
1114 (5, 0, 3.0),
1115 (5, 1, 3.5),
1116 (10, 0, 4.0),
1117 ];
1118 let list =
1119 BlockSparsePostingList::from_postings(&postings, WeightQuantization::Float32).unwrap();
1120
1121 assert_eq!(list.doc_count(), 3);
1123
1124 let decoded = list.decode_all();
1126 assert_eq!(decoded.len(), 6);
1127 }
1128
    // Simulates a byte-level ("zero-copy") merge of two serialized posting
    // lists: the skip tables are rewritten, the raw block bytes are appended
    // unchanged, and only each block's `first_doc_id` in the second list is
    // patched in place. The result must deserialize and iterate identically
    // to `merge_with_offsets` on the in-memory lists.
    #[test]
    fn test_zero_copy_merge_patches_first_doc_id() {
        use crate::structures::SparseSkipEntry;

        let postings1: Vec<(DocId, u16, f32)> = (0..200).map(|i| (i * 2, 0, i as f32)).collect();
        let list1 =
            BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();
        assert!(list1.num_blocks() > 1);

        let postings2: Vec<(DocId, u16, f32)> = (0..150).map(|i| (i * 3, 1, i as f32)).collect();
        let list2 =
            BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();

        let mut bytes1 = Vec::new();
        list1.serialize(&mut bytes1).unwrap();
        let mut bytes2 = Vec::new();
        list2.serialize(&mut bytes2).unwrap();

        // Splits a serialized list into (doc_count, global_max_weight,
        // skip entries, raw block bytes). Each skip entry is parsed here as
        // 20 bytes: first_doc, last_doc, offset, length, max_weight.
        fn parse_raw(data: &[u8]) -> (u32, f32, Vec<SparseSkipEntry>, &[u8]) {
            let doc_count = u32::from_le_bytes(data[0..4].try_into().unwrap());
            let global_max = f32::from_le_bytes(data[4..8].try_into().unwrap());
            let num_blocks = u32::from_le_bytes(data[8..12].try_into().unwrap()) as usize;
            let mut pos = 12;
            let mut skip = Vec::new();
            for _ in 0..num_blocks {
                let first_doc = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
                let last_doc = u32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap());
                let offset = u32::from_le_bytes(data[pos + 8..pos + 12].try_into().unwrap());
                let length = u32::from_le_bytes(data[pos + 12..pos + 16].try_into().unwrap());
                let max_w = f32::from_le_bytes(data[pos + 16..pos + 20].try_into().unwrap());
                skip.push(SparseSkipEntry::new(
                    first_doc, last_doc, offset, length, max_w,
                ));
                pos += 20;
            }
            (doc_count, global_max, skip, &data[pos..])
        }

        let (dc1, gm1, skip1, raw1) = parse_raw(&bytes1);
        let (dc2, gm2, skip2, raw2) = parse_raw(&bytes2);

        // Doc-id shift applied to the second list.
        let doc_offset: u32 = 1000;
        let total_docs = dc1 + dc2;
        let global_max = gm1.max(gm2);
        let total_blocks = (skip1.len() + skip2.len()) as u32;

        // Merged list header.
        let mut output = Vec::new();
        output.extend_from_slice(&total_docs.to_le_bytes());
        output.extend_from_slice(&global_max.to_le_bytes());
        output.extend_from_slice(&total_blocks.to_le_bytes());

        // Skip table: entries of the first list are copied as-is (base
        // offset 0); entries of the second list get shifted doc ids and a
        // byte offset rebased past the first list's block data.
        let mut block_data_offset = 0u32;
        for entry in &skip1 {
            let adjusted = SparseSkipEntry::new(
                entry.first_doc,
                entry.last_doc,
                block_data_offset + entry.offset,
                entry.length,
                entry.max_weight,
            );
            adjusted.write(&mut output).unwrap();
        }
        if let Some(last) = skip1.last() {
            block_data_offset += last.offset + last.length;
        }
        for entry in &skip2 {
            let adjusted = SparseSkipEntry::new(
                entry.first_doc + doc_offset,
                entry.last_doc + doc_offset,
                block_data_offset + entry.offset,
                entry.length,
                entry.max_weight,
            );
            adjusted.write(&mut output).unwrap();
        }

        // Block data of the first list needs no changes.
        output.extend_from_slice(raw1);

        // `first_doc_id` sits 8 bytes into a serialized block header
        // (after count, doc_id_bits, ordinal_bits, quant tag and 3 reserved
        // bytes). Patch it for every block of the second list.
        const FIRST_DOC_ID_OFFSET: usize = 8;
        let mut buf2 = raw2.to_vec();
        for entry in &skip2 {
            let off = entry.offset as usize + FIRST_DOC_ID_OFFSET;
            if off + 4 <= buf2.len() {
                let old = u32::from_le_bytes(buf2[off..off + 4].try_into().unwrap());
                let patched = (old + doc_offset).to_le_bytes();
                buf2[off..off + 4].copy_from_slice(&patched);
            }
        }
        output.extend_from_slice(&buf2);

        // The hand-assembled bytes must load as a regular posting list.
        let loaded = BlockSparsePostingList::deserialize(&mut Cursor::new(&output)).unwrap();
        assert_eq!(loaded.doc_count(), 350);

        let mut iter = loaded.iterator();

        // Segment 1: ids 0, 2, ..., 398.
        assert_eq!(iter.doc(), 0);
        let doc = iter.seek(100);
        assert_eq!(doc, 100);
        let doc = iter.seek(398);
        assert_eq!(doc, 398);

        // Segment 2: ids 1000, 1003, ..., 1447.
        let doc = iter.seek(1000);
        assert_eq!(doc, 1000, "First doc of segment 2 should be 1000");
        iter.advance();
        assert_eq!(iter.doc(), 1003, "Second doc of segment 2 should be 1003");
        let doc = iter.seek(1447);
        assert_eq!(doc, 1447, "Last doc of segment 2 should be 1447");

        iter.advance();
        assert_eq!(iter.doc(), super::TERMINATED);

        // Cross-check every posting against the in-memory merge.
        let reference =
            BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, doc_offset)]);
        let mut ref_iter = reference.iterator();
        let mut zc_iter = loaded.iterator();
        while ref_iter.doc() != super::TERMINATED {
            assert_eq!(
                ref_iter.doc(),
                zc_iter.doc(),
                "Zero-copy and reference merge should produce identical doc_ids"
            );
            assert!(
                (ref_iter.weight() - zc_iter.weight()).abs() < 0.01,
                "Weights should match: {} vs {}",
                ref_iter.weight(),
                zc_iter.weight()
            );
            ref_iter.advance();
            zc_iter.advance();
        }
        assert_eq!(zc_iter.doc(), super::TERMINATED);
    }
1276
/// A list in which every document contributes exactly one posting
/// (ordinal 0) must report a document count equal to the number of postings.
#[test]
fn test_doc_count_single_value() {
    // Four distinct doc ids, one posting each.
    let entries: Vec<(DocId, u16, f32)> =
        vec![(0, 0, 1.0), (5, 0, 2.0), (10, 0, 3.0), (15, 0, 4.0)];

    let built = BlockSparsePostingList::from_postings(&entries, WeightQuantization::Float32);
    let posting_list = built.unwrap();

    let observed = posting_list.doc_count();
    assert_eq!(observed, 4);
}
1288
/// With two ordinals stored for each of two documents, `doc_count()` must be
/// 2 (not 4), both on the freshly built list and on the list obtained after
/// a serialize/deserialize round trip.
#[test]
fn test_doc_count_multi_value_serialization_roundtrip() {
    // Docs 0 and 5 each appear twice, under ordinals 0 and 1.
    let entries: Vec<(DocId, u16, f32)> =
        vec![(0, 0, 1.0), (0, 1, 1.5), (5, 0, 2.0), (5, 1, 2.5)];

    let original =
        BlockSparsePostingList::from_postings(&entries, WeightQuantization::Float32).unwrap();
    assert_eq!(original.doc_count(), 2);

    // Encode into an in-memory buffer, then decode from that same buffer.
    let mut encoded = Vec::new();
    original.serialize(&mut encoded).unwrap();

    let mut reader = Cursor::new(&encoded);
    let decoded = BlockSparsePostingList::deserialize(&mut reader).unwrap();
    assert_eq!(decoded.doc_count(), 2);
}
1303
/// Merging two lists with document offsets must keep every posting's weight
/// and ordinal intact, including after a serialize/deserialize round trip.
///
/// The second list is merged with an offset of 100, so its doc ids 0, 3 and 7
/// are expected to surface as 100, 103 and 107, after all postings of the
/// first list.
#[test]
fn test_merge_preserves_weights_and_ordinals() {
    let postings1: Vec<(DocId, u16, f32)> = vec![(0, 0, 1.5), (5, 1, 2.5), (10, 2, 3.5)];
    let list1 =
        BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();

    let postings2: Vec<(DocId, u16, f32)> = vec![(0, 0, 4.5), (3, 1, 5.5), (7, 3, 6.5)];
    let list2 =
        BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();

    let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 100)]);

    // Round-trip through the serialized form so the on-disk layout is checked too.
    let mut bytes = Vec::new();
    merged.serialize(&mut bytes).unwrap();
    let loaded =
        BlockSparsePostingList::deserialize(&mut std::io::Cursor::new(&bytes)).unwrap();

    // Expected postings in iteration order: (doc id, ordinal, weight).
    // Element types are left to inference so they follow whatever the
    // iterator accessors return.
    let expected = [
        (0, 0, 1.5),
        (5, 1, 2.5),
        (10, 2, 3.5),
        (100, 0, 4.5),
        (103, 1, 5.5),
        (107, 3, 6.5),
    ];

    let mut iter = loaded.iterator();
    for (position, &(doc, ordinal, weight)) in expected.iter().enumerate() {
        assert_eq!(iter.doc(), doc, "Unexpected doc id at posting {}", position);
        assert!(
            (iter.weight() - weight).abs() < 0.01,
            "Weight should be {}, got {}",
            weight,
            iter.weight()
        );
        assert_eq!(iter.ordinal(), ordinal, "Unexpected ordinal for doc {}", doc);
        iter.advance();
    }

    // Nothing may follow the six expected postings.
    assert_eq!(iter.doc(), super::TERMINATED);
}
1386
/// Checks that the merged list reports the larger of the two input lists'
/// global maximum weights, and that the value survives serialization.
///
/// List 1 peaks at 7.0 and list 2 at 6.0, so 7.0 is expected for the merged
/// list both before and after the round trip.
#[test]
fn test_merge_global_max_weight() {
    let first_postings: Vec<(DocId, u16, f32)> = vec![(0, 0, 3.0), (1, 0, 7.0), (2, 0, 2.0)];
    let first =
        BlockSparsePostingList::from_postings(&first_postings, WeightQuantization::Float32)
            .unwrap();

    let second_postings: Vec<(DocId, u16, f32)> = vec![(0, 0, 5.0), (1, 0, 4.0), (2, 0, 6.0)];
    let second =
        BlockSparsePostingList::from_postings(&second_postings, WeightQuantization::Float32)
            .unwrap();

    // Each input list reports its own maximum before merging.
    let first_max = first.global_max_weight();
    assert!((first_max - 7.0).abs() < 0.01);
    let second_max = second.global_max_weight();
    assert!((second_max - 6.0).abs() < 0.01);

    let merged = BlockSparsePostingList::merge_with_offsets(&[(&first, 0), (&second, 100)]);

    let merged_max = merged.global_max_weight();
    assert!(
        (merged_max - 7.0).abs() < 0.01,
        "Global max should be 7.0, got {}",
        merged_max
    );

    // The maximum must still be 7.0 once the list has been written and read back.
    let mut encoded = Vec::new();
    merged.serialize(&mut encoded).unwrap();
    let mut reader = std::io::Cursor::new(&encoded);
    let reloaded = BlockSparsePostingList::deserialize(&mut reader).unwrap();

    let reloaded_max = reloaded.global_max_weight();
    assert!(
        (reloaded_max - 7.0).abs() < 0.01,
        "After roundtrip, global max should still be 7.0, got {}",
        reloaded_max
    );
}
1432
/// Simulates query-time scoring over a merged, round-tripped posting list.
///
/// Each score is computed as `query_weight * iter.weight()` with a query
/// weight of 2.0. The second list is merged with an offset of 100, so its
/// doc ids 0 and 3 are expected as 100 and 103. After the four expected
/// postings the iterator must be exhausted, so stray or duplicated postings
/// produced by the merge are detected.
#[test]
fn test_scoring_simulation_after_merge() {
    let postings1: Vec<(DocId, u16, f32)> = vec![(0, 0, 0.5), (5, 0, 0.8)];
    let list1 =
        BlockSparsePostingList::from_postings(&postings1, WeightQuantization::Float32).unwrap();

    let postings2: Vec<(DocId, u16, f32)> = vec![(0, 0, 0.6), (3, 0, 0.9)];
    let list2 =
        BlockSparsePostingList::from_postings(&postings2, WeightQuantization::Float32).unwrap();

    let merged = BlockSparsePostingList::merge_with_offsets(&[(&list1, 0), (&list2, 100)]);

    // Score against the deserialized form, not the in-memory merge result.
    let mut bytes = Vec::new();
    merged.serialize(&mut bytes).unwrap();
    let loaded =
        BlockSparsePostingList::deserialize(&mut std::io::Cursor::new(&bytes)).unwrap();

    let query_weight = 2.0f32;

    // Expected (doc id, score) pairs in iteration order.
    let expected = [(0, 1.0), (5, 1.6), (100, 1.2), (103, 1.8)];

    let mut iter = loaded.iterator();
    for &(doc, expected_score) in expected.iter() {
        assert_eq!(iter.doc(), doc);
        let score = query_weight * iter.weight();
        assert!(
            (score - expected_score).abs() < 0.01,
            // `{:.1}` keeps the expected value rendered as e.g. "1.0" rather than "1".
            "Doc {} score should be {:.1}, got {}",
            doc,
            expected_score,
            score
        );
        iter.advance();
    }

    // The merged list holds exactly four postings; nothing may follow them.
    assert_eq!(
        iter.doc(),
        super::TERMINATED,
        "Iterator should be exhausted after the four merged postings"
    );
}
1503}