use crate::structures::simd;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use std::io::{self, Read, Write};
/// Maximum number of postings stored in a single block; posting lists are
/// split into chunks of this size when they are encoded.
pub const OPT_P4D_BLOCK_SIZE: usize = 128;
/// Upper bound on the fraction of values in a block that may be stored as
/// exceptions when a bit width below 32 is considered.
const MAX_EXCEPTIONS_RATIO: f32 = 0.10;
/// Picks the bit width that minimises the estimated encoded size of `values`.
///
/// Every width from 0 to 32 is evaluated. Values that need more bits than the
/// candidate width count as exceptions; a width below 32 is only eligible when
/// its exception count does not exceed `MAX_EXCEPTIONS_RATIO` of the input
/// (rounded up). Ties are resolved in favour of the smallest width.
///
/// Returns `(bit_width, exception_count, estimated_total_bits)`; an empty
/// input yields `(0, 0, 0)`.
fn find_optimal_bit_width(values: &[u32]) -> (u8, usize, usize) {
    let n = values.len();
    if n == 0 {
        return (0, 0, 0);
    }
    let max_exceptions = ((n as f32) * MAX_EXCEPTIONS_RATIO).ceil() as usize;

    // histogram[w] = number of values that need exactly `w` bits.
    let mut histogram = [0usize; 33];
    for &value in values {
        histogram[simd::bits_needed(value) as usize] += 1;
    }

    // (width, exceptions, cost); width 32 is always eligible, so the
    // placeholder cost is guaranteed to be replaced.
    let mut best = (32u8, 0usize, usize::MAX);
    // Running count of values that fit in the width under consideration.
    let mut covered = 0usize;
    for (width, &bucket) in histogram.iter().enumerate() {
        covered += bucket;
        let outliers = n - covered;
        let is_raw = width == 32;
        if !is_raw && outliers > max_exceptions {
            continue;
        }
        // Cost model: each exception is charged 7 bits plus the bits that do
        // not fit into the main slot.
        let patch_cost = if is_raw {
            0
        } else {
            outliers * (7 + (32 - width))
        };
        let cost = n * width + patch_cost;
        if cost < best.2 {
            best = (width as u8, outliers, cost);
        }
    }
    best
}
/// Bit-packs `values` at `bit_width` bits each and collects the overflow.
///
/// Returns `(packed_bytes, exceptions)` where every exception is a
/// `(position, bits)` pair:
///
/// * width 0: nothing is packed; every non-zero value becomes an exception
///   carrying the whole value.
/// * width >= 32: values are stored as raw little-endian `u32`s and there are
///   no exceptions.
/// * otherwise: the low `bit_width` bits of each value are packed LSB-first
///   into a little-endian bit stream, and values that do not fit contribute an
///   exception holding `value >> bit_width`.
///
/// Positions are stored as `u8`, so indices are truncated to 8 bits.
fn pack_with_exceptions(values: &[u32], bit_width: u8) -> (Vec<u8>, Vec<(u8, u32)>) {
    match bit_width {
        0 => {
            let mut outliers = Vec::new();
            for (idx, &value) in values.iter().enumerate() {
                if value != 0 {
                    outliers.push((idx as u8, value));
                }
            }
            (Vec::new(), outliers)
        }
        w if w >= 32 => {
            let mut raw = Vec::with_capacity(values.len() * 4);
            for value in values {
                raw.extend_from_slice(&value.to_le_bytes());
            }
            (raw, Vec::new())
        }
        _ => {
            let width = u32::from(bit_width);
            let low_mask = (1u32 << width) - 1;
            let total_bytes = (values.len() * bit_width as usize).div_ceil(8);

            let mut stream = Vec::with_capacity(total_bytes);
            let mut outliers = Vec::new();
            // Pending bits that have not yet formed a whole byte; at most
            // 7 + 31 bits are live at once, so a u64 never overflows.
            let mut acc = 0u64;
            let mut acc_bits = 0u32;

            for (idx, &value) in values.iter().enumerate() {
                acc |= u64::from(value & low_mask) << acc_bits;
                acc_bits += width;
                while acc_bits >= 8 {
                    stream.push(acc as u8);
                    acc >>= 8;
                    acc_bits -= 8;
                }
                if value > low_mask {
                    outliers.push((idx as u8, value >> width));
                }
            }
            // Flush the trailing partial byte, zero-padded in its high bits.
            if acc_bits > 0 {
                stream.push(acc as u8);
            }
            (stream, outliers)
        }
    }
}
/// Unpacks `count` values of `bit_width` bits from `packed` into `output` and
/// then patches in the exceptions.
///
/// * width 0 fills the output with zeros.
/// * widths 8, 16 and >= 32 are delegated to the `simd` unpack helpers; for
///   width >= 32 the values are complete and exceptions are not applied.
/// * any other width is decoded from the LSB-first little-endian bit stream.
///
/// Each exception `(pos, high_bits)` with `pos < count` is merged as
/// `(high_bits << bit_width) | low_bits`.
///
/// # Panics
///
/// Panics if `output` holds fewer than `count` elements, or if the bit stream
/// ends before the byte in which a value starts.
fn unpack_with_exceptions(
    packed: &[u8],
    bit_width: u8,
    exceptions: &[(u8, u32)],
    count: usize,
    output: &mut [u32],
) {
    match bit_width {
        0 => output[..count].fill(0),
        8 => simd::unpack_8bit(packed, output, count),
        16 => simd::unpack_16bit(packed, output, count),
        w if w >= 32 => {
            simd::unpack_32bit(packed, output, count);
            return;
        }
        _ => {
            let mask = (1u64 << bit_width) - 1;
            let mut bit_pos = 0usize;
            for out in output[..count].iter_mut() {
                let byte_idx = bit_pos >> 3;
                let bit_offset = bit_pos & 7;
                // Load up to eight bytes as a little-endian word, zero-padding
                // at the end of the stream. A value spans at most 7 + 31 bits
                // from `byte_idx`, so one 64-bit window always covers it. This
                // is a safe, endian-independent replacement for an unaligned
                // pointer read.
                let end = (byte_idx + 8).min(packed.len());
                let mut window = [0u8; 8];
                window[..end - byte_idx].copy_from_slice(&packed[byte_idx..end]);
                let word = u64::from_le_bytes(window);
                *out = ((word >> bit_offset) & mask) as u32;
                bit_pos += bit_width as usize;
            }
        }
    }

    for &(pos, high_bits) in exceptions {
        let pos = pos as usize;
        if pos < count {
            output[pos] |= high_bits << bit_width;
        }
    }
}
/// Decodes a block of document IDs from its packed gap representation.
///
/// `packed` holds `count - 1` deltas of `bit_width` bits each, `exceptions`
/// holds `(position, high_bits)` patches for deltas that did not fit, and each
/// delta encodes `gap - 1`. The result is written to `output[..count]`, with
/// `output[0] = first_doc_id` and every following ID equal to
/// `previous + delta + 1` (wrapping arithmetic).
///
/// Width handling:
/// * width 0: every delta is zero unless an exception supplies its value.
/// * widths of 32 or more: deltas are raw little-endian `u32`s and exceptions
///   are ignored.
/// * other widths: deltas are read from the LSB-first little-endian bit
///   stream; 8 and 16 have dedicated byte-aligned paths.
///
/// Exception positions are expected to be unique; positions outside
/// `0..count - 1` are ignored.
///
/// # Panics
///
/// Panics if `output` holds fewer than `count` elements or if `packed` is too
/// short for `count - 1` deltas at the given width.
#[inline]
fn unpack_exceptions_delta_decode(
    packed: &[u8],
    bit_width: u8,
    exceptions: &[(u8, u32)],
    output: &mut [u32],
    first_doc_id: u32,
    count: usize,
) {
    if count == 0 {
        return;
    }
    output[0] = first_doc_id;
    if count == 1 {
        return;
    }

    let num_deltas = count - 1;
    // Anything wider than 32 bits cannot be told apart from raw 32-bit storage.
    let width = bit_width.min(32);
    let deltas = &mut output[1..count];

    // Pass 1: write the low bits of every delta into its output slot.
    match width {
        0 => deltas.fill(0),
        8 => {
            for (slot, &byte) in deltas.iter_mut().zip(&packed[..num_deltas]) {
                *slot = u32::from(byte);
            }
        }
        16 => {
            let bytes = &packed[..num_deltas * 2];
            for (slot, pair) in deltas.iter_mut().zip(bytes.chunks_exact(2)) {
                *slot = u32::from(u16::from_le_bytes([pair[0], pair[1]]));
            }
        }
        32 => {
            let bytes = &packed[..num_deltas * 4];
            for (slot, quad) in deltas.iter_mut().zip(bytes.chunks_exact(4)) {
                *slot = u32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]);
            }
        }
        _ => {
            let bits = usize::from(width);
            // Slicing to the exact payload size turns a truncated buffer into
            // a panic instead of an out-of-bounds read.
            let bytes = &packed[..(num_deltas * bits).div_ceil(8)];
            let mask = (1u64 << width) - 1;
            for (i, slot) in deltas.iter_mut().enumerate() {
                let bit_pos = i * bits;
                let byte_idx = bit_pos >> 3;
                // Zero-padded little-endian window; a delta spans at most
                // 7 + 31 bits from `byte_idx`, so eight bytes always suffice.
                let end = (byte_idx + 8).min(bytes.len());
                let mut window = [0u8; 8];
                window[..end - byte_idx].copy_from_slice(&bytes[byte_idx..end]);
                let word = u64::from_le_bytes(window);
                *slot = ((word >> (bit_pos & 7)) & mask) as u32;
            }
        }
    }

    // Pass 2: merge the high bits of the exceptions. For width 0 the
    // exception carries the whole delta, so this must run for that case too.
    if width < 32 {
        for &(pos, high_bits) in exceptions {
            if let Some(slot) = deltas.get_mut(usize::from(pos)) {
                *slot |= high_bits << width;
            }
        }
    }

    // Pass 3: prefix-sum the gaps into absolute document IDs.
    let mut carry = first_doc_id;
    for slot in deltas.iter_mut() {
        carry = carry.wrapping_add(*slot).wrapping_add(1);
        *slot = carry;
    }
}
/// One encoded block of a posting list: packed document-ID gaps and term
/// frequencies together with the metadata needed to decode or skip the block.
#[derive(Debug, Clone)]
pub struct OptP4DBlock {
/// First document ID of the block; decoding starts from this value.
pub first_doc_id: u32,
/// Last document ID of the block; `seek` compares it against the target to
/// skip whole blocks.
pub last_doc_id: u32,
/// Number of postings stored in the block.
pub num_docs: u16,
/// Bit width used for the packed document-ID deltas.
pub doc_bit_width: u8,
/// Bit width used for the packed term frequencies.
pub tf_bit_width: u8,
/// Largest term frequency in the block.
pub max_tf: u32,
/// Score upper bound for the block, computed from `max_tf` and the IDF when
/// the block is created.
pub max_block_score: f32,
/// Bit-packed low bits of the document-ID deltas (`gap - 1`).
pub doc_deltas: Vec<u8>,
/// `(position, bits)` patches for deltas that did not fit in `doc_bit_width`.
pub doc_exceptions: Vec<(u8, u32)>,
/// Bit-packed low bits of the term frequencies (stored as `tf - 1`).
pub term_freqs: Vec<u8>,
/// `(position, bits)` patches for term frequencies that did not fit in
/// `tf_bit_width`.
pub tf_exceptions: Vec<(u8, u32)>,
}
impl OptP4DBlock {
pub fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
writer.write_u32::<LittleEndian>(self.first_doc_id)?;
writer.write_u32::<LittleEndian>(self.last_doc_id)?;
writer.write_u16::<LittleEndian>(self.num_docs)?;
writer.write_u8(self.doc_bit_width)?;
writer.write_u8(self.tf_bit_width)?;
writer.write_u32::<LittleEndian>(self.max_tf)?;
writer.write_f32::<LittleEndian>(self.max_block_score)?;
writer.write_u16::<LittleEndian>(self.doc_deltas.len() as u16)?;
writer.write_all(&self.doc_deltas)?;
writer.write_u8(self.doc_exceptions.len() as u8)?;
for &(pos, val) in &self.doc_exceptions {
writer.write_u8(pos)?;
writer.write_u32::<LittleEndian>(val)?;
}
writer.write_u16::<LittleEndian>(self.term_freqs.len() as u16)?;
writer.write_all(&self.term_freqs)?;
writer.write_u8(self.tf_exceptions.len() as u8)?;
for &(pos, val) in &self.tf_exceptions {
writer.write_u8(pos)?;
writer.write_u32::<LittleEndian>(val)?;
}
Ok(())
}
pub fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
let first_doc_id = reader.read_u32::<LittleEndian>()?;
let last_doc_id = reader.read_u32::<LittleEndian>()?;
let num_docs = reader.read_u16::<LittleEndian>()?;
let doc_bit_width = reader.read_u8()?;
let tf_bit_width = reader.read_u8()?;
let max_tf = reader.read_u32::<LittleEndian>()?;
let max_block_score = reader.read_f32::<LittleEndian>()?;
let doc_deltas_len = reader.read_u16::<LittleEndian>()? as usize;
let mut doc_deltas = vec![0u8; doc_deltas_len];
reader.read_exact(&mut doc_deltas)?;
let num_doc_exceptions = reader.read_u8()? as usize;
let mut doc_exceptions = Vec::with_capacity(num_doc_exceptions);
for _ in 0..num_doc_exceptions {
let pos = reader.read_u8()?;
let val = reader.read_u32::<LittleEndian>()?;
doc_exceptions.push((pos, val));
}
let term_freqs_len = reader.read_u16::<LittleEndian>()? as usize;
let mut term_freqs = vec![0u8; term_freqs_len];
reader.read_exact(&mut term_freqs)?;
let num_tf_exceptions = reader.read_u8()? as usize;
let mut tf_exceptions = Vec::with_capacity(num_tf_exceptions);
for _ in 0..num_tf_exceptions {
let pos = reader.read_u8()?;
let val = reader.read_u32::<LittleEndian>()?;
tf_exceptions.push((pos, val));
}
Ok(Self {
first_doc_id,
last_doc_id,
num_docs,
doc_bit_width,
tf_bit_width,
max_tf,
max_block_score,
doc_deltas,
doc_exceptions,
term_freqs,
tf_exceptions,
})
}
pub fn decode_doc_ids(&self) -> Vec<u32> {
let mut output = vec![0u32; self.num_docs as usize];
self.decode_doc_ids_into(&mut output);
output
}
#[inline]
pub fn decode_doc_ids_into(&self, output: &mut [u32]) -> usize {
let count = self.num_docs as usize;
if count == 0 {
return 0;
}
unpack_exceptions_delta_decode(
&self.doc_deltas,
self.doc_bit_width,
&self.doc_exceptions,
output,
self.first_doc_id,
count,
);
count
}
pub fn decode_term_freqs(&self) -> Vec<u32> {
let mut output = vec![0u32; self.num_docs as usize];
self.decode_term_freqs_into(&mut output);
output
}
#[inline]
pub fn decode_term_freqs_into(&self, output: &mut [u32]) -> usize {
let count = self.num_docs as usize;
if count == 0 {
return 0;
}
unpack_with_exceptions(
&self.term_freqs,
self.tf_bit_width,
&self.tf_exceptions,
count,
output,
);
simd::add_one(output, count);
count
}
}
/// A posting list stored as a sequence of independently decodable
/// [`OptP4DBlock`]s.
#[derive(Debug, Clone)]
pub struct OptP4DPostingList {
/// Encoded blocks, in the order the postings were supplied.
pub blocks: Vec<OptP4DBlock>,
/// Total number of postings across all blocks.
pub doc_count: u32,
/// Largest `max_block_score` over all blocks (0.0 for an empty list).
pub max_score: f32,
}
impl OptP4DPostingList {
    /// Builds a posting list from parallel slices of document IDs and term
    /// frequencies, splitting them into blocks of at most
    /// `OPT_P4D_BLOCK_SIZE` postings.
    ///
    /// Gaps are stored as `doc_ids[j] - doc_ids[j - 1] - 1` and frequencies as
    /// `tf - 1`, so `doc_ids` must be strictly increasing and every term
    /// frequency must be at least 1; otherwise those subtractions overflow
    /// (a panic in debug builds).
    ///
    /// # Panics
    ///
    /// Panics if the two slices differ in length.
    pub fn from_postings(doc_ids: &[u32], term_freqs: &[u32], idf: f32) -> Self {
        assert_eq!(doc_ids.len(), term_freqs.len());
        if doc_ids.is_empty() {
            return Self {
                blocks: Vec::new(),
                doc_count: 0,
                max_score: 0.0,
            };
        }

        // Both slices have the same length, so their chunks line up one to one.
        let blocks: Vec<OptP4DBlock> = doc_ids
            .chunks(OPT_P4D_BLOCK_SIZE)
            .zip(term_freqs.chunks(OPT_P4D_BLOCK_SIZE))
            .map(|(block_docs, block_tfs)| Self::create_block(block_docs, block_tfs, idf))
            .collect();
        let max_score = blocks
            .iter()
            .fold(0.0f32, |best, block| best.max(block.max_block_score));

        Self {
            blocks,
            doc_count: doc_ids.len() as u32,
            max_score,
        }
    }

    /// Encodes one block of at most `OPT_P4D_BLOCK_SIZE` postings.
    ///
    /// `doc_ids` must be non-empty and strictly increasing, and every entry of
    /// `term_freqs` must be at least 1.
    fn create_block(doc_ids: &[u32], term_freqs: &[u32], idf: f32) -> OptP4DBlock {
        let num_docs = doc_ids.len();
        let first_doc_id = doc_ids[0];
        let last_doc_id = *doc_ids.last().unwrap();

        // The first ID is stored verbatim, so only `num_docs - 1` gaps are
        // packed; consecutive IDs produce a gap value of zero.
        let mut deltas = [0u32; OPT_P4D_BLOCK_SIZE];
        for j in 1..num_docs {
            deltas[j - 1] = doc_ids[j] - doc_ids[j - 1] - 1;
        }
        let gaps = &deltas[..num_docs.saturating_sub(1)];
        let (doc_bit_width, _, _) = find_optimal_bit_width(gaps);
        let (doc_deltas, doc_exceptions) = pack_with_exceptions(gaps, doc_bit_width);

        // Frequencies are shifted down by one before packing; `max_tf` keeps
        // the unshifted maximum.
        let mut tfs = [0u32; OPT_P4D_BLOCK_SIZE];
        let mut max_tf = 0u32;
        for (j, &tf) in term_freqs.iter().enumerate() {
            tfs[j] = tf - 1;
            max_tf = max_tf.max(tf);
        }
        let shifted_tfs = &tfs[..num_docs];
        let (tf_bit_width, _, _) = find_optimal_bit_width(shifted_tfs);
        let (term_freqs_packed, tf_exceptions) = pack_with_exceptions(shifted_tfs, tf_bit_width);

        let max_block_score = crate::query::bm25_upper_bound(max_tf as f32, idf);
        OptP4DBlock {
            first_doc_id,
            last_doc_id,
            num_docs: num_docs as u16,
            doc_bit_width,
            tf_bit_width,
            max_tf,
            max_block_score,
            doc_deltas,
            doc_exceptions,
            term_freqs: term_freqs_packed,
            tf_exceptions,
        }
    }

    /// Writes the list as `doc_count: u32`, `max_score: f32`,
    /// `block_count: u32` (all little-endian) followed by every block.
    ///
    /// # Errors
    ///
    /// Propagates any error from `writer` or from serializing a block.
    pub fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u32::<LittleEndian>(self.doc_count)?;
        writer.write_f32::<LittleEndian>(self.max_score)?;
        writer.write_u32::<LittleEndian>(self.blocks.len() as u32)?;
        for block in &self.blocks {
            block.serialize(writer)?;
        }
        Ok(())
    }

    /// Reads a list previously written by [`OptP4DPostingList::serialize`].
    ///
    /// # Errors
    ///
    /// Propagates any I/O error from `reader`, including an unexpected end of
    /// input.
    pub fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        // The block count comes straight from the input, so it must not drive
        // an unbounded allocation: a corrupt header could otherwise request
        // billions of blocks up front. Larger lists simply grow on demand.
        const MAX_PREALLOCATED_BLOCKS: usize = 1024;

        let doc_count = reader.read_u32::<LittleEndian>()?;
        let max_score = reader.read_f32::<LittleEndian>()?;
        let num_blocks = reader.read_u32::<LittleEndian>()? as usize;
        let mut blocks = Vec::with_capacity(num_blocks.min(MAX_PREALLOCATED_BLOCKS));
        for _ in 0..num_blocks {
            blocks.push(OptP4DBlock::deserialize(reader)?);
        }
        Ok(Self {
            blocks,
            doc_count,
            max_score,
        })
    }

    /// Returns the total number of postings in the list.
    pub fn len(&self) -> u32 {
        self.doc_count
    }

    /// Returns `true` when the list holds no postings.
    pub fn is_empty(&self) -> bool {
        self.doc_count == 0
    }

    /// Returns an iterator positioned on the first posting of the list.
    pub fn iterator(&self) -> OptP4DIterator<'_> {
        OptP4DIterator::new(self)
    }
}
/// Cursor over an [`OptP4DPostingList`] that decodes one block at a time into
/// reusable buffers.
pub struct OptP4DIterator<'a> {
/// The posting list being traversed.
posting_list: &'a OptP4DPostingList,
/// Index into `posting_list.blocks` of the block currently decoded.
current_block: usize,
/// Number of postings decoded from the current block.
current_block_len: usize,
/// Decoded document IDs of the current block (`OPT_P4D_BLOCK_SIZE` slots).
block_doc_ids: Vec<u32>,
/// Decoded term frequencies of the current block (`OPT_P4D_BLOCK_SIZE` slots).
block_term_freqs: Vec<u32>,
/// Position of the cursor inside the current block.
pos_in_block: usize,
/// Set once the cursor has moved past the last posting.
exhausted: bool,
}
impl<'a> OptP4DIterator<'a> {
    /// Creates a cursor positioned on the first posting of `posting_list`.
    ///
    /// A list without blocks yields a cursor that is exhausted from the start.
    pub fn new(posting_list: &'a OptP4DPostingList) -> Self {
        let mut iter = Self {
            posting_list,
            current_block: 0,
            current_block_len: 0,
            block_doc_ids: vec![0u32; OPT_P4D_BLOCK_SIZE],
            block_term_freqs: vec![0u32; OPT_P4D_BLOCK_SIZE],
            pos_in_block: 0,
            exhausted: posting_list.blocks.is_empty(),
        };
        if !iter.exhausted {
            iter.decode_current_block();
        }
        iter
    }

    /// Decodes the block at `current_block` into the buffers and rewinds the
    /// in-block position to its first posting.
    #[inline]
    fn decode_current_block(&mut self) {
        let block = &self.posting_list.blocks[self.current_block];
        self.current_block_len = block.decode_doc_ids_into(&mut self.block_doc_ids);
        block.decode_term_freqs_into(&mut self.block_term_freqs);
        self.pos_in_block = 0;
    }

    /// Returns the current document ID, or `u32::MAX` once exhausted.
    #[inline]
    pub fn doc(&self) -> u32 {
        if self.exhausted {
            u32::MAX
        } else {
            self.block_doc_ids[self.pos_in_block]
        }
    }

    /// Returns the term frequency of the current posting, or 0 once exhausted.
    #[inline]
    pub fn term_freq(&self) -> u32 {
        if self.exhausted {
            0
        } else {
            self.block_term_freqs[self.pos_in_block]
        }
    }

    /// Moves to the next posting and returns its document ID, or `u32::MAX`
    /// when there is none.
    #[inline]
    pub fn advance(&mut self) -> u32 {
        if self.exhausted {
            return u32::MAX;
        }
        self.pos_in_block += 1;
        if self.pos_in_block >= self.current_block_len {
            self.current_block += 1;
            if self.current_block >= self.posting_list.blocks.len() {
                self.exhausted = true;
                return u32::MAX;
            }
            self.decode_current_block();
        }
        self.doc()
    }

    /// Moves forward to the first posting whose document ID is `>= target`
    /// and returns that ID, or `u32::MAX` if no such posting remains.
    ///
    /// The cursor never moves backwards: if the current document already
    /// satisfies the target, it stays where it is.
    pub fn seek(&mut self, target: u32) -> u32 {
        if self.exhausted {
            return u32::MAX;
        }

        // Copy the shared reference out so the block list can be inspected
        // while the cursor fields are updated.
        let blocks = &self.posting_list.blocks;

        // Skip whole blocks using only their `last_doc_id`.
        let start_block = self.current_block;
        while self.current_block < blocks.len() && blocks[self.current_block].last_doc_id < target
        {
            self.current_block += 1;
        }
        if self.current_block >= blocks.len() {
            self.exhausted = true;
            return u32::MAX;
        }

        // Decode only when the cursor landed in a different block (or nothing
        // has been decoded yet). The buffers otherwise already hold this
        // block, and re-decoding would rewind the in-block position.
        if self.current_block != start_block || self.current_block_len == 0 {
            self.decode_current_block();
        }

        // Both outcomes of the search give the offset of the first ID that is
        // `>= target` relative to the current position.
        let remaining = &self.block_doc_ids[self.pos_in_block..self.current_block_len];
        let offset = match remaining.binary_search(&target) {
            Ok(idx) | Err(idx) => idx,
        };
        self.pos_in_block += offset;

        // Defensive: if the block's contents disagree with its `last_doc_id`,
        // fall through to the start of the next block.
        if self.pos_in_block >= self.current_block_len {
            self.current_block += 1;
            if self.current_block >= blocks.len() {
                self.exhausted = true;
                return u32::MAX;
            }
            self.decode_current_block();
        }
        self.doc()
    }
}
// Unit tests for the bit-width selection, packing helpers, posting list
// construction, iteration, seeking and serialization.
#[cfg(test)]
mod tests {
use super::*;
// Pins `simd::bits_needed` at zero, around powers of two, and at `u32::MAX`.
#[test]
fn test_bits_needed() {
assert_eq!(simd::bits_needed(0), 0);
assert_eq!(simd::bits_needed(1), 1);
assert_eq!(simd::bits_needed(2), 2);
assert_eq!(simd::bits_needed(3), 2);
assert_eq!(simd::bits_needed(4), 3);
assert_eq!(simd::bits_needed(255), 8);
assert_eq!(simd::bits_needed(256), 9);
assert_eq!(simd::bits_needed(u32::MAX), 32);
}
// Covers three inputs: all zeros, values below 16, and values below 16 with
// a single large outlier that should become an exception.
#[test]
fn test_find_optimal_bit_width() {
let values = vec![0u32; 100];
let (bits, exceptions, _) = find_optimal_bit_width(&values);
assert_eq!(bits, 0);
assert_eq!(exceptions, 0);
let values: Vec<u32> = (0..100).map(|i| i % 16).collect();
let (bits, _, _) = find_optimal_bit_width(&values);
assert!(bits <= 4);
let mut values: Vec<u32> = (0..100).map(|i| i % 16).collect();
values[50] = 1_000_000; let (bits, exceptions, _) = find_optimal_bit_width(&values);
assert!(bits < 20); assert!(exceptions >= 1);
}
// Round-trips values through pack/unpack at 4 bits; 255 and 1000 do not fit
// and must be restored from exceptions.
#[test]
fn test_pack_unpack_with_exceptions() {
let values = vec![1, 2, 3, 255, 4, 5, 1000, 6, 7, 8];
let (packed, exceptions) = pack_with_exceptions(&values, 4);
let mut output = vec![0u32; values.len()];
unpack_with_exceptions(&packed, 4, &exceptions, values.len(), &mut output);
assert_eq!(output, values);
}
// A list of 100 postings fits in one block and iterates back unchanged,
// ending with the `u32::MAX` sentinel.
#[test]
fn test_opt_p4d_posting_list_small() {
let doc_ids: Vec<u32> = (0..100).map(|i| i * 2).collect();
let term_freqs: Vec<u32> = vec![1; 100];
let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
assert_eq!(list.len(), 100);
assert_eq!(list.blocks.len(), 1);
let mut iter = list.iterator();
for (i, &expected) in doc_ids.iter().enumerate() {
assert_eq!(iter.doc(), expected, "Mismatch at {}", i);
assert_eq!(iter.term_freq(), 1);
iter.advance();
}
assert_eq!(iter.doc(), u32::MAX);
}
// 500 postings span four blocks; both doc IDs and varying term frequencies
// must survive iteration across block boundaries.
#[test]
fn test_opt_p4d_posting_list_large() {
let doc_ids: Vec<u32> = (0..500).map(|i| i * 3).collect();
let term_freqs: Vec<u32> = (0..500).map(|i| (i % 10) + 1).collect();
let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
assert_eq!(list.len(), 500);
assert_eq!(list.blocks.len(), 4);
let mut iter = list.iterator();
for (i, &expected) in doc_ids.iter().enumerate() {
assert_eq!(iter.doc(), expected, "Mismatch at {}", i);
assert_eq!(iter.term_freq(), term_freqs[i]);
iter.advance();
}
}
// Seeks within a single block: between IDs, onto an exact ID, and past the
// last ID.
#[test]
fn test_opt_p4d_seek() {
let doc_ids: Vec<u32> = vec![10, 20, 30, 100, 200, 300, 1000, 2000];
let term_freqs: Vec<u32> = vec![1; 8];
let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
let mut iter = list.iterator();
assert_eq!(iter.seek(25), 30);
assert_eq!(iter.seek(100), 100);
assert_eq!(iter.seek(500), 1000);
assert_eq!(iter.seek(3000), u32::MAX);
}
// Serializes a two-block list to a buffer, reads it back, and compares the
// two lists posting by posting.
#[test]
fn test_opt_p4d_serialization() {
let doc_ids: Vec<u32> = (0..200).map(|i| i * 5).collect();
let term_freqs: Vec<u32> = (0..200).map(|i| (i % 5) + 1).collect();
let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
let mut buffer = Vec::new();
list.serialize(&mut buffer).unwrap();
let restored = OptP4DPostingList::deserialize(&mut &buffer[..]).unwrap();
assert_eq!(restored.len(), list.len());
assert_eq!(restored.blocks.len(), list.blocks.len());
let mut iter1 = list.iterator();
let mut iter2 = restored.iterator();
while iter1.doc() != u32::MAX {
assert_eq!(iter1.doc(), iter2.doc());
assert_eq!(iter1.term_freq(), iter2.term_freq());
iter1.advance();
iter2.advance();
}
}
// After sorting, the 1_000_000 entry is the last ID of the block, producing
// one gap far larger than the rest; it must still decode exactly.
#[test]
fn test_opt_p4d_with_outliers() {
let mut doc_ids: Vec<u32> = (0..128).map(|i| i * 2).collect();
doc_ids[64] = 1_000_000;
doc_ids.sort();
let term_freqs: Vec<u32> = vec![1; 128];
let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
let mut iter = list.iterator();
let mut found_outlier = false;
while iter.doc() != u32::MAX {
if iter.doc() == 1_000_000 {
found_outlier = true;
}
iter.advance();
}
assert!(found_outlier, "Outlier value should be preserved");
}
// 1024 postings fill exactly eight blocks; checks every doc ID and term
// frequency and the end-of-list sentinel.
#[test]
fn test_opt_p4d_simd_full_blocks() {
let doc_ids: Vec<u32> = (0..1024).map(|i| i * 2).collect();
let term_freqs: Vec<u32> = (0..1024).map(|i| (i % 20) + 1).collect();
let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
assert_eq!(list.len(), 1024);
assert_eq!(list.blocks.len(), 8);
let mut iter = list.iterator();
for (i, &expected_doc) in doc_ids.iter().enumerate() {
assert_eq!(iter.doc(), expected_doc, "Doc mismatch at {}", i);
assert_eq!(iter.term_freq(), term_freqs[i], "TF mismatch at {}", i);
iter.advance();
}
assert_eq!(iter.doc(), u32::MAX);
}
// Consecutive doc IDs (all gaps zero) combined with term frequencies from
// 1 to 100.
#[test]
fn test_opt_p4d_simd_8bit_values() {
let doc_ids: Vec<u32> = (0..256).collect();
let term_freqs: Vec<u32> = (0..256).map(|i| (i % 100) + 1).collect();
let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
let mut iter = list.iterator();
for (i, &expected_doc) in doc_ids.iter().enumerate() {
assert_eq!(iter.doc(), expected_doc, "Doc mismatch at {}", i);
assert_eq!(iter.term_freq(), term_freqs[i], "TF mismatch at {}", i);
iter.advance();
}
}
// Doc IDs with gaps cycling through 1..=10 exercise delta decoding over four
// blocks.
#[test]
fn test_opt_p4d_simd_delta_decode() {
let mut doc_ids = Vec::with_capacity(512);
let mut current = 0u32;
for i in 0..512 {
current += (i % 10) + 1; doc_ids.push(current);
}
let term_freqs: Vec<u32> = vec![1; 512];
let list = OptP4DPostingList::from_postings(&doc_ids, &term_freqs, 1.0);
let mut iter = list.iterator();
for (i, &expected_doc) in doc_ids.iter().enumerate() {
assert_eq!(
iter.doc(),
expected_doc,
"Doc mismatch at {} (expected {}, got {})",
i,
expected_doc,
iter.doc()
);
iter.advance();
}
}
}