use crate::block::{BlockMeta, PostingBlock};
/// One entry of a term's skip index: a block's metadata paired with the
/// position of that block.
///
/// `TermBlocks::from_blocks` creates one entry per posting block, in block
/// order.
#[derive(Debug, Clone, Copy)]
pub struct SkipEntry {
/// Metadata of the block, as returned by the block's `meta()` method.
pub meta: BlockMeta,
/// Index of the block within `TermBlocks::blocks` (set from the enumeration
/// index in `from_blocks`).
pub block_idx: u32,
}
/// The posting blocks of a single term together with a skip index and
/// aggregate statistics derived from the per-block metadata.
///
/// The field descriptions below state what `TermBlocks::from_blocks` stores;
/// all fields are public, so values built by hand are not guaranteed to
/// follow them.
#[derive(Debug)]
pub struct TermBlocks {
/// The posting blocks, in the order they were supplied.
pub blocks: Vec<PostingBlock>,
/// One skip entry per block, in the same order as `blocks`.
pub skip: Vec<SkipEntry>,
/// Sum of `doc_count` over all blocks (presumably the term's document
/// frequency — confirm against callers).
pub df: u32,
/// Largest `block_max_tf` over all blocks; 0 when there are no blocks.
pub global_max_tf: u32,
/// Smallest `block_min_fieldnorm` over all blocks, as computed by
/// `from_blocks`; 0 when there are no blocks.
pub global_min_fieldnorm: u8,
}
impl TermBlocks {
    /// Builds the skip index and the aggregate statistics for a term from its
    /// posting blocks.
    ///
    /// One [`SkipEntry`] is produced per block, in input order, carrying the
    /// block's metadata and its index in `blocks`. While walking the blocks the
    /// following aggregates are computed:
    ///
    /// * `df` — sum of every block's `doc_count`;
    /// * `global_max_tf` — maximum of every block's `block_max_tf`
    ///   (0 for an empty input);
    /// * `global_min_fieldnorm` — minimum of every block's
    ///   `block_min_fieldnorm`, or 0 when no documents were aggregated.
    ///
    /// A genuine minimum of `u8::MAX` is preserved: the value is only forced
    /// to 0 when there is nothing to take a minimum over, instead of whenever
    /// the result happens to equal the initial sentinel.
    pub fn from_blocks(blocks: Vec<PostingBlock>) -> Self {
        let mut df = 0u32;
        let mut global_max_tf = 0u32;
        // `u8::MAX` is the identity element for `min`; it is replaced below if
        // no documents contributed to it.
        let mut global_min_fieldnorm = u8::MAX;
        let mut skip: Vec<SkipEntry> = Vec::with_capacity(blocks.len());

        for (i, block) in blocks.iter().enumerate() {
            let meta = block.meta();
            df += meta.doc_count as u32;
            global_max_tf = global_max_tf.max(meta.block_max_tf);
            global_min_fieldnorm = global_min_fieldnorm.min(meta.block_min_fieldnorm);
            skip.push(SkipEntry {
                meta,
                // Block indices are stored as u32 in the skip entry.
                block_idx: i as u32,
            });
        }

        // With no documents there is no real minimum; report 0 in that case
        // only, so that a true minimum of 255 is not misreported as 0.
        if df == 0 {
            global_min_fieldnorm = 0;
        }

        Self {
            blocks,
            skip,
            df,
            global_max_tf,
            global_min_fieldnorm,
        }
    }

    /// Returns the number of posting blocks held for this term.
    pub fn num_blocks(&self) -> usize {
        self.blocks.len()
    }

    /// Finds the first skip entry whose `last_doc_id` is greater than or equal
    /// to `target_doc_id`.
    ///
    /// Returns the position of that entry within `skip` (which equals the
    /// block index when the value was built by [`TermBlocks::from_blocks`]),
    /// or `None` when every entry ends before `target_doc_id`, including when
    /// there are no entries at all.
    ///
    /// The lookup is a binary search, so it assumes the skip entries are
    /// ordered by ascending `last_doc_id` — this is not checked here.
    pub fn advance_to_block(&self, target_doc_id: u32) -> Option<usize> {
        let pos = self
            .skip
            .partition_point(|entry| entry.meta.last_doc_id < target_doc_id);
        if pos < self.skip.len() {
            Some(pos)
        } else {
            None
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::block::CompactPosting;
    use crate::codec::smallfloat;

    /// Builds a `TermBlocks` holding one posting per id in `doc_ids`, each
    /// with term frequency `tf`, the fieldnorm `smallfloat::encode(100)` and a
    /// single position `0`. Postings are sorted by doc id before blocking.
    fn make_term_blocks(doc_ids: &[u32], tf: u32) -> TermBlocks {
        let mut postings: Vec<CompactPosting> = Vec::with_capacity(doc_ids.len());
        for &doc_id in doc_ids {
            postings.push(CompactPosting {
                doc_id,
                term_freq: tf,
                fieldnorm: smallfloat::encode(100),
                positions: vec![0],
            });
        }
        postings.sort_by_key(|posting| posting.doc_id);
        TermBlocks::from_blocks(crate::block::into_blocks(postings))
    }

    /// 200 consecutive doc ids are expected to land in two blocks, the first
    /// ending at doc 127 and the second at doc 199.
    #[test]
    fn skip_index_basic() {
        let doc_ids: Vec<u32> = (0..200).collect();
        let term = make_term_blocks(&doc_ids, 2);

        assert_eq!(term.num_blocks(), 2);
        assert_eq!(term.df, 200);
        assert_eq!(term.global_max_tf, 2);

        // (skip entry index, expected last doc id of that block)
        let expected_last_ids: [(usize, u32); 2] = [(0, 127), (1, 199)];
        for &(entry_idx, last_doc_id) in expected_last_ids.iter() {
            assert_eq!(term.skip[entry_idx].meta.last_doc_id, last_doc_id);
        }
    }

    /// 300 consecutive doc ids are expected to span three blocks; targets past
    /// the final doc id yield `None`.
    #[test]
    fn advance_to_block() {
        let doc_ids: Vec<u32> = (0..300).collect();
        let term = make_term_blocks(&doc_ids, 1);

        // (target doc id, expected result)
        let cases: [(u32, Option<usize>); 8] = [
            (0, Some(0)),
            (50, Some(0)),
            (127, Some(0)),
            (128, Some(1)),
            (200, Some(1)),
            (256, Some(2)),
            (299, Some(2)),
            (300, None),
        ];
        for &(target, expected) in cases.iter() {
            assert_eq!(term.advance_to_block(target), expected);
        }
    }

    /// A term without any blocks has no documents and nothing to advance to.
    #[test]
    fn empty_term() {
        let term = TermBlocks::from_blocks(Vec::new());

        assert_eq!(term.df, 0);
        assert_eq!(term.num_blocks(), 0);
        assert_eq!(term.advance_to_block(0), None);
    }
}