use crate::docset::{DocSet, TERMINATED};
use crate::postings::{Postings, SegmentPostings};
use crate::DocId;
pub struct LoadedPostings {
doc_ids: Box<[DocId]>,
position_offsets: Box<[u32]>,
positions: Box<[u32]>,
offset_offsets: Box<[u32]>,
byte_offsets: Box<[(u32, u32)]>,
cursor: usize,
}
impl LoadedPostings {
pub fn load(segment_postings: &mut SegmentPostings) -> LoadedPostings {
let num_docs = segment_postings.doc_freq() as usize;
let mut doc_ids = Vec::with_capacity(num_docs);
let mut positions = Vec::with_capacity(num_docs);
let mut position_offsets = Vec::with_capacity(num_docs);
let mut byte_offsets = Vec::new();
let mut offset_offsets = Vec::new();
while segment_postings.doc() != TERMINATED {
position_offsets.push(positions.len() as u32);
offset_offsets.push(byte_offsets.len() as u32);
doc_ids.push(segment_postings.doc());
segment_postings.append_positions_with_offset(0, &mut positions);
segment_postings.append_offsets(&mut byte_offsets);
segment_postings.advance();
}
position_offsets.push(positions.len() as u32);
offset_offsets.push(byte_offsets.len() as u32);
LoadedPostings {
doc_ids: doc_ids.into_boxed_slice(),
positions: positions.into_boxed_slice(),
position_offsets: position_offsets.into_boxed_slice(),
offset_offsets: offset_offsets.into_boxed_slice(),
byte_offsets: byte_offsets.into_boxed_slice(),
cursor: 0,
}
}
}
#[cfg(test)]
impl From<(Vec<DocId>, Vec<Vec<u32>>)> for LoadedPostings {
fn from(doc_ids_and_positions: (Vec<DocId>, Vec<Vec<u32>>)) -> LoadedPostings {
let mut position_offsets = Vec::new();
let mut all_positions = Vec::new();
let (doc_ids, docid_positions) = doc_ids_and_positions;
for positions in docid_positions {
position_offsets.push(all_positions.len() as u32);
all_positions.extend_from_slice(&positions);
}
position_offsets.push(all_positions.len() as u32);
let offset_offsets = vec![0u32; position_offsets.len()];
LoadedPostings {
doc_ids: doc_ids.into_boxed_slice(),
positions: all_positions.into_boxed_slice(),
position_offsets: position_offsets.into_boxed_slice(),
offset_offsets: offset_offsets.into_boxed_slice(),
byte_offsets: Box::new([]),
cursor: 0,
}
}
}
impl DocSet for LoadedPostings {
fn advance(&mut self) -> DocId {
self.cursor += 1;
if self.cursor >= self.doc_ids.len() {
self.cursor = self.doc_ids.len();
return TERMINATED;
}
self.doc()
}
fn doc(&self) -> DocId {
if self.cursor >= self.doc_ids.len() {
return TERMINATED;
}
self.doc_ids[self.cursor]
}
fn size_hint(&self) -> u32 {
self.doc_ids.len() as u32
}
}
impl Postings for LoadedPostings {
fn term_freq(&self) -> u32 {
let start = self.position_offsets[self.cursor] as usize;
let end = self.position_offsets[self.cursor + 1] as usize;
(end - start) as u32
}
fn append_positions_with_offset(&mut self, offset: u32, output: &mut Vec<u32>) {
let start = self.position_offsets[self.cursor] as usize;
let end = self.position_offsets[self.cursor + 1] as usize;
for pos in &self.positions[start..end] {
output.push(*pos + offset);
}
}
fn append_offsets(&mut self, output: &mut Vec<(u32, u32)>) {
let start = self.offset_offsets[self.cursor] as usize;
let end = self.offset_offsets[self.cursor + 1] as usize;
output.extend_from_slice(&self.byte_offsets[start..end]);
}
fn append_positions_and_offsets(&mut self, offset: u32, output: &mut Vec<(u32, u32, u32)>) {
let pos_start = self.position_offsets[self.cursor] as usize;
let pos_end = self.position_offsets[self.cursor + 1] as usize;
let off_start = self.offset_offsets[self.cursor] as usize;
let positions = &self.positions[pos_start..pos_end];
let offsets = &self.byte_offsets[off_start..];
for (i, &pos) in positions.iter().enumerate() {
let (from, to) = offsets.get(i).copied().unwrap_or((0, 0));
output.push((pos + offset, from, to));
}
}
}
#[cfg(test)]
pub(crate) mod tests {
use super::*;
#[test]
pub fn test_vec_postings() {
let doc_ids: Vec<DocId> = (0u32..1024u32).map(|e| e * 3).collect();
let mut postings = LoadedPostings::from((doc_ids, vec![]));
assert_eq!(postings.doc(), 0u32);
assert_eq!(postings.advance(), 3u32);
assert_eq!(postings.doc(), 3u32);
assert_eq!(postings.seek(14u32), 15u32);
assert_eq!(postings.doc(), 15u32);
assert_eq!(postings.seek(300u32), 300u32);
assert_eq!(postings.doc(), 300u32);
assert_eq!(postings.seek(6000u32), TERMINATED);
}
#[test]
pub fn test_vec_postings2() {
let doc_ids: Vec<DocId> = (0u32..1024u32).map(|e| e * 3).collect();
let mut positions = Vec::new();
positions.resize(1024, Vec::new());
positions[0] = vec![1u32, 2u32, 3u32];
positions[1] = vec![30u32];
positions[2] = vec![10u32];
positions[4] = vec![50u32];
let mut postings = LoadedPostings::from((doc_ids, positions));
let load = |postings: &mut LoadedPostings| {
let mut loaded_positions = Vec::new();
postings.positions(loaded_positions.as_mut());
loaded_positions
};
assert_eq!(postings.doc(), 0u32);
assert_eq!(load(&mut postings), vec![1u32, 2u32, 3u32]);
assert_eq!(postings.advance(), 3u32);
assert_eq!(postings.doc(), 3u32);
assert_eq!(load(&mut postings), vec![30u32]);
assert_eq!(postings.seek(14u32), 15u32);
assert_eq!(postings.doc(), 15u32);
assert_eq!(postings.seek(300u32), 300u32);
assert_eq!(postings.doc(), 300u32);
assert_eq!(postings.seek(6000u32), TERMINATED);
}
}