use std::collections::HashMap;
use std::fmt;
use std::mem;
use crate::util::byte_block_pool::ByteBlockPool;
/// Mutable state gathered while a segment is being built: a byte pool, a
/// one-shot text-start hint, per-field norm data, and a document counter.
pub struct SegmentAccumulator {
/// Byte pool handed out by `term_byte_pool()` / `term_byte_pool_mut()`.
/// NOTE(review): presumably stores term bytes shared between consumers — confirm.
term_byte_pool: ByteBlockPool,
/// Pending hint written by `set_text_start_hint`; `take_text_start_hint`
/// consumes it and leaves `None` behind.
text_start_hint: Option<i32>,
/// Norm data keyed by the `field_id` passed to `record_norm`.
norms: HashMap<u32, PerFieldNormsData>,
/// Counter advanced by one on each `increment_doc_count` call.
doc_count: i32,
}
/// Norm data collected for a single field.
///
/// When populated through `SegmentAccumulator::record_norm`, `docs` and
/// `values` are parallel vectors: every call pushes exactly one entry onto
/// each, so `values[i]` is the norm recorded for `docs[i]`.
#[derive(Debug, mem_dbg::MemSize)]
pub struct PerFieldNormsData {
/// Field name supplied by the call that created this entry.
pub field_name: String,
/// Document ids in the order they were recorded.
pub docs: Vec<i32>,
/// Norm values in the order they were recorded.
pub values: Vec<i64>,
}
impl SegmentAccumulator {
    /// Creates an empty accumulator: no pending hint, no norms, a document
    /// count of zero, and a pool built with `ByteBlockPool::new(32 * 1024)`.
    pub fn new() -> Self {
        Self {
            term_byte_pool: ByteBlockPool::new(32 * 1024),
            text_start_hint: None,
            norms: HashMap::new(),
            doc_count: 0,
        }
    }

    /// Shared access to the byte pool.
    pub fn term_byte_pool(&self) -> &ByteBlockPool {
        &self.term_byte_pool
    }

    /// Exclusive access to the byte pool.
    pub fn term_byte_pool_mut(&mut self) -> &mut ByteBlockPool {
        &mut self.term_byte_pool
    }

    /// Stores `text_start` as the pending hint, replacing any hint that has
    /// not been consumed yet.
    pub fn set_text_start_hint(&mut self, text_start: i32) {
        self.text_start_hint = Some(text_start);
    }

    /// Consumes and returns the pending hint, leaving none behind.
    ///
    /// # Panics
    ///
    /// Panics if no hint is pending, i.e. nothing was set since construction,
    /// the last take, or the last clear.
    pub fn take_text_start_hint(&mut self) -> i32 {
        let pending = self.text_start_hint.take();
        pending.expect("no text_start hint set — postings must process token before term vectors")
    }

    /// Drops the pending hint, if any, without returning it.
    pub fn clear_text_start_hint(&mut self) {
        let _ = self.text_start_hint.take();
    }

    /// Appends `(doc_id, norm)` to the data kept for `field_id`.
    ///
    /// The entry is created on first use; `field_name` is only copied at that
    /// point and is ignored for a `field_id` that already has an entry.
    pub fn record_norm(&mut self, field_id: u32, field_name: &str, doc_id: i32, norm: i64) {
        let per_field = self.norms.entry(field_id).or_insert_with(|| PerFieldNormsData {
            field_name: field_name.to_owned(),
            docs: Vec::new(),
            values: Vec::new(),
        });
        // Keep the two vectors in lock-step: one push to each per call.
        per_field.docs.push(doc_id);
        per_field.values.push(norm);
    }

    /// All recorded norm data, keyed by field id.
    pub fn norms(&self) -> &HashMap<u32, PerFieldNormsData> {
        &self.norms
    }

    /// Advances the document counter by one.
    pub fn increment_doc_count(&mut self) {
        self.doc_count += 1;
    }

    /// Current value of the document counter.
    pub fn doc_count(&self) -> i32 {
        self.doc_count
    }
}
impl Default for SegmentAccumulator {
fn default() -> Self {
Self::new()
}
}
/// Summary-style `Debug` output: prints the length of the pool's `data` and
/// the number of fields with norms instead of their full contents.
impl fmt::Debug for SegmentAccumulator {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let pool_len = self.term_byte_pool.data.len();
        let field_count = self.norms.len();

        let mut out = f.debug_struct("SegmentAccumulator");
        out.field("term_byte_pool_len", &pool_len);
        out.field("text_start_hint", &self.text_start_hint);
        out.field("norms_fields", &field_count);
        out.field("doc_count", &self.doc_count);
        out.finish()
    }
}
/// Hand-written size accounting for [`SegmentAccumulator`].
///
/// Only the byte pool and the norms map are recursed into; the remaining
/// fields are plain values fully covered by `size_of::<Self>()`.
impl mem_dbg::MemSize for SegmentAccumulator {
    /// Returns the inline size of `Self` plus whatever the pool and the norms
    /// map own beyond their inline footprint.
    fn mem_size_rec(
        &self,
        flags: mem_dbg::SizeFlags,
        refs: &mut mem_dbg::HashMap<usize, usize>,
    ) -> usize {
        // Called through the trait path so this impl does not rely on
        // `MemSize` being imported into the module.
        let pool_size = mem_dbg::MemSize::mem_size_rec(&self.term_byte_pool, flags, refs);
        let norms_size = mem_dbg::MemSize::mem_size_rec(&self.norms, flags, refs);

        // A field's `mem_size_rec` already includes that field's own inline
        // bytes, which are also part of `size_of::<Self>()`. Remove them from
        // the struct's inline size before adding the per-field totals so they
        // are counted once. A struct is never smaller than the sum of its
        // fields, so these subtractions cannot underflow.
        let inline_rest = mem::size_of::<Self>()
            - mem::size_of_val(&self.term_byte_pool)
            - mem::size_of_val(&self.norms);

        inline_rest + pool_size + norms_size
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use assertables::*;
    use mem_dbg::{MemSize, SizeFlags};

    /// A freshly built accumulator reports a small, non-zero footprint.
    #[test]
    fn mem_size_empty_is_small() {
        let acc = SegmentAccumulator::new();
        let size = acc.mem_size(SizeFlags::default());
        assert_gt!(size, 0);
        assert_lt!(size, 1_000);
    }

    /// Recording norms must be reflected in the reported size.
    #[test]
    fn mem_size_grows_with_norms() {
        let mut acc = SegmentAccumulator::new();
        let before = acc.mem_size(SizeFlags::default());
        for doc_id in 0..100 {
            acc.record_norm(0, "body", doc_id, 42);
        }
        assert_gt!(acc.mem_size(SizeFlags::default()), before);
    }

    /// A hint that was set is returned by the next take.
    #[test]
    fn hint_set_and_take() {
        let mut acc = SegmentAccumulator::new();
        acc.set_text_start_hint(42);
        let result = acc.take_text_start_hint();
        assert_eq!(result, 42);
    }

    /// Taking consumes the hint; a later set/take cycle works independently.
    #[test]
    fn hint_cleared_after_take() {
        let mut acc = SegmentAccumulator::new();
        acc.set_text_start_hint(42);
        assert_eq!(acc.take_text_start_hint(), 42);
        // Without this check the test would also pass if `take` only read the
        // hint, because the following `set` overwrites it anyway.
        assert_eq!(acc.text_start_hint, None);
        acc.set_text_start_hint(99);
        let result = acc.take_text_start_hint();
        assert_eq!(result, 99);
    }

    /// Clearing drops the pending hint and leaves the accumulator reusable.
    #[test]
    fn clear_hint_allows_reset() {
        let mut acc = SegmentAccumulator::new();
        acc.set_text_start_hint(42);
        acc.clear_text_start_hint();
        // Verify the clear itself, not just that a later `set` wins.
        assert_eq!(acc.text_start_hint, None);
        acc.set_text_start_hint(99);
        let result = acc.take_text_start_hint();
        assert_eq!(result, 99);
    }

    /// Setting twice without taking keeps only the most recent hint.
    #[test]
    fn hint_overwrites_unconsumed() {
        let mut acc = SegmentAccumulator::new();
        acc.set_text_start_hint(42);
        acc.set_text_start_hint(99);
        let result = acc.take_text_start_hint();
        assert_eq!(result, 99);
    }

    /// Taking with no pending hint is a caller bug and must panic.
    #[test]
    #[should_panic(expected = "no text_start hint set")]
    fn hint_panics_on_missing() {
        let mut acc = SegmentAccumulator::new();
        acc.take_text_start_hint();
    }

    /// The pool of a new accumulator is reachable and holds no data yet.
    #[test]
    fn term_byte_pool_accessible() {
        let acc = SegmentAccumulator::new();
        let pool = acc.term_byte_pool();
        assert_eq!(pool.data.len(), 0);
    }
}