use std::collections::BTreeMap;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct Postings {
map: BTreeMap<u64, RoaringBitmap>,
}
impl Postings {
#[must_use]
pub fn new() -> Self {
Self {
map: BTreeMap::new(),
}
}
#[must_use]
pub fn len(&self) -> usize {
self.map.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.map.is_empty()
}
pub fn insert(&mut self, trigram: u64, doc_id: u32) {
self.map.entry(trigram).or_default().insert(doc_id);
}
pub fn remove(&mut self, trigram: u64, doc_id: u32) {
let drop = match self.map.get_mut(&trigram) {
Some(bm) => {
bm.remove(doc_id);
bm.is_empty()
}
None => false,
};
if drop {
self.map.remove(&trigram);
}
}
#[must_use]
pub fn lookup(&self, trigram: u64) -> Option<&RoaringBitmap> {
self.map.get(&trigram)
}
#[must_use]
pub fn intersect(&self, trigrams: &[u64]) -> RoaringBitmap {
if trigrams.is_empty() {
return RoaringBitmap::new();
}
let mut order: Vec<u64> = trigrams.to_vec();
order.sort_unstable();
order.dedup();
order.sort_by_key(|t| self.map.get(t).map_or(0, RoaringBitmap::len));
let mut iter = order.into_iter();
let Some(first_trigram) = iter.next() else {
return RoaringBitmap::new();
};
let mut acc = match self.map.get(&first_trigram) {
Some(b) => b.clone(),
None => return RoaringBitmap::new(),
};
for t in iter {
if acc.is_empty() {
return acc;
}
match self.map.get(&t) {
Some(b) => {
acc &= b;
}
None => return RoaringBitmap::new(),
}
}
acc
}
#[must_use]
pub fn union(&self, trigrams: &[u64]) -> RoaringBitmap {
let mut acc = RoaringBitmap::new();
let mut seen: Vec<u64> = trigrams.to_vec();
seen.sort_unstable();
seen.dedup();
for t in seen {
if let Some(b) = self.map.get(&t) {
acc |= b;
}
}
acc
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn postings_insert_and_lookup() {
let mut p = Postings::new();
p.insert(7, 1);
p.insert(7, 2);
p.insert(8, 2);
let bm = p.lookup(7).expect("trigram 7 present");
assert!(bm.contains(1));
assert!(bm.contains(2));
let bm = p.lookup(8).expect("trigram 8 present");
assert!(bm.contains(2));
assert!(!bm.contains(1));
assert_eq!(p.len(), 2);
}
#[test]
fn postings_lookup_missing_returns_none() {
let p = Postings::new();
assert!(p.lookup(99).is_none());
}
#[test]
fn postings_intersection_of_two_trigrams_yields_intersection() {
let mut p = Postings::new();
p.insert(1, 10);
p.insert(1, 20);
p.insert(1, 30);
p.insert(2, 20);
p.insert(2, 30);
p.insert(2, 40);
let r = p.intersect(&[1, 2]);
assert!(r.contains(20));
assert!(r.contains(30));
assert!(!r.contains(10));
assert!(!r.contains(40));
assert_eq!(r.len(), 2);
}
#[test]
fn postings_intersection_of_disjoint_trigrams_is_empty() {
let mut p = Postings::new();
p.insert(1, 10);
p.insert(1, 20);
p.insert(2, 30);
p.insert(2, 40);
let r = p.intersect(&[1, 2]);
assert!(r.is_empty());
}
#[test]
fn postings_intersection_with_missing_trigram_is_empty() {
let mut p = Postings::new();
p.insert(1, 10);
let r = p.intersect(&[1, 999]);
assert!(r.is_empty());
}
#[test]
fn postings_intersection_empty_input_is_empty() {
let p = Postings::new();
assert!(p.intersect(&[]).is_empty());
}
#[test]
fn postings_intersection_single_trigram_returns_that_list() {
let mut p = Postings::new();
p.insert(1, 10);
p.insert(1, 20);
let r = p.intersect(&[1]);
assert_eq!(r.len(), 2);
}
#[test]
fn postings_union_basic() {
let mut p = Postings::new();
p.insert(1, 10);
p.insert(2, 20);
p.insert(3, 30);
let r = p.union(&[1, 2]);
assert!(r.contains(10));
assert!(r.contains(20));
assert!(!r.contains(30));
}
#[test]
fn postings_remove_clears_doc_id_from_one_trigram() {
let mut p = Postings::new();
p.insert(1, 10);
p.insert(1, 20);
p.remove(1, 10);
let bm = p.lookup(1).expect("trigram 1 still present");
assert!(!bm.contains(10));
assert!(bm.contains(20));
}
#[test]
fn postings_dropping_a_trigram_when_no_docs_left_garbage_collects() {
let mut p = Postings::new();
p.insert(1, 10);
assert_eq!(p.len(), 1);
p.remove(1, 10);
assert_eq!(p.len(), 0);
assert!(p.lookup(1).is_none());
}
#[test]
fn postings_remove_missing_is_a_noop() {
let mut p = Postings::new();
p.insert(1, 10);
p.remove(1, 99);
p.remove(2, 10);
let bm = p.lookup(1).expect("trigram 1 still present");
assert!(bm.contains(10));
}
}