#![allow(clippy::doc_markdown)]
#![allow(clippy::default_trait_access)]
#![allow(clippy::cast_possible_truncation)]
use roaring::RoaringBitmap;
use rustc_hash::FxHashSet;
pub const PROMOTION_THRESHOLD: usize = 1000;
#[derive(Debug, Clone)]
#[allow(dead_code)] pub enum PostingList {
Small(FxHashSet<u32>),
Large(RoaringBitmap),
}
impl Default for PostingList {
fn default() -> Self {
Self::new()
}
}
#[allow(dead_code)] impl PostingList {
#[must_use]
pub fn new() -> Self {
PostingList::Small(FxHashSet::default())
}
#[must_use]
pub fn with_capacity(capacity: usize) -> Self {
if capacity >= PROMOTION_THRESHOLD {
PostingList::Large(RoaringBitmap::new())
} else {
PostingList::Small(FxHashSet::with_capacity_and_hasher(
capacity,
Default::default(),
))
}
}
pub fn insert(&mut self, doc_id: u32) -> bool {
match self {
PostingList::Small(set) => {
let inserted = set.insert(doc_id);
if set.len() >= PROMOTION_THRESHOLD {
self.promote_to_large();
}
inserted
}
PostingList::Large(bitmap) => bitmap.insert(doc_id),
}
}
pub fn remove(&mut self, doc_id: u32) -> bool {
match self {
PostingList::Small(set) => set.remove(&doc_id),
PostingList::Large(bitmap) => bitmap.remove(doc_id),
}
}
#[must_use]
pub fn contains(&self, doc_id: u32) -> bool {
match self {
PostingList::Small(set) => set.contains(&doc_id),
PostingList::Large(bitmap) => bitmap.contains(doc_id),
}
}
#[must_use]
pub fn len(&self) -> usize {
match self {
PostingList::Small(set) => set.len(),
PostingList::Large(bitmap) => bitmap.len() as usize,
}
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[must_use]
pub fn is_large(&self) -> bool {
matches!(self, PostingList::Large(_))
}
#[must_use]
pub fn union(&self, other: &PostingList) -> PostingList {
match (self, other) {
(PostingList::Large(a), PostingList::Large(b)) => PostingList::Large(a | b),
(PostingList::Large(bitmap), PostingList::Small(set))
| (PostingList::Small(set), PostingList::Large(bitmap)) => {
let mut result = bitmap.clone();
for &doc_id in set {
result.insert(doc_id);
}
PostingList::Large(result)
}
(PostingList::Small(a), PostingList::Small(b)) => {
let combined_estimate = a.len() + b.len();
if combined_estimate >= PROMOTION_THRESHOLD {
let mut bitmap = RoaringBitmap::new();
for &doc_id in a {
bitmap.insert(doc_id);
}
for &doc_id in b {
bitmap.insert(doc_id);
}
PostingList::Large(bitmap)
} else {
let mut result = a.clone();
result.extend(b.iter().copied());
PostingList::Small(result)
}
}
}
}
pub fn iter(&self) -> PostingListIter<'_> {
match self {
PostingList::Small(set) => PostingListIter::Small(set.iter()),
PostingList::Large(bitmap) => PostingListIter::Large(bitmap.iter()),
}
}
fn promote_to_large(&mut self) {
if let PostingList::Small(set) = self {
let mut bitmap = RoaringBitmap::new();
for &doc_id in set.iter() {
bitmap.insert(doc_id);
}
*self = PostingList::Large(bitmap);
}
}
}
pub enum PostingListIter<'a> {
Small(std::collections::hash_set::Iter<'a, u32>),
Large(roaring::bitmap::Iter<'a>),
}
impl Iterator for PostingListIter<'_> {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
match self {
PostingListIter::Small(iter) => iter.next().copied(),
PostingListIter::Large(iter) => iter.next(),
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
match self {
PostingListIter::Small(iter) => iter.size_hint(),
PostingListIter::Large(iter) => iter.size_hint(),
}
}
}
impl ExactSizeIterator for PostingListIter<'_> {}