use std::future::Future;
use std::pin::Pin;
use crate::segment::SegmentReader;
use crate::{DocId, Result, Score};
/// Tunable constants for BM25 scoring.
///
/// The [`Default`] value is `k1 = 1.2`, `b = 0.75`.
#[derive(Debug, Clone, Copy)]
pub struct Bm25Params {
    /// The BM25 `k1` constant (conventionally the term-frequency saturation knob).
    pub k1: f32,
    /// The BM25 `b` constant (conventionally the length-normalisation knob).
    pub b: f32,
}

impl Default for Bm25Params {
    /// Returns the parameter set `k1 = 1.2`, `b = 0.75`.
    fn default() -> Self {
        let (k1, b) = (1.2, 0.75);
        Bm25Params { k1, b }
    }
}
/// Boxed, pinned future returned by `Query::scorer`.
///
/// Resolves to a `Result` holding a boxed `Scorer` that may borrow data for `'a`.
/// On non-wasm32 targets the future must also be `Send`.
#[cfg(not(target_arch = "wasm32"))]
pub type ScorerFuture<'a> = Pin<Box<dyn Future<Output = Result<Box<dyn Scorer + 'a>>> + Send + 'a>>;
/// wasm32 variant of `ScorerFuture`: same shape, but without the `Send` bound.
#[cfg(target_arch = "wasm32")]
pub type ScorerFuture<'a> = Pin<Box<dyn Future<Output = Result<Box<dyn Scorer + 'a>>> + 'a>>;
/// Boxed, pinned future returned by `Query::count_estimate`; resolves to a
/// `Result<u32>`. On non-wasm32 targets the future must also be `Send`.
#[cfg(not(target_arch = "wasm32"))]
pub type CountFuture<'a> = Pin<Box<dyn Future<Output = Result<u32>> + Send + 'a>>;
/// wasm32 variant of `CountFuture`: same shape, but without the `Send` bound.
#[cfg(target_arch = "wasm32")]
pub type CountFuture<'a> = Pin<Box<dyn Future<Output = Result<u32>> + 'a>>;
/// Boxed closure mapping a `DocId` to `bool`, as returned by
/// `Query::as_doc_predicate`. On non-wasm32 targets the closure must be
/// `Send + Sync`.
#[cfg(not(target_arch = "wasm32"))]
pub type DocPredicate<'a> = Box<dyn Fn(DocId) -> bool + Send + Sync + 'a>;
/// wasm32 variant of `DocPredicate`: same shape, but without `Send + Sync`.
#[cfg(target_arch = "wasm32")]
pub type DocPredicate<'a> = Box<dyn Fn(DocId) -> bool + 'a>;
/// Fixed-capacity bitmap of document ids, packed 64 ids per `u64` word.
///
/// Capacity is chosen by [`DocBitset::new`] and never grows afterwards. Ids
/// whose word index lies past the end of the backing vector are silently
/// ignored by [`DocBitset::set`] and reported as absent by
/// [`DocBitset::contains`]. The binary set operations likewise only ever touch
/// words that already exist in `self`.
#[derive(Debug, Clone)]
pub struct DocBitset {
    /// Backing words; document `d` lives at bit `d % 64` of word `d / 64`.
    pub(crate) bits: Vec<u64>,
}

impl DocBitset {
    /// Creates an empty bitset with `ceil(num_docs / 64)` zeroed words.
    pub fn new(num_docs: u32) -> Self {
        Self {
            bits: vec![0; (num_docs as usize).div_ceil(64)],
        }
    }

    /// Marks `doc_id` as present.
    ///
    /// Does nothing when `doc_id` falls in a word beyond the allocated capacity.
    #[inline]
    pub fn set(&mut self, doc_id: u32) {
        let idx = doc_id as usize;
        if let Some(word) = self.bits.get_mut(idx / 64) {
            *word |= 1u64 << (idx % 64);
        }
    }

    /// Returns `true` if `doc_id` has been marked as present.
    ///
    /// Ids beyond the allocated capacity always yield `false`.
    #[inline(always)]
    pub fn contains(&self, doc_id: u32) -> bool {
        let idx = doc_id as usize;
        let mask = 1u64 << (idx % 64);
        self.bits
            .get(idx / 64)
            .is_some_and(|&word| (word & mask) != 0)
    }

    /// Returns the number of set bits across all words.
    pub fn count(&self) -> u32 {
        self.bits.iter().map(|word| word.count_ones()).sum()
    }

    /// Builds a bitset of capacity `num_docs` containing every id in
    /// `0..num_docs` for which `pred` returns `true`.
    ///
    /// `pred` is invoked exactly once per id, in ascending order.
    pub fn from_predicate(num_docs: u32, pred: &dyn Fn(DocId) -> bool) -> Self {
        let mut bitset = Self::new(num_docs);
        for doc_id in 0..num_docs {
            if pred(doc_id) {
                bitset.set(doc_id);
            }
        }
        bitset
    }

    /// In-place union: `self |= other`.
    ///
    /// Only the words both bitsets have in common are combined; words of
    /// `other` beyond `self`'s capacity are ignored.
    pub fn union_with(&mut self, other: &DocBitset) {
        for (dst, src) in self.bits.iter_mut().zip(&other.bits) {
            *dst |= *src;
        }
    }

    /// In-place intersection: `self &= other`.
    ///
    /// Words of `self` that have no counterpart in `other` are cleared, since
    /// `other` cannot contain any of those ids.
    pub fn intersect_with(&mut self, other: &DocBitset) {
        let shared = self.bits.len().min(other.bits.len());
        let (overlap, tail) = self.bits.split_at_mut(shared);
        for (dst, src) in overlap.iter_mut().zip(&other.bits) {
            *dst &= *src;
        }
        tail.fill(0);
    }

    /// In-place difference: removes from `self` every id present in `other`.
    ///
    /// Words of `self` that have no counterpart in `other` are left untouched.
    pub fn subtract(&mut self, other: &DocBitset) {
        for (dst, src) in self.bits.iter_mut().zip(&other.bits) {
            *dst &= !*src;
        }
    }
}
/// Field/term pair describing a single text term; this is the payload of
/// `QueryDecomposition::TextTerm`.
#[derive(Debug, Clone)]
pub struct TermQueryInfo {
/// Field handle associated with the term.
pub field: crate::dsl::Field,
/// The term as raw bytes.
pub term: Vec<u8>,
}
/// Description of one sparse query term; a list of these is the payload of
/// `QueryDecomposition::SparseTerms`.
///
/// NOTE(review): the precise meaning of the tuning fields below is defined by
/// the code that consumes this struct, which is not visible here — confirm
/// before relying on the hedged descriptions.
#[derive(Debug, Clone, Copy)]
pub struct SparseTermQueryInfo {
/// Field handle associated with the term.
pub field: crate::dsl::Field,
/// Identifier of the sparse dimension (presumably an index into the sparse vector space).
pub dim_id: u32,
/// Weight attached to this dimension (presumably the query-side weight).
pub weight: f32,
/// Tuning factor; semantics not visible in this file — TODO confirm.
pub heap_factor: f32,
/// Strategy value of type `MultiValueCombiner` (presumably how multiple values per document are combined).
pub combiner: super::MultiValueCombiner,
/// Tuning factor; semantics not visible in this file — TODO confirm.
pub over_fetch_factor: f32,
/// Limit expressed as a count; semantics not visible in this file — TODO confirm.
pub max_superblocks: usize,
}
/// Structural description of a query, as returned by `Query::decompose`.
#[derive(Debug, Clone)]
pub enum QueryDecomposition {
/// The query is a single text term.
TextTerm(TermQueryInfo),
/// The query is a list of sparse terms.
SparseTerms(Vec<SparseTermQueryInfo>),
/// The query exposes no structure; this is what the default
/// `Query::decompose` implementation returns.
Opaque,
}
/// List of `(u32, scored positions)` pairs, as returned by
/// `Scorer::matched_positions`.
///
/// NOTE(review): the `u32` key is presumably a field identifier — confirm
/// against the scorers that populate it.
pub type MatchedPositions = Vec<(u32, Vec<super::ScoredPosition>)>;
// Emits the `Query` and `Scorer` trait definitions. The macro's tokens
// (`$send_bounds`) are appended to each trait's supertrait list, so the same
// declarations can be generated with or without extra bounds such as
// `Send + Sync`.
macro_rules! define_query_traits {
($($send_bounds:tt)*) => {
/// A query that can produce scorers over a segment.
pub trait Query: std::fmt::Display + $($send_bounds)* {
/// Returns a future resolving to a boxed [`Scorer`] over `reader`.
///
/// NOTE(review): `limit` is presumably the number of hits the caller
/// wants — confirm with implementors.
fn scorer<'a>(
&self,
reader: &'a SegmentReader,
limit: usize,
) -> ScorerFuture<'a>;
/// Returns a future resolving to a `u32` document-count estimate for `reader`.
fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a>;
/// Synchronous counterpart of [`Query::scorer`], only compiled with the
/// `sync` feature.
///
/// The default implementation ignores its arguments and returns a
/// `Query` error stating that sync scoring is not supported.
#[cfg(feature = "sync")]
fn scorer_sync<'a>(
&self,
reader: &'a SegmentReader,
limit: usize,
) -> Result<Box<dyn Scorer + 'a>> {
// Explicitly mark the parameters as used in the default body.
let _ = (reader, limit);
Err(crate::error::Error::Query(
"sync scorer not supported for this query type".into(),
))
}
/// Describes the query's structure; defaults to
/// [`QueryDecomposition::Opaque`].
fn decompose(&self) -> QueryDecomposition {
QueryDecomposition::Opaque
}
/// Whether this query is a filter; defaults to `false`.
fn is_filter(&self) -> bool {
false
}
/// Optionally exposes the query as a per-document predicate over
/// `reader`; defaults to `None`.
fn as_doc_predicate<'a>(
&self,
_reader: &'a SegmentReader,
) -> Option<DocPredicate<'a>> {
None
}
/// Optionally exposes the query as a [`DocBitset`] for `reader`;
/// defaults to `None`.
fn as_doc_bitset(
&self,
_reader: &SegmentReader,
) -> Option<DocBitset> {
None
}
}
/// A `DocSet` that can additionally report a score.
pub trait Scorer: super::docset::DocSet + $($send_bounds)* {
/// Returns the score (presumably for the document the scorer is
/// currently positioned on — confirm against `DocSet`).
fn score(&self) -> Score;
/// Optionally returns matched positions; defaults to `None`.
fn matched_positions(&self) -> Option<MatchedPositions> {
None
}
}
};
}
// Non-wasm32 targets: `Query` and `Scorer` additionally require `Send + Sync`.
#[cfg(not(target_arch = "wasm32"))]
define_query_traits!(Send + Sync);
// wasm32 targets: the traits are generated without any extra bounds.
#[cfg(target_arch = "wasm32")]
define_query_traits!();
impl Query for Box<dyn Query> {
fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
(**self).scorer(reader, limit)
}
fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
(**self).count_estimate(reader)
}
fn decompose(&self) -> QueryDecomposition {
(**self).decompose()
}
fn is_filter(&self) -> bool {
(**self).is_filter()
}
fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<DocPredicate<'a>> {
(**self).as_doc_predicate(reader)
}
fn as_doc_bitset(&self, reader: &SegmentReader) -> Option<DocBitset> {
(**self).as_doc_bitset(reader)
}
#[cfg(feature = "sync")]
fn scorer_sync<'a>(
&self,
reader: &'a SegmentReader,
limit: usize,
) -> Result<Box<dyn Scorer + 'a>> {
(**self).scorer_sync(reader, limit)
}
}
/// Stateless unit type whose trait implementations in this file report an
/// exhausted, zero-sized, zero-score result set.
///
/// Derives the common traits so it can be logged, copied and
/// default-constructed like the other public types in this module.
#[derive(Debug, Clone, Copy, Default)]
pub struct EmptyScorer;
// `DocSet` implementation for `EmptyScorer`: every positioning method returns
// `crate::structures::TERMINATED` (presumably the end-of-iteration sentinel)
// and the size hint is zero.
impl super::docset::DocSet for EmptyScorer {
/// Always returns `TERMINATED`.
fn doc(&self) -> DocId {
crate::structures::TERMINATED
}
/// Always returns `TERMINATED`; there is no state to advance.
fn advance(&mut self) -> DocId {
crate::structures::TERMINATED
}
/// Ignores `_target` and always returns `TERMINATED`.
fn seek(&mut self, _target: DocId) -> DocId {
crate::structures::TERMINATED
}
/// Always returns `0`.
fn size_hint(&self) -> u32 {
0
}
}
// `Scorer` implementation for `EmptyScorer`; `matched_positions` keeps the
// trait's default (`None`).
impl Scorer for EmptyScorer {
/// Always returns a score of `0.0`.
fn score(&self) -> Score {
0.0
}
}