use std::cmp::Ordering;
use std::collections::BinaryHeap;
use std::fmt::Debug;
use crate::error::Result;
use crate::lexical::reader::PostingIterator;
pub trait Matcher: Send + Debug {
fn doc_id(&self) -> u64;
fn next(&mut self) -> Result<bool>;
fn skip_to(&mut self, target: u64) -> Result<bool>;
fn cost(&self) -> u64;
fn is_exhausted(&self) -> bool;
fn term_freq(&self) -> u64 {
1 }
}
#[derive(Debug)]
pub struct EmptyMatcher {
exhausted: bool,
}
impl EmptyMatcher {
pub fn new() -> Self {
EmptyMatcher { exhausted: true }
}
}
impl Default for EmptyMatcher {
fn default() -> Self {
Self::new()
}
}
impl Matcher for EmptyMatcher {
fn doc_id(&self) -> u64 {
u64::MAX
}
fn next(&mut self) -> Result<bool> {
Ok(false)
}
fn skip_to(&mut self, _target: u64) -> Result<bool> {
Ok(false)
}
fn cost(&self) -> u64 {
0
}
fn is_exhausted(&self) -> bool {
self.exhausted
}
}
#[derive(Debug)]
pub struct AllMatcher {
current_doc: u64,
max_doc: u64,
}
impl AllMatcher {
pub fn new(max_doc: u64) -> Self {
AllMatcher {
current_doc: 0,
max_doc,
}
}
}
impl Matcher for AllMatcher {
fn doc_id(&self) -> u64 {
if self.current_doc >= self.max_doc {
u64::MAX
} else {
self.current_doc
}
}
fn next(&mut self) -> Result<bool> {
if self.current_doc >= self.max_doc {
Ok(false)
} else {
self.current_doc += 1;
Ok(self.current_doc < self.max_doc)
}
}
fn skip_to(&mut self, target: u64) -> Result<bool> {
if target >= self.max_doc {
self.current_doc = self.max_doc;
Ok(false)
} else {
self.current_doc = target;
Ok(true)
}
}
fn cost(&self) -> u64 {
self.max_doc
}
fn is_exhausted(&self) -> bool {
self.current_doc >= self.max_doc
}
}
#[derive(Debug)]
pub struct PostingMatcher {
posting_iter: Box<dyn PostingIterator>,
exhausted: bool,
cost: u64,
}
impl PostingMatcher {
pub fn new(mut posting_iter: Box<dyn PostingIterator>) -> Self {
let cost = posting_iter.cost();
let exhausted = !posting_iter.next().unwrap_or(false);
PostingMatcher {
posting_iter,
exhausted,
cost,
}
}
pub fn exhausted() -> Self {
let postings = vec![];
let posting_iter = Box::new(
crate::lexical::index::inverted::reader::InvertedIndexPostingIterator::new(postings),
);
PostingMatcher {
posting_iter,
exhausted: true,
cost: 0,
}
}
}
impl Matcher for PostingMatcher {
fn doc_id(&self) -> u64 {
if self.exhausted {
u64::MAX
} else {
self.posting_iter.doc_id()
}
}
fn next(&mut self) -> Result<bool> {
if self.exhausted {
Ok(false)
} else {
let has_next = self.posting_iter.next()?;
if !has_next {
self.exhausted = true;
}
Ok(has_next)
}
}
fn skip_to(&mut self, target: u64) -> Result<bool> {
if self.exhausted {
Ok(false)
} else {
let result = self.posting_iter.skip_to(target)?;
if !result {
self.exhausted = true;
}
Ok(result)
}
}
fn cost(&self) -> u64 {
self.cost
}
fn is_exhausted(&self) -> bool {
self.exhausted
}
fn term_freq(&self) -> u64 {
if self.exhausted {
0
} else {
self.posting_iter.term_freq()
}
}
}
#[derive(Debug)]
struct MatcherEntry {
matcher: Box<dyn Matcher>,
}
impl PartialEq for MatcherEntry {
fn eq(&self, other: &Self) -> bool {
self.matcher.doc_id() == other.matcher.doc_id()
}
}
impl Eq for MatcherEntry {}
impl PartialOrd for MatcherEntry {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for MatcherEntry {
fn cmp(&self, other: &Self) -> Ordering {
other.matcher.doc_id().cmp(&self.matcher.doc_id())
}
}
#[derive(Debug)]
pub struct DisjunctionMatcher {
heap: BinaryHeap<MatcherEntry>,
current_doc: u64,
exhausted: bool,
cost: u64,
}
impl DisjunctionMatcher {
pub fn new(mut matchers: Vec<Box<dyn Matcher>>) -> Self {
let mut heap = BinaryHeap::new();
let mut cost = 0;
for matcher in matchers.drain(..) {
if !matcher.is_exhausted() {
cost += matcher.cost();
heap.push(MatcherEntry { matcher });
}
}
let current_doc = heap
.peek()
.map(|entry| entry.matcher.doc_id())
.unwrap_or(u64::MAX);
let exhausted = heap.is_empty();
DisjunctionMatcher {
heap,
current_doc,
exhausted,
cost,
}
}
pub fn empty() -> Self {
DisjunctionMatcher {
heap: BinaryHeap::new(),
current_doc: u64::MAX,
exhausted: true,
cost: 0,
}
}
fn advance_to_next_doc(&mut self) -> Result<()> {
if self.exhausted {
return Ok(());
}
let current_doc = self.current_doc;
let mut matchers_to_reinsert = Vec::new();
while let Some(entry) = self.heap.peek() {
if entry.matcher.doc_id() == current_doc {
let mut entry = self.heap.pop().unwrap();
if entry.matcher.next()? && !entry.matcher.is_exhausted() {
matchers_to_reinsert.push(entry);
}
} else {
break;
}
}
for entry in matchers_to_reinsert {
self.heap.push(entry);
}
if let Some(entry) = self.heap.peek() {
self.current_doc = entry.matcher.doc_id();
} else {
self.current_doc = u64::MAX;
self.exhausted = true;
}
Ok(())
}
}
impl Matcher for DisjunctionMatcher {
fn doc_id(&self) -> u64 {
self.current_doc
}
fn next(&mut self) -> Result<bool> {
if self.exhausted {
return Ok(false);
}
self.advance_to_next_doc()?;
Ok(!self.exhausted)
}
fn skip_to(&mut self, target: u64) -> Result<bool> {
if self.exhausted || target <= self.current_doc {
return Ok(!self.exhausted);
}
let mut matchers_to_reinsert = Vec::new();
while let Some(mut entry) = self.heap.pop() {
if entry.matcher.skip_to(target)? && !entry.matcher.is_exhausted() {
matchers_to_reinsert.push(entry);
}
}
for entry in matchers_to_reinsert {
self.heap.push(entry);
}
if let Some(entry) = self.heap.peek() {
self.current_doc = entry.matcher.doc_id();
self.exhausted = false;
} else {
self.current_doc = u64::MAX;
self.exhausted = true;
}
Ok(!self.exhausted)
}
fn cost(&self) -> u64 {
self.cost
}
fn term_freq(&self) -> u64 {
let mut total_freq = 0;
for entry in self.heap.iter() {
if entry.matcher.doc_id() == self.current_doc {
total_freq += entry.matcher.term_freq();
}
}
total_freq
}
fn is_exhausted(&self) -> bool {
self.exhausted
}
}
#[derive(Debug)]
pub struct ConjunctionMatcher {
matchers: Vec<Box<dyn Matcher>>,
current_doc: u64,
exhausted: bool,
cost: u64,
}
impl ConjunctionMatcher {
pub fn new(matchers: Vec<Box<dyn Matcher>>) -> Self {
if matchers.is_empty() {
return ConjunctionMatcher {
matchers: vec![],
current_doc: u64::MAX,
exhausted: true,
cost: 0,
};
}
let cost = matchers.iter().map(|m| m.cost()).sum();
let mut matcher = ConjunctionMatcher {
matchers,
current_doc: 0,
exhausted: false,
cost,
};
if let Ok(found) = matcher.advance_to_alignment() {
if !found {
matcher.exhausted = true;
matcher.current_doc = u64::MAX;
}
} else {
matcher.exhausted = true;
matcher.current_doc = u64::MAX;
}
matcher
}
fn advance_to_alignment(&mut self) -> Result<bool> {
if self.matchers.is_empty() || self.exhausted {
return Ok(false);
}
loop {
let mut max_doc = 0;
for matcher in &self.matchers {
let doc_id = matcher.doc_id();
if doc_id == u64::MAX {
self.exhausted = true;
return Ok(false);
}
if doc_id > max_doc {
max_doc = doc_id;
}
}
let mut all_aligned = true;
for matcher in &mut self.matchers {
let doc_id = matcher.doc_id();
if doc_id < max_doc {
if !matcher.skip_to(max_doc)? {
self.exhausted = true;
return Ok(false);
}
if matcher.doc_id() != max_doc {
all_aligned = false;
}
}
}
if all_aligned {
self.current_doc = max_doc;
return Ok(true);
}
}
}
}
impl Matcher for ConjunctionMatcher {
fn doc_id(&self) -> u64 {
if self.exhausted {
u64::MAX
} else {
self.current_doc
}
}
fn next(&mut self) -> Result<bool> {
if self.exhausted || self.matchers.is_empty() {
return Ok(false);
}
if !self.matchers[0].next()? {
self.exhausted = true;
self.current_doc = u64::MAX;
return Ok(false);
}
self.advance_to_alignment()
}
fn skip_to(&mut self, target: u64) -> Result<bool> {
if self.exhausted || target <= self.current_doc {
return Ok(!self.exhausted && self.current_doc >= target);
}
if !self.matchers[0].skip_to(target)? {
self.exhausted = true;
self.current_doc = u64::MAX;
return Ok(false);
}
self.advance_to_alignment()
}
fn cost(&self) -> u64 {
self.cost
}
fn is_exhausted(&self) -> bool {
self.exhausted
}
}
#[derive(Debug)]
pub struct ConjunctionNotMatcher {
positive: Box<dyn Matcher>,
negatives: Vec<Box<dyn Matcher>>,
current_doc: u64,
exhausted: bool,
cost: u64,
}
impl ConjunctionNotMatcher {
pub fn new(positive: Box<dyn Matcher>, negatives: Vec<Box<dyn Matcher>>) -> Self {
let cost = positive.cost();
let mut matcher = ConjunctionNotMatcher {
positive,
negatives,
current_doc: 0,
exhausted: false,
cost,
};
if let Ok(found) = matcher.advance_to_next_valid() {
if !found {
matcher.exhausted = true;
matcher.current_doc = u64::MAX;
}
} else {
matcher.exhausted = true;
matcher.current_doc = u64::MAX;
}
matcher
}
fn is_excluded(&mut self, doc_id: u64) -> Result<bool> {
for negative in &mut self.negatives {
if negative.doc_id() < doc_id {
negative.skip_to(doc_id)?;
}
if negative.doc_id() == doc_id {
return Ok(true);
}
}
Ok(false)
}
fn advance_to_next_valid(&mut self) -> Result<bool> {
loop {
if self.positive.is_exhausted() {
self.exhausted = true;
self.current_doc = u64::MAX;
return Ok(false);
}
let doc_id = self.positive.doc_id();
if doc_id == u64::MAX {
self.exhausted = true;
self.current_doc = u64::MAX;
return Ok(false);
}
if !self.is_excluded(doc_id)? {
self.current_doc = doc_id;
return Ok(true);
}
if !self.positive.next()? {
self.exhausted = true;
self.current_doc = u64::MAX;
return Ok(false);
}
}
}
}
impl Matcher for ConjunctionNotMatcher {
fn doc_id(&self) -> u64 {
if self.exhausted {
u64::MAX
} else {
self.current_doc
}
}
fn next(&mut self) -> Result<bool> {
if self.exhausted {
return Ok(false);
}
if !self.positive.next()? {
self.exhausted = true;
self.current_doc = u64::MAX;
return Ok(false);
}
self.advance_to_next_valid()
}
fn skip_to(&mut self, target: u64) -> Result<bool> {
if self.exhausted || target <= self.current_doc {
return Ok(!self.exhausted && self.current_doc >= target);
}
if !self.positive.skip_to(target)? {
self.exhausted = true;
self.current_doc = u64::MAX;
return Ok(false);
}
self.advance_to_next_valid()
}
fn cost(&self) -> u64 {
self.cost
}
fn term_freq(&self) -> u64 {
self.positive.term_freq()
}
fn is_exhausted(&self) -> bool {
self.exhausted
}
}
#[derive(Debug)]
pub struct NotMatcher {
negative: Box<dyn Matcher>,
max_doc: u64,
current_doc: u64,
exhausted: bool,
}
impl NotMatcher {
pub fn new(negative: Box<dyn Matcher>, max_doc: u64) -> Self {
let mut matcher = NotMatcher {
negative,
max_doc,
current_doc: 0,
exhausted: false,
};
if let Ok(found) = matcher.advance_to_next_valid() {
if !found {
matcher.exhausted = true;
}
} else {
matcher.exhausted = true;
}
matcher
}
fn advance_to_next_valid(&mut self) -> Result<bool> {
while self.current_doc < self.max_doc {
let neg_doc = self.negative.doc_id();
if neg_doc > self.current_doc || neg_doc == u64::MAX {
return Ok(true);
} else if neg_doc == self.current_doc {
self.current_doc += 1;
if self.negative.doc_id() < self.current_doc && !self.negative.is_exhausted() {
self.negative.skip_to(self.current_doc)?;
}
} else {
self.negative.skip_to(self.current_doc)?;
}
}
self.current_doc = u64::MAX;
self.exhausted = true;
Ok(false)
}
}
impl Matcher for NotMatcher {
fn doc_id(&self) -> u64 {
if self.exhausted || self.current_doc >= self.max_doc {
u64::MAX
} else {
self.current_doc
}
}
fn next(&mut self) -> Result<bool> {
if self.exhausted || self.current_doc >= self.max_doc {
return Ok(false);
}
self.current_doc += 1;
self.advance_to_next_valid()
}
fn skip_to(&mut self, target: u64) -> Result<bool> {
if self.exhausted || target <= self.current_doc {
return Ok(!self.exhausted && self.current_doc < self.max_doc);
}
if target >= self.max_doc {
self.current_doc = self.max_doc;
self.exhausted = true;
return Ok(false);
}
self.current_doc = target;
self.advance_to_next_valid()
}
fn cost(&self) -> u64 {
self.max_doc
}
fn is_exhausted(&self) -> bool {
self.exhausted || self.current_doc >= self.max_doc
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_empty_matcher() {
let mut matcher = EmptyMatcher::new();
assert_eq!(matcher.doc_id(), u64::MAX);
assert!(matcher.is_exhausted());
assert_eq!(matcher.cost(), 0);
assert!(!matcher.next().unwrap());
assert!(!matcher.skip_to(5).unwrap());
}
#[test]
fn test_all_matcher() {
let mut matcher = AllMatcher::new(5);
assert_eq!(matcher.doc_id(), 0);
assert!(!matcher.is_exhausted());
assert_eq!(matcher.cost(), 5);
assert!(matcher.next().unwrap());
assert_eq!(matcher.doc_id(), 1);
assert!(matcher.next().unwrap());
assert_eq!(matcher.doc_id(), 2);
assert!(matcher.skip_to(4).unwrap());
assert_eq!(matcher.doc_id(), 4);
assert!(!matcher.skip_to(10).unwrap());
assert_eq!(matcher.doc_id(), u64::MAX);
assert!(matcher.is_exhausted());
}
#[test]
fn test_posting_matcher() {
let postings = vec![
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 0,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 1,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 2,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 3,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 4,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
];
let posting_iter = Box::new(
crate::lexical::index::inverted::reader::InvertedIndexPostingIterator::new(postings),
);
let mut matcher = PostingMatcher::new(posting_iter);
assert_eq!(matcher.doc_id(), 0);
assert!(!matcher.is_exhausted());
assert_eq!(matcher.cost(), 5);
for i in 1..5 {
assert!(matcher.next().unwrap());
assert_eq!(matcher.doc_id(), i);
}
assert!(!matcher.next().unwrap());
assert!(matcher.is_exhausted());
}
#[test]
fn test_exhausted_posting_matcher() {
let matcher = PostingMatcher::exhausted();
assert_eq!(matcher.doc_id(), u64::MAX);
assert!(matcher.is_exhausted());
assert_eq!(matcher.cost(), 0);
}
#[test]
fn test_conjunction_matcher() {
let postings1 = vec![
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 0,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 2,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 4,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 6,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 8,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
];
let matcher1 = Box::new(PostingMatcher::new(Box::new(
crate::lexical::index::inverted::reader::InvertedIndexPostingIterator::new(postings1),
)));
let postings2 = vec![
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 1,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 2,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 3,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 4,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 5,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 6,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
];
let matcher2 = Box::new(PostingMatcher::new(Box::new(
crate::lexical::index::inverted::reader::InvertedIndexPostingIterator::new(postings2),
)));
let mut conjunction = ConjunctionMatcher::new(vec![matcher1, matcher2]);
assert_eq!(conjunction.doc_id(), 2);
assert!(!conjunction.is_exhausted());
assert!(conjunction.next().unwrap());
assert_eq!(conjunction.doc_id(), 4);
assert!(conjunction.next().unwrap());
assert_eq!(conjunction.doc_id(), 6);
assert!(!conjunction.next().unwrap());
assert!(conjunction.is_exhausted());
}
#[test]
fn test_conjunction_not_matcher() {
let postings_pos = vec![
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 0,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 1,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 2,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 3,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 4,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 5,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
];
let positive = Box::new(PostingMatcher::new(Box::new(
crate::lexical::index::inverted::reader::InvertedIndexPostingIterator::new(
postings_pos,
),
)));
let postings_neg = vec![
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 1,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 3,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 5,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
];
let negative = Box::new(PostingMatcher::new(Box::new(
crate::lexical::index::inverted::reader::InvertedIndexPostingIterator::new(
postings_neg,
),
)));
let mut conj_not = ConjunctionNotMatcher::new(positive, vec![negative]);
assert_eq!(conj_not.doc_id(), 0);
assert!(!conj_not.is_exhausted());
assert!(conj_not.next().unwrap());
assert_eq!(conj_not.doc_id(), 2);
assert!(conj_not.next().unwrap());
assert_eq!(conj_not.doc_id(), 4);
assert!(!conj_not.next().unwrap());
assert!(conj_not.is_exhausted());
}
#[test]
fn test_not_matcher() {
let postings_neg = vec![
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 1,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 3,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 5,
frequency: 1,
positions: Some(vec![]),
weight: 1.0,
},
];
let negative = Box::new(PostingMatcher::new(Box::new(
crate::lexical::index::inverted::reader::InvertedIndexPostingIterator::new(
postings_neg,
),
)));
let mut not_matcher = NotMatcher::new(negative, 7);
assert_eq!(not_matcher.doc_id(), 0);
assert!(!not_matcher.is_exhausted());
assert!(not_matcher.next().unwrap());
assert_eq!(not_matcher.doc_id(), 2);
assert!(not_matcher.next().unwrap());
assert_eq!(not_matcher.doc_id(), 4);
assert!(not_matcher.next().unwrap());
assert_eq!(not_matcher.doc_id(), 6);
assert!(!not_matcher.next().unwrap());
assert!(not_matcher.is_exhausted());
}
#[test]
fn test_conjunction_matcher_empty() {
let conjunction = ConjunctionMatcher::new(vec![]);
assert_eq!(conjunction.doc_id(), u64::MAX);
assert!(conjunction.is_exhausted());
}
#[test]
fn test_conjunction_matcher_no_overlap() {
let postings1 = vec![
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 0,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 2,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 4,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
];
let matcher1 = Box::new(PostingMatcher::new(Box::new(
crate::lexical::index::inverted::reader::InvertedIndexPostingIterator::new(postings1),
)));
let postings2 = vec![
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 1,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 3,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
crate::lexical::index::inverted::core::posting::Posting {
doc_id: 5,
frequency: 1_u32,
positions: Some(vec![]),
weight: 1.0,
},
];
let matcher2 = Box::new(PostingMatcher::new(Box::new(
crate::lexical::index::inverted::reader::InvertedIndexPostingIterator::new(postings2),
)));
let conjunction = ConjunctionMatcher::new(vec![matcher1, matcher2]);
assert_eq!(conjunction.doc_id(), u64::MAX);
assert!(conjunction.is_exhausted());
}
}
#[derive(Debug)]
pub struct PreComputedMatcher {
doc_ids: Vec<u64>,
current_index: usize,
exhausted: bool,
}
impl PreComputedMatcher {
pub fn new(mut doc_ids: Vec<u64>) -> Self {
doc_ids.sort_unstable();
doc_ids.dedup();
let exhausted = doc_ids.is_empty();
PreComputedMatcher {
doc_ids,
current_index: 0,
exhausted,
}
}
pub fn empty() -> Self {
PreComputedMatcher {
doc_ids: Vec::new(),
current_index: 0,
exhausted: true,
}
}
}
impl Matcher for PreComputedMatcher {
fn doc_id(&self) -> u64 {
if self.exhausted || self.current_index >= self.doc_ids.len() {
u64::MAX
} else {
self.doc_ids[self.current_index]
}
}
fn next(&mut self) -> Result<bool> {
if self.exhausted {
return Ok(false);
}
self.current_index += 1;
if self.current_index >= self.doc_ids.len() {
self.exhausted = true;
Ok(false)
} else {
Ok(true)
}
}
fn skip_to(&mut self, target: u64) -> Result<bool> {
if self.exhausted {
return Ok(false);
}
let mut left = self.current_index;
let mut right = self.doc_ids.len();
while left < right {
let mid = left + (right - left) / 2;
if self.doc_ids[mid] >= target {
right = mid;
} else {
left = mid + 1;
}
}
self.current_index = left;
if self.current_index >= self.doc_ids.len() {
self.exhausted = true;
Ok(false)
} else {
Ok(true)
}
}
fn cost(&self) -> u64 {
self.doc_ids.len() as u64
}
fn is_exhausted(&self) -> bool {
self.exhausted
}
}