use core::codec::{Codec, CodecEnum, CodecPostingIterator, CodecTermState};
use core::doc::Term;
use core::index::reader::LeafReaderContext;
use core::search::explanation::Explanation;
use core::search::query::spans::{
build_sim_weight, PostingsFlag, SpanQueryEnum, SpanWeightEnum, SpansEnum, NO_MORE_POSITIONS,
};
use core::search::query::spans::{term_contexts, ConjunctionSpanBase, ConjunctionSpans};
use core::search::query::spans::{SpanCollector, SpanQuery, SpanWeight, Spans};
use core::search::searcher::SearchPlanBuilder;
use core::search::TermContext;
use core::search::{
query::Query, query::TermQuery, query::Weight, scorer::Scorer, similarity::SimWeight,
DocIterator, NO_MORE_DOCS,
};
use core::util::{DocId, KeyedContext, BM25_SIMILARITY_IDF};
use error::{ErrorKind, Result};
use core::codec::PostingIterator;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use std::collections::HashMap;
use std::fmt;
use std::mem::MaybeUninit;
use std::ptr;
use std::sync::Arc;
pub struct SpanNearQueryBuilder {
ordered: bool,
field: String,
clauses: Vec<SpanQueryEnum>,
slop: i32,
}
impl SpanNearQueryBuilder {
pub fn new(field: String, ordered: bool) -> Self {
SpanNearQueryBuilder {
ordered,
field,
clauses: vec![],
slop: 0,
}
}
pub fn add_clause(mut self, clause: SpanQueryEnum) -> Result<Self> {
if SpanQuery::<CodecEnum>::field(&clause) != self.field {
bail!(ErrorKind::IllegalArgument(format!(
"clause field is '{}' not equal with field '{}'",
SpanQuery::<CodecEnum>::field(&clause),
&self.field
)));
}
self.clauses.push(clause);
Ok(self)
}
pub fn add_gap(mut self, width: i32) -> Result<Self> {
if !self.ordered {
bail!(ErrorKind::IllegalArgument(
"Gaps can only be added to ordered near queries".into()
));
}
self.clauses.push(SpanQueryEnum::Gap(SpanGapQuery::new(
self.field.clone(),
width,
)));
Ok(self)
}
pub fn slop(mut self, slop: i32) -> Self {
self.slop = slop;
self
}
pub fn build(self) -> SpanNearQuery {
SpanNearQuery::new(self.clauses, self.slop, self.ordered).unwrap()
}
}
const SPAN_NEAR_QUERY: &str = "span_near";
pub struct SpanNearQuery {
clauses: Vec<SpanQueryEnum>,
slop: i32,
in_order: bool,
field: String,
}
impl SpanNearQuery {
pub fn new(clauses: Vec<SpanQueryEnum>, slop: i32, in_order: bool) -> Result<Self> {
if clauses.len() < 2 {
bail!(ErrorKind::IllegalArgument(
"clauses length must not be smaller than 2!".into()
));
}
for i in 0..clauses.len() - 1 {
if SpanQuery::<CodecEnum>::field(&clauses[i])
!= SpanQuery::<CodecEnum>::field(&clauses[i + 1])
{
bail!(ErrorKind::IllegalArgument(
"Clauses must have same field.".into()
));
}
}
let field = SpanQuery::<CodecEnum>::field(&clauses[0]).to_string();
Ok(SpanNearQuery {
clauses,
slop,
in_order,
field,
})
}
fn merge_idf_ctx(
ctx1: Option<KeyedContext>,
ctx2: Option<KeyedContext>,
) -> Option<KeyedContext> {
if ctx1.is_none() {
ctx2
} else if ctx2.is_none() {
ctx1
} else {
let mut ctx = KeyedContext::default();
let w = ctx1.map_or(0f32, |v| v.get_float(BM25_SIMILARITY_IDF).unwrap_or(0f32))
+ ctx2.map_or(0f32, |v| v.get_float(BM25_SIMILARITY_IDF).unwrap_or(0f32));
ctx.set_float(BM25_SIMILARITY_IDF, w);
Some(ctx)
}
}
fn span_near_weight<C: Codec>(
&self,
searcher: &dyn SearchPlanBuilder<C>,
needs_scores: bool,
) -> Result<SpanNearWeight<C>> {
let mut sub_weights = Vec::with_capacity(self.clauses.len());
let mut ctx = None;
for clause in &self.clauses {
sub_weights.push(clause.span_weight(searcher, needs_scores)?);
ctx = Self::merge_idf_ctx(ctx, SpanQuery::<C>::ctx(clause));
}
let term_contexts = if needs_scores {
term_contexts(&sub_weights)
} else {
HashMap::new()
};
SpanNearWeight::new(self, sub_weights, searcher, term_contexts, ctx)
}
}
impl<C: Codec> SpanQuery<C> for SpanNearQuery {
type Weight = SpanNearWeight<C>;
fn span_weight(
&self,
searcher: &dyn SearchPlanBuilder<C>,
needs_scores: bool,
) -> Result<Self::Weight> {
self.span_near_weight(searcher, needs_scores)
}
fn field(&self) -> &str {
&self.field
}
}
impl<C: Codec> Query<C> for SpanNearQuery {
fn create_weight(
&self,
searcher: &dyn SearchPlanBuilder<C>,
needs_scores: bool,
) -> Result<Box<dyn Weight<C>>> {
let weight = self.span_near_weight(searcher, needs_scores)?;
Ok(Box::new(weight))
}
fn extract_terms(&self) -> Vec<TermQuery> {
self.clauses
.iter()
.flat_map(|c| Query::<C>::extract_terms(c))
.collect()
}
fn as_any(&self) -> &dyn (::std::any::Any) {
self
}
}
impl fmt::Display for SpanNearQuery {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let clauses = {
let query_strs: Vec<String> = self.clauses.iter().map(|q| format!("{}", q)).collect();
query_strs.join(", ")
};
write!(
f,
"SpanNearQuery(clauses: [{}], slop: {}, in_order: {})",
clauses, self.slop, self.in_order
)
}
}
pub struct SpanNearWeight<C: Codec> {
field: String,
in_order: bool,
slop: i32,
sim_weight: Option<Box<dyn SimWeight<C>>>,
sub_weights: Vec<SpanWeightEnum<C>>,
}
impl<C: Codec> SpanNearWeight<C> {
pub fn new<IS: SearchPlanBuilder<C> + ?Sized>(
query: &SpanNearQuery,
sub_weights: Vec<SpanWeightEnum<C>>,
searcher: &IS,
terms: HashMap<Term, Arc<TermContext<CodecTermState<C>>>>,
ctx: Option<KeyedContext>,
) -> Result<Self> {
let field = SpanQuery::<C>::field(query).to_string();
let sim_weight = build_sim_weight(&field, searcher, terms, ctx)?;
Ok(SpanNearWeight {
field,
sim_weight,
sub_weights,
in_order: query.in_order,
slop: query.slop,
})
}
}
impl<C: Codec> SpanWeight<C> for SpanNearWeight<C> {
fn sim_weight(&self) -> Option<&dyn SimWeight<C>> {
self.sim_weight.as_ref().map(|x| &**x)
}
fn sim_weight_mut(&mut self) -> Option<&mut dyn SimWeight<C>> {
if let Some(ref mut sim_weight) = self.sim_weight {
Some(sim_weight.as_mut())
} else {
None
}
}
fn get_spans(
&self,
ctx: &LeafReaderContext<'_, C>,
required_postings: &PostingsFlag,
) -> Result<Option<SpansEnum<CodecPostingIterator<C>>>> {
if ctx.reader.terms(&self.field)?.is_some() {
let mut sub_spans = Vec::with_capacity(self.sub_weights.len());
for w in &self.sub_weights {
if let Some(span) = w.get_spans(ctx, required_postings)? {
sub_spans.push(span);
} else {
return Ok(None);
}
}
if self.in_order {
return Ok(Some(SpansEnum::NearOrdered(NearSpansOrdered::new(
self.slop, sub_spans,
)?)));
} else {
return Ok(Some(SpansEnum::NearUnordered(NearSpansUnordered::new(
self.slop, sub_spans,
)?)));
}
}
Ok(None)
}
fn extract_term_contexts(
&self,
contexts: &mut HashMap<Term, Arc<TermContext<CodecTermState<C>>>>,
) {
for weight in &self.sub_weights {
weight.extract_term_contexts(contexts)
}
}
}
impl<C: Codec> Weight<C> for SpanNearWeight<C> {
fn create_scorer(&self, ctx: &LeafReaderContext<'_, C>) -> Result<Option<Box<dyn Scorer>>> {
self.do_create_scorer(ctx)
}
fn query_type(&self) -> &'static str {
SPAN_NEAR_QUERY
}
fn normalize(&mut self, norm: f32, boost: f32) {
self.do_normalize(norm, boost)
}
fn value_for_normalization(&self) -> f32 {
self.do_value_for_normalization()
}
fn needs_scores(&self) -> bool {
true
}
fn explain(&self, reader: &LeafReaderContext<'_, C>, doc: DocId) -> Result<Explanation> {
self.explain_span(reader, doc)
}
}
impl<C: Codec> fmt::Display for SpanNearWeight<C> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let weights = {
let query_strs: Vec<String> =
self.sub_weights.iter().map(|q| format!("{}", q)).collect();
query_strs.join(", ")
};
write!(
f,
"SpanNearQuery(field: {}, sub_weight: [{}], in_order: {})",
&self.field, weights, self.in_order
)
}
}
pub struct NearSpansUnordered<P: PostingIterator> {
conjunction_span: MaybeUninit<ConjunctionSpanBase<P>>,
sub_span_cells: Vec<SpansCell<P>>,
allowed_slop: i32,
span_position_queue: BinaryHeap<SpansCellElement<P>>,
total_span_length: i32,
max_end_position_cell_idx: usize,
}
impl<P: PostingIterator> Drop for NearSpansUnordered<P> {
fn drop(&mut self) {
unsafe {
ptr::drop_in_place(self.conjunction_span.as_mut_ptr());
}
}
}
impl<P: PostingIterator> NearSpansUnordered<P> {
#[allow(clippy::explicit_counter_loop)]
pub fn new(
allowed_slop: i32,
sub_spans: Vec<SpansEnum<P>>,
) -> Result<Box<NearSpansUnordered<P>>> {
let mut res = Box::new(NearSpansUnordered {
conjunction_span: MaybeUninit::<ConjunctionSpanBase<P>>::uninit(),
sub_span_cells: Vec::with_capacity(sub_spans.len()),
allowed_slop,
span_position_queue: BinaryHeap::with_capacity(sub_spans.len()),
total_span_length: 0,
max_end_position_cell_idx: 0,
});
let mut index = 0usize;
for spans in sub_spans {
let span_cell =
SpansCell::new(res.as_mut() as *mut NearSpansUnordered<P>, spans, index);
index += 1;
res.sub_span_cells.push(span_cell);
}
let conjunction_span = {
let spans: Vec<_> = res
.sub_span_cells
.iter_mut()
.map(|s| &mut s.spans)
.collect();
ConjunctionSpanBase::new(spans)?
};
res.conjunction_span.write(conjunction_span);
res.single_cell_to_positions_queue();
Ok(res)
}
fn single_cell_to_positions_queue(&mut self) {
assert_eq!(self.sub_span_cells[0].doc_id(), -1);
assert_eq!(self.sub_span_cells[0].start_position(), -1);
let ele = SpansCellElement::new(&mut self.sub_span_cells[0]);
self.span_position_queue.push(ele);
}
fn sub_span_cells_to_positions_queue(&mut self) -> Result<()> {
self.span_position_queue.clear();
let queue = &mut self.span_position_queue;
for cell in &mut self.sub_span_cells {
debug_assert_eq!(cell.start_position(), -1);
cell.next_start_position()?;
debug_assert_ne!(cell.start_position(), NO_MORE_POSITIONS);
let ele = SpansCellElement::new(cell);
queue.push(ele);
}
Ok(())
}
fn at_match(&self) -> bool {
let min_cell = self.min_cell();
let max_cell = &self.sub_span_cells[self.max_end_position_cell_idx];
debug_assert_eq!(min_cell.doc_id(), max_cell.doc_id());
max_cell.end_position() - min_cell.start_position() - self.total_span_length
<= self.allowed_slop
}
fn min_cell(&self) -> &SpansCell<P> {
self.span_position_queue.peek().unwrap().spans()
}
}
impl<P: PostingIterator> ConjunctionSpans<P> for NearSpansUnordered<P> {
fn conjunction_span_base(&self) -> &ConjunctionSpanBase<P> {
unsafe { self.conjunction_span.get_ref() }
}
fn conjunction_span_base_mut(&mut self) -> &mut ConjunctionSpanBase<P> {
unsafe { self.conjunction_span.get_mut() }
}
fn two_phase_current_doc_matches(&mut self) -> Result<bool> {
self.sub_span_cells_to_positions_queue()?;
loop {
if self.at_match() {
unsafe {
self.conjunction_span.get_mut().first_in_current_doc = true;
self.conjunction_span.get_mut().one_exhausted_in_current_doc = false;
}
return Ok(true);
}
let mut min_cell = self.span_position_queue.peek_mut().unwrap();
debug_assert_ne!(min_cell.spans_mut().start_position(), NO_MORE_POSITIONS);
let next = min_cell.spans_mut().next_start_position()?;
if next == NO_MORE_POSITIONS {
return Ok(false);
}
}
}
}
impl<P: PostingIterator> Spans for NearSpansUnordered<P> {
fn next_start_position(&mut self) -> Result<i32> {
unsafe {
if self.conjunction_span.get_ref().first_in_current_doc {
self.conjunction_span.get_mut().first_in_current_doc = false;
return Ok(self.min_cell().start_position());
}
}
while self.min_cell().start_position() == -1 {
self.span_position_queue
.peek_mut()
.unwrap()
.spans_mut()
.next_start_position()?;
}
debug_assert_ne!(self.min_cell().start_position(), NO_MORE_POSITIONS);
loop {
if self
.span_position_queue
.peek_mut()
.unwrap()
.spans_mut()
.next_start_position()?
== NO_MORE_POSITIONS
{
unsafe {
self.conjunction_span.get_mut().one_exhausted_in_current_doc = true;
}
return Ok(NO_MORE_POSITIONS);
}
if self.at_match() {
return Ok(self.min_cell().start_position());
}
}
}
fn start_position(&self) -> i32 {
unsafe {
if self.conjunction_span.get_ref().first_in_current_doc {
-1
} else if self.conjunction_span.get_ref().one_exhausted_in_current_doc {
NO_MORE_POSITIONS
} else {
self.min_cell().start_position()
}
}
}
fn end_position(&self) -> i32 {
unsafe {
if self.conjunction_span.get_ref().first_in_current_doc {
-1
} else if self.conjunction_span.get_ref().one_exhausted_in_current_doc {
NO_MORE_POSITIONS
} else {
self.sub_span_cells[self.max_end_position_cell_idx].end_position()
}
}
}
fn width(&self) -> i32 {
self.sub_span_cells[self.max_end_position_cell_idx].start_position()
- self.min_cell().start_position()
}
fn collect(&mut self, collector: &mut impl SpanCollector) -> Result<()> {
for cell in &mut self.sub_span_cells {
cell.collect(collector)?;
}
Ok(())
}
fn positions_cost(&self) -> f32 {
unreachable!()
}
}
conjunction_span_doc_iter!(NearSpansUnordered<P: PostingIterator>);
pub struct SpansCell<P: PostingIterator> {
parent: *mut NearSpansUnordered<P>,
spans: SpansEnum<P>,
index: usize,
span_length: i32,
}
unsafe impl<P: PostingIterator> Send for SpansCell<P> {}
impl<P: PostingIterator> SpansCell<P> {
pub fn new(parent: *mut NearSpansUnordered<P>, spans: SpansEnum<P>, index: usize) -> Self {
SpansCell {
parent,
spans,
index,
span_length: -1,
}
}
fn adjust_length(&mut self) {
if self.span_length != -1 {
unsafe {
(*self.parent).total_span_length -= self.span_length;
}
}
debug_assert_ne!(self.spans.start_position(), NO_MORE_POSITIONS);
self.span_length = self.end_position() - self.start_position();
debug_assert!(self.span_length >= 0);
unsafe {
(*self.parent).total_span_length += self.span_length;
}
}
fn adjust_max(&mut self) {
unsafe {
let max_index = (*self.parent).max_end_position_cell_idx;
let max_cell = &(*self.parent).sub_span_cells[max_index];
debug_assert_eq!(self.doc_id(), max_cell.doc_id());
if self.end_position() > max_cell.end_position() {
(*self.parent).max_end_position_cell_idx = self.index;
}
}
}
}
impl<P: PostingIterator> Spans for SpansCell<P> {
fn next_start_position(&mut self) -> Result<i32> {
let res = self.spans.next_start_position()?;
if res != NO_MORE_POSITIONS {
self.adjust_length();
}
self.adjust_max();
Ok(res)
}
fn start_position(&self) -> i32 {
self.spans.start_position()
}
fn end_position(&self) -> i32 {
self.spans.end_position()
}
fn width(&self) -> i32 {
self.spans.width()
}
fn collect(&mut self, collector: &mut impl SpanCollector) -> Result<()> {
self.spans.collect(collector)
}
fn positions_cost(&self) -> f32 {
self.spans.positions_cost()
}
}
impl<P: PostingIterator> DocIterator for SpansCell<P> {
fn doc_id(&self) -> i32 {
self.spans.doc_id()
}
fn next(&mut self) -> Result<i32> {
self.spans.next()
}
fn advance(&mut self, target: i32) -> Result<i32> {
self.spans.advance(target)
}
fn cost(&self) -> usize {
self.spans.cost()
}
fn matches(&mut self) -> Result<bool> {
self.spans.matches()
}
fn match_cost(&self) -> f32 {
self.spans.match_cost()
}
fn support_two_phase(&self) -> bool {
self.spans.support_two_phase()
}
fn approximate_next(&mut self) -> Result<DocId> {
self.spans.approximate_next()
}
fn approximate_advance(&mut self, target: i32) -> Result<DocId> {
self.spans.approximate_advance(target)
}
}
struct SpansCellElement<P: PostingIterator> {
spans: *mut SpansCell<P>,
}
unsafe impl<P: PostingIterator> Send for SpansCellElement<P> {}
impl<P: PostingIterator> SpansCellElement<P> {
fn new(span: &mut SpansCell<P>) -> Self {
SpansCellElement {
spans: span as *mut SpansCell<P>,
}
}
fn spans(&self) -> &SpansCell<P> {
unsafe { &*self.spans }
}
fn spans_mut(&mut self) -> &mut SpansCell<P> {
unsafe { &mut *self.spans }
}
}
impl<P: PostingIterator> Eq for SpansCellElement<P> {}
impl<P: PostingIterator> PartialEq for SpansCellElement<P> {
fn eq(&self, other: &SpansCellElement<P>) -> bool {
unsafe { (*self.spans).index.eq(&(*other.spans).index) }
}
}
impl<P: PostingIterator> Ord for SpansCellElement<P> {
fn cmp(&self, other: &Self) -> Ordering {
unsafe {
let span1 = &mut (*self.spans).spans;
let span2 = &mut (*other.spans).spans;
debug_assert_eq!(span1.doc_id(), span2.doc_id());
let start1 = span1.start_position();
let start2 = span2.start_position();
if start1 == start2 {
span2.end_position().cmp(&span1.end_position())
} else {
start2.cmp(&start1)
}
}
}
}
impl<P: PostingIterator> PartialOrd for SpansCellElement<P> {
fn partial_cmp(&self, other: &SpansCellElement<P>) -> Option<Ordering> {
Some(self.cmp(other))
}
}
pub struct NearSpansOrdered<P: PostingIterator> {
conjunction_span: ConjunctionSpanBase<P>,
sub_spans: Vec<SpansEnum<P>>,
match_start: i32,
match_end: i32,
match_width: i32,
allowed_slop: i32,
}
impl<P: PostingIterator> NearSpansOrdered<P> {
pub fn new(allowed_slop: i32, mut sub_spans: Vec<SpansEnum<P>>) -> Result<Self> {
let conjunction_span = ConjunctionSpanBase::new(&mut sub_spans)?;
Ok(NearSpansOrdered {
conjunction_span,
sub_spans,
match_start: -1,
match_end: -1,
match_width: -1,
allowed_slop,
})
}
fn unpositioned(&self) -> bool {
self.sub_spans
.iter()
.all(|span| span.start_position() == -1)
}
fn stretch_to_order(&mut self) -> Result<bool> {
self.match_start = self.sub_spans[0].start_position();
debug_assert_ne!(self.sub_spans[0].start_position(), NO_MORE_POSITIONS);
debug_assert_ne!(self.sub_spans[0].end_position(), NO_MORE_POSITIONS);
self.match_width = 0;
for i in 1..self.sub_spans.len() {
debug_assert_ne!(self.sub_spans[i].start_position(), NO_MORE_POSITIONS);
debug_assert_ne!(self.sub_spans[i].end_position(), NO_MORE_POSITIONS);
let prev_end_position = self.sub_spans[i - 1].end_position();
if self.sub_spans[i].advance_position(prev_end_position)? == NO_MORE_POSITIONS {
self.conjunction_span.one_exhausted_in_current_doc = true;
return Ok(false);
}
self.match_width += self.sub_spans[i].start_position() - prev_end_position;
}
self.match_end = self.sub_spans[self.sub_spans.len() - 1].end_position();
Ok(true)
}
}
impl<P: PostingIterator> ConjunctionSpans<P> for NearSpansOrdered<P> {
fn conjunction_span_base(&self) -> &ConjunctionSpanBase<P> {
&self.conjunction_span
}
fn conjunction_span_base_mut(&mut self) -> &mut ConjunctionSpanBase<P> {
&mut self.conjunction_span
}
fn two_phase_current_doc_matches(&mut self) -> Result<bool> {
debug_assert!(self.unpositioned());
self.conjunction_span.one_exhausted_in_current_doc = false;
while (self.sub_spans[0].next_start_position()? != NO_MORE_POSITIONS)
&& !self.conjunction_span.one_exhausted_in_current_doc
{
if self.stretch_to_order()? && self.match_width <= self.allowed_slop {
self.conjunction_span.first_in_current_doc = true;
return Ok(true);
}
}
Ok(false)
}
}
impl<P: PostingIterator> Spans for NearSpansOrdered<P> {
fn next_start_position(&mut self) -> Result<i32> {
if self.conjunction_span.first_in_current_doc {
self.conjunction_span.first_in_current_doc = false;
return Ok(self.match_start);
}
self.conjunction_span.one_exhausted_in_current_doc = false;
while self.sub_spans[0].next_start_position()? != NO_MORE_POSITIONS
&& !self.conjunction_span.one_exhausted_in_current_doc
{
if self.stretch_to_order()? && (self.match_width <= self.allowed_slop) {
return Ok(self.match_start);
}
}
self.match_start = NO_MORE_POSITIONS;
self.match_end = NO_MORE_POSITIONS;
Ok(NO_MORE_POSITIONS)
}
fn start_position(&self) -> i32 {
if self.conjunction_span.first_in_current_doc {
-1
} else {
self.match_start
}
}
fn end_position(&self) -> i32 {
if self.conjunction_span.first_in_current_doc {
-1
} else {
self.match_end
}
}
fn width(&self) -> i32 {
self.match_width
}
fn collect(&mut self, collector: &mut impl SpanCollector) -> Result<()> {
for span in &mut self.sub_spans {
span.collect(collector)?;
}
Ok(())
}
fn positions_cost(&self) -> f32 {
unreachable!()
}
}
conjunction_span_doc_iter!(NearSpansOrdered<P: PostingIterator>);
const SPAN_GAP_QUERY: &str = "span_gap";
pub struct SpanGapQuery {
field: String,
width: i32,
}
impl SpanGapQuery {
pub fn new(field: String, width: i32) -> Self {
SpanGapQuery { field, width }
}
}
impl<C: Codec> Query<C> for SpanGapQuery {
fn create_weight(
&self,
searcher: &dyn SearchPlanBuilder<C>,
_needs_scores: bool,
) -> Result<Box<dyn Weight<C>>> {
let weight = SpanGapWeight::new(self, searcher, self.width)?;
Ok(Box::new(weight))
}
fn extract_terms(&self) -> Vec<TermQuery> {
vec![]
}
fn as_any(&self) -> &dyn (::std::any::Any) {
self
}
}
impl<C: Codec> SpanQuery<C> for SpanGapQuery {
type Weight = SpanGapWeight<C>;
fn span_weight(
&self,
searcher: &dyn SearchPlanBuilder<C>,
_needs_scores: bool,
) -> Result<Self::Weight> {
SpanGapWeight::new(self, searcher, self.width)
}
fn field(&self) -> &str {
&self.field
}
}
impl fmt::Display for SpanGapQuery {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"SpanGapQuery(field: {}, width: {})",
&self.field, self.width
)
}
}
pub struct SpanGapWeight<C: Codec> {
width: i32,
sim_weight: Option<Box<dyn SimWeight<C>>>,
}
impl<C: Codec> SpanGapWeight<C> {
pub fn new<IS: SearchPlanBuilder<C> + ?Sized>(
query: &SpanGapQuery,
searcher: &IS,
width: i32,
) -> Result<Self> {
let sim_weight =
build_sim_weight(SpanQuery::<C>::field(query), searcher, HashMap::new(), None)?;
Ok(SpanGapWeight { width, sim_weight })
}
}
impl<C: Codec> SpanWeight<C> for SpanGapWeight<C> {
fn sim_weight(&self) -> Option<&dyn SimWeight<C>> {
self.sim_weight.as_ref().map(|x| &**x)
}
fn sim_weight_mut(&mut self) -> Option<&mut dyn SimWeight<C>> {
if let Some(ref mut sim_weight) = self.sim_weight {
Some(sim_weight.as_mut())
} else {
None
}
}
fn get_spans(
&self,
_reader: &LeafReaderContext<'_, C>,
_required_postings: &PostingsFlag,
) -> Result<Option<SpansEnum<CodecPostingIterator<C>>>> {
Ok(Some(SpansEnum::Gap(GapSpans::new(self.width))))
}
fn extract_term_contexts(
&self,
_contexts: &mut HashMap<Term, Arc<TermContext<CodecTermState<C>>>>,
) {
}
}
impl<C: Codec> Weight<C> for SpanGapWeight<C> {
fn create_scorer(&self, ctx: &LeafReaderContext<'_, C>) -> Result<Option<Box<dyn Scorer>>> {
self.do_create_scorer(ctx)
}
fn query_type(&self) -> &'static str {
SPAN_GAP_QUERY
}
fn normalize(&mut self, norm: f32, boost: f32) {
if let Some(ref mut sim_weight) = self.sim_weight {
sim_weight.normalize(norm, boost)
}
}
fn value_for_normalization(&self) -> f32 {
if let Some(ref sim_weight) = self.sim_weight {
sim_weight.get_value_for_normalization()
} else {
1.0
}
}
fn needs_scores(&self) -> bool {
false
}
fn explain(&self, reader: &LeafReaderContext<'_, C>, doc: DocId) -> Result<Explanation> {
self.explain_span(reader, doc)
}
}
impl<C: Codec> fmt::Display for SpanGapWeight<C> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "SpanGapWeight(width: {})", self.width)
}
}
pub struct GapSpans {
doc: DocId,
pos: i32,
width: i32,
}
impl GapSpans {
pub fn new(width: i32) -> Self {
GapSpans {
doc: -1,
pos: -1,
width,
}
}
fn skip_to_position(&mut self, position: i32) {
self.pos = position;
}
}
impl Spans for GapSpans {
fn next_start_position(&mut self) -> Result<i32> {
self.pos += 1;
Ok(self.pos)
}
fn start_position(&self) -> i32 {
self.pos
}
fn end_position(&self) -> i32 {
self.pos + self.width
}
fn width(&self) -> i32 {
self.width
}
fn collect(&mut self, _collector: &mut impl SpanCollector) -> Result<()> {
Ok(())
}
fn positions_cost(&self) -> f32 {
0.0
}
fn advance_position(&mut self, position: i32) -> Result<i32> {
self.skip_to_position(position);
Ok(self.pos)
}
}
impl DocIterator for GapSpans {
fn doc_id(&self) -> i32 {
self.doc
}
fn next(&mut self) -> Result<i32> {
self.pos = -1;
self.doc += 1;
Ok(self.doc)
}
fn advance(&mut self, target: i32) -> Result<i32> {
self.pos = -1;
self.doc = target;
Ok(self.doc)
}
fn cost(&self) -> usize {
0
}
}