use std::io;
use std::ops::Bound;
use common::bounds::{map_bound, BoundsRange};
use common::BitSet;
use super::range_query_fastfield::FastFieldRangeWeight;
use crate::index::SegmentReader;
use crate::query::explanation::does_not_match;
use crate::query::range_query::is_type_valid_for_fastfield_range_query;
use crate::query::{BitSetDocSet, ConstScorer, EnableScoring, Explanation, Query, Scorer, Weight};
use crate::schema::{Field, IndexRecordOption, Term, Type};
use crate::termdict::{TermDictionary, TermStreamer};
use crate::{DocId, Score};
#[derive(Clone, Debug)]
pub struct RangeQuery {
bounds: BoundsRange<Term>,
}
impl RangeQuery {
pub fn new(lower_bound: Bound<Term>, upper_bound: Bound<Term>) -> RangeQuery {
RangeQuery {
bounds: BoundsRange::new(lower_bound, upper_bound),
}
}
pub fn field(&self) -> Field {
self.get_term().field()
}
pub fn value_type(&self) -> Type {
self.get_term().typ()
}
pub(crate) fn get_term(&self) -> &Term {
self.bounds
.get_inner()
.expect("At least one bound must be set")
}
}
impl Query for RangeQuery {
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
let schema = enable_scoring.schema();
let field_type = schema.get_field_entry(self.field()).field_type();
if field_type.is_fast() && is_type_valid_for_fastfield_range_query(self.value_type()) {
Ok(Box::new(FastFieldRangeWeight::new(self.bounds.clone())))
} else {
if field_type.is_json() {
return Err(crate::LucivyError::InvalidArgument(
"RangeQuery on JSON is only supported for fast fields currently".to_string(),
));
}
Ok(Box::new(InvertedIndexRangeWeight::new(
self.field(),
&self.bounds.lower_bound,
&self.bounds.upper_bound,
None,
)))
}
}
}
#[derive(Clone, Debug)]
pub struct InvertedIndexRangeQuery {
bounds: BoundsRange<Term>,
limit: Option<u64>,
}
impl InvertedIndexRangeQuery {
pub fn new(lower_bound: Bound<Term>, upper_bound: Bound<Term>) -> InvertedIndexRangeQuery {
InvertedIndexRangeQuery {
bounds: BoundsRange::new(lower_bound, upper_bound),
limit: None,
}
}
pub fn limit(&mut self, limit: u64) {
self.limit = Some(limit);
}
}
impl Query for InvertedIndexRangeQuery {
fn weight(&self, _enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
let field = self
.bounds
.get_inner()
.expect("At least one bound must be set")
.field();
Ok(Box::new(InvertedIndexRangeWeight::new(
field,
&self.bounds.lower_bound,
&self.bounds.upper_bound,
self.limit,
)))
}
}
pub struct InvertedIndexRangeWeight {
field: Field,
lower_bound: Bound<Vec<u8>>,
upper_bound: Bound<Vec<u8>>,
limit: Option<u64>,
}
impl InvertedIndexRangeWeight {
pub fn new(
field: Field,
lower_bound: &Bound<Term>,
upper_bound: &Bound<Term>,
limit: Option<u64>,
) -> Self {
let verify_and_unwrap_term = |val: &Term| val.serialized_value_bytes().to_owned();
Self {
field,
lower_bound: map_bound(lower_bound, verify_and_unwrap_term),
upper_bound: map_bound(upper_bound, verify_and_unwrap_term),
limit,
}
}
fn term_range<'a>(&self, term_dict: &'a TermDictionary) -> io::Result<TermStreamer<'a>> {
use std::ops::Bound::*;
let mut term_stream_builder = term_dict.range();
term_stream_builder = match self.lower_bound {
Included(ref term_val) => term_stream_builder.ge(term_val),
Excluded(ref term_val) => term_stream_builder.gt(term_val),
Unbounded => term_stream_builder,
};
term_stream_builder = match self.upper_bound {
Included(ref term_val) => term_stream_builder.le(term_val),
Excluded(ref term_val) => term_stream_builder.lt(term_val),
Unbounded => term_stream_builder,
};
#[cfg(feature = "quickwit")]
if let Some(limit) = self.limit {
term_stream_builder = term_stream_builder.limit(limit);
}
term_stream_builder.into_stream()
}
}
impl Weight for InvertedIndexRangeWeight {
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
let max_doc = reader.max_doc();
let mut doc_bitset = BitSet::with_max_value(max_doc);
let inverted_index = reader.inverted_index(self.field)?;
let term_dict = inverted_index.terms();
let mut term_range = self.term_range(term_dict)?;
let mut processed_count = 0;
while term_range.advance() {
if let Some(limit) = self.limit {
if limit <= processed_count {
break;
}
}
processed_count += 1;
let term_info = term_range.value();
let mut block_segment_postings = inverted_index
.read_block_postings_from_terminfo(term_info, IndexRecordOption::Basic)?;
loop {
let docs = block_segment_postings.docs();
if docs.is_empty() {
break;
}
for &doc in block_segment_postings.docs() {
doc_bitset.insert(doc);
}
block_segment_postings.advance();
}
}
let doc_bitset = BitSetDocSet::from(doc_bitset);
Ok(Box::new(ConstScorer::new(doc_bitset, boost)))
}
fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
let mut scorer = self.scorer(reader, 1.0)?;
if scorer.seek(doc) != doc {
return Err(does_not_match(doc));
}
Ok(Explanation::new("RangeQuery", 1.0))
}
}
#[cfg(test)]
mod tests {
use std::net::IpAddr;
use std::ops::Bound;
use std::str::FromStr;
use rand::seq::SliceRandom;
use super::RangeQuery;
use crate::collector::{Count, TopDocs};
use crate::indexer::NoMergePolicy;
use crate::query::range_query::fast_field_range_doc_set::RangeDocSet;
use crate::query::range_query::range_query::InvertedIndexRangeQuery;
use crate::query::{AllScorer, ConstScorer, EmptyScorer, EnableScoring, Query, QueryParser};
use crate::schema::{
Field, IntoIpv6Addr, Schema, LucivyDocument, FAST, INDEXED, STORED, TEXT,
};
use crate::{Index, IndexWriter, Term};
#[test]
fn test_range_query_simple() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let year_field = schema_builder.add_u64_field("year", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests()?;
for year in 1950u64..2017u64 {
let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
for _ in 0..num_docs_within_year {
index_writer.add_document(doc!(year_field => year))?;
}
}
index_writer.commit()?;
}
let reader = index.reader()?;
let searcher = reader.searcher();
let docs_in_the_sixties = InvertedIndexRangeQuery::new(
Bound::Included(Term::from_field_u64(year_field, 1960)),
Bound::Excluded(Term::from_field_u64(year_field, 1970)),
);
let count = searcher.search(&docs_in_the_sixties, &Count)?;
assert_eq!(count, 2285);
Ok(())
}
#[test]
fn test_range_query_with_limit() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let year_field = schema_builder.add_u64_field("year", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests()?;
for year in 1950u64..2017u64 {
if year == 1963 {
continue;
}
let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
for _ in 0..num_docs_within_year {
index_writer.add_document(doc!(year_field => year))?;
}
}
index_writer.commit()?;
}
let reader = index.reader()?;
let searcher = reader.searcher();
let mut docs_in_the_sixties = InvertedIndexRangeQuery::new(
Bound::Included(Term::from_field_u64(year_field, 1960)),
Bound::Excluded(Term::from_field_u64(year_field, 1970)),
);
docs_in_the_sixties.limit(5);
let count = searcher.search(&docs_in_the_sixties, &Count)?;
assert_eq!(count, 836);
Ok(())
}
#[test]
fn test_range_query() -> crate::Result<()> {
let int_field: Field;
let schema = {
let mut schema_builder = Schema::builder();
int_field = schema_builder.add_i64_field("intfield", INDEXED);
schema_builder.build()
};
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_with_num_threads(1, 60_000_000)?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
for i in 1..100 {
let mut doc = LucivyDocument::new();
for j in 1..100 {
if i % j == 0 {
doc.add_i64(int_field, j as i64);
}
}
index_writer.add_document(doc)?;
if i == 10 {
index_writer.commit()?;
}
}
index_writer.commit()?;
}
let reader = index.reader().unwrap();
let searcher = reader.searcher();
assert_eq!(searcher.segment_readers().len(), 2);
let count_multiples =
|range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();
assert_eq!(
count_multiples(RangeQuery::new(
Bound::Included(Term::from_field_i64(int_field, 10)),
Bound::Excluded(Term::from_field_i64(int_field, 11)),
)),
9
);
assert_eq!(
count_multiples(RangeQuery::new(
Bound::Included(Term::from_field_i64(int_field, 10)),
Bound::Included(Term::from_field_i64(int_field, 11)),
)),
18
);
assert_eq!(
count_multiples(RangeQuery::new(
Bound::Excluded(Term::from_field_i64(int_field, 9)),
Bound::Included(Term::from_field_i64(int_field, 10)),
)),
9
);
assert_eq!(
count_multiples(RangeQuery::new(
Bound::Included(Term::from_field_i64(int_field, 9)),
Bound::Unbounded
)),
91
);
Ok(())
}
#[test]
fn test_range_float() -> crate::Result<()> {
let float_field: Field;
let schema = {
let mut schema_builder = Schema::builder();
float_field = schema_builder.add_f64_field("floatfield", INDEXED);
schema_builder.build()
};
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_with_num_threads(1, 60_000_000).unwrap();
let mut docs = Vec::new();
for i in 1..100 {
let mut doc = LucivyDocument::new();
for j in 1..100 {
if i % j == 0 {
doc.add_f64(float_field, j as f64);
}
}
docs.push(doc);
}
docs.shuffle(&mut rand::thread_rng());
let mut docs_it = docs.into_iter();
for doc in (&mut docs_it).take(50) {
index_writer.add_document(doc)?;
}
index_writer.commit()?;
for doc in docs_it {
index_writer.add_document(doc)?;
}
index_writer.commit()?;
}
let reader = index.reader()?;
let searcher = reader.searcher();
assert_eq!(searcher.segment_readers().len(), 2);
let count_multiples =
|range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();
assert_eq!(
count_multiples(RangeQuery::new(
Bound::Included(Term::from_field_f64(float_field, 10.0)),
Bound::Excluded(Term::from_field_f64(float_field, 11.0)),
)),
9
);
assert_eq!(
count_multiples(RangeQuery::new(
Bound::Included(Term::from_field_f64(float_field, 10.0)),
Bound::Included(Term::from_field_f64(float_field, 11.0)),
)),
18
);
assert_eq!(
count_multiples(RangeQuery::new(
Bound::Excluded(Term::from_field_f64(float_field, 9.0)),
Bound::Included(Term::from_field_f64(float_field, 10.0)),
)),
9
);
assert_eq!(
count_multiples(RangeQuery::new(
Bound::Included(Term::from_field_f64(float_field, 9.0)),
Bound::Unbounded
)),
91
);
Ok(())
}
#[test]
fn test_bug_reproduce_range_query() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
schema_builder.add_text_field("title", TEXT);
schema_builder.add_i64_field("year", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
let mut index_writer = index.writer_for_tests()?;
let title = schema.get_field("title").unwrap();
let year = schema.get_field("year").unwrap();
index_writer.add_document(doc!(
title => "hemoglobin blood",
year => 1990_i64
))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
let query_parser = QueryParser::for_index(&index, vec![title]);
let query = query_parser.parse_query("hemoglobin AND year:[1970 TO 1990]")?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?;
assert_eq!(top_docs.len(), 1);
Ok(())
}
#[test]
fn search_ip_range_test_posting_list() {
search_ip_range_test_opt(false);
}
#[test]
fn search_ip_range_test() {
search_ip_range_test_opt(true);
}
fn search_ip_range_test_opt(with_fast_field: bool) {
let mut schema_builder = Schema::builder();
let ip_field = if with_fast_field {
schema_builder.add_ip_addr_field("ip", INDEXED | STORED | FAST)
} else {
schema_builder.add_ip_addr_field("ip", INDEXED | STORED)
};
let text_field = schema_builder.add_text_field("text", TEXT | STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let ip_addr_1 = IpAddr::from_str("127.0.0.10").unwrap().into_ipv6_addr();
let ip_addr_2 = IpAddr::from_str("127.0.0.20").unwrap().into_ipv6_addr();
{
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
for _ in 0..1_000 {
index_writer
.add_document(doc!(
ip_field => ip_addr_1,
text_field => "BLUBBER"
))
.unwrap();
}
for _ in 0..1_000 {
index_writer
.add_document(doc!(
ip_field => ip_addr_2,
text_field => "BLOBBER"
))
.unwrap();
}
index_writer.commit().unwrap();
}
let reader = index.reader().unwrap();
let searcher = reader.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
let get_num_hits = |query| {
let (_top_docs, count) = searcher
.search(&query, &(TopDocs::with_limit(10).order_by_score(), Count))
.unwrap();
count
};
let query_from_text = |text: &str| {
QueryParser::for_index(&index, vec![])
.parse_query(text)
.unwrap()
};
assert_eq!(
get_num_hits(query_from_text("ip:[127.0.0.1 TO 127.0.0.20]")),
2000
);
assert_eq!(
get_num_hits(query_from_text("ip:[127.0.0.10 TO 127.0.0.20]")),
2000
);
assert_eq!(
get_num_hits(query_from_text("ip:[127.0.0.11 TO 127.0.0.20]")),
1000
);
assert_eq!(
get_num_hits(query_from_text("ip:[127.0.0.11 TO 127.0.0.19]")),
0
);
assert_eq!(get_num_hits(query_from_text("ip:[127.0.0.11 TO *]")), 1000);
assert_eq!(get_num_hits(query_from_text("ip:[127.0.0.21 TO *]")), 0);
assert_eq!(get_num_hits(query_from_text("ip:[* TO 127.0.0.9]")), 0);
assert_eq!(get_num_hits(query_from_text("ip:[* TO 127.0.0.10]")), 1000);
assert_eq!(
get_num_hits(query_from_text("ip:{127.0.0.1 TO 127.0.0.20}")),
1000
);
assert_eq!(
get_num_hits(query_from_text("ip:{127.0.0.1 TO 127.0.0.21}")),
2000
);
assert_eq!(
get_num_hits(query_from_text("ip:{127.0.0.10 TO 127.0.0.20}")),
0
);
assert_eq!(
get_num_hits(query_from_text("ip:{127.0.0.11 TO 127.0.0.20}")),
0
);
assert_eq!(
get_num_hits(query_from_text("ip:{127.0.0.11 TO 127.0.0.19}")),
0
);
assert_eq!(get_num_hits(query_from_text("ip:{127.0.0.11 TO *}")), 1000);
assert_eq!(get_num_hits(query_from_text("ip:{127.0.0.10 TO *}")), 1000);
assert_eq!(get_num_hits(query_from_text("ip:{127.0.0.21 TO *}")), 0);
assert_eq!(get_num_hits(query_from_text("ip:{127.0.0.20 TO *}")), 0);
assert_eq!(get_num_hits(query_from_text("ip:{127.0.0.19 TO *}")), 1000);
assert_eq!(get_num_hits(query_from_text("ip:{* TO 127.0.0.9}")), 0);
assert_eq!(get_num_hits(query_from_text("ip:{* TO 127.0.0.10}")), 0);
assert_eq!(get_num_hits(query_from_text("ip:{* TO 127.0.0.11}")), 1000);
assert_eq!(
get_num_hits(query_from_text("ip:[127.0.0.1 TO 127.0.0.20}")),
1000
);
assert_eq!(
get_num_hits(query_from_text("ip:{127.0.0.1 TO 127.0.0.20]")),
2000
);
assert_eq!(
get_num_hits(query_from_text(
"text:BLUBBER AND ip:[127.0.0.10 TO 127.0.0.10]"
)),
1000
);
assert_eq!(
get_num_hits(query_from_text(
"text:BLOBBER AND ip:[127.0.0.10 TO 127.0.0.10]"
)),
0
);
assert_eq!(
get_num_hits(query_from_text(
"text:BLOBBER AND ip:[127.0.0.20 TO 127.0.0.20]"
)),
1000
);
assert_eq!(
get_num_hits(query_from_text(
"text:BLUBBER AND ip:[127.0.0.20 TO 127.0.0.20]"
)),
0
);
}
#[test]
fn test_range_query_simplified() {
let mut schema_builder = Schema::builder();
let u64_field = schema_builder.add_u64_field("u64_field", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(u64_field=> 2u64)).unwrap();
index_writer.add_document(doc!(u64_field=> 4u64)).unwrap();
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
let make_term = |value: u64| Term::from_field_u64(u64_field, value);
let make_scorer = move |lower_bound: Bound<u64>, upper_bound: Bound<u64>| {
let lower_bound_term = lower_bound.map(make_term);
let upper_bound_term = upper_bound.map(make_term);
let range_query = RangeQuery::new(lower_bound_term, upper_bound_term);
let range_weight = range_query
.weight(EnableScoring::disabled_from_schema(&schema))
.unwrap();
let range_scorer = range_weight
.scorer(&searcher.segment_readers()[0], 1.0f32)
.unwrap();
range_scorer
};
let range_scorer = make_scorer(Bound::Included(1), Bound::Included(4));
assert!(range_scorer.is::<AllScorer>());
let range_scorer = make_scorer(Bound::Included(0), Bound::Included(2));
assert!(range_scorer.is::<ConstScorer<RangeDocSet<u64>>>());
let range_scorer = make_scorer(Bound::Included(3), Bound::Included(10));
assert!(range_scorer.is::<ConstScorer<RangeDocSet<u64>>>());
let range_scorer = make_scorer(Bound::Included(10), Bound::Included(12));
assert!(range_scorer.is::<ConstScorer<RangeDocSet<u64>>>());
let range_scorer = make_scorer(Bound::Included(0), Bound::Included(1));
assert!(range_scorer.is::<EmptyScorer>());
let range_scorer = make_scorer(Bound::Included(0), Bound::Excluded(2));
assert!(range_scorer.is::<EmptyScorer>());
}
}