1use std::io;
2use std::ops::{Bound, Range};
3
4use async_trait::async_trait;
5use common::{BinarySerializable, BitSet};
6
7use super::range_query_u64_fastfield::FastFieldRangeWeight;
8use crate::core::SegmentReader;
9use crate::error::TantivyError;
10use crate::query::explanation::does_not_match;
11use crate::query::range_query::range_query_ip_fastfield::IPFastFieldRangeWeight;
12use crate::query::{BitSetDocSet, ConstScorer, EnableScoring, Explanation, Query, Scorer, Weight};
13use crate::schema::{Field, IndexRecordOption, Term, Type};
14use crate::termdict::{TermDictionary, TermStreamer};
15use crate::{DateTime, DocId, Score};
16
/// Converts a `Bound<TFrom>` into a `Bound<TTo>` by applying `transform`
/// to the value it carries.
///
/// The kind of bound is preserved: `Included` stays `Included`, `Excluded`
/// stays `Excluded`, and `Unbounded` is returned as-is without calling
/// `transform`.
pub(crate) fn map_bound<TFrom, TTo, Transform: Fn(&TFrom) -> TTo>(
    bound: &Bound<TFrom>,
    transform: &Transform,
) -> Bound<TTo> {
    match bound {
        Bound::Included(value) => Bound::Included(transform(value)),
        Bound::Excluded(value) => Bound::Excluded(transform(value)),
        Bound::Unbounded => Bound::Unbounded,
    }
}
28
29#[derive(Clone, Debug)]
79pub struct RangeQuery {
80 field: Field,
81 value_type: Type,
82 left_bound: Bound<Vec<u8>>,
83 right_bound: Bound<Vec<u8>>,
84}
85
86impl RangeQuery {
87 pub fn new_term_bounds(
92 field: Field,
93 value_type: Type,
94 left_bound: &Bound<Term>,
95 right_bound: &Bound<Term>,
96 ) -> RangeQuery {
97 let verify_and_unwrap_term = |val: &Term| {
98 assert_eq!(field, val.field());
99 val.value_bytes().to_owned()
100 };
101 RangeQuery {
102 field,
103 value_type,
104 left_bound: map_bound(left_bound, &verify_and_unwrap_term),
105 right_bound: map_bound(right_bound, &verify_and_unwrap_term),
106 }
107 }
108
109 pub fn new_i64(field: Field, range: Range<i64>) -> RangeQuery {
114 RangeQuery::new_i64_bounds(
115 field,
116 Bound::Included(range.start),
117 Bound::Excluded(range.end),
118 )
119 }
120
121 pub fn new_i64_bounds(
129 field: Field,
130 left_bound: Bound<i64>,
131 right_bound: Bound<i64>,
132 ) -> RangeQuery {
133 let make_term_val = |val: &i64| Term::from_field_i64(field, *val).value_bytes().to_owned();
134 RangeQuery {
135 field,
136 value_type: Type::I64,
137 left_bound: map_bound(&left_bound, &make_term_val),
138 right_bound: map_bound(&right_bound, &make_term_val),
139 }
140 }
141
142 pub fn new_f64(field: Field, range: Range<f64>) -> RangeQuery {
147 RangeQuery::new_f64_bounds(
148 field,
149 Bound::Included(range.start),
150 Bound::Excluded(range.end),
151 )
152 }
153
154 pub fn new_f64_bounds(
162 field: Field,
163 left_bound: Bound<f64>,
164 right_bound: Bound<f64>,
165 ) -> RangeQuery {
166 let make_term_val = |val: &f64| Term::from_field_f64(field, *val).value_bytes().to_owned();
167 RangeQuery {
168 field,
169 value_type: Type::F64,
170 left_bound: map_bound(&left_bound, &make_term_val),
171 right_bound: map_bound(&right_bound, &make_term_val),
172 }
173 }
174
175 pub fn new_u64_bounds(
183 field: Field,
184 left_bound: Bound<u64>,
185 right_bound: Bound<u64>,
186 ) -> RangeQuery {
187 let make_term_val = |val: &u64| Term::from_field_u64(field, *val).value_bytes().to_owned();
188 RangeQuery {
189 field,
190 value_type: Type::U64,
191 left_bound: map_bound(&left_bound, &make_term_val),
192 right_bound: map_bound(&right_bound, &make_term_val),
193 }
194 }
195
196 pub fn new_u64(field: Field, range: Range<u64>) -> RangeQuery {
201 RangeQuery::new_u64_bounds(
202 field,
203 Bound::Included(range.start),
204 Bound::Excluded(range.end),
205 )
206 }
207
208 pub fn new_date_bounds(
216 field: Field,
217 left_bound: Bound<DateTime>,
218 right_bound: Bound<DateTime>,
219 ) -> RangeQuery {
220 let make_term_val =
221 |val: &DateTime| Term::from_field_date(field, *val).value_bytes().to_owned();
222 RangeQuery {
223 field,
224 value_type: Type::Date,
225 left_bound: map_bound(&left_bound, &make_term_val),
226 right_bound: map_bound(&right_bound, &make_term_val),
227 }
228 }
229
230 pub fn new_date(field: Field, range: Range<DateTime>) -> RangeQuery {
235 RangeQuery::new_date_bounds(
236 field,
237 Bound::Included(range.start),
238 Bound::Excluded(range.end),
239 )
240 }
241
242 pub fn new_str_bounds(field: Field, left: Bound<&str>, right: Bound<&str>) -> RangeQuery {
250 let make_term_val = |val: &&str| val.as_bytes().to_vec();
251 RangeQuery {
252 field,
253 value_type: Type::Str,
254 left_bound: map_bound(&left, &make_term_val),
255 right_bound: map_bound(&right, &make_term_val),
256 }
257 }
258
259 pub fn new_str(field: Field, range: Range<&str>) -> RangeQuery {
264 RangeQuery::new_str_bounds(
265 field,
266 Bound::Included(range.start),
267 Bound::Excluded(range.end),
268 )
269 }
270
271 pub fn field(&self) -> Field {
273 self.field
274 }
275
276 pub fn left_bound(&self) -> Bound<Term> {
278 map_bound(&self.left_bound, &|bytes| {
279 Term::from_field_bytes(self.field, bytes)
280 })
281 }
282
283 pub fn right_bound(&self) -> Bound<Term> {
285 map_bound(&self.right_bound, &|bytes| {
286 Term::from_field_bytes(self.field, bytes)
287 })
288 }
289}
290
291pub(crate) fn is_type_valid_for_fastfield_range_query(typ: Type) -> bool {
292 match typ {
293 Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date => true,
294 Type::IpAddr => true,
295 Type::Str | Type::Facet | Type::Bytes | Type::Json => false,
296 }
297}
298
299pub(crate) fn maps_to_u64_fastfield(typ: Type) -> bool {
301 match typ {
302 Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date => true,
303 Type::IpAddr => false,
304 Type::Str | Type::Facet | Type::Bytes | Type::Json => false,
305 }
306}
307
308#[async_trait]
309impl Query for RangeQuery {
310 fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
311 let schema = enable_scoring.schema();
312 let field_type = schema.get_field_entry(self.field).field_type();
313 let value_type = field_type.value_type();
314 if value_type != self.value_type {
315 let err_msg = format!(
316 "Create a range query of the type {:?}, when the field given was of type {:?}",
317 self.value_type, value_type
318 );
319 return Err(TantivyError::SchemaError(err_msg));
320 }
321
322 if field_type.is_fast() && is_type_valid_for_fastfield_range_query(self.value_type) {
323 if field_type.is_ip_addr() {
324 Ok(Box::new(IPFastFieldRangeWeight::new(
325 self.field,
326 &self.left_bound,
327 &self.right_bound,
328 )))
329 } else {
330 assert!(maps_to_u64_fastfield(self.value_type));
333 let parse_from_bytes = |data: &Vec<u8>| {
334 u64::from_be(BinarySerializable::deserialize(&mut &data[..]).unwrap())
335 };
336
337 let left_bound = map_bound(&self.left_bound, &parse_from_bytes);
338 let right_bound = map_bound(&self.right_bound, &parse_from_bytes);
339 Ok(Box::new(FastFieldRangeWeight::new(
340 self.field,
341 left_bound,
342 right_bound,
343 )))
344 }
345 } else {
346 Ok(Box::new(RangeWeight {
347 field: self.field,
348 left_bound: self.left_bound.clone(),
349 right_bound: self.right_bound.clone(),
350 }))
351 }
352 }
353 #[cfg(feature = "quickwit")]
354 async fn weight_async(
355 &self,
356 enable_scoring: EnableScoring<'_>,
357 ) -> crate::Result<Box<dyn Weight>> {
358 self.weight(enable_scoring)
359 }
360}
361
362pub struct RangeWeight {
363 field: Field,
364 left_bound: Bound<Vec<u8>>,
365 right_bound: Bound<Vec<u8>>,
366}
367
368impl RangeWeight {
369 fn term_range<'a>(&self, term_dict: &'a TermDictionary) -> io::Result<TermStreamer<'a>> {
370 use std::ops::Bound::*;
371 let mut term_stream_builder = term_dict.range();
372 term_stream_builder = match self.left_bound {
373 Included(ref term_val) => term_stream_builder.ge(term_val),
374 Excluded(ref term_val) => term_stream_builder.gt(term_val),
375 Unbounded => term_stream_builder,
376 };
377 term_stream_builder = match self.right_bound {
378 Included(ref term_val) => term_stream_builder.le(term_val),
379 Excluded(ref term_val) => term_stream_builder.lt(term_val),
380 Unbounded => term_stream_builder,
381 };
382 term_stream_builder.into_stream()
383 }
384}
385
386impl Weight for RangeWeight {
387 fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
388 let max_doc = reader.max_doc();
389 let mut doc_bitset = BitSet::with_max_value(max_doc);
390
391 let inverted_index = reader.inverted_index(self.field)?;
392 let term_dict = inverted_index.terms();
393 let mut term_range = self.term_range(term_dict)?;
394 while term_range.advance() {
395 let term_info = term_range.value();
396 let mut block_segment_postings = inverted_index
397 .read_block_postings_from_terminfo(term_info, IndexRecordOption::Basic)?;
398 loop {
399 let docs = block_segment_postings.docs();
400 if docs.is_empty() {
401 break;
402 }
403 for &doc in block_segment_postings.docs() {
404 doc_bitset.insert(doc);
405 }
406 block_segment_postings.advance();
407 }
408 }
409 let doc_bitset = BitSetDocSet::from(doc_bitset);
410 Ok(Box::new(ConstScorer::new(doc_bitset, boost)))
411 }
412
413 fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
414 let mut scorer = self.scorer(reader, 1.0)?;
415 if scorer.seek(doc) != doc {
416 return Err(does_not_match(doc));
417 }
418 Ok(Explanation::new("RangeQuery", 1.0))
419 }
420}
421
#[cfg(test)]
mod tests {

    use std::net::IpAddr;
    use std::ops::Bound;
    use std::str::FromStr;

    use super::RangeQuery;
    use crate::collector::{Count, TopDocs};
    use crate::query::QueryParser;
    use crate::schema::{Document, Field, IntoIpv6Addr, Schema, FAST, INDEXED, STORED, TEXT};
    use crate::{doc, Index};

    /// Counts the documents of a decade on an indexed `u64` field.
    #[test]
    fn test_range_query_simple() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let year_field = schema_builder.add_u64_field("year", INDEXED);
        let index = Index::create_in_ram(schema_builder.build());
        {
            let mut index_writer = index.writer_for_tests()?;
            for year in 1950u64..2017u64 {
                // Each year gets 10 + (year - 1950)^2 documents.
                let years_since_1950 = year - 1950;
                let docs_for_year = 10 + years_since_1950 * years_since_1950;
                for _ in 0..docs_for_year {
                    index_writer.add_document(doc!(year_field => year))?;
                }
            }
            index_writer.commit()?;
        }
        let searcher = index.reader()?.searcher();

        let sixties_query = RangeQuery::new_u64(year_field, 1960u64..1970u64);
        let num_docs_in_sixties = searcher.search(&sixties_query, &Count)?;
        assert_eq!(num_docs_in_sixties, 2285);
        Ok(())
    }

    /// Document `i` (1..100) holds every divisor of `i` as an `i64` value.
    #[test]
    fn test_range_query() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let int_field: Field = schema_builder.add_i64_field("intfield", INDEXED);
        let index = Index::create_in_ram(schema_builder.build());
        {
            let mut index_writer = index.writer_with_num_threads(2, 6_000_000)?;

            for i in 1..100 {
                let mut doc = Document::new();
                for divisor in (1..100).filter(|&j| i % j == 0) {
                    doc.add_i64(int_field, divisor as i64);
                }
                index_writer.add_document(doc)?;
            }

            index_writer.commit()?;
        }
        let reader = index.reader().unwrap();
        let searcher = reader.searcher();
        let count_multiples =
            |range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();

        let half_open = RangeQuery::new_i64(int_field, 10..11);
        assert_eq!(count_multiples(half_open), 9);

        let both_included =
            RangeQuery::new_i64_bounds(int_field, Bound::Included(10), Bound::Included(11));
        assert_eq!(count_multiples(both_included), 18);

        let lower_excluded =
            RangeQuery::new_i64_bounds(int_field, Bound::Excluded(9), Bound::Included(10));
        assert_eq!(count_multiples(lower_excluded), 9);

        let open_ended =
            RangeQuery::new_i64_bounds(int_field, Bound::Included(9), Bound::Unbounded);
        assert_eq!(count_multiples(open_ended), 91);
        Ok(())
    }

    /// Same scenario as `test_range_query`, on an `f64` field.
    #[test]
    fn test_range_float() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let float_field: Field = schema_builder.add_f64_field("floatfield", INDEXED);
        let index = Index::create_in_ram(schema_builder.build());
        {
            let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap();

            for i in 1..100 {
                let mut doc = Document::new();
                for divisor in (1..100).filter(|&j| i % j == 0) {
                    doc.add_f64(float_field, divisor as f64);
                }
                index_writer.add_document(doc)?;
            }

            index_writer.commit()?;
        }
        let reader = index.reader()?;
        let searcher = reader.searcher();
        let count_multiples =
            |range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();

        let half_open = RangeQuery::new_f64(float_field, 10.0..11.0);
        assert_eq!(count_multiples(half_open), 9);

        let both_included =
            RangeQuery::new_f64_bounds(float_field, Bound::Included(10.0), Bound::Included(11.0));
        assert_eq!(count_multiples(both_included), 18);

        let lower_excluded =
            RangeQuery::new_f64_bounds(float_field, Bound::Excluded(9.0), Bound::Included(10.0));
        assert_eq!(count_multiples(lower_excluded), 9);

        let open_ended =
            RangeQuery::new_f64_bounds(float_field, Bound::Included(9.0), Bound::Unbounded);
        assert_eq!(count_multiples(open_ended), 91);
        Ok(())
    }

    /// A parsed query combining a text term with a range on an `i64` field
    /// must match the single indexed document.
    #[test]
    fn test_bug_reproduce_range_query() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        schema_builder.add_text_field("title", TEXT);
        schema_builder.add_i64_field("year", INDEXED);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema.clone());
        let mut index_writer = index.writer_for_tests()?;
        let title_field = schema.get_field("title").unwrap();
        let year_field = schema.get_field("year").unwrap();
        index_writer.add_document(doc!(
            title_field => "hemoglobin blood",
            year_field => 1990_i64
        ))?;
        index_writer.commit()?;
        let searcher = index.reader()?.searcher();
        let parser = QueryParser::for_index(&index, vec![title_field]);
        let query = parser.parse_query("hemoglobin AND year:[1970 TO 1990]")?;
        let hits = searcher.search(&query, &TopDocs::with_limit(10))?;
        assert_eq!(hits.len(), 1);
        Ok(())
    }

    /// IP range queries on a field that is indexed but not a fast field.
    #[test]
    fn search_ip_range_test_posting_list() {
        search_ip_range_test_opt(false);
    }

    /// IP range queries on a field that is also a fast field.
    #[test]
    fn search_ip_range_test() {
        search_ip_range_test_opt(true);
    }

    /// Indexes 1000 documents with IP `127.0.0.10` (text `BLUBBER`) and 1000
    /// with IP `127.0.0.20` (text `BLOBBER`), then checks hit counts of
    /// parsed range queries.
    fn search_ip_range_test_opt(with_fast_field: bool) {
        let mut schema_builder = Schema::builder();
        let ip_options = if with_fast_field {
            INDEXED | STORED | FAST
        } else {
            INDEXED | STORED
        };
        let ip_field = schema_builder.add_ip_addr_field("ip", ip_options);
        let text_field = schema_builder.add_text_field("text", TEXT | STORED);
        let index = Index::create_in_ram(schema_builder.build());
        let ip_addr_1 = IpAddr::from_str("127.0.0.10").unwrap().into_ipv6_addr();
        let ip_addr_2 = IpAddr::from_str("127.0.0.20").unwrap().into_ipv6_addr();

        {
            let mut index_writer = index.writer(3_000_000).unwrap();
            for _ in 0..1_000 {
                index_writer
                    .add_document(doc!(
                        ip_field => ip_addr_1,
                        text_field => "BLUBBER"
                    ))
                    .unwrap();
            }
            for _ in 0..1_000 {
                index_writer
                    .add_document(doc!(
                        ip_field => ip_addr_2,
                        text_field => "BLOBBER"
                    ))
                    .unwrap();
            }

            index_writer.commit().unwrap();
        }
        let reader = index.reader().unwrap();
        let searcher = reader.searcher();

        let get_num_hits = |query| {
            let (_top_docs, count) = searcher
                .search(&query, &(TopDocs::with_limit(10), Count))
                .unwrap();
            count
        };
        let query_from_text = |text: &str| {
            QueryParser::for_index(&index, vec![])
                .parse_query(text)
                .unwrap()
        };

        // Parses the query text, runs it and compares the number of hits.
        macro_rules! assert_num_hits {
            ($query_text:expr, $expected:expr) => {
                assert_eq!(get_num_hits(query_from_text($query_text)), $expected)
            };
        }

        // Square brackets: both endpoints are part of the range.
        assert_num_hits!("ip:[127.0.0.1 TO 127.0.0.20]", 2000);
        assert_num_hits!("ip:[127.0.0.10 TO 127.0.0.20]", 2000);
        assert_num_hits!("ip:[127.0.0.11 TO 127.0.0.20]", 1000);
        assert_num_hits!("ip:[127.0.0.11 TO 127.0.0.19]", 0);

        assert_num_hits!("ip:[127.0.0.11 TO *]", 1000);
        assert_num_hits!("ip:[127.0.0.21 TO *]", 0);
        assert_num_hits!("ip:[* TO 127.0.0.9]", 0);
        assert_num_hits!("ip:[* TO 127.0.0.10]", 1000);

        // Curly braces: both endpoints are left out of the range.
        assert_num_hits!("ip:{127.0.0.1 TO 127.0.0.20}", 1000);
        assert_num_hits!("ip:{127.0.0.1 TO 127.0.0.21}", 2000);
        assert_num_hits!("ip:{127.0.0.10 TO 127.0.0.20}", 0);
        assert_num_hits!("ip:{127.0.0.11 TO 127.0.0.20}", 0);
        assert_num_hits!("ip:{127.0.0.11 TO 127.0.0.19}", 0);

        assert_num_hits!("ip:{127.0.0.11 TO *}", 1000);
        assert_num_hits!("ip:{127.0.0.10 TO *}", 1000);
        assert_num_hits!("ip:{127.0.0.21 TO *}", 0);
        assert_num_hits!("ip:{127.0.0.20 TO *}", 0);
        assert_num_hits!("ip:{127.0.0.19 TO *}", 1000);
        assert_num_hits!("ip:{* TO 127.0.0.9}", 0);
        assert_num_hits!("ip:{* TO 127.0.0.10}", 0);
        assert_num_hits!("ip:{* TO 127.0.0.11}", 1000);

        // Mixed bracket kinds.
        assert_num_hits!("ip:[127.0.0.1 TO 127.0.0.20}", 1000);
        assert_num_hits!("ip:{127.0.0.1 TO 127.0.0.20]", 2000);

        // Range query intersected with a text term.
        assert_num_hits!("text:BLUBBER AND ip:[127.0.0.10 TO 127.0.0.10]", 1000);
        assert_num_hits!("text:BLOBBER AND ip:[127.0.0.10 TO 127.0.0.10]", 0);
        assert_num_hits!("text:BLOBBER AND ip:[127.0.0.20 TO 127.0.0.20]", 1000);
        assert_num_hits!("text:BLUBBER AND ip:[127.0.0.20 TO 127.0.0.20]", 0);
    }
}