1#![doc(html_logo_url = "http://fulmicoton.com/tantivy-logo/tantivy-logo.png")]
2#![cfg_attr(all(feature = "unstable", test), feature(test))]
3#![doc(test(attr(allow(unused_variables), deny(warnings))))]
4#![warn(missing_docs)]
5#![allow(
6 clippy::len_without_is_empty,
7 clippy::derive_partial_eq_without_eq,
8 clippy::module_inception,
9 clippy::needless_range_loop,
10 clippy::bool_assert_comparison
11)]
12
13#[cfg_attr(test, macro_use)]
149extern crate serde_json;
150#[macro_use]
151extern crate log;
152
153#[macro_use]
154extern crate thiserror;
155
156#[cfg(all(test, feature = "unstable"))]
157extern crate test;
158
159#[cfg(feature = "mmap")]
160#[cfg(test)]
161mod functional_test;
162
163#[macro_use]
164mod macros;
165mod future_result;
166
167pub use common::DateTime;
169pub use {columnar, query_grammar, time};
170
171pub use crate::error::TantivyError;
172pub use crate::future_result::FutureResult;
173
174pub type Result<T> = std::result::Result<T, TantivyError>;
179
180mod core;
181pub mod indexer;
182
183pub mod error;
184pub mod tokenizer;
185
186pub mod aggregation;
187pub mod collector;
188pub mod directory;
189pub mod fastfield;
190pub mod fieldnorm;
191pub mod index;
192pub mod positions;
193pub mod postings;
194
195pub mod query;
197pub mod schema;
198pub mod space_usage;
199pub mod store;
200pub mod termdict;
201
202mod docset;
203mod reader;
204
205#[cfg(test)]
206mod compat_tests;
207
208pub use self::reader::{IndexReader, IndexReaderBuilder, ReloadPolicy, Warmer};
209pub mod snippet;
210
211use std::fmt;
212
213pub use census::{Inventory, TrackedObject};
214pub use common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64, HasLen};
215use once_cell::sync::Lazy;
216use serde::{Deserialize, Serialize};
217
218pub use self::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN, TERMINATED};
219#[doc(hidden)]
220pub use crate::core::json_utils;
221pub use crate::core::{Executor, Searcher, SearcherGeneration};
222pub use crate::directory::Directory;
223pub use crate::index::{
224 Index, IndexBuilder, IndexMeta, IndexSettings, InvertedIndexReader, Order, Segment,
225 SegmentMeta, SegmentReader,
226};
227pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter};
228pub use crate::schema::{Document, TantivyDocument, Term};
229
/// Index format version of this build.
///
/// This is the value stored in `Version::index_format_version` and printed by
/// `Version`'s `Display` implementation.
pub const INDEX_FORMAT_VERSION: u32 = 7;

/// Oldest index format version that is still supported, going by the constant's name.
// NOTE(review): presumably older indices are rejected when opened — confirm at the use sites.
pub const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4;

// Compile-time guard: bumping one constant without the other must never leave the
// "oldest supported" version ahead of the current one.
const _: () = assert!(
    INDEX_FORMAT_OLDEST_SUPPORTED_VERSION <= INDEX_FORMAT_VERSION,
    "the oldest supported index format cannot be newer than the current index format"
);
234
235#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
237pub struct Version {
238 major: u32,
239 minor: u32,
240 patch: u32,
241 index_format_version: u32,
242}
243
244impl fmt::Debug for Version {
245 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
246 fmt::Display::fmt(self, f)
247 }
248}
249
250static VERSION: Lazy<Version> = Lazy::new(|| Version {
251 major: env!("CARGO_PKG_VERSION_MAJOR").parse().unwrap(),
252 minor: env!("CARGO_PKG_VERSION_MINOR").parse().unwrap(),
253 patch: env!("CARGO_PKG_VERSION_PATCH").parse().unwrap(),
254 index_format_version: INDEX_FORMAT_VERSION,
255});
256
257impl fmt::Display for Version {
258 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
259 write!(
260 f,
261 "tantivy v{}.{}.{}, index_format v{}",
262 self.major, self.minor, self.patch, self.index_format_version
263 )
264 }
265}
266
267static VERSION_STRING: Lazy<String> = Lazy::new(|| VERSION.to_string());
268
269pub fn version() -> &'static Version {
272 &VERSION
273}
274
275pub fn version_string() -> &'static str {
278 VERSION_STRING.as_str()
279}
280
281pub mod merge_policy {
283 pub use crate::indexer::{
284 DefaultMergePolicy, LogMergePolicy, MergeCandidate, MergePolicy, NoMergePolicy,
285 };
286}
287
/// Document identifier, a plain `u32`.
///
/// Used for the `doc_id` half of a `DocAddress`.
// NOTE(review): presumably local to a single segment — confirm against the segment reader API.
pub type DocId = u32;

/// Operation stamp, a plain `u64`.
// NOTE(review): presumably assigned to index operations to order them — confirm against the
// indexer.
pub type Opstamp = u64;

/// Score value, a plain `f32`.
// NOTE(review): presumably the relevance of a document for a query — confirm against the
// collectors.
pub type Score = f32;

/// Segment ordinal, a plain `u32`.
///
/// Used for the `segment_ord` half of a `DocAddress`.
pub type SegmentOrdinal = u32;
313
314impl DocAddress {
315 pub fn new(segment_ord: SegmentOrdinal, doc_id: DocId) -> DocAddress {
317 DocAddress {
318 segment_ord,
319 doc_id,
320 }
321 }
322}
323
324#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
333pub struct DocAddress {
334 pub segment_ord: SegmentOrdinal,
337 pub doc_id: DocId,
339}
340
/// Fail-point hook whose body is compiled only with the `failpoints` feature.
///
/// Without the feature, every form expands to an empty block. Three forms exist:
///
/// * `fail_point!(name)` evaluates the fail point and panics if it asks for a return value.
/// * `fail_point!(name, action)` evaluates the fail point with `action`; when that yields
///   `Some(value)`, the *enclosing function* returns `value`.
/// * `fail_point!(name, condition, action)` forwards to `fail::fail_point!` only when
///   `condition` holds.
#[macro_export]
macro_rules! fail_point {
    ($fp_name:expr) => {{
        #[cfg(feature = "failpoints")]
        {
            fail::eval($fp_name, |_| {
                panic!("Return is not supported for the fail point \"{}\"", $fp_name);
            });
        }
    }};
    ($fp_name:expr, $action:expr) => {{
        #[cfg(feature = "failpoints")]
        {
            if let Some(early_value) = fail::eval($fp_name, $action) {
                return early_value;
            }
        }
    }};
    ($fp_name:expr, $enabled:expr, $action:expr) => {{
        #[cfg(feature = "failpoints")]
        {
            if $enabled {
                fail::fail_point!($fp_name, $action);
            }
        }
    }};
}
369
370#[cfg(test)]
372pub mod tests {
373 use common::{BinarySerializable, FixedSize};
374 use query_grammar::{UserInputAst, UserInputLeaf, UserInputLiteral};
375 use rand::distributions::{Bernoulli, Uniform};
376 use rand::rngs::StdRng;
377 use rand::{Rng, SeedableRng};
378 use time::OffsetDateTime;
379
380 use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
381 use crate::docset::{DocSet, TERMINATED};
382 use crate::index::SegmentReader;
383 use crate::merge_policy::NoMergePolicy;
384 use crate::postings::Postings;
385 use crate::query::BooleanQuery;
386 use crate::schema::*;
387 use crate::{DateTime, DocAddress, Index, IndexWriter, ReloadPolicy};
388
389 pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
391 let mut buffer = Vec::new();
392 O::default().serialize(&mut buffer).unwrap();
393 assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
394 }
395
396 #[macro_export]
399 macro_rules! assert_nearly_equals {
400 ($left:expr, $right:expr) => {{
401 assert_nearly_equals!($left, $right, 0.0005);
402 }};
403 ($left:expr, $right:expr, $epsilon:expr) => {{
404 match (&$left, &$right, &$epsilon) {
405 (left_val, right_val, epsilon_val) => {
406 let diff = (left_val - right_val).abs();
407
408 if diff > *epsilon_val {
409 panic!(
410 r#"assertion failed: `abs(left-right)>epsilon`
411 left: `{:?}`,
412 right: `{:?}`,
413 epsilon: `{:?}`"#,
414 &*left_val, &*right_val, &*epsilon_val
415 )
416 }
417 }
418 }
419 }};
420 }
421
422 pub fn generate_nonunique_unsorted(max_value: u32, n_elems: usize) -> Vec<u32> {
424 let seed: [u8; 32] = [1; 32];
425 StdRng::from_seed(seed)
426 .sample_iter(&Uniform::new(0u32, max_value))
427 .take(n_elems)
428 .collect::<Vec<u32>>()
429 }
430
431 pub fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {
433 StdRng::from_seed([seed_val; 32])
434 .sample_iter(&Bernoulli::new(ratio).unwrap())
435 .take(n as usize)
436 .enumerate()
437 .filter_map(|(val, keep)| if keep { Some(val as u32) } else { None })
438 .collect()
439 }
440
441 pub fn sample(n: u32, ratio: f64) -> Vec<u32> {
443 sample_with_seed(n, ratio, 4)
444 }
445
446 #[test]
447 fn test_version_string() {
448 use regex::Regex;
449 let regex_ptn = Regex::new(
450 "tantivy v[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.{0,10}, index_format v[0-9]{1,5}",
451 )
452 .unwrap();
453 let version = super::version().to_string();
454 assert!(regex_ptn.find(&version).is_some());
455 }
456
457 #[test]
458 #[cfg(feature = "mmap")]
459 fn test_indexing() -> crate::Result<()> {
460 let mut schema_builder = Schema::builder();
461 let text_field = schema_builder.add_text_field("text", TEXT);
462 let schema = schema_builder.build();
463 let index = Index::create_from_tempdir(schema)?;
464 let mut index_writer: IndexWriter = index.writer_for_tests()?;
466 {
467 let doc = doc!(text_field=>"af b");
468 index_writer.add_document(doc)?;
469 }
470 {
471 let doc = doc!(text_field=>"a b c");
472 index_writer.add_document(doc)?;
473 }
474 {
475 let doc = doc!(text_field=>"a b c d");
476 index_writer.add_document(doc)?;
477 }
478 index_writer.commit()?;
479 Ok(())
480 }
481
482 #[test]
483 fn test_docfreq1() -> crate::Result<()> {
484 let mut schema_builder = Schema::builder();
485 let text_field = schema_builder.add_text_field("text", TEXT);
486 let index = Index::create_in_ram(schema_builder.build());
487 let mut index_writer: IndexWriter = index.writer_for_tests()?;
488 index_writer.add_document(doc!(text_field=>"a b c"))?;
489 index_writer.commit()?;
490 index_writer.add_document(doc!(text_field=>"a"))?;
491 index_writer.add_document(doc!(text_field=>"a a"))?;
492 index_writer.commit()?;
493 index_writer.add_document(doc!(text_field=>"c"))?;
494 index_writer.commit()?;
495 let reader = index.reader()?;
496 let searcher = reader.searcher();
497 let term_a = Term::from_field_text(text_field, "a");
498 assert_eq!(searcher.doc_freq(&term_a)?, 3);
499 let term_b = Term::from_field_text(text_field, "b");
500 assert_eq!(searcher.doc_freq(&term_b)?, 1);
501 let term_c = Term::from_field_text(text_field, "c");
502 assert_eq!(searcher.doc_freq(&term_c)?, 2);
503 let term_d = Term::from_field_text(text_field, "d");
504 assert_eq!(searcher.doc_freq(&term_d)?, 0);
505 Ok(())
506 }
507
508 #[test]
509 fn test_fieldnorm_no_docs_with_field() -> crate::Result<()> {
510 let mut schema_builder = Schema::builder();
511 let title_field = schema_builder.add_text_field("title", TEXT);
512 let text_field = schema_builder.add_text_field("text", TEXT);
513 let index = Index::create_in_ram(schema_builder.build());
514 let mut index_writer: IndexWriter = index.writer_for_tests()?;
515 index_writer.add_document(doc!(text_field=>"a b c"))?;
516 index_writer.commit()?;
517 let index_reader = index.reader()?;
518 let searcher = index_reader.searcher();
519 let reader = searcher.segment_reader(0);
520 {
521 let fieldnorm_reader = reader.get_fieldnorms_reader(text_field)?;
522 assert_eq!(fieldnorm_reader.fieldnorm(0), 3);
523 }
524 {
525 let fieldnorm_reader = reader.get_fieldnorms_reader(title_field)?;
526 assert_eq!(fieldnorm_reader.fieldnorm_id(0), 0);
527 }
528 Ok(())
529 }
530
531 #[test]
532 fn test_fieldnorm() -> crate::Result<()> {
533 let mut schema_builder = Schema::builder();
534 let text_field = schema_builder.add_text_field("text", TEXT);
535 let index = Index::create_in_ram(schema_builder.build());
536 let mut index_writer: IndexWriter = index.writer_for_tests()?;
537 index_writer.add_document(doc!(text_field=>"a b c"))?;
538 index_writer.add_document(doc!())?;
539 index_writer.add_document(doc!(text_field=>"a b"))?;
540 index_writer.commit()?;
541 let reader = index.reader()?;
542 let searcher = reader.searcher();
543 let segment_reader: &SegmentReader = searcher.segment_reader(0);
544 let fieldnorms_reader = segment_reader.get_fieldnorms_reader(text_field)?;
545 assert_eq!(fieldnorms_reader.fieldnorm(0), 3);
546 assert_eq!(fieldnorms_reader.fieldnorm(1), 0);
547 assert_eq!(fieldnorms_reader.fieldnorm(2), 2);
548 Ok(())
549 }
550
551 fn advance_undeleted(docset: &mut dyn DocSet, reader: &SegmentReader) -> bool {
552 let mut doc = docset.advance();
553 while doc != TERMINATED {
554 if !reader.is_deleted(doc) {
555 return true;
556 }
557 doc = docset.advance();
558 }
559 false
560 }
561
562 #[test]
563 fn test_delete_postings1() -> crate::Result<()> {
564 let mut schema_builder = Schema::builder();
565 let text_field = schema_builder.add_text_field("text", TEXT);
566 let term_abcd = Term::from_field_text(text_field, "abcd");
567 let term_a = Term::from_field_text(text_field, "a");
568 let term_b = Term::from_field_text(text_field, "b");
569 let term_c = Term::from_field_text(text_field, "c");
570 let schema = schema_builder.build();
571 let index = Index::create_in_ram(schema);
572 let reader = index
573 .reader_builder()
574 .reload_policy(ReloadPolicy::Manual)
575 .try_into()
576 .unwrap();
577 {
578 let mut index_writer: IndexWriter = index.writer_for_tests()?;
580 index_writer.add_document(doc!(text_field=>"a b"))?;
582 index_writer.add_document(doc!(text_field=>" a c"))?;
584 index_writer.add_document(doc!(text_field=>" b c"))?;
586 index_writer.add_document(doc!(text_field=>" b d"))?;
588
589 index_writer.delete_term(Term::from_field_text(text_field, "c"));
590 index_writer.delete_term(Term::from_field_text(text_field, "a"));
591 index_writer.add_document(doc!(text_field=>" b c"))?;
593 index_writer.add_document(doc!(text_field=>" a"))?;
595 index_writer.commit()?;
596 }
597 {
598 reader.reload()?;
599 let searcher = reader.searcher();
600 let segment_reader = searcher.segment_reader(0);
601 let inverted_index = segment_reader.inverted_index(text_field)?;
602 assert!(inverted_index
603 .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions)?
604 .is_none());
605 {
606 let mut postings = inverted_index
607 .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)?
608 .unwrap();
609 assert!(advance_undeleted(&mut postings, segment_reader));
610 assert_eq!(postings.doc(), 5);
611 assert!(!advance_undeleted(&mut postings, segment_reader));
612 }
613 {
614 let mut postings = inverted_index
615 .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)?
616 .unwrap();
617 assert!(advance_undeleted(&mut postings, segment_reader));
618 assert_eq!(postings.doc(), 3);
619 assert!(advance_undeleted(&mut postings, segment_reader));
620 assert_eq!(postings.doc(), 4);
621 assert!(!advance_undeleted(&mut postings, segment_reader));
622 }
623 }
624 {
625 let mut index_writer: IndexWriter = index.writer_for_tests()?;
627 index_writer.add_document(doc!(text_field=>"a b"))?;
629 index_writer.delete_term(Term::from_field_text(text_field, "c"));
631 index_writer.rollback()?;
632 }
633 {
634 reader.reload()?;
635 let searcher = reader.searcher();
636 let seg_reader = searcher.segment_reader(0);
637 let inverted_index = seg_reader.inverted_index(term_abcd.field())?;
638
639 assert!(inverted_index
640 .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions)?
641 .is_none());
642 {
643 let mut postings = inverted_index
644 .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)?
645 .unwrap();
646 assert!(advance_undeleted(&mut postings, seg_reader));
647 assert_eq!(postings.doc(), 5);
648 assert!(!advance_undeleted(&mut postings, seg_reader));
649 }
650 {
651 let mut postings = inverted_index
652 .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)?
653 .unwrap();
654 assert!(advance_undeleted(&mut postings, seg_reader));
655 assert_eq!(postings.doc(), 3);
656 assert!(advance_undeleted(&mut postings, seg_reader));
657 assert_eq!(postings.doc(), 4);
658 assert!(!advance_undeleted(&mut postings, seg_reader));
659 }
660 }
661 {
662 let mut index_writer: IndexWriter = index.writer_for_tests()?;
664 index_writer.add_document(doc!(text_field=>"a b"))?;
665 index_writer.delete_term(Term::from_field_text(text_field, "c"));
666 index_writer.rollback()?;
667 index_writer.delete_term(Term::from_field_text(text_field, "a"));
668 index_writer.commit()?;
669 }
670 {
671 reader.reload()?;
672 let searcher = reader.searcher();
673 let segment_reader = searcher.segment_reader(0);
674 let inverted_index = segment_reader.inverted_index(term_abcd.field())?;
675 assert!(inverted_index
676 .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions)?
677 .is_none());
678 {
679 let mut postings = inverted_index
680 .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)?
681 .unwrap();
682 assert!(!advance_undeleted(&mut postings, segment_reader));
683 }
684 {
685 let mut postings = inverted_index
686 .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)?
687 .unwrap();
688 assert!(advance_undeleted(&mut postings, segment_reader));
689 assert_eq!(postings.doc(), 3);
690 assert!(advance_undeleted(&mut postings, segment_reader));
691 assert_eq!(postings.doc(), 4);
692 assert!(!advance_undeleted(&mut postings, segment_reader));
693 }
694 {
695 let mut postings = inverted_index
696 .read_postings(&term_c, IndexRecordOption::WithFreqsAndPositions)?
697 .unwrap();
698 assert!(advance_undeleted(&mut postings, segment_reader));
699 assert_eq!(postings.doc(), 4);
700 assert!(!advance_undeleted(&mut postings, segment_reader));
701 }
702 }
703 Ok(())
704 }
705
706 #[test]
707 fn test_indexed_u64() -> crate::Result<()> {
708 let mut schema_builder = Schema::builder();
709 let field = schema_builder.add_u64_field("value", INDEXED);
710 let schema = schema_builder.build();
711
712 let index = Index::create_in_ram(schema);
713 let mut index_writer: IndexWriter = index.writer_for_tests()?;
714 index_writer.add_document(doc!(field=>1u64))?;
715 index_writer.commit()?;
716 let reader = index.reader()?;
717 let searcher = reader.searcher();
718 let term = Term::from_field_u64(field, 1u64);
719 let mut postings = searcher
720 .segment_reader(0)
721 .inverted_index(term.field())?
722 .read_postings(&term, IndexRecordOption::Basic)?
723 .unwrap();
724 assert_eq!(postings.doc(), 0);
725 assert_eq!(postings.advance(), TERMINATED);
726 Ok(())
727 }
728
729 #[test]
730 fn test_indexed_i64() -> crate::Result<()> {
731 let mut schema_builder = Schema::builder();
732 let value_field = schema_builder.add_i64_field("value", INDEXED);
733 let schema = schema_builder.build();
734
735 let index = Index::create_in_ram(schema);
736 let mut index_writer: IndexWriter = index.writer_for_tests()?;
737 let negative_val = -1i64;
738 index_writer.add_document(doc!(value_field => negative_val))?;
739 index_writer.commit()?;
740 let reader = index.reader()?;
741 let searcher = reader.searcher();
742 let term = Term::from_field_i64(value_field, negative_val);
743 let mut postings = searcher
744 .segment_reader(0)
745 .inverted_index(term.field())?
746 .read_postings(&term, IndexRecordOption::Basic)?
747 .unwrap();
748 assert_eq!(postings.doc(), 0);
749 assert_eq!(postings.advance(), TERMINATED);
750 Ok(())
751 }
752
753 #[test]
754 fn test_indexed_f64() -> crate::Result<()> {
755 let mut schema_builder = Schema::builder();
756 let value_field = schema_builder.add_f64_field("value", INDEXED);
757 let schema = schema_builder.build();
758
759 let index = Index::create_in_ram(schema);
760 let mut index_writer: IndexWriter = index.writer_for_tests()?;
761 let val = std::f64::consts::PI;
762 index_writer.add_document(doc!(value_field => val))?;
763 index_writer.commit()?;
764 let reader = index.reader()?;
765 let searcher = reader.searcher();
766 let term = Term::from_field_f64(value_field, val);
767 let mut postings = searcher
768 .segment_reader(0)
769 .inverted_index(term.field())?
770 .read_postings(&term, IndexRecordOption::Basic)?
771 .unwrap();
772 assert_eq!(postings.doc(), 0);
773 assert_eq!(postings.advance(), TERMINATED);
774 Ok(())
775 }
776
777 #[test]
778 fn test_indexedfield_not_in_documents() -> crate::Result<()> {
779 let mut schema_builder = Schema::builder();
780 let text_field = schema_builder.add_text_field("text", TEXT);
781 let absent_field = schema_builder.add_text_field("absent_text", TEXT);
782 let schema = schema_builder.build();
783 let index = Index::create_in_ram(schema);
784 let mut index_writer: IndexWriter = index.writer_for_tests()?;
785 index_writer.add_document(doc!(text_field=>"a"))?;
786 assert!(index_writer.commit().is_ok());
787 let reader = index.reader()?;
788 let searcher = reader.searcher();
789 let segment_reader = searcher.segment_reader(0);
790 let inverted_index = segment_reader.inverted_index(absent_field)?;
791 assert_eq!(inverted_index.terms().num_terms(), 0);
792 Ok(())
793 }
794
795 #[test]
796 fn test_delete_postings2() -> crate::Result<()> {
797 let mut schema_builder = Schema::builder();
798 let text_field = schema_builder.add_text_field("text", TEXT);
799 let schema = schema_builder.build();
800 let index = Index::create_in_ram(schema);
801 let reader = index
802 .reader_builder()
803 .reload_policy(ReloadPolicy::Manual)
804 .try_into()?;
805
806 let mut index_writer: IndexWriter = index.writer_for_tests()?;
808 index_writer.add_document(doc!(text_field=>"63"))?;
809 index_writer.add_document(doc!(text_field=>"70"))?;
810 index_writer.add_document(doc!(text_field=>"34"))?;
811 index_writer.add_document(doc!(text_field=>"1"))?;
812 index_writer.add_document(doc!(text_field=>"38"))?;
813 index_writer.add_document(doc!(text_field=>"33"))?;
814 index_writer.add_document(doc!(text_field=>"40"))?;
815 index_writer.add_document(doc!(text_field=>"17"))?;
816 index_writer.delete_term(Term::from_field_text(text_field, "38"));
817 index_writer.delete_term(Term::from_field_text(text_field, "34"));
818 index_writer.commit()?;
819 reader.reload()?;
820 assert_eq!(reader.searcher().num_docs(), 6);
821 Ok(())
822 }
823
824 #[test]
825 fn test_termfreq() -> crate::Result<()> {
826 let mut schema_builder = Schema::builder();
827 let text_field = schema_builder.add_text_field("text", TEXT);
828 let schema = schema_builder.build();
829 let index = Index::create_in_ram(schema);
830 {
831 let mut index_writer: IndexWriter = index.writer_for_tests()?;
833 index_writer.add_document(doc!(text_field=>"af af af bc bc"))?;
834 index_writer.commit()?;
835 }
836 {
837 let index_reader = index.reader()?;
838 let searcher = index_reader.searcher();
839 let reader = searcher.segment_reader(0);
840 let inverted_index = reader.inverted_index(text_field)?;
841 let term_abcd = Term::from_field_text(text_field, "abcd");
842 assert!(inverted_index
843 .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions)?
844 .is_none());
845 let term_af = Term::from_field_text(text_field, "af");
846 let mut postings = inverted_index
847 .read_postings(&term_af, IndexRecordOption::WithFreqsAndPositions)?
848 .unwrap();
849 assert_eq!(postings.doc(), 0);
850 assert_eq!(postings.term_freq(), 3);
851 assert_eq!(postings.advance(), TERMINATED);
852 }
853 Ok(())
854 }
855
856 #[test]
857 fn test_searcher_1() -> crate::Result<()> {
858 let mut schema_builder = Schema::builder();
859 let text_field = schema_builder.add_text_field("text", TEXT);
860 let schema = schema_builder.build();
861 let index = Index::create_in_ram(schema);
862 let reader = index.reader()?;
863 let mut index_writer: IndexWriter = index.writer_for_tests()?;
865 index_writer.add_document(doc!(text_field=>"af af af b"))?;
866 index_writer.add_document(doc!(text_field=>"a b c"))?;
867 index_writer.add_document(doc!(text_field=>"a b c d"))?;
868 index_writer.commit()?;
869
870 reader.reload()?;
871 let searcher = reader.searcher();
872 let get_doc_ids = |terms: Vec<Term>| {
873 let query = BooleanQuery::new_multiterms_query(terms);
874 searcher
875 .search(&query, &TEST_COLLECTOR_WITH_SCORE)
876 .map(|topdocs| topdocs.docs().to_vec())
877 };
878 assert_eq!(
879 get_doc_ids(vec![Term::from_field_text(text_field, "a")])?,
880 vec![DocAddress::new(0, 1), DocAddress::new(0, 2)]
881 );
882 assert_eq!(
883 get_doc_ids(vec![Term::from_field_text(text_field, "af")])?,
884 vec![DocAddress::new(0, 0)]
885 );
886 assert_eq!(
887 get_doc_ids(vec![Term::from_field_text(text_field, "b")])?,
888 vec![
889 DocAddress::new(0, 0),
890 DocAddress::new(0, 1),
891 DocAddress::new(0, 2)
892 ]
893 );
894 assert_eq!(
895 get_doc_ids(vec![Term::from_field_text(text_field, "c")])?,
896 vec![DocAddress::new(0, 1), DocAddress::new(0, 2)]
897 );
898 assert_eq!(
899 get_doc_ids(vec![Term::from_field_text(text_field, "d")])?,
900 vec![DocAddress::new(0, 2)]
901 );
902 assert_eq!(
903 get_doc_ids(vec![
904 Term::from_field_text(text_field, "b"),
905 Term::from_field_text(text_field, "a"),
906 ])?,
907 vec![
908 DocAddress::new(0, 0),
909 DocAddress::new(0, 1),
910 DocAddress::new(0, 2)
911 ]
912 );
913 Ok(())
914 }
915
916 #[test]
917 fn test_searcher_2() -> crate::Result<()> {
918 let mut schema_builder = Schema::builder();
919 let text_field = schema_builder.add_text_field("text", TEXT);
920 let schema = schema_builder.build();
921 let index = Index::create_in_ram(schema);
922 let reader = index
923 .reader_builder()
924 .reload_policy(ReloadPolicy::Manual)
925 .try_into()?;
926 assert_eq!(reader.searcher().num_docs(), 0u64);
927 let mut index_writer: IndexWriter = index.writer_for_tests()?;
929 index_writer.add_document(doc!(text_field=>"af b"))?;
930 index_writer.add_document(doc!(text_field=>"a b c"))?;
931 index_writer.add_document(doc!(text_field=>"a b c d"))?;
932 index_writer.commit()?;
933 reader.reload()?;
934 assert_eq!(reader.searcher().num_docs(), 3u64);
935 Ok(())
936 }
937
938 #[test]
939 fn test_searcher_on_json_field_with_type_inference() {
940 let mut schema_builder = Schema::builder();
944 let json_field = schema_builder.add_json_field("json", STORED | TEXT);
945 let schema = schema_builder.build();
946 let json_val: serde_json::Value = serde_json::from_str(
947 r#"{
948 "signed": 2,
949 "float": 2.0,
950 "unsigned": 10000000000000,
951 "date": "1985-04-12T23:20:50.52Z",
952 "bool": true
953 }"#,
954 )
955 .unwrap();
956 let doc = doc!(json_field=>json_val);
957 let index = Index::create_in_ram(schema);
958 let mut writer = index.writer_for_tests().unwrap();
959 writer.add_document(doc).unwrap();
960 writer.commit().unwrap();
961 let reader = index.reader().unwrap();
962 let searcher = reader.searcher();
963 let get_doc_ids = |user_input_literal: UserInputLiteral| {
964 let query_parser = crate::query::QueryParser::for_index(&index, Vec::new());
965 let query = query_parser
966 .build_query_from_user_input_ast(UserInputAst::from(UserInputLeaf::Literal(
967 user_input_literal,
968 )))
969 .unwrap();
970 searcher
971 .search(&query, &TEST_COLLECTOR_WITH_SCORE)
972 .map(|topdocs| topdocs.docs().to_vec())
973 .unwrap()
974 };
975 {
976 let user_input_literal = UserInputLiteral {
977 field_name: Some("json.signed".to_string()),
978 phrase: "2".to_string(),
979 delimiter: crate::query_grammar::Delimiter::None,
980 slop: 0,
981 prefix: false,
982 };
983 assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
984 }
985 {
986 let user_input_literal = UserInputLiteral {
987 field_name: Some("json.float".to_string()),
988 phrase: "2.0".to_string(),
989 delimiter: crate::query_grammar::Delimiter::None,
990 slop: 0,
991 prefix: false,
992 };
993 assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
994 }
995 {
996 let user_input_literal = UserInputLiteral {
997 field_name: Some("json.date".to_string()),
998 phrase: "1985-04-12T23:20:50.52Z".to_string(),
999 delimiter: crate::query_grammar::Delimiter::None,
1000 slop: 0,
1001 prefix: false,
1002 };
1003 assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
1004 }
1005 {
1006 let user_input_literal = UserInputLiteral {
1007 field_name: Some("json.unsigned".to_string()),
1008 phrase: "10000000000000".to_string(),
1009 delimiter: crate::query_grammar::Delimiter::None,
1010 slop: 0,
1011 prefix: false,
1012 };
1013 assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
1014 }
1015 {
1016 let user_input_literal = UserInputLiteral {
1017 field_name: Some("json.bool".to_string()),
1018 phrase: "true".to_string(),
1019 delimiter: crate::query_grammar::Delimiter::None,
1020 slop: 0,
1021 prefix: false,
1022 };
1023 assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
1024 }
1025 }
1026
1027 #[test]
1028 fn test_doc_macro() {
1029 let mut schema_builder = Schema::builder();
1030 let text_field = schema_builder.add_text_field("text", TEXT);
1031 let other_text_field = schema_builder.add_text_field("text2", TEXT);
1032 let document = doc!(text_field => "tantivy",
1033 text_field => "some other value",
1034 other_text_field => "short");
1035 assert_eq!(document.len(), 3);
1036 let values: Vec<OwnedValue> = document.get_all(text_field).map(OwnedValue::from).collect();
1037 assert_eq!(values.len(), 2);
1038 assert_eq!(values[0].as_ref().as_str(), Some("tantivy"));
1039 assert_eq!(values[1].as_ref().as_str(), Some("some other value"));
1040 let values: Vec<OwnedValue> = document
1041 .get_all(other_text_field)
1042 .map(OwnedValue::from)
1043 .collect();
1044 assert_eq!(values.len(), 1);
1045 assert_eq!(values[0].as_ref().as_str(), Some("short"));
1046 }
1047
1048 #[test]
1049 fn test_wrong_fast_field_type() -> crate::Result<()> {
1050 let mut schema_builder = Schema::builder();
1051 let fast_field_unsigned = schema_builder.add_u64_field("unsigned", FAST);
1052 let fast_field_signed = schema_builder.add_i64_field("signed", FAST);
1053 let fast_field_float = schema_builder.add_f64_field("float", FAST);
1054 schema_builder.add_text_field("text", TEXT);
1055 schema_builder.add_u64_field("stored_int", STORED);
1056 let schema = schema_builder.build();
1057
1058 let index = Index::create_in_ram(schema);
1059 let mut index_writer: IndexWriter = index.writer_for_tests()?;
1060 {
1061 let document =
1062 doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64);
1063 index_writer.add_document(document)?;
1064 index_writer.commit()?;
1065 }
1066 let reader = index.reader()?;
1067 let searcher = reader.searcher();
1068 let segment_reader: &SegmentReader = searcher.segment_reader(0);
1069 {
1070 let fast_field_reader_res = segment_reader.fast_fields().u64("text");
1071 assert!(fast_field_reader_res.is_err());
1072 }
1073 {
1074 let fast_field_reader_opt = segment_reader.fast_fields().u64("stored_int");
1075 assert!(fast_field_reader_opt.is_err());
1076 }
1077 {
1078 let fast_field_reader_opt = segment_reader.fast_fields().u64("signed");
1079 assert!(fast_field_reader_opt.is_err());
1080 }
1081 {
1082 let fast_field_reader_opt = segment_reader.fast_fields().u64("float");
1083 assert!(fast_field_reader_opt.is_err());
1084 }
1085 {
1086 let fast_field_reader_opt = segment_reader.fast_fields().u64("unsigned");
1087 assert!(fast_field_reader_opt.is_ok());
1088 let fast_field_reader = fast_field_reader_opt.unwrap();
1089 assert_eq!(fast_field_reader.first(0), Some(4u64))
1090 }
1091
1092 {
1093 let fast_field_reader_res = segment_reader.fast_fields().i64("signed");
1094 assert!(fast_field_reader_res.is_ok());
1095 let fast_field_reader = fast_field_reader_res.unwrap();
1096 assert_eq!(fast_field_reader.first(0), Some(4i64))
1097 }
1098
1099 {
1100 let fast_field_reader_res = segment_reader.fast_fields().f64("float");
1101 assert!(fast_field_reader_res.is_ok());
1102 let fast_field_reader = fast_field_reader_res.unwrap();
1103 assert_eq!(fast_field_reader.first(0), Some(4f64))
1104 }
1105 Ok(())
1106 }
1107
1108 #[test]
1110 fn test_update_via_delete_insert() -> crate::Result<()> {
1111 use crate::collector::Count;
1112 use crate::index::SegmentId;
1113 use crate::indexer::NoMergePolicy;
1114 use crate::query::AllQuery;
1115
1116 const DOC_COUNT: u64 = 2u64;
1117
1118 let mut schema_builder = SchemaBuilder::default();
1119 let id = schema_builder.add_u64_field("id", INDEXED);
1120 let schema = schema_builder.build();
1121
1122 let index = Index::create_in_ram(schema);
1123 let index_reader = index.reader()?;
1124
1125 let mut index_writer: IndexWriter = index.writer_for_tests()?;
1126 index_writer.set_merge_policy(Box::new(NoMergePolicy));
1127
1128 for doc_id in 0u64..DOC_COUNT {
1129 index_writer.add_document(doc!(id => doc_id))?;
1130 }
1131 index_writer.commit()?;
1132
1133 index_reader.reload()?;
1134 let searcher = index_reader.searcher();
1135
1136 assert_eq!(
1137 searcher.search(&AllQuery, &Count).unwrap(),
1138 DOC_COUNT as usize
1139 );
1140
1141 for doc_id in 0u64..DOC_COUNT {
1143 index_writer.delete_term(Term::from_field_u64(id, doc_id));
1144 index_writer.commit()?;
1145 index_reader.reload()?;
1146 index_writer.add_document(doc!(id => doc_id))?;
1147 index_writer.commit()?;
1148 index_reader.reload()?;
1149 let searcher = index_reader.searcher();
1150 assert_eq!(
1152 searcher.search(&AllQuery, &Count).unwrap(),
1153 DOC_COUNT as usize
1154 );
1155 }
1156
1157 index_reader.reload()?;
1158 let searcher = index_reader.searcher();
1159 let segment_ids: Vec<SegmentId> = searcher
1160 .segment_readers()
1161 .iter()
1162 .map(|reader| reader.segment_id())
1163 .collect();
1164 index_writer.merge(&segment_ids).wait()?;
1165 index_reader.reload()?;
1166 let searcher = index_reader.searcher();
1167 assert_eq!(searcher.search(&AllQuery, &Count)?, DOC_COUNT as usize);
1168 Ok(())
1169 }
1170
1171 #[test]
1172 fn test_validate_checksum() -> crate::Result<()> {
1173 let index_path = tempfile::tempdir().expect("dir");
1174 let mut builder = Schema::builder();
1175 let body = builder.add_text_field("body", TEXT | STORED);
1176 let schema = builder.build();
1177 let index = Index::create_in_dir(&index_path, schema)?;
1178 let mut writer: IndexWriter = index.writer(50_000_000)?;
1179 writer.set_merge_policy(Box::new(NoMergePolicy));
1180 for _ in 0..5000 {
1181 writer.add_document(doc!(body => "foo"))?;
1182 writer.add_document(doc!(body => "boo"))?;
1183 }
1184 writer.commit()?;
1185 assert!(index.validate_checksum()?.is_empty());
1186
1187 writer.delete_term(Term::from_field_text(body, "foo"));
1189 writer.commit()?;
1190 let segment_ids = index.searchable_segment_ids()?;
1191 writer.merge(&segment_ids).wait()?;
1192 assert!(index.validate_checksum()?.is_empty());
1193 Ok(())
1194 }
1195
1196 #[test]
1197 fn test_datetime() {
1198 let now = OffsetDateTime::now_utc();
1199
1200 let dt = DateTime::from_utc(now).into_utc();
1201 assert_eq!(dt.to_ordinal_date(), now.to_ordinal_date());
1202 assert_eq!(dt.to_hms_micro(), now.to_hms_micro());
1203 assert_eq!(dt.nanosecond(), now.nanosecond());
1205
1206 let dt = DateTime::from_timestamp_secs(now.unix_timestamp()).into_utc();
1207 assert_eq!(dt.to_ordinal_date(), now.to_ordinal_date());
1208 assert_eq!(dt.to_hms(), now.to_hms());
1209 assert_ne!(dt.to_hms_micro(), now.to_hms_micro());
1211
1212 let dt =
1213 DateTime::from_timestamp_micros((now.unix_timestamp_nanos() / 1_000) as i64).into_utc();
1214 assert_eq!(dt.to_ordinal_date(), now.to_ordinal_date());
1215 assert_eq!(dt.to_hms_micro(), now.to_hms_micro());
1216
1217 let dt_from_ts_nanos =
1218 OffsetDateTime::from_unix_timestamp_nanos(1492432621123456789).unwrap();
1219 let offset_dt = DateTime::from_utc(dt_from_ts_nanos).into_utc();
1220 assert_eq!(
1221 dt_from_ts_nanos.to_ordinal_date(),
1222 offset_dt.to_ordinal_date()
1223 );
1224 assert_eq!(dt_from_ts_nanos.to_hms_micro(), offset_dt.to_hms_micro());
1225 }
1226}