1#![doc(html_logo_url = "http://fulmicoton.com/lucivy-logo/lucivy-logo.png")]
2#![cfg_attr(all(feature = "unstable", test), feature(test))]
3#![doc(test(attr(allow(unused_variables), deny(warnings))))]
4#![warn(missing_docs)]
5#![allow(
6 clippy::len_without_is_empty,
7 clippy::derive_partial_eq_without_eq,
8 clippy::module_inception,
9 clippy::needless_range_loop,
10 clippy::bool_assert_comparison
11)]
12
13#[cfg_attr(test, macro_use)]
153extern crate serde_json;
154#[macro_use]
155extern crate log;
156
157#[macro_use]
158extern crate thiserror;
159
160#[cfg(all(test, feature = "unstable"))]
161extern crate test;
162
163#[cfg(feature = "mmap")]
164#[cfg(test)]
165mod functional_test;
166
167#[macro_use]
168mod macros;
169mod future_result;
170
171pub use common::{ByteCount, DateTime};
173pub use {columnar, query_grammar, time};
174
175pub use crate::error::LucivyError;
176pub use crate::future_result::FutureResult;
177
/// Result type of the crate.
///
/// Alias of `std::result::Result` whose error type is fixed to [`LucivyError`].
pub type Result<T> = std::result::Result<T, LucivyError>;
183
184mod core;
185pub mod indexer;
186
187pub mod error;
188pub mod tokenizer;
189
190pub mod aggregation;
191pub mod collector;
192pub mod directory;
193pub mod fastfield;
194pub mod fieldnorm;
195pub mod index;
196pub mod positions;
197pub mod postings;
198
199pub mod query;
201pub mod schema;
202pub mod space_usage;
203pub mod store;
204pub mod termdict;
205
206mod docset;
207mod reader;
208
209#[cfg(test)]
210#[cfg(feature = "mmap")]
211mod compat_tests;
212
213pub use self::reader::{IndexReader, IndexReaderBuilder, ReloadPolicy, Warmer};
214pub mod snippet;
215
216use std::fmt;
217
218pub use census::{Inventory, TrackedObject};
219pub use common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64, HasLen};
220use once_cell::sync::Lazy;
221use serde::{Deserialize, Serialize};
222
223pub use self::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN, TERMINATED};
224pub use crate::core::{json_utils, Executor, Searcher, SearcherGeneration};
225pub use crate::directory::Directory;
226pub use crate::index::{
227 Index, IndexBuilder, IndexMeta, IndexSettings, InvertedIndexReader, Order, Segment,
228 SegmentMeta, SegmentReader,
229};
230pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter};
231pub use crate::schema::{Document, LucivyDocument, Term};
232
/// Index format version.
///
/// This is the value stored in the `index_format_version` field of the crate-wide
/// [`Version`] returned by [`version()`].
pub const INDEX_FORMAT_VERSION: u32 = 7;
/// Oldest supported index format version.
///
/// NOTE(review): presumably the oldest on-disk format this build can still open; the
/// compatibility check itself is not in this file — confirm against the index module.
pub const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4;
237
/// Version of the library together with the index format version.
///
/// It is made of the three numeric components of the crate version (`major`, `minor`,
/// `patch`) and of an `index_format_version`. `Debug` is implemented by hand and prints
/// the same text as `Display`.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Version {
    // Components of the crate version.
    major: u32,
    minor: u32,
    patch: u32,
    // Version of the index format (see `INDEX_FORMAT_VERSION`).
    index_format_version: u32,
}
246
247impl fmt::Debug for Version {
248 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
249 fmt::Display::fmt(self, f)
250 }
251}
252
// Crate-wide `Version`, built lazily on first access.
//
// The three numeric components come from the `CARGO_PKG_VERSION_*` values captured at
// compile time by `env!`; the index format comes from `INDEX_FORMAT_VERSION`.
static VERSION: Lazy<Version> = Lazy::new(|| Version {
    // `unwrap`: the initializer panics if a component does not parse as a `u32`.
    major: env!("CARGO_PKG_VERSION_MAJOR").parse().unwrap(),
    minor: env!("CARGO_PKG_VERSION_MINOR").parse().unwrap(),
    patch: env!("CARGO_PKG_VERSION_PATCH").parse().unwrap(),
    index_format_version: INDEX_FORMAT_VERSION,
});
259
260impl fmt::Display for Version {
261 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
262 write!(
263 f,
264 "lucivy v{}.{}.{}, index_format v{}",
265 self.major, self.minor, self.patch, self.index_format_version
266 )
267 }
268}
269
// `Display` rendering of `VERSION`, computed lazily once so that `version_string()` can
// hand out a `&'static str`.
static VERSION_STRING: Lazy<String> = Lazy::new(|| VERSION.to_string());
271
272pub fn version() -> &'static Version {
275 &VERSION
276}
277
278pub fn version_string() -> &'static str {
281 VERSION_STRING.as_str()
282}
283
/// Merge policies.
///
/// This module only re-exports the merge policy types defined in `crate::indexer`.
pub mod merge_policy {
    pub use crate::indexer::{
        DefaultMergePolicy, LogMergePolicy, MergeCandidate, MergePolicy, NoMergePolicy,
    };
}
290
/// Document identifier (`u32`).
///
/// This is the type of [`DocAddress::doc_id`].
pub type DocId = u32;

/// Operation stamp (`u64`).
///
/// NOTE(review): nothing in this file uses it directly; presumably it stamps indexing
/// operations (adds, deletes, commits) — confirm against the indexer module.
pub type Opstamp = u64;

/// Score type (`f32`).
///
/// NOTE(review): presumably the relevance score attached to a search hit — confirm
/// against the query/collector modules.
pub type Score = f32;

/// Segment ordinal (`u32`).
///
/// This is the type of [`DocAddress::segment_ord`].
pub type SegmentOrdinal = u32;
316
317impl DocAddress {
318 pub fn new(segment_ord: SegmentOrdinal, doc_id: DocId) -> DocAddress {
320 DocAddress {
321 segment_ord,
322 doc_id,
323 }
324 }
325}
326
/// Address of a document: a segment ordinal plus a doc id.
///
/// The derived ordering compares `segment_ord` first and `doc_id` second (field
/// declaration order).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct DocAddress {
    /// Segment ordinal.
    ///
    /// NOTE(review): presumably the index of the segment within a `Searcher` (the tests
    /// below use `0` both here and in `Searcher::segment_reader(0)`) — confirm.
    pub segment_ord: SegmentOrdinal,
    /// Doc id, presumably local to the segment designated by `segment_ord` — confirm.
    pub doc_id: DocId,
}
343
/// Fail point hook built on the `fail` crate.
///
/// The body of every arm is gated by `#[cfg(feature = "failpoints")]`, so without that
/// feature each form expands to an empty block.
///
/// * `fail_point!(name)` evaluates the fail point through `fail::eval`; the closure it
///   passes panics, because returning a value is not supported by this form.
/// * `fail_point!(name, e)` evaluates the fail point with the closure `e` and, when
///   `fail::eval` yields `Some(res)`, returns `res` from the enclosing function.
/// * `fail_point!(name, cond, e)` delegates to `fail::fail_point!(name, e)` only when
///   `cond` is true.
#[macro_export]
macro_rules! fail_point {
    // Form without a return value.
    ($name:expr) => {{
        #[cfg(feature = "failpoints")]
        {
            fail::eval($name, |_| {
                panic!("Return is not supported for the fail point \"{}\"", $name);
            });
        }
    }};
    // Form with an early return out of the enclosing function.
    ($name:expr, $e:expr) => {{
        #[cfg(feature = "failpoints")]
        {
            if let Some(res) = fail::eval($name, $e) {
                return res;
            }
        }
    }};
    // Conditional form: the fail point is only considered when `$cond` holds.
    ($name:expr, $cond:expr, $e:expr) => {{
        #[cfg(feature = "failpoints")]
        {
            if $cond {
                fail::fail_point!($name, $e);
            }
        }
    }};
}
372
373#[cfg(test)]
375pub mod tests {
376 use std::collections::BTreeMap;
377
378 use common::{BinarySerializable, FixedSize};
379 use query_grammar::{UserInputAst, UserInputLeaf, UserInputLiteral};
380 use rand::distributions::{Bernoulli, Uniform};
381 use rand::rngs::StdRng;
382 use rand::{Rng, SeedableRng};
383 use time::OffsetDateTime;
384
385 use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
386 use crate::docset::{DocSet, TERMINATED};
387 use crate::index::SegmentReader;
388 use crate::merge_policy::NoMergePolicy;
389 use crate::postings::Postings;
390 use crate::query::{BooleanQuery, QueryParser};
391 use crate::schema::*;
392 use crate::{DateTime, DocAddress, Index, IndexWriter, ReloadPolicy};
393
394 pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
396 let mut buffer = Vec::new();
397 O::default().serialize(&mut buffer).unwrap();
398 assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
399 }
400
    /// Asserts that two values are within `epsilon` of each other.
    ///
    /// * `assert_nearly_equals!(left, right)` uses an epsilon of `0.0005`.
    /// * `assert_nearly_equals!(left, right, epsilon)` panics when
    ///   `(left - right).abs() > epsilon`, printing the three values.
    #[macro_export]
    macro_rules! assert_nearly_equals {
        ($left:expr, $right:expr) => {{
            assert_nearly_equals!($left, $right, 0.0005);
        }};
        ($left:expr, $right:expr, $epsilon:expr) => {{
            // Borrow the operands through a `match` so that each expression is evaluated
            // exactly once.
            match (&$left, &$right, &$epsilon) {
                (left_val, right_val, epsilon_val) => {
                    let diff = (left_val - right_val).abs();

                    if diff > *epsilon_val {
                        panic!(
                            r#"assertion failed: `abs(left-right)>epsilon`
 left: `{:?}`,
 right: `{:?}`,
 epsilon: `{:?}`"#,
                            &*left_val, &*right_val, &*epsilon_val
                        )
                    }
                }
            }
        }};
    }
426
427 pub fn generate_nonunique_unsorted(max_value: u32, n_elems: usize) -> Vec<u32> {
429 let seed: [u8; 32] = [1; 32];
430 StdRng::from_seed(seed)
431 .sample_iter(&Uniform::new(0u32, max_value))
432 .take(n_elems)
433 .collect::<Vec<u32>>()
434 }
435
436 pub fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {
438 StdRng::from_seed([seed_val; 32])
439 .sample_iter(&Bernoulli::new(ratio).unwrap())
440 .take(n as usize)
441 .enumerate()
442 .filter_map(|(val, keep)| if keep { Some(val as u32) } else { None })
443 .collect()
444 }
445
446 pub fn sample(n: u32, ratio: f64) -> Vec<u32> {
448 sample_with_seed(n, ratio, 4)
449 }
450
451 #[test]
452 fn test_version_string() {
453 use regex::Regex;
454 let regex_ptn = Regex::new(
455 "lucivy v[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.{0,10}, index_format v[0-9]{1,5}",
456 )
457 .unwrap();
458 let version = super::version().to_string();
459 assert!(regex_ptn.find(&version).is_some());
460 }
461
462 #[test]
463 #[cfg(feature = "mmap")]
464 fn test_indexing() -> crate::Result<()> {
465 let mut schema_builder = Schema::builder();
466 let text_field = schema_builder.add_text_field("text", TEXT);
467 let schema = schema_builder.build();
468 let index = Index::create_from_tempdir(schema)?;
469 let mut index_writer: IndexWriter = index.writer_for_tests()?;
471 {
472 let doc = doc!(text_field=>"af b");
473 index_writer.add_document(doc)?;
474 }
475 {
476 let doc = doc!(text_field=>"a b c");
477 index_writer.add_document(doc)?;
478 }
479 {
480 let doc = doc!(text_field=>"a b c d");
481 index_writer.add_document(doc)?;
482 }
483 index_writer.commit()?;
484 Ok(())
485 }
486
487 #[test]
488 fn test_docfreq1() -> crate::Result<()> {
489 let mut schema_builder = Schema::builder();
490 let text_field = schema_builder.add_text_field("text", TEXT);
491 let index = Index::create_in_ram(schema_builder.build());
492 let mut index_writer: IndexWriter = index.writer_for_tests()?;
493 index_writer.add_document(doc!(text_field=>"a b c"))?;
494 index_writer.commit()?;
495 index_writer.add_document(doc!(text_field=>"a"))?;
496 index_writer.add_document(doc!(text_field=>"a a"))?;
497 index_writer.commit()?;
498 index_writer.add_document(doc!(text_field=>"c"))?;
499 index_writer.commit()?;
500 let reader = index.reader()?;
501 let searcher = reader.searcher();
502 let term_a = Term::from_field_text(text_field, "a");
503 assert_eq!(searcher.doc_freq(&term_a)?, 3);
504 let term_b = Term::from_field_text(text_field, "b");
505 assert_eq!(searcher.doc_freq(&term_b)?, 1);
506 let term_c = Term::from_field_text(text_field, "c");
507 assert_eq!(searcher.doc_freq(&term_c)?, 2);
508 let term_d = Term::from_field_text(text_field, "d");
509 assert_eq!(searcher.doc_freq(&term_d)?, 0);
510 Ok(())
511 }
512
513 #[test]
514 fn test_fieldnorm_no_docs_with_field() -> crate::Result<()> {
515 let mut schema_builder = Schema::builder();
516 let title_field = schema_builder.add_text_field("title", TEXT);
517 let text_field = schema_builder.add_text_field("text", TEXT);
518 let index = Index::create_in_ram(schema_builder.build());
519 let mut index_writer: IndexWriter = index.writer_for_tests()?;
520 index_writer.add_document(doc!(text_field=>"a b c"))?;
521 index_writer.commit()?;
522 let index_reader = index.reader()?;
523 let searcher = index_reader.searcher();
524 let reader = searcher.segment_reader(0);
525 {
526 let fieldnorm_reader = reader.get_fieldnorms_reader(text_field)?;
527 assert_eq!(fieldnorm_reader.fieldnorm(0), 3);
528 }
529 {
530 let fieldnorm_reader = reader.get_fieldnorms_reader(title_field)?;
531 assert_eq!(fieldnorm_reader.fieldnorm_id(0), 0);
532 }
533 Ok(())
534 }
535
536 #[test]
537 fn test_fieldnorm() -> crate::Result<()> {
538 let mut schema_builder = Schema::builder();
539 let text_field = schema_builder.add_text_field("text", TEXT);
540 let index = Index::create_in_ram(schema_builder.build());
541 let mut index_writer: IndexWriter = index.writer_for_tests()?;
542 index_writer.add_document(doc!(text_field=>"a b c"))?;
543 index_writer.add_document(doc!())?;
544 index_writer.add_document(doc!(text_field=>"a b"))?;
545 index_writer.commit()?;
546 let reader = index.reader()?;
547 let searcher = reader.searcher();
548 let segment_reader: &SegmentReader = searcher.segment_reader(0);
549 let fieldnorms_reader = segment_reader.get_fieldnorms_reader(text_field)?;
550 assert_eq!(fieldnorms_reader.fieldnorm(0), 3);
551 assert_eq!(fieldnorms_reader.fieldnorm(1), 0);
552 assert_eq!(fieldnorms_reader.fieldnorm(2), 2);
553 Ok(())
554 }
555
556 fn advance_undeleted(docset: &mut dyn DocSet, reader: &SegmentReader) -> bool {
557 let mut doc = docset.advance();
558 while doc != TERMINATED {
559 if !reader.is_deleted(doc) {
560 return true;
561 }
562 doc = docset.advance();
563 }
564 false
565 }
566
    /// Exercises deletes against postings across a commit, a rollback, and a rollback
    /// followed by a commit. The reader uses a manual reload policy, so every
    /// verification phase starts with an explicit `reload()`.
    #[test]
    fn test_delete_postings1() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let text_field = schema_builder.add_text_field("text", TEXT);
        let term_abcd = Term::from_field_text(text_field, "abcd");
        let term_a = Term::from_field_text(text_field, "a");
        let term_b = Term::from_field_text(text_field, "b");
        let term_c = Term::from_field_text(text_field, "c");
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
        let reader = index
            .reader_builder()
            .reload_policy(ReloadPolicy::Manual)
            .try_into()
            .unwrap();
        {
            // Phase 1: four documents, two deletes, then two more documents, one commit.
            // The doc ids in the comments are the ones the assertions below rely on.
            let mut index_writer: IndexWriter = index.writer_for_tests()?;
            // doc 0
            index_writer.add_document(doc!(text_field=>"a b"))?;
            // doc 1
            index_writer.add_document(doc!(text_field=>" a c"))?;
            // doc 2
            index_writer.add_document(doc!(text_field=>" b c"))?;
            // doc 3
            index_writer.add_document(doc!(text_field=>" b d"))?;

            index_writer.delete_term(Term::from_field_text(text_field, "c"));
            index_writer.delete_term(Term::from_field_text(text_field, "a"));
            // doc 4: added after the deletes, and expected to survive them
            index_writer.add_document(doc!(text_field=>" b c"))?;
            // doc 5: added after the deletes, and expected to survive them
            index_writer.add_document(doc!(text_field=>" a"))?;
            index_writer.commit()?;
        }
        {
            // Only the documents untouched by the deletes remain visible:
            // "a" -> doc 5, "b" -> docs 3 and 4.
            reader.reload()?;
            let searcher = reader.searcher();
            let segment_reader = searcher.segment_reader(0);
            let inverted_index = segment_reader.inverted_index(text_field)?;
            assert!(inverted_index
                .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions)?
                .is_none());
            {
                let mut postings = inverted_index
                    .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)?
                    .unwrap();
                assert!(advance_undeleted(&mut postings, segment_reader));
                assert_eq!(postings.doc(), 5);
                assert!(!advance_undeleted(&mut postings, segment_reader));
            }
            {
                let mut postings = inverted_index
                    .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)?
                    .unwrap();
                assert!(advance_undeleted(&mut postings, segment_reader));
                assert_eq!(postings.doc(), 3);
                assert!(advance_undeleted(&mut postings, segment_reader));
                assert_eq!(postings.doc(), 4);
                assert!(!advance_undeleted(&mut postings, segment_reader));
            }
        }
        {
            // Phase 2: an add and a delete that are rolled back instead of committed.
            let mut index_writer: IndexWriter = index.writer_for_tests()?;
            index_writer.add_document(doc!(text_field=>"a b"))?;
            index_writer.delete_term(Term::from_field_text(text_field, "c"));
            index_writer.rollback()?;
        }
        {
            // The rollback must leave the searchable state exactly as after phase 1.
            reader.reload()?;
            let searcher = reader.searcher();
            let seg_reader = searcher.segment_reader(0);
            let inverted_index = seg_reader.inverted_index(term_abcd.field())?;

            assert!(inverted_index
                .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions)?
                .is_none());
            {
                let mut postings = inverted_index
                    .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)?
                    .unwrap();
                assert!(advance_undeleted(&mut postings, seg_reader));
                assert_eq!(postings.doc(), 5);
                assert!(!advance_undeleted(&mut postings, seg_reader));
            }
            {
                let mut postings = inverted_index
                    .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)?
                    .unwrap();
                assert!(advance_undeleted(&mut postings, seg_reader));
                assert_eq!(postings.doc(), 3);
                assert!(advance_undeleted(&mut postings, seg_reader));
                assert_eq!(postings.doc(), 4);
                assert!(!advance_undeleted(&mut postings, seg_reader));
            }
        }
        {
            // Phase 3: the add and the delete of "c" are rolled back; only the delete of
            // "a", issued after the rollback, gets committed.
            let mut index_writer: IndexWriter = index.writer_for_tests()?;
            index_writer.add_document(doc!(text_field=>"a b"))?;
            index_writer.delete_term(Term::from_field_text(text_field, "c"));
            index_writer.rollback()?;
            index_writer.delete_term(Term::from_field_text(text_field, "a"));
            index_writer.commit()?;
        }
        {
            // "a" has no live document left, "b" still has docs 3 and 4, and "c" still
            // has doc 4 since its delete was rolled back.
            reader.reload()?;
            let searcher = reader.searcher();
            let segment_reader = searcher.segment_reader(0);
            let inverted_index = segment_reader.inverted_index(term_abcd.field())?;
            assert!(inverted_index
                .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions)?
                .is_none());
            {
                let mut postings = inverted_index
                    .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)?
                    .unwrap();
                assert!(!advance_undeleted(&mut postings, segment_reader));
            }
            {
                let mut postings = inverted_index
                    .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)?
                    .unwrap();
                assert!(advance_undeleted(&mut postings, segment_reader));
                assert_eq!(postings.doc(), 3);
                assert!(advance_undeleted(&mut postings, segment_reader));
                assert_eq!(postings.doc(), 4);
                assert!(!advance_undeleted(&mut postings, segment_reader));
            }
            {
                let mut postings = inverted_index
                    .read_postings(&term_c, IndexRecordOption::WithFreqsAndPositions)?
                    .unwrap();
                assert!(advance_undeleted(&mut postings, segment_reader));
                assert_eq!(postings.doc(), 4);
                assert!(!advance_undeleted(&mut postings, segment_reader));
            }
        }
        Ok(())
    }
710
711 #[test]
712 fn test_indexed_u64() -> crate::Result<()> {
713 let mut schema_builder = Schema::builder();
714 let field = schema_builder.add_u64_field("value", INDEXED);
715 let schema = schema_builder.build();
716
717 let index = Index::create_in_ram(schema);
718 let mut index_writer: IndexWriter = index.writer_for_tests()?;
719 index_writer.add_document(doc!(field=>1u64))?;
720 index_writer.commit()?;
721 let reader = index.reader()?;
722 let searcher = reader.searcher();
723 let term = Term::from_field_u64(field, 1u64);
724 let mut postings = searcher
725 .segment_reader(0)
726 .inverted_index(term.field())?
727 .read_postings(&term, IndexRecordOption::Basic)?
728 .unwrap();
729 assert_eq!(postings.doc(), 0);
730 assert_eq!(postings.advance(), TERMINATED);
731 Ok(())
732 }
733
734 #[test]
735 fn test_indexed_i64() -> crate::Result<()> {
736 let mut schema_builder = Schema::builder();
737 let value_field = schema_builder.add_i64_field("value", INDEXED);
738 let schema = schema_builder.build();
739
740 let index = Index::create_in_ram(schema);
741 let mut index_writer: IndexWriter = index.writer_for_tests()?;
742 let negative_val = -1i64;
743 index_writer.add_document(doc!(value_field => negative_val))?;
744 index_writer.commit()?;
745 let reader = index.reader()?;
746 let searcher = reader.searcher();
747 let term = Term::from_field_i64(value_field, negative_val);
748 let mut postings = searcher
749 .segment_reader(0)
750 .inverted_index(term.field())?
751 .read_postings(&term, IndexRecordOption::Basic)?
752 .unwrap();
753 assert_eq!(postings.doc(), 0);
754 assert_eq!(postings.advance(), TERMINATED);
755 Ok(())
756 }
757
758 #[test]
759 fn test_indexed_f64() -> crate::Result<()> {
760 let mut schema_builder = Schema::builder();
761 let value_field = schema_builder.add_f64_field("value", INDEXED);
762 let schema = schema_builder.build();
763
764 let index = Index::create_in_ram(schema);
765 let mut index_writer: IndexWriter = index.writer_for_tests()?;
766 let val = std::f64::consts::PI;
767 index_writer.add_document(doc!(value_field => val))?;
768 index_writer.commit()?;
769 let reader = index.reader()?;
770 let searcher = reader.searcher();
771 let term = Term::from_field_f64(value_field, val);
772 let mut postings = searcher
773 .segment_reader(0)
774 .inverted_index(term.field())?
775 .read_postings(&term, IndexRecordOption::Basic)?
776 .unwrap();
777 assert_eq!(postings.doc(), 0);
778 assert_eq!(postings.advance(), TERMINATED);
779 Ok(())
780 }
781
782 #[test]
783 fn test_indexedfield_not_in_documents() -> crate::Result<()> {
784 let mut schema_builder = Schema::builder();
785 let text_field = schema_builder.add_text_field("text", TEXT);
786 let absent_field = schema_builder.add_text_field("absent_text", TEXT);
787 let schema = schema_builder.build();
788 let index = Index::create_in_ram(schema);
789 let mut index_writer: IndexWriter = index.writer_for_tests()?;
790 index_writer.add_document(doc!(text_field=>"a"))?;
791 assert!(index_writer.commit().is_ok());
792 let reader = index.reader()?;
793 let searcher = reader.searcher();
794 let segment_reader = searcher.segment_reader(0);
795 let inverted_index = segment_reader.inverted_index(absent_field)?;
796 assert_eq!(inverted_index.terms().num_terms(), 0);
797 Ok(())
798 }
799
800 #[test]
801 fn test_delete_postings2() -> crate::Result<()> {
802 let mut schema_builder = Schema::builder();
803 let text_field = schema_builder.add_text_field("text", TEXT);
804 let schema = schema_builder.build();
805 let index = Index::create_in_ram(schema);
806 let reader = index
807 .reader_builder()
808 .reload_policy(ReloadPolicy::Manual)
809 .try_into()?;
810
811 let mut index_writer: IndexWriter = index.writer_for_tests()?;
813 index_writer.add_document(doc!(text_field=>"63"))?;
814 index_writer.add_document(doc!(text_field=>"70"))?;
815 index_writer.add_document(doc!(text_field=>"34"))?;
816 index_writer.add_document(doc!(text_field=>"1"))?;
817 index_writer.add_document(doc!(text_field=>"38"))?;
818 index_writer.add_document(doc!(text_field=>"33"))?;
819 index_writer.add_document(doc!(text_field=>"40"))?;
820 index_writer.add_document(doc!(text_field=>"17"))?;
821 index_writer.delete_term(Term::from_field_text(text_field, "38"));
822 index_writer.delete_term(Term::from_field_text(text_field, "34"));
823 index_writer.commit()?;
824 reader.reload()?;
825 assert_eq!(reader.searcher().num_docs(), 6);
826 Ok(())
827 }
828
829 #[test]
830 fn test_termfreq() -> crate::Result<()> {
831 let mut schema_builder = Schema::builder();
832 let text_field = schema_builder.add_text_field("text", TEXT);
833 let schema = schema_builder.build();
834 let index = Index::create_in_ram(schema);
835 {
836 let mut index_writer: IndexWriter = index.writer_for_tests()?;
838 index_writer.add_document(doc!(text_field=>"af af af bc bc"))?;
839 index_writer.commit()?;
840 }
841 {
842 let index_reader = index.reader()?;
843 let searcher = index_reader.searcher();
844 let reader = searcher.segment_reader(0);
845 let inverted_index = reader.inverted_index(text_field)?;
846 let term_abcd = Term::from_field_text(text_field, "abcd");
847 assert!(inverted_index
848 .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions)?
849 .is_none());
850 let term_af = Term::from_field_text(text_field, "af");
851 let mut postings = inverted_index
852 .read_postings(&term_af, IndexRecordOption::WithFreqsAndPositions)?
853 .unwrap();
854 assert_eq!(postings.doc(), 0);
855 assert_eq!(postings.term_freq(), 3);
856 assert_eq!(postings.advance(), TERMINATED);
857 }
858 Ok(())
859 }
860
861 #[test]
862 fn test_searcher_1() -> crate::Result<()> {
863 let mut schema_builder = Schema::builder();
864 let text_field = schema_builder.add_text_field("text", TEXT);
865 let schema = schema_builder.build();
866 let index = Index::create_in_ram(schema);
867 let reader = index.reader()?;
868 let mut index_writer: IndexWriter = index.writer_for_tests()?;
870 index_writer.add_document(doc!(text_field=>"af af af b"))?;
871 index_writer.add_document(doc!(text_field=>"a b c"))?;
872 index_writer.add_document(doc!(text_field=>"a b c d"))?;
873 index_writer.commit()?;
874
875 reader.reload()?;
876 let searcher = reader.searcher();
877 let get_doc_ids = |terms: Vec<Term>| {
878 let query = BooleanQuery::new_multiterms_query(terms);
879 searcher
880 .search(&query, &TEST_COLLECTOR_WITH_SCORE)
881 .map(|topdocs| topdocs.docs().to_vec())
882 };
883 assert_eq!(
884 get_doc_ids(vec![Term::from_field_text(text_field, "a")])?,
885 vec![DocAddress::new(0, 1), DocAddress::new(0, 2)]
886 );
887 assert_eq!(
888 get_doc_ids(vec![Term::from_field_text(text_field, "af")])?,
889 vec![DocAddress::new(0, 0)]
890 );
891 assert_eq!(
892 get_doc_ids(vec![Term::from_field_text(text_field, "b")])?,
893 vec![
894 DocAddress::new(0, 0),
895 DocAddress::new(0, 1),
896 DocAddress::new(0, 2)
897 ]
898 );
899 assert_eq!(
900 get_doc_ids(vec![Term::from_field_text(text_field, "c")])?,
901 vec![DocAddress::new(0, 1), DocAddress::new(0, 2)]
902 );
903 assert_eq!(
904 get_doc_ids(vec![Term::from_field_text(text_field, "d")])?,
905 vec![DocAddress::new(0, 2)]
906 );
907 assert_eq!(
908 get_doc_ids(vec![
909 Term::from_field_text(text_field, "b"),
910 Term::from_field_text(text_field, "a"),
911 ])?,
912 vec![
913 DocAddress::new(0, 0),
914 DocAddress::new(0, 1),
915 DocAddress::new(0, 2)
916 ]
917 );
918 Ok(())
919 }
920
921 #[test]
922 fn test_searcher_2() -> crate::Result<()> {
923 let mut schema_builder = Schema::builder();
924 let text_field = schema_builder.add_text_field("text", TEXT);
925 let schema = schema_builder.build();
926 let index = Index::create_in_ram(schema);
927 let reader = index
928 .reader_builder()
929 .reload_policy(ReloadPolicy::Manual)
930 .try_into()?;
931 assert_eq!(reader.searcher().num_docs(), 0u64);
932 let mut index_writer: IndexWriter = index.writer_for_tests()?;
934 index_writer.add_document(doc!(text_field=>"af b"))?;
935 index_writer.add_document(doc!(text_field=>"a b c"))?;
936 index_writer.add_document(doc!(text_field=>"a b c d"))?;
937 index_writer.commit()?;
938 reader.reload()?;
939 assert_eq!(reader.searcher().num_docs(), 3u64);
940 Ok(())
941 }
942
943 #[test]
944 fn test_searcher_on_json_field_with_type_inference() {
945 let mut schema_builder = Schema::builder();
949 let json_field = schema_builder.add_json_field("json", STORED | TEXT);
950 let schema = schema_builder.build();
951 let json_val: serde_json::Value = serde_json::from_str(
952 r#"{
953 "signed": 2,
954 "float": 2.0,
955 "unsigned": 10000000000000,
956 "date": "1985-04-12T23:20:50.52Z",
957 "bool": true
958 }"#,
959 )
960 .unwrap();
961 let doc = doc!(json_field=>json_val);
962 let index = Index::create_in_ram(schema);
963 let mut writer = index.writer_for_tests().unwrap();
964 writer.add_document(doc).unwrap();
965 writer.commit().unwrap();
966 let reader = index.reader().unwrap();
967 let searcher = reader.searcher();
968 let get_doc_ids = |user_input_literal: UserInputLiteral| {
969 let query_parser = crate::query::QueryParser::for_index(&index, Vec::new());
970 let query = query_parser
971 .build_query_from_user_input_ast(UserInputAst::from(UserInputLeaf::Literal(
972 user_input_literal,
973 )))
974 .unwrap();
975 searcher
976 .search(&query, &TEST_COLLECTOR_WITH_SCORE)
977 .map(|topdocs| topdocs.docs().to_vec())
978 .unwrap()
979 };
980 {
981 let user_input_literal = UserInputLiteral {
982 field_name: Some("json.signed".to_string()),
983 phrase: "2".to_string(),
984 delimiter: crate::query_grammar::Delimiter::None,
985 slop: 0,
986 prefix: false,
987 };
988 assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
989 }
990 {
991 let user_input_literal = UserInputLiteral {
992 field_name: Some("json.float".to_string()),
993 phrase: "2.0".to_string(),
994 delimiter: crate::query_grammar::Delimiter::None,
995 slop: 0,
996 prefix: false,
997 };
998 assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
999 }
1000 {
1001 let user_input_literal = UserInputLiteral {
1002 field_name: Some("json.date".to_string()),
1003 phrase: "1985-04-12T23:20:50.52Z".to_string(),
1004 delimiter: crate::query_grammar::Delimiter::None,
1005 slop: 0,
1006 prefix: false,
1007 };
1008 assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
1009 }
1010 {
1011 let user_input_literal = UserInputLiteral {
1012 field_name: Some("json.unsigned".to_string()),
1013 phrase: "10000000000000".to_string(),
1014 delimiter: crate::query_grammar::Delimiter::None,
1015 slop: 0,
1016 prefix: false,
1017 };
1018 assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
1019 }
1020 {
1021 let user_input_literal = UserInputLiteral {
1022 field_name: Some("json.bool".to_string()),
1023 phrase: "true".to_string(),
1024 delimiter: crate::query_grammar::Delimiter::None,
1025 slop: 0,
1026 prefix: false,
1027 };
1028 assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
1029 }
1030 }
1031
1032 #[test]
1033 fn test_doc_macro() {
1034 let mut schema_builder = Schema::builder();
1035 let text_field = schema_builder.add_text_field("text", TEXT);
1036 let other_text_field = schema_builder.add_text_field("text2", TEXT);
1037 let document = doc!(text_field => "lucivy",
1038 text_field => "some other value",
1039 other_text_field => "short");
1040 assert_eq!(document.len(), 3);
1041 let values: Vec<OwnedValue> = document.get_all(text_field).map(OwnedValue::from).collect();
1042 assert_eq!(values.len(), 2);
1043 assert_eq!(values[0].as_ref().as_str(), Some("lucivy"));
1044 assert_eq!(values[1].as_ref().as_str(), Some("some other value"));
1045 let values: Vec<OwnedValue> = document
1046 .get_all(other_text_field)
1047 .map(OwnedValue::from)
1048 .collect();
1049 assert_eq!(values.len(), 1);
1050 assert_eq!(values[0].as_ref().as_str(), Some("short"));
1051 }
1052
1053 #[test]
1054 fn test_wrong_fast_field_type() -> crate::Result<()> {
1055 let mut schema_builder = Schema::builder();
1056 let fast_field_unsigned = schema_builder.add_u64_field("unsigned", FAST);
1057 let fast_field_signed = schema_builder.add_i64_field("signed", FAST);
1058 let fast_field_float = schema_builder.add_f64_field("float", FAST);
1059 schema_builder.add_text_field("text", TEXT);
1060 schema_builder.add_u64_field("stored_int", STORED);
1061 let schema = schema_builder.build();
1062
1063 let index = Index::create_in_ram(schema);
1064 let mut index_writer: IndexWriter = index.writer_for_tests()?;
1065 {
1066 let document =
1067 doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64);
1068 index_writer.add_document(document)?;
1069 index_writer.commit()?;
1070 }
1071 let reader = index.reader()?;
1072 let searcher = reader.searcher();
1073 let segment_reader: &SegmentReader = searcher.segment_reader(0);
1074 {
1075 let fast_field_reader_res = segment_reader.fast_fields().u64("text");
1076 assert!(fast_field_reader_res.is_err());
1077 }
1078 {
1079 let fast_field_reader_opt = segment_reader.fast_fields().u64("stored_int");
1080 assert!(fast_field_reader_opt.is_err());
1081 }
1082 {
1083 let fast_field_reader_opt = segment_reader.fast_fields().u64("signed");
1084 assert!(fast_field_reader_opt.is_err());
1085 }
1086 {
1087 let fast_field_reader_opt = segment_reader.fast_fields().u64("float");
1088 assert!(fast_field_reader_opt.is_err());
1089 }
1090 {
1091 let fast_field_reader_opt = segment_reader.fast_fields().u64("unsigned");
1092 assert!(fast_field_reader_opt.is_ok());
1093 let fast_field_reader = fast_field_reader_opt.unwrap();
1094 assert_eq!(fast_field_reader.first(0), Some(4u64))
1095 }
1096
1097 {
1098 let fast_field_reader_res = segment_reader.fast_fields().i64("signed");
1099 assert!(fast_field_reader_res.is_ok());
1100 let fast_field_reader = fast_field_reader_res.unwrap();
1101 assert_eq!(fast_field_reader.first(0), Some(4i64))
1102 }
1103
1104 {
1105 let fast_field_reader_res = segment_reader.fast_fields().f64("float");
1106 assert!(fast_field_reader_res.is_ok());
1107 let fast_field_reader = fast_field_reader_res.unwrap();
1108 assert_eq!(fast_field_reader.first(0), Some(4f64))
1109 }
1110 Ok(())
1111 }
1112
1113 #[test]
1115 fn test_update_via_delete_insert() -> crate::Result<()> {
1116 use crate::collector::Count;
1117 use crate::index::SegmentId;
1118 use crate::indexer::NoMergePolicy;
1119 use crate::query::AllQuery;
1120
1121 const DOC_COUNT: u64 = 2u64;
1122
1123 let mut schema_builder = SchemaBuilder::default();
1124 let id = schema_builder.add_u64_field("id", INDEXED);
1125 let schema = schema_builder.build();
1126
1127 let index = Index::create_in_ram(schema);
1128 let index_reader = index.reader()?;
1129
1130 let mut index_writer: IndexWriter = index.writer_for_tests()?;
1131 index_writer.set_merge_policy(Box::new(NoMergePolicy));
1132
1133 for doc_id in 0u64..DOC_COUNT {
1134 index_writer.add_document(doc!(id => doc_id))?;
1135 }
1136 index_writer.commit()?;
1137
1138 index_reader.reload()?;
1139 let searcher = index_reader.searcher();
1140
1141 assert_eq!(
1142 searcher.search(&AllQuery, &Count).unwrap(),
1143 DOC_COUNT as usize
1144 );
1145
1146 for doc_id in 0u64..DOC_COUNT {
1148 index_writer.delete_term(Term::from_field_u64(id, doc_id));
1149 index_writer.commit()?;
1150 index_reader.reload()?;
1151 index_writer.add_document(doc!(id => doc_id))?;
1152 index_writer.commit()?;
1153 index_reader.reload()?;
1154 let searcher = index_reader.searcher();
1155 assert_eq!(
1157 searcher.search(&AllQuery, &Count).unwrap(),
1158 DOC_COUNT as usize
1159 );
1160 }
1161
1162 index_reader.reload()?;
1163 let searcher = index_reader.searcher();
1164 let segment_ids: Vec<SegmentId> = searcher
1165 .segment_readers()
1166 .iter()
1167 .map(|reader| reader.segment_id())
1168 .collect();
1169 index_writer.merge(&segment_ids).wait()?;
1170 index_reader.reload()?;
1171 let searcher = index_reader.searcher();
1172 assert_eq!(searcher.search(&AllQuery, &Count)?, DOC_COUNT as usize);
1173 Ok(())
1174 }
1175
1176 #[test]
1177 fn test_validate_checksum() -> crate::Result<()> {
1178 let mut builder = Schema::builder();
1179 let body = builder.add_text_field("body", TEXT | STORED);
1180 let schema = builder.build();
1181 let index = Index::create_in_ram(schema);
1182 let mut writer: IndexWriter = index.writer_for_tests()?;
1183 writer.set_merge_policy(Box::new(NoMergePolicy));
1184 for _ in 0..5000 {
1185 writer.add_document(doc!(body => "foo"))?;
1186 writer.add_document(doc!(body => "boo"))?;
1187 }
1188 writer.commit()?;
1189 assert!(index.validate_checksum()?.is_empty());
1190
1191 writer.delete_term(Term::from_field_text(body, "foo"));
1193 writer.commit()?;
1194 let segment_ids = index.searchable_segment_ids()?;
1195 writer.merge(&segment_ids).wait()?;
1196 assert!(index.validate_checksum()?.is_empty());
1197 Ok(())
1198 }
1199
1200 #[test]
1201 fn test_datetime() {
1202 let now = OffsetDateTime::now_utc();
1203
1204 let dt = DateTime::from_utc(now).into_utc();
1205 assert_eq!(dt.to_ordinal_date(), now.to_ordinal_date());
1206 assert_eq!(dt.to_hms_micro(), now.to_hms_micro());
1207 assert_eq!(dt.nanosecond(), now.nanosecond());
1209
1210 let dt = DateTime::from_timestamp_secs(now.unix_timestamp()).into_utc();
1211 assert_eq!(dt.to_ordinal_date(), now.to_ordinal_date());
1212 assert_eq!(dt.to_hms(), now.to_hms());
1213 assert_ne!(dt.to_hms_micro(), now.to_hms_micro());
1215
1216 let dt =
1217 DateTime::from_timestamp_micros((now.unix_timestamp_nanos() / 1_000) as i64).into_utc();
1218 assert_eq!(dt.to_ordinal_date(), now.to_ordinal_date());
1219 assert_eq!(dt.to_hms_micro(), now.to_hms_micro());
1220
1221 let dt_from_ts_nanos =
1222 OffsetDateTime::from_unix_timestamp_nanos(1492432621123456789).unwrap();
1223 let offset_dt = DateTime::from_utc(dt_from_ts_nanos).into_utc();
1224 assert_eq!(
1225 dt_from_ts_nanos.to_ordinal_date(),
1226 offset_dt.to_ordinal_date()
1227 );
1228 assert_eq!(dt_from_ts_nanos.to_hms_micro(), offset_dt.to_hms_micro());
1229 }
1230
1231 #[test]
1232 fn test_json_number_ambiguity() {
1233 let mut schema_builder = Schema::builder();
1234 let json_field = schema_builder.add_json_field("number", crate::schema::TEXT);
1235 let schema = schema_builder.build();
1236 let index = Index::create_in_ram(schema);
1237 let mut index_writer = index.writer_for_tests().unwrap();
1238 {
1239 let mut doc = LucivyDocument::new();
1240 let mut obj = BTreeMap::default();
1241 obj.insert("key".to_string(), OwnedValue::I64(1i64));
1242 doc.add_object(json_field, obj);
1243 index_writer.add_document(doc).unwrap();
1244 }
1245 {
1246 let mut doc = LucivyDocument::new();
1247 let mut obj = BTreeMap::default();
1248 obj.insert("key".to_string(), OwnedValue::U64(1u64));
1249 doc.add_object(json_field, obj);
1250 index_writer.add_document(doc).unwrap();
1251 }
1252 {
1253 let mut doc = LucivyDocument::new();
1254 let mut obj = BTreeMap::default();
1255 obj.insert("key".to_string(), OwnedValue::F64(1.0f64));
1256 doc.add_object(json_field, obj);
1257 index_writer.add_document(doc).unwrap();
1258 }
1259 index_writer.commit().unwrap();
1260 let searcher = index.reader().unwrap().searcher();
1261 assert_eq!(searcher.num_docs(), 3);
1262 {
1263 let parser = QueryParser::for_index(&index, vec![]);
1264 let query = parser.parse_query("number.key:1").unwrap();
1265 let count = searcher.search(&query, &crate::collector::Count).unwrap();
1266 assert_eq!(count, 3);
1267 }
1268 {
1269 let parser = QueryParser::for_index(&index, vec![]);
1270 let query = parser.parse_query("number.key:1.0").unwrap();
1271 let count = searcher.search(&query, &crate::collector::Count).unwrap();
1272 assert_eq!(count, 3);
1273 }
1274 }
1275}