summavy/indexer/
mod.rs

1pub mod delete_queue;
2
3pub mod demuxer;
4pub mod doc_id_mapping;
5mod doc_opstamp_mapping;
6mod flat_map_with_buffer;
7pub mod index_writer;
8mod index_writer_status;
9mod json_term_writer;
10mod log_merge_policy;
11mod merge_operation;
12pub mod merge_policy;
13pub mod merger;
14mod merger_sorted_index_test;
15pub mod operation;
16pub mod prepared_commit;
17mod segment_entry;
18mod segment_manager;
19mod segment_register;
20pub mod segment_serializer;
21pub mod segment_updater;
22mod segment_writer;
23mod sorted_doc_id_column;
24mod sorted_doc_id_multivalue_column;
25mod stamper;
26
27use crossbeam_channel as channel;
28use smallvec::SmallVec;
29
30pub use self::index_writer::IndexWriter;
31pub(crate) use self::json_term_writer::{
32    convert_to_fast_value_and_get_term, set_string_and_get_terms, JsonTermWriter,
33};
34pub use self::log_merge_policy::LogMergePolicy;
35pub use self::merge_operation::MergeOperation;
36pub use self::merge_policy::{MergeCandidate, MergePolicy, NoMergePolicy};
37pub use self::prepared_commit::PreparedCommit;
38pub use self::segment_entry::SegmentEntry;
39pub use self::segment_manager::SegmentManager;
40pub use self::segment_serializer::SegmentSerializer;
41pub use self::segment_updater::{merge_filtered_segments, merge_indices};
42pub use self::segment_writer::SegmentWriter;
43use crate::indexer::operation::AddOperation;
44
45/// Alias for the default merge policy, which is the `LogMergePolicy`.
46pub type DefaultMergePolicy = LogMergePolicy;
47
// Batch of documents.
// Most of the time, users will send operations one by one, but it can be useful to
// send them as a small batch to ensure that
// - all docs in the batch end up in the same segment, with contiguous doc_ids;
// - all operations in the batch are committed at the same time, making the batch
//   atomic.
54type AddBatch = SmallVec<[AddOperation; 4]>;
55type AddBatchSender = channel::Sender<AddBatch>;
56type AddBatchReceiver = channel::Receiver<AddBatch>;
57
#[cfg(all(test, feature = "mmap"))]
mod tests_mmap {
    use crate::collector::Count;
    use crate::query::QueryParser;
    use crate::schema::{JsonObjectOptions, Schema, TEXT};
    use crate::{Index, Term};

    /// Commits a segment holding one deleted document followed by 32 live
    /// ones. The test passes as long as every indexing step and the final
    /// commit succeed.
    #[test]
    fn test_advance_delete_bug() -> crate::Result<()> {
        let mut builder = Schema::builder();
        let text = builder.add_text_field("text", TEXT);
        let schema = builder.build();
        let index = Index::create_from_tempdir(schema)?;
        let mut writer = index.writer_for_tests()?;

        // The segment must contain one deleted document: add "b", then
        // delete it by term.
        writer.add_document(doc!(text => "b"))?;
        writer.delete_term(Term::from_field_text(text, "b"));

        // The bug only shows up with enough data (at least 32 documents).
        for _ in 0..32 {
            writer.add_document(doc!(text => "c"))?;
        }

        writer.commit()?;
        Ok(())
    }

    /// With a plain `TEXT` JSON field (expand-dots not enabled), a key that
    /// contains dots is matched by escaping each dot in the query path.
    #[test]
    fn test_json_field_expand_dots_disabled_dot_escaped_required() {
        let mut builder = Schema::builder();
        let json = builder.add_json_field("json", TEXT);
        let index = Index::create_in_ram(builder.build());

        // Index a single document whose JSON key itself contains dots.
        let mut writer = index.writer_for_tests().unwrap();
        let payload = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"});
        writer.add_document(doc!(json => payload)).unwrap();
        writer.commit().unwrap();

        let reader = index.reader().unwrap();
        let searcher = reader.searcher();
        assert_eq!(searcher.num_docs(), 1);

        // The dots of the key are escaped in the query.
        let parser = QueryParser::for_index(&index, Vec::new());
        let escaped_query = parser
            .parse_query(r#"json.k8s\.container\.name:prometheus"#)
            .unwrap();
        let hits = searcher.search(&escaped_query, &Count).unwrap();
        assert_eq!(hits, 1);
    }

    /// With expand-dots enabled on the JSON field, the same dotted key is
    /// matched by a query path that leaves the dots unescaped.
    #[test]
    fn test_json_field_expand_dots_enabled_dot_escape_not_required() {
        let options = JsonObjectOptions::from(TEXT).set_expand_dots_enabled();
        let mut builder = Schema::builder();
        let json = builder.add_json_field("json", options);
        let index = Index::create_in_ram(builder.build());

        // Index a single document whose JSON key itself contains dots.
        let mut writer = index.writer_for_tests().unwrap();
        let payload = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"});
        writer.add_document(doc!(json => payload)).unwrap();
        writer.commit().unwrap();

        let reader = index.reader().unwrap();
        let searcher = reader.searcher();
        assert_eq!(searcher.num_docs(), 1);

        // No escaping of the dots in the query this time.
        let parser = QueryParser::for_index(&index, Vec::new());
        let plain_query = parser
            .parse_query(r#"json.k8s.container.name:prometheus"#)
            .unwrap();
        let hits = searcher.search(&plain_query, &Count).unwrap();
        assert_eq!(hits, 1);
    }
}