elasticsearch_dsl/search/highlight/mod.rs
1//! Highlighters enable you to get highlighted snippets from one or more fields in your search
2//! results so you can show users where the query matches are.
3//!
4//! When you request highlights, the
5//! response contains an additional `highlight` element for each search hit that includes the
6//! highlighted fields and the highlighted fragments.
7//!
8//! # Offsets Strategy
9//!
10//! To create meaningful search snippets from the terms being queried, the highlighter needs to
11//! know the start and end character offsets of each word in the original text. These offsets can
12//! be obtained from:
13//!
14//! - The postings list. If `index_options` is set to `offsets` in the mapping, the
15//! [`unified` highlighter](UnifiedHighlighter) uses this information to highlight documents
16//! without re-analyzing the text. It re-runs the original query directly on the postings and
17//! extracts the matching offsets from the index, limiting the collection to the highlighted
18//! documents. This is important if you have large fields because it doesn’t require reanalyzing
19//! the text to be highlighted. It also requires less disk space than using `term_vectors`.
20//! - Term vectors. If `term_vector` information is provided by setting `term_vector` to
21//! `with_positions_offsets` in the mapping, the [`unified` highlighter](UnifiedHighlighter)
22//! automatically uses the `term_vector` to highlight the field. It’s fast especially for large
23//! fields (> `1MB`) and for highlighting multi-term queries like `prefix` or `wildcard` because it
24//! can access the dictionary of terms for each document. The
25//! [`fvh` highlighter](FastVectorHighlighter) always uses term vectors.
26//! - Plain highlighting. This mode is used by the [`unified`](UnifiedHighlighter) when there is no
27//! other alternative. It creates a tiny in-memory index and re-runs the original query criteria
28//! through Lucene’s query execution planner to get access to low-level match information on the
29//! current document. This is repeated for every field and every document that needs highlighting.
30//! The [`plain` highlighter](PlainHighlighter) always uses plain highlighting.
31//!
//! > **Warning**<br/>
//! > Plain highlighting for large texts may require a substantial amount of time and memory.
//! > To protect against this, the maximum number of text characters that will be analyzed has
//! > been limited to 1000000. This default limit can be changed for a particular index with the
//! > index setting `index.highlight.max_analyzed_offset`.
37//!
38//! <https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html>
39
40mod boundary_scanner;
41mod encoder;
42mod fragmenter;
43mod highlighter;
44mod matched_fields;
45mod order;
46mod tags;
47
48use crate::util::*;
49
50pub use self::boundary_scanner::*;
51pub use self::encoder::*;
52pub use self::fragmenter::*;
53pub use self::highlighter::*;
54/// Reexports
55pub use self::matched_fields::*;
56pub use self::order::*;
57pub use self::tags::*;
58
59/// Highlight structure
60#[derive(Debug, Clone, Default, PartialEq, Serialize)]
61pub struct Highlight {
62 #[serde(flatten, skip_serializing_if = "ShouldSkip::should_skip")]
63 highlighter: Option<Highlighter>,
64 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
65 fields: Vec<KeyValuePair<String, Highlighter>>,
66}
67
68impl Highlight {
69 /// Creates a new instance of [Highlight]
70 pub fn new() -> Self {
71 Default::default()
72 }
73
74 /// Sets highlighter settings
75 pub fn highlighter<H>(mut self, highlighter: H) -> Self
76 where
77 H: Into<Highlighter>,
78 {
79 self.highlighter = Some(highlighter.into());
80 self
81 }
82
83 /// Adds field or field pattern to highlighter
84 pub fn field<F>(mut self, field: F) -> Self
85 where
86 F: ToString,
87 {
88 self.fields
89 .push(KeyValuePair::new(field.to_string(), Default::default()));
90 self
91 }
92
93 /// Adds field or field pattern to highlighter
94 pub fn field_highlighter<F, H>(mut self, field: F, highlighter: H) -> Self
95 where
96 F: ToString,
97 H: Into<Highlighter>,
98 {
99 self.fields
100 .push(KeyValuePair::new(field.to_string(), highlighter.into()));
101 self
102 }
103}
104
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the JSON produced by [`Highlight`] for a range of builder combinations.
    #[test]
    fn serialization() {
        // Highlighter setups that are repeated across the cases below.
        let tagged = || Highlighter::new().tags((["<eim>"], ["</eim>"]));
        let tagged_fvh = || tagged().fvh().matched_fields(["one", "two", "three"]);
        let plain_field = || Highlighter::new().plain().no_match_size(2u32);

        // An untouched builder serializes to an empty object.
        let empty = Highlight::new();
        assert_serialize(empty, json!({}));

        // Fields only: each one gets an empty settings object, in insertion order.
        let fields_only = Highlight::new()
            .field("field1")
            .field("field2")
            .field("field3");
        assert_serialize(
            fields_only,
            json!({
                "fields": [
                    { "field1": {} },
                    { "field2": {} },
                    { "field3": {} },
                ]
            }),
        );

        // Request-wide settings are flattened next to `fields`.
        let with_tags = Highlight::new()
            .highlighter(tagged())
            .field("field3")
            .field("field2")
            .field("field1");
        assert_serialize(
            with_tags,
            json!({
                "pre_tags": ["<eim>"],
                "post_tags": ["</eim>"],
                "fields": [
                    { "field3": {} },
                    { "field2": {} },
                    { "field1": {} },
                ]
            }),
        );

        // Request-wide `fvh` settings combined with a per-field `plain` override.
        let fvh_with_override = Highlight::new()
            .highlighter(tagged_fvh())
            .field("field1")
            .field("field2")
            .field_highlighter("field3", plain_field());
        assert_serialize(
            fvh_with_override,
            json!({
                "pre_tags": ["<eim>"],
                "post_tags": ["</eim>"],
                "matched_fields": ["one", "two", "three"],
                "type": "fvh",
                "fields": [
                    { "field1": {} },
                    { "field2": {} },
                    { "field3": { "type": "plain", "no_match_size": 2 } },
                ]
            }),
        );

        // Same as the previous case, with an explicit fragment order.
        let fvh_ordered = Highlight::new()
            .highlighter(tagged_fvh().order(Order::Score))
            .field("field1")
            .field("field2")
            .field_highlighter("field3", plain_field());
        assert_serialize(
            fvh_ordered,
            json!({
                "pre_tags": ["<eim>"],
                "post_tags": ["</eim>"],
                "matched_fields": ["one", "two", "three"],
                "order": "score",
                "type": "fvh",
                "fields": [
                    { "field1": {} },
                    { "field2": {} },
                    { "field3": { "type": "plain", "no_match_size": 2 } },
                ]
            }),
        );
    }
}