elasticsearch_dsl/search/highlight/
mod.rs

//! Highlighters enable you to get highlighted snippets from one or more fields in your search
//! results so you can show users where the query matches are.
//!
//! When you request highlights, the response contains an additional `highlight` element for each
//! search hit that includes the highlighted fields and the highlighted fragments.
//!
//! # Offsets Strategy
//!
//! To create meaningful search snippets from the terms being queried, the highlighter needs to
//! know the start and end character offsets of each word in the original text. These offsets can
//! be obtained from:
//!
//! - The postings list. If `index_options` is set to `offsets` in the mapping, the
//!   [`unified` highlighter](UnifiedHighlighter) uses this information to highlight documents
//!   without re-analyzing the text. It re-runs the original query directly on the postings and
//!   extracts the matching offsets from the index, limiting the collection to the highlighted
//!   documents. This is important if you have large fields because it doesn’t require reanalyzing
//!   the text to be highlighted. It also requires less disk space than using `term_vectors`.
//! - Term vectors. If `term_vector` information is provided by setting `term_vector` to
//!   `with_positions_offsets` in the mapping, the [`unified` highlighter](UnifiedHighlighter)
//!   automatically uses the `term_vector` to highlight the field. It’s especially fast for large
//!   fields (> `1MB`) and for highlighting multi-term queries like `prefix` or `wildcard` because it
//!   can access the dictionary of terms for each document. The
//!   [`fvh` highlighter](FastVectorHighlighter) always uses term vectors.
//! - Plain highlighting. This mode is used by the [`unified` highlighter](UnifiedHighlighter) when
//!   there is no other alternative. It creates a tiny in-memory index and re-runs the original
//!   query criteria through Lucene’s query execution planner to get access to low-level match
//!   information on the current document. This is repeated for every field and every document that
//!   needs highlighting. The [`plain` highlighter](PlainHighlighter) always uses plain
//!   highlighting.
//!
//! > **Warning**<br/>
//! > Plain highlighting for large texts may require a substantial amount of time and memory. To
//! > protect against this, the maximum number of text characters that will be analyzed has been
//! > limited to 1000000. This default limit can be changed for a particular index with the index
//! > setting `index.highlight.max_analyzed_offset`.
//!
//! <https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html>
39
40mod boundary_scanner;
41mod encoder;
42mod fragmenter;
43mod highlighter;
44mod matched_fields;
45mod order;
46mod tags;
47
48use crate::util::*;
49
50pub use self::boundary_scanner::*;
51pub use self::encoder::*;
52pub use self::fragmenter::*;
53pub use self::highlighter::*;
54/// Reexports
55pub use self::matched_fields::*;
56pub use self::order::*;
57pub use self::tags::*;
58
59/// Highlight structure
60#[derive(Debug, Clone, Default, PartialEq, Serialize)]
61pub struct Highlight {
62    #[serde(flatten, skip_serializing_if = "ShouldSkip::should_skip")]
63    highlighter: Option<Highlighter>,
64    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
65    fields: Vec<KeyValuePair<String, Highlighter>>,
66}
67
68impl Highlight {
69    /// Creates a new instance of [Highlight]
70    pub fn new() -> Self {
71        Default::default()
72    }
73
74    /// Sets highlighter settings
75    pub fn highlighter<H>(mut self, highlighter: H) -> Self
76    where
77        H: Into<Highlighter>,
78    {
79        self.highlighter = Some(highlighter.into());
80        self
81    }
82
83    /// Adds field or field pattern to highlighter
84    pub fn field<F>(mut self, field: F) -> Self
85    where
86        F: ToString,
87    {
88        self.fields
89            .push(KeyValuePair::new(field.to_string(), Default::default()));
90        self
91    }
92
93    /// Adds field or field pattern to highlighter
94    pub fn field_highlighter<F, H>(mut self, field: F, highlighter: H) -> Self
95    where
96        F: ToString,
97        H: Into<Highlighter>,
98    {
99        self.fields
100            .push(KeyValuePair::new(field.to_string(), highlighter.into()));
101        self
102    }
103}
104
#[cfg(test)]
mod tests {
    use super::*;

    /// Nothing configured: both members are skipped and the result is an empty object.
    #[test]
    fn serializes_empty_highlight() {
        assert_serialize(Highlight::new(), json!({}));
    }

    /// Fields added without settings serialize as empty objects, in insertion order.
    #[test]
    fn serializes_fields_with_default_highlighters() {
        assert_serialize(
            Highlight::new()
                .field("field1")
                .field("field2")
                .field("field3"),
            json!({
                "fields": [
                    { "field1": {} },
                    { "field2": {} },
                    { "field3": {} },
                ]
            }),
        );
    }

    /// Shared settings are flattened next to `fields`; field order follows the calls, not the
    /// names.
    #[test]
    fn serializes_shared_tags_next_to_fields() {
        assert_serialize(
            Highlight::new()
                .highlighter(Highlighter::new().tags((["<eim>"], ["</eim>"])))
                .field("field3")
                .field("field2")
                .field("field1"),
            json!({
                "pre_tags": ["<eim>"],
                "post_tags": ["</eim>"],
                "fields": [
                    { "field3": {} },
                    { "field2": {} },
                    { "field1": {} },
                ]
            }),
        );
    }

    /// A per-field highlighter is serialized inside its own field entry and does not affect the
    /// shared settings.
    #[test]
    fn serializes_field_specific_highlighter() {
        assert_serialize(
            Highlight::new()
                .highlighter(
                    Highlighter::new()
                        .tags((["<eim>"], ["</eim>"]))
                        .fvh()
                        .matched_fields(["one", "two", "three"]),
                )
                .field("field1")
                .field("field2")
                .field_highlighter("field3", Highlighter::new().plain().no_match_size(2u32)),
            json!({
                "pre_tags": ["<eim>"],
                "post_tags": ["</eim>"],
                "matched_fields": ["one", "two", "three"],
                "type": "fvh",
                "fields": [
                    { "field1": {} },
                    { "field2": {} },
                    { "field3": { "type": "plain", "no_match_size": 2 } },
                ]
            }),
        );
    }

    /// Same as the previous scenario, with an explicit fragment order on the shared settings.
    #[test]
    fn serializes_shared_order() {
        assert_serialize(
            Highlight::new()
                .highlighter(
                    Highlighter::new()
                        .tags((["<eim>"], ["</eim>"]))
                        .fvh()
                        .matched_fields(["one", "two", "three"])
                        .order(Order::Score),
                )
                .field("field1")
                .field("field2")
                .field_highlighter("field3", Highlighter::new().plain().no_match_size(2u32)),
            json!({
                "pre_tags": ["<eim>"],
                "post_tags": ["</eim>"],
                "matched_fields": ["one", "two", "three"],
                "order": "score",
                "type": "fvh",
                "fields": [
                    { "field1": {} },
                    { "field2": {} },
                    { "field3": { "type": "plain", "no_match_size": 2 } },
                ]
            }),
        );
    }
}