// elasticsearch_dsl/search/highlight/highlighter.rs

1use crate::search::*;
2use crate::util::*;
3
4/// Highlighter settings
5#[derive(Debug, Clone, PartialEq, Serialize)]
6#[serde(untagged)]
7pub enum Highlighter {
8    /// Default highlighter
9    Default(DefaultHighlighter),
10
11    /// Fast vector highlighter
12    Fvh(FastVectorHighlighter),
13
14    /// Plain highlighter
15    Plain(PlainHighlighter),
16
17    /// Unified highlighter
18    Unified(UnifiedHighlighter),
19}
20
21/// Highlighting settings can be set on a global level and overridden at the field level
22#[derive(Debug, Clone, Default, PartialEq, Serialize)]
23pub struct DefaultHighlighter {
24    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
25    boundary_chars: Option<String>,
26
27    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
28    boundary_max_scan: Option<u32>,
29
30    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
31    encoder: Option<Encoder>,
32
33    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
34    force_source: Option<bool>,
35
36    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
37    fragment_size: Option<u32>,
38
39    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
40    highlight_query: Option<Query>,
41
42    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
43    no_match_size: Option<u32>,
44
45    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
46    number_of_fragments: Option<u32>,
47
48    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
49    order: Option<Order>,
50
51    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
52    pre_tags: Option<Vec<String>>,
53
54    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
55    post_tags: Option<Vec<String>>,
56
57    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
58    require_field_match: Option<bool>,
59
60    #[serde(skip_serializing_if = "ShouldSkip::should_skip", flatten)]
61    tags: Option<Tags>,
62}
63
64/// The `fvh` highlighter uses the Lucene Fast Vector highlighter. This highlighter can be used on
65/// fields with `term_vector` set to `with_positions_offsets` in the mapping. The fast vector
66/// highlighter:
67///
68/// - Can be customized with a
69///   [boundary_scanner](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#boundary-scanners).
70/// - Requires setting `term_vector` to `with_positions_offsets` which increases the size of the
71///   index
72/// - Can combine matches from multiple fields into one result. See
73///   [`matched_fields`](FastVectorHighlighter::matched_fields)
74/// - Can assign different weights to matches at different positions allowing for things like
75///   phrase matches being sorted above term matches when highlighting a Boosting Query that boosts
76///   phrase matches over term matches
77///
78/// > **Warning**<br/>
79/// > The `fvh` highlighter does not support span queries. If you need support for span queries,
80/// > try an alternative highlighter, such as the `unified` highlighter.
81///
82/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#fast-vector-highlighter>
83#[derive(Debug, Clone, PartialEq, Serialize)]
84pub struct FastVectorHighlighter {
85    // Common
86    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
87    boundary_chars: Option<String>,
88
89    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
90    boundary_max_scan: Option<u32>,
91
92    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
93    encoder: Option<Encoder>,
94
95    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
96    force_source: Option<bool>,
97
98    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
99    fragment_size: Option<u32>,
100
101    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
102    highlight_query: Option<Query>,
103
104    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
105    no_match_size: Option<u32>,
106
107    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
108    number_of_fragments: Option<u32>,
109
110    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
111    order: Option<Order>,
112
113    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
114    pre_tags: Option<Vec<String>>,
115
116    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
117    post_tags: Option<Vec<String>>,
118
119    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
120    require_field_match: Option<bool>,
121
122    #[serde(skip_serializing_if = "ShouldSkip::should_skip", flatten)]
123    tags: Option<Tags>,
124
125    // Highlighter specific
126    r#type: &'static str,
127
128    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
129    boundary_scanner: Option<FvhBoundaryScanner>,
130
131    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
132    fragment_offset: Option<u32>,
133
134    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
135    matched_fields: Option<MatchedFields>,
136
137    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
138    phrase_limit: Option<u32>,
139}
140
141/// The `plain` highlighter uses the standard Lucene highlighter. It attempts to reflect the query
142/// matching logic in terms of understanding word importance and any word positioning criteria in
143/// phrase queries.
144///
145/// > **Warning**<br/>
146/// > The `plain` highlighter works best for highlighting simple query matches in a single field.
147/// > To accurately reflect query logic, it creates a tiny in-memory index and re-runs the original
148/// > query criteria through Lucene’s query execution planner to get access to low-level match
149/// > information for the current document. This is repeated for every field and every document that
150/// > needs to be highlighted. If you want to highlight a lot of fields in a lot of documents with
151/// > complex queries, we recommend using the `unified` highlighter on `postings` or `term_vector`
152/// > fields.
153///
154/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#plain-highlighter>
155#[derive(Debug, Clone, PartialEq, Serialize)]
156pub struct PlainHighlighter {
157    // Common
158    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
159    boundary_chars: Option<String>,
160
161    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
162    boundary_max_scan: Option<u32>,
163
164    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
165    encoder: Option<Encoder>,
166
167    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
168    force_source: Option<bool>,
169
170    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
171    fragment_size: Option<u32>,
172
173    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
174    highlight_query: Option<Query>,
175
176    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
177    no_match_size: Option<u32>,
178
179    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
180    number_of_fragments: Option<u32>,
181
182    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
183    order: Option<Order>,
184
185    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
186    pre_tags: Option<Vec<String>>,
187
188    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
189    post_tags: Option<Vec<String>>,
190
191    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
192    require_field_match: Option<bool>,
193
194    #[serde(skip_serializing_if = "ShouldSkip::should_skip", flatten)]
195    tags: Option<Tags>,
196
197    // Highlighter specific
198    r#type: &'static str,
199
200    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
201    fragmenter: Option<Fragmenter>,
202}
203
204/// The `unified` highlighter uses the Lucene Unified Highlighter. This highlighter breaks the text
205/// into sentences and uses the BM25 algorithm to score individual sentences as if they were
206/// documents in the corpus. It also supports accurate phrase and multi-term (fuzzy, prefix, regex)
207/// highlighting. This is the default highlighter.
208///
209/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#unified-highlighter>
210#[derive(Debug, Clone, PartialEq, Serialize)]
211pub struct UnifiedHighlighter {
212    // Common
213    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
214    boundary_chars: Option<String>,
215
216    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
217    boundary_max_scan: Option<u32>,
218
219    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
220    encoder: Option<Encoder>,
221
222    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
223    force_source: Option<bool>,
224
225    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
226    fragment_size: Option<u32>,
227
228    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
229    highlight_query: Option<Query>,
230
231    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
232    no_match_size: Option<u32>,
233
234    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
235    number_of_fragments: Option<u32>,
236
237    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
238    order: Option<Order>,
239
240    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
241    pre_tags: Option<Vec<String>>,
242
243    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
244    post_tags: Option<Vec<String>>,
245
246    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
247    require_field_match: Option<bool>,
248
249    #[serde(skip_serializing_if = "ShouldSkip::should_skip", flatten)]
250    tags: Option<Tags>,
251
252    // Highlighter specific
253    r#type: &'static str,
254
255    #[serde(skip_serializing_if = "ShouldSkip::should_skip", flatten)]
256    boundary_scanner: Option<UnifiedBoundaryScanner>,
257}
258
259impl Default for Highlighter {
260    fn default() -> Self {
261        Self::Default(Default::default())
262    }
263}
264
265impl Highlighter {
266    /// Creates a new instance of [DefaultHighlighter](DefaultHighlighter)
267    #[allow(clippy::new_ret_no_self)]
268    pub fn new() -> DefaultHighlighter {
269        Default::default()
270    }
271
272    /// Creates a new instance of [FastVectorHighlighter](FastVectorHighlighter)
273    pub fn fvh() -> FastVectorHighlighter {
274        FastVectorHighlighter::default()
275    }
276
277    /// Creates a new instance of [PlainHighlighter](PlainHighlighter)
278    pub fn plain() -> PlainHighlighter {
279        PlainHighlighter::default()
280    }
281
282    /// Creates a new instance of [UnifiedHighlighter](UnifiedHighlighter)
283    pub fn unified() -> UnifiedHighlighter {
284        UnifiedHighlighter::default()
285    }
286}
287
288impl Default for FastVectorHighlighter {
289    fn default() -> Self {
290        Self {
291            r#type: "fvh",
292            boundary_chars: None,
293            boundary_max_scan: None,
294            encoder: None,
295            force_source: None,
296            fragment_size: None,
297            highlight_query: None,
298            no_match_size: None,
299            number_of_fragments: None,
300            order: None,
301            pre_tags: None,
302            post_tags: None,
303            require_field_match: None,
304            tags: None,
305            boundary_scanner: None,
306            fragment_offset: None,
307            matched_fields: None,
308            phrase_limit: None,
309        }
310    }
311}
312
313impl Default for PlainHighlighter {
314    fn default() -> Self {
315        Self {
316            r#type: "plain",
317            boundary_chars: None,
318            boundary_max_scan: None,
319            encoder: None,
320            force_source: None,
321            fragment_size: None,
322            highlight_query: None,
323            no_match_size: None,
324            number_of_fragments: None,
325            order: None,
326            pre_tags: None,
327            post_tags: None,
328            require_field_match: None,
329            tags: None,
330            fragmenter: None,
331        }
332    }
333}
334
335impl Default for UnifiedHighlighter {
336    fn default() -> Self {
337        Self {
338            r#type: "unified",
339            boundary_chars: None,
340            boundary_max_scan: None,
341            encoder: None,
342            force_source: None,
343            fragment_size: None,
344            highlight_query: None,
345            no_match_size: None,
346            number_of_fragments: None,
347            order: None,
348            pre_tags: None,
349            post_tags: None,
350            require_field_match: None,
351            tags: None,
352            boundary_scanner: None,
353        }
354    }
355}
356
357impl From<DefaultHighlighter> for Highlighter {
358    fn from(highlighter: DefaultHighlighter) -> Self {
359        Self::Default(highlighter)
360    }
361}
362
363impl From<FastVectorHighlighter> for Highlighter {
364    fn from(highlighter: FastVectorHighlighter) -> Self {
365        Self::Fvh(highlighter)
366    }
367}
368
369impl From<PlainHighlighter> for Highlighter {
370    fn from(highlighter: PlainHighlighter) -> Self {
371        Self::Plain(highlighter)
372    }
373}
374
375impl From<UnifiedHighlighter> for Highlighter {
376    fn from(highlighter: UnifiedHighlighter) -> Self {
377        Self::Unified(highlighter)
378    }
379}
380
// Expands to the builder methods shared by every highlighter struct. Must be invoked inside an
// `impl` block of a struct that has the fields assigned below. Each method consumes `self` and
// returns it with exactly one option replaced.
macro_rules! add_highlighter_methods {
    () => {
        /// Sets the string that contains each boundary character. Defaults to `.,!? \t\n`.
        pub fn boundary_chars<T>(self, boundary_chars: T) -> Self
        where
            T: ToString,
        {
            Self {
                boundary_chars: Some(boundary_chars.to_string()),
                ..self
            }
        }

        /// Sets how far to scan for boundary characters. Defaults to `20`.
        pub fn boundary_max_scan(self, boundary_max_scan: u32) -> Self {
            Self {
                boundary_max_scan: Some(boundary_max_scan),
                ..self
            }
        }

        /// Sets whether the snippet should be HTML encoded.
        pub fn encoder(self, encoder: Encoder) -> Self {
            Self {
                encoder: Some(encoder),
                ..self
            }
        }

        /// Highlights based on the source even if the field is stored separately. Defaults to
        /// `false`.
        pub fn force_source(self, force_source: bool) -> Self {
            Self {
                force_source: Some(force_source),
                ..self
            }
        }

        /// Sets the size of the highlighted fragment in characters. Defaults to `100`.
        pub fn fragment_size(self, fragment_size: u32) -> Self {
            Self {
                fragment_size: Some(fragment_size),
                ..self
            }
        }

        /// Highlights matches for a query other than the search query. This is especially useful
        /// if you use a rescore query because those are not taken into account by highlighting
        /// by default.
        ///
        /// > **Warning**<br/>
        /// > Elasticsearch does not validate that `highlight_query` contains the search query in
        /// > any way so it is possible to define it so legitimate query results are not
        /// > highlighted. Generally, you should include the search query as part of the
        /// > `highlight_query`.
        pub fn highlight_query<T>(self, highlight_query: T) -> Self
        where
            T: Into<Query>,
        {
            Self {
                highlight_query: Some(highlight_query.into()),
                ..self
            }
        }

        /// Sets the amount of text to return from the beginning of the field if there are no
        /// matching fragments to highlight. Defaults to `0` (nothing is returned).
        pub fn no_match_size(self, no_match_size: u32) -> Self {
            Self {
                no_match_size: Some(no_match_size),
                ..self
            }
        }

        /// Sets the maximum number of fragments to return. If the number of fragments is set to
        /// `0`, no fragments are returned. Instead, the entire field contents are highlighted
        /// and returned. This can be handy when you need to highlight short texts such as a
        /// title or address, but fragmentation is not required. If `number_of_fragments` is `0`,
        /// `fragment_size` is ignored. Defaults to `5`.
        pub fn number_of_fragments(self, number_of_fragments: u32) -> Self {
            Self {
                number_of_fragments: Some(number_of_fragments),
                ..self
            }
        }

        /// Sorts highlighted fragments by score when set to [`score`](Order::Score). By default,
        /// fragments will be output in the order they appear in the field
        /// (order: [`none`](Order::None)). Setting this option to [`score`](Order::Score) will
        /// output the most relevant fragments first. Each highlighter applies its own logic to
        /// compute relevancy scores. See the document
        /// [How highlighters work internally](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#how-es-highlighters-work-internally)
        /// for more details how different highlighters find the best fragments.
        pub fn order(self, order: Order) -> Self {
            Self {
                order: Some(order),
                ..self
            }
        }

        /// By default, only fields that contain a query match are highlighted. Set
        /// `require_field_match` to `false` to highlight all fields. Defaults to `true`.
        pub fn require_field_match(self, require_field_match: bool) -> Self {
            Self {
                require_field_match: Some(require_field_match),
                ..self
            }
        }

        /// Set to `styled` to use the built-in tag schema, or supply custom tags.
        pub fn tags<T>(self, tags: T) -> Self
        where
            T: Into<Tags>,
        {
            Self {
                tags: Some(tags.into()),
                ..self
            }
        }
    };
}
478
// Expands to a method `$method` that consumes `self` and builds a `$struct`, carrying over the
// options shared by every highlighter and taking all remaining fields from
// `$struct`'s `Default` implementation.
macro_rules! convert_to_highlighter {
    ($method:tt, $struct:tt) => {
        /// Converts these settings into the specific highlighter, keeping every shared option
        pub fn $method(self) -> $struct {
            let Self {
                boundary_chars,
                boundary_max_scan,
                encoder,
                force_source,
                fragment_size,
                highlight_query,
                no_match_size,
                number_of_fragments,
                order,
                pre_tags,
                post_tags,
                require_field_match,
                tags,
                ..
            } = self;

            $struct {
                boundary_chars,
                boundary_max_scan,
                encoder,
                force_source,
                fragment_size,
                highlight_query,
                no_match_size,
                number_of_fragments,
                order,
                pre_tags,
                post_tags,
                require_field_match,
                tags,
                ..Default::default()
            }
        }
    };
}
502
503impl DefaultHighlighter {
504    /// Creates a new [Highlighter](Highlighter) instance
505    pub fn new() -> Self {
506        Default::default()
507    }
508
509    add_highlighter_methods!();
510    convert_to_highlighter!(fvh, FastVectorHighlighter);
511    convert_to_highlighter!(plain, PlainHighlighter);
512    convert_to_highlighter!(unified, UnifiedHighlighter);
513}
514
515impl FastVectorHighlighter {
516    /// Creates a new [FastVectorHighlighter](FastVectorHighlighter) instance
517    pub fn new() -> Self {
518        Default::default()
519    }
520
521    add_highlighter_methods!();
522
523    /// Specifies how to break the highlighted fragments.
524    pub fn boundary_scanner(mut self, boundary_scanner: FvhBoundaryScanner) -> Self {
525        self.boundary_scanner = Some(boundary_scanner);
526        self
527    }
528
529    /// Controls the margin from which you want to start highlighting.
530    pub fn fragment_offset(mut self, fragment_offset: u32) -> Self {
531        self.fragment_offset = Some(fragment_offset);
532        self
533    }
534
535    /// Combine matches on multiple fields to highlight a single field. This is most intuitive for
536    /// multi-fields that analyze the same string in different ways. All `matched_fields` must have
537    /// `term_vector` set to `with_positions_offsets`, but only the field to which the matches are
538    /// combined is loaded so only that field benefits from having store set to yes.
539    pub fn matched_fields<T>(mut self, matched_fields: T) -> Self
540    where
541        T: Into<MatchedFields>,
542    {
543        self.matched_fields = Some(matched_fields.into());
544        self
545    }
546
547    /// Controls the number of matching phrases in a document that are considered. Prevents the
548    /// highlighter from analyzing too many phrases and consuming too much memory. When using
549    /// `matched_fields`, `phrase_limit` phrases per matched field are considered. Raising the
550    /// limit increases query time and consumes more memory. Defaults to 256.
551    pub fn phrase_limit(mut self, phrase_limit: u32) -> Self {
552        self.phrase_limit = Some(phrase_limit);
553        self
554    }
555}
556
557impl PlainHighlighter {
558    /// Creates a new [PlainHighlighter](PlainHighlighter) instance
559    pub fn new() -> Self {
560        Default::default()
561    }
562
563    add_highlighter_methods!();
564
565    /// Specifies how text should be broken up in highlight snippets.
566    pub fn fragmenter(mut self, fragmenter: Fragmenter) -> Self {
567        self.fragmenter = Some(fragmenter);
568        self
569    }
570}
571
572impl UnifiedHighlighter {
573    /// Creates a new [UnifiedHighlighter](UnifiedHighlighter) instance
574    pub fn new() -> Self {
575        Default::default()
576    }
577
578    add_highlighter_methods!();
579
580    /// Specifies how to break the highlighted fragments.
581    pub fn boundary_scanner(mut self, boundary_scanner: UnifiedBoundaryScanner) -> Self {
582        self.boundary_scanner = Some(boundary_scanner);
583        self
584    }
585}