logo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
//! Highlighters enable you to get highlighted snippets from one or more fields in your search
//! results so you can show users where the query matches are.
//!
//! When you request highlights, the
//! response contains an additional `highlight` element for each search hit that includes the
//! highlighted fields and the highlighted fragments.
//!
//! # Offsets Strategy
//!
//! To create meaningful search snippets from the terms being queried, the highlighter needs to
//! know the start and end character offsets of each word in the original text. These offsets can
//! be obtained from:
//!
//! - The postings list. If `index_options` is set to `offsets` in the mapping, the
//! [`unified` highlighter](UnifiedHighlighter) uses this information to highlight documents
//! without re-analyzing the text. It re-runs the original query directly on the postings and
//! extracts the matching offsets from the index, limiting the collection to the highlighted
//! documents. This is important if you have large fields because it doesn’t require reanalyzing
//! the text to be highlighted. It also requires less disk space than using `term_vectors`.
//! - Term vectors. If `term_vector` information is provided by setting `term_vector` to
//! `with_positions_offsets` in the mapping, the [`unified` highlighter](UnifiedHighlighter)
//! automatically uses the `term_vector` to highlight the field. It’s especially fast for large
//! fields (> `1MB`) and for highlighting multi-term queries like `prefix` or `wildcard` because it
//! can access the dictionary of terms for each document. The
//! [`fvh` highlighter](FastVectorHighlighter) always uses term vectors.
//! - Plain highlighting. This mode is used by the [`unified`](UnifiedHighlighter) when there is no
//! other alternative. It creates a tiny in-memory index and re-runs the original query criteria
//! through Lucene’s query execution planner to get access to low-level match information on the
//! current document. This is repeated for every field and every document that needs highlighting.
//! The [`plain` highlighter](PlainHighlighter) always uses plain highlighting.
//!
//! > **Warning**<br>
//! > Plain highlighting for large texts may require a substantial amount of time and memory. To
//! > protect against this, the maximum number of text characters that will be analyzed has been
//! > limited to 1000000. This default limit can be changed for a particular index with the index
//! > setting `index.highlight.max_analyzed_offset`.
//!
//! <https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html>

mod boundary_scanner;
mod encoder;
mod fragmenter;
mod highlighter;
mod matched_fields;
mod order;
mod tags;

use crate::util::*;

pub use self::boundary_scanner::*;
pub use self::encoder::*;
pub use self::fragmenter::*;
pub use self::highlighter::*;
/// Reexports
pub use self::matched_fields::*;
pub use self::order::*;
pub use self::tags::*;

/// Highlight request settings.
///
/// Combines an optional set of global highlighter settings with an ordered list of fields
/// (or field patterns), each carrying its own [`Highlighter`] settings. Instances are built
/// with [`Highlight::new`] followed by the chained builder methods.
///
/// The tests in this module expect a default value to serialize to an empty JSON object, i.e.
/// both members are omitted when nothing has been set.
#[derive(Debug, Clone, Default, PartialEq, Serialize)]
pub struct Highlight {
    /// Global highlighter settings. Flattened on serialization, so its keys (for example
    /// `pre_tags`, `post_tags`, `type`) appear directly on the highlight object rather than
    /// under a nested key. Omitted when `ShouldSkip::should_skip` reports it as skippable.
    #[serde(flatten, skip_serializing_if = "ShouldSkip::should_skip")]
    highlighter: Option<Highlighter>,
    /// Fields to highlight, kept in insertion order. Serialized under `fields` as an array of
    /// single-key objects (`{ "<field>": { ...settings } }`), and omitted when
    /// `ShouldSkip::should_skip` reports it as skippable.
    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
    fields: Vec<KeyValuePair<String, Highlighter>>,
}

impl Highlight {
    /// Creates a new instance of [Highlight]
    pub fn new() -> Self {
        Default::default()
    }

    /// Sets highlighter settings
    pub fn highlighter<H>(mut self, highlighter: H) -> Self
    where
        H: Into<Highlighter>,
    {
        self.highlighter = Some(highlighter.into());
        self
    }

    /// Adds field or field pattern to highlighter
    pub fn field<F>(mut self, field: F) -> Self
    where
        F: ToString,
    {
        self.fields
            .push(KeyValuePair::new(field.to_string(), Default::default()));
        self
    }

    /// Adds field or field pattern to highlighter
    pub fn field_highlighter<F, H>(mut self, field: F, highlighter: H) -> Self
    where
        F: ToString,
        H: Into<Highlighter>,
    {
        self.fields
            .push(KeyValuePair::new(field.to_string(), highlighter.into()));
        self
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the JSON produced by [Highlight] for progressively richer configurations.
    #[test]
    fn serialization() {
        // Nothing configured: both members are skipped.
        let empty = Highlight::new();
        assert_serialize(empty, json!({}));

        // Fields only: emitted in insertion order with empty settings.
        let fields_only = Highlight::new()
            .field("field1")
            .field("field2")
            .field("field3");
        let fields_only_json = json!({
            "fields": [
                { "field1": {} },
                { "field2": {} },
                { "field3": {} },
            ]
        });
        assert_serialize(fields_only, fields_only_json);

        // Global tags are flattened next to `fields`.
        let tagged_settings = Highlighter::new().tags((["<eim>"], ["</eim>"]));
        let tagged = Highlight::new()
            .highlighter(tagged_settings)
            .field("field3")
            .field("field2")
            .field("field1");
        let tagged_json = json!({
            "pre_tags": ["<eim>"],
            "post_tags": ["</eim>"],
            "fields": [
                { "field3": {} },
                { "field2": {} },
                { "field1": {} },
            ]
        });
        assert_serialize(tagged, tagged_json);

        // Global `fvh` settings combined with a per-field `plain` override.
        let fvh_settings = Highlighter::new()
            .tags((["<eim>"], ["</eim>"]))
            .fvh()
            .matched_fields(["one", "two", "three"]);
        let with_override = Highlight::new()
            .highlighter(fvh_settings)
            .field("field1")
            .field("field2")
            .field_highlighter("field3", Highlighter::new().plain().no_match_size(2u32));
        let with_override_json = json!({
            "pre_tags": ["<eim>"],
            "post_tags": ["</eim>"],
            "matched_fields": ["one", "two", "three"],
            "type": "fvh",
            "fields": [
                { "field1": {} },
                { "field2": {} },
                { "field3": { "type": "plain", "no_match_size": 2 } },
            ]
        });
        assert_serialize(with_override, with_override_json);

        // Same as above, additionally ordering fragments by score.
        let ordered_settings = Highlighter::new()
            .tags((["<eim>"], ["</eim>"]))
            .fvh()
            .matched_fields(["one", "two", "three"])
            .order(Order::Score);
        let ordered = Highlight::new()
            .highlighter(ordered_settings)
            .field("field1")
            .field("field2")
            .field_highlighter("field3", Highlighter::new().plain().no_match_size(2u32));
        let ordered_json = json!({
            "pre_tags": ["<eim>"],
            "post_tags": ["</eim>"],
            "matched_fields": ["one", "two", "three"],
            "order": "score",
            "type": "fvh",
            "fields": [
                { "field1": {} },
                { "field2": {} },
                { "field3": { "type": "plain", "no_match_size": 2 } },
            ]
        });
        assert_serialize(ordered, ordered_json);
    }
}