logo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
//! Rescore clause: runs a second query over the top results of the original query to make the final ranking more accurate.
//! <https://www.elastic.co/guide/en/elasticsearch/reference/6.8/search-request-rescore.html>

use crate::search::*;
use crate::util::*;
use std::convert::TryInto;

/// Rescoring can help to improve precision by reordering just the top (e.g. 100 - 500)
/// documents returned by the [query](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html#request-body-search-query)
/// and [post_filter](https://www.elastic.co/guide/en/elasticsearch/reference/current/filter-search-results.html#post-filter)
/// phases, using a secondary (usually more costly) algorithm, instead of applying the costly algorithm to all documents in the index.
///
/// A `rescore` request is executed on each shard before it returns its results to be sorted by the node handling the overall search request.
///
/// Currently the rescore API has only one implementation: the query rescorer, which uses a query to tweak the scoring.
/// In the future, alternative rescorers may be made available, for example, a pair-wise rescorer.
///
/// To create a `rescore` clause with a simple `term` query:
/// ```
/// # use elasticsearch_dsl::rescoring::*;
/// # use elasticsearch_dsl::queries::*;
/// # use elasticsearch_dsl::queries::params::*;
/// # let rescore =
/// Rescore::new(Query::term("title", "test"));
/// ```
/// To create a `rescore` clause with a simple `term` query and optional fields:
/// ```
/// # use elasticsearch_dsl::rescoring::*;
/// # use elasticsearch_dsl::queries::*;
/// # use elasticsearch_dsl::queries::params::*;
/// # let query =
/// Rescore::new(Query::term("title", "test"))
///     .rescore_query_weight(0.2)
///     .window_size(100);
/// ```
/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/filter-search-results.html#rescore>
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct Rescore {
    /// Query rescorer settings; serialized as the nested object under the `query` key.
    query: Inner,

    /// Number of documents to rescore. Omitted from the serialized output when
    /// `ShouldSkip::should_skip` reports it as skippable (the tests in this module show an
    /// unset value being left out).
    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
    window_size: Option<u64>,
}

/// Body of the query rescorer: the object that [`Rescore`] serializes under its `query` key.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct Inner {
    /// Secondary query used to rescore the results of the original query.
    ///
    /// NOTE(review): unlike the weights below, this field has no `skip_serializing_if`
    /// attribute, so a `None` value is presumably still written out (as `null`) — confirm
    /// that callers rely on `Rescore`'s `ShouldSkip` impl to drop such clauses.
    rescore_query: Option<Query>,

    /// Weight applied to the rescore query's score; skipped during serialization when
    /// `ShouldSkip::should_skip` says so.
    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
    rescore_query_weight: Option<f64>,

    /// Weight applied to the original query's score; skipped during serialization when
    /// `ShouldSkip::should_skip` says so.
    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
    query_weight: Option<f64>,
}

impl Rescore {
    /// Builds a [`Rescore`] clause around the given secondary query.
    ///
    /// - `query` - Second query, executed on the top-k results returned by the original query.
    ///
    /// Both weights and the window size start out unset.
    pub fn new(query: impl Into<Option<Query>>) -> Self {
        let rescorer = Inner {
            rescore_query: query.into(),
            rescore_query_weight: None,
            query_weight: None,
        };

        Self {
            query: rescorer,
            window_size: None,
        }
    }

    /// Sets `window_size`, the number of docs examined on each shard (defaults to 10).
    ///
    /// A value that cannot be converted into `u64` is ignored and the builder is returned
    /// unchanged.
    pub fn window_size(self, window_size: impl TryInto<u64>) -> Self {
        match window_size.try_into() {
            Ok(size) => Self {
                window_size: Some(size),
                ..self
            },
            // Conversion failed: keep whatever window size was configured before.
            Err(_) => self,
        }
    }

    /// Sets `rescore_query_weight`, the relative importance of the rescore query.
    /// Defaults to 1.
    pub fn rescore_query_weight(mut self, rescore_query_weight: impl Into<f64>) -> Self {
        let weight: f64 = rescore_query_weight.into();
        self.query.rescore_query_weight = Some(weight);
        self
    }

    /// Sets `query_weight`, the relative importance of the original query.
    /// Defaults to 1.
    pub fn query_weight(mut self, query_weight: impl Into<f64>) -> Self {
        let weight: f64 = query_weight.into();
        self.query.query_weight = Some(weight);
        self
    }
}

impl ShouldSkip for Rescore {
    /// A rescore clause is skippable when it has no rescore query, or when that query
    /// itself reports that it should be skipped.
    fn should_skip(&self) -> bool {
        match &self.query.rescore_query {
            Some(rescore_query) => rescore_query.should_skip(),
            None => true,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The clause mirrors the skip state of the query it wraps.
    #[test]
    fn should_skip() {
        let without_bounds = Rescore::new(Query::range("field"));
        let with_lower_bound = Rescore::new(Query::range("field").gte(1));

        assert!(without_bounds.should_skip());
        assert!(!with_lower_bound.should_skip());
    }

    /// Checks the JSON shape with and without the optional settings.
    #[test]
    fn serialization() {
        // Only the mandatory rescore query: no weights, no window size in the output.
        let minimal = Rescore::new(Query::term("title", "test"));
        let minimal_json = json!({
            "query": {
                "rescore_query": {
                    "term": {
                        "title": {
                            "value": "test"
                        }
                    }
                }
            }
        });
        assert_serialize(minimal, minimal_json);

        // Every optional setting supplied through the builder methods.
        let complete = Rescore::new(Query::term("title", "test"))
            .rescore_query_weight(0.2)
            .query_weight(0.5)
            .window_size(100);
        let complete_json = json!({
            "query": {
                "rescore_query": {
                    "term": {
                        "title": {
                            "value": "test"
                        }
                    }
                },
                "query_weight": 0.5,
                "rescore_query_weight": 0.2
            },
            "window_size": 100
        });
        assert_serialize(complete, complete_json);
    }
}