elasticsearch_dsl/search/aggregations/bucket/
terms_aggregation.rs

1use crate::search::*;
2use crate::util::*;
3use serde::Serialize;
4
5#[derive(Debug, Clone, Serialize, PartialEq)]
6/// A multi-bucket value source based aggregation where buckets are dynamically built - one per unique value.
7///
8/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html>
9pub struct TermsAggregation {
10    terms: TermsAggregationInner,
11
12    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
13    aggs: Aggregations,
14}
15
16#[derive(Debug, Clone, Serialize, PartialEq)]
17struct TermsAggregationInner {
18    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
19    field: Option<String>,
20
21    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
22    size: Option<u64>,
23
24    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
25    show_term_doc_count_error: Option<bool>,
26
27    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
28    order: TermsOrderCollection,
29
30    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
31    min_doc_count: Option<u16>,
32
33    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
34    missing: Option<Term>,
35
36    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
37    include: Option<TermsInclude>,
38
39    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
40    exclude: Option<TermsExclude>,
41
42    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
43    script: Option<Script>,
44}
45
46impl Aggregation {
47    /// Creates an instance of [`TermsAggregation`]
48    ///
49    /// - `field` - field to group by
50    pub fn terms<T>(field: T) -> TermsAggregation
51    where
52        T: ToString,
53    {
54        TermsAggregation {
55            terms: TermsAggregationInner {
56                field: Some(field.to_string()),
57                size: None,
58                show_term_doc_count_error: None,
59                order: Default::default(),
60                min_doc_count: None,
61                missing: None,
62                include: None,
63                exclude: None,
64                script: None,
65            },
66            aggs: Aggregations::new(),
67        }
68    }
69
70    /// Creates an instance of [`TermsAggregation`] with a script
71    pub fn terms_with_script(script: Script) -> TermsAggregation {
72        TermsAggregation {
73            terms: TermsAggregationInner {
74                field: None,
75                size: None,
76                show_term_doc_count_error: None,
77                order: Default::default(),
78                min_doc_count: None,
79                missing: None,
80                include: None,
81                exclude: None,
82                script: Some(script),
83            },
84            aggs: Aggregations::new(),
85        }
86    }
87}
88
89impl TermsAggregation {
90    /// The `size` parameter can be set to define how many term buckets should be returned out of the overall terms list.
91    ///
92    /// By default, the node coordinating the search process will request each shard to provide its own top `size` term buckets
93    /// and once all shards respond, it will reduce the results to the final list that will then be returned to the client.
94    ///
95    /// This means that if the number of unique terms is greater than `size`, the returned list is slightly off and not accurate
96    /// (it could be that the term counts are slightly off and it could even be that a term that should have been in the top `size` buckets was not returned).
97    pub fn size(mut self, size: u64) -> Self {
98        self.terms.size = Some(size);
99        self
100    }
101
102    /// Shows an error value for each term returned by the aggregation which represents the worst case error in the document
103    /// count and can be useful when deciding on a value for the shard_size parameter.
104    /// This is calculated by summing the document counts for the last term returned by all shards which did not return the term.
105    pub fn show_term_doc_count_error(mut self, show_term_doc_count_error: bool) -> Self {
106        self.terms.show_term_doc_count_error = Some(show_term_doc_count_error);
107        self
108    }
109
110    /// The order of the buckets can be customized by setting the order parameter.
111    /// By default, the buckets are ordered by their doc_count descending.
112    /// Order field allows changing this behavior.
113    ///
114    /// > Sorting by ascending `_count` or by sub aggregation is discouraged as it increases the
115    /// > [error](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-approximate-counts)
116    /// > on document counts. It is fine when a single shard is queried, or when the field that is
117    /// > being aggregated was used as a routing key at index time: in these cases results will be
118    /// > accurate since shards have disjoint values. However otherwise, errors are unbounded.
119    /// > One particular case that could still be useful is sorting by
120    /// > [min](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-min-aggregation.html) or
121    /// > [max](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-max-aggregation.html)
122    /// > aggregation: counts will not be accurate but at least the top buckets will be correctly picked.
123    pub fn order<T>(mut self, order: T) -> Self
124    where
125        T: Into<TermsOrderCollection>,
126    {
127        self.terms.order = order.into();
128        self
129    }
130
131    /// Only returns terms that match more than a configured number of hits using the `min_doc_count`
132    ///
133    /// Default value is `1`
134    pub fn min_doc_count(mut self, min_doc_count: u16) -> Self {
135        self.terms.min_doc_count = Some(min_doc_count);
136        self
137    }
138
139    /// The missing parameter defines how documents that are missing a value should be treated.
140    /// By default they will be ignored but it is also possible to treat them as if they had a value.
141    pub fn missing<T>(mut self, missing: T) -> Self
142    where
143        T: Serialize,
144    {
145        self.terms.missing = Term::new(missing);
146        self
147    }
148
149    /// The `include` parameter can be set to include only specific terms in the response.
150    pub fn include<T>(mut self, include: T) -> Self
151    where
152        T: Into<TermsInclude>,
153    {
154        self.terms.include = Some(include.into());
155        self
156    }
157
158    /// The `exclude` parameter can be set to exclude specific terms from the response.
159    pub fn exclude<T>(mut self, exclude: T) -> Self
160    where
161        T: Into<TermsExclude>,
162    {
163        self.terms.exclude = Some(exclude.into());
164        self
165    }
166
167    /// Sets the script for the aggregation.
168    pub fn script(mut self, script: Script) -> Self {
169        self.terms.script = Some(script);
170        self
171    }
172
173    /// The field can be Keyword, Numeric, ip, boolean, or binary.
174    pub fn field<T>(mut self, field: T) -> Self
175    where
176        T: Into<String>,
177    {
178        self.terms.field = Some(field.into());
179        self
180    }
181
182    add_aggregate!();
183}
184
#[cfg(test)]
mod tests {
    use super::*;

    /// Compares the JSON produced by the builder with the expected request body for a minimal,
    /// a fully configured, a nested and a script-based terms aggregation.
    #[test]
    fn serialization() {
        // Case 1: only the field is set, so no optional key is expected.
        let minimal = Aggregation::terms("test_field");
        let minimal_json = json!({ "terms": { "field": "test_field" } });
        assert_serialize_aggregation(minimal, minimal_json);

        // Case 2: scalar options plus an explicit sort order.
        let configured = Aggregation::terms("test_field")
            .size(5)
            .min_doc_count(2)
            .show_term_doc_count_error(false)
            .missing("N/A")
            .order(TermsOrder::new("test_order", SortOrder::Asc));
        let configured_json = json!({
            "terms": {
                "field": "test_field",
                "size": 5,
                "min_doc_count": 2,
                "show_term_doc_count_error": false,
                "missing": "N/A",
                "order": [
                    { "test_order": "asc" }
                ]
            }
        });
        assert_serialize_aggregation(configured, configured_json);

        // Case 3: include/exclude filters and a nested terms aggregation that uses a
        // partition-style include.
        let child = Aggregation::terms("test_field2")
            .size(3)
            .missing(false)
            .include([0, 20]);
        let parent = Aggregation::terms("test_field")
            .size(0)
            .order(TermsOrder::ascending("test_order"))
            .missing(123)
            .include(["mazda", "honda"])
            .exclude("water_.*")
            .aggregate("test_sub_agg", child);
        let parent_json = json!({
            "terms": {
                "field": "test_field",
                "size": 0,
                "missing": 123,
                "include": ["mazda", "honda"],
                "exclude": "water_.*",
                "order": [
                    { "test_order": "asc" }
                ]
            },
            "aggs": {
                "test_sub_agg": {
                    "terms": {
                        "field": "test_field2",
                        "size": 3,
                        "missing": false,
                        "include": {
                            "partition": 0,
                            "num_partitions": 20
                        }
                    }
                }
            }
        });
        assert_serialize_aggregation(parent, parent_json);

        // Case 4: script-driven aggregation; no `field` key is expected.
        let painless = Script::source("if (!doc['field1'].isEmpty()) { return 'f2'; } if (!doc['field2'].isEmpty()) { return 'f1'; } return 'unknown';")
            .lang("painless");
        let scripted = Aggregation::terms_with_script(painless).size(10);
        let scripted_json = json!({
            "terms": {
                "script": {
                    "source": "if (!doc['field1'].isEmpty()) { return 'f2'; } if (!doc['field2'].isEmpty()) { return 'f1'; } return 'unknown';",
                    "lang": "painless"
                },
                "size": 10
            }
        });
        assert_serialize_aggregation(scripted, scripted_json);
    }
}