elasticsearch_dsl/search/aggregations/bucket/terms_aggregation.rs
1use crate::search::*;
2use crate::util::*;
3use serde::Serialize;
4
5#[derive(Debug, Clone, Serialize, PartialEq)]
6/// A multi-bucket value source based aggregation where buckets are dynamically built - one per unique value.
7///
8/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html>
9pub struct TermsAggregation {
10 terms: TermsAggregationInner,
11
12 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
13 aggs: Aggregations,
14}
15
16#[derive(Debug, Clone, Serialize, PartialEq)]
17struct TermsAggregationInner {
18 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
19 field: Option<String>,
20
21 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
22 size: Option<u64>,
23
24 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
25 show_term_doc_count_error: Option<bool>,
26
27 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
28 order: TermsOrderCollection,
29
30 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
31 min_doc_count: Option<u16>,
32
33 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
34 missing: Option<Term>,
35
36 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
37 include: Option<TermsInclude>,
38
39 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
40 exclude: Option<TermsExclude>,
41
42 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
43 script: Option<Script>,
44}
45
46impl Aggregation {
47 /// Creates an instance of [`TermsAggregation`]
48 ///
49 /// - `field` - field to group by
50 pub fn terms<T>(field: T) -> TermsAggregation
51 where
52 T: ToString,
53 {
54 TermsAggregation {
55 terms: TermsAggregationInner {
56 field: Some(field.to_string()),
57 size: None,
58 show_term_doc_count_error: None,
59 order: Default::default(),
60 min_doc_count: None,
61 missing: None,
62 include: None,
63 exclude: None,
64 script: None,
65 },
66 aggs: Aggregations::new(),
67 }
68 }
69
70 /// Creates an instance of [`TermsAggregation`] with a script
71 pub fn terms_with_script(script: Script) -> TermsAggregation {
72 TermsAggregation {
73 terms: TermsAggregationInner {
74 field: None,
75 size: None,
76 show_term_doc_count_error: None,
77 order: Default::default(),
78 min_doc_count: None,
79 missing: None,
80 include: None,
81 exclude: None,
82 script: Some(script),
83 },
84 aggs: Aggregations::new(),
85 }
86 }
87}
88
89impl TermsAggregation {
90 /// The `size` parameter can be set to define how many term buckets should be returned out of the overall terms list.
91 ///
92 /// By default, the node coordinating the search process will request each shard to provide its own top `size` term buckets
93 /// and once all shards respond, it will reduce the results to the final list that will then be returned to the client.
94 ///
95 /// This means that if the number of unique terms is greater than `size`, the returned list is slightly off and not accurate
96 /// (it could be that the term counts are slightly off and it could even be that a term that should have been in the top `size` buckets was not returned).
97 pub fn size(mut self, size: u64) -> Self {
98 self.terms.size = Some(size);
99 self
100 }
101
102 /// Shows an error value for each term returned by the aggregation which represents the worst case error in the document
103 /// count and can be useful when deciding on a value for the shard_size parameter.
104 /// This is calculated by summing the document counts for the last term returned by all shards which did not return the term.
105 pub fn show_term_doc_count_error(mut self, show_term_doc_count_error: bool) -> Self {
106 self.terms.show_term_doc_count_error = Some(show_term_doc_count_error);
107 self
108 }
109
110 /// The order of the buckets can be customized by setting the order parameter.
111 /// By default, the buckets are ordered by their doc_count descending.
112 /// Order field allows changing this behavior.
113 ///
114 /// > Sorting by ascending `_count` or by sub aggregation is discouraged as it increases the
115 /// > [error](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-approximate-counts)
116 /// > on document counts. It is fine when a single shard is queried, or when the field that is
117 /// > being aggregated was used as a routing key at index time: in these cases results will be
118 /// > accurate since shards have disjoint values. However otherwise, errors are unbounded.
119 /// > One particular case that could still be useful is sorting by
120 /// > [min](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-min-aggregation.html) or
121 /// > [max](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-max-aggregation.html)
122 /// > aggregation: counts will not be accurate but at least the top buckets will be correctly picked.
123 pub fn order<T>(mut self, order: T) -> Self
124 where
125 T: Into<TermsOrderCollection>,
126 {
127 self.terms.order = order.into();
128 self
129 }
130
131 /// Only returns terms that match more than a configured number of hits using the `min_doc_count`
132 ///
133 /// Default value is `1`
134 pub fn min_doc_count(mut self, min_doc_count: u16) -> Self {
135 self.terms.min_doc_count = Some(min_doc_count);
136 self
137 }
138
139 /// The missing parameter defines how documents that are missing a value should be treated.
140 /// By default they will be ignored but it is also possible to treat them as if they had a value.
141 pub fn missing<T>(mut self, missing: T) -> Self
142 where
143 T: Serialize,
144 {
145 self.terms.missing = Term::new(missing);
146 self
147 }
148
149 /// The `include` parameter can be set to include only specific terms in the response.
150 pub fn include<T>(mut self, include: T) -> Self
151 where
152 T: Into<TermsInclude>,
153 {
154 self.terms.include = Some(include.into());
155 self
156 }
157
158 /// The `exclude` parameter can be set to exclude specific terms from the response.
159 pub fn exclude<T>(mut self, exclude: T) -> Self
160 where
161 T: Into<TermsExclude>,
162 {
163 self.terms.exclude = Some(exclude.into());
164 self
165 }
166
167 /// Sets the script for the aggregation.
168 pub fn script(mut self, script: Script) -> Self {
169 self.terms.script = Some(script);
170 self
171 }
172
173 /// The field can be Keyword, Numeric, ip, boolean, or binary.
174 pub fn field<T>(mut self, field: T) -> Self
175 where
176 T: Into<String>,
177 {
178 self.terms.field = Some(field.into());
179 self
180 }
181
182 add_aggregate!();
183}
184
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the JSON produced by [`TermsAggregation`] for a handful of builder combinations.
    #[test]
    fn serialization() {
        // Field only: `field` is the single key under `terms`.
        let field_only = Aggregation::terms("test_field");
        let field_only_json = json!({ "terms": { "field": "test_field" } });
        assert_serialize_aggregation(field_only, field_only_json);

        // Scalar options together with an explicit order.
        let with_options = Aggregation::terms("test_field")
            .size(5)
            .min_doc_count(2)
            .show_term_doc_count_error(false)
            .missing("N/A")
            .order(TermsOrder::new("test_order", SortOrder::Asc));
        let with_options_json = json!({
            "terms": {
                "field": "test_field",
                "size": 5,
                "min_doc_count": 2,
                "show_term_doc_count_error": false,
                "missing": "N/A",
                "order": [
                    { "test_order": "asc" }
                ]
            }
        });
        assert_serialize_aggregation(with_options, with_options_json);

        // Include/exclude filters plus a nested terms sub-aggregation.
        let nested = Aggregation::terms("test_field2")
            .size(3)
            .missing(false)
            .include([0, 20]);
        let with_sub_agg = Aggregation::terms("test_field")
            .size(0)
            .order(TermsOrder::ascending("test_order"))
            .missing(123)
            .include(["mazda", "honda"])
            .exclude("water_.*")
            .aggregate("test_sub_agg", nested);
        let with_sub_agg_json = json!({
            "terms": {
                "field": "test_field",
                "size": 0,
                "missing": 123,
                "include": ["mazda", "honda"],
                "exclude": "water_.*",
                "order": [
                    { "test_order": "asc" }
                ]
            },
            "aggs": {
                "test_sub_agg": {
                    "terms": {
                        "field": "test_field2",
                        "size": 3,
                        "missing": false,
                        "include": {
                            "partition": 0,
                            "num_partitions": 20
                        }
                    }
                }
            }
        });
        assert_serialize_aggregation(with_sub_agg, with_sub_agg_json);

        // Script-based aggregation: `script` and `size` are the only keys under `terms`.
        let scripted = Aggregation::terms_with_script(
            Script::source("if (!doc['field1'].isEmpty()) { return 'f2'; } if (!doc['field2'].isEmpty()) { return 'f1'; } return 'unknown';")
                .lang("painless"),
        )
        .size(10);
        let scripted_json = json!({
            "terms": {
                "script": {
                    "source": "if (!doc['field1'].isEmpty()) { return 'f2'; } if (!doc['field2'].isEmpty()) { return 'f1'; } return 'unknown';",
                    "lang": "painless"
                },
                "size": 10
            }
        });
        assert_serialize_aggregation(scripted, scripted_json);
    }
}