1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
use crate::search::*;
use crate::util::*;
use serde::Serialize;
/// A multi-bucket value source based aggregation where buckets are dynamically built - one per unique value.
///
/// Instances are created with [`Aggregation::terms`] and refined through the builder
/// methods implemented on this type. The value serializes as an object with a `terms`
/// key; the serialization test in this file shows that `aggs` is left out when no
/// sub-aggregation has been attached.
///
/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html>
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct TermsAggregation {
/// Settings of the terms aggregation itself, serialized under the `terms` key.
terms: TermsAggregationInner,
/// Sub-aggregations, serialized under the `aggs` key unless
/// `ShouldSkip::should_skip` reports that the collection should be omitted.
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
aggs: Aggregations,
}
/// Body of the `terms` object of a [`TermsAggregation`].
///
/// Only `field` is always serialized. Every other member is guarded by
/// `ShouldSkip::should_skip`; the serialization test in this file shows that
/// members which were never set do not appear in the output.
#[derive(Debug, Clone, Serialize, PartialEq)]
struct TermsAggregationInner {
/// Name of the field whose values the buckets are built from.
field: String,
/// Number of term buckets to return, when set.
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
size: Option<u64>,
/// Whether a per-term document count error should be reported, when set.
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
show_term_doc_count_error: Option<bool>,
/// Bucket ordering rules; serialized as a JSON array (see the serialization test).
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
order: TermsOrderCollection,
/// Minimum number of matching documents a term needs in order to be returned, when set.
// NOTE(review): `u16` caps this at 65535 — confirm the narrow type is intentional.
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
min_doc_count: Option<u16>,
/// Replacement value used for documents that have no value in `field`, when set.
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
missing: Option<Term>,
}
impl Aggregation {
    /// Builds a [`TermsAggregation`] that groups documents by the values of `field`.
    ///
    /// - `field` - field to group by, stored through its [`ToString`] representation
    ///
    /// All optional settings start out unset, the order collection takes its
    /// default value and no sub-aggregations are attached.
    pub fn terms<T: ToString>(field: T) -> TermsAggregation {
        // Assemble the `terms` body first so the final value reads as two parts.
        let terms = TermsAggregationInner {
            field: field.to_string(),
            size: None,
            show_term_doc_count_error: None,
            order: Default::default(),
            min_doc_count: None,
            missing: None,
        };
        let aggs = Aggregations::new();

        TermsAggregation { terms, aggs }
    }
}
impl TermsAggregation {
/// The `size` parameter can be set to define how many term buckets should be returned out of the overall terms list.
///
/// By default, the node coordinating the search process will request each shard to provide its own top `size` term buckets
/// and once all shards respond, it will reduce the results to the final list that will then be returned to the client.
///
/// This means that if the number of unique terms is greater than `size`, the returned list is slightly off and not accurate
/// (it could be that the term counts are slightly off and it could even be that a term that should have been in the top `size` buckets was not returned).
pub fn size(mut self, size: u64) -> Self {
self.terms.size = Some(size);
self
}
/// Shows an error value for each term returned by the aggregation which represents the worst case error in the document
/// count and can be useful when deciding on a value for the shard_size parameter.
/// This is calculated by summing the document counts for the last term returned by all shards which did not return the term.
pub fn show_term_doc_count_error(mut self, show_term_doc_count_error: bool) -> Self {
self.terms.show_term_doc_count_error = Some(show_term_doc_count_error);
self
}
/// The order of the buckets can be customized by setting the order parameter.
/// By default, the buckets are ordered by their doc_count descending.
/// Order field allows changing this behavior.
///
/// > Sorting by ascending `_count` or by sub aggregation is discouraged as it increases the
/// [error](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-approximate-counts)
/// on document counts. It is fine when a single shard is queried, or when the field that is
/// being aggregated was used as a routing key at index time: in these cases results will be
/// accurate since shards have disjoint values. However otherwise, errors are unbounded.
/// One particular case that could still be useful is sorting by
/// [min](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-min-aggregation.html) or
/// [max](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-max-aggregation.html)
/// aggregation: counts will not be accurate but at least the top buckets will be correctly picked.
pub fn order<T>(mut self, order: T) -> Self
where
T: Into<TermsOrderCollection>,
{
self.terms.order = order.into();
self
}
/// Only returns terms that match more than a configured number of hits using the `min_doc_count`
///
/// Default value is `1`
pub fn min_doc_count(mut self, min_doc_count: u16) -> Self {
self.terms.min_doc_count = Some(min_doc_count);
self
}
/// The missing parameter defines how documents that are missing a value should be treated.
/// By default they will be ignored but it is also possible to treat them as if they had a value.
pub fn missing<T>(mut self, missing: T) -> Self
where
T: Serialize,
{
self.terms.missing = Term::new(missing);
self
}
add_aggregate!();
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Verifies the JSON emitted for a bare aggregation, for one with every
    /// option configured, and for one carrying a nested sub-aggregation.
    #[test]
    fn serialization() {
        // Only the mandatory field is given: nothing else may show up.
        let minimal = Aggregation::terms("test_field");
        assert_serialize_aggregation(minimal, json!({ "terms": { "field": "test_field" } }));

        // Every builder option is exercised once.
        let fully_configured = Aggregation::terms("test_field")
            .size(5)
            .min_doc_count(2)
            .show_term_doc_count_error(false)
            .missing("N/A")
            .order(TermsOrder::new("test_order", SortOrder::Asc));
        assert_serialize_aggregation(
            fully_configured,
            json!({
                "terms": {
                    "field": "test_field",
                    "size": 5,
                    "min_doc_count": 2,
                    "show_term_doc_count_error": false,
                    "missing": "N/A",
                    "order": [
                        { "test_order": "asc" }
                    ]
                }
            }),
        );

        // A sub-aggregation must be rendered under the `aggs` key.
        let nested = Aggregation::terms("test_field2").size(3).missing(false);
        let with_sub_aggregation = Aggregation::terms("test_field")
            .size(0)
            .order(TermsOrder::ascending("test_order"))
            .missing(123)
            .aggregate("test_sub_agg", nested);
        assert_serialize_aggregation(
            with_sub_aggregation,
            json!({
                "terms": {
                    "field": "test_field",
                    "size": 0,
                    "missing": 123,
                    "order": [
                        { "test_order": "asc" }
                    ]
                },
                "aggs": {
                    "test_sub_agg": {
                        "terms": {
                            "field": "test_field2",
                            "size": 3,
                            "missing": false
                        }
                    }
                }
            }),
        );
    }
}