elasticsearch_dsl/search/aggregations/metrics/
boxplot_aggregation.rs

1use crate::util::*;
2use crate::{Aggregation, Number};
3
4/// A `boxplot` metrics aggregation that computes boxplot of numeric values extracted from the
5/// aggregated documents. These values can be generated from specific numeric or [histogram fields](https://www.elastic.co/guide/en/elasticsearch/reference/current/histogram.html)
6/// in the documents.
7///
8/// The `boxplot` aggregation returns essential information for making a [box plot](https://en.wikipedia.org/wiki/Box_plot):
9/// minimum, maximum median, first quartile (25th percentile) and third quartile (75th percentile) values.
10///
11/// The algorithm used by the `boxplot` metric is called TDigest (introduced by Ted Dunning in
12/// [Computing Accurate Quantiles using T-Digests](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf)).
13///
14/// > Boxplot as other percentile aggregations are also [non-deterministic](https://en.wikipedia.org/wiki/Nondeterministic_algorithm).
15/// > This means you can get slightly different results using the same data.
16///
17/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-boxplot-aggregation.html>
18#[derive(Debug, Clone, Serialize, PartialEq)]
19pub struct BoxplotAggregation {
20    boxplot: BoxplotAggregationInner,
21}
22
23#[derive(Debug, Clone, Serialize, PartialEq)]
24struct BoxplotAggregationInner {
25    field: String,
26    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
27    compression: Option<Number>,
28    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
29    missing: Option<Number>,
30}
31
32impl Aggregation {
33    /// Creates an instance of [`BoxplotAggregation`]
34    ///
35    /// - `field` - field to aggregate
36    pub fn boxplot<T>(field: T) -> BoxplotAggregation
37    where
38        T: ToString,
39    {
40        BoxplotAggregation {
41            boxplot: BoxplotAggregationInner {
42                field: field.to_string(),
43                compression: None,
44                missing: None,
45            },
46        }
47    }
48}
49
50impl BoxplotAggregation {
51    /// Approximate algorithms must balance memory utilization with estimation accuracy.
52    ///
53    /// The TDigest algorithm uses a number of "nodes" to approximate percentiles —— the more
54    /// nodes available, the higher the accuracy (and large memory footprint) proportional to the
55    /// volume of data. The `compression` parameter limits the maximum number of nodes to 20 * `compression`.
56    ///
57    /// Therefore, by increasing the compression value, you can increase the accuracy of your
58    /// percentiles at the cost of more memory. Larger compression values also make the algorithm
59    /// slower since the underlying tree data structure grows in size, resulting in more expensive
60    /// operations. The default compression value is 100.
61    ///
62    /// A "node" uses roughly 32 bytes of memory, so under worst-case scenarios (large amount of
63    /// data which arrives sorted and in-order) the default settings will produce a TDigest roughly
64    /// 64KB in size. In practice data tends to be more random and the TDigest will use less memory.
65    pub fn compression<T>(mut self, compression: T) -> Self
66    where
67        T: Into<Number>,
68    {
69        self.boxplot.compression = Some(compression.into());
70        self
71    }
72
73    /// The `missing` parameter defines how documents that are missing a value should be treated.
74    /// By default they will be ignored but it is also possible to treat them as if they had a value.
75    pub fn missing<T>(mut self, missing: T) -> Self
76    where
77        T: Into<Number>,
78    {
79        self.boxplot.missing = Some(missing.into());
80        self
81    }
82}
83
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the serialized JSON with only the field set, then with every option set.
    #[test]
    fn serialization() {
        // Only the required field: optional settings must not appear in the output.
        let field_only = Aggregation::boxplot("test_field");
        let field_only_json = json!({ "boxplot": { "field": "test_field" } });
        assert_serialize_aggregation(field_only, field_only_json);

        // Every optional setting supplied through the builder methods.
        let with_options = Aggregation::boxplot("test_field")
            .compression(100)
            .missing(10);
        let with_options_json = json!({
            "boxplot": {
                "field": "test_field",
                "compression": 100,
                "missing": 10
            }
        });
        assert_serialize_aggregation(with_options, with_options_json);
    }
}