elasticsearch_dsl/search/aggregations/bucket/
date_histogram_aggregation.rs

1use crate::search::*;
2use crate::util::*;
3use chrono::{DateTime, Utc};
4
5/// This multi-bucket aggregation is similar to the normal histogram, but it can only be used with date or date range
6/// values. Because dates are represented internally in Elasticsearch as long values, it is possible, but not as
7/// accurate, to use the normal histogram on dates as well. The main difference in the two APIs is that here the
8/// interval can be specified using date/time expressions. Time-based data requires special support because time-based
9/// intervals are not always a fixed length.
10///
11/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-datehistogram-aggregation.html>
12#[derive(Debug, Clone, Serialize, PartialEq)]
13pub struct DateHistogramAggregation {
14    date_histogram: DateHistogramAggregationInner,
15
16    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
17    aggs: Aggregations,
18}
19
20#[derive(Debug, Clone, Serialize, PartialEq)]
21struct DateHistogramAggregationInner {
22    field: String,
23
24    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
25    calendar_interval: Option<CalendarInterval>,
26
27    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
28    fixed_interval: Option<Time>,
29
30    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
31    min_doc_count: Option<u32>,
32
33    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
34    missing: Option<DateTime<Utc>>,
35
36    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
37    offset: Option<String>,
38
39    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
40    format: Option<String>,
41
42    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
43    time_zone: Option<String>,
44
45    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
46    order: TermsOrderCollection,
47}
48
49impl Aggregation {
50    /// Creates an instance of [`DateHistogramAggregation`]
51    ///
52    /// - `field` - field to group by
53    pub fn date_histogram<T>(field: T) -> DateHistogramAggregation
54    where
55        T: ToString,
56    {
57        DateHistogramAggregation {
58            date_histogram: DateHistogramAggregationInner {
59                field: field.to_string(),
60                calendar_interval: None,
61                fixed_interval: None,
62                min_doc_count: None,
63                missing: None,
64                offset: None,
65                format: None,
66                time_zone: None,
67                order: Default::default(),
68            },
69            aggs: Aggregations::new(),
70        }
71    }
72}
73
74impl DateHistogramAggregation {
75    /// Calendar-aware intervals are configured with the calendar_interval parameter
76    pub fn calendar_interval(mut self, calendar_interval: CalendarInterval) -> Self {
77        self.date_histogram.calendar_interval = Some(calendar_interval);
78        self
79    }
80
81    /// In contrast to calendar-aware intervals, fixed intervals are a fixed number of SI units and never deviate,
82    /// regardless of where they fall on the calendar. One second is always composed of 1000ms. This allows fixed
83    /// intervals to be specified in any multiple of the supported units.
84    pub fn fixed_interval(mut self, fixed_interval: Time) -> Self {
85        self.date_histogram.fixed_interval = Some(fixed_interval);
86        self
87    }
88
89    /// Only returns terms that match more than a configured number of hits using the `min_doc_count`
90    ///
91    /// Default value is `1`
92    pub fn min_doc_count(mut self, min_doc_count: u32) -> Self {
93        self.date_histogram.min_doc_count = Some(min_doc_count);
94        self
95    }
96
97    /// The missing parameter defines how documents that are missing a value should be treated.
98    /// By default they will be ignored but it is also possible to treat them as if they had a value.
99    pub fn missing(mut self, missing: DateTime<Utc>) -> Self {
100        self.date_histogram.missing = Some(missing);
101        self
102    }
103
104    /// Use the offset parameter to change the start value of each bucket by the specified positive (+) or negative
105    /// offset (-) duration, such as 1h for an hour, or 1d for a day. See Time units for more possible time duration
106    /// options.
107    pub fn offset<T>(mut self, offset: T) -> Self
108    where
109        T: ToString,
110    {
111        self.date_histogram.offset = Some(offset.to_string());
112        self
113    }
114
115    /// Sets the format for the date keys returned in the aggregation response.
116    ///
117    /// The `key` for each bucket is returned as a millisecond-since-the-epoch string.
118    /// The `format` parameter can be used to convert this key into a formatted date string
119    /// using the same date format patterns as the `date` field mapping.
120    ///
121    /// <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-datehistogram-aggregation.html#datehistogram-aggregation-keys>
122    pub fn format<T>(mut self, format: T) -> Self
123    where
124        T: ToString,
125    {
126        self.date_histogram.format = Some(format.to_string());
127        self
128    }
129
130    /// Elasticsearch stores date-times in Coordinated Universal Time (UTC). By default, all bucketing and rounding is
131    /// also done in UTC. Use the time_zone parameter to indicate that bucketing should use a different time zone.
132    pub fn time_zone<T>(mut self, time_zone: T) -> Self
133    where
134        T: ToString,
135    {
136        self.date_histogram.time_zone = Some(time_zone.to_string());
137        self
138    }
139
140    /// The order of the buckets can be customized by setting the order parameter.
141    /// By default, the buckets are ordered by their doc_count descending.
142    /// Order field allows changing this behavior.
143    ///
144    /// > Sorting by ascending `_count` or by sub aggregation is discouraged as it increases the
145    /// > [error](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-approximate-counts)
146    /// > on document counts. It is fine when a single shard is queried, or when the field that is
147    /// > being aggregated was used as a routing key at index time: in these cases results will be
148    /// > accurate since shards have disjoint values. However otherwise, errors are unbounded.
149    /// > One particular case that could still be useful is sorting by
150    /// > [min](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-min-aggregation.html) or
151    /// > [max](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-max-aggregation.html)
152    /// > aggregation: counts will not be accurate but at least the top buckets will be correctly picked.
153    pub fn order<T>(mut self, order: T) -> Self
154    where
155        T: Into<TermsOrderCollection>,
156    {
157        self.date_histogram.order = order.into();
158        self
159    }
160
161    add_aggregate!();
162}
163
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::prelude::*;

    /// Checks the JSON produced for a bare aggregation, for one with every option set,
    /// and for one carrying a sub-aggregation.
    #[test]
    fn serialization() {
        // Only the mandatory field: every optional parameter must be omitted.
        let minimal = Aggregation::date_histogram("test_field");
        let minimal_json = json!({ "date_histogram": { "field": "test_field" } });
        assert_serialize_aggregation(minimal, minimal_json);

        // Every builder method applied once.
        let missing_at = Utc
            .with_ymd_and_hms(2014, 11, 28, 12, 0, 4)
            .single()
            .unwrap();
        let fully_configured = Aggregation::date_histogram("test_field")
            .calendar_interval(CalendarInterval::Day)
            .fixed_interval(Time::Hours(1))
            .min_doc_count(2)
            .missing(missing_at)
            .order(TermsOrder::new("test_order", SortOrder::Asc))
            .offset("+6h")
            .format("yyyy-MM-dd")
            .time_zone("-01:00");
        let fully_configured_json = json!({
            "date_histogram": {
                "field": "test_field",
                "calendar_interval": "day",
                "fixed_interval": "1h",
                "min_doc_count": 2,
                "missing": "2014-11-28T12:00:04Z",
                "order": [
                    { "test_order": "asc" }
                ],
                "offset": "+6h",
                "format": "yyyy-MM-dd",
                "time_zone": "-01:00"
            }
        });
        assert_serialize_aggregation(fully_configured, fully_configured_json);

        // A nested aggregation must show up under `aggs`, next to `date_histogram`.
        let with_sub_aggregation = Aggregation::date_histogram("test_field")
            .aggregate("test_sub_agg", Aggregation::terms("test_field2"));
        let with_sub_aggregation_json = json!({
            "date_histogram": {
                "field": "test_field",
            },
            "aggs": {
                "test_sub_agg": {
                    "terms": {
                        "field": "test_field2"
                    }
                }
            }
        });
        assert_serialize_aggregation(with_sub_aggregation, with_sub_aggregation_json);
    }
}