elasticsearch_dsl/search/aggregations/bucket/date_histogram_aggregation.rs
1use crate::search::*;
2use crate::util::*;
3use chrono::{DateTime, Utc};
4
5/// This multi-bucket aggregation is similar to the normal histogram, but it can only be used with date or date range
6/// values. Because dates are represented internally in Elasticsearch as long values, it is possible, but not as
7/// accurate, to use the normal histogram on dates as well. The main difference in the two APIs is that here the
8/// interval can be specified using date/time expressions. Time-based data requires special support because time-based
9/// intervals are not always a fixed length.
10///
11/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-datehistogram-aggregation.html>
12#[derive(Debug, Clone, Serialize, PartialEq)]
13pub struct DateHistogramAggregation {
14 date_histogram: DateHistogramAggregationInner,
15
16 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
17 aggs: Aggregations,
18}
19
20#[derive(Debug, Clone, Serialize, PartialEq)]
21struct DateHistogramAggregationInner {
22 field: String,
23
24 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
25 calendar_interval: Option<CalendarInterval>,
26
27 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
28 fixed_interval: Option<Time>,
29
30 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
31 min_doc_count: Option<u32>,
32
33 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
34 missing: Option<DateTime<Utc>>,
35
36 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
37 offset: Option<String>,
38
39 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
40 format: Option<String>,
41
42 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
43 time_zone: Option<String>,
44
45 #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
46 order: TermsOrderCollection,
47}
48
49impl Aggregation {
50 /// Creates an instance of [`DateHistogramAggregation`]
51 ///
52 /// - `field` - field to group by
53 pub fn date_histogram<T>(field: T) -> DateHistogramAggregation
54 where
55 T: ToString,
56 {
57 DateHistogramAggregation {
58 date_histogram: DateHistogramAggregationInner {
59 field: field.to_string(),
60 calendar_interval: None,
61 fixed_interval: None,
62 min_doc_count: None,
63 missing: None,
64 offset: None,
65 format: None,
66 time_zone: None,
67 order: Default::default(),
68 },
69 aggs: Aggregations::new(),
70 }
71 }
72}
73
74impl DateHistogramAggregation {
75 /// Calendar-aware intervals are configured with the calendar_interval parameter
76 pub fn calendar_interval(mut self, calendar_interval: CalendarInterval) -> Self {
77 self.date_histogram.calendar_interval = Some(calendar_interval);
78 self
79 }
80
81 /// In contrast to calendar-aware intervals, fixed intervals are a fixed number of SI units and never deviate,
82 /// regardless of where they fall on the calendar. One second is always composed of 1000ms. This allows fixed
83 /// intervals to be specified in any multiple of the supported units.
84 pub fn fixed_interval(mut self, fixed_interval: Time) -> Self {
85 self.date_histogram.fixed_interval = Some(fixed_interval);
86 self
87 }
88
89 /// Only returns terms that match more than a configured number of hits using the `min_doc_count`
90 ///
91 /// Default value is `1`
92 pub fn min_doc_count(mut self, min_doc_count: u32) -> Self {
93 self.date_histogram.min_doc_count = Some(min_doc_count);
94 self
95 }
96
97 /// The missing parameter defines how documents that are missing a value should be treated.
98 /// By default they will be ignored but it is also possible to treat them as if they had a value.
99 pub fn missing(mut self, missing: DateTime<Utc>) -> Self {
100 self.date_histogram.missing = Some(missing);
101 self
102 }
103
104 /// Use the offset parameter to change the start value of each bucket by the specified positive (+) or negative
105 /// offset (-) duration, such as 1h for an hour, or 1d for a day. See Time units for more possible time duration
106 /// options.
107 pub fn offset<T>(mut self, offset: T) -> Self
108 where
109 T: ToString,
110 {
111 self.date_histogram.offset = Some(offset.to_string());
112 self
113 }
114
115 /// Sets the format for the date keys returned in the aggregation response.
116 ///
117 /// The `key` for each bucket is returned as a millisecond-since-the-epoch string.
118 /// The `format` parameter can be used to convert this key into a formatted date string
119 /// using the same date format patterns as the `date` field mapping.
120 ///
121 /// <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-datehistogram-aggregation.html#datehistogram-aggregation-keys>
122 pub fn format<T>(mut self, format: T) -> Self
123 where
124 T: ToString,
125 {
126 self.date_histogram.format = Some(format.to_string());
127 self
128 }
129
130 /// Elasticsearch stores date-times in Coordinated Universal Time (UTC). By default, all bucketing and rounding is
131 /// also done in UTC. Use the time_zone parameter to indicate that bucketing should use a different time zone.
132 pub fn time_zone<T>(mut self, time_zone: T) -> Self
133 where
134 T: ToString,
135 {
136 self.date_histogram.time_zone = Some(time_zone.to_string());
137 self
138 }
139
140 /// The order of the buckets can be customized by setting the order parameter.
141 /// By default, the buckets are ordered by their doc_count descending.
142 /// Order field allows changing this behavior.
143 ///
144 /// > Sorting by ascending `_count` or by sub aggregation is discouraged as it increases the
145 /// > [error](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-approximate-counts)
146 /// > on document counts. It is fine when a single shard is queried, or when the field that is
147 /// > being aggregated was used as a routing key at index time: in these cases results will be
148 /// > accurate since shards have disjoint values. However otherwise, errors are unbounded.
149 /// > One particular case that could still be useful is sorting by
150 /// > [min](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-min-aggregation.html) or
151 /// > [max](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-max-aggregation.html)
152 /// > aggregation: counts will not be accurate but at least the top buckets will be correctly picked.
153 pub fn order<T>(mut self, order: T) -> Self
154 where
155 T: Into<TermsOrderCollection>,
156 {
157 self.date_histogram.order = order.into();
158 self
159 }
160
161 add_aggregate!();
162}
163
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::prelude::*;

    /// Checks the JSON produced for a bare, a fully configured and a nested aggregation.
    #[test]
    fn serialization() {
        // Bare aggregation: nothing but the mandatory `field` is emitted.
        let bare = Aggregation::date_histogram("test_field");
        assert_serialize_aggregation(
            bare,
            json!({ "date_histogram": { "field": "test_field" } }),
        );

        // Fully configured aggregation: every builder option lands under `date_histogram`.
        let missing_value = Utc
            .with_ymd_and_hms(2014, 11, 28, 12, 0, 4)
            .single()
            .unwrap();
        let configured = Aggregation::date_histogram("test_field")
            .calendar_interval(CalendarInterval::Day)
            .fixed_interval(Time::Hours(1))
            .min_doc_count(2)
            .missing(missing_value)
            .order(TermsOrder::new("test_order", SortOrder::Asc))
            .offset("+6h")
            .format("yyyy-MM-dd")
            .time_zone("-01:00");
        let configured_json = json!({
            "date_histogram": {
                "field": "test_field",
                "calendar_interval": "day",
                "fixed_interval": "1h",
                "min_doc_count": 2,
                "missing": "2014-11-28T12:00:04Z",
                "order": [
                    { "test_order": "asc" }
                ],
                "offset": "+6h",
                "format": "yyyy-MM-dd",
                "time_zone": "-01:00"
            }
        });
        assert_serialize_aggregation(configured, configured_json);

        // Nested aggregation: sub-aggregations are emitted next to `date_histogram` under `aggs`.
        let nested = Aggregation::date_histogram("test_field")
            .aggregate("test_sub_agg", Aggregation::terms("test_field2"));
        let nested_json = json!({
            "date_histogram": {
                "field": "test_field",
            },
            "aggs": {
                "test_sub_agg": {
                    "terms": {
                        "field": "test_field2"
                    }
                }
            }
        });
        assert_serialize_aggregation(nested, nested_json);
    }
}