use crate::search::*;
use crate::util::*;
use chrono::{DateTime, Utc};
/// This multi-bucket aggregation is similar to the normal histogram, but it can only be used with date or date range
/// values. Because dates are represented internally in Elasticsearch as long values, it is possible, but not as
/// accurate, to use the normal histogram on dates as well. The main difference between the two APIs is that here the
/// interval can be specified using date/time expressions. Time-based data requires special support because time-based
/// intervals are not always a fixed length.
///
/// <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-datehistogram-aggregation.html>
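///
/// A minimal usage sketch; the field name is illustrative, and the import assumes the crate's
/// usual root re-exports:
///
/// ```
/// # use elasticsearch_dsl::*;
/// // Bucket documents into daily buckets based on a hypothetical `created_at` date field,
/// // omitting empty buckets
/// let aggregation = Aggregation::date_histogram("created_at")
///     .calendar_interval(CalendarInterval::Day)
///     .min_doc_count(1);
/// ```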
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct DateHistogramAggregation {
date_histogram: DateHistogramAggregationInner,
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
aggs: Aggregations,
}
#[derive(Debug, Clone, Serialize, PartialEq)]
struct DateHistogramAggregationInner {
field: String,
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
calendar_interval: Option<CalendarInterval>,
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
fixed_interval: Option<Time>,
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
min_doc_count: Option<u32>,
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
missing: Option<DateTime<Utc>>,
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
offset: Option<String>,
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
time_zone: Option<String>,
#[serde(skip_serializing_if = "ShouldSkip::should_skip")]
order: TermsOrderCollection,
}
impl Aggregation {
/// Creates an instance of [`DateHistogramAggregation`]
///
/// - `field` - field to group by
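    ///
    /// A short sketch, assuming the crate's root re-exports (the field name is illustrative):
    ///
    /// ```
    /// # use elasticsearch_dsl::*;
    /// let aggregation = Aggregation::date_histogram("created_at");
    /// ```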
pub fn date_histogram<T>(field: T) -> DateHistogramAggregation
where
T: ToString,
{
DateHistogramAggregation {
date_histogram: DateHistogramAggregationInner {
field: field.to_string(),
calendar_interval: None,
fixed_interval: None,
min_doc_count: None,
missing: None,
offset: None,
time_zone: None,
order: Default::default(),
},
aggs: Aggregations::new(),
}
}
}
impl DateHistogramAggregation {
/// Calendar-aware intervals are configured with the calendar_interval parameter. Unlike fixed
/// intervals, calendar intervals account for irregularities such as daylight saving time and
/// months of varying length.
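///
/// For example, bucketing per calendar day (the field name is illustrative):
///
/// ```
/// # use elasticsearch_dsl::*;
/// let aggregation = Aggregation::date_histogram("created_at")
///     .calendar_interval(CalendarInterval::Day);
/// ```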
pub fn calendar_interval(mut self, calendar_interval: CalendarInterval) -> Self {
self.date_histogram.calendar_interval = Some(calendar_interval);
self
}
/// In contrast to calendar-aware intervals, fixed intervals are a fixed number of SI units and never deviate,
/// regardless of where they fall on the calendar. One second is always composed of 1000ms. This allows fixed
/// intervals to be specified in any multiple of the supported units.
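///
/// For example, fixed twelve-hour buckets (the field name is illustrative):
///
/// ```
/// # use elasticsearch_dsl::*;
/// let aggregation = Aggregation::date_histogram("created_at")
///     .fixed_interval(Time::Hours(12));
/// ```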
pub fn fixed_interval(mut self, fixed_interval: Time) -> Self {
self.date_histogram.fixed_interval = Some(fixed_interval);
self
}
/// Only returns buckets that contain `min_doc_count` or more documents; buckets with fewer
/// documents are omitted.
///
/// For histograms this defaults to `0`, meaning empty buckets are returned to fill gaps;
/// set it to `1` to omit them.
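///
/// For example, omitting empty buckets (the field name is illustrative):
///
/// ```
/// # use elasticsearch_dsl::*;
/// let aggregation = Aggregation::date_histogram("created_at")
///     .calendar_interval(CalendarInterval::Day)
///     .min_doc_count(1);
/// ```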
pub fn min_doc_count(mut self, min_doc_count: u32) -> Self {
self.date_histogram.min_doc_count = Some(min_doc_count);
self
}
/// The missing parameter defines how documents that are missing a value should be treated.
/// By default, they are ignored, but it is also possible to treat them as if they had a value.
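///
/// For example, treating documents without a value as if dated 2014-01-01 (the date
/// construction mirrors this file's tests; the field name is illustrative):
///
/// ```
/// # use elasticsearch_dsl::*;
/// # use chrono::prelude::*;
/// let aggregation = Aggregation::date_histogram("created_at")
///     .missing(Utc.with_ymd_and_hms(2014, 1, 1, 0, 0, 0).single().unwrap());
/// ```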
pub fn missing(mut self, missing: DateTime<Utc>) -> Self {
self.date_histogram.missing = Some(missing);
self
}
/// Use the offset parameter to change the start value of each bucket by the specified positive (`+`) or
/// negative (`-`) offset duration, such as `1h` for an hour, or `1d` for a day. See Time units for more
/// possible time duration options.
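///
/// For example, shifting daily buckets so they start at 06:00 rather than midnight
/// (the field name is illustrative):
///
/// ```
/// # use elasticsearch_dsl::*;
/// let aggregation = Aggregation::date_histogram("created_at")
///     .calendar_interval(CalendarInterval::Day)
///     .offset("+6h");
/// ```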
pub fn offset<T>(mut self, offset: T) -> Self
where
T: ToString,
{
self.date_histogram.offset = Some(offset.to_string());
self
}
/// Elasticsearch stores date-times in Coordinated Universal Time (UTC). By default, all bucketing and rounding is
/// also done in UTC. Use the time_zone parameter to indicate that bucketing should use a different time zone.
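///
/// Elasticsearch accepts either an ISO 8601 UTC offset or an IANA time zone ID, for example
/// (the field name is illustrative):
///
/// ```
/// # use elasticsearch_dsl::*;
/// let aggregation = Aggregation::date_histogram("created_at")
///     .time_zone("-01:00");
/// ```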
pub fn time_zone<T>(mut self, time_zone: T) -> Self
where
T: ToString,
{
self.date_histogram.time_zone = Some(time_zone.to_string());
self
}
/// The order of the buckets can be customized by setting the order parameter.
/// By default, the buckets are ordered by their doc_count descending.
/// Use `order` to change this behavior.
///
/// > Sorting by ascending `_count` or by sub aggregation is discouraged as it increases the
/// > [error](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-approximate-counts)
/// > on document counts. It is fine when a single shard is queried, or when the field that is
/// > being aggregated was used as a routing key at index time: in these cases results will be
/// > accurate since shards have disjoint values. Otherwise, errors are unbounded.
/// > One particular case that could still be useful is sorting by
/// > [min](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-min-aggregation.html) or
/// > [max](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-max-aggregation.html)
/// > aggregation: counts will not be accurate but at least the top buckets will be correctly picked.
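///
/// For example, ordering buckets ascending by their key (the `_key` order path is an
/// Elasticsearch convention; the field name is illustrative):
///
/// ```
/// # use elasticsearch_dsl::*;
/// let aggregation = Aggregation::date_histogram("created_at")
///     .order(TermsOrder::new("_key", SortOrder::Asc));
/// ```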
pub fn order<T>(mut self, order: T) -> Self
where
T: Into<TermsOrderCollection>,
{
self.date_histogram.order = order.into();
self
}
add_aggregate!();
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::prelude::*;
#[test]
fn serialization() {
assert_serialize_aggregation(
Aggregation::date_histogram("test_field"),
json!({ "date_histogram": { "field": "test_field" } }),
);
assert_serialize_aggregation(
Aggregation::date_histogram("test_field")
.calendar_interval(CalendarInterval::Day)
.fixed_interval(Time::Hours(1))
.min_doc_count(2)
.missing(
Utc.with_ymd_and_hms(2014, 11, 28, 12, 0, 4)
.single()
.unwrap(),
)
.order(TermsOrder::new("test_order", SortOrder::Asc))
.offset("+6h")
.time_zone("-01:00"),
json!({
"date_histogram": {
"field": "test_field",
"calendar_interval": "day",
"fixed_interval": "1h",
"min_doc_count": 2,
"missing": "2014-11-28T12:00:04Z",
"order": [
{ "test_order": "asc" }
],
"offset": "+6h",
"time_zone": "-01:00"
}
}),
);
assert_serialize_aggregation(
Aggregation::date_histogram("test_field")
.aggregate("test_sub_agg", Aggregation::terms("test_field2")),
json!({
"date_histogram": {
"field": "test_field",
},
"aggs": {
"test_sub_agg": {
"terms": {
"field": "test_field2"
}
}
}
}),
);
}
}