// athena_rs 3.6.0
//
// Hyper-performant polyglot database driver
// Documentation
use chrono::{DateTime, Datelike, Duration, Timelike, Utc};
use serde_json::Value;
use std::collections::HashMap;

use crate::data::DataTimeseries;

// implementation of the group_by_interval function
/// Groups the data by the specified interval (hour, day, week, year) based on the date key.
/// The function takes a JSON array of objects and groups them by the specified interval.
///     
impl DataTimeseries {
    pub fn group_by_interval(
        &self,
        data: &Value,
        date_key: &str,
        interval: &str,
    ) -> HashMap<String, Vec<Value>> {
        group_by_interval(data, date_key, interval)
    }
}

/// Groups the objects of a JSON array into time buckets based on a timestamp field.
///
/// Each element's `date_key` field is parsed as an RFC 3339 timestamp, converted to UTC, and
/// the element is cloned into the bucket for that instant. Bucket labels are:
///
/// | `interval` | label                                             |
/// |------------|---------------------------------------------------|
/// | `"hour"`   | `YYYY-MM-DD HH:00`                                |
/// | `"day"`    | `YYYY-MM-DD`                                      |
/// | `"week"`   | `Week of YYYY-MM-DD` (the Monday of that week)    |
/// | `"year"`   | `YYYY`                                            |
///
/// Elements are skipped (not reported as errors) when the field is missing, is not a JSON
/// string, or does not parse as RFC 3339.
///
/// # Arguments
///
/// * `data` - A JSON array of objects to group
/// * `date_key` - The key of the date field to group by
/// * `interval` - The interval to group by (`"hour"`, `"day"`, `"week"`, `"year"`)
///
/// # Returns
///
/// A `HashMap` from bucket label to the elements in that bucket. The map is empty when `data`
/// is not a JSON array or when `interval` is not one of the supported names.
///
/// # Examples
///
/// ```ignore
/// // Bring `group_by_interval` into scope from this module first.
/// use serde_json::json;
/// use std::collections::HashMap;
///
/// let data = json!([
///     { "date": "2021-01-01T00:00:00Z", "value": 1 },
///     { "date": "2021-01-01T01:00:00Z", "value": 2 }
/// ]);
/// let grouped_data = group_by_interval(&data, "date", "hour");
///
/// let expected = HashMap::from([
///     (
///         "2021-01-01 00:00".to_string(),
///         vec![json!({ "date": "2021-01-01T00:00:00Z", "value": 1 })],
///     ),
///     (
///         "2021-01-01 01:00".to_string(),
///         vec![json!({ "date": "2021-01-01T01:00:00Z", "value": 2 })],
///     ),
/// ]);
/// assert_eq!(grouped_data, expected);
/// ```
///
pub fn group_by_interval(
    data: &Value,
    date_key: &str,
    interval: &str,
) -> HashMap<String, Vec<Value>> {
    let mut grouped_data: HashMap<String, Vec<Value>> = HashMap::new();

    // The interval does not change between items, so resolve it once. An unsupported name can
    // never produce a bucket, which means there is no point parsing any timestamps for it.
    let Some(bucket) = GroupingInterval::parse(interval) else {
        return grouped_data;
    };
    let Value::Array(items) = data else {
        return grouped_data;
    };

    for item in items {
        // Best-effort grouping: anything without a usable RFC 3339 timestamp is left out.
        let Some(date) = item
            .get(date_key)
            .and_then(Value::as_str)
            .and_then(|raw| DateTime::parse_from_rfc3339(raw).ok())
            .map(|parsed| parsed.with_timezone(&Utc))
        else {
            continue;
        };

        grouped_data
            .entry(bucket.bucket_key(date))
            .or_default()
            .push(item.clone());
    }

    grouped_data
}

/// Bucket sizes understood by `group_by_interval`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum GroupingInterval {
    Hour,
    Day,
    Week,
    Year,
}

impl GroupingInterval {
    /// Maps a caller-supplied interval name to a variant; `None` for unsupported names.
    fn parse(name: &str) -> Option<Self> {
        match name {
            "hour" => Some(Self::Hour),
            "day" => Some(Self::Day),
            "week" => Some(Self::Week),
            "year" => Some(Self::Year),
            _ => None,
        }
    }

    /// Formats the bucket label that `date` belongs to for this interval.
    fn bucket_key(self, date: DateTime<Utc>) -> String {
        match self {
            Self::Hour => format!(
                "{:04}-{:02}-{:02} {:02}:00",
                date.year(),
                date.month(),
                date.day(),
                date.hour()
            ),
            Self::Day => format!("{:04}-{:02}-{:02}", date.year(), date.month(), date.day()),
            Self::Week => {
                // Weeks are anchored on Monday: step back by the number of days since Monday.
                let days_since_monday = i64::from(date.weekday().num_days_from_monday());
                let week_start: DateTime<Utc> = date - Duration::days(days_since_monday);
                format!(
                    "Week of {:04}-{:02}-{:02}",
                    week_start.year(),
                    week_start.month(),
                    week_start.day()
                )
            }
            Self::Year => format!("{:04}", date.year()),
        }
    }
}