// cstats-core 0.1.1
//
// Core library for cstats: statistical analysis and metrics collection.
// Documentation
//! Statistics aggregation functionality

use std::collections::HashMap;

use chrono::{DateTime, Utc};

use crate::{api::MetricValue, Error, Result};

use super::{utils, MetricSummary, StatsRecord, StatsSummary};

/// Statistics aggregator for processing and summarizing collected data.
///
/// Holds an in-memory list of [`StatsRecord`]s. Summaries are computed on
/// demand from that list; adding a record only stores it.
#[derive(Debug, Clone)]
pub struct StatsAggregator {
    /// Records in the order they were added.
    records: Vec<StatsRecord>,
}

impl StatsAggregator {
    /// Create an aggregator that holds no records.
    pub fn new() -> Self {
        let records = Vec::new();
        Self { records }
    }

    /// Add a statistics record to the aggregator.
    ///
    /// The record is appended after any records already held.
    pub fn add_record(&mut self, record: StatsRecord) {
        self.records.push(record);
    }

    /// Add a batch of statistics records to the aggregator.
    ///
    /// The batch is taken by value and its records are appended, in order,
    /// after any records already held.
    pub fn add_records(&mut self, records: Vec<StatsRecord>) {
        let mut incoming = records;
        self.records.append(&mut incoming);
    }

    /// Clear all records from the aggregator.
    ///
    /// After this call the aggregator is empty, so `summarize` will return
    /// an error until new records are added.
    pub fn clear(&mut self) {
        self.records.clear();
    }

    /// Get the number of records currently held by the aggregator.
    pub fn record_count(&self) -> usize {
        self.records.len()
    }

    /// Build a summary covering every record currently held.
    ///
    /// The summary carries the record count, the earliest and latest
    /// timestamps, the sorted list of distinct sources, and per-metric
    /// statistics.
    ///
    /// # Errors
    ///
    /// Returns a statistics error when the aggregator holds no records, and
    /// propagates any error reported by metric aggregation.
    pub fn summarize(&self) -> Result<StatsSummary> {
        if self.records.is_empty() {
            return Err(Error::statistics("No records to summarize"));
        }

        // Field expressions run top to bottom, matching the original
        // computation order: time range, sources, then metrics.
        Ok(StatsSummary {
            time_range: self.get_time_range(),
            sources: self.get_unique_sources(),
            metrics: self.aggregate_metrics()?,
            count: self.records.len() as u64,
        })
    }

    /// Summarize records within a specific time range
    pub fn summarize_time_range(
        &self,
        start: DateTime<Utc>,
        end: DateTime<Utc>,
    ) -> Result<StatsSummary> {
        let filtered_records: Vec<_> = self
            .records
            .iter()
            .filter(|record| record.timestamp >= start && record.timestamp <= end)
            .collect();

        if filtered_records.is_empty() {
            return Err(Error::statistics("No records in specified time range"));
        }

        let mut temp_aggregator = StatsAggregator::new();
        temp_aggregator.records = filtered_records.into_iter().cloned().collect();
        temp_aggregator.summarize()
    }

    /// Summarize records by source
    pub fn summarize_by_source(&self) -> Result<HashMap<String, StatsSummary>> {
        let mut summaries = HashMap::new();

        // Group records by source
        let mut records_by_source: HashMap<String, Vec<StatsRecord>> = HashMap::new();
        for record in &self.records {
            records_by_source
                .entry(record.source.clone())
                .or_default()
                .push(record.clone());
        }

        // Create summary for each source
        for (source, records) in records_by_source {
            let mut aggregator = StatsAggregator::new();
            aggregator.add_records(records);
            summaries.insert(source, aggregator.summarize()?);
        }

        Ok(summaries)
    }

    /// Borrow every record whose source equals `source` exactly.
    ///
    /// The returned references keep the order in which the records were
    /// added; the vector is empty when nothing matches.
    pub fn filter_by_source(&self, source: &str) -> Vec<&StatsRecord> {
        let mut matching = Vec::new();
        for record in &self.records {
            if record.source == source {
                matching.push(record);
            }
        }
        matching
    }

    /// Borrow every record whose timestamp lies in `[start, end]`.
    ///
    /// Both bounds are inclusive. The returned references keep the order in
    /// which the records were added; the vector is empty when nothing
    /// matches.
    pub fn filter_by_time_range(
        &self,
        start: DateTime<Utc>,
        end: DateTime<Utc>,
    ) -> Vec<&StatsRecord> {
        let mut matching = Vec::new();
        for record in &self.records {
            let ts = record.timestamp;
            if ts >= start && ts <= end {
                matching.push(record);
            }
        }
        matching
    }

    /// Find the earliest and latest timestamps across all records.
    ///
    /// Returns `(earliest, latest)`.
    ///
    /// # Panics
    ///
    /// Panics when the aggregator holds no records, because the first
    /// record seeds the search. Callers must check for emptiness first.
    fn get_time_range(&self) -> (DateTime<Utc>, DateTime<Utc>) {
        let seed = self.records[0].timestamp;

        self.records
            .iter()
            .map(|record| record.timestamp)
            .fold((seed, seed), |(earliest, latest), ts| {
                (
                    if ts < earliest { ts } else { earliest },
                    if ts > latest { ts } else { latest },
                )
            })
    }

    /// Collect the distinct source names across all records.
    ///
    /// The result is sorted in ascending string order and contains each
    /// source exactly once.
    fn get_unique_sources(&self) -> Vec<String> {
        // An ordered set of borrowed names sorts and de-duplicates in one pass.
        let distinct: std::collections::BTreeSet<&str> = self
            .records
            .iter()
            .map(|record| record.source.as_str())
            .collect();

        distinct.into_iter().map(str::to_owned).collect()
    }

    /// Aggregate metrics from all records into per-metric summaries.
    ///
    /// Every metric value is first reduced to an `f64`:
    ///
    /// * `Integer` and `Duration` are cast with `as f64`.
    /// * `Float` is used as is.
    /// * `Boolean` becomes `1.0` for `true` and `0.0` for `false`.
    /// * `Histogram` contributes only its `sum` field.
    /// * `String` values are skipped; a metric that only ever carries
    ///   strings produces no summary.
    ///
    /// For each metric name the collected values are sorted, then `min`,
    /// `max`, `sum`, `count` and `avg` are computed. `std_dev` is reported
    /// only when there are at least two values, and the 50th/90th/95th/99th
    /// percentiles only when there are at least four.
    ///
    /// Values are ordered with `f64::total_cmp`, so a NaN value no longer
    /// causes a panic during sorting. NaN still propagates into `sum` and
    /// `avg`, and may appear as `min` or `max`.
    ///
    /// # Errors
    ///
    /// No failure path exists at present; the `Result` return type is kept
    /// for the callers that propagate it with `?`.
    fn aggregate_metrics(&self) -> Result<HashMap<String, MetricSummary>> {
        let mut metric_values: HashMap<String, Vec<f64>> = HashMap::new();

        // Collect the numeric form of every metric value, grouped by name.
        for record in &self.records {
            for (name, value) in &record.metrics {
                let numeric_value = match value {
                    MetricValue::Integer(i) => *i as f64,
                    MetricValue::Float(f) => *f,
                    MetricValue::Duration(d) => *d as f64,
                    MetricValue::Boolean(b) => {
                        if *b {
                            1.0
                        } else {
                            0.0
                        }
                    }
                    MetricValue::String(_) => continue, // Strings have no numeric form
                    MetricValue::Histogram { sum, .. } => *sum,
                };

                metric_values
                    .entry(name.clone())
                    .or_default()
                    .push(numeric_value);
            }
        }

        // Calculate summary statistics for each metric.
        let mut summaries = HashMap::new();
        for (name, mut values) in metric_values {
            // A total order is required here: `partial_cmp(..).unwrap()`
            // panics as soon as a NaN is compared.
            values.sort_by(f64::total_cmp);

            // After sorting, the ends of the slice are the extremes. An empty
            // list cannot occur in practice, but is skipped defensively.
            let (min, max) = match (values.first(), values.last()) {
                (Some(&lowest), Some(&highest)) => (lowest, highest),
                _ => continue,
            };

            let sum: f64 = values.iter().sum();
            let count = values.len() as u64;
            let avg = sum / count as f64;

            // A spread needs at least two samples.
            let std_dev = if values.len() > 1 {
                Some(utils::std_deviation(&values))
            } else {
                None
            };

            // Percentiles are only reported once there are four or more samples.
            let percentiles = if values.len() >= 4 {
                Some([
                    utils::percentile(&values, 50.0), // median
                    utils::percentile(&values, 90.0),
                    utils::percentile(&values, 95.0),
                    utils::percentile(&values, 99.0),
                ])
            } else {
                None
            };

            summaries.insert(
                name.clone(),
                MetricSummary {
                    name,
                    min,
                    max,
                    avg,
                    sum,
                    count,
                    std_dev,
                    percentiles,
                },
            );
        }

        Ok(summaries)
    }
}

// The default aggregator is the same empty aggregator that `new` returns.
impl Default for StatsAggregator {
    /// Create an aggregator by delegating to [`StatsAggregator::new`].
    fn default() -> Self {
        Self::new()
    }
}