blueprint_qos/metrics/
types.rs

1use crate::error::Error;
2use crate::servers::prometheus::PrometheusServerConfig;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::future::Future;
6
// Fallback values used by `MetricsConfig::default()`.

/// Default for `MetricsConfig::collection_interval_secs`.
const DEFAULT_METRICS_COLLECTION_INTERVAL_SECS: u64 = 60;
/// Default for `MetricsConfig::max_history`.
const DEFAULT_METRICS_MAX_HISTORY: usize = 100;
/// Default for `MetricsConfig::service_id`.
const DEFAULT_METRICS_SERVICE_ID: u64 = 0;
/// Default for `MetricsConfig::blueprint_id`.
const DEFAULT_METRICS_BLUEPRINT_ID: u64 = 0;
12
13/// Configuration for the metrics collection, storage, and exposure service.
14///
15/// This structure defines settings for how metrics should be collected,
16/// how long they should be retained, sampling rates, and how they
17/// should be exposed to external systems (e.g., through a Prometheus server).
18#[derive(Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
19pub struct MetricsConfig {
20    pub prometheus_server: Option<PrometheusServerConfig>,
21    pub collection_interval_secs: u64,
22    pub max_history: usize,
23    pub service_id: u64,
24    pub blueprint_id: u64,
25}
26
27impl Default for MetricsConfig {
28    fn default() -> Self {
29        Self {
30            prometheus_server: Some(PrometheusServerConfig::default()),
31            collection_interval_secs: DEFAULT_METRICS_COLLECTION_INTERVAL_SECS,
32            max_history: DEFAULT_METRICS_MAX_HISTORY,
33            service_id: DEFAULT_METRICS_SERVICE_ID,
34            blueprint_id: DEFAULT_METRICS_BLUEPRINT_ID,
35        }
36    }
37}
38
39/// System-level metrics representing hardware and OS resource utilization.
40///
41/// These metrics include information about the host system's resource usage,
42/// such as CPU utilization, memory consumption, disk I/O, and network traffic.
43/// They provide a snapshot of the system's state at a point in time.
44#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
45pub struct SystemMetrics {
46    pub cpu_usage: f32,
47    pub memory_usage: u64,
48    pub total_memory: u64,
49    pub disk_usage: u64,
50    pub total_disk: u64,
51    pub network_rx_bytes: u64,
52    pub network_tx_bytes: u64,
53    pub timestamp: u64,
54}
55
56/// Blueprint-specific application metrics for monitoring application behavior.
57///
58/// These metrics track application-specific measurements relevant to blueprint
59/// operation such as job execution counts, processing durations, queue depths,
60/// and custom metrics defined by the application. They focus on business logic
61/// rather than system resources.
62#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
63pub struct BlueprintMetrics {
64    pub custom_metrics: HashMap<String, String>,
65    pub timestamp: u64,
66}
67
/// Operational status of a blueprint service instance.
///
/// Records which service/blueprint the status belongs to, its status code
/// and optional message, uptime and start time, the most recent heartbeat
/// (if any), and when the status was captured. `Default` yields zeroed
/// numeric fields and `None` for the optional ones.
///
/// `Eq` is derived alongside `PartialEq` because every field has total
/// equality, matching the derive set of `MetricsConfig`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct BlueprintStatus {
    /// Identifier of the service this status describes.
    pub service_id: u64,
    /// Identifier of the blueprint this status describes.
    pub blueprint_id: u64,
    /// Numeric status code.
    ///
    /// NOTE(review): the meaning of individual codes is not defined in this
    /// file; confirm with the code that sets them.
    pub status_code: u32,
    /// Optional human-readable description accompanying `status_code`.
    pub status_message: Option<String>,
    /// How long the service has been running (unit not visible here;
    /// presumably seconds, to be confirmed).
    pub uptime: u64,
    /// When the service started (presumably a Unix timestamp, to be confirmed).
    pub start_time: u64,
    /// Timestamp of the most recent heartbeat, or `None` if none is recorded.
    pub last_heartbeat: Option<u64>,
    /// When this status was captured (presumably a Unix timestamp, to be
    /// confirmed).
    pub timestamp: u64,
}
85
86/// Trait for providing access to system and application metrics.
87///
88/// This trait defines the core interface for metric collection and retrieval in the `QoS` system.
89/// Implementers of this trait are responsible for collecting, storing, and exposing metrics
90/// about both the system (CPU, memory, etc.) and the application (blueprint-specific metrics).
91/// It supports both current and historical metric access as well as status updates.
92pub trait MetricsProvider: Send + Sync {
93    /// Get the latest system metrics
94    fn get_system_metrics(&self) -> impl Future<Output = SystemMetrics> + Send;
95    /// Get the latest blueprint metrics
96    fn get_blueprint_metrics(&self) -> impl Future<Output = BlueprintMetrics> + Send;
97    /// Get the current blueprint status
98    fn get_blueprint_status(&self) -> impl Future<Output = BlueprintStatus> + Send;
99    /// Get the historical system metrics
100    fn get_system_metrics_history(&self) -> impl Future<Output = Vec<SystemMetrics>> + Send;
101    /// Get the historical blueprint metrics
102    fn get_blueprint_metrics_history(&self) -> impl Future<Output = Vec<BlueprintMetrics>> + Send;
103    /// Add a custom metric
104    fn add_custom_metric(&self, key: String, value: String) -> impl Future<Output = ()> + Send;
105    /// Set the blueprint status
106    fn set_blueprint_status(
107        &self,
108        status_code: u32,
109        status_message: Option<String>,
110    ) -> impl Future<Output = ()> + Send;
111    /// Update the last heartbeat timestamp
112    fn update_last_heartbeat(&self, timestamp: u64) -> impl Future<Output = ()> + Send;
113    /// Start the metrics collection background task.
114    fn start_collection(&self) -> impl Future<Output = Result<(), Error>> + Send;
115}