use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use axum::Router;
use axum::extract::State;
use axum::routing::get;
use prometheus::{
Counter, CounterVec, Encoder, Gauge, HistogramOpts, HistogramVec, IntCounter, IntCounterVec,
Opts, Registry, TextEncoder,
};
#[cfg(feature = "daemon")]
use crate::daemon::ack::AckAction;
use crate::report::Report;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OtlpRejectReason {
UnsupportedMediaType,
ParseError,
ChannelFull,
}
impl OtlpRejectReason {
pub const ALL: [Self; 3] = [
Self::UnsupportedMediaType,
Self::ParseError,
Self::ChannelFull,
];
#[must_use]
pub const fn as_str(self) -> &'static str {
match self {
Self::UnsupportedMediaType => "unsupported_media_type",
Self::ParseError => "parse_error",
Self::ChannelFull => "channel_full",
}
}
}
#[cfg(feature = "daemon")]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AckFailureReason {
AlreadyAcked,
NotAcked,
Unauthorized,
NoStore,
InvalidSignature,
LimitReached,
FileTooLarge,
EntryTooLarge,
InternalError,
}
#[cfg(feature = "daemon")]
impl AckFailureReason {
#[must_use]
pub const fn as_str(self) -> &'static str {
match self {
Self::AlreadyAcked => "already_acked",
Self::NotAcked => "not_acked",
Self::Unauthorized => "unauthorized",
Self::NoStore => "no_store",
Self::InvalidSignature => "invalid_signature",
Self::LimitReached => "limit_reached",
Self::FileTooLarge => "file_too_large",
Self::EntryTooLarge => "entry_too_large",
Self::InternalError => "internal_error",
}
}
}
#[cfg(feature = "daemon")]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum ScaphandreScrapeReason {
Unreachable,
Timeout,
HttpError,
BodyReadError,
RequestError,
InvalidUtf8,
}
#[cfg(feature = "daemon")]
impl ScaphandreScrapeReason {
pub(crate) const ALL: [Self; 6] = [
Self::Unreachable,
Self::Timeout,
Self::HttpError,
Self::BodyReadError,
Self::RequestError,
Self::InvalidUtf8,
];
pub(crate) const fn as_str(self) -> &'static str {
match self {
Self::Unreachable => "unreachable",
Self::Timeout => "timeout",
Self::HttpError => "http_error",
Self::BodyReadError => "body_read_error",
Self::RequestError => "request_error",
Self::InvalidUtf8 => "invalid_utf8",
}
}
}
#[cfg(feature = "daemon")]
pub(crate) type KeplerScrapeReason = ScaphandreScrapeReason;
#[cfg(feature = "daemon")]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum RedfishScrapeReason {
Unreachable,
Timeout,
HttpError,
BodyReadError,
RequestError,
InvalidUtf8,
InvalidJson,
PathMissing,
InvalidValue,
}
#[cfg(feature = "daemon")]
impl RedfishScrapeReason {
pub(crate) const ALL: [Self; 9] = [
Self::Unreachable,
Self::Timeout,
Self::HttpError,
Self::BodyReadError,
Self::RequestError,
Self::InvalidUtf8,
Self::InvalidJson,
Self::PathMissing,
Self::InvalidValue,
];
pub(crate) const fn as_str(self) -> &'static str {
match self {
Self::Unreachable => "unreachable",
Self::Timeout => "timeout",
Self::HttpError => "http_error",
Self::BodyReadError => "body_read_error",
Self::RequestError => "request_error",
Self::InvalidUtf8 => "invalid_utf8",
Self::InvalidJson => "invalid_json",
Self::PathMissing => "path_missing",
Self::InvalidValue => "invalid_value",
}
}
}
fn register_int_counter_vec(
registry: &Registry,
name: &str,
help: &str,
labels: &[&str],
) -> IntCounterVec {
let counter =
IntCounterVec::new(Opts::new(name, help), labels).expect("metric creation should not fail");
registry
.register(Box::new(counter.clone()))
.expect("registration should not fail");
counter
}
#[derive(Debug, Clone)]
struct ExemplarData {
trace_id: String,
}
fn sanitize_exemplar_value(value: &str) -> std::borrow::Cow<'_, str> {
let valid = |c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_';
if value.len() <= 64 && value.chars().all(valid) {
return std::borrow::Cow::Borrowed(value);
}
std::borrow::Cow::Owned(value.chars().filter(|&c| valid(c)).take(64).collect())
}
#[derive(Clone)]
pub struct MetricsState {
registry: Registry,
pub findings_total: CounterVec,
pub io_waste_ratio: Gauge,
pub traces_analyzed_total: Counter,
pub events_processed_total: Counter,
pub active_traces: Gauge,
pub total_io_ops: Counter,
pub avoidable_io_ops: Counter,
pub service_io_ops_total: CounterVec,
pub scaphandre_last_scrape_age_seconds: Gauge,
pub cloud_energy_last_scrape_age_seconds: Gauge,
pub slow_duration_seconds: HistogramVec,
pub export_report_requests_total: Counter,
pub otlp_rejected_total: IntCounterVec,
pub otlp_rejected_unsupported_media_type: IntCounter,
pub otlp_rejected_parse_error: IntCounter,
pub otlp_rejected_channel_full: IntCounter,
#[cfg(feature = "daemon")]
pub ack_operations_total: IntCounterVec,
#[cfg(feature = "daemon")]
pub ack_operations_failed_total: IntCounterVec,
#[cfg(feature = "daemon")]
pub ack_operations_ack_success: IntCounter,
#[cfg(feature = "daemon")]
pub ack_operations_unack_success: IntCounter,
#[cfg(feature = "daemon")]
pub scaphandre_scrape_total: IntCounterVec,
#[cfg(feature = "daemon")]
pub scaphandre_scrape_failed_total: IntCounterVec,
#[cfg(feature = "daemon")]
pub scaphandre_scrape_success: IntCounter,
#[cfg(feature = "daemon")]
pub scaphandre_scrape_failed: IntCounter,
pub kepler_last_scrape_age_seconds: Gauge,
#[cfg(feature = "daemon")]
pub kepler_scrape_total: IntCounterVec,
#[cfg(feature = "daemon")]
pub kepler_scrape_failed_total: IntCounterVec,
#[cfg(feature = "daemon")]
pub kepler_scrape_success: IntCounter,
#[cfg(feature = "daemon")]
pub kepler_scrape_failed: IntCounter,
pub redfish_last_scrape_age_seconds: Gauge,
#[cfg(feature = "daemon")]
pub redfish_scrape_total: IntCounterVec,
#[cfg(feature = "daemon")]
pub redfish_scrape_failed_total: IntCounterVec,
#[cfg(feature = "daemon")]
pub redfish_scrape_success: IntCounter,
#[cfg(feature = "daemon")]
pub redfish_scrape_failed: IntCounter,
worst_finding_trace: Arc<RwLock<HashMap<(&'static str, &'static str), ExemplarData>>>,
worst_waste_trace: Arc<RwLock<Option<ExemplarData>>>,
}
impl MetricsState {
#[must_use]
#[allow(clippy::too_many_lines)]
pub fn new() -> Self {
let registry = Registry::new();
let findings_total = CounterVec::new(
Opts::new(
"perf_sentinel_findings_total",
"Total findings detected by type and severity",
),
&["type", "severity"],
)
.expect("metric creation should not fail");
let io_waste_ratio = Gauge::new(
"perf_sentinel_io_waste_ratio",
"Cumulative I/O waste ratio since daemon start",
)
.expect("metric creation should not fail");
let traces_analyzed_total = Counter::new(
"perf_sentinel_traces_analyzed_total",
"Total traces analyzed since start",
)
.expect("metric creation should not fail");
let events_processed_total = Counter::new(
"perf_sentinel_events_processed_total",
"Total events processed since start",
)
.expect("metric creation should not fail");
let active_traces = Gauge::new(
"perf_sentinel_active_traces",
"Currently active traces in the sliding window",
)
.expect("metric creation should not fail");
let total_io_ops = Counter::new(
"perf_sentinel_total_io_ops",
"Cumulative total I/O ops processed",
)
.expect("metric creation should not fail");
let avoidable_io_ops = Counter::new(
"perf_sentinel_avoidable_io_ops",
"Cumulative avoidable I/O ops detected",
)
.expect("metric creation should not fail");
let service_io_ops_total = CounterVec::new(
Opts::new(
"perf_sentinel_service_io_ops_total",
"Cumulative I/O ops attributed to each service",
),
&["service"],
)
.expect("metric creation should not fail");
let scaphandre_last_scrape_age_seconds = Gauge::new(
"perf_sentinel_scaphandre_last_scrape_age_seconds",
"Age in seconds since the last successful Scaphandre scrape",
)
.expect("metric creation should not fail");
registry
.register(Box::new(findings_total.clone()))
.expect("registration should not fail");
registry
.register(Box::new(io_waste_ratio.clone()))
.expect("registration should not fail");
registry
.register(Box::new(traces_analyzed_total.clone()))
.expect("registration should not fail");
registry
.register(Box::new(events_processed_total.clone()))
.expect("registration should not fail");
registry
.register(Box::new(active_traces.clone()))
.expect("registration should not fail");
registry
.register(Box::new(total_io_ops.clone()))
.expect("registration should not fail");
registry
.register(Box::new(avoidable_io_ops.clone()))
.expect("registration should not fail");
registry
.register(Box::new(service_io_ops_total.clone()))
.expect("registration should not fail");
registry
.register(Box::new(scaphandre_last_scrape_age_seconds.clone()))
.expect("registration should not fail");
let slow_duration_seconds = HistogramVec::new(
HistogramOpts::new(
"perf_sentinel_slow_duration_seconds",
"Duration of spans exceeding the slow threshold",
)
.buckets(vec![
0.1, 0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 3.0, 5.0, 10.0, 30.0,
]),
&["type"],
)
.expect("metric creation should not fail");
registry
.register(Box::new(slow_duration_seconds.clone()))
.expect("registration should not fail");
let cloud_energy_last_scrape_age_seconds = Gauge::new(
"perf_sentinel_cloud_energy_last_scrape_age_seconds",
"Age in seconds since the last successful cloud energy scrape",
)
.expect("metric creation should not fail");
registry
.register(Box::new(cloud_energy_last_scrape_age_seconds.clone()))
.expect("registration should not fail");
let kepler_last_scrape_age_seconds = Gauge::new(
"perf_sentinel_kepler_last_scrape_age_seconds",
"Age in seconds since the last successful Kepler scrape",
)
.expect("metric creation should not fail");
registry
.register(Box::new(kepler_last_scrape_age_seconds.clone()))
.expect("registration should not fail");
let redfish_last_scrape_age_seconds = Gauge::new(
"perf_sentinel_redfish_last_scrape_age_seconds",
"Age in seconds since the last successful Redfish scrape",
)
.expect("metric creation should not fail");
registry
.register(Box::new(redfish_last_scrape_age_seconds.clone()))
.expect("registration should not fail");
let export_report_requests_total = Counter::new(
"perf_sentinel_export_report_requests_total",
"Total requests to GET /api/export/report since daemon start",
)
.expect("metric creation should not fail");
registry
.register(Box::new(export_report_requests_total.clone()))
.expect("registration should not fail");
let otlp_rejected_total = register_int_counter_vec(
®istry,
"perf_sentinel_otlp_rejected_total",
"Total OTLP requests rejected by the daemon, by reason",
&["reason"],
);
let otlp_rejected_unsupported_media_type = otlp_rejected_total
.with_label_values(&[OtlpRejectReason::UnsupportedMediaType.as_str()]);
let otlp_rejected_parse_error =
otlp_rejected_total.with_label_values(&[OtlpRejectReason::ParseError.as_str()]);
let otlp_rejected_channel_full =
otlp_rejected_total.with_label_values(&[OtlpRejectReason::ChannelFull.as_str()]);
for reason in &OtlpRejectReason::ALL {
let _ = otlp_rejected_total.with_label_values(&[reason.as_str()]);
}
#[cfg(feature = "daemon")]
let ack_operations_total = register_int_counter_vec(
®istry,
"perf_sentinel_ack_operations_total",
"Successful ack and unack operations on the daemon HTTP API, by action",
&["action"],
);
#[cfg(feature = "daemon")]
let ack_operations_failed_total = register_int_counter_vec(
®istry,
"perf_sentinel_ack_operations_failed_total",
"Failed ack and unack operations on the daemon HTTP API, by action and reason",
&["action", "reason"],
);
#[cfg(feature = "daemon")]
let ack_operations_ack_success =
ack_operations_total.with_label_values(&[AckAction::Ack.as_str()]);
#[cfg(feature = "daemon")]
let ack_operations_unack_success =
ack_operations_total.with_label_values(&[AckAction::Unack.as_str()]);
#[cfg(feature = "daemon")]
{
let prewarm_failure: &[(AckAction, &[AckFailureReason])] = &[
(
AckAction::Ack,
&[
AckFailureReason::AlreadyAcked,
AckFailureReason::Unauthorized,
AckFailureReason::NoStore,
AckFailureReason::InvalidSignature,
AckFailureReason::LimitReached,
AckFailureReason::FileTooLarge,
AckFailureReason::EntryTooLarge,
AckFailureReason::InternalError,
],
),
(
AckAction::Unack,
&[
AckFailureReason::NotAcked,
AckFailureReason::Unauthorized,
AckFailureReason::NoStore,
AckFailureReason::InvalidSignature,
AckFailureReason::InternalError,
],
),
];
for (action, reasons) in prewarm_failure {
for reason in *reasons {
let _ = ack_operations_failed_total
.with_label_values(&[action.as_str(), reason.as_str()]);
}
}
}
#[cfg(feature = "daemon")]
let scaphandre_scrape_total = register_int_counter_vec(
®istry,
"perf_sentinel_scaphandre_scrape_total",
"Total Scaphandre scrape attempts on the daemon scraper, by outcome",
&["status"],
);
#[cfg(feature = "daemon")]
let scaphandre_scrape_failed_total = register_int_counter_vec(
®istry,
"perf_sentinel_scaphandre_scrape_failed_total",
"Failed Scaphandre scrapes on the daemon scraper, by failure reason",
&["reason"],
);
#[cfg(feature = "daemon")]
let scaphandre_scrape_success = scaphandre_scrape_total.with_label_values(&["success"]);
#[cfg(feature = "daemon")]
let scaphandre_scrape_failed = scaphandre_scrape_total.with_label_values(&["failed"]);
#[cfg(feature = "daemon")]
for reason in &ScaphandreScrapeReason::ALL {
let _ = scaphandre_scrape_failed_total.with_label_values(&[reason.as_str()]);
}
#[cfg(feature = "daemon")]
let kepler_scrape_total = register_int_counter_vec(
®istry,
"perf_sentinel_kepler_scrape_total",
"Total Kepler scrape attempts on the daemon scraper, by outcome",
&["status"],
);
#[cfg(feature = "daemon")]
let kepler_scrape_failed_total = register_int_counter_vec(
®istry,
"perf_sentinel_kepler_scrape_failed_total",
"Failed Kepler scrapes on the daemon scraper, by failure reason",
&["reason"],
);
#[cfg(feature = "daemon")]
let kepler_scrape_success = kepler_scrape_total.with_label_values(&["success"]);
#[cfg(feature = "daemon")]
let kepler_scrape_failed = kepler_scrape_total.with_label_values(&["failed"]);
#[cfg(feature = "daemon")]
for reason in &KeplerScrapeReason::ALL {
let _ = kepler_scrape_failed_total.with_label_values(&[reason.as_str()]);
}
#[cfg(feature = "daemon")]
let redfish_scrape_total = register_int_counter_vec(
®istry,
"perf_sentinel_redfish_scrape_total",
"Total Redfish scrape attempts on the daemon scraper, by outcome",
&["status"],
);
#[cfg(feature = "daemon")]
let redfish_scrape_failed_total = register_int_counter_vec(
®istry,
"perf_sentinel_redfish_scrape_failed_total",
"Failed Redfish scrapes on the daemon scraper, by failure reason",
&["reason"],
);
#[cfg(feature = "daemon")]
let redfish_scrape_success = redfish_scrape_total.with_label_values(&["success"]);
#[cfg(feature = "daemon")]
let redfish_scrape_failed = redfish_scrape_total.with_label_values(&["failed"]);
#[cfg(feature = "daemon")]
for reason in &RedfishScrapeReason::ALL {
let _ = redfish_scrape_failed_total.with_label_values(&[reason.as_str()]);
}
#[cfg(target_os = "linux")]
{
use prometheus::process_collector::ProcessCollector;
registry
.register(Box::new(ProcessCollector::for_self()))
.expect("process collector registration should not fail");
}
Self {
registry,
findings_total,
io_waste_ratio,
traces_analyzed_total,
events_processed_total,
active_traces,
total_io_ops,
avoidable_io_ops,
service_io_ops_total,
scaphandre_last_scrape_age_seconds,
cloud_energy_last_scrape_age_seconds,
slow_duration_seconds,
export_report_requests_total,
otlp_rejected_total,
otlp_rejected_unsupported_media_type,
otlp_rejected_parse_error,
otlp_rejected_channel_full,
#[cfg(feature = "daemon")]
ack_operations_total,
#[cfg(feature = "daemon")]
ack_operations_failed_total,
#[cfg(feature = "daemon")]
ack_operations_ack_success,
#[cfg(feature = "daemon")]
ack_operations_unack_success,
#[cfg(feature = "daemon")]
scaphandre_scrape_total,
#[cfg(feature = "daemon")]
scaphandre_scrape_failed_total,
#[cfg(feature = "daemon")]
scaphandre_scrape_success,
#[cfg(feature = "daemon")]
scaphandre_scrape_failed,
kepler_last_scrape_age_seconds,
#[cfg(feature = "daemon")]
kepler_scrape_total,
#[cfg(feature = "daemon")]
kepler_scrape_failed_total,
#[cfg(feature = "daemon")]
kepler_scrape_success,
#[cfg(feature = "daemon")]
kepler_scrape_failed,
redfish_last_scrape_age_seconds,
#[cfg(feature = "daemon")]
redfish_scrape_total,
#[cfg(feature = "daemon")]
redfish_scrape_failed_total,
#[cfg(feature = "daemon")]
redfish_scrape_success,
#[cfg(feature = "daemon")]
redfish_scrape_failed,
worst_finding_trace: Arc::new(RwLock::new(HashMap::new())),
worst_waste_trace: Arc::new(RwLock::new(None)),
}
}
#[inline]
pub fn record_otlp_reject(&self, reason: OtlpRejectReason) {
<Self as crate::ingest::otlp::MetricsSink>::record_otlp_reject(self, reason);
}
#[cfg(feature = "daemon")]
#[inline]
pub fn record_ack_success(&self, action: AckAction) {
match action {
AckAction::Ack => self.ack_operations_ack_success.inc(),
AckAction::Unack => self.ack_operations_unack_success.inc(),
}
}
#[cfg(feature = "daemon")]
#[inline]
pub fn record_ack_failure(&self, action: AckAction, reason: AckFailureReason) {
self.ack_operations_failed_total
.with_label_values(&[action.as_str(), reason.as_str()])
.inc();
}
#[must_use]
pub fn snapshot_service_io_ops(&self) -> HashMap<String, u64> {
use prometheus::core::Collector;
let mut out = HashMap::new();
for family in Collector::collect(&self.service_io_ops_total) {
for metric in family.get_metric() {
let counter_value = metric.get_counter().value();
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
let count = if counter_value <= 0.0 {
0u64
} else if counter_value >= u64::MAX as f64 {
u64::MAX
} else {
counter_value as u64
};
for label in metric.get_label() {
if label.name() == "service" {
out.insert(label.value().to_string(), count);
break;
}
}
}
}
out
}
pub fn record_batch(&self, report: &Report) {
self.traces_analyzed_total
.inc_by(report.analysis.traces_analyzed as f64);
self.events_processed_total
.inc_by(report.analysis.events_processed as f64);
self.total_io_ops
.inc_by(report.green_summary.total_io_ops as f64);
self.avoidable_io_ops
.inc_by(report.green_summary.avoidable_io_ops as f64);
let cumulative_total = self.total_io_ops.get();
if cumulative_total > 0.0 {
self.io_waste_ratio
.set(self.avoidable_io_ops.get() / cumulative_total);
}
for finding in &report.findings {
self.findings_total
.with_label_values(&[finding.finding_type.as_str(), finding.severity.as_str()])
.inc();
}
self.record_exemplars(&report.findings, &report.green_summary);
}
pub fn record_exemplars(
&self,
findings: &[crate::detect::Finding],
green_summary: &crate::report::GreenSummary,
) {
let mut new_exemplars: HashMap<(&'static str, &'static str), ExemplarData> = HashMap::new();
for finding in findings {
new_exemplars.insert(
(finding.finding_type.as_str(), finding.severity.as_str()),
ExemplarData {
trace_id: finding.trace_id.clone(),
},
);
}
if !new_exemplars.is_empty() {
let mut worst_map = self
.worst_finding_trace
.write()
.unwrap_or_else(std::sync::PoisonError::into_inner);
worst_map.extend(new_exemplars);
}
if let Some(worst_finding) = (green_summary.io_waste_ratio > 0.0)
.then(|| {
findings
.iter()
.filter(|f| f.finding_type.is_avoidable_io())
.max_by_key(|f| f.pattern.occurrences)
})
.flatten()
{
let mut waste_lock = self
.worst_waste_trace
.write()
.unwrap_or_else(std::sync::PoisonError::into_inner);
*waste_lock = Some(ExemplarData {
trace_id: worst_finding.trace_id.clone(),
});
}
}
#[must_use]
pub fn has_exemplars(&self) -> bool {
let finding_lock = self
.worst_finding_trace
.read()
.unwrap_or_else(std::sync::PoisonError::into_inner);
if !finding_lock.is_empty() {
return true;
}
let waste_lock = self
.worst_waste_trace
.read()
.unwrap_or_else(std::sync::PoisonError::into_inner);
waste_lock.is_some()
}
#[must_use]
pub fn negotiate(&self, accept: Option<&str>) -> (String, &'static str) {
let format = select_format(accept);
let encoder = TextEncoder::new();
let metric_families = self.registry.gather();
let mut buffer = Vec::new();
if encoder.encode(&metric_families, &mut buffer).is_err() {
return (
"# error encoding metrics\n".to_string(),
"text/plain; version=0.0.4; charset=utf-8",
);
}
let Ok(base_output) = String::from_utf8(buffer) else {
return (
"# error encoding metrics\n".to_string(),
"text/plain; version=0.0.4; charset=utf-8",
);
};
match format {
NegotiatedFormat::OpenMetricsForced => {
let mut output = if self.has_exemplars() {
self.inject_exemplars(base_output)
} else {
base_output
};
if !output.ends_with('\n') {
output.push('\n');
}
output.push_str("# EOF\n");
(
output,
"application/openmetrics-text; version=1.0.0; charset=utf-8",
)
}
NegotiatedFormat::Legacy => {
if self.has_exemplars() {
let mut output = self.inject_exemplars(base_output);
output.push_str("# EOF\n");
(
output,
"application/openmetrics-text; version=1.0.0; charset=utf-8",
)
} else {
(base_output, "text/plain; version=0.0.4; charset=utf-8")
}
}
NegotiatedFormat::PlainStrict => {
(base_output, "text/plain; version=0.0.4; charset=utf-8")
}
}
}
#[must_use]
pub fn render(&self) -> String {
self.negotiate(None).0
}
fn inject_exemplars(&self, base: String) -> String {
use std::fmt::Write;
let finding_map = self
.worst_finding_trace
.read()
.unwrap_or_else(std::sync::PoisonError::into_inner);
let waste_exemplar = self
.worst_waste_trace
.read()
.unwrap_or_else(std::sync::PoisonError::into_inner);
if finding_map.is_empty() && waste_exemplar.is_none() {
return base;
}
let mut output = String::with_capacity(base.len() + 256);
for line in base.lines() {
output.push_str(line);
if let Some(exemplar) = line
.starts_with("perf_sentinel_findings_total{")
.then(|| extract_finding_exemplar(line, &finding_map))
.flatten()
{
let sanitized = sanitize_exemplar_value(&exemplar.trace_id);
let _ = write!(output, " # {{trace_id=\"{sanitized}\"}} 1.0");
}
if let Some(exemplar) = waste_exemplar
.as_ref()
.filter(|_| line.starts_with("perf_sentinel_io_waste_ratio "))
{
let sanitized = sanitize_exemplar_value(&exemplar.trace_id);
let _ = write!(output, " # {{trace_id=\"{sanitized}\"}} 1.0");
}
output.push('\n');
}
output
}
#[must_use]
pub fn content_type(&self) -> &'static str {
self.negotiate(None).1
}
}
impl crate::ingest::otlp::MetricsSink for MetricsState {
#[inline]
fn record_otlp_reject(&self, reason: OtlpRejectReason) {
match reason {
OtlpRejectReason::UnsupportedMediaType => {
self.otlp_rejected_unsupported_media_type.inc();
}
OtlpRejectReason::ParseError => self.otlp_rejected_parse_error.inc(),
OtlpRejectReason::ChannelFull => self.otlp_rejected_channel_full.inc(),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NegotiatedFormat {
OpenMetricsForced,
Legacy,
PlainStrict,
}
fn select_format(accept: Option<&str>) -> NegotiatedFormat {
let Some(header) = accept else {
return NegotiatedFormat::Legacy;
};
let mut accepts_openmetrics = false;
for token in header.split(',') {
let token = token.trim();
let mut parts = token.split(';');
let media = parts.next().unwrap_or("").trim();
let refused = parts.any(|p| {
let p = p.trim();
p.eq_ignore_ascii_case("q=0") || p.eq_ignore_ascii_case("q=0.0")
});
if refused {
continue;
}
if media.eq_ignore_ascii_case("application/openmetrics-text") {
accepts_openmetrics = true;
break;
}
}
if accepts_openmetrics {
NegotiatedFormat::OpenMetricsForced
} else if header.contains("*/*") {
NegotiatedFormat::Legacy
} else {
NegotiatedFormat::PlainStrict
}
}
fn extract_finding_exemplar<'a>(
line: &str,
map: &'a HashMap<(&'static str, &'static str), ExemplarData>,
) -> Option<&'a ExemplarData> {
let labels_start = line.find('{')?;
let labels_end = line.find('}')?;
let labels_str = &line[labels_start + 1..labels_end];
let mut finding_type = None;
let mut severity = None;
for part in labels_str.split(',') {
let part = part.trim();
if let Some(val) = part.strip_prefix("type=\"") {
finding_type = val.strip_suffix('"');
} else if let Some(val) = part.strip_prefix("severity=\"") {
severity = val.strip_suffix('"');
}
}
let ft = finding_type?;
let sev = severity?;
map.iter()
.find(|((k_ft, k_sev), _)| *k_ft == ft && *k_sev == sev)
.map(|(_, v)| v)
}
impl Default for MetricsState {
fn default() -> Self {
Self::new()
}
}
pub fn metrics_route(state: Arc<MetricsState>) -> Router {
async fn handle_metrics(
State(metrics): State<Arc<MetricsState>>,
headers: axum::http::HeaderMap,
) -> ([(axum::http::header::HeaderName, &'static str); 1], String) {
let accept = headers
.get(axum::http::header::ACCEPT)
.and_then(|v| v.to_str().ok());
let (body, content_type) = metrics.negotiate(accept);
([(axum::http::header::CONTENT_TYPE, content_type)], body)
}
Router::new()
.route("/metrics", get(handle_metrics))
.with_state(state)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::detect::{Confidence, Finding, FindingType, GreenImpact, Pattern, Severity};
use crate::report::{Analysis, GreenSummary, QualityGate, Report};
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
fn make_test_report(findings: Vec<Finding>, waste_ratio: f64) -> Report {
let total = 10;
let avoidable = (total as f64 * waste_ratio) as usize;
Report {
analysis: Analysis {
duration_ms: 1,
events_processed: 100,
traces_analyzed: 2,
},
findings,
green_summary: GreenSummary {
total_io_ops: total,
avoidable_io_ops: avoidable,
io_waste_ratio: waste_ratio,
io_waste_ratio_band: crate::report::interpret::InterpretationLevel::for_waste_ratio(
waste_ratio,
),
..GreenSummary::disabled(0)
},
quality_gate: QualityGate {
passed: true,
rules: vec![],
},
per_endpoint_io_ops: vec![],
correlations: vec![],
warnings: vec![],
warning_details: vec![],
acknowledged_findings: vec![],
binary_version: String::new(),
}
}
fn make_finding(
finding_type: FindingType,
severity: Severity,
trace_id: &str,
occurrences: usize,
) -> Finding {
Finding {
finding_type,
severity,
trace_id: trace_id.to_string(),
service: "order-svc".to_string(),
source_endpoint: "POST /api/orders/42/submit".to_string(),
pattern: Pattern {
template: "SELECT * FROM t WHERE id = ?".to_string(),
occurrences,
window_ms: 200,
distinct_params: occurrences,
..Default::default()
},
suggestion: "batch".to_string(),
first_timestamp: "2025-07-10T14:32:01.000Z".to_string(),
last_timestamp: "2025-07-10T14:32:01.250Z".to_string(),
green_impact: Some(GreenImpact {
estimated_extra_io_ops: occurrences.saturating_sub(1),
io_intensity_score: 6.0,
io_intensity_band: crate::report::interpret::InterpretationLevel::for_iis(6.0),
}),
confidence: Confidence::default(),
classification_method: None,
code_location: None,
instrumentation_scopes: Vec::new(),
suggested_fix: None,
signature: String::new(),
}
}
#[test]
fn default_creates_same_as_new() {
let state = MetricsState::default();
state.events_processed_total.inc();
let output = state.render();
assert!(output.contains("perf_sentinel_events_processed_total"));
}
#[tokio::test]
async fn metrics_route_returns_prometheus_output() {
use axum::body::Body;
use axum::http::{Request, StatusCode};
use http_body_util::BodyExt;
use tower::ServiceExt;
let state = Arc::new(MetricsState::new());
state.traces_analyzed_total.inc_by(42.0);
state.io_waste_ratio.set(0.25);
state
.findings_total
.with_label_values(&["n_plus_one_sql", "warning"])
.inc();
let router = metrics_route(state);
let request = Request::builder()
.uri("/metrics")
.body(Body::empty())
.unwrap();
let response = router.oneshot(request).await.unwrap();
assert_eq!(response.status(), StatusCode::OK);
let content_type = response
.headers()
.get("content-type")
.expect("should have content-type header")
.to_str()
.unwrap();
assert!(
content_type.contains("text/plain"),
"Content-Type should be text/plain, got: {content_type}"
);
let body = response.into_body().collect().await.unwrap().to_bytes();
let body_str = String::from_utf8(body.to_vec()).unwrap();
assert!(
body_str.contains("perf_sentinel_traces_analyzed_total 42"),
"should contain traces count, got: {body_str}"
);
assert!(
body_str.contains("perf_sentinel_io_waste_ratio 0.25"),
"should contain waste ratio"
);
assert!(
body_str.contains("n_plus_one_sql"),
"should contain finding type label"
);
}
#[test]
fn metrics_state_creates_successfully() {
let state = MetricsState::new();
state
.findings_total
.with_label_values(&["test", "test"])
.inc_by(0.0);
let output = state.render();
assert!(
output.contains("perf_sentinel_findings_total"),
"output: {output}"
);
assert!(output.contains("perf_sentinel_io_waste_ratio"));
assert!(output.contains("perf_sentinel_traces_analyzed_total"));
assert!(output.contains("perf_sentinel_events_processed_total"));
assert!(output.contains("perf_sentinel_active_traces"));
}
#[test]
fn increment_findings_counter() {
let state = MetricsState::new();
state
.findings_total
.with_label_values(&["n_plus_one_sql", "critical"])
.inc();
state
.findings_total
.with_label_values(&["n_plus_one_sql", "critical"])
.inc();
let output = state.render();
assert!(output.contains(r#"type="n_plus_one_sql""#));
assert!(output.contains(r#"severity="critical""#));
}
#[test]
fn set_gauge_values() {
let state = MetricsState::new();
state.io_waste_ratio.set(0.42);
state.active_traces.set(5.0);
let output = state.render();
assert!(output.contains("0.42"));
}
#[test]
fn increment_counters() {
let state = MetricsState::new();
state.traces_analyzed_total.inc_by(10.0);
state.events_processed_total.inc_by(100.0);
let output = state.render();
assert!(output.contains("100"));
}
#[test]
fn record_batch_tracks_worst_finding_trace() {
let state = MetricsState::new();
let report = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Critical,
"trace-abc",
10,
)],
0.5,
);
state.record_batch(&report);
let map = state.worst_finding_trace.read().unwrap();
assert_eq!(
map.get(&("n_plus_one_sql", "critical")).unwrap().trace_id,
"trace-abc"
);
}
#[test]
fn record_batch_tracks_worst_waste_trace() {
let state = MetricsState::new();
let report = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Warning,
"trace-waste",
8,
)],
0.4,
);
state.record_batch(&report);
let waste = state.worst_waste_trace.read().unwrap();
assert_eq!(waste.as_ref().unwrap().trace_id, "trace-waste");
}
#[test]
fn render_includes_exemplar_annotation() {
let state = MetricsState::new();
let report = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Warning,
"trace-exemplar",
6,
)],
0.3,
);
state.record_batch(&report);
let output = state.render();
assert!(
output.contains(r#"# {trace_id="trace-exemplar"}"#),
"should contain exemplar annotation, got: {output}"
);
}
#[test]
fn render_no_exemplar_when_no_data() {
let state = MetricsState::new();
state.traces_analyzed_total.inc();
state
.findings_total
.with_label_values(&["n_plus_one_sql", "warning"])
.inc();
let output = state.render();
assert!(
!output.contains("# {trace_id="),
"should not contain exemplar when no record_batch called"
);
}
#[test]
fn exemplar_on_io_waste_ratio() {
let state = MetricsState::new();
let report = make_test_report(
vec![make_finding(
FindingType::RedundantSql,
Severity::Warning,
"trace-waste-ratio",
4,
)],
0.5,
);
state.record_batch(&report);
let output = state.render();
for line in output.lines() {
if line.starts_with("perf_sentinel_io_waste_ratio ") {
assert!(
line.contains(r#"# {trace_id="trace-waste-ratio"}"#),
"waste ratio line should have exemplar: {line}"
);
}
}
}
#[test]
fn content_type_is_openmetrics_with_exemplars() {
let state = MetricsState::new();
assert_eq!(
state.content_type(),
"text/plain; version=0.0.4; charset=utf-8"
);
let report = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Warning,
"trace-1",
5,
)],
0.0,
);
state.record_batch(&report);
assert_eq!(
state.content_type(),
"application/openmetrics-text; version=1.0.0; charset=utf-8"
);
}
#[test]
fn multiple_batches_update_exemplars() {
let state = MetricsState::new();
let report1 = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Warning,
"trace-old",
5,
)],
0.3,
);
state.record_batch(&report1);
let report2 = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Warning,
"trace-new",
10,
)],
0.5,
);
state.record_batch(&report2);
let map = state.worst_finding_trace.read().unwrap();
assert_eq!(
map.get(&("n_plus_one_sql", "warning")).unwrap().trace_id,
"trace-new",
"should update to latest batch's worst finding"
);
}
#[tokio::test]
async fn metrics_route_returns_openmetrics_with_exemplars() {
use axum::body::Body;
use axum::http::{Request, StatusCode};
use http_body_util::BodyExt;
use tower::ServiceExt;
let state = Arc::new(MetricsState::new());
let report = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Warning,
"trace-route-test",
5,
)],
0.0,
);
state.record_batch(&report);
let router = metrics_route(state);
let request = Request::builder()
.uri("/metrics")
.body(Body::empty())
.unwrap();
let response = router.oneshot(request).await.unwrap();
assert_eq!(response.status(), StatusCode::OK);
let content_type = response
.headers()
.get("content-type")
.unwrap()
.to_str()
.unwrap();
assert!(
content_type.contains("openmetrics"),
"should use OpenMetrics content type: {content_type}"
);
let body = response.into_body().collect().await.unwrap().to_bytes();
let body_str = String::from_utf8(body.to_vec()).unwrap();
assert!(
body_str.contains(r#"trace_id="trace-route-test""#),
"should contain exemplar trace_id"
);
}
#[test]
fn negotiate_returns_openmetrics_when_accept_header_explicitly_requests_it() {
let state = MetricsState::new();
state.traces_analyzed_total.inc();
let (body, content_type) =
state.negotiate(Some("application/openmetrics-text;version=1.0.0"));
assert_eq!(
content_type,
"application/openmetrics-text; version=1.0.0; charset=utf-8"
);
assert!(
body.ends_with("# EOF\n"),
"OM-forced body must terminate with `# EOF\\n`"
);
}
#[test]
fn negotiate_returns_openmetrics_with_exemplars_when_accept_explicit_and_exemplars_present() {
let state = MetricsState::new();
let report = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Warning,
"trace-om-explicit",
5,
)],
0.4,
);
state.record_batch(&report);
let (body, content_type) = state.negotiate(Some("application/openmetrics-text"));
assert_eq!(
content_type,
"application/openmetrics-text; version=1.0.0; charset=utf-8"
);
assert!(body.ends_with("# EOF\n"));
assert!(
body.contains(r#"# {trace_id="trace-om-explicit"} 1.0"#),
"OM-forced body must include exemplar annotation: {body}"
);
}
#[test]
fn negotiate_falls_back_to_legacy_when_accept_absent() {
let state = MetricsState::new();
state.traces_analyzed_total.inc();
let (body_no_ex, ct_no_ex) = state.negotiate(None);
assert_eq!(ct_no_ex, "text/plain; version=0.0.4; charset=utf-8");
assert!(!body_no_ex.contains("# EOF"));
let report = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Warning,
"trace-legacy",
5,
)],
0.0,
);
state.record_batch(&report);
let (body_with_ex, ct_with_ex) = state.negotiate(None);
assert_eq!(
ct_with_ex,
"application/openmetrics-text; version=1.0.0; charset=utf-8"
);
assert!(body_with_ex.ends_with("# EOF\n"));
}
#[test]
fn negotiate_falls_back_to_legacy_when_accept_contains_wildcard() {
let state = MetricsState::new();
let report = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Warning,
"trace-vmagent",
5,
)],
0.0,
);
state.record_batch(&report);
let (body, content_type) = state.negotiate(Some("text/plain;version=0.0.4;*/*;q=0.1"));
assert_eq!(
content_type,
"application/openmetrics-text; version=1.0.0; charset=utf-8"
);
assert!(body.ends_with("# EOF\n"));
assert!(body.contains(r#"# {trace_id="trace-vmagent"} 1.0"#));
}
#[test]
fn negotiate_returns_plain_strict_when_accept_text_plain_only() {
let state = MetricsState::new();
let report = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Warning,
"trace-strict",
5,
)],
0.4,
);
state.record_batch(&report);
let (body, content_type) = state.negotiate(Some("text/plain;version=0.0.4"));
assert_eq!(content_type, "text/plain; version=0.0.4; charset=utf-8");
assert!(!body.contains("# EOF"));
assert!(
!body.contains("# {trace_id="),
"plain-strict body must not contain exemplar annotations: {body}"
);
}
#[test]
fn select_format_dispatches_correctly() {
assert_eq!(select_format(None), NegotiatedFormat::Legacy);
assert_eq!(
select_format(Some("text/plain")),
NegotiatedFormat::PlainStrict
);
assert_eq!(
select_format(Some("text/plain;version=0.0.4")),
NegotiatedFormat::PlainStrict
);
assert_eq!(select_format(Some("*/*")), NegotiatedFormat::Legacy);
assert_eq!(
select_format(Some("text/plain;*/*;q=0.1")),
NegotiatedFormat::Legacy
);
assert_eq!(
select_format(Some("text/plain;version=0.0.4,*/*;q=0.1")),
NegotiatedFormat::Legacy
);
assert_eq!(
select_format(Some("application/openmetrics-text;version=1.0.0")),
NegotiatedFormat::OpenMetricsForced
);
assert_eq!(
select_format(Some("application/openmetrics-text;q=0.9,text/plain;q=0.5")),
NegotiatedFormat::OpenMetricsForced
);
assert_eq!(
select_format(Some("Application/OpenMetrics-Text")),
NegotiatedFormat::OpenMetricsForced
);
assert_eq!(
select_format(Some("application/openmetrics-text-foo")),
NegotiatedFormat::PlainStrict
);
assert_eq!(
select_format(Some("xapplication/openmetrics-text")),
NegotiatedFormat::PlainStrict
);
assert_eq!(
select_format(Some("application/openmetrics-text;q=0,*/*;q=1")),
NegotiatedFormat::Legacy
);
assert_eq!(
select_format(Some("application/openmetrics-text;q=0.0,text/plain")),
NegotiatedFormat::PlainStrict
);
}
#[tokio::test]
async fn metrics_route_honors_explicit_openmetrics_accept_header() {
use axum::body::Body;
use axum::http::{Request, StatusCode};
use http_body_util::BodyExt;
use tower::ServiceExt;
let state = Arc::new(MetricsState::new());
state.traces_analyzed_total.inc();
let router = metrics_route(state);
let request = Request::builder()
.uri("/metrics")
.header("accept", "application/openmetrics-text;version=1.0.0")
.body(Body::empty())
.unwrap();
let response = router.oneshot(request).await.unwrap();
assert_eq!(response.status(), StatusCode::OK);
let ct = response
.headers()
.get("content-type")
.unwrap()
.to_str()
.unwrap();
assert!(
ct.starts_with("application/openmetrics-text"),
"expected OM CT, got: {ct}"
);
let body = response.into_body().collect().await.unwrap().to_bytes();
assert!(
String::from_utf8(body.to_vec())
.unwrap()
.ends_with("# EOF\n")
);
}
#[tokio::test]
async fn metrics_route_preserves_legacy_behavior_for_vmagent_style_accept() {
use axum::body::Body;
use axum::http::{Request, StatusCode};
use http_body_util::BodyExt;
use tower::ServiceExt;
let state = Arc::new(MetricsState::new());
let report = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Warning,
"trace-vmagent-route",
5,
)],
0.0,
);
state.record_batch(&report);
let router = metrics_route(state);
let request = Request::builder()
.uri("/metrics")
.header("accept", "text/plain;version=0.0.4;*/*;q=0.1")
.body(Body::empty())
.unwrap();
let response = router.oneshot(request).await.unwrap();
assert_eq!(response.status(), StatusCode::OK);
let ct = response
.headers()
.get("content-type")
.unwrap()
.to_str()
.unwrap();
assert!(
ct.starts_with("application/openmetrics-text"),
"vmagent-style Accept (`*/*`) must keep legacy OM-with-exemplars: {ct}"
);
let body = response.into_body().collect().await.unwrap().to_bytes();
let body_str = String::from_utf8(body.to_vec()).unwrap();
assert!(body_str.ends_with("# EOF\n"));
assert!(body_str.contains(r#"trace_id="trace-vmagent-route""#));
}
#[test]
fn sanitize_exemplar_value_strips_dangerous_chars() {
assert_eq!(sanitize_exemplar_value("abc-123_def"), "abc-123_def");
assert_eq!(
sanitize_exemplar_value("evil\"} 999\nfake_metric"),
"evil999fake_metric"
);
assert_eq!(sanitize_exemplar_value(""), "");
let long = "a".repeat(100);
assert_eq!(sanitize_exemplar_value(&long).len(), 64);
}
#[test]
fn exemplar_with_malicious_trace_id_is_sanitized() {
let state = MetricsState::new();
let report = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Warning,
"evil\"} 999\nmy_fake_metric",
5,
)],
0.0,
);
state.record_batch(&report);
let output = state.render();
assert!(
!output.contains("evil\""),
"malicious trace_id should be sanitized"
);
assert!(output.contains("evil999my_fake_metric"));
}
#[test]
fn render_appends_eof_marker_with_exemplars() {
let state = MetricsState::new();
let report = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Warning,
"trace-eof",
5,
)],
0.3,
);
state.record_batch(&report);
let output = state.render();
assert!(
output.ends_with("# EOF\n"),
"OpenMetrics output must terminate with `# EOF\\n`, got tail: {:?}",
&output[output.len().saturating_sub(64)..]
);
assert!(
!output.contains("# EOF\n#") && !output.contains("# EOF\nperf_sentinel"),
"no content may follow the `# EOF` marker"
);
}
#[test]
fn render_omits_eof_marker_without_exemplars() {
let state = MetricsState::new();
state.traces_analyzed_total.inc();
let output = state.render();
assert!(
!output.contains("# EOF"),
"Prometheus text/plain output must not contain `# EOF`, got: {output}"
);
}
#[test]
fn exemplar_annotation_includes_numeric_value() {
let state = MetricsState::new();
let report = make_test_report(
vec![make_finding(
FindingType::NPlusOneSql,
Severity::Warning,
"trace-numeric",
5,
)],
0.5,
);
state.record_batch(&report);
let output = state.render();
let exemplar_line = output
.lines()
.find(|l| l.starts_with("perf_sentinel_findings_total{") && l.contains("trace_id="))
.expect("expected at least one findings exemplar line");
assert!(
exemplar_line.ends_with(r#" # {trace_id="trace-numeric"} 1.0"#),
"findings exemplar must follow OpenMetrics 1.0 format: {exemplar_line}"
);
let waste_line = output
.lines()
.find(|l| l.starts_with("perf_sentinel_io_waste_ratio ") && l.contains("trace_id="))
.expect("expected the io_waste_ratio exemplar line");
assert!(
waste_line.ends_with(r#" # {trace_id="trace-numeric"} 1.0"#),
"io_waste_ratio exemplar must follow OpenMetrics 1.0 format: {waste_line}"
);
}
#[test]
fn prometheus_output_format_matches_expected_prefixes() {
let state = MetricsState::new();
state
.findings_total
.with_label_values(&["n_plus_one_sql", "warning"])
.inc();
state.io_waste_ratio.set(0.5);
let output = state.render();
let has_findings_prefix = output
.lines()
.any(|l| l.starts_with("perf_sentinel_findings_total{"));
assert!(
has_findings_prefix,
"prometheus output must contain lines starting with 'perf_sentinel_findings_total{{': {output}"
);
let has_waste_prefix = output
.lines()
.any(|l| l.starts_with("perf_sentinel_io_waste_ratio "));
assert!(
has_waste_prefix,
"prometheus output must contain lines starting with 'perf_sentinel_io_waste_ratio ': {output}"
);
}
#[test]
fn registry_contains_otlp_rejected() {
let state = MetricsState::new();
let output = state.render();
assert!(
output.contains("perf_sentinel_otlp_rejected_total"),
"registry should expose perf_sentinel_otlp_rejected_total, got: {output}"
);
}
#[test]
fn otlp_rejected_starts_at_zero_for_all_three_reasons() {
let state = MetricsState::new();
for reason in [
OtlpRejectReason::UnsupportedMediaType,
OtlpRejectReason::ParseError,
OtlpRejectReason::ChannelFull,
] {
let count = state
.otlp_rejected_total
.with_label_values(&[reason.as_str()])
.get();
assert_eq!(count, 0, "reason {} should start at 0", reason.as_str());
}
let output = state.render();
for reason in ["unsupported_media_type", "parse_error", "channel_full"] {
assert!(
output.contains(&format!(
"perf_sentinel_otlp_rejected_total{{reason=\"{reason}\"}} 0"
)),
"pre-warmed line for reason {reason} should appear in /metrics, got: {output}"
);
}
}
#[test]
fn record_otlp_reject_increments_correct_label() {
let state = MetricsState::new();
state.record_otlp_reject(OtlpRejectReason::ChannelFull);
state.record_otlp_reject(OtlpRejectReason::ChannelFull);
state.record_otlp_reject(OtlpRejectReason::ChannelFull);
assert_eq!(
state
.otlp_rejected_total
.with_label_values(&["channel_full"])
.get(),
3
);
assert_eq!(
state
.otlp_rejected_total
.with_label_values(&["parse_error"])
.get(),
0
);
assert_eq!(
state
.otlp_rejected_total
.with_label_values(&["unsupported_media_type"])
.get(),
0
);
}
#[cfg(feature = "daemon")]
#[test]
fn ack_failure_reason_as_str_round_trips_all_variants() {
for (variant, label) in [
(AckFailureReason::AlreadyAcked, "already_acked"),
(AckFailureReason::NotAcked, "not_acked"),
(AckFailureReason::Unauthorized, "unauthorized"),
(AckFailureReason::NoStore, "no_store"),
(AckFailureReason::InvalidSignature, "invalid_signature"),
(AckFailureReason::LimitReached, "limit_reached"),
(AckFailureReason::FileTooLarge, "file_too_large"),
(AckFailureReason::EntryTooLarge, "entry_too_large"),
(AckFailureReason::InternalError, "internal_error"),
] {
assert_eq!(variant.as_str(), label);
}
}
#[cfg(feature = "daemon")]
#[test]
fn record_ack_success_increments_correct_label() {
let state = MetricsState::new();
state.record_ack_success(AckAction::Ack);
state.record_ack_success(AckAction::Ack);
state.record_ack_success(AckAction::Unack);
assert_eq!(state.ack_operations_ack_success.get(), 2);
assert_eq!(state.ack_operations_unack_success.get(), 1);
}
#[cfg(feature = "daemon")]
#[test]
fn record_ack_failure_increments_correct_combination() {
let state = MetricsState::new();
state.record_ack_failure(AckAction::Ack, AckFailureReason::Unauthorized);
state.record_ack_failure(AckAction::Unack, AckFailureReason::NotAcked);
assert_eq!(
state
.ack_operations_failed_total
.with_label_values(&["ack", "unauthorized"])
.get(),
1
);
assert_eq!(
state
.ack_operations_failed_total
.with_label_values(&["unack", "not_acked"])
.get(),
1
);
assert_eq!(
state
.ack_operations_failed_total
.with_label_values(&["ack", "not_acked"])
.get(),
0
);
}
#[cfg(feature = "daemon")]
#[test]
fn ack_operations_total_starts_at_zero_for_both_actions() {
let state = MetricsState::new();
assert_eq!(state.ack_operations_ack_success.get(), 0);
assert_eq!(state.ack_operations_unack_success.get(), 0);
let output = state.render();
for action in ["ack", "unack"] {
assert!(
output.contains(&format!(
"perf_sentinel_ack_operations_total{{action=\"{action}\"}} 0"
)),
"pre-warmed line for action={action} should appear, got: {output}"
);
}
}
#[cfg(feature = "daemon")]
#[test]
fn ack_operations_failed_total_starts_at_zero_for_documented_combinations() {
let state = MetricsState::new();
let output = state.render();
let documented: &[(&str, &[&str])] = &[
(
"ack",
&[
"already_acked",
"unauthorized",
"no_store",
"invalid_signature",
"limit_reached",
"file_too_large",
"entry_too_large",
"internal_error",
],
),
(
"unack",
&[
"not_acked",
"unauthorized",
"no_store",
"invalid_signature",
"internal_error",
],
),
];
for (action, reasons) in documented {
for reason in *reasons {
let line = format!(
"perf_sentinel_ack_operations_failed_total{{action=\"{action}\",reason=\"{reason}\"}} 0"
);
assert!(
output.contains(&line),
"pre-warmed line {line} should appear in /metrics"
);
}
}
for forbidden in [
"perf_sentinel_ack_operations_failed_total{action=\"ack\",reason=\"not_acked\"}",
"perf_sentinel_ack_operations_failed_total{action=\"unack\",reason=\"already_acked\"}",
"perf_sentinel_ack_operations_failed_total{action=\"unack\",reason=\"limit_reached\"}",
"perf_sentinel_ack_operations_failed_total{action=\"unack\",reason=\"file_too_large\"}",
"perf_sentinel_ack_operations_failed_total{action=\"unack\",reason=\"entry_too_large\"}",
] {
assert!(
!output.contains(forbidden),
"forbidden combination {forbidden} should not be pre-warmed"
);
}
}
#[cfg(feature = "daemon")]
#[test]
fn ack_operations_appear_in_render() {
let state = MetricsState::new();
let output = state.render();
assert!(
output.contains("perf_sentinel_ack_operations_total"),
"registry should expose perf_sentinel_ack_operations_total"
);
assert!(
output.contains("perf_sentinel_ack_operations_failed_total"),
"registry should expose perf_sentinel_ack_operations_failed_total"
);
}
#[cfg(target_os = "linux")]
#[test]
fn process_collector_registered_on_linux() {
let state = MetricsState::new();
let output = state.render();
assert!(
output
.lines()
.any(|l| l.starts_with("process_resident_memory_bytes")),
"process_resident_memory_bytes should be exposed on Linux, got: {output}"
);
}
#[cfg(not(target_os = "linux"))]
#[test]
fn process_collector_not_registered_on_non_linux() {
let state = MetricsState::new();
let output = state.render();
assert!(
!output.contains("process_resident_memory_bytes"),
"process_resident_memory_bytes must not be exposed off Linux, got: {output}"
);
}
}