use axum::body::Body;
use axum::extract::State;
use axum::http::{Response, StatusCode};
use prometheus::{
Encoder, GaugeVec, HistogramOpts, HistogramVec, IntCounter, IntCounterVec, IntGauge,
IntGaugeVec, Opts, Registry, TextEncoder,
};
use std::sync::Arc;
use crate::proxy::AppState;
/// Handle to every Prometheus collector exported by the proxy.
///
/// All collectors are created and registered on the private `registry` by
/// `Metrics::new`; the public fields are the handles callers update directly
/// or through the `record_*` helper methods.
#[derive(Clone)]
pub struct Metrics {
/// Registry every collector below is registered with; rendered by `gather_text`.
registry: Registry,
/// Total proxied LLM requests, by `provider`, `model`, `status_code`.
pub requests_total: IntCounterVec,
/// Request duration in seconds, by `provider`, `model`.
pub request_duration_seconds: HistogramVec,
/// Total tokens observed, by `direction`, `provider`, `model`.
pub tokens_total: IntCounterVec,
/// Total security findings detected, by `severity`, `finding_type`.
pub security_findings_total: IntCounterVec,
/// Circuit breaker state (1 = active), by `subsystem`, `state`.
pub circuit_breaker_state: GaugeVec,
/// Total storage operations, by `operation`, `status`.
pub storage_operations_total: IntCounterVec,
/// Estimated cost in micro-USD (divide by 1_000_000 for USD), by `tenant`, `model`.
pub cost_usd_total: IntCounterVec,
/// Total anomalies detected, by `anomaly_type`.
pub anomalies_total: IntCounterVec,
/// Per-detector security analysis latency in seconds, by `detector`.
pub security_detector_latency_seconds: HistogramVec,
/// Currently active proxy connections.
pub active_connections: IntGauge,
/// Requests where boundary defense was applied, by `provider`, `mode`.
pub boundary_defense_applied_total: IntCounterVec,
/// Number of messages wrapped per request, by `provider`.
pub boundary_defense_messages_wrapped: HistogramVec,
/// Requests where the system prompt reminder was injected, by `provider`.
pub boundary_defense_reminder_injected_total: IntCounterVec,
/// Byte delta per request from boundary defense, by `provider`.
pub boundary_defense_overhead_bytes: HistogramVec,
/// Errors in the boundary defense pipeline, by `error_type`.
pub boundary_defense_errors_total: IntCounterVec,
/// Requests skipped by boundary defense, by `reason`.
pub boundary_defense_skipped_total: IntCounterVec,
/// Whether boundary defense shadow mode is active (1) or not (0).
pub boundary_defense_shadow_mode: IntGauge,
/// Zones emitted by the zone detector, by `kind`, `origin`, `framing`.
pub zone_detection_zones_total: IntCounterVec,
/// Findings produced by zone-aware analysis, by `finding_type`, `zone_kind`.
pub zone_detection_findings_total: IntCounterVec,
/// Zone-detection failures, by `reason`.
pub zone_detection_failures_total: IntCounterVec,
/// Data zones marked by the datamarking transform, by `kind`, `shadow`.
pub spotlighting_zones_total: IntCounterVec,
/// Cumulative bytes added by marker substitution, by `shadow`.
pub spotlighting_byte_delta_total: IntCounterVec,
/// Marker codepoints resampled because the first sample collided with zone content.
pub spotlighting_marker_collision_total: IntCounter,
/// Datamarking pipeline failures (fail-open events), by `reason`.
pub spotlighting_failures_total: IntCounterVec,
/// ML sliding window chunks processed per classify call, by `model`.
pub ml_chunks_total: HistogramVec,
/// ML inputs that hit the sliding window chunk cap, by `model`.
pub ml_input_truncated_total: IntCounterVec,
/// Response collections truncated due to `max_response_size_bytes`.
pub response_truncated_total: IntCounter,
/// Analysis text truncations due to `max_analysis_text_bytes`.
pub analysis_text_truncated_total: IntCounter,
/// Total enforcement actions executed by the router, by `action_type`, `status`, `mode`.
pub action_executions_total: IntCounterVec,
/// Per-action execution latency in seconds, by `action_type`.
pub action_latency_seconds: HistogramVec,
/// Currently blocked IPs recorded by this proxy instance.
pub ip_blocks_active: IntGauge,
/// Rule match frequency, by `finding_type`, `action_type`.
pub action_rule_matches_total: IntCounterVec,
/// Total runtime feature-flag updates, by `feature`.
pub feature_flag_updates_total: IntCounterVec,
/// Current runtime value of each bool-typed feature flag (0 or 1), by `feature`.
pub feature_flag_bool_state: IntGaugeVec,
/// Info metric for string-typed feature flags; the active (`feature`, `value`) pair is 1.
pub feature_flag_string_state: IntGaugeVec,
/// Total failures writing the runtime feature-flag sidecar overlay.
pub config_persist_errors_total: IntCounter,
/// Total forensic AuditEvent writes that failed to persist, by `event_type`.
pub audit_event_dropped_total: IntCounterVec,
/// Total judge invocations, by `backend`, `model`, `mode`, `status`.
pub judge_requests_total: IntCounterVec,
/// Judge call latency in seconds, by `backend`, `model`, `mode`.
pub judge_latency_seconds: HistogramVec,
/// Judge token consumption, by `direction`, `backend`, `model`.
pub judge_tokens_total: IntCounterVec,
/// Verdicts emitted by the judge, by `category`, `recommended_action`, `is_threat`, `model`.
pub judge_verdicts_total: IntCounterVec,
/// Current judge worker queue depth.
pub judge_queue_depth: IntGauge,
/// Agreement between the judge verdict and the prior ensemble outcome, by `agreement`.
pub judge_verdict_agreement: IntCounterVec,
/// Judge requests dropped without a backend call, by `reason`.
pub judge_dropped_total: IntCounterVec,
/// Judge verdicts that did not pass the inline promotion gate, by `reason`.
pub judge_promotion_rejected_total: IntCounterVec,
/// Judge verdicts suppressed by shadow mode that would otherwise have been
/// promoted to Block, by `category`, `recommended_action`.
pub judge_shadow_would_block_total: IntCounterVec,
/// Per-category alignment with the golden-set ground truth (0.0–1.0).
pub judge_golden_set_alignment: GaugeVec,
/// Per-category false-positive rate against benign golden-set entries (0.0–1.0).
pub judge_golden_set_false_positive_rate: GaugeVec,
/// Requests currently inside the ML detection pipeline (permits-in-use).
pub ml_inflight_requests: IntGauge,
/// Total ML pipeline requests rejected because the concurrency cap was saturated.
pub ml_rejected_total: IntCounter,
/// Post-response security analysis attempts dropped without producing findings, by `reason`.
pub analyzer_dropped_total: IntCounterVec,
}
impl Metrics {
pub fn new() -> Self {
let registry = Registry::new();
let requests_total = IntCounterVec::new(
Opts::new("llmtrace_requests_total", "Total proxied LLM requests"),
&["provider", "model", "status_code"],
)
.expect("metric: requests_total");
registry
.register(Box::new(requests_total.clone()))
.expect("register requests_total");
let request_duration_seconds = HistogramVec::new(
HistogramOpts::new(
"llmtrace_request_duration_seconds",
"Request duration in seconds",
)
.buckets(vec![0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0]),
&["provider", "model"],
)
.expect("metric: request_duration_seconds");
registry
.register(Box::new(request_duration_seconds.clone()))
.expect("register request_duration_seconds");
let tokens_total = IntCounterVec::new(
Opts::new("llmtrace_tokens_total", "Total tokens observed"),
&["direction", "provider", "model"],
)
.expect("metric: tokens_total");
registry
.register(Box::new(tokens_total.clone()))
.expect("register tokens_total");
let security_findings_total = IntCounterVec::new(
Opts::new(
"llmtrace_security_findings_total",
"Total security findings detected",
),
&["severity", "finding_type"],
)
.expect("metric: security_findings_total");
registry
.register(Box::new(security_findings_total.clone()))
.expect("register security_findings_total");
let circuit_breaker_state = GaugeVec::new(
Opts::new(
"llmtrace_circuit_breaker_state",
"Circuit breaker state (1 = active)",
),
&["subsystem", "state"],
)
.expect("metric: circuit_breaker_state");
registry
.register(Box::new(circuit_breaker_state.clone()))
.expect("register circuit_breaker_state");
let storage_operations_total = IntCounterVec::new(
Opts::new(
"llmtrace_storage_operations_total",
"Total storage operations",
),
&["operation", "status"],
)
.expect("metric: storage_operations_total");
registry
.register(Box::new(storage_operations_total.clone()))
.expect("register storage_operations_total");
let cost_usd_total = IntCounterVec::new(
Opts::new(
"llmtrace_cost_usd_total",
"Estimated cost in micro-USD (divide by 1_000_000 for USD)",
),
&["tenant", "model"],
)
.expect("metric: cost_usd_total");
registry
.register(Box::new(cost_usd_total.clone()))
.expect("register cost_usd_total");
let anomalies_total = IntCounterVec::new(
Opts::new("llmtrace_anomalies_total", "Total anomalies detected"),
&["anomaly_type"],
)
.expect("metric: anomalies_total");
registry
.register(Box::new(anomalies_total.clone()))
.expect("register anomalies_total");
let security_detector_latency_seconds = HistogramVec::new(
HistogramOpts::new(
"llmtrace_security_detector_latency_seconds",
"Per-detector security analysis latency in seconds",
)
.buckets(vec![0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0]),
&["detector"],
)
.expect("metric: security_detector_latency_seconds");
registry
.register(Box::new(security_detector_latency_seconds.clone()))
.expect("register security_detector_latency_seconds");
let active_connections = IntGauge::new(
"llmtrace_active_connections",
"Currently active proxy connections",
)
.expect("metric: active_connections");
registry
.register(Box::new(active_connections.clone()))
.expect("register active_connections");
let boundary_defense_applied_total = IntCounterVec::new(
Opts::new(
"llmtrace_boundary_defense_applied_total",
"Requests where boundary defense was applied",
),
&["provider", "mode"],
)
.expect("metric: boundary_defense_applied_total");
registry
.register(Box::new(boundary_defense_applied_total.clone()))
.expect("register boundary_defense_applied_total");
let boundary_defense_messages_wrapped = HistogramVec::new(
HistogramOpts::new(
"llmtrace_boundary_defense_messages_wrapped",
"Number of messages wrapped per request",
)
.buckets(vec![0.0, 1.0, 2.0, 3.0, 5.0, 10.0, 20.0]),
&["provider"],
)
.expect("metric: boundary_defense_messages_wrapped");
registry
.register(Box::new(boundary_defense_messages_wrapped.clone()))
.expect("register boundary_defense_messages_wrapped");
let boundary_defense_reminder_injected_total = IntCounterVec::new(
Opts::new(
"llmtrace_boundary_defense_reminder_injected_total",
"Requests where system prompt reminder was injected",
),
&["provider"],
)
.expect("metric: boundary_defense_reminder_injected_total");
registry
.register(Box::new(boundary_defense_reminder_injected_total.clone()))
.expect("register boundary_defense_reminder_injected_total");
let boundary_defense_overhead_bytes = HistogramVec::new(
HistogramOpts::new(
"llmtrace_boundary_defense_overhead_bytes",
"Byte delta per request from boundary defense",
)
.buckets(vec![0.0, 50.0, 100.0, 200.0, 500.0, 1000.0, 5000.0]),
&["provider"],
)
.expect("metric: boundary_defense_overhead_bytes");
registry
.register(Box::new(boundary_defense_overhead_bytes.clone()))
.expect("register boundary_defense_overhead_bytes");
let boundary_defense_errors_total = IntCounterVec::new(
Opts::new(
"llmtrace_boundary_defense_errors_total",
"Errors in boundary defense pipeline",
),
&["error_type"],
)
.expect("metric: boundary_defense_errors_total");
registry
.register(Box::new(boundary_defense_errors_total.clone()))
.expect("register boundary_defense_errors_total");
let boundary_defense_skipped_total = IntCounterVec::new(
Opts::new(
"llmtrace_boundary_defense_skipped_total",
"Requests skipped by boundary defense",
),
&["reason"],
)
.expect("metric: boundary_defense_skipped_total");
registry
.register(Box::new(boundary_defense_skipped_total.clone()))
.expect("register boundary_defense_skipped_total");
let boundary_defense_shadow_mode = IntGauge::new(
"llmtrace_boundary_defense_shadow_mode",
"Whether boundary defense shadow mode is active (1) or not (0)",
)
.expect("metric: boundary_defense_shadow_mode");
registry
.register(Box::new(boundary_defense_shadow_mode.clone()))
.expect("register boundary_defense_shadow_mode");
let zone_detection_zones_total = IntCounterVec::new(
Opts::new(
"llmtrace_zone_detection_zones_total",
"Zones emitted by the IS-060 zone detector",
),
&["kind", "origin", "framing"],
)
.expect("metric: zone_detection_zones_total");
registry
.register(Box::new(zone_detection_zones_total.clone()))
.expect("register zone_detection_zones_total");
let zone_detection_findings_total = IntCounterVec::new(
Opts::new(
"llmtrace_zone_detection_findings_total",
"Findings produced by zone-aware analysis, by finding_type and zone_kind",
),
&["finding_type", "zone_kind"],
)
.expect("metric: zone_detection_findings_total");
registry
.register(Box::new(zone_detection_findings_total.clone()))
.expect("register zone_detection_findings_total");
let zone_detection_failures_total = IntCounterVec::new(
Opts::new(
"llmtrace_zone_detection_failures_total",
"Zone-detection failures (header parse errors, byte-range mismatches)",
),
&["reason"],
)
.expect("metric: zone_detection_failures_total");
registry
.register(Box::new(zone_detection_failures_total.clone()))
.expect("register zone_detection_failures_total");
let spotlighting_zones_total = IntCounterVec::new(
Opts::new(
"llmtrace_spotlighting_zones_total",
"Data zones marked by the IS-060 PR-2 datamarking transform",
),
&["kind", "shadow"],
)
.expect("metric: spotlighting_zones_total");
registry
.register(Box::new(spotlighting_zones_total.clone()))
.expect("register spotlighting_zones_total");
let spotlighting_byte_delta_total = IntCounterVec::new(
Opts::new(
"llmtrace_spotlighting_byte_delta_total",
"Cumulative bytes added by marker substitution per request",
),
&["shadow"],
)
.expect("metric: spotlighting_byte_delta_total");
registry
.register(Box::new(spotlighting_byte_delta_total.clone()))
.expect("register spotlighting_byte_delta_total");
let spotlighting_marker_collision_total = IntCounter::new(
"llmtrace_spotlighting_marker_collision_total",
"Marker codepoints resampled because the first sample collided with zone content",
)
.expect("metric: spotlighting_marker_collision_total");
registry
.register(Box::new(spotlighting_marker_collision_total.clone()))
.expect("register spotlighting_marker_collision_total");
let spotlighting_failures_total = IntCounterVec::new(
Opts::new(
"llmtrace_spotlighting_failures_total",
"Datamarking pipeline failures (fail-open events), by reason",
),
&["reason"],
)
.expect("metric: spotlighting_failures_total");
registry
.register(Box::new(spotlighting_failures_total.clone()))
.expect("register spotlighting_failures_total");
let ml_chunks_total = HistogramVec::new(
HistogramOpts::new(
"llmtrace_ml_chunks_total",
"ML sliding window chunks processed per classify call",
)
.buckets(vec![1.0, 2.0, 3.0, 5.0, 10.0]),
&["model"],
)
.expect("metric: ml_chunks_total");
registry
.register(Box::new(ml_chunks_total.clone()))
.expect("register ml_chunks_total");
let ml_input_truncated_total = IntCounterVec::new(
Opts::new(
"llmtrace_ml_input_truncated_total",
"ML inputs that hit the sliding window chunk cap",
),
&["model"],
)
.expect("metric: ml_input_truncated_total");
registry
.register(Box::new(ml_input_truncated_total.clone()))
.expect("register ml_input_truncated_total");
let response_truncated_total = IntCounter::new(
"llmtrace_response_truncated_total",
"Response collections truncated due to max_response_size_bytes",
)
.expect("metric: response_truncated_total");
registry
.register(Box::new(response_truncated_total.clone()))
.expect("register response_truncated_total");
let analysis_text_truncated_total = IntCounter::new(
"llmtrace_analysis_text_truncated_total",
"Analysis text truncations due to max_analysis_text_bytes",
)
.expect("metric: analysis_text_truncated_total");
registry
.register(Box::new(analysis_text_truncated_total.clone()))
.expect("register analysis_text_truncated_total");
let action_executions_total = IntCounterVec::new(
Opts::new(
"llmtrace_action_executions_total",
"Total enforcement actions executed by the router",
),
&["action_type", "status", "mode"],
)
.expect("metric: action_executions_total");
registry
.register(Box::new(action_executions_total.clone()))
.expect("register action_executions_total");
let action_latency_seconds = HistogramVec::new(
HistogramOpts::new(
"llmtrace_action_latency_seconds",
"Per-action execution latency in seconds",
)
.buckets(vec![0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.5, 1.0, 5.0]),
&["action_type"],
)
.expect("metric: action_latency_seconds");
registry
.register(Box::new(action_latency_seconds.clone()))
.expect("register action_latency_seconds");
let ip_blocks_active = IntGauge::new(
"llmtrace_ip_blocks_active",
"Currently blocked IPs recorded by this proxy instance",
)
.expect("metric: ip_blocks_active");
registry
.register(Box::new(ip_blocks_active.clone()))
.expect("register ip_blocks_active");
let action_rule_matches_total = IntCounterVec::new(
Opts::new(
"llmtrace_action_rule_matches_total",
"Rule match frequency by finding type and action type",
),
&["finding_type", "action_type"],
)
.expect("metric: action_rule_matches_total");
registry
.register(Box::new(action_rule_matches_total.clone()))
.expect("register action_rule_matches_total");
let feature_flag_updates_total = IntCounterVec::new(
Opts::new(
"llmtrace_feature_flag_updates_total",
"Total runtime feature-flag updates, labelled by feature name",
),
&["feature"],
)
.expect("metric: feature_flag_updates_total");
registry
.register(Box::new(feature_flag_updates_total.clone()))
.expect("register feature_flag_updates_total");
let feature_flag_bool_state = IntGaugeVec::new(
Opts::new(
"llmtrace_feature_flag_bool_state",
"Current runtime value of each bool-typed feature flag (0 or 1)",
),
&["feature"],
)
.expect("metric: feature_flag_bool_state");
registry
.register(Box::new(feature_flag_bool_state.clone()))
.expect("register feature_flag_bool_state");
let feature_flag_string_state = IntGaugeVec::new(
Opts::new(
"llmtrace_feature_flag_string_state",
"Info metric for string-typed feature flags; the active (feature,value) pair is 1",
),
&["feature", "value"],
)
.expect("metric: feature_flag_string_state");
registry
.register(Box::new(feature_flag_string_state.clone()))
.expect("register feature_flag_string_state");
let config_persist_errors_total = IntCounter::new(
"llmtrace_config_persist_errors_total",
"Total failures writing the runtime feature-flag sidecar overlay",
)
.expect("metric: config_persist_errors_total");
registry
.register(Box::new(config_persist_errors_total.clone()))
.expect("register config_persist_errors_total");
let audit_event_dropped_total = IntCounterVec::new(
Opts::new(
"llmtrace_audit_event_dropped_total",
"Total forensic AuditEvent writes that failed to persist, by event type",
),
&["event_type"],
)
.expect("metric: audit_event_dropped_total");
registry
.register(Box::new(audit_event_dropped_total.clone()))
.expect("register audit_event_dropped_total");
let judge_requests_total = IntCounterVec::new(
Opts::new(
"llmtrace_judge_requests_total",
"Total judge invocations by backend, model, mode, and status",
),
&["backend", "model", "mode", "status"],
)
.expect("metric: judge_requests_total");
registry
.register(Box::new(judge_requests_total.clone()))
.expect("register judge_requests_total");
let judge_latency_seconds = HistogramVec::new(
HistogramOpts::new(
"llmtrace_judge_latency_seconds",
"Judge call latency in seconds",
)
.buckets(vec![0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 20.0, 30.0, 60.0]),
&["backend", "model", "mode"],
)
.expect("metric: judge_latency_seconds");
registry
.register(Box::new(judge_latency_seconds.clone()))
.expect("register judge_latency_seconds");
let judge_tokens_total = IntCounterVec::new(
Opts::new(
"llmtrace_judge_tokens_total",
"Judge token consumption by direction, backend, and model",
),
&["direction", "backend", "model"],
)
.expect("metric: judge_tokens_total");
registry
.register(Box::new(judge_tokens_total.clone()))
.expect("register judge_tokens_total");
let judge_verdicts_total = IntCounterVec::new(
Opts::new(
"llmtrace_judge_verdicts_total",
"Verdicts emitted by the judge by category, recommended action, threat flag, and model",
),
&["category", "recommended_action", "is_threat", "model"],
)
.expect("metric: judge_verdicts_total");
registry
.register(Box::new(judge_verdicts_total.clone()))
.expect("register judge_verdicts_total");
let judge_queue_depth = IntGauge::new(
"llmtrace_judge_queue_depth",
"Current judge worker queue depth",
)
.expect("metric: judge_queue_depth");
registry
.register(Box::new(judge_queue_depth.clone()))
.expect("register judge_queue_depth");
let judge_verdict_agreement = IntCounterVec::new(
Opts::new(
"llmtrace_judge_verdict_agreement",
"Agreement between the judge verdict and the prior ensemble outcome",
),
&["agreement"],
)
.expect("metric: judge_verdict_agreement");
registry
.register(Box::new(judge_verdict_agreement.clone()))
.expect("register judge_verdict_agreement");
let judge_dropped_total = IntCounterVec::new(
Opts::new(
"llmtrace_judge_dropped_total",
"Judge requests dropped without a backend call, by reason",
),
&["reason"],
)
.expect("metric: judge_dropped_total");
registry
.register(Box::new(judge_dropped_total.clone()))
.expect("register judge_dropped_total");
for reason in &[
"disabled",
"below_threshold",
"channel_full",
"channel_closed",
"persist_failure",
"semaphore_closed",
"shutdown",
"analysis_text_truncated",
] {
judge_dropped_total.with_label_values(&[reason]).inc_by(0);
}
let judge_promotion_rejected_total = IntCounterVec::new(
Opts::new(
"llmtrace_judge_promotion_rejected_total",
"Judge verdicts that did not pass the inline promotion gate, by reason",
),
&["reason"],
)
.expect("metric: judge_promotion_rejected_total");
registry
.register(Box::new(judge_promotion_rejected_total.clone()))
.expect("register judge_promotion_rejected_total");
for reason in &[
"not_threat_or_block",
"below_confidence",
"below_score",
"no_ensemble_support",
] {
judge_promotion_rejected_total
.with_label_values(&[reason])
.inc_by(0);
}
let judge_shadow_would_block_total = IntCounterVec::new(
Opts::new(
"llmtrace_judge_shadow_would_block_total",
"Judge verdicts suppressed by shadow mode that would otherwise have been promoted to Block",
),
&["category", "recommended_action"],
)
.expect("metric: judge_shadow_would_block_total");
registry
.register(Box::new(judge_shadow_would_block_total.clone()))
.expect("register judge_shadow_would_block_total");
let judge_golden_set_alignment = GaugeVec::new(
Opts::new(
"llmtrace_judge_golden_set_alignment",
"Per-category alignment between the security analyzer and the golden-set ground truth (0.0–1.0)",
),
&["category"],
)
.expect("metric: judge_golden_set_alignment");
registry
.register(Box::new(judge_golden_set_alignment.clone()))
.expect("register judge_golden_set_alignment");
let judge_golden_set_false_positive_rate = GaugeVec::new(
Opts::new(
"llmtrace_judge_golden_set_false_positive_rate",
"Per-category false-positive rate against benign golden-set entries (0.0–1.0)",
),
&["category"],
)
.expect("metric: judge_golden_set_false_positive_rate");
registry
.register(Box::new(judge_golden_set_false_positive_rate.clone()))
.expect("register judge_golden_set_false_positive_rate");
let ml_inflight_requests = IntGauge::new(
"llmtrace_ml_inflight_requests",
"Requests currently inside the ML detection pipeline (permits-in-use)",
)
.expect("metric: ml_inflight_requests");
registry
.register(Box::new(ml_inflight_requests.clone()))
.expect("register ml_inflight_requests");
let ml_rejected_total = IntCounter::new(
"llmtrace_ml_rejected_total",
"Total ML pipeline requests rejected because the concurrency cap was saturated",
)
.expect("metric: ml_rejected_total");
registry
.register(Box::new(ml_rejected_total.clone()))
.expect("register ml_rejected_total");
let analyzer_dropped_total = IntCounterVec::new(
Opts::new(
"llmtrace_analyzer_dropped_total",
"Post-response security analysis attempts dropped without producing findings, by reason (issue #298)",
),
&["reason"],
)
.expect("metric: analyzer_dropped_total");
registry
.register(Box::new(analyzer_dropped_total.clone()))
.expect("register analyzer_dropped_total");
for reason in &[
"disabled",
"circuit_breaker_open",
"analyzer_error",
"analyzer_timeout",
] {
analyzer_dropped_total
.with_label_values(&[reason])
.inc_by(0);
}
for subsystem in &["storage", "security"] {
for state in &["closed", "open", "half_open"] {
let val = if *state == "closed" { 1.0 } else { 0.0 };
circuit_breaker_state
.with_label_values(&[subsystem, state])
.set(val);
}
}
Self {
registry,
requests_total,
request_duration_seconds,
tokens_total,
security_findings_total,
circuit_breaker_state,
storage_operations_total,
cost_usd_total,
anomalies_total,
security_detector_latency_seconds,
active_connections,
boundary_defense_applied_total,
boundary_defense_messages_wrapped,
boundary_defense_reminder_injected_total,
boundary_defense_overhead_bytes,
boundary_defense_errors_total,
boundary_defense_skipped_total,
boundary_defense_shadow_mode,
zone_detection_zones_total,
zone_detection_findings_total,
zone_detection_failures_total,
spotlighting_zones_total,
spotlighting_byte_delta_total,
spotlighting_marker_collision_total,
spotlighting_failures_total,
ml_chunks_total,
ml_input_truncated_total,
response_truncated_total,
analysis_text_truncated_total,
action_executions_total,
action_latency_seconds,
ip_blocks_active,
action_rule_matches_total,
feature_flag_updates_total,
feature_flag_bool_state,
feature_flag_string_state,
config_persist_errors_total,
audit_event_dropped_total,
judge_requests_total,
judge_latency_seconds,
judge_tokens_total,
judge_verdicts_total,
judge_queue_depth,
judge_verdict_agreement,
judge_dropped_total,
judge_promotion_rejected_total,
judge_shadow_would_block_total,
judge_golden_set_alignment,
judge_golden_set_false_positive_rate,
ml_inflight_requests,
ml_rejected_total,
analyzer_dropped_total,
}
}
/// Publishes the latest golden-set evaluation results for one `category`.
///
/// Sets `judge_golden_set_alignment` to `alignment_rate` and
/// `judge_golden_set_false_positive_rate` to `false_positive_rate`,
/// overwriting whatever value each gauge held for that category.
pub fn record_golden_set_alignment(
    &self,
    category: &str,
    alignment_rate: f64,
    false_positive_rate: f64,
) {
    let alignment_gauge = self
        .judge_golden_set_alignment
        .with_label_values(&[category]);
    alignment_gauge.set(alignment_rate);

    let false_positive_gauge = self
        .judge_golden_set_false_positive_rate
        .with_label_values(&[category]);
    false_positive_gauge.set(false_positive_rate);
}
/// Renders every registered metric family in the Prometheus text format.
///
/// # Errors
///
/// Returns the encoder's error if the gathered families cannot be encoded.
pub fn gather_text(&self) -> Result<String, prometheus::Error> {
    let families = self.registry.gather();
    let mut buffer = Vec::new();
    TextEncoder::new().encode(&families, &mut buffer)?;
    // Take ownership of the encoded bytes instead of copying them; only fall
    // back to a lossy (replacement-character) conversion if they turn out not
    // to be valid UTF-8, which matches the previous output in that case.
    Ok(String::from_utf8(buffer)
        .unwrap_or_else(|err| String::from_utf8_lossy(err.as_bytes()).into_owned()))
}
/// Records one completed proxied request.
///
/// Increments `requests_total` for (`provider`, `model`, `status_code`) and
/// observes `duration_secs` in `request_duration_seconds` for
/// (`provider`, `model`).
pub fn record_request(
    &self,
    provider: &str,
    model: &str,
    status_code: u16,
    duration_secs: f64,
) {
    // Label values are strings, so the numeric status is rendered in decimal.
    let code_label = status_code.to_string();

    let request_counter =
        self.requests_total
            .with_label_values(&[provider, model, code_label.as_str()]);
    request_counter.inc();

    let duration_histogram = self
        .request_duration_seconds
        .with_label_values(&[provider, model]);
    duration_histogram.observe(duration_secs);
}
/// Adds token usage to `tokens_total`.
///
/// Prompt tokens are counted under `direction="prompt"` and completion
/// tokens under `direction="completion"`; a `None` count is skipped
/// without touching its series.
pub fn record_tokens(
    &self,
    provider: &str,
    model: &str,
    prompt_tokens: Option<u32>,
    completion_tokens: Option<u32>,
) {
    let usage = [
        ("prompt", prompt_tokens),
        ("completion", completion_tokens),
    ];
    for (direction, count) in usage {
        if let Some(tokens) = count {
            self.tokens_total
                .with_label_values(&[direction, provider, model])
                .inc_by(u64::from(tokens));
        }
    }
}
/// Increments `security_findings_total` once per finding, labelled with the
/// finding's `Display`-formatted severity and its `finding_type`.
pub fn record_security_findings(&self, findings: &[llmtrace_core::SecurityFinding]) {
    findings.iter().for_each(|finding| {
        let severity = finding.severity.to_string();
        self.security_findings_total
            .with_label_values(&[&severity, &finding.finding_type])
            .inc();
    });
}
/// Increments `analyzer_dropped_total` for the given drop `reason`.
pub fn record_analyzer_dropped(&self, reason: &str) {
    let dropped = self.analyzer_dropped_total.with_label_values(&[reason]);
    dropped.inc();
}
/// Marks `state` as the active circuit-breaker state for `subsystem`.
///
/// The `closed`, `open` and `half_open` series for the subsystem are all
/// rewritten: the one equal to `state` becomes 1, the others 0. If `state`
/// is none of those three, all three end up at 0.
pub fn set_circuit_breaker_state(&self, subsystem: &str, state: &str) {
    for candidate in ["closed", "open", "half_open"] {
        let value = match candidate == state {
            true => 1.0,
            false => 0.0,
        };
        self.circuit_breaker_state
            .with_label_values(&[subsystem, candidate])
            .set(value);
    }
}
/// Increments `storage_operations_total` for `operation`, with
/// `status="success"` when `success` is true and `status="error"` otherwise.
pub fn record_storage_operation(&self, operation: &str, success: bool) {
    let outcome = match success {
        true => "success",
        false => "error",
    };
    let counter = self
        .storage_operations_total
        .with_label_values(&[operation, outcome]);
    counter.inc();
}
/// Adds an estimated cost, given in USD, to `cost_usd_total` in micro-USD.
///
/// The amount is rounded to the nearest micro-USD rather than truncated, so
/// floating-point error (e.g. `28.999999…`) no longer drops a whole unit and
/// the running total is not systematically biased low. Non-finite and
/// non-positive costs are ignored, as are amounts that round to zero, so no
/// series is created for them.
pub fn record_cost(&self, tenant: &str, model: &str, cost_usd: f64) {
    // NaN and negatives previously cast to 0 and were skipped; +inf cast to
    // `u64::MAX` and would have been added to the counter. Reject all of
    // them explicitly.
    if !cost_usd.is_finite() || cost_usd <= 0.0 {
        return;
    }
    // The float-to-int `as` cast saturates, so an enormous finite value
    // clamps to `u64::MAX` instead of wrapping.
    let micro_usd = (cost_usd * 1_000_000.0).round() as u64;
    if micro_usd > 0 {
        self.cost_usd_total
            .with_label_values(&[tenant, model])
            .inc_by(micro_usd);
    }
}
/// Observes a detector's latency in `security_detector_latency_seconds`.
///
/// `duration_ms` is given in milliseconds and converted to seconds before
/// being observed.
pub fn record_detector_latency(&self, detector: &str, duration_ms: u64) {
    let millis = duration_ms as f64;
    let histogram = self
        .security_detector_latency_seconds
        .with_label_values(&[detector]);
    histogram.observe(millis / 1000.0);
}
/// Records the outcome of applying boundary defense to one request.
///
/// * `boundary_defense_applied_total` is incremented with `mode="shadow"`
///   or `mode="active"` depending on `shadow_mode`.
/// * `messages_wrapped` and `overhead_bytes` are observed in their
///   per-provider histograms.
/// * `boundary_defense_reminder_injected_total` is incremented only when
///   `reminder_injected` is true.
pub fn record_boundary_defense(
    &self,
    provider: &str,
    messages_wrapped: u32,
    reminder_injected: bool,
    overhead_bytes: i64,
    shadow_mode: bool,
) {
    let mode = match shadow_mode {
        true => "shadow",
        false => "active",
    };
    let wrapped = f64::from(messages_wrapped);
    let overhead = overhead_bytes as f64;

    self.boundary_defense_applied_total
        .with_label_values(&[provider, mode])
        .inc();
    self.boundary_defense_messages_wrapped
        .with_label_values(&[provider])
        .observe(wrapped);
    if reminder_injected {
        let reminders = self
            .boundary_defense_reminder_injected_total
            .with_label_values(&[provider]);
        reminders.inc();
    }
    self.boundary_defense_overhead_bytes
        .with_label_values(&[provider])
        .observe(overhead);
}
/// Records the result of one zone-detection pass.
///
/// Each `(kind, origin, framing)` tuple in `zones` increments
/// `zone_detection_zones_total`; each entry in `failure_reasons`
/// increments `zone_detection_failures_total`.
pub fn record_zone_detection(&self, zones: &[(&str, &str, &str)], failure_reasons: &[&str]) {
    zones.iter().for_each(|&(kind, origin, framing)| {
        self.zone_detection_zones_total
            .with_label_values(&[kind, origin, framing])
            .inc();
    });
    failure_reasons.iter().for_each(|&reason| {
        self.zone_detection_failures_total
            .with_label_values(&[reason])
            .inc();
    });
}
/// Increments `zone_detection_findings_total` for every finding whose
/// metadata contains a `zone_kind` entry; findings without one are skipped.
pub fn record_zone_findings(&self, findings: &[llmtrace_core::SecurityFinding]) {
    let zoned = findings
        .iter()
        .filter_map(|finding| finding.metadata.get("zone_kind").map(|kind| (finding, kind)));
    for (finding, zone_kind) in zoned {
        self.zone_detection_findings_total
            .with_label_values(&[&finding.finding_type, zone_kind])
            .inc();
    }
}
/// Records the outcome of one datamarking (spotlighting) pass.
///
/// * `zones_marked` is added to `spotlighting_zones_total` under
///   `kind="data"` when it is non-zero.
/// * `byte_delta` is added to `spotlighting_byte_delta_total` only when it
///   is strictly positive.
/// * `marker_collisions` is added to `spotlighting_marker_collision_total`
///   when it is non-zero.
/// * Every entry in `failure_reasons` increments
///   `spotlighting_failures_total`.
///
/// The `shadow` label is `"true"` or `"false"` according to `shadow_mode`.
pub fn record_datamarking(
    &self,
    zones_marked: u32,
    byte_delta: i64,
    marker_collisions: u32,
    shadow_mode: bool,
    failure_reasons: &[&str],
) {
    let shadow = match shadow_mode {
        true => "true",
        false => "false",
    };

    if zones_marked != 0 {
        let zones = self
            .spotlighting_zones_total
            .with_label_values(&["data", shadow]);
        zones.inc_by(u64::from(zones_marked));
    }

    if byte_delta > 0 {
        // Strictly positive here, so the cast to u64 cannot change the value.
        let added_bytes = byte_delta as u64;
        self.spotlighting_byte_delta_total
            .with_label_values(&[shadow])
            .inc_by(added_bytes);
    }

    if marker_collisions != 0 {
        self.spotlighting_marker_collision_total
            .inc_by(u64::from(marker_collisions));
    }

    failure_reasons.iter().for_each(|&reason| {
        self.spotlighting_failures_total
            .with_label_values(&[reason])
            .inc();
    });
}
/// Increments `anomalies_total` for every finding whose metadata contains
/// an `anomaly_type` entry, using that entry as the label value.
pub fn record_anomalies(&self, findings: &[llmtrace_core::SecurityFinding]) {
    findings
        .iter()
        .filter_map(|finding| finding.metadata.get("anomaly_type"))
        .for_each(|anomaly_type| {
            self.anomalies_total
                .with_label_values(&[anomaly_type])
                .inc();
        });
}
/// Increments `action_executions_total` for the given
/// (`action_type`, `status`, `mode`) combination.
pub fn record_action_execution(&self, action_type: &str, status: &str, mode: &str) {
    let executions = self
        .action_executions_total
        .with_label_values(&[action_type, status, mode]);
    executions.inc();
}
/// Observes how long one action of `action_type` took, in seconds, in
/// `action_latency_seconds`.
pub fn record_action_latency(&self, action_type: &str, duration: std::time::Duration) {
    let seconds = duration.as_secs_f64();
    let histogram = self.action_latency_seconds.with_label_values(&[action_type]);
    histogram.observe(seconds);
}
/// Increments `action_rule_matches_total` for a rule that matched
/// `finding_type` and selected `action_type`.
pub fn record_action_rule_match(&self, finding_type: &str, action_type: &str) {
    let matches = self
        .action_rule_matches_total
        .with_label_values(&[finding_type, action_type]);
    matches.inc();
}
/// Increments `judge_requests_total` for one judge invocation, labelled by
/// `backend`, `model`, `mode` and `status`.
pub fn record_judge_request(&self, backend: &str, model: &str, mode: &str, status: &str) {
    let requests = self
        .judge_requests_total
        .with_label_values(&[backend, model, mode, status]);
    requests.inc();
}
/// Observes the duration of one judge call, in seconds, in
/// `judge_latency_seconds` for (`backend`, `model`, `mode`).
pub fn record_judge_latency(
    &self,
    backend: &str,
    model: &str,
    mode: &str,
    duration: std::time::Duration,
) {
    let seconds = duration.as_secs_f64();
    let histogram = self
        .judge_latency_seconds
        .with_label_values(&[backend, model, mode]);
    histogram.observe(seconds);
}
/// Adds judge token usage to `judge_tokens_total`.
///
/// Prompt tokens go under `direction="prompt"`, completion tokens under
/// `direction="completion"`; a `None` count leaves its series untouched.
pub fn record_judge_tokens(
    &self,
    backend: &str,
    model: &str,
    prompt_tokens: Option<u32>,
    completion_tokens: Option<u32>,
) {
    let usage = [
        ("prompt", prompt_tokens),
        ("completion", completion_tokens),
    ];
    for (direction, count) in usage {
        let Some(tokens) = count else { continue };
        self.judge_tokens_total
            .with_label_values(&[direction, backend, model])
            .inc_by(u64::from(tokens));
    }
}
/// Increments `judge_verdicts_total` for one verdict, labelled with its
/// category, recommended action, threat flag (`"true"`/`"false"`) and the
/// model that produced it.
pub fn record_judge_verdict(&self, verdict: &llmtrace_core::JudgeVerdict) {
    let threat_flag = match verdict.is_threat {
        true => "true",
        false => "false",
    };
    let verdicts = self.judge_verdicts_total.with_label_values(&[
        verdict.category.as_str(),
        verdict.recommended_action.as_str(),
        threat_flag,
        verdict.model_used.as_str(),
    ]);
    verdicts.inc();
}
/// Increments `judge_dropped_total` for the given drop `reason`.
pub fn record_judge_dropped(&self, reason: &str) {
    let dropped = self.judge_dropped_total.with_label_values(&[reason]);
    dropped.inc();
}
/// Increments `judge_verdict_agreement` for the given `agreement` label.
pub fn record_judge_agreement(&self, agreement: &str) {
    let counter = self.judge_verdict_agreement.with_label_values(&[agreement]);
    counter.inc();
}
/// Increments `judge_promotion_rejected_total` for the given rejection
/// `reason`.
pub fn record_judge_promotion_rejected(&self, reason: &str) {
    let rejected = self
        .judge_promotion_rejected_total
        .with_label_values(&[reason]);
    rejected.inc();
}
/// Increments `judge_shadow_would_block_total` for the given `category`
/// and `recommended_action`.
pub fn record_judge_shadow_would_block(&self, category: &str, recommended_action: &str) {
    let would_block = self
        .judge_shadow_would_block_total
        .with_label_values(&[category, recommended_action]);
    would_block.inc();
}
}
impl Default for Metrics {
fn default() -> Self {
Self::new()
}
}
/// Axum handler that serves the current metrics in Prometheus text format.
///
/// Responds with `200 OK` and the text exposition content type on success.
/// If gathering fails, responds with `500 Internal Server Error` and a
/// plain-text description of the error.
pub async fn metrics_handler(State(state): State<Arc<AppState>>) -> Response<Body> {
    let (status, content_type, payload) = match state.metrics.gather_text() {
        Ok(text) => (
            StatusCode::OK,
            "text/plain; version=0.0.4; charset=utf-8",
            text,
        ),
        Err(e) => (
            StatusCode::INTERNAL_SERVER_ERROR,
            "text/plain",
            format!("Failed to gather metrics: {e}"),
        ),
    };

    Response::builder()
        .status(status)
        .header("content-type", content_type)
        .body(Body::from(payload))
        .unwrap()
}
/// Maps a provider to the fixed lowercase string used as its `provider`
/// label value.
///
/// Every `Custom(_)` provider maps to `"custom"`, whatever its payload.
pub fn provider_label(provider: &llmtrace_core::LLMProvider) -> &'static str {
    use llmtrace_core::LLMProvider as Provider;

    let label: &'static str = match provider {
        Provider::OpenAI => "openai",
        Provider::Anthropic => "anthropic",
        Provider::VLLm => "vllm",
        Provider::SGLang => "sglang",
        Provider::TGI => "tgi",
        Provider::Ollama => "ollama",
        Provider::AzureOpenAI => "azure_openai",
        Provider::Bedrock => "bedrock",
        Provider::Custom(_) => "custom",
    };
    label
}
#[cfg(test)]
mod tests {
use super::*;
use llmtrace_core::{SecurityFinding, SecuritySeverity};
#[test]
fn test_metrics_new_succeeds() {
let m = Metrics::new();
let text = m.gather_text().unwrap();
assert!(text.contains("llmtrace_active_connections"));
}
#[test]
fn test_record_request_increments_counter() {
let m = Metrics::new();
m.record_request("openai", "gpt-4", 200, 1.5);
m.record_request("openai", "gpt-4", 200, 0.8);
m.record_request("openai", "gpt-4", 500, 0.1);
let text = m.gather_text().unwrap();
assert!(text.contains("llmtrace_requests_total"));
assert!(text.contains("status_code=\"200\""));
assert!(text.contains("status_code=\"500\""));
}
#[test]
fn test_record_request_observes_histogram() {
let m = Metrics::new();
m.record_request("anthropic", "claude-3", 200, 2.0);
let text = m.gather_text().unwrap();
assert!(text.contains("llmtrace_request_duration_seconds"));
assert!(text.contains("provider=\"anthropic\""));
}
#[test]
fn test_record_tokens() {
let m = Metrics::new();
m.record_tokens("openai", "gpt-4", Some(100), Some(50));
let text = m.gather_text().unwrap();
assert!(text.contains("llmtrace_tokens_total"));
assert!(text.contains("direction=\"prompt\""));
assert!(text.contains("direction=\"completion\""));
}
#[test]
fn test_record_tokens_none_values() {
let m = Metrics::new();
m.record_tokens("openai", "gpt-4", None, None);
let text = m.gather_text().unwrap();
assert!(!text.is_empty());
}
#[test]
fn test_record_security_findings() {
let m = Metrics::new();
let findings = vec![
SecurityFinding::new(
SecuritySeverity::High,
"prompt_injection".to_string(),
"test".to_string(),
0.9,
),
SecurityFinding::new(
SecuritySeverity::Low,
"pii_leak".to_string(),
"test".to_string(),
0.5,
),
];
m.record_security_findings(&findings);
let text = m.gather_text().unwrap();
assert!(text.contains("llmtrace_security_findings_total"));
assert!(text.contains("severity=\"High\""));
assert!(text.contains("finding_type=\"prompt_injection\""));
assert!(text.contains("severity=\"Low\""));
assert!(text.contains("finding_type=\"pii_leak\""));
}
/// The circuit-breaker gauge family must be rendered for a fresh registry,
/// and switching the `storage` subsystem to `open` must be reflected in the
/// per-state gauge values.
///
/// NOTE(review): the `closed == 0.0` assertion assumes the setter clears the
/// previously active state (one-hot encoding, matching the defaults of
/// `closed = 1.0` / `open = 0.0`) — confirm against
/// `set_circuit_breaker_state`.
#[test]
fn test_circuit_breaker_state_gauge() {
    let m = Metrics::new();
    let text = m.gather_text().unwrap();
    assert!(text.contains("llmtrace_circuit_breaker_state"));

    m.set_circuit_breaker_state("storage", "open");

    // The family name is already rendered before the update, so a name-only
    // check cannot detect a broken setter; read the gauges directly.
    let storage_state = |state: &str| {
        m.circuit_breaker_state
            .with_label_values(&["storage", state])
            .get()
    };
    assert_eq!(storage_state("open"), 1.0);
    assert_eq!(storage_state("closed"), 0.0);

    let text = m.gather_text().unwrap();
    assert!(text.contains("llmtrace_circuit_breaker_state"));
}
/// One successful and one failed storage operation must both be visible in
/// the storage-operations counter, distinguished by the `status` label.
#[test]
fn test_storage_operations() {
    let metrics = Metrics::new();
    for succeeded in [true, false] {
        metrics.record_storage_operation("store_trace", succeeded);
    }

    let rendered = metrics.gather_text().unwrap();
    let expected = [
        "llmtrace_storage_operations_total",
        "operation=\"store_trace\"",
        "status=\"success\"",
        "status=\"error\"",
    ];
    for needle in expected {
        assert!(rendered.contains(needle), "missing `{needle}` in rendered metrics");
    }
}
/// A non-zero cost must be exported under the cost counter with tenant and
/// model labels.
#[test]
fn test_cost_recording() {
    let metrics = Metrics::new();
    metrics.record_cost("tenant-abc", "gpt-4", 0.05);

    let rendered = metrics.gather_text().unwrap();
    let expected = [
        "llmtrace_cost_usd_total",
        "tenant=\"tenant-abc\"",
        "model=\"gpt-4\"",
    ];
    for needle in expected {
        assert!(rendered.contains(needle), "missing `{needle}` in rendered metrics");
    }
}
/// A cost of exactly zero must not create a sample for the tenant.
#[test]
fn test_cost_zero_not_recorded() {
    let metrics = Metrics::new();
    metrics.record_cost("t", "m", 0.0);

    let rendered = metrics.gather_text().unwrap();
    let tenant_seen = rendered.contains("tenant=\"t\"");
    assert!(!tenant_seen);
}
/// A finding carrying an `anomaly_type` metadata entry must be exported by
/// the anomalies counter with that value as its label.
#[test]
fn test_anomaly_recording() {
    let mut finding = SecurityFinding::new(
        SecuritySeverity::High,
        "anomaly_cost_spike".to_string(),
        "test".to_string(),
        0.9,
    );
    finding
        .metadata
        .insert("anomaly_type".to_string(), "cost_spike".to_string());

    let metrics = Metrics::new();
    metrics.record_anomalies(&[finding]);

    let rendered = metrics.gather_text().unwrap();
    for needle in ["llmtrace_anomalies_total", "anomaly_type=\"cost_spike\""] {
        assert!(rendered.contains(needle), "missing `{needle}` in rendered metrics");
    }
}
/// The active-connections gauge must track increments and decrements.
#[test]
fn test_active_connections_gauge() {
    let metrics = Metrics::new();
    let connections = &metrics.active_connections;

    // Two connections open ...
    connections.inc();
    connections.inc();
    assert_eq!(connections.get(), 2);

    // ... then one closes.
    connections.dec();
    assert_eq!(connections.get(), 1);
}
/// Each listed provider must map to its lowercase label; a `Custom` provider
/// maps to `custom`.
#[test]
fn test_provider_label() {
    use llmtrace_core::LLMProvider;

    let cases = [
        (LLMProvider::OpenAI, "openai"),
        (LLMProvider::Anthropic, "anthropic"),
        (LLMProvider::VLLm, "vllm"),
        (LLMProvider::Ollama, "ollama"),
        (LLMProvider::Custom("foo".into()), "custom"),
    ];
    for (provider, expected) in &cases {
        assert_eq!(provider_label(provider), *expected);
    }
}
/// Rendered output must carry `# HELP` and `# TYPE` lines, with the expected
/// metric kind, for the core request, token and connection metrics.
#[test]
fn test_gather_text_valid_prometheus_format() {
    let metrics = Metrics::new();
    metrics.record_request("openai", "gpt-4", 200, 1.0);
    metrics.record_tokens("openai", "gpt-4", Some(100), Some(50));
    metrics.record_storage_operation("store_trace", true);

    let rendered = metrics.gather_text().unwrap();

    // (metric family, kind expected on its `# TYPE` line)
    let families = [
        ("llmtrace_requests_total", "counter"),
        ("llmtrace_request_duration_seconds", "histogram"),
        ("llmtrace_tokens_total", "counter"),
        ("llmtrace_active_connections", "gauge"),
    ];
    for (family, kind) in families {
        let help_line = format!("# HELP {family}");
        let type_line = format!("# TYPE {family} {kind}");
        assert!(rendered.contains(&help_line), "missing `{help_line}`");
        assert!(rendered.contains(&type_line), "missing `{type_line}`");
    }
}
/// A fresh registry must report `storage` and `security` as closed, with the
/// `storage` open state at zero.
#[test]
fn test_default_circuit_breaker_state() {
    let metrics = Metrics::new();
    let gauge_value = |subsystem: &str, state: &str| {
        metrics
            .circuit_breaker_state
            .with_label_values(&[subsystem, state])
            .get()
    };

    assert_eq!(gauge_value("storage", "closed"), 1.0);
    assert_eq!(gauge_value("storage", "open"), 0.0);
    assert_eq!(gauge_value("security", "closed"), 1.0);
}
/// Action execution, latency, rule-match and IP-block metrics must all appear
/// in the rendered output with the labels they were recorded under.
#[test]
fn test_action_metrics_are_recorded() {
    use std::time::Duration;

    let metrics = Metrics::new();
    metrics.record_action_execution("webhook", "success", "async");
    metrics.record_action_latency("webhook", Duration::from_millis(25));
    metrics.record_action_rule_match("prompt_injection", "webhook");
    metrics.ip_blocks_active.inc();

    let rendered = metrics.gather_text().unwrap();
    let expected = [
        "llmtrace_action_executions_total",
        "action_type=\"webhook\"",
        "status=\"success\"",
        "mode=\"async\"",
        "llmtrace_action_latency_seconds",
        "llmtrace_ip_blocks_active",
        "llmtrace_action_rule_matches_total",
        "finding_type=\"prompt_injection\"",
    ];
    for needle in expected {
        assert!(rendered.contains(needle), "missing `{needle}` in rendered metrics");
    }
}
}