use std::sync::{LazyLock, Mutex};
use std::time::Duration;
use prometheus_client::{
encoding::text::encode,
metrics::{
counter::Counter,
gauge::Gauge,
histogram::{Histogram, exponential_buckets},
},
registry::Registry,
};
use tracing::{error, warn};
/// Acquisition duration, in seconds, above which `record_acquisition` logs a warning.
const WARN_ACQUISITION_SECS: f64 = 1.0;
/// Crash-to-acquisition ratio (0.10 = 10%) above which `record_acquisition` logs an error.
const ALERT_CRASH_RATE_THRESHOLD: f64 = 0.10;
/// Metric handles for the browser pool plus the registry they are registered in.
///
/// Instances are built by `BrowserMetrics::new`, which registers a clone of every
/// handle below in `registry`. The registry is rendered to text by
/// `BrowserMetrics::gather`.
#[derive(Debug)]
pub struct BrowserMetrics {
    /// Gauge registered as `browser_pool_size`; written by `set_pool_size`.
    pub pool_size: Gauge,
    /// Histogram registered as `browser_acquisition_duration_seconds`; fed by
    /// `record_acquisition` with the acquisition duration in seconds.
    pub acquisition_duration_seconds: Histogram,
    /// Counter registered as `browser_crashes_total`; incremented by `record_crash`.
    pub crashes_total: Counter,
    /// Gauge registered as `process_rss_bytes`; written by `refresh_rss`.
    pub process_rss_bytes: Gauge,
    /// Counter registered as `browser_acquisitions_total`; incremented by
    /// `record_acquisition` and used as the denominator of the crash-rate alert.
    acquisitions_total: Counter,
    /// Registry holding every metric above; locked only while encoding in `gather`.
    registry: Mutex<Registry>,
}
impl BrowserMetrics {
    /// Creates every metric handle and registers a clone of each in a new registry.
    fn new() -> Self {
        // 12 exponential buckets, starting at 0.005 with a growth factor of 2.0.
        let acquisition_duration_seconds = Histogram::new(exponential_buckets(0.005, 2.0, 12));
        let pool_size = Gauge::default();
        let crashes_total: Counter = Counter::default();
        let acquisitions_total: Counter = Counter::default();
        let process_rss_bytes = Gauge::default();

        let mut registry = Registry::default();
        registry.register(
            "browser_pool_size",
            "Number of active browser instances currently in use",
            pool_size.clone(),
        );
        registry.register(
            "browser_acquisition_duration_seconds",
            "Time taken to acquire a browser instance from the pool",
            acquisition_duration_seconds.clone(),
        );
        registry.register(
            "browser_crashes_total",
            "Cumulative number of browser crashes or health-check failures",
            crashes_total.clone(),
        );
        registry.register(
            "browser_acquisitions_total",
            "Cumulative number of browser acquisition calls",
            acquisitions_total.clone(),
        );
        registry.register(
            "process_rss_bytes",
            "Resident set size of the current process in bytes",
            process_rss_bytes.clone(),
        );

        Self {
            pool_size,
            acquisition_duration_seconds,
            crashes_total,
            acquisitions_total,
            process_rss_bytes,
            registry: Mutex::new(registry),
        }
    }

    /// Records one browser acquisition that took `duration`.
    ///
    /// Observes the duration (in seconds) in the histogram and increments the
    /// acquisition counter. Logs a warning when the duration exceeds
    /// `WARN_ACQUISITION_SECS`, and logs an error when the cumulative
    /// crashes-to-acquisitions ratio exceeds `ALERT_CRASH_RATE_THRESHOLD`.
    pub fn record_acquisition(&self, duration: Duration) {
        let secs = duration.as_secs_f64();
        self.acquisition_duration_seconds.observe(secs);
        self.acquisitions_total.inc();
        if secs > WARN_ACQUISITION_SECS {
            warn!(
                elapsed_ms = duration.as_millis(),
                "Browser acquisition exceeded 1s performance budget"
            );
        }

        let crashes = self.crashes_total.get();
        let acquires = self.acquisitions_total.get();
        let alerting_rate = Self::crash_rate(crashes, acquires)
            .filter(|rate| *rate > ALERT_CRASH_RATE_THRESHOLD);
        if let Some(rate) = alerting_rate {
            error!(
                crash_rate = format!("{:.1}%", rate * 100.0),
                crashes, acquires, "Browser crash rate exceeds 10% alert threshold"
            );
        }
    }

    /// Returns `crashes / acquires` as a fraction, or `None` when `acquires` is zero.
    ///
    /// The counters are converted to `f64` directly instead of being clamped to
    /// `u32::MAX` first: clamping distorts the ratio as soon as either counter
    /// exceeds `u32::MAX`, which can raise false alerts on long-running processes.
    // `u64 -> f64` is exact up to 2^53 and only loses low-order bits beyond that,
    // which is irrelevant when comparing a ratio against a coarse threshold.
    #[allow(clippy::cast_precision_loss)]
    fn crash_rate(crashes: u64, acquires: u64) -> Option<f64> {
        (acquires > 0).then(|| crashes as f64 / acquires as f64)
    }

    /// Sets the pool-size gauge to `active`.
    pub fn set_pool_size(&self, active: i64) {
        self.pool_size.set(active);
    }

    /// Increments the crash counter by one.
    pub fn record_crash(&self) {
        self.crashes_total.inc();
    }

    /// Re-reads the process RSS via `rss_bytes`, stores it in the gauge, and returns it.
    pub fn refresh_rss(&self) -> i64 {
        let rss = rss_bytes();
        self.process_rss_bytes.set(rss);
        rss
    }

    /// Encodes every registered metric into a text buffer and returns it.
    ///
    /// The RSS gauge is refreshed first so the output carries a current reading.
    /// Returns an empty string (after logging a warning) when the registry lock is
    /// poisoned. If encoding fails, a warning is logged and whatever was written to
    /// the buffer before the failure is returned.
    pub fn gather(&self) -> String {
        self.refresh_rss();
        let guard = match self.registry.lock() {
            Ok(g) => g,
            Err(e) => {
                warn!("Metrics registry lock poisoned: {e}");
                return String::new();
            }
        };
        let mut buf = String::new();
        if let Err(e) = encode(&mut buf, &guard) {
            warn!("Failed to encode Prometheus metrics: {e}");
        }
        buf
    }
}
/// Process-wide metrics instance, built with `BrowserMetrics::new` on first access.
pub static METRICS: LazyLock<BrowserMetrics> = LazyLock::new(BrowserMetrics::new);
pub fn gather() -> String {
METRICS.gather()
}
/// Returns the resident set size of the current process in bytes.
///
/// On Linux the value comes from `read_linux_rss`, falling back to `0` when that
/// returns `None`. Every other target always reports `0`.
// Presumably needed because the non-Linux body is a bare constant, which clippy
// would otherwise ask to be made `const fn`.
#[allow(clippy::missing_const_for_fn)]
fn rss_bytes() -> i64 {
    #[cfg(not(target_os = "linux"))]
    {
        0
    }
    #[cfg(target_os = "linux")]
    {
        read_linux_rss().unwrap_or_default()
    }
}
/// Reads the current process's resident set size, in bytes, from `/proc/self/status`.
///
/// Returns `None` when the file cannot be read, contains no `VmRSS:` line, or the
/// value on that line cannot be parsed or converted to bytes without overflow.
#[cfg(target_os = "linux")]
fn read_linux_rss() -> Option<i64> {
    let status = std::fs::read_to_string("/proc/self/status").ok()?;
    parse_vm_rss_bytes(&status)
}

/// Extracts the first `VmRSS:` entry from `/proc/self/status`-style text as bytes.
///
/// Only the first whitespace-separated token after the prefix is parsed; any
/// trailing unit suffix is ignored.
#[cfg(target_os = "linux")]
fn parse_vm_rss_bytes(status: &str) -> Option<i64> {
    let rest = status
        .lines()
        .find_map(|line| line.strip_prefix("VmRSS:"))?;
    let kb: i64 = rest.split_whitespace().next()?.parse().ok()?;
    // proc(5) reports VmRSS in kB; `checked_mul` turns an overflowing value into
    // `None` instead of a debug-build panic or a wrapped result in release.
    kb.checked_mul(1024)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Builds an isolated instance so these tests do not touch the global `METRICS`.
    fn fresh_metrics() -> BrowserMetrics {
        BrowserMetrics::new()
    }

    /// Each value written through `set_pool_size` is read back from the gauge.
    #[test]
    fn pool_size_gauge_tracks_value() {
        let metrics = fresh_metrics();
        for expected in [3, 0] {
            metrics.set_pool_size(expected);
            assert_eq!(metrics.pool_size.get(), expected);
        }
    }

    /// Two `record_crash` calls leave the crash counter at two.
    #[test]
    fn crash_counter_increments() {
        let metrics = fresh_metrics();
        for _ in 0..2 {
            metrics.record_crash();
        }
        assert_eq!(metrics.crashes_total.get(), 2);
    }

    /// Every `record_acquisition` call bumps the acquisition counter.
    #[test]
    fn acquisition_duration_observes() {
        let metrics = fresh_metrics();
        for millis in [100, 500] {
            metrics.record_acquisition(Duration::from_millis(millis));
        }
        assert_eq!(metrics.acquisitions_total.get(), 2);
    }

    /// The encoded output mentions each of the checked metric names.
    #[test]
    fn gather_contains_metric_names() {
        let metrics = fresh_metrics();
        metrics.set_pool_size(2);
        metrics.record_crash();
        let rendered = metrics.gather();

        let expectations = [
            ("browser_pool_size", "missing pool_size"),
            ("browser_crashes_total", "missing crashes_total"),
            (
                "browser_acquisition_duration_seconds",
                "missing acquisition histogram",
            ),
        ];
        for (metric_name, failure) in expectations {
            assert!(rendered.contains(metric_name), "{failure}");
        }
    }

    /// The module-level `gather` renders the global instance.
    #[test]
    fn global_gather_has_expected_keys() {
        let rendered = gather();
        assert!(rendered.contains("browser_pool_size"));
    }

    /// `rss_bytes` never reports a negative size.
    #[test]
    fn rss_is_non_negative() {
        let rss = rss_bytes();
        assert!(rss >= 0);
    }
}