hyperi-rustlib 2.8.15

There's plenty of sage advice out there about how to run Rust services in production at scale — config cascades, structured logging, masking secrets, multi-backend secrets management, Prometheus, OpenTelemetry, Kafka transports, tiered disk-spillover sinks, adaptive worker pools, graceful shutdown — but almost none of it as code you can just install and use. This is that code. Opinionated, drop-in, working out of the box. The patterns from blog posts, watercooler chats and beers with your Google mates as actual library — not a framework you assemble from twenty crates and 8 weeks of munging.
Documentation
// Project:   hyperi-rustlib
// File:      src/worker/metrics.rs
// Purpose:   Metric registration and threshold gauge emission
// Language:  Rust
//
// License:   BUSL-1.1
// Copyright: (c) 2026 HYPERI PTY LIMITED

use crate::metrics::MetricsManager;

use super::config::WorkerPoolConfig;

/// Register all worker pool metrics with the `MetricsManager`.
///
/// Threshold gauges are emitted immediately with current config values.
pub fn register(manager: &MetricsManager, config: &WorkerPoolConfig) {
    // Operational metrics -- register descriptions (return values intentionally unused)
    let _ = manager.gauge(
        "worker_pool_active_threads",
        "Permits currently leased (in-flight worker count)",
    );
    let _ = manager.gauge(
        "worker_pool_target_threads",
        "Target thread count from scaler (admission ceiling)",
    );
    let _ = manager.gauge(
        "worker_pool_available_threads",
        "Headroom: leasable permits right now (target - leased)",
    );
    let _ = manager.gauge("worker_pool_max_threads", "Maximum pool threads");
    let _ = manager.gauge(
        "worker_pool_cpu_utilisation",
        "Current CPU utilisation sample",
    );
    // dual-emit: drop OLD in next release (MIGRATIONS) -- `_ratio` is the
    // Prometheus/OTel-convention name for a 0-1 ratio gauge.
    let _ = manager.gauge(
        "worker_pool_cpu_utilisation_ratio",
        "Current CPU utilisation sample (0.0-1.0)",
    );
    let _ = manager.gauge(
        "worker_pool_memory_utilisation",
        "Effective memory pressure",
    );
    // dual-emit: drop OLD in next release (MIGRATIONS)
    let _ = manager.gauge(
        "worker_pool_memory_utilisation_ratio",
        "Effective memory pressure (0.0-1.0)",
    );
    let _ = manager.gauge(
        "worker_pool_saturation",
        "Pool saturation ratio (active/max)",
    );
    // dual-emit: drop OLD in next release (MIGRATIONS)
    let _ = manager.gauge(
        "worker_pool_saturation_ratio",
        "Pool saturation ratio, active/max (0.0-1.0)",
    );
    let _ = manager.counter(
        "worker_pool_tasks_total",
        "Total tasks submitted to rayon pool",
    );
    let _ = manager.histogram(
        "worker_pool_task_duration_seconds",
        "Per-task execution time",
    );
    let _ = manager.histogram(
        "worker_pool_batch_duration_seconds",
        "End-to-end batch processing time",
    );
    let _ = manager.histogram(
        "worker_pool_semaphore_wait_seconds",
        "Time waiting for semaphore permit",
    );
    let _ = manager.counter("worker_pool_scale_events_total", "Scaling events");
    let _ = manager.gauge(
        "worker_pool_async_inflight",
        "Current async fan-out tasks in flight",
    );

    // Threshold descriptors (NEW convention names; values emitted by
    // emit_thresholds). The dual-emit OLD names are emitted but not separately
    // registered -- they drop next release (MIGRATIONS).
    let _ = manager.gauge(
        "worker_pool_grow_below_ratio",
        "Grow-below CPU threshold (0.0-1.0)",
    );
    let _ = manager.gauge(
        "worker_pool_shrink_above_ratio",
        "Shrink-above CPU threshold (0.0-1.0)",
    );
    let _ = manager.gauge(
        "worker_pool_emergency_above_ratio",
        "Emergency-above CPU threshold (0.0-1.0)",
    );
    let _ = manager.gauge(
        "worker_pool_memory_pressure_cap_ratio",
        "Memory pressure cap (0.0-1.0)",
    );
    let _ = manager.gauge(
        "worker_pool_scale_interval_seconds",
        "Scaling interval (seconds)",
    );
    let _ = manager.gauge(
        "worker_pool_health_saturation_timeout_seconds",
        "Health saturation timeout (seconds)",
    );

    // Threshold gauges (config values as metrics -- emitted immediately)
    emit_thresholds(config);
}

/// Emit threshold gauge values (called at startup and on config reload).
///
/// Metric names match config keys exactly for mechanical derivation:
/// config key `grow_below` -> metric `worker_pool_grow_below`.
pub fn emit_thresholds(config: &WorkerPoolConfig) {
    metrics::gauge!("worker_pool_min_threads").set(config.min_threads as f64);
    metrics::gauge!("worker_pool_max_threads").set(config.max_threads as f64);
    metrics::gauge!("worker_pool_grow_below").set(config.grow_below);
    metrics::gauge!("worker_pool_shrink_above").set(config.shrink_above);
    metrics::gauge!("worker_pool_emergency_above").set(config.emergency_above);
    metrics::gauge!("worker_pool_memory_pressure_cap").set(config.memory_pressure_cap);
    metrics::gauge!("worker_pool_scale_interval_secs").set(config.scale_interval_secs as f64);
    metrics::gauge!("worker_pool_async_concurrency").set(config.async_concurrency as f64);
    metrics::gauge!("worker_pool_health_saturation_timeout_secs")
        .set(config.health_saturation_timeout_secs as f64);

    // dual-emit: drop OLD in next release (MIGRATIONS). The threshold gauges
    // above are pressure-CEL inputs / Grafana overlay lines; the `_ratio`
    // (0-1 ratio thresholds) and `_seconds` (was `_secs`) names match the
    // Prometheus/OTel base-unit convention.
    metrics::gauge!("worker_pool_grow_below_ratio").set(config.grow_below);
    metrics::gauge!("worker_pool_shrink_above_ratio").set(config.shrink_above);
    metrics::gauge!("worker_pool_emergency_above_ratio").set(config.emergency_above);
    metrics::gauge!("worker_pool_memory_pressure_cap_ratio").set(config.memory_pressure_cap);
    metrics::gauge!("worker_pool_scale_interval_seconds").set(config.scale_interval_secs as f64);
    metrics::gauge!("worker_pool_health_saturation_timeout_seconds")
        .set(config.health_saturation_timeout_secs as f64);
}