Skip to main content

faucet_core/observability/
install.rs

1//! Idempotent global installer for the Prometheus recorder and a
2//! `tracing-subscriber`. Safe to call more than once; subsequent calls warn
3//! and continue rather than panicking. Port-in-use becomes a typed error.
4
5use thiserror::Error;
6
/// Configuration for `install_observability`. Either or both sections may be
/// `None`; unset sections install nothing.
///
/// The derived `Default` leaves both sections `None`.
#[derive(Debug, Clone, Default)]
pub struct ObservabilityConfig {
    /// Prometheus recorder and `/metrics` endpoint settings. `None` skips the
    /// Prometheus install entirely.
    pub prometheus: Option<PrometheusConfig>,
    /// `tracing-subscriber` settings. `None` leaves the global subscriber
    /// untouched.
    pub tracing: Option<TracingConfig>,
}
14
/// Settings for the Prometheus recorder and its HTTP `/metrics` endpoint.
#[derive(Debug, Clone)]
pub struct PrometheusConfig {
    /// `ip:port` to bind a `/metrics` HTTP endpoint. Recommended:
    /// `127.0.0.1:9464`.
    ///
    /// The value is parsed as a `std::net::SocketAddr`, so it must be a
    /// literal IP address plus port; a hostname such as `localhost:9464` does
    /// not parse and is reported as `InstallError::PrometheusBind`.
    pub listen: String,
    /// Histogram bucket overrides (in seconds). When `None`, sensible defaults
    /// apply (0.001..300s spanning sub-ms through five-minute durations).
    pub buckets: Option<Vec<f64>>,
}
24
/// Settings for the global `tracing-subscriber` installed by
/// `install_observability`.
#[derive(Debug, Clone)]
pub struct TracingConfig {
    /// `EnvFilter`-style directive, e.g. `"info"` or `"faucet_core=debug,info"`.
    /// A directive that `EnvFilter` cannot parse is replaced with `"info"`
    /// instead of failing the install.
    pub level: String,
}
30
/// Report from `install_observability` so callers can log what actually
/// happened (recorder installed vs. already-installed vs. disabled).
///
/// The derived `Default` (`None` / `false` / `false`) is what a call with an
/// empty configuration returns.
#[derive(Debug, Clone, Default)]
pub struct InstallReport {
    /// The configured listen string, set only when this call installed the
    /// Prometheus recorder itself. `None` when Prometheus was not requested
    /// or a recorder was already in place.
    pub prometheus_listen: Option<String>,
    /// `true` when a global `metrics` recorder had already been set, so this
    /// call left it as-is.
    pub prometheus_already_installed: bool,
    /// `true` when a global tracing subscriber had already been set, so this
    /// call left it as-is.
    pub tracing_already_installed: bool,
}
39
/// Failures `install_observability` reports to the caller instead of
/// panicking.
#[derive(Debug, Error)]
pub enum InstallError {
    /// The configured listen address was unusable: either it did not parse as
    /// a socket address or the HTTP listener could not be created on it.
    #[error("failed to bind Prometheus listener at {listen}: {source}")]
    PrometheusBind {
        /// The listen string exactly as it was configured.
        listen: String,
        /// Underlying cause, wrapped as an `io::Error`.
        #[source]
        source: std::io::Error,
    },
    /// Any other Prometheus builder/installer failure (for example rejected
    /// bucket overrides), carried as the upstream error's display text.
    #[error("failed to install Prometheus recorder: {0}")]
    PrometheusInstall(String),
}
51
/// Install observability if requested. Always returns; never panics.
///
/// Behavior:
/// - If `prometheus` is set, builds a `PrometheusBuilder` and installs the
///   recorder + HTTP `/metrics` endpoint at the configured listen address.
///   Already-installed recorder (typed `BuildError::FailedToSetGlobalRecorder`)
///   is logged via `tracing::warn!` and continues. Listen-address parse failures
///   and HTTP-listener bind failures (e.g. port-in-use, typed
///   `BuildError::FailedToCreateHTTPListener`) return `InstallError::PrometheusBind`.
/// - If `tracing` is set, installs a `tracing-subscriber` registry with the
///   given env-filter directive as the default subscriber. Already-set-default
///   is logged via `tracing::warn!` and continues. A directive that does not
///   parse is replaced with `"info"` and the substitution is logged.
///
/// Warnings produced by the Prometheus step are emitted only after the tracing
/// step has run, so a subscriber installed by this very call can see them.
///
/// # Errors
///
/// - `InstallError::PrometheusBind` when the listen address does not parse or
///   the HTTP listener cannot be created.
/// - `InstallError::PrometheusInstall` for any other builder/installer failure
///   (e.g. rejected bucket overrides).
///
/// When an error is returned the tracing section has not been attempted.
///
/// NOTE(review): whether a repeat call with the *same* listen address reports
/// "already installed" or a bind error depends on the order in which the
/// exporter binds its listener and registers the recorder — confirm against
/// the `metrics-exporter-prometheus` version in use.
#[cfg(feature = "observability-install")]
pub fn install_observability(cfg: &ObservabilityConfig) -> Result<InstallReport, InstallError> {
    let mut report = InstallReport::default();

    if let Some(p) = cfg.prometheus.as_ref() {
        use metrics_exporter_prometheus::{BuildError, PrometheusBuilder};

        let listen: std::net::SocketAddr =
            p.listen.parse().map_err(|e: std::net::AddrParseError| {
                InstallError::PrometheusBind {
                    listen: p.listen.clone(),
                    // Wrap the parse error itself rather than its string so
                    // the original cause stays reachable through `source()`.
                    source: std::io::Error::new(std::io::ErrorKind::InvalidInput, e),
                }
            })?;

        const DEFAULT_BUCKETS: &[f64] = &[
            0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0, 300.0,
        ];
        let buckets = p.buckets.as_deref().unwrap_or(DEFAULT_BUCKETS);

        let builder = PrometheusBuilder::new()
            .with_http_listener(listen)
            .set_buckets(buckets)
            .map_err(|e| InstallError::PrometheusInstall(e.to_string()))?;

        // Match the TYPED `BuildError` variant rather than scraping its
        // Display string — the latter breaks silently if the upstream
        // wording changes.
        match builder.install() {
            Ok(()) => report.prometheus_listen = Some(p.listen.clone()),
            // Recorder already installed (e.g. a prior `install` call or a
            // test harness). Idempotent: record it now, warn further down once
            // the tracing subscriber has had a chance to be installed.
            Err(BuildError::FailedToSetGlobalRecorder(_)) => {
                report.prometheus_already_installed = true;
            }
            // The HTTP `/metrics` listener could not be created. A genuine
            // bind failure (e.g. EADDRINUSE / port-in-use) lands here rather
            // than in the address parse above, so surface it as the dedicated
            // bind error.
            Err(BuildError::FailedToCreateHTTPListener(msg)) => {
                return Err(InstallError::PrometheusBind {
                    listen: p.listen.clone(),
                    source: std::io::Error::other(msg),
                });
            }
            Err(other) => return Err(InstallError::PrometheusInstall(other.to_string())),
        }
    }

    if let Some(t) = cfg.tracing.as_ref() {
        use tracing_subscriber::EnvFilter;
        use tracing_subscriber::layer::SubscriberExt;
        use tracing_subscriber::util::SubscriberInitExt;

        // Keep the parse error around: it cannot be logged usefully until a
        // subscriber exists, so it is reported after `try_init` below.
        let (filter, directive_error) = match EnvFilter::try_new(&t.level) {
            Ok(filter) => (filter, None),
            Err(e) => (EnvFilter::new("info"), Some(e)),
        };
        let registry = tracing_subscriber::registry()
            .with(filter)
            .with(tracing_subscriber::fmt::layer());
        if registry.try_init().is_err() {
            // Some other code path has already set a global default. Log and
            // continue — observability still works through the previously-
            // installed subscriber.
            tracing::warn!("tracing subscriber already installed; continuing");
            report.tracing_already_installed = true;
        }
        if let Some(err) = directive_error {
            let directive = t.level.as_str();
            tracing::warn!(
                directive = %directive,
                error = %err,
                "ignoring invalid tracing filter directive; using `info`"
            );
        }
    }

    // Deferred from the Prometheus section so the message reaches a subscriber
    // that was only installed a few lines above.
    if report.prometheus_already_installed {
        tracing::warn!("Prometheus recorder already installed; continuing");
    }

    // Register build_info after any Prometheus install attempt — setting a
    // gauge while no recorder is installed is a no-op, so we order it last.
    register_build_info();

    Ok(report)
}
141
142/// Non-`observability-install` stub. Returns an empty report, never panics.
143#[cfg(not(feature = "observability-install"))]
144pub fn install_observability(_cfg: &ObservabilityConfig) -> Result<InstallReport, InstallError> {
145    register_build_info();
146    Ok(InstallReport::default())
147}
148
149/// Register the `faucet_build_info{version}` gauge (set to 1) under the
150/// currently-installed `metrics` recorder. Safe to call from any code path
151/// that wants to ensure the gauge is set; `install_observability` invokes
152/// this automatically. Gauges are naturally idempotent under the `metrics`
153/// model — repeat calls just re-set the same value.
154///
155/// The version label is `CARGO_PKG_VERSION` of `faucet-core` — matches the
156/// crate that owns the observability layer. Dashboards `group_left` the gauge
157/// onto every other metric to annotate panels with the running version.
158pub fn register_build_info() {
159    metrics::gauge!(
160        "faucet_build_info",
161        "version" => env!("CARGO_PKG_VERSION"),
162    )
163    .set(1.0);
164}
165
#[cfg(all(test, feature = "observability-install"))]
mod tests {
    use super::*;
    use std::sync::{Mutex, MutexGuard};

    // Held for the duration of each test so the tests in this module run one
    // at a time.
    static LOCK: Mutex<()> = Mutex::new(());

    /// Acquire the module lock, recovering the guard if an earlier test
    /// panicked while holding it.
    fn serialized() -> MutexGuard<'static, ()> {
        LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// An empty configuration installs nothing and yields the default report.
    #[test]
    fn no_config_returns_empty_report() {
        let _guard = serialized();
        let report = install_observability(&ObservabilityConfig::default()).unwrap();
        assert!(report.prometheus_listen.is_none());
        assert!(!report.prometheus_already_installed);
        assert!(!report.tracing_already_installed);
    }

    /// A listen string that is not a socket address is reported as a bind
    /// error rather than a panic.
    #[test]
    fn malformed_listen_returns_bind_error() {
        let _guard = serialized();
        let prometheus = PrometheusConfig {
            listen: "not-a-socket".into(),
            buckets: None,
        };
        let cfg = ObservabilityConfig {
            prometheus: Some(prometheus),
            tracing: None,
        };
        let other = install_observability(&cfg);
        if !matches!(other, Err(InstallError::PrometheusBind { .. })) {
            panic!("expected PrometheusBind error, got {other:?}");
        }
    }
}