Skip to main content

zlayer_observability/
lib.rs

1//! `ZLayer` Observability - Logging, Tracing, and Metrics
2//!
3//! Provides unified observability infrastructure:
4//! - Structured logging with JSON/pretty formats
5//! - OpenTelemetry distributed tracing
6//! - Prometheus metrics exposition
7//!
8//! # Quick Start
9//!
10//! ```no_run
11//! use zlayer_observability::{init_observability, ObservabilityConfig};
12//!
13//! #[tokio::main]
14//! async fn main() {
15//!     let config = ObservabilityConfig::default();
16//!     let _guards = init_observability(&config).expect("Failed to init observability");
17//!
18//!     tracing::info!("Application started");
19//! }
20//! ```
21
22pub mod config;
23pub mod container_spans;
24pub mod error;
25pub mod log_reader;
26pub mod logging;
27pub mod logs;
28pub mod metrics;
29pub mod propagation;
30pub mod tracing_otel;
31
32use std::path::PathBuf;
33
34pub use config::*;
35pub use container_spans::*;
36pub use error::{ObservabilityError, Result};
37pub use logging::{init_logging, LogGuard};
38pub use metrics::{init_metrics, metrics, HealthStatus, ZLayerMetrics};
39pub use tracing_otel::{
40    init_otlp_in_runtime, otlp_is_enabled, set_telemetry_provider, DefaultOtlpProvider, OtlpGuard,
41    TelemetryProvider,
42};
43
44/// Combined guards for all observability components
45pub struct ObservabilityGuards {
46    /// Guard for the logging system (keeps async file writer running). Empty
47    /// (no-op) when OTLP is enabled and subscriber installation was deferred to
48    /// the in-runtime OTLP activation.
49    pub log_guard: LogGuard,
50    /// Guard for OTLP telemetry forwarding. Holds the live SDK tracer/logger
51    /// providers (flush on drop) when forwarding was activated by this call;
52    /// empty for the daemon path, where activation happens later via
53    /// [`init_otlp_in_runtime`] inside the Tokio runtime.
54    pub otlp: OtlpGuard,
55}
56
57/// Initialize all observability components
58///
59/// This is the recommended way to set up observability. It initializes:
60/// - Logging (always)
61/// - Metrics (always)
62/// - Tracing (if enabled in config)
63///
64/// Returns guards that must be held for the lifetime of the application.
65///
66/// # Example
67///
68/// ```no_run
69/// use zlayer_observability::{init_observability, ObservabilityConfig};
70///
71/// #[tokio::main]
72/// async fn main() {
73///     let config = ObservabilityConfig::default();
74///     let _guards = init_observability(&config).expect("Failed to init observability");
75///
76///     tracing::info!("Application started");
77///     // guards are dropped when main exits, flushing logs and traces
78/// }
79/// ```
80///
81/// # Errors
82/// Returns an error if any observability component fails to initialize.
83pub fn init_observability(config: &ObservabilityConfig) -> Result<ObservabilityGuards> {
84    // OTLP forwarding and ZLayer's console/file subscriber are mutually
85    // exclusive: there is a single global `tracing` subscriber slot, and
86    // `init_otlp_in_runtime` claims it. Crucially, the OTLP exporter builds its
87    // batch processors with `runtime::Tokio` and so MUST run inside an active
88    // Tokio runtime — but ZLayer's daemon calls `init_observability` BEFORE it
89    // builds its runtime. So when OTLP is enabled we DEFER subscriber install:
90    // the caller (re)activates it from inside the runtime via
91    // `init_otlp_in_runtime`. When OTLP is off, we install the console/file
92    // subscriber here exactly as before.
93    let log_guard = if otlp_is_enabled(&config.tracing) {
94        LogGuard::noop()
95    } else {
96        let g = logging::init_logging_inner(&config.logging)?;
97        tracing::info!("Observability initialized");
98        g
99    };
100
101    // Metrics are independent of the subscriber and always initialized.
102    let _ = init_metrics(&config.metrics)?;
103
104    Ok(ObservabilityGuards {
105        log_guard,
106        // Daemon path: OTLP is activated later, inside the runtime.
107        otlp: OtlpGuard::default(),
108    })
109}
110
111/// Options for [`init_common_logging`].
112///
113/// Sensible defaults (`Default`) give every satellite a pretty console plus a
114/// daily-rotated file sink under `~/.zlayer/logs`; tweak only what differs.
115#[derive(Debug, Clone)]
116pub struct CommonLoggingOptions {
117    /// Also write a daily-rotated file sink (7-file cap) alongside the console.
118    pub file: bool,
119    /// Override the log directory. Defaults to `~/.zlayer/logs`.
120    pub log_dir: Option<PathBuf>,
121    /// Level used when `RUST_LOG` is unset and no `filter_directives` are given.
122    pub default_level: LogLevel,
123    /// Per-crate default filter directives used when `RUST_LOG` is unset
124    /// (e.g. `"zlayer_web=debug,tower_http=debug"`).
125    pub filter_directives: Option<String>,
126    /// Console/file format. Defaults to pretty.
127    pub format: LogFormat,
128}
129
130impl Default for CommonLoggingOptions {
131    fn default() -> Self {
132        Self {
133            file: true,
134            log_dir: None,
135            default_level: LogLevel::Info,
136            filter_directives: None,
137            format: LogFormat::Pretty,
138        }
139    }
140}
141
142/// Shared logging entry point for the satellite binaries (overlayd, web,
143/// manager, desktop).
144///
145/// Installs a structured tracing subscriber with an `EnvFilter` (honoring
146/// `RUST_LOG`), a console layer, and — by default — a daily-rotated file sink at
147/// `~/.zlayer/logs/<app_name>.log` (reusing the same rolling-file + cleanup
148/// machinery as the daemon). OTLP forwarding is wired automatically from the
149/// `OTEL_*` environment, so satellites join the constellation's `ZLogging` hook
150/// the moment an endpoint is configured (off by default).
151///
152/// Hold the returned guards for the lifetime of the process; dropping them
153/// flushes the async file writer and shuts the tracer provider down.
154///
155/// # Errors
156/// Returns an error if a file sink is requested but the log directory cannot be
157/// created, or if OTLP forwarding is configured with a broken endpoint.
158pub fn init_common_logging(
159    app_name: &str,
160    opts: CommonLoggingOptions,
161) -> Result<ObservabilityGuards> {
162    let file = if opts.file {
163        let dir = opts.log_dir.unwrap_or_else(default_common_log_dir);
164        // Best-effort: a failed create surfaces as a writer error below; we don't
165        // want a missing log dir to take down a satellite's whole startup.
166        let _ = std::fs::create_dir_all(&dir);
167        Some(FileLoggingConfig {
168            directory: dir,
169            prefix: format!("{app_name}.log"),
170            rotation: RotationStrategy::Daily,
171            max_files: Some(7),
172        })
173    } else {
174        None
175    };
176
177    let logging = LoggingConfig {
178        level: opts.default_level,
179        format: opts.format,
180        file,
181        filter_directives: opts.filter_directives,
182        ..Default::default()
183    };
184
185    // OTLP forwarding straight from the environment (OTEL_TRACES_ENABLED /
186    // OTEL_EXPORTER_OTLP_ENDPOINT); inert when unset.
187    let tracing = TracingConfig::from_env();
188
189    // The OTLP exporter must be built inside a Tokio runtime. Satellites differ:
190    // `zlayer-web`/`zlayer-manager` are `#[tokio::main]` (call us in-runtime),
191    // while `zlayer-overlayd`/`zlayer-desktop` call us before building their
192    // runtime. Only route through OTLP when it is enabled AND we're actually
193    // inside a runtime; otherwise fall back to the console/file subscriber so we
194    // never hit the "no reactor running" panic.
195    if otlp_is_enabled(&tracing) && tokio::runtime::Handle::try_current().is_ok() {
196        let obs = ObservabilityConfig {
197            logging,
198            tracing,
199            ..Default::default()
200        };
201        let otlp = init_otlp_in_runtime(&obs);
202        return Ok(ObservabilityGuards {
203            // `init_otlp_in_runtime` owns the OTel subscriber or installs a
204            // console/file fallback itself; either way it's inside `otlp`.
205            log_guard: LogGuard::noop(),
206            otlp,
207        });
208    }
209
210    let log_guard = logging::init_logging_inner(&logging)?;
211    Ok(ObservabilityGuards {
212        log_guard,
213        otlp: OtlpGuard::default(),
214    })
215}
216
/// Resolve the standard `~/.zlayer/logs` directory for satellite file sinks.
///
/// The base directory is taken from `HOME`, then `USERPROFILE` (the Windows
/// equivalent), and finally the system temp dir so a file sink never
/// hard-fails on a headless box. A variable that is set but *empty* is treated
/// as unset: joining onto an empty base would otherwise produce the relative
/// path `.zlayer/logs`, silently dropping logs into whatever the current
/// working directory happens to be.
fn default_common_log_dir() -> PathBuf {
    // Read a variable, discarding values that are present but empty.
    let non_empty = |key: &str| std::env::var_os(key).filter(|value| !value.is_empty());

    non_empty("HOME")
        .or_else(|| non_empty("USERPROFILE"))
        .map_or_else(std::env::temp_dir, PathBuf::from)
        .join(".zlayer")
        .join("logs")
}
228
#[cfg(test)]
mod tests {
    use super::*;

    /// The out-of-the-box configuration keeps tracing off and metrics on.
    #[test]
    fn test_default_config() {
        let defaults = ObservabilityConfig::default();

        assert!(
            !defaults.tracing.enabled,
            "tracing should be disabled by default"
        );
        assert!(
            defaults.metrics.enabled,
            "metrics should be enabled by default"
        );
    }
}