zlayer_observability/lib.rs
1//! `ZLayer` Observability - Logging, Tracing, and Metrics
2//!
3//! Provides unified observability infrastructure:
4//! - Structured logging with JSON/pretty formats
5//! - OpenTelemetry distributed tracing
6//! - Prometheus metrics exposition
7//!
8//! # Quick Start
9//!
10//! ```no_run
11//! use zlayer_observability::{init_observability, ObservabilityConfig};
12//!
13//! #[tokio::main]
14//! async fn main() {
15//! let config = ObservabilityConfig::default();
16//! let _guards = init_observability(&config).expect("Failed to init observability");
17//!
18//! tracing::info!("Application started");
19//! }
20//! ```
21
22pub mod config;
23pub mod container_spans;
24pub mod error;
25pub mod log_reader;
26pub mod logging;
27pub mod logs;
28pub mod metrics;
29pub mod propagation;
30pub mod tracing_otel;
31
32use std::path::PathBuf;
33
34pub use config::*;
35pub use container_spans::*;
36pub use error::{ObservabilityError, Result};
37pub use logging::{init_logging, LogGuard};
38pub use metrics::{init_metrics, metrics, HealthStatus, ZLayerMetrics};
39pub use tracing_otel::{
40 init_otlp_in_runtime, otlp_is_enabled, set_telemetry_provider, DefaultOtlpProvider, OtlpGuard,
41 TelemetryProvider,
42};
43
44/// Combined guards for all observability components
45pub struct ObservabilityGuards {
46 /// Guard for the logging system (keeps async file writer running). Empty
47 /// (no-op) when OTLP is enabled and subscriber installation was deferred to
48 /// the in-runtime OTLP activation.
49 pub log_guard: LogGuard,
50 /// Guard for OTLP telemetry forwarding. Holds the live SDK tracer/logger
51 /// providers (flush on drop) when forwarding was activated by this call;
52 /// empty for the daemon path, where activation happens later via
53 /// [`init_otlp_in_runtime`] inside the Tokio runtime.
54 pub otlp: OtlpGuard,
55}
56
57/// Initialize all observability components
58///
59/// This is the recommended way to set up observability. It initializes:
60/// - Logging (always)
61/// - Metrics (always)
62/// - Tracing (if enabled in config)
63///
64/// Returns guards that must be held for the lifetime of the application.
65///
66/// # Example
67///
68/// ```no_run
69/// use zlayer_observability::{init_observability, ObservabilityConfig};
70///
71/// #[tokio::main]
72/// async fn main() {
73/// let config = ObservabilityConfig::default();
74/// let _guards = init_observability(&config).expect("Failed to init observability");
75///
76/// tracing::info!("Application started");
77/// // guards are dropped when main exits, flushing logs and traces
78/// }
79/// ```
80///
81/// # Errors
82/// Returns an error if any observability component fails to initialize.
83pub fn init_observability(config: &ObservabilityConfig) -> Result<ObservabilityGuards> {
84 // OTLP forwarding and ZLayer's console/file subscriber are mutually
85 // exclusive: there is a single global `tracing` subscriber slot, and
86 // `init_otlp_in_runtime` claims it. Crucially, the OTLP exporter builds its
87 // batch processors with `runtime::Tokio` and so MUST run inside an active
88 // Tokio runtime — but ZLayer's daemon calls `init_observability` BEFORE it
89 // builds its runtime. So when OTLP is enabled we DEFER subscriber install:
90 // the caller (re)activates it from inside the runtime via
91 // `init_otlp_in_runtime`. When OTLP is off, we install the console/file
92 // subscriber here exactly as before.
93 let log_guard = if otlp_is_enabled(&config.tracing) {
94 LogGuard::noop()
95 } else {
96 let g = logging::init_logging_inner(&config.logging)?;
97 tracing::info!("Observability initialized");
98 g
99 };
100
101 // Metrics are independent of the subscriber and always initialized.
102 let _ = init_metrics(&config.metrics)?;
103
104 Ok(ObservabilityGuards {
105 log_guard,
106 // Daemon path: OTLP is activated later, inside the runtime.
107 otlp: OtlpGuard::default(),
108 })
109}
110
111/// Options for [`init_common_logging`].
112///
113/// Sensible defaults (`Default`) give every satellite a pretty console plus a
114/// daily-rotated file sink under `~/.zlayer/logs`; tweak only what differs.
115#[derive(Debug, Clone)]
116pub struct CommonLoggingOptions {
117 /// Also write a daily-rotated file sink (7-file cap) alongside the console.
118 pub file: bool,
119 /// Override the log directory. Defaults to `~/.zlayer/logs`.
120 pub log_dir: Option<PathBuf>,
121 /// Level used when `RUST_LOG` is unset and no `filter_directives` are given.
122 pub default_level: LogLevel,
123 /// Per-crate default filter directives used when `RUST_LOG` is unset
124 /// (e.g. `"zlayer_web=debug,tower_http=debug"`).
125 pub filter_directives: Option<String>,
126 /// Console/file format. Defaults to pretty.
127 pub format: LogFormat,
128}
129
130impl Default for CommonLoggingOptions {
131 fn default() -> Self {
132 Self {
133 file: true,
134 log_dir: None,
135 default_level: LogLevel::Info,
136 filter_directives: None,
137 format: LogFormat::Pretty,
138 }
139 }
140}
141
142/// Shared logging entry point for the satellite binaries (overlayd, web,
143/// manager, desktop).
144///
145/// Installs a structured tracing subscriber with an `EnvFilter` (honoring
146/// `RUST_LOG`), a console layer, and — by default — a daily-rotated file sink at
147/// `~/.zlayer/logs/<app_name>.log` (reusing the same rolling-file + cleanup
148/// machinery as the daemon). OTLP forwarding is wired automatically from the
149/// `OTEL_*` environment, so satellites join the constellation's `ZLogging` hook
150/// the moment an endpoint is configured (off by default).
151///
152/// Hold the returned guards for the lifetime of the process; dropping them
153/// flushes the async file writer and shuts the tracer provider down.
154///
155/// # Errors
156/// Returns an error if a file sink is requested but the log directory cannot be
157/// created, or if OTLP forwarding is configured with a broken endpoint.
158pub fn init_common_logging(
159 app_name: &str,
160 opts: CommonLoggingOptions,
161) -> Result<ObservabilityGuards> {
162 let file = if opts.file {
163 let dir = opts.log_dir.unwrap_or_else(default_common_log_dir);
164 // Best-effort: a failed create surfaces as a writer error below; we don't
165 // want a missing log dir to take down a satellite's whole startup.
166 let _ = std::fs::create_dir_all(&dir);
167 Some(FileLoggingConfig {
168 directory: dir,
169 prefix: format!("{app_name}.log"),
170 rotation: RotationStrategy::Daily,
171 max_files: Some(7),
172 })
173 } else {
174 None
175 };
176
177 let logging = LoggingConfig {
178 level: opts.default_level,
179 format: opts.format,
180 file,
181 filter_directives: opts.filter_directives,
182 ..Default::default()
183 };
184
185 // OTLP forwarding straight from the environment (OTEL_TRACES_ENABLED /
186 // OTEL_EXPORTER_OTLP_ENDPOINT); inert when unset.
187 let tracing = TracingConfig::from_env();
188
189 // The OTLP exporter must be built inside a Tokio runtime. Satellites differ:
190 // `zlayer-web`/`zlayer-manager` are `#[tokio::main]` (call us in-runtime),
191 // while `zlayer-overlayd`/`zlayer-desktop` call us before building their
192 // runtime. Only route through OTLP when it is enabled AND we're actually
193 // inside a runtime; otherwise fall back to the console/file subscriber so we
194 // never hit the "no reactor running" panic.
195 if otlp_is_enabled(&tracing) && tokio::runtime::Handle::try_current().is_ok() {
196 let obs = ObservabilityConfig {
197 logging,
198 tracing,
199 ..Default::default()
200 };
201 let otlp = init_otlp_in_runtime(&obs);
202 return Ok(ObservabilityGuards {
203 // `init_otlp_in_runtime` owns the OTel subscriber or installs a
204 // console/file fallback itself; either way it's inside `otlp`.
205 log_guard: LogGuard::noop(),
206 otlp,
207 });
208 }
209
210 let log_guard = logging::init_logging_inner(&logging)?;
211 Ok(ObservabilityGuards {
212 log_guard,
213 otlp: OtlpGuard::default(),
214 })
215}
216
/// Resolve the standard `~/.zlayer/logs` directory for satellite file sinks.
///
/// The base directory is taken from `HOME`, then from `USERPROFILE` (the
/// Windows equivalent). A variable that is set but empty is treated as unset:
/// using it would produce the relative path `.zlayer/logs`, silently resolved
/// against whatever the current working directory happens to be. If neither
/// variable yields a non-empty value, the system temp dir is used so a file
/// sink never hard-fails on a headless box.
///
/// `USERPROFILE` is only consulted when `HOME` is missing or empty.
fn default_common_log_dir() -> PathBuf {
    // Read one variable, discarding the "set but empty" case.
    let non_empty = |key: &str| std::env::var_os(key).filter(|value| !value.is_empty());

    non_empty("HOME")
        .or_else(|| non_empty("USERPROFILE"))
        .map_or_else(std::env::temp_dir, PathBuf::from)
        .join(".zlayer")
        .join("logs")
}
228
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration has tracing switched off and metrics
    /// switched on.
    #[test]
    fn test_default_config() {
        let defaults = ObservabilityConfig::default();
        let tracing_on = defaults.tracing.enabled;
        let metrics_on = defaults.metrics.enabled;

        assert!(!tracing_on);
        assert!(metrics_on);
    }
}