// zlayer-observability 0.14.0
//
// OpenTelemetry tracing and Prometheus metrics for ZLayer
// Documentation
//! `ZLayer` Observability - Logging, Tracing, and Metrics
//!
//! Provides unified observability infrastructure:
//! - Structured logging with JSON/pretty formats
//! - OpenTelemetry distributed tracing
//! - Prometheus metrics exposition
//!
//! # Quick Start
//!
//! ```no_run
//! use zlayer_observability::{init_observability, ObservabilityConfig};
//!
//! #[tokio::main]
//! async fn main() {
//!     let config = ObservabilityConfig::default();
//!     let _guards = init_observability(&config).expect("Failed to init observability");
//!
//!     tracing::info!("Application started");
//! }
//! ```

pub mod config;
pub mod container_spans;
pub mod error;
pub mod log_reader;
pub mod logging;
pub mod logs;
pub mod metrics;
pub mod propagation;
pub mod tracing_otel;

use std::path::PathBuf;

pub use config::*;
pub use container_spans::*;
pub use error::{ObservabilityError, Result};
pub use logging::{init_logging, LogGuard};
pub use metrics::{init_metrics, metrics, HealthStatus, ZLayerMetrics};
pub use tracing_otel::{
    init_otlp_in_runtime, otlp_is_enabled, set_telemetry_provider, DefaultOtlpProvider, OtlpGuard,
    TelemetryProvider,
};

/// Combined guards for all observability components.
///
/// Bind the value to a named variable (e.g. `let _guards = ...;`) and keep it
/// alive for as long as the process should emit telemetry. The guards do their
/// cleanup on drop, so discarding the value immediately would tear the
/// corresponding components down right after they were set up.
#[must_use = "hold the guards for the lifetime of the application; dropping them stops the async log writer and flushes the OTLP providers"]
pub struct ObservabilityGuards {
    /// Guard for the logging system (keeps async file writer running). Empty
    /// (no-op) when OTLP is enabled and subscriber installation was deferred to
    /// the in-runtime OTLP activation.
    pub log_guard: LogGuard,
    /// Guard for OTLP telemetry forwarding. Holds the live SDK tracer/logger
    /// providers (flush on drop) when forwarding was activated by this call;
    /// empty for the daemon path, where activation happens later via
    /// [`init_otlp_in_runtime`] inside the Tokio runtime.
    pub otlp: OtlpGuard,
}

/// Set up logging and metrics for the process in one call.
///
/// What is installed depends on whether OTLP forwarding is enabled for
/// `config.tracing` (as reported by [`otlp_is_enabled`]):
/// - **OTLP disabled:** the console/file subscriber is installed right away and
///   its [`LogGuard`] is returned.
/// - **OTLP enabled:** no subscriber is installed here. The returned
///   `log_guard` is a no-op and the caller is expected to activate forwarding
///   later via [`init_otlp_in_runtime`] from inside a Tokio runtime.
///
/// Metrics are initialized in both cases. The `otlp` guard returned by this
/// function is always the default (empty) one.
///
/// Returns guards that must be held for the lifetime of the application.
///
/// # Example
///
/// ```no_run
/// use zlayer_observability::{init_observability, ObservabilityConfig};
///
/// #[tokio::main]
/// async fn main() {
///     let config = ObservabilityConfig::default();
///     let _guards = init_observability(&config).expect("Failed to init observability");
///
///     tracing::info!("Application started");
///     // guards are dropped when main exits, flushing logs
/// }
/// ```
///
/// # Errors
/// Returns an error if the console/file subscriber (when installed here) or
/// the metrics component fails to initialize.
pub fn init_observability(config: &ObservabilityConfig) -> Result<ObservabilityGuards> {
    // There is only one global `tracing` subscriber slot, and the OTLP
    // activation (`init_otlp_in_runtime`) claims it. That activation builds
    // batch processors on `runtime::Tokio`, so it has to run inside a live
    // Tokio runtime, whereas the daemon calls this function BEFORE its runtime
    // exists. Hence: with OTLP on, leave the slot untouched and let the caller
    // activate it later; with OTLP off, install the console/file subscriber now.
    let otlp_deferred = otlp_is_enabled(&config.tracing);

    let log_guard = if otlp_deferred {
        LogGuard::noop()
    } else {
        logging::init_logging_inner(&config.logging)?
    };
    if !otlp_deferred {
        // Only meaningful once a subscriber is actually in place.
        tracing::info!("Observability initialized");
    }

    // Metrics do not depend on the subscriber, so they are set up regardless.
    let _ = init_metrics(&config.metrics)?;

    // Daemon path: the OTLP guard stays empty until in-runtime activation.
    let otlp = OtlpGuard::default();
    Ok(ObservabilityGuards { log_guard, otlp })
}

/// Options for [`init_common_logging`].
///
/// Sensible defaults (`Default`) give every satellite a pretty console plus a
/// daily-rotated file sink under `~/.zlayer/logs`; tweak only what differs.
/// A typical override uses struct-update syntax, e.g.
/// `CommonLoggingOptions { file: false, ..Default::default() }`.
#[derive(Debug, Clone)]
pub struct CommonLoggingOptions {
    /// Also write a daily-rotated file sink (7-file cap) alongside the console.
    /// When `false`, no file sink is configured and `log_dir` is ignored.
    pub file: bool,
    /// Override the log directory. Defaults to `~/.zlayer/logs` (resolved from
    /// `HOME` / `USERPROFILE`, falling back to the system temp dir).
    pub log_dir: Option<PathBuf>,
    /// Level used when `RUST_LOG` is unset and no `filter_directives` are given.
    pub default_level: LogLevel,
    /// Per-crate default filter directives used when `RUST_LOG` is unset
    /// (e.g. `"zlayer_web=debug,tower_http=debug"`).
    pub filter_directives: Option<String>,
    /// Console/file format. Defaults to pretty.
    pub format: LogFormat,
}

impl Default for CommonLoggingOptions {
    /// Pretty output at `Info` level with the file sink switched on, using the
    /// default log directory and no per-crate filter overrides.
    fn default() -> Self {
        Self {
            // Output shape and verbosity.
            format: LogFormat::Pretty,
            default_level: LogLevel::Info,
            filter_directives: None,
            // File sink: enabled, in the default directory.
            file: true,
            log_dir: None,
        }
    }
}

/// Shared logging entry point for the satellite binaries (overlayd, web,
/// manager, desktop).
///
/// Installs a structured tracing subscriber with an `EnvFilter` (honoring
/// `RUST_LOG`), a console layer, and — by default — a daily-rotated file sink at
/// `~/.zlayer/logs/<app_name>.log` (reusing the same rolling-file + cleanup
/// machinery as the daemon). OTLP forwarding is wired automatically from the
/// `OTEL_*` environment, so satellites join the constellation's `ZLogging` hook
/// the moment an endpoint is configured (off by default).
///
/// Hold the returned guards for the lifetime of the process; dropping them
/// flushes the async file writer and shuts the tracer provider down.
///
/// # Errors
/// Returns an error if a file sink is requested but the log directory cannot be
/// created, or if OTLP forwarding is configured with a broken endpoint.
pub fn init_common_logging(
    app_name: &str,
    opts: CommonLoggingOptions,
) -> Result<ObservabilityGuards> {
    let file = if opts.file {
        let dir = opts.log_dir.unwrap_or_else(default_common_log_dir);
        // Best-effort: a failed create surfaces as a writer error below; we don't
        // want a missing log dir to take down a satellite's whole startup.
        let _ = std::fs::create_dir_all(&dir);
        Some(FileLoggingConfig {
            directory: dir,
            prefix: format!("{app_name}.log"),
            rotation: RotationStrategy::Daily,
            max_files: Some(7),
        })
    } else {
        None
    };

    let logging = LoggingConfig {
        level: opts.default_level,
        format: opts.format,
        file,
        filter_directives: opts.filter_directives,
        ..Default::default()
    };

    // OTLP forwarding straight from the environment (OTEL_TRACES_ENABLED /
    // OTEL_EXPORTER_OTLP_ENDPOINT); inert when unset.
    let tracing = TracingConfig::from_env();

    // The OTLP exporter must be built inside a Tokio runtime. Satellites differ:
    // `zlayer-web`/`zlayer-manager` are `#[tokio::main]` (call us in-runtime),
    // while `zlayer-overlayd`/`zlayer-desktop` call us before building their
    // runtime. Only route through OTLP when it is enabled AND we're actually
    // inside a runtime; otherwise fall back to the console/file subscriber so we
    // never hit the "no reactor running" panic.
    if otlp_is_enabled(&tracing) && tokio::runtime::Handle::try_current().is_ok() {
        let obs = ObservabilityConfig {
            logging,
            tracing,
            ..Default::default()
        };
        let otlp = init_otlp_in_runtime(&obs);
        return Ok(ObservabilityGuards {
            // `init_otlp_in_runtime` owns the OTel subscriber or installs a
            // console/file fallback itself; either way it's inside `otlp`.
            log_guard: LogGuard::noop(),
            otlp,
        });
    }

    let log_guard = logging::init_logging_inner(&logging)?;
    Ok(ObservabilityGuards {
        log_guard,
        otlp: OtlpGuard::default(),
    })
}

/// Resolve the standard `~/.zlayer/logs` directory for satellite file sinks.
///
/// The home directory is taken from `HOME`, then `USERPROFILE` (Windows). A
/// variable that is set but empty is treated as unset: using it verbatim would
/// produce the relative path `.zlayer/logs`, i.e. a log directory under
/// whatever the current working directory happens to be. If neither variable
/// yields a value, the system temp dir is used as the base so a file sink never
/// hard-fails on a headless box.
fn default_common_log_dir() -> PathBuf {
    // Read an environment variable, discarding empty values.
    let non_empty = |key: &str| std::env::var_os(key).filter(|value| !value.is_empty());

    non_empty("HOME")
        .or_else(|| non_empty("USERPROFILE"))
        .map_or_else(std::env::temp_dir, PathBuf::from)
        .join(".zlayer")
        .join("logs")
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration ships with tracing switched off and metrics
    /// switched on.
    #[test]
    fn test_default_config() {
        let defaults = ObservabilityConfig::default();
        let (tracing_cfg, metrics_cfg) = (&defaults.tracing, &defaults.metrics);

        assert!(!tracing_cfg.enabled);
        assert!(metrics_cfg.enabled);
    }
}