// zlayer-observability 0.13.0
//
// OpenTelemetry tracing and Prometheus metrics for ZLayer
// Documentation
//! Structured logging with JSON/pretty output and file rotation

use std::io;
use tracing_appender::non_blocking::{NonBlocking, WorkerGuard};
use tracing_subscriber::{
    fmt::{self, format::FmtSpan, writer::BoxMakeWriter},
    layer::SubscriberExt,
    util::SubscriberInitExt,
    EnvFilter, Layer, Registry,
};

use crate::config::{FileLoggingConfig, LogFormat, LoggingConfig, RotationStrategy};
use crate::error::Result;

/// A single composed subscriber layer, type-erased behind a `Box`.
///
/// Layers built with different formatters / writers have different concrete
/// types; boxing them as `dyn Layer<Registry>` lets them live in one `Vec` and
/// be installed together with a single `.with(..)` call on the registry. The
/// trait object is additionally bounded by `Send + Sync`.
type BoxedLayer = Box<dyn Layer<Registry> + Send + Sync>;

/// Guard that must be held to keep the async file writer running.
///
/// Wraps the optional [`WorkerGuard`] obtained when a non-blocking file writer
/// is created. Keep this value alive for as long as log output is expected;
/// dropping it drops the inner worker guard. When no file sink is configured
/// the guard simply holds `None`.
pub struct LogGuard {
    /// Held for its `Drop` implementation to flush async log writes.
    /// Non-test code never reads it, hence the `dead_code` allowance.
    #[allow(dead_code)]
    guard: Option<WorkerGuard>,
}

impl LogGuard {
    /// Wrap the worker guard of the file writer, or `None` when logging has
    /// no file sink.
    fn new(guard: Option<WorkerGuard>) -> Self {
        Self { guard }
    }

    /// An empty guard that owns no worker.
    ///
    /// Used when subscriber installation is deferred to the in-runtime OTLP
    /// activation (which owns the global subscriber when OTLP is enabled), so
    /// there is nothing for this guard to keep alive.
    #[must_use]
    pub(crate) fn noop() -> Self {
        Self::new(None)
    }
}

/// Initialize logging with the given configuration.
///
/// Thin public entry point: it forwards to `init_logging_inner`, which builds
/// the console layer (plus a file layer when `config.file` is set) and
/// installs them as the global `tracing` subscriber.
///
/// Returns a guard that must be held for the lifetime of the application
/// to ensure logs are flushed properly.
///
/// # Errors
/// Propagates any error produced while setting up the file writer.
///
/// NOTE(review): as written, the file-writer setup in this module always
/// returns `Ok`; a log directory that cannot be created is expected to surface
/// as a panic inside `tracing_appender::rolling` rather than as an `Err` —
/// confirm against the tracing-appender documentation.
///
/// # Panics
/// The subscriber is installed with `SubscriberInitExt::init`, which panics
/// if a global subscriber has already been set. A malformed `RUST_LOG` does
/// not panic here: the filter falls back to the configured directives or
/// level instead.
pub fn init_logging(config: &LoggingConfig) -> Result<LogGuard> {
    init_logging_inner(config)
}

/// Install ZLayer's console (+ optional rotated file) `tracing` subscriber.
///
/// This subscriber is used whenever OTLP forwarding is **off**. With OTLP
/// enabled, [`crate::tracing_otel::init_otlp_in_runtime`] installs the OTel
/// subscriber instead; only one global `tracing` subscriber can exist, so the
/// two paths are mutually exclusive.
///
/// Every output layer gets its own [`EnvFilter`] (from `RUST_LOG`, otherwise
/// the config's directives/level), and the layers are boxed into a single
/// `Vec` so the subscriber type stays flat regardless of how many sinks are
/// active.
///
/// # Errors
/// Propagates any error returned by `create_file_writer` when `config.file`
/// is set.
///
/// # Panics
/// Panics if a global subscriber has already been installed (the behaviour of
/// `SubscriberInitExt::init`).
pub(crate) fn init_logging_inner(config: &LoggingConfig) -> Result<LogGuard> {
    let mut layers: Vec<BoxedLayer> = Vec::with_capacity(2);
    let mut worker_guard = None;

    match &config.file {
        // Daemon path: a file sink exists, which also moves the console onto
        // stdout (see `build_console_layer`).
        Some(file_config) => {
            let (file_writer, guard) = create_file_writer(file_config)?;
            layers.push(build_console_layer(config, true));
            layers.push(build_file_layer(config, file_writer));
            worker_guard = Some(guard);
        }
        // CLI path: console only.
        None => layers.push(build_console_layer(config, false)),
    }

    tracing_subscriber::registry().with(layers).init();

    Ok(LogGuard::new(worker_guard))
}

/// Build the human/structured console layer.
///
/// `to_stdout` selects the sink: `true` writes to STDOUT, `false` to stderr.
/// The output format follows `config.format`, and the layer carries its own
/// filter from `make_filter`.
///
/// When a file sink is present (`to_stdout == true`, i.e. the daemon path) the
/// console writes to STDOUT, NOT stderr: `serve` installs a stderr->tracing
/// redirect (`install_stderr_redirect_to_tracing`) that dup2's fd 2 onto a pipe
/// whose reader re-emits each line as a `tracing::error!`. If the console layer
/// also wrote to fd 2, every event would loop back through that pipe and
/// deadlock on the global stderr mutex once the pipe fills. Keeping the daemon
/// console on stdout (fd 1) keeps it disjoint from the fd-2 capture. The CLI /
/// satellite path (no file sink) stays on stderr so command stdout (e.g. `ps
/// --format json`) is clean.
fn build_console_layer(config: &LoggingConfig, to_stdout: bool) -> BoxedLayer {
    let writer: BoxMakeWriter = if to_stdout {
        BoxMakeWriter::new(io::stdout)
    } else {
        BoxMakeWriter::new(io::stderr)
    };

    let base = fmt::layer()
        .with_writer(writer)
        .with_target(config.include_target)
        .with_file(config.include_location)
        .with_line_number(config.include_location)
        .with_span_events(FmtSpan::CLOSE);

    let filter = make_filter(config);
    match config.format {
        // Switching to the pretty formatter turns file / line-number display
        // back on, so the location settings are re-applied afterwards;
        // otherwise `include_location = false` would be ignored in this mode.
        LogFormat::Pretty => base
            .pretty()
            .with_file(config.include_location)
            .with_line_number(config.include_location)
            .with_filter(filter)
            .boxed(),
        LogFormat::Json => base.json().with_filter(filter).boxed(),
        LogFormat::Compact => base.compact().with_filter(filter).boxed(),
    }
}

/// Build the JSON layer that feeds the rotated log file.
///
/// ANSI escapes are switched off so the on-disk logs stay clean. Target and
/// location fields follow the same config flags as the console layer, and the
/// layer gets its own filter from `make_filter`.
fn build_file_layer(config: &LoggingConfig, writer: NonBlocking) -> BoxedLayer {
    let show_location = config.include_location;

    let json_layer = fmt::layer()
        .with_writer(writer)
        .with_target(config.include_target)
        .with_file(show_location)
        .with_line_number(show_location)
        .with_span_events(FmtSpan::CLOSE)
        .with_ansi(false)
        .json();

    let filter = make_filter(config);
    json_layer.with_filter(filter).boxed()
}

/// Produce a new `EnvFilter` for one output layer.
///
/// Precedence: the `RUST_LOG` environment filter if it can be built, then the
/// config's per-crate `filter_directives`, then the global `level`. Each call
/// yields an independent instance (`EnvFilter` is not `Clone`), so every sink
/// ends up filtering by the same rules.
fn make_filter(config: &LoggingConfig) -> EnvFilter {
    if let Ok(from_env) = EnvFilter::try_from_default_env() {
        return from_env;
    }

    match config.filter_directives.as_ref() {
        Some(directives) => EnvFilter::new(directives),
        None => EnvFilter::new(level_to_string(config.level)),
    }
}

fn level_to_string(level: crate::config::LogLevel) -> String {
    match level {
        crate::config::LogLevel::Trace => "trace",
        crate::config::LogLevel::Debug => "debug",
        crate::config::LogLevel::Info => "info",
        crate::config::LogLevel::Warn => "warn",
        crate::config::LogLevel::Error => "error",
    }
    .to_string()
}

#[allow(clippy::unnecessary_wraps)]
fn create_file_writer(
    config: &FileLoggingConfig,
) -> Result<(tracing_appender::non_blocking::NonBlocking, WorkerGuard)> {
    // Clean up old rotated files before creating the appender.
    if let Some(max) = config.max_files {
        cleanup_rotated_files(&config.directory, &config.prefix, max);
    }

    let file_appender = match config.rotation {
        RotationStrategy::Daily => {
            tracing_appender::rolling::daily(&config.directory, &config.prefix)
        }
        RotationStrategy::Hourly => {
            tracing_appender::rolling::hourly(&config.directory, &config.prefix)
        }
        RotationStrategy::Never => {
            tracing_appender::rolling::never(&config.directory, &config.prefix)
        }
    };

    Ok(tracing_appender::non_blocking(file_appender))
}

/// Delete the oldest rotated log files beyond `max_files`.
///
/// `tracing-appender` names rotated files as `{prefix}.{date}` (e.g.
/// `daemon.2026-04-03`). We list the regular files in `directory` whose name
/// starts with `{prefix}.`, sort them lexicographically (dates sort
/// naturally), and remove the oldest until at most `max_files` remain.
///
/// Only regular files count as rotation candidates: a directory (or other
/// non-file entry) that happens to match the prefix can never be removed by
/// `remove_file`, so counting it would silently evict additional real log
/// files and leave fewer than `max_files` behind.
///
/// The cleanup is best-effort: an unreadable directory, unreadable entries,
/// non-UTF-8 file names and failed deletions are all skipped silently.
fn cleanup_rotated_files(directory: &std::path::Path, prefix: &str, max_files: usize) {
    let Ok(entries) = std::fs::read_dir(directory) else {
        return;
    };

    let dot_prefix = format!("{prefix}.");
    let mut files: Vec<std::path::PathBuf> = entries
        .filter_map(std::result::Result::ok)
        .map(|e| e.path())
        .filter(|p| {
            p.file_name()
                .and_then(|n| n.to_str())
                .is_some_and(|n| n.starts_with(&dot_prefix))
        })
        // Keep directories and other non-files out of the retention budget.
        .filter(|p| p.is_file())
        .collect();

    if files.len() <= max_files {
        return;
    }

    // Sort ascending by name (oldest dates first). Paths are unique, so an
    // unstable sort yields the same order without the extra allocation.
    files.sort_unstable();

    let to_remove = files.len() - max_files;
    for path in files.into_iter().take(to_remove) {
        // Best-effort: a file that cannot be deleted must not abort startup.
        let _ = std::fs::remove_file(&path);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::LogLevel;

    /// Every level maps to its lowercase directive name.
    #[test]
    fn test_log_level_conversion() {
        let cases = [
            (LogLevel::Info, "info"),
            (LogLevel::Debug, "debug"),
            (LogLevel::Trace, "trace"),
            (LogLevel::Warn, "warn"),
            (LogLevel::Error, "error"),
        ];

        for (level, expected) in cases {
            assert_eq!(level_to_string(level), expected);
        }
    }

    /// A guard built without a worker holds nothing.
    #[test]
    fn test_log_guard_creation() {
        let created = LogGuard::new(None);
        assert!(created.guard.is_none());
    }
}