Skip to main content

hwhkit_observability/
lib.rs

1//! Logging + tracing setup for hwhkit services.
2//!
3//! - [`init_logging`] accepts `format = "auto" | "json" | "pretty"`.
4//!   `auto` picks JSON when stdout is not a TTY (production / containers)
5//!   and pretty when running interactively.
6//! - With the `otel` feature enabled,
7//!   [`otel_layer::init_with_otel`] wires an OTLP gRPC exporter into the
8//!   `tracing` subscriber so every span is shipped to the configured
9//!   collector.
10//! - All public init functions return [`ObservabilityError`]; the crate
11//!   has no remaining `Result<_, String>` surface (project-wide policy as
12//!   of 0.6).
13
14#![warn(missing_docs)]
15
16use std::error::Error as StdError;
17
18use serde::{Deserialize, Serialize};
19use tracing_subscriber::{fmt, prelude::*, EnvFilter, Registry};
20
21/// Error returned by the public initialisation entry points
22/// ([`init_logging`], [`otel_layer::init_with_otel`]).
23///
24/// Marked `#[non_exhaustive]` so future variants (e.g. registry-already-
25/// installed, exporter-handshake-failed) can be added without breaking
26/// existing pattern matching.
27#[derive(Debug, thiserror::Error)]
28#[non_exhaustive]
29pub enum ObservabilityError {
30    /// The log filter directive (passed via [`LoggingConfig::level`])
31    /// could not be parsed by `tracing-subscriber`'s `EnvFilter`.
32    #[error("invalid log filter `{filter}`")]
33    BadFilter {
34        /// The directive string that failed to parse.
35        filter: String,
36        /// Underlying parse error from `tracing-subscriber`.
37        #[source]
38        source: Box<dyn StdError + Send + Sync>,
39    },
40
41    /// The OTLP / OpenTelemetry exporter pipeline failed to install.
42    /// Common causes: TLS setup mismatch, the global tracer provider was
43    /// already installed, or the collector endpoint was unreachable on
44    /// the synchronous handshake.
45    #[error("OTel exporter init failed")]
46    OtelInit {
47        /// Optional human-friendly context (e.g. "install_batch failed").
48        #[allow(dead_code)]
49        context: Option<String>,
50        /// Underlying error from the OTel pipeline builder.
51        #[source]
52        source: Box<dyn StdError + Send + Sync>,
53    },
54
55    /// The crate was built without the `otel` feature but the caller
56    /// asked for the OTel-aware initialiser. Rebuild with
57    /// `--features otel` (or, when consuming via the umbrella `hwhkit`
58    /// crate, `--features otel`) and try again.
59    #[error("hwhkit-observability built without `otel` feature")]
60    OtelDisabled,
61}
62
63/// Standalone logging configuration for callers that drive
64/// [`init_logging`] directly without going through the
65/// `hwhkit_config::AppConfig` pipeline.
66///
67/// **The canonical type lives in `hwhkit_config::LoggingConfig`** and is
68/// what the bootstrap pipeline consumes. This type is intentionally kept
69/// in sync (same `level`/`format` fields) so the two can be converted
70/// trivially. Prefer the config-crate one when wiring through the
71/// framework.
72#[derive(Debug, Clone, Serialize, Deserialize)]
73#[non_exhaustive]
74pub struct LoggingConfig {
75    /// `tracing-subscriber` env-filter directive, e.g. `"info"` or
76    /// `"hyper=warn,my_app=debug"`.
77    pub level: String,
78    /// One of `"auto"`, `"pretty"`, or `"json"`.
79    pub format: String,
80}
81
82impl Default for LoggingConfig {
83    fn default() -> Self {
84        Self {
85            level: "info".to_string(),
86            format: "auto".to_string(),
87        }
88    }
89}
90
91impl LoggingConfig {
92    /// Convenience constructor: pretty-printed logs at the given level.
93    pub fn pretty(level: impl Into<String>) -> Self {
94        Self {
95            level: level.into(),
96            format: "pretty".to_string(),
97        }
98    }
99    /// Convenience constructor: JSON-formatted logs at the given level.
100    pub fn json(level: impl Into<String>) -> Self {
101        Self {
102            level: level.into(),
103            format: "json".to_string(),
104        }
105    }
106}
107
108/// Standalone OTLP/OTel configuration for callers that drive
109/// [`otel_layer::init_with_otel`] directly. Mirrors
110/// `hwhkit_config::OtelConfig`.
111#[derive(Debug, Clone, Serialize, Deserialize)]
112#[non_exhaustive]
113pub struct OtelConfig {
114    /// Master switch — when `false` the helper falls back to plain
115    /// logging without ever opening a connection to the collector.
116    pub enabled: bool,
117    /// OTLP/gRPC endpoint URL.
118    pub endpoint: String,
119    /// Value emitted as the `service.name` resource attribute.
120    pub service_name: String,
121    /// Value emitted as the `service.version` resource attribute.
122    pub service_version: String,
123    /// Value emitted as the `deployment.environment` resource attribute.
124    pub environment: String,
125}
126
127impl Default for OtelConfig {
128    fn default() -> Self {
129        Self {
130            enabled: false,
131            endpoint: "http://localhost:4317".to_string(),
132            service_name: "hwhkit-service".to_string(),
133            service_version: env!("CARGO_PKG_VERSION").to_string(),
134            environment: "dev".to_string(),
135        }
136    }
137}
138
/// Reports whether the process's standard output is attached to a
/// terminal.
///
/// Delegates to the standard library's [`std::io::IsTerminal`], so no
/// `unsafe` FFI declaration is needed and the probe gives a real answer
/// on every platform `std` supports rather than only on Unix.
fn detect_tty() -> bool {
    std::io::IsTerminal::is_terminal(&std::io::stdout())
}
159
160fn make_filter(level: &str) -> Result<EnvFilter, ObservabilityError> {
161    EnvFilter::try_from_default_env()
162        .or_else(|_| EnvFilter::try_new(level))
163        .map_err(|e| ObservabilityError::BadFilter {
164            filter: level.to_string(),
165            source: Box::new(e),
166        })
167}
168
169/// Initialize tracing logging without OTel. Safe to call once per process.
170pub fn init_logging(config: &LoggingConfig) -> Result<(), ObservabilityError> {
171    let filter = make_filter(&config.level)?;
172    let format = resolve_format(&config.format);
173    let registry = Registry::default().with(filter);
174
175    let _ = match format {
176        ResolvedFormat::Json => registry.with(fmt::layer().json()).try_init(),
177        ResolvedFormat::Pretty => registry
178            .with(
179                fmt::layer()
180                    .with_target(false)
181                    .with_file(false)
182                    .with_line_number(false)
183                    .with_thread_ids(false),
184            )
185            .try_init(),
186    };
187
188    Ok(())
189}
190
191#[derive(Debug, Clone, Copy)]
192enum ResolvedFormat {
193    Json,
194    Pretty,
195}
196
197fn resolve_format(raw: &str) -> ResolvedFormat {
198    match raw {
199        "json" => ResolvedFormat::Json,
200        "pretty" => ResolvedFormat::Pretty,
201        // "auto" or anything else → tty-aware
202        _ => {
203            if detect_tty() {
204                ResolvedFormat::Pretty
205            } else {
206                ResolvedFormat::Json
207            }
208        }
209    }
210}
211
#[cfg(feature = "otel")]
pub mod otel_layer {
    //! OpenTelemetry-aware initialiser, gated on the `otel` feature.

    use super::*;
    use opentelemetry::trace::TracerProvider as _;
    use opentelemetry::KeyValue;
    use opentelemetry_otlp::WithExportConfig;
    use opentelemetry_sdk::{
        propagation::TraceContextPropagator,
        trace::{self as sdktrace, RandomIdGenerator, Sampler},
        Resource,
    };
    use tracing_opentelemetry::OpenTelemetryLayer;

    /// Initialize tracing-subscriber with an OTLP gRPC exporter. Returns a
    /// guard that should be dropped at shutdown to flush spans.
    ///
    /// When `otel_cfg.enabled` is `false` this honours the master switch:
    /// it installs plain logging via [`init_logging`], opens no connection
    /// to the collector, and returns an inert guard.
    ///
    /// Otherwise it builds a batch OTLP/tonic pipeline targeting
    /// `otel_cfg.endpoint`, tags it with the `service.name`,
    /// `service.version` and `deployment.environment` resource attributes,
    /// registers the resulting provider as the global tracer provider, and
    /// installs a subscriber made of the env filter, the OTel layer and a
    /// JSON or pretty `fmt` layer.
    ///
    /// NOTE(review): the batch pipeline is installed on
    /// `opentelemetry_sdk::runtime::Tokio`, so this presumably has to be
    /// called from inside a Tokio runtime; confirm against callers.
    ///
    /// # Errors
    ///
    /// - [`ObservabilityError::BadFilter`] when the log filter cannot be
    ///   built from `log_cfg.level`.
    /// - [`ObservabilityError::OtelInit`] when `install_batch` fails.
    ///
    /// The outcome of `try_init` is deliberately discarded, so a failure
    /// to install the subscriber is not reported to the caller.
    pub fn init_with_otel(
        log_cfg: &LoggingConfig,
        otel_cfg: &OtelConfig,
    ) -> Result<OtelGuard, ObservabilityError> {
        // Master switch off: plain logging only, never touch the collector.
        if !otel_cfg.enabled {
            init_logging(log_cfg)?;
            return Ok(OtelGuard { provider: None });
        }

        let filter = make_filter(&log_cfg.level)?;
        let format = resolve_format(&log_cfg.format);

        let resource = Resource::new(vec![
            KeyValue::new("service.name", otel_cfg.service_name.clone()),
            KeyValue::new("service.version", otel_cfg.service_version.clone()),
            KeyValue::new("deployment.environment", otel_cfg.environment.clone()),
        ]);

        opentelemetry::global::set_text_map_propagator(TraceContextPropagator::new());

        let exporter = opentelemetry_otlp::new_exporter()
            .tonic()
            .with_endpoint(otel_cfg.endpoint.clone());

        let provider = opentelemetry_otlp::new_pipeline()
            .tracing()
            .with_exporter(exporter)
            .with_trace_config(
                sdktrace::Config::default()
                    .with_sampler(Sampler::AlwaysOn)
                    .with_id_generator(RandomIdGenerator::default())
                    .with_resource(resource),
            )
            .install_batch(opentelemetry_sdk::runtime::Tokio)
            .map_err(|e| ObservabilityError::OtelInit {
                context: Some("install_batch failed".to_string()),
                source: Box::new(e),
            })?;

        // Register the provider globally so that the
        // `shutdown_tracer_provider()` call in `OtelGuard::drop` acts on
        // this pipeline rather than on whatever provider happens to be
        // global. The previous global provider (if returned) is dropped.
        let _ = opentelemetry::global::set_tracer_provider(provider.clone());

        let tracer = provider.tracer("hwhkit");
        let otel_layer = OpenTelemetryLayer::new(tracer);

        let registry = Registry::default().with(filter).with(otel_layer);
        let _ = match format {
            ResolvedFormat::Json => registry.with(fmt::layer().json()).try_init(),
            ResolvedFormat::Pretty => registry
                .with(
                    fmt::layer()
                        .with_target(false)
                        .with_file(false)
                        .with_line_number(false),
                )
                .try_init(),
        };

        Ok(OtelGuard {
            provider: Some(provider),
        })
    }

    /// Holder that shuts down the OTLP pipeline on drop.
    ///
    /// Holds `None` when OTel was disabled via `OtelConfig::enabled`; in
    /// that case dropping the guard does nothing.
    pub struct OtelGuard {
        provider: Option<opentelemetry_sdk::trace::TracerProvider>,
    }

    impl Drop for OtelGuard {
        fn drop(&mut self) {
            // Only tear down the global provider if this guard installed one.
            if self.provider.is_some() {
                opentelemetry::global::shutdown_tracer_provider();
            }
        }
    }
}
295
296#[cfg(not(feature = "otel"))]
297pub mod otel_layer {
298    //! Stub module when the `otel` feature is disabled.
299
300    use super::*;
301    /// Empty placeholder — same shape as the real guard so callers don't
302    /// need to feature-gate at the use-site.
303    pub struct OtelGuard;
304    /// Stub when the `otel` feature is disabled. Returns
305    /// [`ObservabilityError::OtelDisabled`] so callers can fall back to
306    /// plain logging.
307    pub fn init_with_otel(
308        _: &LoggingConfig,
309        _: &OtelConfig,
310    ) -> Result<OtelGuard, ObservabilityError> {
311        Err(ObservabilityError::OtelDisabled)
312    }
313}
314
315#[cfg(feature = "otel-sqlx")]
316pub mod sqlx_instrument;
317
318#[cfg(feature = "otel-redis")]
319pub mod redis_instrument;
320
321#[cfg(feature = "otel-reqwest")]
322pub mod reqwest_instrument;