krishiv_metrics/init.rs
1use opentelemetry::trace::TracerProvider as _;
2use opentelemetry_sdk::trace::SdkTracerProvider;
3use tracing_subscriber::layer::SubscriberExt as _;
4use tracing_subscriber::util::SubscriberInitExt as _;
5
6/// Errors returned by [`init`].
7#[derive(Debug, thiserror::Error)]
8pub enum MetricsError {
9 /// The OTLP exporter pipeline failed to build.
10 #[error("OTLP exporter build failed: {0}")]
11 OtlpBuild(String),
12 /// A tracing subscriber initialization error.
13 #[error("subscriber init failed: {0}")]
14 Subscriber(String),
15}
16
17/// **Beta API**: may change between minor releases.
18///
19/// Selects the OTel span exporter backend used when initializing the tracer provider.
20pub enum TracerExporter {
21 /// Exports spans to stdout. Useful for development and CI.
22 Stdout,
23 /// Disables all span export. Used in tests and when telemetry is not needed.
24 NoOp,
25 /// Captures exported spans in memory for assertion in unit tests.
26 ///
27 /// **For testing only.** Uses a synchronous simple span processor that blocks
28 /// the tracing thread on each export. Do not use in production; use
29 /// [`TracerExporter::NoOp`] or the OTLP path (`otlp_endpoint`) instead.
30 InMemory(opentelemetry_sdk::trace::InMemorySpanExporter),
31}
32
33/// **Beta API**: may change between minor releases.
34///
35/// Configuration for the Krishiv metrics and tracing subsystem.
36pub struct MetricsConfig {
37 /// Name of the service reported in OTel spans.
38 pub service_name: String,
39 /// Which span exporter to use.
40 pub exporter: TracerExporter,
41 /// Tracing filter string (e.g. `"info"`, `"krishiv=debug,warn"`).
42 /// Defaults to `"info"` when `None`.
43 pub log_filter: Option<String>,
44 /// Optional OTLP collector endpoint (e.g. `"http://localhost:4317"`).
45 ///
46 /// When `Some`, the OTLP gRPC exporter is used instead of the `exporter`
47 /// field. When `None`, the `exporter` field controls output.
48 pub otlp_endpoint: Option<String>,
49 /// Deployment target emitted as the `deployment.target` OTel resource
50 /// attribute on every span. Falls back to the `KRISHIV_DEPLOYMENT_TARGET`
51 /// environment variable when `None`. Typical values: `"embedded"`,
52 /// `"single-node"`, `"distributed"`, `"k8s"`, `"bare-metal"`.
53 pub deployment_target: Option<String>,
54}
55
56impl MetricsConfig {
57 /// Resolve the effective deployment target: explicit config → env var → "unknown".
58 pub fn resolved_deployment_target(&self) -> String {
59 self.deployment_target
60 .clone()
61 .or_else(|| std::env::var("KRISHIV_DEPLOYMENT_TARGET").ok())
62 .unwrap_or_else(|| "unknown".to_string())
63 }
64}
65
66impl Default for MetricsConfig {
67 fn default() -> Self {
68 Self {
69 service_name: "krishiv".to_string(),
70 // NoOp default so tests don't write to stdout.
71 exporter: TracerExporter::NoOp,
72 log_filter: None,
73 otlp_endpoint: None,
74 deployment_target: None,
75 }
76 }
77}
78
79/// **Beta API**: may change between minor releases.
80///
81/// Opaque handle returned by [`init`]. Shuts down the OTel tracer provider on drop.
82pub struct MetricsHandle {
83 tracer_provider: SdkTracerProvider,
84}
85
86impl MetricsHandle {
87 /// Create a no-op handle (used when metrics init fails or telemetry is disabled).
88 ///
89 /// The returned handle owns a no-op tracer provider — no spans are exported.
90 pub fn noop() -> Self {
91 Self {
92 tracer_provider: SdkTracerProvider::builder().build(),
93 }
94 }
95
96 /// Explicitly shut down the tracer provider and flush any pending spans.
97 pub fn shutdown(self) {
98 // Drop runs `Drop::drop` which calls `tracer_provider.shutdown()`.
99 }
100
101 #[cfg(test)]
102 pub(crate) fn tracer_provider(&self) -> &SdkTracerProvider {
103 &self.tracer_provider
104 }
105}
106
107impl Drop for MetricsHandle {
108 fn drop(&mut self) {
109 // Best-effort shutdown; log the error so observability failures are
110 // visible instead of silently dropping the last batch of spans.
111 if let Err(error) = self.tracer_provider.shutdown() {
112 tracing::debug!(error = %error, "metrics tracer provider shutdown failed");
113 }
114 }
115}
116
117/// **Beta API**: may change between minor releases.
118///
119/// Initializes the OTel tracer provider and the `tracing` subscriber.
120///
121/// Calling this multiple times is safe: subsequent calls will fail to set a new global
122/// subscriber (which is ignored) but the returned [`MetricsHandle`] still owns a valid
123/// tracer provider.
124///
125/// # Errors
126///
127/// Returns a [`MetricsError`] if the OTLP exporter pipeline fails to build (only
128/// possible when `config.otlp_endpoint` is `Some`).
129pub fn init(config: MetricsConfig) -> Result<MetricsHandle, MetricsError> {
130 let filter_str = config.log_filter.as_deref().unwrap_or("info").to_string();
131 let filter = tracing_subscriber::EnvFilter::new(&filter_str);
132 let deployment_target = config.resolved_deployment_target();
133
134 // Build a resource with service.name and deployment.target attributes.
135 let resource = opentelemetry_sdk::Resource::builder()
136 .with_attribute(opentelemetry::KeyValue::new(
137 "service.name",
138 config.service_name.clone(),
139 ))
140 .with_attribute(opentelemetry::KeyValue::new(
141 "deployment.target",
142 deployment_target,
143 ))
144 .build();
145
146 let tracer_provider = if let Some(endpoint) = config.otlp_endpoint {
147 // Build an OTLP gRPC exporter pipeline.
148 use opentelemetry_otlp::{SpanExporter, WithExportConfig as _};
149
150 let exporter = SpanExporter::builder()
151 .with_tonic()
152 .with_endpoint(endpoint)
153 .build()
154 .map_err(|e| MetricsError::OtlpBuild(format!("{e}")))?;
155
156 SdkTracerProvider::builder()
157 .with_resource(resource)
158 .with_batch_exporter(exporter)
159 .build()
160 } else {
161 match config.exporter {
162 TracerExporter::Stdout => SdkTracerProvider::builder()
163 .with_resource(resource)
164 .with_simple_exporter(opentelemetry_stdout::SpanExporter::default())
165 .build(),
166 TracerExporter::NoOp => SdkTracerProvider::builder().build(),
167 TracerExporter::InMemory(exporter) => SdkTracerProvider::builder()
168 .with_resource(resource)
169 .with_simple_exporter(exporter)
170 .build(),
171 }
172 };
173
174 let tracer = tracer_provider.tracer(config.service_name.clone());
175
176 opentelemetry::global::set_tracer_provider(tracer_provider.clone());
177
178 // try_init is safe to call multiple times; it returns Err when a subscriber is
179 // already set, which we intentionally ignore so tests can call init() repeatedly.
180 let _ = tracing_subscriber::registry()
181 .with(filter)
182 .with(tracing_subscriber::fmt::layer().json())
183 .with(tracing_opentelemetry::layer().with_tracer(tracer))
184 .try_init();
185
186 Ok(MetricsHandle { tracer_provider })
187}
188
189/// **Beta API**: may change between minor releases.
190///
191/// Returns the W3C `traceparent` header value for the currently active `tracing` span,
192/// or `None` when no span is active.
193///
194/// Format: `"00-{trace_id}-{span_id}-01"`
195///
196/// Used by gRPC interceptors to propagate trace context via the `TraceContext` metadata key.
197pub fn current_traceparent() -> Option<String> {
198 use opentelemetry::trace::TraceContextExt as _;
199 use tracing_opentelemetry::OpenTelemetrySpanExt as _;
200
201 let ctx = tracing::Span::current().context();
202 let span_ref = ctx.span();
203 let span_ctx = span_ref.span_context();
204
205 if span_ctx.is_valid() {
206 Some(format!(
207 "00-{}-{}-01",
208 span_ctx.trace_id(),
209 span_ctx.span_id()
210 ))
211 } else {
212 None
213 }
214}