// taceo_nodes_observability/lib.rs
#![deny(missing_docs)]
2use eyre::Context;
16use metrics_exporter_dogstatsd::DogStatsDBuilder;
17use secrecy::{ExposeSecret, SecretString};
18use std::net::SocketAddr;
19use std::str::FromStr;
20use std::time::Duration;
21use std::{backtrace::Backtrace, panic};
22use telemetry_batteries::tracing::{TracingShutdownHandle, datadog::DatadogBattery};
23use tracing_subscriber::{
24 EnvFilter,
25 fmt::{self},
26};
27use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
28
29#[derive(Debug, Clone)]
34pub struct TracingConfig {
35 pub service_name: Option<String>,
37 pub traces_endpoint: Option<String>,
39 pub metrics: Option<MetricsConfig>,
41}
42
43impl TracingConfig {
44 pub fn try_from_env() -> eyre::Result<Self> {
52 let service_name = match std::env::var("TRACING_SERVICE_NAME") {
53 Ok(name) => Some(name),
54 Err(std::env::VarError::NotPresent) => None,
55 Err(e) => {
56 eyre::bail!("Failed to read SERVICE_NAME from environment: {}", e);
57 }
58 };
59 let traces_endpoint = match std::env::var("TRACING_ENDPOINT") {
60 Ok(endpoint) => Some(endpoint),
61 Err(std::env::VarError::NotPresent) => None,
62 Err(e) => {
63 eyre::bail!("Failed to read TRACING_ENDPOINT from environment: {}", e);
64 }
65 };
66
67 let metrics_config = MetricsConfig::try_from_env()?;
68
69 Ok(Self {
70 service_name,
71 traces_endpoint,
72 metrics: metrics_config,
73 })
74 }
75}
76
77#[derive(Debug, Clone)]
81pub enum MetricsConfig {
82 Datadog(DatadogMetricsConfig),
84 StatsD(StatsDMetricsConfig),
86 Prometheus(PrometheusMetricsConfig),
88}
89
90impl MetricsConfig {
91 pub fn try_from_env() -> eyre::Result<Option<Self>> {
96 match std::env::var("METRICS_EXPORTER") {
97 Ok(choice) => match choice.trim().to_lowercase().as_str() {
98 "datadog" => Ok(Some(Self::Datadog(
99 DatadogMetricsConfig::try_from_env()
100 .context("during constructing Datadog metrics exporter from environment")?,
101 ))),
102 "statsd" => Ok(Some(Self::StatsD(
103 StatsDMetricsConfig::try_from_env()
104 .context("during constructing StatsD metrics exporter from environment")?,
105 ))),
106 "prometheus" => Ok(Some(Self::Prometheus(
107 PrometheusMetricsConfig::try_from_env().context(
108 "during constructing Prometheus metrics exporter from environment",
109 )?,
110 ))),
111 _ => eyre::bail!(
112 "environment: METRICS_EXPORTER must be \"datadog\", \"statsd\", or \"prometheus\", not \"{}\"",
113 choice
114 ),
115 },
116 Err(std::env::VarError::NotPresent) => Ok(None),
117 Err(e) => {
118 eyre::bail!("Failed to read METRICS_EXPORTER from environment: {}", e);
119 }
120 }
121 }
122}
123
/// Connection settings for the DogStatsD metrics exporter.
#[derive(Clone, Debug)]
pub struct DatadogMetricsConfig {
    // Host part of the remote address the exporter sends to.
    pub(crate) host: String,
    // Port part of the remote address the exporter sends to.
    pub(crate) port: u16,
    // Optional global prefix applied by the exporter.
    pub(crate) prefix: Option<String>,
}
131
132impl DatadogMetricsConfig {
133 pub fn try_from_env() -> eyre::Result<Self> {
138 let host = match std::env::var("METRICS_DATADOG_HOST") {
139 Ok(host) => host,
140 Err(e) => {
141 eyre::bail!(
142 "Failed to read METRICS_DATADOG_HOST from environment: {}",
143 e
144 );
145 }
146 };
147 let port = match std::env::var("METRICS_DATADOG_PORT") {
148 Ok(port) => match port.parse() {
149 Ok(port) => port,
150 Err(e) => {
151 eyre::bail!("Failed to parse port from METRICS_DATADOG_PORT: {}", e);
152 }
153 },
154 Err(std::env::VarError::NotPresent) => 8125u16,
155 Err(e) => {
156 eyre::bail!(
157 "Failed to read METRICS_DATADOG_PORT from environment: {}",
158 e
159 );
160 }
161 };
162 let prefix = match std::env::var("METRICS_DATADOG_PREFIX") {
163 Ok(prefix) => Some(prefix),
164 Err(std::env::VarError::NotPresent) => None,
165 Err(e) => {
166 eyre::bail!(
167 "Failed to read METRICS_DATADOG_PREFIX from environment: {}",
168 e
169 );
170 }
171 };
172 Ok(Self { host, port, prefix })
173 }
174}
175
/// Connection and tuning settings for the StatsD metrics exporter.
#[derive(Clone, Debug)]
pub struct StatsDMetricsConfig {
    // Host the exporter builder is created with.
    pub(crate) host: String,
    // Port the exporter builder is created with.
    pub(crate) port: u16,
    // Optional prefix handed to the exporter when it is built.
    pub(crate) prefix: Option<String>,
    // Optional override forwarded to the builder's queue size setting.
    pub(crate) queue_size: Option<usize>,
    // Optional override forwarded to the builder's buffer size setting.
    pub(crate) buffer_size: Option<usize>,
}
185
186impl StatsDMetricsConfig {
187 pub fn try_from_env() -> eyre::Result<Self> {
191 let host = match std::env::var("METRICS_STATSD_HOST") {
192 Ok(host) => host,
193 Err(e) => {
194 eyre::bail!("Failed to read METRICS_STATSD_HOST from environment: {}", e);
195 }
196 };
197 let port = match std::env::var("METRICS_STATSD_PORT") {
198 Ok(port) => match port.parse() {
199 Ok(port) => port,
200 Err(e) => {
201 eyre::bail!("Failed to parse port from METRICS_STATSD_PORT: {}", e);
202 }
203 },
204 Err(std::env::VarError::NotPresent) => 8125u16,
205 Err(e) => {
206 eyre::bail!("Failed to read METRICS_STATSD_PORT from environment: {}", e);
207 }
208 };
209 let prefix = match std::env::var("METRICS_STATSD_PREFIX") {
210 Ok(prefix) => Some(prefix),
211 Err(std::env::VarError::NotPresent) => None,
212 Err(e) => {
213 eyre::bail!(
214 "Failed to read METRICS_STATSD_PREFIX from environment: {}",
215 e
216 );
217 }
218 };
219 let queue_size = match std::env::var("METRICS_STATSD_QUEUE_SIZE") {
220 Ok(queue_size) => Some(
221 queue_size
222 .parse()
223 .context("during reading METRICS_STATSD_QUEUE_SIZE from environment")?,
224 ),
225 Err(std::env::VarError::NotPresent) => None,
226 Err(e) => {
227 eyre::bail!(
228 "Failed to read METRICS_STATSD_QUEUE_SIZE from environment: {}",
229 e
230 );
231 }
232 };
233 let buffer_size = match std::env::var("METRICS_STATSD_BUFFER_SIZE") {
234 Ok(buffer_size) => Some(
235 buffer_size
236 .parse()
237 .context("during reading METRICS_STATSD_BUFFER_SIZE from environment")?,
238 ),
239 Err(std::env::VarError::NotPresent) => None,
240 Err(e) => {
241 eyre::bail!(
242 "Failed to read METRICS_STATSD_BUFFER_SIZE from environment: {}",
243 e
244 );
245 }
246 };
247 Ok(Self {
248 host,
249 port,
250 prefix,
251 queue_size,
252 buffer_size,
253 })
254 }
255}
256
257#[derive(Debug, Clone)]
259pub enum PrometheusMetricsConfig {
260 Scrape(ScrapePrometheusMetricsConfig),
262 Push(PushPrometheusMetricsConfig),
264}
265
266impl PrometheusMetricsConfig {
267 pub fn try_from_env() -> eyre::Result<Self> {
272 match std::env::var("METRICS_PROMETHEUS_MODE") {
273 Ok(choice) => match choice.trim().to_lowercase().as_str() {
274 "scrape" => Ok(Self::Scrape(ScrapePrometheusMetricsConfig::try_from_env()?)),
275 "push" => Ok(Self::Push(PushPrometheusMetricsConfig::try_from_env()?)),
276 _ => eyre::bail!(
277 "environment: METRICS_PROMETHEUS_MODE must be \"scrape\" or \"push\", not \"{}\"",
278 choice
279 ),
280 },
281 Err(e) => {
282 eyre::bail!(
283 "Failed to read METRICS_PROMETHEUS_MODE from environment: {}",
284 e
285 );
286 }
287 }
288 }
289}
290
/// Settings for the Prometheus exporter in scrape mode.
#[derive(Clone, Debug)]
pub struct ScrapePrometheusMetricsConfig {
    // Address for the exporter's HTTP listener; when `None` the exporter
    // builder is used without an explicit listener address.
    pub(crate) bind_addr: Option<SocketAddr>,
}
296
297impl ScrapePrometheusMetricsConfig {
298 pub fn try_from_env() -> eyre::Result<Self> {
301 match std::env::var("METRICS_PROMETHEUS_BIND_ADDR") {
302 Ok(bind_addr) => Ok(ScrapePrometheusMetricsConfig {
303 bind_addr: Some(
304 bind_addr
305 .parse()
306 .context("during reading METRICS_PROMETHEUS_BIND_ADDR from environment")?,
307 ),
308 }),
309 Err(std::env::VarError::NotPresent) => {
310 Ok(ScrapePrometheusMetricsConfig { bind_addr: None })
311 }
312 Err(e) => {
313 eyre::bail!(
314 "Failed to read METRICS_PROMETHEUS_BIND_ADDR from environment: {}",
315 e
316 );
317 }
318 }
319 }
320}
321
322#[derive(Debug, Clone)]
324pub struct PushPrometheusMetricsConfig {
325 pub(crate) endpoint: String,
326 pub(crate) interval: Duration,
327 pub(crate) username: Option<SecretString>,
328 pub(crate) password: Option<SecretString>,
329 pub(crate) use_http_post_method: bool,
330}
331impl PushPrometheusMetricsConfig {
332 pub fn try_from_env() -> eyre::Result<Self> {
336 let endpoint = match std::env::var("METRICS_PROMETHEUS_ENDPOINT") {
337 Ok(endpoint) => endpoint,
338 Err(e) => {
339 eyre::bail!(
340 "Failed to read METRICS_PROMETHEUS_ENDPOINT from environment: {}",
341 e
342 );
343 }
344 };
345 let interval = match std::env::var("METRICS_PROMETHEUS_INTERVAL") {
346 Ok(interval) => {
347 std::time::Duration::from(humantime::Duration::from_str(&interval).context(
348 "During parsing METRICS_PROMETHEUS_INTERVAL from env: \
349 Expecting a duration string such as \"1h 24min\", \"29s\", ..",
350 )?)
351 }
352 Err(e) => {
353 eyre::bail!(
354 "Failed to read METRICS_PROMETHEUS_INTERVAL from environment: {}",
355 e
356 );
357 }
358 };
359 let username = match std::env::var("METRICS_PROMETHEUS_USERNAME") {
360 Ok(username) => Some(SecretString::from(username)),
361 Err(std::env::VarError::NotPresent) => None,
362 Err(e) => {
363 eyre::bail!(
364 "Failed to read METRICS_PROMETHEUS_USERNAME from environment: {}",
365 e
366 );
367 }
368 };
369 let password = match std::env::var("METRICS_PROMETHEUS_PASSWORD") {
370 Ok(password) => Some(SecretString::from(password)),
371 Err(std::env::VarError::NotPresent) => None,
372 Err(e) => {
373 eyre::bail!(
374 "Failed to read METRICS_PROMETHEUS_PASSWORD from environment: {}",
375 e
376 );
377 }
378 };
379 let use_http_post_method = match std::env::var("METRICS_PROMETHEUS_USE_HTTP_POST_METHOD") {
380 Ok(use_http_post_method) => use_http_post_method.parse().context(
381 "during reading METRICS_PROMETHEUS_USE_HTTP_POST_METHOD from environment (expecting bool)",
382 )?,
383 Err(std::env::VarError::NotPresent) => false,
384 Err(e) => {
385 eyre::bail!(
386 "Failed to read METRICS_PROMETHEUS_USE_HTTP_POST_METHOD from environment: {}",
387 e
388 );
389 }
390 };
391 Ok(PushPrometheusMetricsConfig {
392 endpoint,
393 interval,
394 username,
395 password,
396 use_http_post_method,
397 })
398 }
399}
400
401pub fn initialize_metrics(config: &MetricsConfig) -> eyre::Result<()> {
405 match config {
406 MetricsConfig::Datadog(datadog_conf) => {
407 tracing::debug!("Setting up Datadog metrics exporter ..");
408 let mut builder = DogStatsDBuilder::default()
409 .with_remote_address(format!("{}:{}", &datadog_conf.host, datadog_conf.port))?
410 .send_histograms_as_distributions(true);
411 if let Some(prefix) = &datadog_conf.prefix {
412 builder = builder.set_global_prefix(prefix);
413 };
414 builder.install()?;
415 }
416 MetricsConfig::StatsD(statsd_conf) => {
417 tracing::debug!("Setting up StatsD metrics exporter ..");
418 let builder = metrics_exporter_statsd::StatsdBuilder::from(
419 statsd_conf.host.to_owned(),
420 statsd_conf.port,
421 );
422 let builder = {
423 if let Some(buffer_size) = statsd_conf.buffer_size {
424 builder.with_buffer_size(buffer_size)
425 } else {
426 builder
427 }
428 };
429 let builder = {
430 if let Some(queue_size) = statsd_conf.queue_size {
431 builder.with_queue_size(queue_size)
432 } else {
433 builder
434 }
435 };
436 let recorder = builder
437 .build(statsd_conf.prefix.as_deref())
438 .context("during building StatsD metrics exporter")?;
439 metrics::set_global_recorder(recorder)
440 .context("during setting StatsD metrics exporter as global recorder")?;
441 }
442 MetricsConfig::Prometheus(prometheus_conf) => match prometheus_conf {
443 PrometheusMetricsConfig::Scrape(scrape_conf) => {
444 tracing::debug!("Setting up Prometheus scrape metrics exporter ..");
445 let builder = if let Some(bind_addr) = scrape_conf.bind_addr {
446 metrics_exporter_prometheus::PrometheusBuilder::new()
447 .with_http_listener(bind_addr)
448 } else {
449 metrics_exporter_prometheus::PrometheusBuilder::new()
450 };
451 builder.install().context(
452 "during installing Prometheus scrape metrics exporter as global recorder",
453 )?;
454 }
455 PrometheusMetricsConfig::Push(push_conf) => {
456 tracing::debug!("Setting up Prometheus push metrics exporter ..");
457 metrics_exporter_prometheus::PrometheusBuilder::new()
458 .with_push_gateway(
459 &push_conf.endpoint,
460 push_conf.interval,
461 push_conf
462 .username
463 .to_owned()
464 .map(|x| x.expose_secret().to_owned()),
465 push_conf
466 .password
467 .to_owned()
468 .map(|x| x.expose_secret().to_owned()),
469 push_conf.use_http_post_method,
470 )
471 .context("during building Prometheus push metrics exporter")?
472 .install()
473 .context(
474 "during installing Prometheus push metrics exporter as global recorder",
475 )?;
476 }
477 },
478 };
479 Ok(())
480}
481
482pub fn initialize_tracing(config: &TracingConfig) -> eyre::Result<Option<TracingShutdownHandle>> {
503 let handle = {
504 if let Some(service_name) = config.service_name.as_deref() {
505 let tracing_shutdown_handle =
506 DatadogBattery::init(config.traces_endpoint.as_deref(), service_name, None, true);
507 panic::set_hook(Box::new(|panic_info| {
509 let message = match panic_info.payload().downcast_ref::<&str>() {
510 Some(s) => *s,
511 None => match panic_info.payload().downcast_ref::<String>() {
512 Some(s) => s.as_str(),
513 None => "Unknown panic message",
514 },
515 };
516 let location = if let Some(location) = panic_info.location() {
517 format!(
518 "{}:{}:{}",
519 location.file(),
520 location.line(),
521 location.column()
522 )
523 } else {
524 "Unknown location".to_string()
525 };
526
527 let backtrace = Backtrace::capture();
528 let backtrace_string = format!("{backtrace:?}");
529
530 let backtrace_single_line = backtrace_string.replace('\n', " | ");
531
532 tracing::error!(
533 { backtrace = %backtrace_single_line, location = %location},
534 "Panic occurred with message: {}",
535 message
536 );
537 }));
538 Ok(Some(tracing_shutdown_handle))
539 } else {
540 install_tracing("info");
541 Ok(None)
542 }
543 };
544
545 if let Some(metrics_conf) = &config.metrics {
546 initialize_metrics(metrics_conf)?;
547 }
548
549 handle
550}
551
552pub fn install_tracing(env_filter: &str) {
570 let fmt_layer = fmt::layer().with_target(false).with_line_number(false);
571 let filter_layer = EnvFilter::try_from_default_env()
572 .or_else(|_| EnvFilter::try_new(env_filter))
573 .unwrap();
574
575 tracing_subscriber::registry()
576 .with(filter_layer)
577 .with(fmt_layer)
578 .init();
579}