zksync_vlog/
lib.rs

1//! This crate contains the observability subsystem.
2//! It is responsible for providing a centralized interface for consistent observability configuration.
3
4use std::{backtrace::Backtrace, borrow::Cow, panic::PanicInfo, str::FromStr};
5
// NOTE: the `pub use sentry::{capture_message, ...}` item further down in this import list is a temporary
// re-export, aiming to simplify the transition from `vlog` to using the underlying crates directly.
8use opentelemetry::{
9    sdk::{
10        propagation::TraceContextPropagator,
11        trace::{self, RandomIdGenerator, Sampler, Tracer},
12        Resource,
13    },
14    KeyValue,
15};
16use opentelemetry_otlp::WithExportConfig;
17use opentelemetry_semantic_conventions::resource::SERVICE_NAME;
18pub use sentry::{capture_message, Level as AlertLevel};
19use sentry::{types::Dsn, ClientInitGuard};
20use serde::{de::Error, Deserialize, Deserializer};
21use tracing_opentelemetry::OpenTelemetryLayer;
22use tracing_subscriber::{
23    filter::Filtered,
24    fmt,
25    layer::{Layered, SubscriberExt},
26    registry::LookupSpan,
27    util::SubscriberInitExt,
28    EnvFilter, Layer,
29};
30
31pub mod prometheus;
32
33type TracingLayer<Inner> =
34    Layered<Filtered<OpenTelemetryLayer<Inner, Tracer>, EnvFilter, Inner>, Inner>;
35
/// Output format used for the logs written to stdout.
#[derive(Clone, Copy, Debug, Default)]
pub enum LogFormat {
    /// Plain-text log lines. This is the default format.
    #[default]
    Plain,
    /// JSON-formatted log lines.
    Json,
}
43
44impl std::fmt::Display for LogFormat {
45    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46        match self {
47            Self::Plain => f.write_str("plain"),
48            Self::Json => f.write_str("json"),
49        }
50    }
51}
52
/// Error produced when a string cannot be interpreted as a log format.
///
/// Wraps a static, human-readable description of the failure.
#[derive(Debug)]
pub struct LogFormatError(&'static str);

impl std::fmt::Display for LogFormatError {
    /// Writes the wrapped description verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let LogFormatError(description) = self;
        f.write_str(description)
    }
}

// No source error to expose, so the default `Error` methods are sufficient.
impl std::error::Error for LogFormatError {}
63
64impl FromStr for LogFormat {
65    type Err = LogFormatError;
66
67    fn from_str(s: &str) -> Result<Self, Self::Err> {
68        match s {
69            "plain" => Ok(LogFormat::Plain),
70            "json" => Ok(LogFormat::Json),
71            _ => Err(LogFormatError("invalid log format")),
72        }
73    }
74}
75
76impl<'de> Deserialize<'de> for LogFormat {
77    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
78        let s = String::deserialize(deserializer)?;
79        s.parse::<Self>().map_err(D::Error::custom)
80    }
81}
82
/// Verbosity level for the data exported via OpenTelemetry.
///
/// `WARN` and `ERROR` are deliberately not defined: `INFO` is the highest level at which spans are emitted.
#[derive(Clone, Copy, Debug, Default)]
pub enum OpenTelemetryLevel {
    /// The default level.
    #[default]
    OFF,
    INFO,
    DEBUG,
    TRACE,
}
92
/// Error produced when a string cannot be interpreted as an OpenTelemetry level.
#[derive(Debug)]
pub struct OpenTelemetryLevelFormatError;

impl std::fmt::Display for OpenTelemetryLevelFormatError {
    /// Writes a fixed description; the error carries no further details.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("Invalid OpenTelemetry level format")
    }
}

// No source error to expose, so the default `Error` methods are sufficient.
impl std::error::Error for OpenTelemetryLevelFormatError {}
103
104impl FromStr for OpenTelemetryLevel {
105    type Err = OpenTelemetryLevelFormatError;
106
107    fn from_str(s: &str) -> Result<Self, Self::Err> {
108        match s {
109            "off" => Ok(OpenTelemetryLevel::OFF),
110            "info" => Ok(OpenTelemetryLevel::INFO),
111            "debug" => Ok(OpenTelemetryLevel::DEBUG),
112            "trace" => Ok(OpenTelemetryLevel::TRACE),
113            _ => Err(OpenTelemetryLevelFormatError),
114        }
115    }
116}
117
118impl std::fmt::Display for OpenTelemetryLevel {
119    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
120        let str = match self {
121            OpenTelemetryLevel::OFF => "off",
122            OpenTelemetryLevel::INFO => "info",
123            OpenTelemetryLevel::DEBUG => "debug",
124            OpenTelemetryLevel::TRACE => "trace",
125        };
126        write!(f, "{}", str)
127    }
128}
129
130#[derive(Clone, Debug)]
131pub struct OpenTelemetryOptions {
132    /// Enables export of span data of specified level (and above) using opentelemetry exporters.
133    pub opentelemetry_level: OpenTelemetryLevel,
134    /// Opentelemetry HTTP collector endpoint.
135    pub otlp_endpoint: String,
136    /// Logical service name to be used for exported events. See [`SERVICE_NAME`].
137    pub service_name: String,
138}
139
140/// Builder for the observability subsystem.
141/// Currently capable of configuring logging output and sentry integration.
142#[derive(Debug, Default)]
143pub struct ObservabilityBuilder {
144    disable_default_logs: bool,
145    log_format: LogFormat,
146    log_directives: Option<String>,
147    sentry_url: Option<Dsn>,
148    sentry_environment: Option<String>,
149    opentelemetry_options: Option<OpenTelemetryOptions>,
150}
151
152/// Guard for the observability subsystem.
153/// Releases configured integrations upon being dropped.
154pub struct ObservabilityGuard {
155    _sentry_guard: Option<ClientInitGuard>,
156}
157
158impl std::fmt::Debug for ObservabilityGuard {
159    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
160        f.debug_struct("ObservabilityGuard").finish()
161    }
162}
163
164impl ObservabilityBuilder {
165    /// Creates a new builder with default values.
166    pub fn new() -> Self {
167        Self::default()
168    }
169
170    /// Sets the log format.
171    /// Default is `LogFormat::Plain`.
172    pub fn with_log_format(mut self, log_format: LogFormat) -> Self {
173        self.log_format = log_format;
174        self
175    }
176
177    pub fn with_log_directives(mut self, log_level: String) -> Self {
178        self.log_directives = Some(log_level);
179        self
180    }
181
182    /// Disables logs enabled by default.
183    /// May be used, for example, in interactive CLI applications, where the user may want to fully control
184    /// the verbosity.
185    pub fn disable_default_logs(mut self) -> Self {
186        self.disable_default_logs = true;
187        self
188    }
189
190    /// Enables Sentry integration.
191    /// Returns an error if the provided Sentry URL is invalid.
192    pub fn with_sentry_url(
193        mut self,
194        sentry_url: &str,
195    ) -> Result<Self, sentry::types::ParseDsnError> {
196        let sentry_url = sentry_url.parse()?;
197        self.sentry_url = Some(sentry_url);
198        Ok(self)
199    }
200
201    /// Sets the Sentry environment ID.
202    /// If not set, no environment will be provided in Sentry events.
203    pub fn with_sentry_environment(mut self, environment: Option<String>) -> Self {
204        self.sentry_environment = environment;
205        self
206    }
207
208    pub fn with_opentelemetry(
209        mut self,
210        opentelemetry_level: &str,
211        otlp_endpoint: String,
212        service_name: String,
213    ) -> Result<Self, OpenTelemetryLevelFormatError> {
214        self.opentelemetry_options = Some(OpenTelemetryOptions {
215            opentelemetry_level: opentelemetry_level.parse()?,
216            otlp_endpoint,
217            service_name,
218        });
219        Ok(self)
220    }
221
222    fn add_opentelemetry_layer<S>(
223        opentelemetry_level: OpenTelemetryLevel,
224        otlp_endpoint: String,
225        service_name: String,
226        subscriber: S,
227    ) -> TracingLayer<S>
228    where
229        S: tracing::Subscriber + for<'span> LookupSpan<'span> + Send + Sync,
230    {
231        let filter = match opentelemetry_level {
232            OpenTelemetryLevel::OFF => EnvFilter::new("off"),
233            OpenTelemetryLevel::INFO => EnvFilter::new("info"),
234            OpenTelemetryLevel::DEBUG => EnvFilter::new("debug"),
235            OpenTelemetryLevel::TRACE => EnvFilter::new("trace"),
236        };
237        // `otel::tracing` should be a level info to emit opentelemetry trace & span
238        // `otel` set to debug to log detected resources, configuration read and inferred
239        let filter = filter
240            .add_directive("otel::tracing=trace".parse().unwrap())
241            .add_directive("otel=debug".parse().unwrap());
242
243        let resource = vec![KeyValue::new(SERVICE_NAME, service_name)];
244
245        let tracer = opentelemetry_otlp::new_pipeline()
246            .tracing()
247            .with_exporter(
248                opentelemetry_otlp::new_exporter()
249                    .http()
250                    .with_endpoint(otlp_endpoint),
251            )
252            .with_trace_config(
253                trace::config()
254                    .with_sampler(Sampler::AlwaysOn)
255                    .with_id_generator(RandomIdGenerator::default())
256                    .with_resource(Resource::new(resource)),
257            )
258            .install_batch(opentelemetry::runtime::Tokio)
259            .unwrap();
260
261        opentelemetry::global::set_text_map_propagator(TraceContextPropagator::new());
262        let layer = tracing_opentelemetry::layer()
263            .with_tracer(tracer)
264            .with_filter(filter);
265        subscriber.with(layer)
266    }
267
268    /// Builds a filter for the logs.
269    ///
270    /// Unless `disable_default_logs` was set, uses `zksync=info` as a default which is then merged
271    /// with user-defined directives. Provided directives can extend/override the default value.
272    ///
273    /// The provided default convers all the crates with a name starting with `zksync` (per `tracing`
274    /// [documentation][1]), which is a good enough default for any project.
275    ///
276    /// If `log_directives` are provided via `with_log_directives`, they will be used.
277    /// Otherwise, the value will be parsed from the environment variable `RUST_LOG`.
278    ///
279    /// [1]: https://docs.rs/tracing-subscriber/0.3.18/tracing_subscriber/filter/targets/struct.Targets.html#filtering-with-targets
280    fn build_filter(&self) -> EnvFilter {
281        let mut directives = if self.disable_default_logs {
282            "".to_string()
283        } else {
284            "zksync=info,".to_string()
285        };
286        if let Some(log_directives) = &self.log_directives {
287            directives.push_str(log_directives);
288        } else if let Ok(env_directives) = std::env::var(EnvFilter::DEFAULT_ENV) {
289            directives.push_str(&env_directives);
290        };
291        EnvFilter::new(directives)
292    }
293
294    /// Initializes the observability subsystem.
295    pub fn build(self) -> ObservabilityGuard {
296        // Initialize logs.
297        let env_filter = self.build_filter();
298
299        match self.log_format {
300            LogFormat::Plain => {
301                let subscriber = tracing_subscriber::registry()
302                    .with(env_filter)
303                    .with(fmt::Layer::default());
304                if let Some(opts) = self.opentelemetry_options {
305                    let subscriber = Self::add_opentelemetry_layer(
306                        opts.opentelemetry_level,
307                        opts.otlp_endpoint,
308                        opts.service_name,
309                        subscriber,
310                    );
311                    subscriber.init()
312                } else {
313                    subscriber.init()
314                }
315            }
316            LogFormat::Json => {
317                let timer = tracing_subscriber::fmt::time::UtcTime::rfc_3339();
318                let subscriber = tracing_subscriber::registry().with(env_filter).with(
319                    fmt::Layer::default()
320                        .with_file(true)
321                        .with_line_number(true)
322                        .with_timer(timer)
323                        .json(),
324                );
325                if let Some(opts) = self.opentelemetry_options {
326                    let subscriber = Self::add_opentelemetry_layer(
327                        opts.opentelemetry_level,
328                        opts.otlp_endpoint,
329                        opts.service_name,
330                        subscriber,
331                    );
332                    subscriber.init()
333                } else {
334                    subscriber.init()
335                }
336            }
337        };
338
339        // Check whether we need to change the default panic handler.
340        // Note that this must happen before we initialize Sentry, since otherwise
341        // Sentry's panic handler will also invoke the default one, resulting in unformatted
342        // panic info being output to stderr.
343        if matches!(self.log_format, LogFormat::Json) {
344            // Remove any existing hook. We expect that no hook is set by default.
345            let _ = std::panic::take_hook();
346            // Override the default panic handler to print the panic in JSON format.
347            std::panic::set_hook(Box::new(json_panic_handler));
348        };
349
350        // Initialize the Sentry.
351        let sentry_guard = if let Some(sentry_url) = self.sentry_url {
352            let options = sentry::ClientOptions {
353                release: sentry::release_name!(),
354                environment: self.sentry_environment.map(Cow::from),
355                attach_stacktrace: true,
356                ..Default::default()
357            };
358
359            Some(sentry::init((sentry_url, options)))
360        } else {
361            None
362        };
363
364        ObservabilityGuard {
365            _sentry_guard: sentry_guard,
366        }
367    }
368}
369
370fn json_panic_handler(panic_info: &PanicInfo) {
371    let backtrace = Backtrace::force_capture();
372    let timestamp = chrono::Utc::now();
373    let panic_message = if let Some(s) = panic_info.payload().downcast_ref::<String>() {
374        s.as_str()
375    } else if let Some(s) = panic_info.payload().downcast_ref::<&str>() {
376        s
377    } else {
378        "Panic occurred without additional info"
379    };
380
381    let panic_location = panic_info
382        .location()
383        .map(|val| val.to_string())
384        .unwrap_or_else(|| "Unknown location".to_owned());
385
386    let backtrace_str = backtrace.to_string();
387    let timestamp_str = timestamp.format("%Y-%m-%dT%H:%M:%S%.fZ").to_string();
388
389    println!(
390        "{}",
391        serde_json::json!({
392            "timestamp": timestamp_str,
393            "level": "CRITICAL",
394            "fields": {
395                "message": panic_message,
396                "location": panic_location,
397                "backtrace": backtrace_str,
398            }
399        })
400    );
401}