trace_tools/subscriber/layer/
flamegraph.rs

1// Copyright 2022 IOTA Stiftung
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::{subscriber::visitors::LocationVisitor, util::Flamegrapher, Error};
5
6use lazy_static::lazy_static;
7use parking_lot::RwLock;
8use tracing::{callsite, span, subscriber, Metadata, Subscriber};
9use tracing_subscriber::{layer::Context, registry::LookupSpan, Layer};
10
11use std::{
12    cell::Cell,
13    collections::{HashMap, HashSet},
14    fmt::{Display, Write as _},
15    fs::File,
16    io::{BufWriter, Write},
17    path::Path,
18    time::{Duration, Instant},
19};
20
21lazy_static! {
22    /// The initialisation time of this layer.
23    static ref START: Instant = Instant::now();
24}
25
26thread_local! {
27    /// The last event on this thread's current span stack.
28    static LAST_EVENT: Cell<Instant> = Cell::new(*START);
29}
30
31/// A [`tracing_subscriber::Layer`] for generating a flamegraph of tracked [`Span`](tracing::Span)s.
32///
33/// This layer registers an interest in the following [`Span`](tracing::Span) targets:
34///  - `trace_tools::observe`: Generated by implementers of the [`Observe`](crate::Observe) trait.
35///  - `tokio::task`: Used internally by `tokio` to instrument tasks, when compiling with
36/// `--cfg tokio_unstable` flags.
37///  - [`Span`](tracing::Span)s with the name `runtime.spawn`: used internally by newer versions of `tokio`
38/// to instrument tasks, when compiling with `--cfg tokio_unstable` flags.
39pub struct FlamegraphLayer {
40    callsites: RwLock<HashSet<callsite::Identifier>>,
41    span_locations: RwLock<HashMap<span::Id, Option<SpanLocation>>>,
42    out_file: RwLock<BufWriter<File>>,
43}
44
45impl FlamegraphLayer {
46    pub(crate) fn new<P: AsRef<Path>>(stack_filename: P) -> Result<(Self, Flamegrapher), Error> {
47        // This does not need to be used, but dereferencing here initialises to the current time, rather
48        // than evaluating later.
49        let _ = *START;
50
51        let stack_filename = stack_filename.as_ref().with_extension("folded");
52        let out_file = File::create(stack_filename.clone()).map_err(Error::FlamegraphLayer)?;
53
54        let flamegrapher = Flamegrapher::new()
55            .with_stack_file(stack_filename)
56            .expect("stack file does not exist");
57
58        let layer = Self {
59            callsites: RwLock::new(HashSet::new()),
60            span_locations: RwLock::new(HashMap::new()),
61            out_file: RwLock::new(BufWriter::new(out_file)),
62        };
63
64        Ok((layer, flamegrapher))
65    }
66
67    fn is_tracked_callsite(&self, callsite: &callsite::Identifier) -> bool {
68        self.callsites.read().contains(callsite)
69    }
70
71    fn is_tracked<S>(&self, id: &span::Id, ctx: &Context<'_, S>) -> bool
72    where
73        S: Subscriber + for<'a> LookupSpan<'a>,
74    {
75        ctx.span(id)
76            .map(|span| self.is_tracked_callsite(&span.metadata().callsite()))
77            .unwrap_or(false)
78    }
79
80    fn stack_string_on_enter<S>(&self, id: &span::Id, ctx: &Context<'_, S>) -> String
81    where
82        S: Subscriber + for<'a> LookupSpan<'a>,
83    {
84        // Skip the current span, since we want to write the samples in the previous stack configuration,
85        // before this span was entered.
86        self.stack_string(id, ctx, true)
87    }
88
89    fn stack_string_on_exit<S>(&self, id: &span::Id, ctx: &Context<'_, S>) -> String
90    where
91        S: Subscriber + for<'a> LookupSpan<'a>,
92    {
93        // Keep the current span, since we want to write the samples in the previous stack configuration,
94        // before this span was exited.
95        self.stack_string(id, ctx, false)
96    }
97
98    /// Writes a [`String`] that describes the stack of a given [`Span`](tracing::Span).
99    ///
100    /// This string is then directly used in the folded stack file to describe the number of samples in
101    /// the described stack.
102    fn stack_string<S>(&self, id: &span::Id, ctx: &Context<'_, S>, skip_current_span: bool) -> String
103    where
104        S: Subscriber + for<'a> LookupSpan<'a>,
105    {
106        let delta = self.time_since_last_event();
107        let mut stack_str = "all-spans".to_string();
108
109        // If the span is tracked but not written to the registry, something has gone very wrong.
110        let mut leaf_span = Some(ctx.span(id).expect("span is not in registry"));
111        if skip_current_span {
112            leaf_span = leaf_span.and_then(|span| span.parent());
113        }
114
115        if let Some(span) = leaf_span {
116            for span in span.scope().from_root() {
117                let location = match self
118                    .span_locations
119                    .read()
120                    .get(&span.id())
121                    .and_then(|location| location.as_ref())
122                {
123                    Some(location) => location.to_string(),
124                    None => "unknown".to_string(),
125                };
126
127                write!(stack_str, "; {}", location).expect("writing to `String` should never fail");
128            }
129        }
130
131        write!(stack_str, " {}", delta.as_micros()).expect("writing to `String` should never fail");
132        stack_str
133    }
134
135    /// Returns the time since the last event within this [`Span`](tracing::Span) stack, and updates the
136    /// last event time to the current time.
137    fn time_since_last_event(&self) -> Duration {
138        let now = Instant::now();
139
140        let last_event = LAST_EVENT.with(|time| {
141            let last_event = time.get();
142            time.set(now);
143            last_event
144        });
145
146        now - last_event
147    }
148}
149
150impl<S> Layer<S> for FlamegraphLayer
151where
152    S: Subscriber + for<'a> LookupSpan<'a>,
153{
154    fn register_callsite(&self, metadata: &'static Metadata<'static>) -> subscriber::Interest {
155        match (metadata.name(), metadata.target()) {
156            ("runtime.spawn", _) | ("task", "tokio::task") | (_, "trace_tools::observe") => {
157                self.callsites.write().insert(metadata.callsite());
158            }
159            (_, _) => {}
160        }
161
162        // Ensure any downstream layers have access to all spans and events.
163        subscriber::Interest::always()
164    }
165
166    fn on_new_span(&self, attrs: &span::Attributes<'_>, id: &span::Id, _ctx: Context<'_, S>) {
167        if self.is_tracked_callsite(&attrs.metadata().callsite()) {
168            let location = SpanLocation::from_attributes(attrs);
169            self.span_locations.write().insert(id.clone(), location);
170        }
171    }
172
173    fn on_enter(&self, id: &span::Id, ctx: Context<'_, S>) {
174        if self.is_tracked(id, &ctx) {
175            let stack_str = self.stack_string_on_enter(id, &ctx);
176            let _ = writeln!(*self.out_file.write(), "{}", stack_str);
177        }
178    }
179
180    fn on_exit(&self, id: &span::Id, ctx: Context<'_, S>) {
181        if self.is_tracked(id, &ctx) {
182            let stack_str = self.stack_string_on_exit(id, &ctx);
183            let _ = writeln!(*self.out_file.write(), "{}", stack_str);
184        }
185    }
186
187    fn on_close(&self, id: span::Id, ctx: Context<'_, S>) {
188        if self.is_tracked(&id, &ctx) {
189            let _ = self.span_locations.write().remove(&id);
190        }
191    }
192}
193
194struct SpanLocation {
195    file: String,
196    line: u32,
197}
198
199impl SpanLocation {
200    fn from_attributes(attrs: &span::Attributes) -> Option<Self> {
201        let mut visitor = LocationVisitor::new();
202        attrs.record(&mut visitor);
203        visitor.try_into().ok()
204    }
205}
206
207impl Display for SpanLocation {
208    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
209        write!(f, "{}:{}", self.file, self.line)
210    }
211}
212
213impl TryFrom<LocationVisitor> for SpanLocation {
214    /// This is a bit odd, but since a `LocationVisitor` can fail to record any location information,
215    /// constructing a `SpanLocation` from one can fail. We don't need a specific error though, because
216    /// `SpanLocation::from_attributes` will just return `None` instead.
217    type Error = ();
218
219    fn try_from(visitor: LocationVisitor) -> Result<Self, Self::Error> {
220        let LocationVisitor { file, line } = visitor;
221
222        file.zip(line).map(|(file, line)| SpanLocation { file, line }).ok_or(())
223    }
224}