tracing_timing/
lib.rs

1//! Inter-event timing metrics on top of [`tracing`].
2//!
3//! This crate provides a `tracing::Subscriber` that keeps statistics on inter-event timing
4//! information. More concretely, given code like this:
5//!
6//! ```rust
7//! use tracing::*;
8//! use tracing_timing::{Builder, Histogram};
9//! let subscriber = Builder::default().build(|| Histogram::new_with_max(1_000_000, 2).unwrap());
10//! let dispatcher = Dispatch::new(subscriber);
11//! dispatcher::with_default(&dispatcher, || {
12//!     trace_span!("request").in_scope(|| {
13//!         // do a little bit of work
14//!         trace!("fast");
15//!         // do a lot of work
16//!         trace!("slow");
17//!     })
18//! });
19//! ```
20//!
21//! You can produce something like this (see `examples/pretty.rs`):
22//!
23//! ```text
24//! fast:
25//! mean: 173.2µs, p50: 172µs, p90: 262µs, p99: 327µs, p999: 450µs, max: 778µs
26//!   25µs | *                                        |  2.2th %-ile
27//!   50µs | *                                        |  2.2th %-ile
28//!   75µs | *                                        |  4.7th %-ile
29//!  100µs | ***                                      | 11.5th %-ile
30//!  125µs | *****                                    | 24.0th %-ile
31//!  150µs | *******                                  | 41.1th %-ile
32//!  175µs | ********                                 | 59.2th %-ile
33//!  200µs | *******                                  | 75.4th %-ile
34//!  225µs | **                                       | 80.1th %-ile
35//!  250µs | ***                                      | 87.3th %-ile
36//!  275µs | ***                                      | 94.4th %-ile
37//!  300µs | **                                       | 97.8th %-ile
38//!
39//! slow:
40//! mean: 623.3µs, p50: 630µs, p90: 696µs, p99: 770µs, p999: 851µs, max: 950µs
41//!  500µs | *                                        |  1.6th %-ile
42//!  525µs | **                                       |  4.8th %-ile
43//!  550µs | ***                                      | 10.9th %-ile
44//!  575µs | *****                                    | 22.2th %-ile
45//!  600µs | *******                                  | 37.9th %-ile
46//!  625µs | ********                                 | 55.9th %-ile
47//!  650µs | *******                                  | 72.9th %-ile
48//!  675µs | ******                                   | 85.6th %-ile
49//!  700µs | ****                                     | 93.5th %-ile
50//!  725µs | **                                       | 97.1th %-ile
51//! ```
52//!
53//! When [`TimingSubscriber`] is used as the `tracing::Dispatch`, the time between each event in a
54//! span is measured using [`quanta`], and is recorded in "[high dynamic range histograms]" using
55//! [`hdrhistogram`]'s multi-threaded recording facilities. The recorded timing information is
56//! grouped using the [`SpanGroup`] and [`EventGroup`] traits, allowing you to combine recorded
57//! statistics across spans and events.
58//!
59//! ## Extracting timing histograms
60//!
61//! The crate does not implement a mechanism for recording the resulting histograms. Instead, you
62//! can implement this as you see fit using [`TimingSubscriber::with_histograms`]. It gives you
63//! access to the histograms for all groups. Note that you must call `refresh()` on each histogram
64//! to see its latest values (see [`hdrhistogram::SyncHistogram`]). Note that calling `refresh()`
65//! will block until the next event is posted, so you may want [`TimingSubscriber::force_synchronize`]
66//! instead.
67//!
68//! To access the histograms later, use `tracing::Dispatch::downcast_ref`. If your type is hard to
69//! name, you can use a [`TimingSubscriber::downcaster`] instead.
70//!
71//! ```rust
72//! use tracing::*;
73//! use tracing_timing::{Builder, Histogram, TimingSubscriber};
74//! let subscriber = Builder::default().build(|| Histogram::new_with_max(1_000_000, 2).unwrap());
75//! let dispatch = Dispatch::new(subscriber);
76//! // ...
77//! // code that hands off clones of the dispatch, maybe to other threads.
78//! // for example:
79//! tracing::dispatcher::set_global_default(dispatch.clone())
80//!   .expect("setting tracing default failed");
81//! // (note that Dispatch implements Clone,  so you can keep a handle to it!)
82//! //
83//! // then, at some later time, in some other place, you can call:
84//! // ...
85//! dispatch.downcast_ref::<TimingSubscriber>().unwrap().with_histograms(|hs| {
86//!     for (span_group, hs) in hs {
87//!         for (event_group, h) in hs {
88//!             // make sure we see the latest samples:
89//!             h.refresh();
90//!             // print the median:
91//!             println!("{} -> {}: {}ns", span_group, event_group, h.value_at_quantile(0.5))
92//!         }
93//!     }
94//! });
95//! ```
96//!
97//! See the documentation for [`hdrhistogram`] for more on what you can do once you have the
98//! histograms.
99//!
100//! ## Using the [`Layer`](https://docs.rs/tracing-subscriber/0.2.0/tracing_subscriber/layer/trait.Layer.html) API
101//!
102//! To use the `Layer` API from `tracing-subscriber`, you first need to enable the `layer` feature.
103//! Then, use [`Builder::layer`] to construct a layer that you can mix in with other layers:
104//!
105//! ```rust
106//! # #[cfg(feature = "layer")] {
107//! # use tracing::*;
108//! # use tracing_timing::{Builder, Histogram};
109//! use tracing_subscriber::{registry::Registry, Layer};
110//! let timing_layer = Builder::default()
111//!     .layer(|| Histogram::new_with_max(1_000_000, 2).unwrap());
112//! let dispatch = Dispatch::new(timing_layer.with_subscriber(Registry::default()));
113//! # }
114//! ```
115//!
116//! ## Grouping samples
117//!
118//! By default, [`TimingSubscriber`] groups samples by the "name" of the containing span and the
119//! "message" of the relevant event. These are the first string parameter you pass to each of the
120//! relevant tracing macros. You can override this behavior either by providing your own
121//! implementation of [`SpanGroup`] and [`EventGroup`] to [`Builder::spans`] and
122//! [`Builder::events`] respectively. There are also a number of pre-defined "groupers" in the
123//! [`group`] module that cover the most common cases.
124//!
125//! ## Interpreting the output
126//!
127//! To understand the output you get from `tracing-timing`, a more complicated example might help.
128//! Consider the following tracing code:
129//!
130//! ```no_run
131//! # let rand = || 0.5;
132//! # use tracing::{trace_span, trace};
133//! loop {
134//!     trace_span!("foo").in_scope(|| {
135//!         trace!("foo_start");
136//!         std::thread::sleep(std::time::Duration::from_millis(1));
137//!         trace_span!("bar").in_scope(|| {
138//!             trace!("bar_start");
139//!             std::thread::sleep(std::time::Duration::from_millis(1));
140//!             if rand() > 0.5 {
141//!                 trace!("bar_mid1");
142//!                 std::thread::sleep(std::time::Duration::from_millis(1));
143//!             } else {
144//!                 trace!("bar_mid2");
145//!                 std::thread::sleep(std::time::Duration::from_millis(2));
146//!             }
147//!             trace!("bar_end");
148//!         });
149//!         trace!("foo_end");
150//!     })
151//! }
152//! ```
153//!
154//! What histogram data would you expect to see for each event?
155//!
156//! Well, let's walk through it. There are two span groups: "foo" and "bar". "bar" will contain
157//! entries for the events "bar_start", the "bar_mid"s, and "bar_end". "foo" will contain entries
158//! for the events "foo_start", "foo_end", **and** all the "bar" events by default (see
159//! [`Builder::no_span_recursion`]). Let's look at each of those in turn:
160//!
161//!  - "foo_start" is easy: it tracks the time since "foo" was created.
162//!  - "foo_end" is mostly easy: it tracks the time since "bar_end".
163//!    If span recursion is disabled, it instead tracks the time since "foo_start".
164//!  - "bar_start" is trickier: in "bar", it tracks the time since "bar" was entered. in "foo",
165//!    it contains the time since "foo_start".
166//!  - the "bar_mid"s are easy: they both track the time since "bar_start".
167//!  - "bar_end" is tricky: it tracks the time since whichever event of "bar_mid1" and "bar_mid2"
168//!    happened! The histogram will show a bi-modal distribution with one peak at 1ms, and one peak
169//!    at 2ms. If you want to be able to distinguish these, you will have to insert additional
170//!    tracing events inside the branches.
171//!
172//! # Timing information over time
173//!
174//! Every time you refresh a histogram, it incorporates any new timing samples recorded since the
175//! last call to `refresh`, allowing you to see timing metrics across all time. If you are
176//! monitoring the health of a continuously running system, you may instead wish to only see
177//! metrics across a limited window of time. You can do this by clearing the histogram in
178//! [`TimingSubscriber::with_histograms`] before refreshing them, or periodically as you see fit.
179//!
180//! # Usage notes:
181//!
182//! **Event timing is _per span_, not per span _per thread_.** This means that if you emit events
183//! for the same span concurrently from multiple threads, you may see weird timing information.
184//!
185//! **Span creation takes a lock.** This means that you will generally want to avoid creating
186//! extraneous spans. One technique that works well here is subsampling your application, for
187//! example by only creating tracking spans for _some_ of your requests.
188//!
189//!   [`tracing`]: https://docs.rs/tracing/
190//!   [high dynamic range histograms]: https://hdrhistogram.github.io/HdrHistogram/
191//!   [`hdrhistogram`]: https://docs.rs/hdrhistogram/
192//!   [`quanta`]: https://docs.rs/quanta/
193//!   [`hdrhistogram::SyncHistogram`]: https://docs.rs/hdrhistogram/6/hdrhistogram/sync/struct.SyncHistogram.html
194//!
195
196#![deny(missing_docs)]
197
198use crossbeam::channel;
199use crossbeam::sync::ShardedLock;
200use hdrhistogram::{sync::Recorder, SyncHistogram};
201use indexmap::IndexMap;
202use std::cell::{RefCell, UnsafeCell};
203use std::hash::Hash;
204use std::marker::PhantomData;
205use std::ops::{Deref, DerefMut};
206use std::sync::{atomic, Mutex};
207use tracing_core::*;
208
209// also test README.md code example
210use doc_comment::doc_comment;
211doc_comment!(include_str!("../README.md"));
212
213/// A faster hasher for `tracing-timing` maps.
214pub type Hasher = fxhash::FxBuildHasher;
215
216/// A standard library `HashMap` with a faster hasher.
217pub type HashMap<K, V> = std::collections::HashMap<K, V, Hasher>;
218
219static TID: atomic::AtomicUsize = atomic::AtomicUsize::new(0);
220
221thread_local! {
222    static MYTID: RefCell<Option<usize>> = RefCell::new(None);
223}
224
225mod builder;
226pub use builder::Builder;
227pub use hdrhistogram::Histogram;
228
229pub mod group;
230
231type Map<S, E, T> = HashMap<S, HashMap<E, T>>;
232
233/// Translate attributes from a tracing span into a timing span group.
234///
235/// All spans whose attributes produce the same `Id`-typed value when passed through `group`
236/// share a namespace for the groups produced by [`EventGroup::group`] on their contained events.
237///
238/// This trait is implemented for all functions with the appropriate signature. Note that you _may_
239/// run into weird lifetime errors from the compiler when using a closure as a `SpanGroup`. This is
240/// a [known compiler issue]. You can work around it by adding a slight type hint to the arguments
241/// passed to the closure as follows (note the `: &_`):
242///
243/// ```rust
244/// use tracing_timing::{Builder, Histogram};
245/// use tracing::span;
246/// let s = Builder::default()
247///     .spans(|_: &span::Attributes| "all spans as one")
248///     .build(|| Histogram::new(3).unwrap());
249/// ```
250///
251///   [known compiler issue]: https://github.com/rust-lang/rust/issues/41078
252pub trait SpanGroup {
253    /// The type of the timing span group.
254    type Id;
255
256    /// Extract the group for this span's attributes.
257    fn group(&self, span: &span::Attributes) -> Self::Id;
258}
259
260/// Translate attributes from a tracing event into a timing event group.
261///
262/// All events that share a [`SpanGroup`], and whose attributes produce the same `Id`-typed value
263/// when passed through `group`, are considered a single timing target, and have their samples
264/// recorded together.
265///
266/// This trait is implemented for all functions with the appropriate signature. Note that you _may_
267/// run into weird lifetime errors from the compiler when using a closure as an `EventGroup`. This
268/// is a [known compiler issue]. You can work around it by adding a slight type hint to the
269/// arguments passed to the closure as follows (note the `: &_`):
270///
271/// ```rust
272/// use tracing_timing::{Builder, Histogram};
273/// use tracing::Event;
274/// let s = Builder::default()
275///     .events(|_: &Event| "all events as one")
276///     .build(|| Histogram::new(3).unwrap());
277/// ```
278///
279///   [known compiler issue]: https://github.com/rust-lang/rust/issues/41078
280pub trait EventGroup {
281    /// The type of the timing event group.
282    type Id;
283
284    /// Extract the group for this event.
285    fn group(&self, event: &Event) -> Self::Id;
286}
287
288fn span_id_to_slab_idx(span: &span::Id) -> usize {
289    span.into_u64() as usize - 1
290}
291
292struct WriterState<S: Hash + Eq, E: Hash + Eq> {
293    // TID => (S + callsite) => E => thread-local Recorder
294    tls: ThreadLocal<Map<S, E, Recorder<u64>>>,
295
296    // used to produce a Recorder for a thread that has not recorded for a given sid/eid pair
297    idle_recorders: Map<S, E, hdrhistogram::sync::IdleRecorder<Recorder<u64>, u64>>,
298
299    // used to communicate new histograms to the reader
300    created: channel::Sender<(S, E, SyncHistogram<u64>)>,
301
302    // used to produce a new Histogram when a new sid/eid pair is encountered
303    //
304    // TODO:
305    // placing this in a ShardedLock requires that it is Sync, but it's only ever used when you're
306    // holding the write lock. not sure how to describe this in the type system.
307    new_histogram: Box<dyn FnMut(&S, &E) -> Histogram<u64> + Send + Sync>,
308}
309
310impl<S, E> std::fmt::Debug for WriterState<S, E>
311where
312    S: Hash + Eq + std::fmt::Debug,
313    E: Hash + Eq + std::fmt::Debug,
314{
315    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
316        f.debug_struct("WriterState")
317            .field("idle_recorders", &self.idle_recorders)
318            .field("created", &self.created)
319            .finish()
320    }
321}
322
323#[derive(Debug)]
324struct ReaderState<S: Hash + Eq, E: Hash + Eq> {
325    created: channel::Receiver<(S, E, SyncHistogram<u64>)>,
326    histograms: HashMap<S, IndexMap<E, SyncHistogram<u64>, Hasher>>,
327}
328
329#[derive(Debug)]
330struct SpanState<S> {
331    group: S,
332
333    // We need fast access to the last event for each span.
334    last_event: atomic::AtomicU64,
335}
336
337// Share impl between Subscriber and Layer versions
338struct Timing<S = group::ByName, E = group::ByMessage>
339where
340    S: SpanGroup,
341    S::Id: Hash + Eq,
342    E: EventGroup,
343    E::Id: Hash + Eq,
344{
345    span_group: S,
346    event_group: E,
347    time: quanta::Clock,
348    bubble_spans: bool,
349    span_close_events: bool,
350
351    writers: ShardedLock<WriterState<S::Id, E::Id>>,
352    reader: Mutex<ReaderState<S::Id, E::Id>>,
353}
354
355impl<S, E> std::fmt::Debug for Timing<S, E>
356where
357    S: SpanGroup + std::fmt::Debug,
358    E: EventGroup + std::fmt::Debug,
359    S::Id: Hash + Eq + std::fmt::Debug,
360    E::Id: Hash + Eq + std::fmt::Debug,
361{
362    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
363        f.debug_struct("Timing")
364            .field("span_group", &self.span_group)
365            .field("event_group", &self.event_group)
366            .field("time", &self.time)
367            .field("writers", &self.writers)
368            .field("reader", &self.reader)
369            .finish()
370    }
371}
372
373impl<S, E> Timing<S, E>
374where
375    S: SpanGroup,
376    E: EventGroup,
377    S::Id: Hash + Eq,
378    E::Id: Hash + Eq,
379{
380    fn force_synchronize(&self) {
381        // first, remove all thread-local recorders
382        let mut inner = self.writers.write().unwrap();
383        // note that we don't remove the tls _entry_,
384        // since that would make all writers take the write lock later!
385        for tls in inner.tls.values_mut() {
386            // we hold the write lock, so we know no other thread is using its tls.
387            let tls = unsafe { &mut *tls.get() };
388            tls.clear();
389        }
390        // now that we've done that, refresh all the histograms. we do it with a 0 timeout since we
391        // know that dropping all the recorders above will cause refresh to see up-to-date values,
392        // and we don't care about samples coming _after_ the clear above.
393        drop(inner);
394        self.with_histograms(|hs| {
395            for hs in hs.values_mut() {
396                for h in hs.values_mut() {
397                    h.refresh_timeout(std::time::Duration::new(0, 0));
398                }
399            }
400        })
401    }
402
403    fn with_histograms<F, R>(&self, f: F) -> R
404    where
405        F: FnOnce(&mut HashMap<S::Id, IndexMap<E::Id, SyncHistogram<u64>, Hasher>>) -> R,
406    {
407        // writers never take this lock, so we don't hold them up should the user call refresh(),
408        let mut reader = self.reader.lock().unwrap();
409        while let Ok((sid, eid, h)) = reader.created.try_recv() {
410            let h = reader
411                .histograms
412                .entry(sid)
413                .or_insert_with(IndexMap::default)
414                .insert(eid, h);
415            assert!(
416                h.is_none(),
417                "second histogram created for same sid/eid combination"
418            );
419        }
420        f(&mut reader.histograms)
421    }
422}
423
424impl<S, E> Timing<S, E>
425where
426    S: SpanGroup,
427    E: EventGroup,
428    S::Id: Clone + Hash + Eq,
429    E::Id: Clone + Hash + Eq,
430{
431    fn time<'a, F>(&self, event: &Event, mut for_each_parent: F)
432    where
433        S: 'a,
434        F: FnMut(&mut dyn FnMut(&SpanState<S::Id>) -> bool),
435    {
436        let start = self.time.raw();
437        let inner = self.writers.read().unwrap();
438
439        let record = move |state: &SpanState<S::Id>, r: &mut Recorder<u64>| {
440            // NOTE: we substitute in the last possible timestamp to avoid measuring time spent
441            // in accessing the various timing datastructures (like taking locks). this has the
442            // effect of measuing Δt₁₂ = e₂.start - e₁.end, which is probably what users expect
443            let previous = state
444                .last_event
445                .swap(self.time.raw(), atomic::Ordering::AcqRel);
446            if previous > start {
447                // someone else recorded a sample _just_ now
448                // the delta is effectively zero, but recording a 0 sample is misleading
449                return;
450            }
451
452            r.saturating_record(self.time.delta(previous, start).as_nanos() as u64)
453        };
454
455        // who are we?
456        let tid = ThreadId::default();
457
458        // fast path: sid/eid pair is known to this thread
459        let eid = self.event_group.group(event);
460        if let Some(ref tls) = inner.tls.get(&tid) {
461            // we know no-one else has our TID:
462            // NOTE: it's _not_ safe to use this after we drop the lock due to force_synchronize.
463            let tls = unsafe { &mut *tls.get() };
464
465            for_each_parent(&mut |state| {
466                // the span id _must_ be known, as it's added when created
467                if let Some(ref mut recorder) =
468                    tls.get_mut(&state.group).and_then(|rs| rs.get_mut(&eid))
469                {
470                    // sid/eid already known and we already have a thread-local recorder!
471                    record(state, recorder);
472                } else if let Some(ref ir) = inner.idle_recorders[&state.group].get(&eid) {
473                    // we didn't know about the eid, but if there's already a recorder for it,
474                    // we can just create a local recorder from it and move on
475                    let mut recorder = ir.recorder();
476                    record(state, &mut recorder);
477                    let r = tls
478                        .entry(state.group.clone())
479                        .or_insert_with(Default::default)
480                        .insert(eid.clone(), recorder);
481                    assert!(r.is_none());
482                } else {
483                    // we're the first thread to see this pair, so we need to make a histogram for it
484                    return false;
485                }
486
487                // keep recording up the stack
488                self.bubble_spans
489            });
490
491        // at least one sid/eid pair was unknown
492        } else {
493            // this thread does not yet have TLS -- we'll have to take the lock
494        }
495
496        // slow path: either this thread is new, or a sid/eid pair was new
497        // in either case, we need to take the write lock
498        // to do that, we must first drop the read lock
499        drop(inner);
500        let mut inner = self.writers.write().unwrap();
501        let inner = &mut *inner;
502
503        // if we don't have any thread-local state, construct that first
504        let tls = inner.tls.entry(tid).or_insert_with(Default::default);
505        // no-one else has our TID _and_ we have exclusive access to inner
506        let tls = unsafe { &mut *tls.get() };
507
508        // use an existing recorder if one exists, or make a new histogram if one does not
509        let nh = &mut inner.new_histogram;
510        let created = &mut inner.created;
511        let idle = &mut inner.idle_recorders;
512        for_each_parent(&mut |state| {
513            // since we're recursing up the tree, we _may_ find that we already have a recorder for
514            // a _later_ span's sid/eid. make sure we don't create a new one in that case!
515            let recorder = tls
516                .entry(state.group.clone())
517                .or_insert_with(Default::default)
518                .entry(eid.clone())
519                .or_insert_with(|| {
520                    // nope, get us a thread-local recorder
521                    idle.get_mut(&state.group)
522                        .unwrap()
523                        .entry(eid.clone())
524                        .or_insert_with(|| {
525                            // no histogram exists! make one.
526                            let h = (nh)(&state.group, &eid).into_sync();
527                            let ir = h.recorder().into_idle();
528                            created.send((state.group.clone(), eid.clone(), h)).expect(
529                                "WriterState implies ReaderState, which holds the receiver",
530                            );
531                            ir
532                        })
533                        .recorder()
534                });
535
536            // finally, we can record the sample
537            record(state, recorder);
538
539            // recurse to parent if any
540            self.bubble_spans
541        });
542    }
543
544    fn ensure_group(&self, group: S::Id) {
545        self.writers
546            .write()
547            .unwrap()
548            .idle_recorders
549            .entry(group)
550            .or_insert_with(HashMap::default);
551    }
552}
553
554mod subscriber;
555pub use subscriber::{Downcaster as SubscriberDowncaster, TimingSubscriber};
556
557#[cfg(feature = "layer")]
558mod layer;
559#[cfg(feature = "layer")]
560pub use layer::{Downcaster as LayerDowncaster, TimingLayer};
561
562#[derive(Hash, Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
563#[repr(transparent)]
564struct ThreadId {
565    tid: usize,
566    _notsend: PhantomData<UnsafeCell<()>>,
567}
568
569impl Default for ThreadId {
570    fn default() -> Self {
571        MYTID.with(|mytid| {
572            let mut mytid = mytid.borrow_mut();
573            if let Some(ref mytid) = *mytid {
574                ThreadId {
575                    tid: *mytid,
576                    _notsend: PhantomData,
577                }
578            } else {
579                let tid = TID.fetch_add(1, atomic::Ordering::AcqRel);
580                *mytid = Some(tid);
581                ThreadId {
582                    tid,
583                    _notsend: PhantomData,
584                }
585            }
586        })
587    }
588}
589
590#[derive(Default)]
591struct ThreadLocal<T>(HashMap<ThreadId, UnsafeCell<T>>);
592
593impl<T> Deref for ThreadLocal<T> {
594    type Target = HashMap<ThreadId, UnsafeCell<T>>;
595    fn deref(&self) -> &Self::Target {
596        &self.0
597    }
598}
599
600impl<T> DerefMut for ThreadLocal<T> {
601    fn deref_mut(&mut self) -> &mut Self::Target {
602        &mut self.0
603    }
604}
605
606unsafe impl<T: Send> Send for ThreadLocal<T> {}
607unsafe impl<T: Sync> Sync for ThreadLocal<T> {}