tracing_timing/lib.rs
1//! Inter-event timing metrics on top of [`tracing`].
2//!
3//! This crate provides a `tracing::Subscriber` that keeps statistics on inter-event timing
4//! information. More concretely, given code like this:
5//!
6//! ```rust
7//! use tracing::*;
8//! use tracing_timing::{Builder, Histogram};
9//! let subscriber = Builder::default().build(|| Histogram::new_with_max(1_000_000, 2).unwrap());
10//! let dispatcher = Dispatch::new(subscriber);
11//! dispatcher::with_default(&dispatcher, || {
12//! trace_span!("request").in_scope(|| {
13//! // do a little bit of work
14//! trace!("fast");
15//! // do a lot of work
16//! trace!("slow");
17//! })
18//! });
19//! ```
20//!
21//! You can produce something like this (see `examples/pretty.rs`):
22//!
23//! ```text
24//! fast:
25//! mean: 173.2µs, p50: 172µs, p90: 262µs, p99: 327µs, p999: 450µs, max: 778µs
26//! 25µs | * | 2.2th %-ile
27//! 50µs | * | 2.2th %-ile
28//! 75µs | * | 4.7th %-ile
29//! 100µs | *** | 11.5th %-ile
30//! 125µs | ***** | 24.0th %-ile
31//! 150µs | ******* | 41.1th %-ile
32//! 175µs | ******** | 59.2th %-ile
33//! 200µs | ******* | 75.4th %-ile
34//! 225µs | ** | 80.1th %-ile
35//! 250µs | *** | 87.3th %-ile
36//! 275µs | *** | 94.4th %-ile
37//! 300µs | ** | 97.8th %-ile
38//!
39//! slow:
40//! mean: 623.3µs, p50: 630µs, p90: 696µs, p99: 770µs, p999: 851µs, max: 950µs
41//! 500µs | * | 1.6th %-ile
42//! 525µs | ** | 4.8th %-ile
43//! 550µs | *** | 10.9th %-ile
44//! 575µs | ***** | 22.2th %-ile
45//! 600µs | ******* | 37.9th %-ile
46//! 625µs | ******** | 55.9th %-ile
47//! 650µs | ******* | 72.9th %-ile
48//! 675µs | ****** | 85.6th %-ile
49//! 700µs | **** | 93.5th %-ile
50//! 725µs | ** | 97.1th %-ile
51//! ```
52//!
53//! When [`TimingSubscriber`] is used as the `tracing::Dispatch`, the time between each event in a
54//! span is measured using [`quanta`], and is recorded in "[high dynamic range histograms]" using
55//! [`hdrhistogram`]'s multi-threaded recording facilities. The recorded timing information is
56//! grouped using the [`SpanGroup`] and [`EventGroup`] traits, allowing you to combine recorded
57//! statistics across spans and events.
58//!
59//! ## Extracting timing histograms
60//!
61//! The crate does not implement a mechanism for recording the resulting histograms. Instead, you
62//! can implement this as you see fit using [`TimingSubscriber::with_histograms`]. It gives you
63//! access to the histograms for all groups. Note that you must call `refresh()` on each histogram
64//! to see its latest values (see [`hdrhistogram::SyncHistogram`]). Note that calling `refresh()`
65//! will block until the next event is posted, so you may want [`TimingSubscriber::force_synchronize`]
66//! instead.
67//!
68//! To access the histograms later, use `tracing::Dispatch::downcast_ref`. If your type is hard to
69//! name, you can use a [`TimingSubscriber::downcaster`] instead.
70//!
71//! ```rust
72//! use tracing::*;
73//! use tracing_timing::{Builder, Histogram, TimingSubscriber};
74//! let subscriber = Builder::default().build(|| Histogram::new_with_max(1_000_000, 2).unwrap());
75//! let dispatch = Dispatch::new(subscriber);
76//! // ...
77//! // code that hands off clones of the dispatch, maybe to other threads.
78//! // for example:
79//! tracing::dispatcher::set_global_default(dispatch.clone())
80//! .expect("setting tracing default failed");
81//! // (note that Dispatch implements Clone, so you can keep a handle to it!)
82//! //
83//! // then, at some later time, in some other place, you can call:
84//! // ...
85//! dispatch.downcast_ref::<TimingSubscriber>().unwrap().with_histograms(|hs| {
86//! for (span_group, hs) in hs {
87//! for (event_group, h) in hs {
88//! // make sure we see the latest samples:
89//! h.refresh();
90//! // print the median:
91//! println!("{} -> {}: {}ns", span_group, event_group, h.value_at_quantile(0.5))
92//! }
93//! }
94//! });
95//! ```
96//!
97//! See the documentation for [`hdrhistogram`] for more on what you can do once you have the
98//! histograms.
99//!
100//! ## Using the [`Layer`](https://docs.rs/tracing-subscriber/0.2.0/tracing_subscriber/layer/trait.Layer.html) API
101//!
102//! To use the `Layer` API from `tracing-subscriber`, you first need to enable the `layer` feature.
103//! Then, use [`Builder::layer`] to construct a layer that you can mix in with other layers:
104//!
105//! ```rust
106//! # #[cfg(feature = "layer")] {
107//! # use tracing::*;
108//! # use tracing_timing::{Builder, Histogram};
109//! use tracing_subscriber::{registry::Registry, Layer};
110//! let timing_layer = Builder::default()
111//! .layer(|| Histogram::new_with_max(1_000_000, 2).unwrap());
112//! let dispatch = Dispatch::new(timing_layer.with_subscriber(Registry::default()));
113//! # }
114//! ```
115//!
116//! ## Grouping samples
117//!
118//! By default, [`TimingSubscriber`] groups samples by the "name" of the containing span and the
119//! "message" of the relevant event. These are the first string parameter you pass to each of the
120//! relevant tracing macros. You can override this behavior either by providing your own
121//! implementation of [`SpanGroup`] and [`EventGroup`] to [`Builder::spans`] and
122//! [`Builder::events`] respectively. There are also a number of pre-defined "groupers" in the
123//! [`group`] module that cover the most common cases.
124//!
125//! ## Interpreting the output
126//!
127//! To understand the output you get from `tracing-timing`, a more complicated example might help.
128//! Consider the following tracing code:
129//!
130//! ```no_run
131//! # let rand = || 0.5;
132//! # use tracing::{trace_span, trace};
133//! loop {
134//! trace_span!("foo").in_scope(|| {
135//! trace!("foo_start");
136//! std::thread::sleep(std::time::Duration::from_millis(1));
137//! trace_span!("bar").in_scope(|| {
138//! trace!("bar_start");
139//! std::thread::sleep(std::time::Duration::from_millis(1));
140//! if rand() > 0.5 {
141//! trace!("bar_mid1");
142//! std::thread::sleep(std::time::Duration::from_millis(1));
143//! } else {
144//! trace!("bar_mid2");
145//! std::thread::sleep(std::time::Duration::from_millis(2));
146//! }
147//! trace!("bar_end");
148//! });
149//! trace!("foo_end");
150//! })
151//! }
152//! ```
153//!
154//! What histogram data would you expect to see for each event?
155//!
156//! Well, let's walk through it. There are two span groups: "foo" and "bar". "bar" will contain
157//! entries for the events "bar_start", the "bar_mid"s, and "bar_end". "foo" will contain entries
158//! for the events "foo_start", "foo_end", **and** all the "bar" events by default (see
159//! [`Builder::no_span_recursion`]). Let's look at each of those in turn:
160//!
161//! - "foo_start" is easy: it tracks the time since "foo" was created.
162//! - "foo_end" is mostly easy: it tracks the time since "bar_end".
163//! If span recursion is disabled, it instead tracks the time since "foo_start".
164//! - "bar_start" is trickier: in "bar", it tracks the time since "bar" was entered. in "foo",
165//! it contains the time since "foo_start".
166//! - the "bar_mid"s are easy: they both track the time since "bar_start".
167//! - "bar_end" is tricky: it tracks the time since whichever event of "bar_mid1" and "bar_mid2"
168//! happened! The histogram will show a bi-modal distribution with one peak at 1ms, and one peak
169//! at 2ms. If you want to be able to distinguish these, you will have to insert additional
170//! tracing events inside the branches.
171//!
172//! # Timing information over time
173//!
174//! Every time you refresh a histogram, it incorporates any new timing samples recorded since the
175//! last call to `refresh`, allowing you to see timing metrics across all time. If you are
176//! monitoring the health of a continuously running system, you may instead wish to only see
177//! metrics across a limited window of time. You can do this by clearing the histogram in
178//! [`TimingSubscriber::with_histograms`] before refreshing them, or periodically as you see fit.
179//!
180//! # Usage notes:
181//!
182//! **Event timing is _per span_, not per span _per thread_.** This means that if you emit events
183//! for the same span concurrently from multiple threads, you may see weird timing information.
184//!
185//! **Span creation takes a lock.** This means that you will generally want to avoid creating
186//! extraneous spans. One technique that works well here is subsampling your application, for
187//! example by only creating tracking spans for _some_ of your requests.
188//!
189//! [`tracing`]: https://docs.rs/tracing/
190//! [high dynamic range histograms]: https://hdrhistogram.github.io/HdrHistogram/
191//! [`hdrhistogram`]: https://docs.rs/hdrhistogram/
192//! [`quanta`]: https://docs.rs/quanta/
193//! [`hdrhistogram::SyncHistogram`]: https://docs.rs/hdrhistogram/6/hdrhistogram/sync/struct.SyncHistogram.html
194//!
195
196#![deny(missing_docs)]
197
198use crossbeam::channel;
199use crossbeam::sync::ShardedLock;
200use hdrhistogram::{sync::Recorder, SyncHistogram};
201use indexmap::IndexMap;
202use std::cell::{RefCell, UnsafeCell};
203use std::hash::Hash;
204use std::marker::PhantomData;
205use std::ops::{Deref, DerefMut};
206use std::sync::{atomic, Mutex};
207use tracing_core::*;
208
209// also test README.md code example
210use doc_comment::doc_comment;
211doc_comment!(include_str!("../README.md"));
212
213/// A faster hasher for `tracing-timing` maps.
214pub type Hasher = fxhash::FxBuildHasher;
215
216/// A standard library `HashMap` with a faster hasher.
217pub type HashMap<K, V> = std::collections::HashMap<K, V, Hasher>;
218
219static TID: atomic::AtomicUsize = atomic::AtomicUsize::new(0);
220
221thread_local! {
222 static MYTID: RefCell<Option<usize>> = RefCell::new(None);
223}
224
225mod builder;
226pub use builder::Builder;
227pub use hdrhistogram::Histogram;
228
229pub mod group;
230
231type Map<S, E, T> = HashMap<S, HashMap<E, T>>;
232
233/// Translate attributes from a tracing span into a timing span group.
234///
235/// All spans whose attributes produce the same `Id`-typed value when passed through `group`
236/// share a namespace for the groups produced by [`EventGroup::group`] on their contained events.
237///
238/// This trait is implemented for all functions with the appropriate signature. Note that you _may_
239/// run into weird lifetime errors from the compiler when using a closure as a `SpanGroup`. This is
240/// a [known compiler issue]. You can work around it by adding a slight type hint to the arguments
241/// passed to the closure as follows (note the `: &_`):
242///
243/// ```rust
244/// use tracing_timing::{Builder, Histogram};
245/// use tracing::span;
246/// let s = Builder::default()
247/// .spans(|_: &span::Attributes| "all spans as one")
248/// .build(|| Histogram::new(3).unwrap());
249/// ```
250///
251/// [known compiler issue]: https://github.com/rust-lang/rust/issues/41078
252pub trait SpanGroup {
253 /// The type of the timing span group.
254 type Id;
255
256 /// Extract the group for this span's attributes.
257 fn group(&self, span: &span::Attributes) -> Self::Id;
258}
259
260/// Translate attributes from a tracing event into a timing event group.
261///
262/// All events that share a [`SpanGroup`], and whose attributes produce the same `Id`-typed value
263/// when passed through `group`, are considered a single timing target, and have their samples
264/// recorded together.
265///
266/// This trait is implemented for all functions with the appropriate signature. Note that you _may_
267/// run into weird lifetime errors from the compiler when using a closure as an `EventGroup`. This
268/// is a [known compiler issue]. You can work around it by adding a slight type hint to the
269/// arguments passed to the closure as follows (note the `: &_`):
270///
271/// ```rust
272/// use tracing_timing::{Builder, Histogram};
273/// use tracing::Event;
274/// let s = Builder::default()
275/// .events(|_: &Event| "all events as one")
276/// .build(|| Histogram::new(3).unwrap());
277/// ```
278///
279/// [known compiler issue]: https://github.com/rust-lang/rust/issues/41078
280pub trait EventGroup {
281 /// The type of the timing event group.
282 type Id;
283
284 /// Extract the group for this event.
285 fn group(&self, event: &Event) -> Self::Id;
286}
287
288fn span_id_to_slab_idx(span: &span::Id) -> usize {
289 span.into_u64() as usize - 1
290}
291
292struct WriterState<S: Hash + Eq, E: Hash + Eq> {
293 // TID => (S + callsite) => E => thread-local Recorder
294 tls: ThreadLocal<Map<S, E, Recorder<u64>>>,
295
296 // used to produce a Recorder for a thread that has not recorded for a given sid/eid pair
297 idle_recorders: Map<S, E, hdrhistogram::sync::IdleRecorder<Recorder<u64>, u64>>,
298
299 // used to communicate new histograms to the reader
300 created: channel::Sender<(S, E, SyncHistogram<u64>)>,
301
302 // used to produce a new Histogram when a new sid/eid pair is encountered
303 //
304 // TODO:
305 // placing this in a ShardedLock requires that it is Sync, but it's only ever used when you're
306 // holding the write lock. not sure how to describe this in the type system.
307 new_histogram: Box<dyn FnMut(&S, &E) -> Histogram<u64> + Send + Sync>,
308}
309
310impl<S, E> std::fmt::Debug for WriterState<S, E>
311where
312 S: Hash + Eq + std::fmt::Debug,
313 E: Hash + Eq + std::fmt::Debug,
314{
315 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
316 f.debug_struct("WriterState")
317 .field("idle_recorders", &self.idle_recorders)
318 .field("created", &self.created)
319 .finish()
320 }
321}
322
323#[derive(Debug)]
324struct ReaderState<S: Hash + Eq, E: Hash + Eq> {
325 created: channel::Receiver<(S, E, SyncHistogram<u64>)>,
326 histograms: HashMap<S, IndexMap<E, SyncHistogram<u64>, Hasher>>,
327}
328
329#[derive(Debug)]
330struct SpanState<S> {
331 group: S,
332
333 // We need fast access to the last event for each span.
334 last_event: atomic::AtomicU64,
335}
336
337// Share impl between Subscriber and Layer versions
338struct Timing<S = group::ByName, E = group::ByMessage>
339where
340 S: SpanGroup,
341 S::Id: Hash + Eq,
342 E: EventGroup,
343 E::Id: Hash + Eq,
344{
345 span_group: S,
346 event_group: E,
347 time: quanta::Clock,
348 bubble_spans: bool,
349 span_close_events: bool,
350
351 writers: ShardedLock<WriterState<S::Id, E::Id>>,
352 reader: Mutex<ReaderState<S::Id, E::Id>>,
353}
354
355impl<S, E> std::fmt::Debug for Timing<S, E>
356where
357 S: SpanGroup + std::fmt::Debug,
358 E: EventGroup + std::fmt::Debug,
359 S::Id: Hash + Eq + std::fmt::Debug,
360 E::Id: Hash + Eq + std::fmt::Debug,
361{
362 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
363 f.debug_struct("Timing")
364 .field("span_group", &self.span_group)
365 .field("event_group", &self.event_group)
366 .field("time", &self.time)
367 .field("writers", &self.writers)
368 .field("reader", &self.reader)
369 .finish()
370 }
371}
372
373impl<S, E> Timing<S, E>
374where
375 S: SpanGroup,
376 E: EventGroup,
377 S::Id: Hash + Eq,
378 E::Id: Hash + Eq,
379{
380 fn force_synchronize(&self) {
381 // first, remove all thread-local recorders
382 let mut inner = self.writers.write().unwrap();
383 // note that we don't remove the tls _entry_,
384 // since that would make all writers take the write lock later!
385 for tls in inner.tls.values_mut() {
386 // we hold the write lock, so we know no other thread is using its tls.
387 let tls = unsafe { &mut *tls.get() };
388 tls.clear();
389 }
390 // now that we've done that, refresh all the histograms. we do it with a 0 timeout since we
391 // know that dropping all the recorders above will cause refresh to see up-to-date values,
392 // and we don't care about samples coming _after_ the clear above.
393 drop(inner);
394 self.with_histograms(|hs| {
395 for hs in hs.values_mut() {
396 for h in hs.values_mut() {
397 h.refresh_timeout(std::time::Duration::new(0, 0));
398 }
399 }
400 })
401 }
402
403 fn with_histograms<F, R>(&self, f: F) -> R
404 where
405 F: FnOnce(&mut HashMap<S::Id, IndexMap<E::Id, SyncHistogram<u64>, Hasher>>) -> R,
406 {
407 // writers never take this lock, so we don't hold them up should the user call refresh(),
408 let mut reader = self.reader.lock().unwrap();
409 while let Ok((sid, eid, h)) = reader.created.try_recv() {
410 let h = reader
411 .histograms
412 .entry(sid)
413 .or_insert_with(IndexMap::default)
414 .insert(eid, h);
415 assert!(
416 h.is_none(),
417 "second histogram created for same sid/eid combination"
418 );
419 }
420 f(&mut reader.histograms)
421 }
422}
423
424impl<S, E> Timing<S, E>
425where
426 S: SpanGroup,
427 E: EventGroup,
428 S::Id: Clone + Hash + Eq,
429 E::Id: Clone + Hash + Eq,
430{
431 fn time<'a, F>(&self, event: &Event, mut for_each_parent: F)
432 where
433 S: 'a,
434 F: FnMut(&mut dyn FnMut(&SpanState<S::Id>) -> bool),
435 {
436 let start = self.time.raw();
437 let inner = self.writers.read().unwrap();
438
439 let record = move |state: &SpanState<S::Id>, r: &mut Recorder<u64>| {
440 // NOTE: we substitute in the last possible timestamp to avoid measuring time spent
441 // in accessing the various timing datastructures (like taking locks). this has the
442 // effect of measuing Δt₁₂ = e₂.start - e₁.end, which is probably what users expect
443 let previous = state
444 .last_event
445 .swap(self.time.raw(), atomic::Ordering::AcqRel);
446 if previous > start {
447 // someone else recorded a sample _just_ now
448 // the delta is effectively zero, but recording a 0 sample is misleading
449 return;
450 }
451
452 r.saturating_record(self.time.delta(previous, start).as_nanos() as u64)
453 };
454
455 // who are we?
456 let tid = ThreadId::default();
457
458 // fast path: sid/eid pair is known to this thread
459 let eid = self.event_group.group(event);
460 if let Some(ref tls) = inner.tls.get(&tid) {
461 // we know no-one else has our TID:
462 // NOTE: it's _not_ safe to use this after we drop the lock due to force_synchronize.
463 let tls = unsafe { &mut *tls.get() };
464
465 for_each_parent(&mut |state| {
466 // the span id _must_ be known, as it's added when created
467 if let Some(ref mut recorder) =
468 tls.get_mut(&state.group).and_then(|rs| rs.get_mut(&eid))
469 {
470 // sid/eid already known and we already have a thread-local recorder!
471 record(state, recorder);
472 } else if let Some(ref ir) = inner.idle_recorders[&state.group].get(&eid) {
473 // we didn't know about the eid, but if there's already a recorder for it,
474 // we can just create a local recorder from it and move on
475 let mut recorder = ir.recorder();
476 record(state, &mut recorder);
477 let r = tls
478 .entry(state.group.clone())
479 .or_insert_with(Default::default)
480 .insert(eid.clone(), recorder);
481 assert!(r.is_none());
482 } else {
483 // we're the first thread to see this pair, so we need to make a histogram for it
484 return false;
485 }
486
487 // keep recording up the stack
488 self.bubble_spans
489 });
490
491 // at least one sid/eid pair was unknown
492 } else {
493 // this thread does not yet have TLS -- we'll have to take the lock
494 }
495
496 // slow path: either this thread is new, or a sid/eid pair was new
497 // in either case, we need to take the write lock
498 // to do that, we must first drop the read lock
499 drop(inner);
500 let mut inner = self.writers.write().unwrap();
501 let inner = &mut *inner;
502
503 // if we don't have any thread-local state, construct that first
504 let tls = inner.tls.entry(tid).or_insert_with(Default::default);
505 // no-one else has our TID _and_ we have exclusive access to inner
506 let tls = unsafe { &mut *tls.get() };
507
508 // use an existing recorder if one exists, or make a new histogram if one does not
509 let nh = &mut inner.new_histogram;
510 let created = &mut inner.created;
511 let idle = &mut inner.idle_recorders;
512 for_each_parent(&mut |state| {
513 // since we're recursing up the tree, we _may_ find that we already have a recorder for
514 // a _later_ span's sid/eid. make sure we don't create a new one in that case!
515 let recorder = tls
516 .entry(state.group.clone())
517 .or_insert_with(Default::default)
518 .entry(eid.clone())
519 .or_insert_with(|| {
520 // nope, get us a thread-local recorder
521 idle.get_mut(&state.group)
522 .unwrap()
523 .entry(eid.clone())
524 .or_insert_with(|| {
525 // no histogram exists! make one.
526 let h = (nh)(&state.group, &eid).into_sync();
527 let ir = h.recorder().into_idle();
528 created.send((state.group.clone(), eid.clone(), h)).expect(
529 "WriterState implies ReaderState, which holds the receiver",
530 );
531 ir
532 })
533 .recorder()
534 });
535
536 // finally, we can record the sample
537 record(state, recorder);
538
539 // recurse to parent if any
540 self.bubble_spans
541 });
542 }
543
544 fn ensure_group(&self, group: S::Id) {
545 self.writers
546 .write()
547 .unwrap()
548 .idle_recorders
549 .entry(group)
550 .or_insert_with(HashMap::default);
551 }
552}
553
554mod subscriber;
555pub use subscriber::{Downcaster as SubscriberDowncaster, TimingSubscriber};
556
557#[cfg(feature = "layer")]
558mod layer;
559#[cfg(feature = "layer")]
560pub use layer::{Downcaster as LayerDowncaster, TimingLayer};
561
562#[derive(Hash, Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
563#[repr(transparent)]
564struct ThreadId {
565 tid: usize,
566 _notsend: PhantomData<UnsafeCell<()>>,
567}
568
569impl Default for ThreadId {
570 fn default() -> Self {
571 MYTID.with(|mytid| {
572 let mut mytid = mytid.borrow_mut();
573 if let Some(ref mytid) = *mytid {
574 ThreadId {
575 tid: *mytid,
576 _notsend: PhantomData,
577 }
578 } else {
579 let tid = TID.fetch_add(1, atomic::Ordering::AcqRel);
580 *mytid = Some(tid);
581 ThreadId {
582 tid,
583 _notsend: PhantomData,
584 }
585 }
586 })
587 }
588}
589
590#[derive(Default)]
591struct ThreadLocal<T>(HashMap<ThreadId, UnsafeCell<T>>);
592
593impl<T> Deref for ThreadLocal<T> {
594 type Target = HashMap<ThreadId, UnsafeCell<T>>;
595 fn deref(&self) -> &Self::Target {
596 &self.0
597 }
598}
599
600impl<T> DerefMut for ThreadLocal<T> {
601 fn deref_mut(&mut self) -> &mut Self::Target {
602 &mut self.0
603 }
604}
605
606unsafe impl<T: Send> Send for ThreadLocal<T> {}
607unsafe impl<T: Sync> Sync for ThreadLocal<T> {}