nm/lib.rs
1#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
2
3//! # nm - nanometer
4//!
5//! Collect metrics about observed events with low overhead even in
6//! highly multithreaded applications running on 100+ processors.
7//!
8//! Using arbitrary development hardware, we measure between 2 and 20 nanoseconds per
9//! observation, depending on how the event is configured. Benchmarks are included.
10//!
11//! # Collected metrics
12//!
13//! For each defined event, the following metrics are collected:
14//!
15//! * Count of observations (`u64`).
16//! * Mean magnitude of observations (`i64`).
17//! * (Optional) Histogram of magnitudes, with configurable bucket boundaries (`[i64]`).
18//!
19//! # Defining events
20//!
21//! Use thread-local static variables to define the events to observe:
22//!
23//! ```
24//! use nm::Event;
25//!
26//! thread_local! {
27//! static PACKAGES_RECEIVED: Event = Event::builder()
28//! .name("packages_received")
29//! .build();
30//!
31//! static PACKAGES_SENT: Event = Event::builder()
32//! .name("packages_sent")
33//! .build();
34//! }
35//! ```
36//!
37//! Recommended event name format: `big_medium_small_units`.
38//!
39//! The above two events are merely counters, so there are no units in the name.
40//!
41//! When only an event name is provided to the builder, only the count and mean magnitude of
42//! observations will be recorded. If you want to capture more information about the distribution
43//! of event magnitudes, you must specify the histogram buckets to use.
44//!
45//! ```
46//! use nm::{Event, Magnitude};
47//!
48//! const PACKAGE_WEIGHT_GRAMS_BUCKETS: &[Magnitude] = &[0, 100, 200, 500, 1000, 2000, 5000, 10000];
49//!
50//! thread_local! {
51//! static PACKAGES_RECEIVED_WEIGHT_GRAMS: Event = Event::builder()
52//! .name("packages_received_weight_grams")
53//! .histogram(PACKAGE_WEIGHT_GRAMS_BUCKETS)
54//! .build();
55//!
56//! static PACKAGES_SENT_WEIGHT_GRAMS: Event = Event::builder()
57//! .name("packages_sent_weight_grams")
58//! .histogram(PACKAGE_WEIGHT_GRAMS_BUCKETS)
59//! .build();
60//! }
61//! ```
62//!
63//! Choose the bucket boundaries based on the expected distribution of magnitudes.
64//!
65//! # Capturing observations
66//!
67//! To capture an observation, call `observe()` on the event.
68//! Different variants of this method are provided to capture observations with different
69//! characteristics:
70//!
71//! ```
72//! # use nm::{Event, Magnitude};
73//! #
74//! # const PACKAGE_WEIGHT_GRAMS_BUCKETS: &[Magnitude] = &[0, 100, 200, 500, 1000, 2000, 5000, 10000];
75//! #
76//! # thread_local! {
77//! # static PACKAGES_RECEIVED: Event = Event::builder()
78//! # .name("packages_received")
79//! # .build();
80//! #
81//! # static PACKAGES_RECEIVED_WEIGHT_GRAMS: Event = Event::builder()
82//! # .name("packages_received_weight_grams")
83//! # .histogram(PACKAGE_WEIGHT_GRAMS_BUCKETS)
84//! # .build();
85//! #
86//! # static PACKAGE_SEND_DURATION_MS: Event = Event::builder()
87//! # .name("package_send_duration_ms")
88//! # .build();
89//! # }
90//! use std::time::Duration;
91//!
92//! // observe(x) observes an event with a magnitude of `x`.
93//! PACKAGES_RECEIVED_WEIGHT_GRAMS.with(|e| e.observe(900));
94//!
95//! // observe_once() observes an event with a nominal magnitude of 1, to clearly express that
96//! // this event has no concept of magnitude and we use 1 as a nominal placeholder.
97//! PACKAGES_RECEIVED.with(|e| e.observe_once());
98//!
99//! // observe_millis(x) observes an event with a magnitude of `x` in milliseconds while
100//! // ensuring that any data type conversions respect the crate panic and mathematics policies.
101//! let send_duration = Duration::from_millis(150);
102//! PACKAGE_SEND_DURATION_MS.with(|e| e.observe_millis(send_duration));
103//!
104//! // batch(count) allows you to observe `count` occurrences in one call,
105//! // each with the same magnitude, for greater efficiency in batch operations.
106//! PACKAGES_RECEIVED_WEIGHT_GRAMS.with(|e| e.batch(500).observe(8));
107//! PACKAGES_RECEIVED.with(|e| e.batch(500).observe_once());
108//! PACKAGE_SEND_DURATION_MS.with(|e| e.batch(500).observe_millis(send_duration));
109//! ```
110//!
111//! ## Observing durations of operations
112//!
113//! You can efficiently capture the duration of function calls via `observe_duration_millis()`:
114//!
115//! ```
116//! use nm::{Event, Magnitude};
117//!
118//! const CONNECT_TIME_MS_BUCKETS: &[Magnitude] = &[0, 10, 20, 50, 100, 200, 500, 1000];
119//!
120//! thread_local! {
121//! static CONNECT_TIME_MS: Event = Event::builder()
122//! .name("net_http_connect_time_ms")
123//! .histogram(CONNECT_TIME_MS_BUCKETS)
124//! .build();
125//! }
126//!
127//! pub fn http_connect() {
128//! CONNECT_TIME_MS.with(|e| {
129//! e.observe_duration_millis(|| {
130//! do_http_connect();
131//! })
132//! });
133//! }
134//! # http_connect();
135//! # fn do_http_connect() {}
136//! ```
137//!
138//! This captures the duration of the function call in milliseconds. The measurement has
139//! a platform-defined granularity (typically around 1-20 ms), which means that operations
140//! faster than this granularity may be recorded with a duration of zero.
141//!
142//! It is not practical to measure the duration of individual operations at a finer level of
143//! precision because the measurement overhead becomes prohibitive. If you are observing
144//! operations that last nanoseconds or microseconds, you should only measure them in
145//! aggregate (e.g. duration per batch of 10000).
146//!
147//! # Reporting to terminal
148//!
149//! To collect a report of all observations, call `Report::collect()`. The returned report
150//! implements the `Display` trait, so you can print it to the terminal:
151//!
152//! ```
153//! use nm::Report;
154//!
155//! let report = Report::collect();
156//! println!("{report}");
157//! ```
158//!
159//! # Reporting to external systems
160//!
161//! A report can be inspected to extract the data within and deliver it to an external system,
162//! such as an OpenTelemetry exporter for storage in a metrics database.
163//!
164//! ```
165//! use nm::Report;
166//!
167//! let report = Report::collect();
168//!
169//! for event in report.events() {
170//! println!(
171//! "Event {} has occurred {} times with a total magnitude of {}",
172//! event.name(),
173//! event.count(),
174//! event.sum()
175//! );
176//! }
177//! ```
178//!
179//! Note that the report accumulates data from the start of the process. This means the
180//! data does not reset between reports. If you only want to record differences, you need
181//! to account for the previous state of the event yourself.
182//!
183//! # Minimizing overhead by on-demand publishing
184//!
185//! The ultimate goal of the metrics collected by an [`Event`] is to end up in a [`Report`].
186//!
187//! There are two models by which this can happen:
188//!
189//! - **Pull** model - the reporting system queries each event in the process for its latest data
190//! set when generating a report. This is the default and requires no action from you.
191//! - **Push** model - data from an event only flows to a thread-local [`MetricsPusher`], which
192//! publishes the data into the reporting system on demand. This requires you to periodically
193//! trigger the publishing via [`MetricsPusher::push()`][MetricsPusher::push].
194//!
195//! The push model has lower measurement overhead due to a more optimal internal data layout
196//! but requires action from you to ensure that data is published. If you never push the data,
197//! it will never show up in a report.
198//!
199//! The previous examples all use the default pull model. Here is an example with the push model:
200//!
201//! ```
202//! use nm::{Event, MetricsPusher, Push};
203//!
204//! thread_local! {
205//! static HTTP_EVENTS_PUSHER: MetricsPusher = MetricsPusher::new();
206//!
207//! static CONNECT_TIME_MS: Event<Push> = Event::builder()
208//! .name("net_http_connect_time_ms")
209//! .pusher_local(&HTTP_EVENTS_PUSHER)
210//! .build();
211//! }
212//!
213//! pub fn http_connect() {
214//! CONNECT_TIME_MS.with(|e| {
215//! e.observe_duration_millis(|| {
216//! do_http_connect();
217//! })
218//! });
219//! }
220//!
221//! loop {
222//! http_connect();
223//!
224//! // Periodically push the data to the reporting system.
225//! if is_time_to_push() {
226//! HTTP_EVENTS_PUSHER.with(MetricsPusher::push);
227//! }
228//! # break; // Avoid infinite loop when running example.
229//! }
230//! # fn do_http_connect() {}
231//! # fn is_time_to_push() -> bool { true }
232//! ```
233//!
234//! You should consider using the push model when an event is only used under controlled conditions
235//! where every thread that observes the event is guaranteed to call
236//! [`MetricsPusher::push()`][MetricsPusher::push] at some point.
237//!
238//! The choice of publishing model can be made separately for each event.
239//!
240//! # Dynamically registered events
241//!
242//! It is not strictly required to define events as thread-local statics. You can also create
243//! instances of `Event` on the fly using the same `Event::builder()` mechanism. This can be useful
244//! if you do not know at compile time which events you will need, such as when creating one event
245//! per item defined in a configuration file.
246//!
247//! Note, however, that each event (each unique event name) can only be registered once per thread.
248//! Any attempt to register an event two times with the same name on the same thread will result
249//! in a panic.
250//!
251//! # Panic policy
252//!
253//! This crate may panic when registering events if an invalid configuration
254//! is supplied for the event.
255//!
256//! This crate will not panic for "mathematical" reasons during observation of events,
257//! such as overflow or underflow due to excessively large event counts or magnitudes.
258//!
259//! # Mathematics policy
260//!
261//! Attempting to use excessively large values, either instantaneous or cumulative, may result in
262//! mangled data, for example when observing events with magnitudes near `i64::MAX`. No
263//! guarantee is made about what the specific outcome will be in this case (though the panic
264//! policy above still applies). Do not stray near `i64` boundaries and you should be fine.
265
266mod constants;
267mod data_types;
268mod event;
269mod event_builder;
270mod observations;
271mod observe;
272mod publish_model;
273mod pusher;
274mod registries;
275mod reports;
276mod sealed;
277
278pub(crate) use constants::*;
279pub use data_types::*;
280pub use event::*;
281pub use event_builder::*;
282pub(crate) use observations::*;
283pub use observe::*;
284pub use publish_model::*;
285pub use pusher::*;
286pub(crate) use registries::*;
287pub use reports::*;
288pub(crate) use sealed::*;