coarse_prof/lib.rs
1//! `coarse-prof` allows you to hierarchically measure the time that blocks in
2//! your program take, enabling you to get an intuition of where most time is
3//! spent. This can be useful for game development, where you have a bunch of
4//! things that need to run in every frame, such as physics, rendering,
5//! networking and so on, and you may wish to identify the hot spots, so that
6//! you know whether and what to optimize.
7//!
8//! `coarse-prof`'s implementation has been inspired by
9//! [hprof](https://github.com/cmr/hprof).
10//! In contrast to `hprof`, which resets measurements after each frame, this
11//! library tracks averages over multiple frames. Also, `coarse-prof` provides
12//! the macro [`profile`](macro.profile.html) for profiling a scope, so that
13//! users do not have to assign a name to scope guards.
14//!
15//! # Example
16//!
17//! ```
18//! use std::thread::sleep;
19//! use std::time::Duration;
20//!
21//! use coarse_prof::profile;
22//!
23//! fn render() {
24//! profile!("render");
25//!
26//! // So slow!
27//! sleep(Duration::from_millis(10));
28//! }
29//!
30//! // Our game's main loop
31//! let num_frames = 100;
32//! for i in 0..num_frames {
33//! profile!("frame");
34//!
35//! // Physics don't run every frame
36//! if i % 10 == 0 {
37//! profile!("physics");
38//! sleep(Duration::from_millis(2));
39//!
40//! {
41//! profile!("collisions");
42//! sleep(Duration::from_millis(1));
43//! }
44//! }
45//!
46//! render();
47//! }
48//!
49//! // Print the profiling results.
50//! coarse_prof::write(&mut std::io::stdout()).unwrap();
51//! ```
52//!
53//! Example output:
54//! ```text
55//! frame: 100.00%, 10.40ms/call @ 96.17Hz
56//! physics: 3.04%, 3.16ms/call @ 9.62Hz
57//! collisions: 33.85%, 1.07ms/call @ 9.62Hz
58//! render: 96.84%, 10.07ms/call @ 96.17Hz
59//! ```
60
61use std::cell::RefCell;
62use std::io;
63use std::rc::Rc;
64use std::time::Duration;
65
66use instant::Instant;
67
68thread_local!(
69 /// Global thread-local instance of the profiler.
70 pub static PROFILER: RefCell<Profiler> = RefCell::new(Profiler::new())
71);
72
73/// Print profiling scope tree.
74///
75/// Example output:
76/// ```text
77/// frame: 100.00%, 10.40ms/call @ 96.17Hz
78/// physics: 3.04%, 3.16ms/call @ 9.62Hz
79/// collisions: 33.85%, 1.07ms/call @ 9.62Hz
80/// render: 96.84%, 10.07ms/call @ 96.17Hz
81/// ```
82///
83/// Percentages represent the amount of time taken relative to the parent node.
84///
85/// Frequencies are computed with respect to the total amount of time spent in
86/// root nodes. Thus, if you have multiple root nodes and they do not cover
87/// all code that runs in your program, the printed frequencies will be
88/// overestimated.
89pub fn write<W: io::Write>(out: &mut W) -> io::Result<()> {
90 PROFILER.with(|p| p.borrow().write(out))
91}
92
93/// Reset profiling information.
94pub fn reset() {
95 PROFILER.with(|p| p.borrow_mut().reset());
96}
97
98/// Manually enter a scope.
99///
100/// The returned instance of [`Guard`](struct.Guard.html) should be dropped
101/// when leaving the scope.
102///
103/// Usually, it is more convenient to use the macro
104/// [`profile`](macro.profile.html) for including a scope in profiling, but in
105/// some special cases explicit entering/leaving can make sense.
106pub fn enter(name: &'static str) -> Guard {
107 PROFILER.with(|p| p.borrow_mut().enter(name))
108}
109
110/// Use this macro to add the current scope to profiling. In effect, the time
111/// taken from entering to leaving the scope will be measured.
112///
113/// Internally, the scope is inserted in the scope tree of the global
114/// thread-local [`PROFILER`](constant.PROFILER.html).
115///
116/// # Example
117///
118/// The following example will profile the scope `"foo"`, which has the scope
119/// `"bar"` as a child.
120///
121/// ```
122/// use coarse_prof::profile;
123///
124/// {
125/// profile!("foo");
126///
127/// {
128/// profile!("bar");
129/// // ... do something ...
130/// }
131///
132/// // ... do some more ...
133/// }
134/// ```
135#[macro_export]
136macro_rules! profile {
137 ($name:expr) => {
138 let _guard = $crate::PROFILER.with(|p| p.borrow_mut().enter($name));
139 };
140}
141
142/// Internal representation of scopes as a tree.
143struct Scope {
144 /// Name of the scope.
145 name: &'static str,
146
147 /// Parent scope in the tree. Root scopes have no parent.
148 pred: Option<Rc<RefCell<Scope>>>,
149
150 /// Child scopes in the tree.
151 succs: Vec<Rc<RefCell<Scope>>>,
152
153 /// How often has this scope been visited?
154 num_calls: usize,
155
156 /// In total, how much time has been spent in this scope?
157 duration_sum: Duration,
158
159 /// Minimal duration spent in this scope.
160 duration_min: Duration,
161
162 /// Maximal duration spent in this scope.
163 duration_max: Duration,
164}
165
166impl Scope {
167 fn new(name: &'static str, pred: Option<Rc<RefCell<Scope>>>) -> Scope {
168 Scope {
169 name,
170 pred,
171 succs: Vec::new(),
172 num_calls: 0,
173 duration_sum: Duration::new(0, 0),
174 duration_min: Duration::new(u64::MAX, u32::MIN),
175 duration_max: Duration::new(0, 0),
176 }
177 }
178
179 /// Enter this scope. Returns a `Guard` instance that should be dropped
180 /// when leaving the scope.
181 fn enter(&mut self) -> Guard {
182 Guard::enter()
183 }
184
185 /// Leave this scope. Called automatically by the `Guard` instance.
186 fn leave(&mut self, duration: Duration) {
187 self.num_calls += 1;
188
189 // Even though this is extremely unlikely, let's not panic on overflow.
190 let duration_sum = self.duration_sum.checked_add(duration);
191 self.duration_sum = duration_sum.unwrap_or(Duration::from_millis(0));
192
193 self.duration_min = self.duration_min.min(duration);
194 self.duration_max = self.duration_max.max(duration);
195 }
196
197 fn write_recursive<W: io::Write>(
198 &self,
199 out: &mut W,
200 total_duration: Duration,
201 depth: usize,
202 ) -> io::Result<()> {
203 let total_duration_secs = total_duration.as_secs_f64();
204 let duration_sum_secs = self.duration_sum.as_secs_f64();
205 let pred_sum_secs = self.pred.clone().map_or(total_duration_secs, |pred| {
206 pred.borrow().duration_sum.as_secs_f64()
207 });
208
209 let percent = duration_sum_secs / pred_sum_secs * 100.0;
210
211 // Write self
212 for _ in 0..depth {
213 write!(out, " ")?;
214 }
215 writeln!(
216 out,
217 "{}: {:3.2}%, {:>4.2}ms avg, {:>4.2}ms min, {:>4.2}ms max @ {:.2}Hz",
218 self.name,
219 percent,
220 duration_sum_secs * 1000.0 / (self.num_calls as f64),
221 self.duration_min.as_secs_f64() * 1000.0,
222 self.duration_max.as_secs_f64() * 1000.0,
223 self.num_calls as f64 / total_duration_secs,
224 )?;
225
226 // Write children
227 for succ in &self.succs {
228 succ.borrow()
229 .write_recursive(out, total_duration, depth + 1)?;
230 }
231
232 Ok(())
233 }
234}
235
236/// A guard that is created when entering a scope and dropped when leaving it.
237pub struct Guard {
238 enter_time: Instant,
239}
240
241impl Guard {
242 fn enter() -> Self {
243 Self {
244 enter_time: Instant::now(),
245 }
246 }
247}
248
249impl Drop for Guard {
250 fn drop(&mut self) {
251 let duration = self.enter_time.elapsed();
252 PROFILER.with(|p| p.borrow_mut().leave(duration));
253 }
254}
255
256/// A `Profiler` stores the scope tree and keeps track of the currently active
257/// scope.
258///
259/// Note that there is a global thread-local instance of `Profiler` in
260/// [`PROFILER`](constant.PROFILER.html), so it is not possible to manually
261/// create an instance of `Profiler`.
262pub struct Profiler {
263 roots: Vec<Rc<RefCell<Scope>>>,
264 current: Option<Rc<RefCell<Scope>>>,
265 start_time: Instant,
266}
267
268impl Profiler {
269 fn new() -> Profiler {
270 Profiler {
271 roots: Vec::new(),
272 current: None,
273 start_time: Instant::now(),
274 }
275 }
276
277 /// Enter a scope. Returns a [`Guard`](struct.Guard.html) that should be
278 /// dropped upon leaving the scope.
279 ///
280 /// Usually, this method will be called by the
281 /// [`profile`](macro.profile.html) macro, so it does not need to be used
282 /// directly.
283 pub fn enter(&mut self, name: &'static str) -> Guard {
284 // Check if we have already registered `name` at the current point in
285 // the tree.
286 let succ = if let Some(current) = self.current.as_ref() {
287 // We are currently in some scope.
288 let existing_succ = current
289 .borrow()
290 .succs
291 .iter()
292 .find(|succ| succ.borrow().name == name)
293 .cloned();
294
295 existing_succ.unwrap_or_else(|| {
296 // Add new successor node to the current node.
297 let new_scope = Scope::new(name, Some(current.clone()));
298 let succ = Rc::new(RefCell::new(new_scope));
299
300 current.borrow_mut().succs.push(succ.clone());
301
302 succ
303 })
304 } else {
305 // We are currently not within any scope. Check if `name` already
306 // is a root.
307 let existing_root = self
308 .roots
309 .iter()
310 .find(|root| root.borrow().name == name)
311 .cloned();
312
313 existing_root.unwrap_or_else(|| {
314 // Add a new root node.
315 let new_scope = Scope::new(name, None);
316 let succ = Rc::new(RefCell::new(new_scope));
317
318 self.roots.push(succ.clone());
319
320 succ
321 })
322 };
323
324 let guard = succ.borrow_mut().enter();
325
326 self.current = Some(succ);
327
328 guard
329 }
330
331 /// Completely reset profiling data.
332 fn reset(&mut self) {
333 self.roots.clear();
334 self.start_time = Instant::now();
335
336 // Note that we could now still be anywhere in the previous profiling
337 // tree, so we can not simply reset `self.current`. However, as the
338 // frame comes to an end we will eventually leave a root node, at which
339 // point `self.current` will be set to `None`.
340 }
341
342 /// Leave the current scope.
343 fn leave(&mut self, duration: Duration) {
344 self.current = if let Some(current) = self.current.as_ref() {
345 current.borrow_mut().leave(duration);
346
347 // Set current scope back to the parent node (if any).
348 current.borrow().pred.as_ref().cloned()
349 } else {
350 // This should not happen with proper usage.
351 log::error!("Called coarse_prof::leave() while not in any scope");
352
353 None
354 };
355 }
356
357 fn write<W: io::Write>(&self, out: &mut W) -> io::Result<()> {
358 let total_duration = Instant::now().duration_since(self.start_time);
359
360 for root in self.roots.iter() {
361 root.borrow().write_recursive(out, total_duration, 0)?;
362 }
363
364 out.flush()
365 }
366}
367
368#[cfg(test)]
369mod tests {
370 #[test]
371 fn test_multiple_roots() {
372 super::reset();
373
374 for i in 0..=5 {
375 if i == 5 {
376 profile!("a");
377 }
378 {
379 profile!("b");
380 }
381 }
382
383 super::PROFILER.with(|p| {
384 let p = p.borrow();
385
386 assert_eq!(p.roots.len(), 2);
387
388 for root in p.roots.iter() {
389 assert!(root.borrow().pred.is_none());
390 assert!(root.borrow().succs.is_empty());
391 }
392
393 assert_eq!(p.roots[0].borrow().name, "b");
394 assert_eq!(p.roots[1].borrow().name, "a");
395
396 assert_eq!(p.roots[0].borrow().num_calls, 6);
397 assert_eq!(p.roots[1].borrow().num_calls, 1);
398 });
399 }
400
401 #[test]
402 fn test_succ_reuse() {
403 use std::ptr;
404
405 super::reset();
406
407 for i in 0..=5 {
408 profile!("a");
409 if i > 2 {
410 profile!("b");
411 }
412 }
413
414 assert_eq!(super::PROFILER.with(|p| p.borrow().roots.len()), 1);
415
416 super::PROFILER.with(|p| {
417 let p = p.borrow();
418
419 assert_eq!(p.roots.len(), 1);
420
421 let root = p.roots[0].borrow();
422 assert_eq!(root.name, "a");
423 assert!(root.pred.is_none());
424 assert_eq!(root.succs.len(), 1);
425 assert_eq!(root.num_calls, 6);
426
427 let succ = root.succs[0].borrow();
428 assert_eq!(succ.name, "b");
429 assert!(ptr::eq(
430 succ.pred.as_ref().unwrap().as_ref(),
431 p.roots[0].as_ref()
432 ));
433 assert!(succ.succs.is_empty());
434 assert_eq!(succ.num_calls, 3);
435 });
436 }
437
438 #[test]
439 fn test_reset_during_frame() {
440 super::reset();
441
442 for i in 0..=5 {
443 profile!("a");
444 profile!("b");
445 {
446 profile!("c");
447 if i == 5 {
448 super::reset();
449 }
450
451 assert!(super::PROFILER.with(|p| p.borrow().current.is_some()));
452
453 profile!("d");
454 }
455 }
456
457 super::PROFILER.with(|p| {
458 let p = p.borrow();
459
460 assert!(p.roots.is_empty());
461 assert!(p.current.is_none());
462 });
463 }
464}