splashsurf_lib/
profiling.rs

1//! Implementation details for the [`profile`](crate::profile) macro
2
3use parking_lot::RwLock;
4use std::collections::hash_map::RandomState;
5use std::collections::{HashMap, HashSet};
6use std::error::Error;
7use std::hash::{BuildHasher, Hash};
8use std::io;
9use std::sync::LazyLock;
10use std::time::{Duration, Instant};
11use thread_local::ThreadLocal;
12
13/// Thread local storage of the [`Profiler`]s storing all `Scope`s of the thread
14pub static PROFILER: LazyLock<ThreadLocal<RwLock<Profiler>>> = LazyLock::new(ThreadLocal::new);
15
16/// `RandomState` used to obtain `Hasher`s to hash [`ScopeId`]s for parent/child identification
17pub static RANDOM_STATE: LazyLock<RandomState> = LazyLock::new(RandomState::new);
18
19/// Implementation of the profile macro, use [`profile`](crate::profile) instead
20#[doc(hidden)]
21#[macro_export]
22macro_rules! profile_impl {
23    ($name:expr) => {
24        let (_profiling_scope_guard, _) = $crate::profiling::PROFILER
25            .get_or(Default::default)
26            .write()
27            .enter($name);
28    };
29    ($scope_id:ident, $name:expr) => {
30        let (_profiling_scope_guard, $scope_id) = $crate::profiling::PROFILER
31            .get_or(Default::default)
32            .write()
33            .enter($name);
34    };
35    ($name:expr, parent = $parent_id:ident) => {
36        let (_profiling_scope_guard, _) = $crate::profiling::PROFILER
37            .get_or(Default::default)
38            .write()
39            .enter_with_parent($name, &$parent_id);
40    };
41    ($scope_id:ident, $name:expr, parent = $parent_id:ident) => {
42        let (_profiling_scope_guard, $scope_id) = $crate::profiling::PROFILER
43            .get_or(Default::default)
44            .write()
45            .enter_with_parent($name, &$parent_id);
46    };
47}
48
49/// A scope guard recording the elapsed time of the scope
50pub struct Guard {
51    enter_time: Instant,
52}
53
54impl Guard {
55    fn new() -> Self {
56        Self {
57            enter_time: Instant::now(),
58        }
59    }
60}
61
62/// Dropping a `Guard` will add its recorded elapsed time to the top of the current local scope stack
63impl Drop for Guard {
64    fn drop(&mut self) {
65        let duration = self.enter_time.elapsed();
66        PROFILER
67            .get()
68            .expect("Missing thread local profiler")
69            .write()
70            .leave(duration)
71    }
72}
73
74#[derive(Clone, Debug)]
75struct Scope {
76    name: &'static str,
77    num_calls: usize,
78    duration_sum: Duration,
79    first_call: Instant,
80}
81
82impl Scope {
83    fn new(name: &'static str) -> Self {
84        Scope {
85            name,
86            num_calls: 0,
87            duration_sum: Default::default(),
88            first_call: Instant::now(),
89        }
90    }
91
92    fn merge(&mut self, other: &Self) {
93        if other.name == self.name {
94            self.num_calls += other.num_calls;
95            self.duration_sum += other.duration_sum;
96            if other.first_call < self.first_call {
97                self.first_call = other.first_call;
98            }
99        }
100    }
101}
102
103/// Type used to uniquely identify scopes across threads
104#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
105pub struct ScopeId {
106    name: &'static str,
107    parent_hash: u64,
108}
109
110impl ScopeId {
111    fn get_hash(id: Option<&ScopeId>) -> u64 {
112        RANDOM_STATE.hash_one(id)
113    }
114}
115
116/// Profiler storing all locally created scopes with their timings and the current stack of scopes
117#[derive(Default)]
118pub struct Profiler {
119    /// All local scopes that were created using the `enter` functions
120    scopes: HashMap<ScopeId, Scope>,
121    /// Current stack hierarchy of the local scopes
122    scope_stack: Vec<ScopeId>,
123    /// All local scopes that have no parent (or which have a manually assigned parent)
124    roots: HashSet<ScopeId>,
125}
126
127impl Profiler {
128    /// Resets all profiling data of this profiler
129    pub fn reset(&mut self) {
130        self.scopes.clear();
131        self.scope_stack.clear();
132        self.roots.clear();
133    }
134
135    /// Enter a scope with the given name and push it onto the stack of scopes. It will be a child scope of the current top of the stack.
136    pub fn enter(&mut self, name: &'static str) -> (Guard, ScopeId) {
137        // TODO: If the scope on top of the stack has the same name, use the next one as parent, to avoid huge chains for recursive functions
138        let id = self.new_id(name, self.scope_stack.last());
139        self.enter_with_id(name, id)
140    }
141
142    /// Enter a scope with the given name and push it onto the stack of scopes. It will be manually assigned as a child of the given parent scope.
143    pub fn enter_with_parent(&mut self, name: &'static str, parent: &ScopeId) -> (Guard, ScopeId) {
144        let id = self.new_id(name, Some(parent));
145        self.enter_with_id(name, id)
146    }
147
148    fn enter_with_id(&mut self, name: &'static str, id: ScopeId) -> (Guard, ScopeId) {
149        self.scopes.entry(id).or_insert_with(|| Scope::new(name));
150
151        // Insert as root, even it has a manually assigned parent to prevent child scopes from ending up as roots themselves
152        if self.scope_stack.is_empty() {
153            self.roots.insert(id);
154        }
155
156        self.scope_stack.push(id);
157        (Guard::new(), id)
158    }
159
160    /// Leave the scope at the top of the stack and increment its scope by the given duration
161    fn leave(&mut self, duration: Duration) {
162        if let Some(id) = self.scope_stack.pop() {
163            if let Some(scope) = self.scopes.get_mut(&id) {
164                scope.num_calls += 1;
165                scope.duration_sum += duration;
166            }
167        }
168    }
169
170    fn new_id(&self, name: &'static str, parent: Option<&ScopeId>) -> ScopeId {
171        ScopeId {
172            name,
173            parent_hash: ScopeId::get_hash(parent),
174        }
175    }
176}
177
178fn write_recursively<W: io::Write>(
179    out: &mut W,
180    sorted_scopes: &[(ScopeId, Scope)],
181    current: &(ScopeId, Scope),
182    parent_duration: Option<Duration>,
183    depth: usize,
184    is_parallel: bool,
185) -> io::Result<()> {
186    let (id, scope) = current;
187
188    for _ in 0..depth {
189        write!(out, "  ")?;
190    }
191
192    let duration_sum_secs = scope.duration_sum.as_secs_f64();
193    let parent_duration_secs = parent_duration.map_or(duration_sum_secs, |t| t.as_secs_f64());
194    let percent = duration_sum_secs / parent_duration_secs * 100.0;
195
196    writeln!(
197        out,
198        "{}: {}{:3.2}%, {:>4.2}ms avg, {} {} (total: {:.3}s)",
199        scope.name,
200        if is_parallel { "≈" } else { "" },
201        percent,
202        duration_sum_secs * 1000.0 / (scope.num_calls as f64),
203        scope.num_calls,
204        if scope.num_calls > 1 { "calls" } else { "call" },
205        duration_sum_secs
206    )?;
207
208    // Compute sum of runtimes of children
209    let mut children_runtime = Duration::default();
210    let current_hash = ScopeId::get_hash(Some(id));
211    for s in sorted_scopes {
212        let (child_id, child_scope) = s;
213        if child_id.parent_hash == current_hash {
214            children_runtime += child_scope.duration_sum;
215        }
216    }
217
218    // If children were run in parallel, their total runtime can be larger than that of the current scope
219    let children_parallel = children_runtime > scope.duration_sum;
220    let own_runtime = if children_parallel {
221        children_runtime
222    } else {
223        scope.duration_sum
224    };
225
226    // Process children in sorted order
227    let current_hash = ScopeId::get_hash(Some(id));
228    for s in sorted_scopes {
229        let (child_id, _) = s;
230        if child_id.parent_hash == current_hash {
231            // TODO: Prevent infinite recursion for recursive functions, maybe remove current scope from map?
232            //  Maybe we don't have this problem, instead it will be a huge chain which is as long as the recursion depth...
233            write_recursively(
234                out,
235                sorted_scopes,
236                s,
237                Some(own_runtime),
238                depth + 1,
239                children_parallel,
240            )?;
241        }
242    }
243
244    Ok(())
245}
246
247/// Pretty print the collected profiling data of all thread local [`Profiler`]s to the given writer
248pub fn write<W: io::Write>(out: &mut W) -> io::Result<()> {
249    let mut merged_scopes = HashMap::<ScopeId, Scope>::new();
250    let mut roots = HashSet::<ScopeId>::new();
251
252    // Collect scopes over all threads
253    for profiler in PROFILER.iter() {
254        let profiler = profiler.read();
255        roots.extend(profiler.roots.iter());
256
257        for (&id, scope) in &profiler.scopes {
258            merged_scopes
259                .entry(id)
260                .and_modify(|s| s.merge(scope))
261                .or_insert_with(|| scope.clone());
262        }
263    }
264
265    // Sort and filter root scopes
266    let sorted_roots = {
267        let root_hash = ScopeId::get_hash(None);
268        let mut roots = roots
269            .into_iter()
270            // Remove roots that are not actual roots (happens if their parent was set manually)
271            .filter(|id| id.parent_hash == root_hash)
272            // Get (id, scope) tuple
273            .flat_map(|id| merged_scopes.get(&id).cloned().map(|s| (id, s)))
274            .collect::<Vec<_>>();
275
276        roots.sort_unstable_by_key(|(_, s)| s.first_call);
277        roots
278    };
279
280    // Sort all scopes by first call time
281    let sorted_scopes = {
282        let mut scopes = merged_scopes.into_iter().collect::<Vec<_>>();
283        scopes.sort_unstable_by_key(|(_, s)| s.first_call);
284        scopes
285    };
286
287    // Print the stats
288    for root in &sorted_roots {
289        write_recursively(out, sorted_scopes.as_slice(), root, None, 0, false)?;
290    }
291
292    Ok(())
293}
294
295/// Returns the pretty printed output of the collected profiling data as a `String`
296pub fn write_to_string() -> Result<String, Box<dyn Error>> {
297    let mut buffer = Vec::new();
298    write(&mut buffer)?;
299    Ok(String::from_utf8(buffer)?)
300}
301
302/// Resets the profiling data of all thread local [`Profiler`]s
303///
304/// Note that it should be ensure that this is called outside of any scopes. It's safe to also call
305/// this function from inside a scope but it can lead to inconsistent profiling data (if a new scope
306/// is created afterwards followed by dropping of an old scope).
307pub fn reset() {
308    for profiler in PROFILER.iter() {
309        profiler.write().reset();
310    }
311}