1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
//! Implementation details for the [`profile`](crate::profile) macro

use lazy_static::lazy_static;
use parking_lot::RwLock;
use std::collections::hash_map::RandomState;
use std::collections::{HashMap, HashSet};
use std::error::Error;
use std::hash::{BuildHasher, Hash, Hasher};
use std::io;
use std::time::{Duration, Instant};
use thread_local::ThreadLocal;

lazy_static! {
    /// Thread local storage of the [`Profiler`](Profiler)s storing all [`Scope`](Scope)s of the thread
    pub static ref PROFILER: ThreadLocal<RwLock<Profiler>> = ThreadLocal::new();
    /// `RandomState` used to obtain `Hasher`s to hash [`ScopeId`](ScopeId)s for parent/child identification
    pub static ref RANDOM_STATE: RandomState = RandomState::new();
}

/// Implementation of the profile macro, use [`profile`](crate::profile) instead
#[doc(hidden)]
#[macro_export]
macro_rules! profile_impl {
    ($name:expr) => {
        let (_profiling_scope_guard, _) = $crate::profiling::PROFILER
            .get_or(Default::default)
            .write()
            .enter($name);
    };
    ($scope_id:ident, $name:expr) => {
        let (_profiling_scope_guard, $scope_id) = $crate::profiling::PROFILER
            .get_or(Default::default)
            .write()
            .enter($name);
    };
    ($name:expr, parent = $parent_id:ident) => {
        let (_profiling_scope_guard, _) = $crate::profiling::PROFILER
            .get_or(Default::default)
            .write()
            .enter_with_parent($name, &$parent_id);
    };
    ($scope_id:ident, $name:expr, parent = $parent_id:ident) => {
        let (_profiling_scope_guard, $scope_id) = $crate::profiling::PROFILER
            .get_or(Default::default)
            .write()
            .enter_with_parent($name, &$parent_id);
    };
}

/// A scope guard recording the elapsed time of the scope
pub struct Guard {
    enter_time: Instant,
}

impl Guard {
    fn new() -> Self {
        Self {
            enter_time: Instant::now(),
        }
    }
}

/// Dropping a `Guard` will add its recorded elapsed time to the top of the current local scope stack
impl Drop for Guard {
    fn drop(&mut self) {
        let duration = self.enter_time.elapsed();
        PROFILER
            .get()
            .expect("Missing thread local profiler")
            .write()
            .leave(duration)
    }
}

#[derive(Clone, Debug)]
struct Scope {
    name: &'static str,
    num_calls: usize,
    duration_sum: Duration,
    first_call: Instant,
}

impl Scope {
    fn new(name: &'static str) -> Self {
        Scope {
            name,
            num_calls: 0,
            duration_sum: Default::default(),
            first_call: Instant::now(),
        }
    }

    fn merge(&mut self, other: &Self) {
        if other.name == self.name {
            self.num_calls += other.num_calls;
            self.duration_sum += other.duration_sum;
            if other.first_call < self.first_call {
                self.first_call = other.first_call;
            }
        }
    }
}

/// Type used to uniquely identify scopes across threads
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
pub struct ScopeId {
    name: &'static str,
    parent_hash: u64,
}

impl ScopeId {
    fn get_hash(id: Option<&ScopeId>) -> u64 {
        let mut hasher = RANDOM_STATE.build_hasher();
        id.hash(&mut hasher);
        let hash = hasher.finish();
        hash
    }
}

/// Profiler storing all locally created scopes with their timings and the current stack of scopes
#[derive(Default)]
pub struct Profiler {
    /// All local scopes that were created using the `enter` functions
    scopes: HashMap<ScopeId, Scope>,
    /// Current stack hierarchy of the local scopes
    scope_stack: Vec<ScopeId>,
    /// All local scopes that have no parent (or which have a manually assigned parent)
    roots: HashSet<ScopeId>,
}

impl Profiler {
    /// Resets all profiling data of this profiler
    pub fn reset(&mut self) {
        self.scopes.clear();
        self.scope_stack.clear();
        self.roots.clear();
    }

    /// Enter a scope with the given name and push it onto the stack of scopes. It will be a child scope of the current top of the stack.
    pub fn enter(&mut self, name: &'static str) -> (Guard, ScopeId) {
        // TODO: If the scope on top of the stack has the same name, use the next one as parent, to avoid huge chains for recursive functions
        let id = self.new_id(name, self.scope_stack.last());
        self.enter_with_id(name, id)
    }

    /// Enter a scope with the given name and push it onto the stack of scopes. It will be manually assigned as a child of the given parent scope.
    pub fn enter_with_parent(&mut self, name: &'static str, parent: &ScopeId) -> (Guard, ScopeId) {
        let id = self.new_id(name, Some(parent));
        self.enter_with_id(name, id)
    }

    fn enter_with_id(&mut self, name: &'static str, id: ScopeId) -> (Guard, ScopeId) {
        self.scopes.entry(id).or_insert_with(|| Scope::new(name));

        // Insert as root, even it has a manually assigned parent to prevent child scopes from ending up as roots themselves
        if self.scope_stack.is_empty() {
            self.roots.insert(id);
        }

        self.scope_stack.push(id);
        (Guard::new(), id)
    }

    /// Leave the scope at the top of the stack and increment its scope by the given duration
    fn leave(&mut self, duration: Duration) {
        if let Some(id) = self.scope_stack.pop() {
            if let Some(scope) = self.scopes.get_mut(&id) {
                scope.num_calls += 1;
                scope.duration_sum += duration;
            }
        }
    }

    fn new_id(&self, name: &'static str, parent: Option<&ScopeId>) -> ScopeId {
        ScopeId {
            name,
            parent_hash: ScopeId::get_hash(parent),
        }
    }
}

fn write_recursively<W: io::Write>(
    out: &mut W,
    sorted_scopes: &[(ScopeId, Scope)],
    current: &(ScopeId, Scope),
    total_duration: Option<Duration>,
    depth: usize,
) -> io::Result<()> {
    let (id, scope) = current;

    for _ in 0..depth {
        write!(out, "  ")?;
    }

    let duration_sum_secs = scope.duration_sum.as_secs_f64();
    let total_duration_secs = total_duration.map_or(duration_sum_secs, |t| t.as_secs_f64());
    let percent = duration_sum_secs / total_duration_secs * 100.0;

    writeln!(
        out,
        "{}: {:3.2}%, {:>4.2}ms avg @ {:.2}Hz ({} {})",
        scope.name,
        percent,
        duration_sum_secs * 1000.0 / (scope.num_calls as f64),
        scope.num_calls as f64 / total_duration_secs,
        scope.num_calls,
        if scope.num_calls > 1 { "calls" } else { "call" }
    )?;

    // Process children in sorted order
    let current_hash = ScopeId::get_hash(Some(id));
    for s in sorted_scopes {
        let (child_id, _) = s;
        if child_id.parent_hash == current_hash {
            // TODO: Prevent infinite recursion for recursive functions, maybe remove current scope from map?
            //  Maybe we don't have this problem, instead it will be a huge chain which is as long as the recursion depth...
            write_recursively(out, sorted_scopes, s, Some(scope.duration_sum), depth + 1)?;
        }
    }

    Ok(())
}

/// Pretty print the collected profiling data of all thread local [`Profiler`]s to the given writer
pub fn write<W: io::Write>(out: &mut W) -> io::Result<()> {
    let mut merged_scopes = HashMap::<ScopeId, Scope>::new();
    let mut roots = HashSet::<ScopeId>::new();

    // Collect scopes over all threads
    for profiler in PROFILER.iter() {
        let profiler = profiler.read();
        roots.extend(profiler.roots.iter());

        for (&id, scope) in &profiler.scopes {
            merged_scopes
                .entry(id)
                .and_modify(|s| s.merge(scope))
                .or_insert_with(|| scope.clone());
        }
    }

    // Sort and filter root scopes
    let sorted_roots = {
        let root_hash = ScopeId::get_hash(None);
        let mut roots = roots
            .into_iter()
            // Remove roots that are not actual roots (happens if their parent was set manually)
            .filter(|id| id.parent_hash == root_hash)
            // Get (id, scope) tuple
            .flat_map(|id| merged_scopes.get(&id).cloned().map(|s| (id, s)))
            .collect::<Vec<_>>();

        roots.sort_unstable_by_key(|(_, s)| s.first_call);
        roots
    };

    // Sort all scopes by first call time
    let sorted_scopes = {
        let mut scopes = merged_scopes.into_iter().collect::<Vec<_>>();
        scopes.sort_unstable_by_key(|(_, s)| s.first_call);
        scopes
    };

    // Print the stats
    for root in &sorted_roots {
        write_recursively(out, sorted_scopes.as_slice(), root, None, 0)?;
    }

    Ok(())
}

/// Returns the pretty printed output of the collected profiling data as a `String`
pub fn write_to_string() -> Result<String, Box<dyn Error>> {
    let mut buffer = Vec::new();
    write(&mut buffer)?;
    Ok(String::from_utf8(buffer)?)
}

/// Resets the profiling data of all thread local [`Profiler`]s
///
/// Note that it should be ensure that this is called outside of any scopes. It's safe to also call
/// this function from inside a scope but it can lead to inconsistent profiling data (if a new scope
/// is created afterwards followed by dropping of an old scope).
pub fn reset() {
    for profiler in PROFILER.iter() {
        profiler.write().reset();
    }
}