mod scope;
pub use crate::perftools::profiler::scope::AsyncScope;
#[cfg(test)]
mod tests;
use crate::{
perftools::profiler::scope::{
Guard,
Scope,
},
runtime::types::demi_callback_t,
};
use ::futures::future::FusedFuture;
use ::std::{
cell::RefCell,
io,
pin::Pin,
rc::Rc,
time::{
Duration,
SystemTime,
},
};
use std::thread;
#[cfg(feature = "auto-calibrate")]
const SAMPLE_SIZE: usize = 16641;
thread_local!(
pub static PROFILER: RefCell<Profiler> = RefCell::new(Profiler::new())
);
pub struct Profiler {
roots: Vec<Rc<RefCell<Scope>>>,
current: Option<Rc<RefCell<Scope>>>,
ns_per_cycle: f64,
perf_callback: Option<demi_callback_t>,
#[cfg(feature = "auto-calibrate")]
clock_drift: u64,
}
pub fn write<W: io::Write>(out: &mut W, max_depth: Option<usize>) -> io::Result<()> {
PROFILER.with(|p| p.borrow().write(out, max_depth))
}
pub fn reset() {
PROFILER.with(|p| p.borrow_mut().reset());
}
pub fn set_callback(perf_callback: demi_callback_t) {
PROFILER.with(|p| p.borrow_mut().set_callback(perf_callback));
}
impl Profiler {
fn new() -> Profiler {
Profiler {
roots: Vec::new(),
current: None,
ns_per_cycle: Self::measure_ns_per_cycle(),
perf_callback: None,
#[cfg(feature = "auto-calibrate")]
clock_drift: Self::clock_drift(SAMPLE_SIZE),
}
}
pub fn set_callback(&mut self, perf_callback: demi_callback_t) {
self.perf_callback = Some(perf_callback)
}
#[inline]
pub fn sync_scope(&mut self, name: &'static str) -> Guard {
let scope = self.get_scope(name);
self.enter_scope(scope)
}
#[inline]
pub async fn coroutine_scope<F: FusedFuture>(name: &'static str, mut coroutine: Pin<Box<F>>) -> F::Output {
AsyncScope::new(
PROFILER.with(|p| p.borrow_mut().get_root_scope(name)),
coroutine.as_mut(),
)
.await
}
fn get_root_scope(&mut self, name: &'static str) -> Rc<RefCell<Scope>> {
let existing_root = self.roots.iter().find(|root| root.borrow().get_name() == name).cloned();
existing_root.unwrap_or_else(|| {
let new_scope: Scope = Scope::new(name, None, self.perf_callback);
let succ = Rc::new(RefCell::new(new_scope));
self.roots.push(succ.clone());
succ
})
}
pub fn get_scope(&mut self, name: &'static str) -> Rc<RefCell<Scope>> {
if let Some(current) = self.current.as_ref() {
let existing_succ = current
.borrow()
.get_succs()
.iter()
.find(|succ| succ.borrow().get_name() == name)
.cloned();
existing_succ.unwrap_or_else(|| {
let new_scope: Scope = Scope::new(name, Some(current.clone()), self.perf_callback);
let succ = Rc::new(RefCell::new(new_scope));
current.borrow_mut().add_succ(succ.clone());
succ
})
} else {
self.get_root_scope(name)
}
}
fn enter_scope(&mut self, scope: Rc<RefCell<Scope>>) -> Guard {
trace!("Entering scope: {}", scope.borrow().get_name());
let guard = scope.borrow_mut().enter();
self.current = Some(scope);
guard
}
fn reset(&mut self) {
self.roots.clear();
}
#[inline]
fn leave_scope(&mut self, duration: u64) {
self.current = if let Some(current) = self.current.as_ref() {
cfg_if::cfg_if! {
if #[cfg(feature = "auto-calibrate")] {
let d = duration.checked_sub(self.clock_drift);
current.borrow_mut().leave(d.unwrap_or(duration));
} else {
current.borrow_mut().leave(duration);
}
}
current.borrow().get_pred().as_ref().cloned()
} else {
log::error!("Called perftools::profiler::leave() while not in any scope");
None
};
}
fn write<W: io::Write>(&self, out: &mut W, max_depth: Option<usize>) -> io::Result<()> {
let total_duration = self.roots.iter().map(|root| root.borrow().get_duration_sum()).sum();
let thread_id: thread::ThreadId = thread::current().id();
writeln!(
out,
"call_depth,thread_id,function_name,num_calls,percent_time,cycles_per_call,nanoseconds_per_call"
)?;
for root in self.roots.iter() {
root.borrow()
.write_recursive(out, thread_id, total_duration, 0, max_depth, self.ns_per_cycle)?;
}
out.flush()
}
fn measure_ns_per_cycle() -> f64 {
let start: SystemTime = SystemTime::now();
let (start_cycle, _): (u64, u32) = unsafe { x86::time::rdtscp() };
test::black_box((0..10000).fold(0, |old, new| old ^ new));
let (end_cycle, _): (u64, u32) = unsafe { x86::time::rdtscp() };
let since_the_epoch: Duration = SystemTime::now().duration_since(start).expect("Time went backwards");
let in_ns: u64 = since_the_epoch.as_secs() * 1_000_000_000 + since_the_epoch.subsec_nanos() as u64;
in_ns as f64 / (end_cycle - start_cycle) as f64
}
#[cfg(feature = "auto-calibrate")]
fn clock_drift(nsamples: usize) -> u64 {
let mut total = 0;
for _ in 0..nsamples {
let now: u64 = x86::time::rdtscp();
let duration: u64 = x86::time::rdtscp() - now;
let d = total + duration;
}
total / (nsamples as u64)
}
}
impl Drop for Profiler {
fn drop(&mut self) {
self.write(&mut std::io::stdout(), None)
.expect("failed to write to stdout");
}
}