use std::cell::{Cell, RefCell};
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use serde::Serialize;
use crate::builtins::error::capture_call_stack;
use crate::error::{StatorError, StatorResult};
/// Process-wide flag that is `true` while a profiling session is in progress.
///
/// Set by [`CpuProfiler::start`], cleared by [`CpuProfiler::stop`], and checked
/// first by [`maybe_record_sample`] so that it returns immediately when no
/// session is active.
pub(crate) static PROFILING_ACTIVE: AtomicBool = AtomicBool::new(false);
/// Process-wide "a sample has been requested" flag.
///
/// Raised by the profiler timer thread (and, on x86_64 Unix builds, by the
/// `SIGPROF` handler) and consumed by [`maybe_record_sample`], which resets it
/// with an atomic swap before recording.
pub(crate) static SAMPLE_NEEDED: AtomicBool = AtomicBool::new(false);
// Per-thread profiler storage. Because these are thread-locals, samples and the
// session start time are only visible to the thread that wrote them.
thread_local! {
/// Samples recorded on this thread as `(timestamp, call stack)` pairs.
///
/// The timestamp comes from `now_micros` and the stack from
/// `capture_call_stack`; `build_profile_tree` treats the first stack entry as
/// the outermost frame.
static SAMPLES: RefCell<Vec<(u64, Vec<&'static str>)>> =
const { RefCell::new(Vec::new()) };
/// Value of `now_micros` captured by `CpuProfiler::start` on this thread
/// (0 until a session has been started here).
static SESSION_START_MICROS: Cell<u64> = const { Cell::new(0) };
}
/// Source location of one function in the profile tree.
///
/// Serialised with camelCase keys (for example `functionName`).
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct CallFrame {
/// Name of the function; the synthetic tree root uses `"(root)"`.
pub function_name: String,
/// Script identifier; the tree builder in this file always writes `"0"`.
pub script_id: String,
/// Script URL; the tree builder in this file always leaves it empty.
pub url: String,
/// Line number; the tree builder in this file always writes 0.
pub line_number: u32,
/// Column number; the tree builder in this file always writes 0.
pub column_number: u32,
}
/// One node of the profile call tree.
///
/// Serialised with camelCase keys.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ProfileNode {
/// 1-based node identifier; the root is 1 and node `id` is stored at index
/// `id - 1` of [`CpuProfile::nodes`].
pub id: u32,
/// Function this node represents.
pub call_frame: CallFrame,
/// Number of samples whose innermost frame was this node.
pub hit_count: u32,
/// Ids of the child nodes; omitted from the serialised output when empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub children: Vec<u32>,
}
/// Result of a profiling session: the call tree plus the per-sample data.
///
/// Serialised with camelCase keys (`startTime`, `endTime`, `timeDeltas`, ...).
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct CpuProfile {
/// All tree nodes; index 0 is the synthetic `"(root)"` node with id 1.
pub nodes: Vec<ProfileNode>,
/// Session start, in microseconds since the Unix epoch.
pub start_time: u64,
/// Session end, in microseconds since the Unix epoch.
pub end_time: u64,
/// For each recorded sample, the id of the node that was hit.
pub samples: Vec<u32>,
/// For each recorded sample, the microseconds elapsed since the previous
/// sample (the first entry is measured from `start_time`).
pub time_deltas: Vec<u32>,
}
/// Records one call-stack sample if a session is active and a sample has been
/// requested; otherwise does nothing.
///
/// When no session is active the function returns before touching
/// `SAMPLE_NEEDED`, so a pending request is left in place. When a request is
/// consumed, the current call stack and timestamp are appended to this
/// thread's sample buffer.
pub fn maybe_record_sample() {
    // `||` short-circuits: the request flag is only swapped while a session
    // is active.
    if !PROFILING_ACTIVE.load(Ordering::Relaxed) || !SAMPLE_NEEDED.swap(false, Ordering::AcqRel) {
        return;
    }

    // The stack is captured before the timestamp is read.
    let frames = capture_call_stack();
    let taken_at = now_micros();
    SAMPLES.with(|buffer| buffer.borrow_mut().push((taken_at, frames)));
}
/// Returns the current wall-clock time in microseconds since the Unix epoch.
///
/// Yields 0 when the system clock reports a time before the epoch.
#[inline]
fn now_micros() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_micros() as u64,
        Err(_) => 0,
    }
}
/// Installs the `SIGPROF` handler used to request samples.
///
/// The handler only raises [`SAMPLE_NEEDED`]; the actual stack capture happens
/// later in [`maybe_record_sample`]. Compiled only for x86_64 Unix targets.
///
/// NOTE(review): the return values of `sigemptyset`/`sigaction` are ignored,
/// and `sa_flags` is 0 (no `SA_RESTART`), so system calls interrupted by the
/// signal on the target thread may fail with `EINTR` - confirm this is intended.
#[cfg(all(target_arch = "x86_64", unix))]
fn setup_sigprof() {
use std::mem;
// Signal handler: restricted to a single atomic store (no allocation, no locks).
extern "C" fn sigprof_handler(_sig: libc::c_int) {
SAMPLE_NEEDED.store(true, Ordering::Release);
}
// SAFETY: `sa` is zeroed and then has its handler, mask and flags set before
// being passed to `sigaction`; the old-action pointer is null, so nothing is
// written back. Assumes an all-zero `libc::sigaction` is a valid value.
unsafe {
let mut sa: libc::sigaction = mem::zeroed();
sa.sa_sigaction = sigprof_handler as *const () as libc::sighandler_t;
// No additional signals are blocked while the handler runs.
libc::sigemptyset(&mut sa.sa_mask);
sa.sa_flags = 0;
libc::sigaction(libc::SIGPROF, &sa, std::ptr::null_mut());
}
}
/// Disarms the profiling interval timer and restores the default `SIGPROF`
/// disposition. Compiled only for x86_64 Unix targets.
///
/// NOTE(review): nothing in the visible code arms `ITIMER_PROF`, so the
/// `setitimer` call looks purely defensive. Also, the default action for
/// `SIGPROF` is normally to terminate the process - confirm that no `SIGPROF`
/// can still be delivered after this runs.
#[cfg(all(target_arch = "x86_64", unix))]
fn teardown_sigprof() {
use std::mem;
// SAFETY: both structs are zero-initialised plain C structs passed by
// reference to libc; the old-value pointers are null, so nothing is written
// back. Assumes all-zero `itimerval`/`sigaction` values are valid.
unsafe {
// An all-zero `itimerval` disables the timer.
let timer: libc::itimerval = mem::zeroed();
libc::setitimer(libc::ITIMER_PROF, &timer, std::ptr::null_mut());
let mut sa: libc::sigaction = mem::zeroed();
sa.sa_sigaction = libc::SIG_DFL;
libc::sigemptyset(&mut sa.sa_mask);
libc::sigaction(libc::SIGPROF, &sa, std::ptr::null_mut());
}
}
/// Handle for a sampling CPU profiling session.
///
/// The "session active" state itself lives in the process-wide
/// `PROFILING_ACTIVE` flag; this struct only owns the timer thread that
/// periodically requests samples.
pub struct CpuProfiler {
/// Flag shared with the timer thread; set to `true` to ask it to exit.
timer_stop: Option<Arc<AtomicBool>>,
/// Join handle of the timer thread while a session started by this value is running.
timer_handle: Option<std::thread::JoinHandle<()>>,
/// Thread that called `start`; the timer thread sends `SIGPROF` to it.
#[cfg(all(target_arch = "x86_64", unix))]
interpreter_thread: libc::pthread_t,
}
impl CpuProfiler {
    /// Creates an idle profiler; no thread is spawned and no global state is
    /// touched until [`CpuProfiler::start`] is called.
    pub fn new() -> Self {
        Self {
            timer_stop: None,
            timer_handle: None,
            #[cfg(all(target_arch = "x86_64", unix))]
            interpreter_thread: 0,
        }
    }

    /// Starts a profiling session that requests a sample roughly every
    /// `interval_micros` microseconds.
    ///
    /// Clears this thread's sample buffer, records the session start time and
    /// spawns the `stator-profiler-timer` thread. On x86_64 Unix the calling
    /// thread is remembered and a `SIGPROF` handler is installed; the timer
    /// thread then signals that thread on every tick.
    ///
    /// Samples and the start time are stored in thread-locals, so `start`,
    /// `maybe_record_sample` and [`CpuProfiler::stop`] must run on the same
    /// thread for the samples to show up in the profile.
    ///
    /// An interval of 0 is treated as 1 µs so the timer thread never spins
    /// without pausing.
    ///
    /// # Errors
    ///
    /// Returns `StatorError::Internal` if a session is already active, or if
    /// the timer thread cannot be spawned. In the latter case the session is
    /// rolled back (signal handler removed, active flag cleared) so a later
    /// `start` can succeed.
    pub fn start(&mut self, interval_micros: u64) -> StatorResult<()> {
        if PROFILING_ACTIVE.swap(true, Ordering::SeqCst) {
            return Err(StatorError::Internal(
                "CPU profiler: a session is already active".into(),
            ));
        }
        SAMPLES.with(|s| s.borrow_mut().clear());
        SESSION_START_MICROS.with(|t| t.set(now_micros()));
        SAMPLE_NEEDED.store(false, Ordering::Release);

        #[cfg(all(target_arch = "x86_64", unix))]
        {
            // SAFETY: `pthread_self` takes no arguments and only returns the
            // calling thread's id.
            self.interpreter_thread = unsafe { libc::pthread_self() };
            setup_sigprof();
        }

        let stop_flag = Arc::new(AtomicBool::new(false));
        let stop_clone = Arc::clone(&stop_flag);
        let interval = Duration::from_micros(interval_micros.max(1));
        #[cfg(all(target_arch = "x86_64", unix))]
        let interp_tid = self.interpreter_thread;

        let spawned = std::thread::Builder::new()
            .name("stator-profiler-timer".into())
            .spawn(move || loop {
                // Wait one interval using `park_timeout` rather than `sleep`,
                // so that `stop()` can wake this thread immediately instead of
                // blocking in `join` for the rest of the interval.
                let deadline = std::time::Instant::now().checked_add(interval);
                'wait: loop {
                    if stop_clone.load(Ordering::Acquire) {
                        return;
                    }
                    match deadline {
                        Some(deadline) => {
                            let now = std::time::Instant::now();
                            if now >= deadline {
                                break 'wait;
                            }
                            // May return early (spurious wake-up or unpark);
                            // the loop re-checks both the flag and the clock.
                            std::thread::park_timeout(deadline.saturating_duration_since(now));
                        }
                        // Interval too large to represent as a deadline:
                        // just wait until `stop()` unparks us.
                        None => std::thread::park(),
                    }
                }

                // SAFETY: sends a signal to the thread id captured in `start`.
                // NOTE(review): assumes that thread is still alive while the
                // session runs.
                #[cfg(all(target_arch = "x86_64", unix))]
                unsafe {
                    libc::pthread_kill(interp_tid, libc::SIGPROF);
                }
                SAMPLE_NEEDED.store(true, Ordering::Release);
            });

        let handle = match spawned {
            Ok(handle) => handle,
            Err(e) => {
                // Roll back everything done above; otherwise the profiler
                // would stay "active" forever with no timer thread behind it.
                PROFILING_ACTIVE.store(false, Ordering::SeqCst);
                #[cfg(all(target_arch = "x86_64", unix))]
                teardown_sigprof();
                return Err(StatorError::Internal(format!(
                    "profiler timer thread: {e}"
                )));
            }
        };

        self.timer_stop = Some(stop_flag);
        self.timer_handle = Some(handle);
        Ok(())
    }

    /// Ends the active session and returns the collected profile.
    ///
    /// Returns `None` when no session is active. Otherwise the timer thread is
    /// woken and joined, the `SIGPROF` handler is removed (x86_64 Unix), and
    /// the samples recorded on the calling thread are drained into a
    /// [`CpuProfile`].
    ///
    /// NOTE(review): the active flag is process-global, so calling `stop` on a
    /// value that did not start the session still ends it, while the timer
    /// thread owned by the starting value keeps running until that value is
    /// stopped too.
    pub fn stop(&mut self) -> Option<CpuProfile> {
        if !PROFILING_ACTIVE.swap(false, Ordering::SeqCst) {
            return None;
        }
        if let Some(flag) = self.timer_stop.take() {
            flag.store(true, Ordering::Release);
        }
        if let Some(handle) = self.timer_handle.take() {
            // Wake the timer thread out of its wait so `join` returns promptly.
            handle.thread().unpark();
            let _ = handle.join();
        }
        #[cfg(all(target_arch = "x86_64", unix))]
        teardown_sigprof();

        let end_time = now_micros();
        let start_time = SESSION_START_MICROS.with(|t| t.get());
        let samples: Vec<(u64, Vec<&'static str>)> =
            SAMPLES.with(|s| s.borrow_mut().drain(..).collect());
        Some(build_profile_tree(&samples, start_time, end_time))
    }
}
impl Default for CpuProfiler {
fn default() -> Self {
Self::new()
}
}
/// Builds a [`CpuProfile`] from raw `(timestamp, call stack)` samples.
///
/// The tree always starts with a synthetic `"(root)"` node (id 1). Each
/// sample's stack is walked from its first entry (outermost frame) to its
/// last, creating nodes as needed; the node reached at the end gets its
/// `hit_count` incremented and its id appended to `samples`. A sample with an
/// empty stack is attributed to the root.
///
/// `time_deltas[i]` is the time between sample `i` and the previous sample
/// (or `start_time` for the first one), in the same unit as the timestamps.
/// A timestamp earlier than its predecessor yields 0, and a gap that does not
/// fit in `u32` is clamped to `u32::MAX` instead of silently wrapping.
///
/// `start_time` and `end_time` are copied into the result unchanged.
fn build_profile_tree(
    samples: &[(u64, Vec<&'static str>)],
    start_time: u64,
    end_time: u64,
) -> CpuProfile {
    let root = ProfileNode {
        id: 1,
        call_frame: CallFrame {
            function_name: "(root)".to_string(),
            script_id: "0".to_string(),
            url: String::new(),
            line_number: 0,
            column_number: 0,
        },
        hit_count: 0,
        children: vec![],
    };
    let mut nodes: Vec<ProfileNode> = vec![root];
    let mut sample_ids: Vec<u32> = Vec::with_capacity(samples.len());
    let mut time_deltas: Vec<u32> = Vec::with_capacity(samples.len());

    let mut prev_ts = start_time;
    for (ts, stack) in samples {
        // Saturate in both directions: never negative, never wrapped.
        let elapsed = ts.saturating_sub(prev_ts);
        time_deltas.push(elapsed.min(u64::from(u32::MAX)) as u32);
        prev_ts = *ts;

        // Descend from the root (id 1) along the stack, creating nodes on demand.
        let leaf_id = stack.iter().fold(1u32, |parent_id, frame_name| {
            find_or_create_child(&mut nodes, parent_id, frame_name)
        });
        nodes[(leaf_id - 1) as usize].hit_count += 1;
        sample_ids.push(leaf_id);
    }

    CpuProfile {
        nodes,
        start_time,
        end_time,
        samples: sample_ids,
        time_deltas,
    }
}
/// Returns the id of the child of `parent_id` whose function name is `name`,
/// appending a new node to `nodes` (and to the parent's child list) when no
/// such child exists yet.
///
/// Node ids are 1-based: node `id` lives at `nodes[id - 1]`, and a newly
/// created node receives the id `nodes.len() + 1`. New nodes get script id
/// `"0"`, an empty URL and zero line/column numbers.
fn find_or_create_child(nodes: &mut Vec<ProfileNode>, parent_id: u32, name: &str) -> u32 {
    let parent_slot = (parent_id - 1) as usize;

    // Reuse an existing child with the same function name, if there is one.
    let existing = nodes[parent_slot]
        .children
        .iter()
        .copied()
        .find(|&candidate| nodes[(candidate - 1) as usize].call_frame.function_name == name);
    if let Some(found) = existing {
        return found;
    }

    // Otherwise append a fresh node and link it under the parent.
    let fresh_id = nodes.len() as u32 + 1;
    let call_frame = CallFrame {
        function_name: name.to_string(),
        script_id: "0".to_string(),
        url: String::new(),
        line_number: 0,
        column_number: 0,
    };
    nodes.push(ProfileNode {
        id: fresh_id,
        call_frame,
        hit_count: 0,
        children: vec![],
    });
    nodes[parent_slot].children.push(fresh_id);
    fresh_id
}
// Unit tests: pure tree-building tests first, then tests that drive the
// global profiler state (serialised through `profiler_lock`).
#[cfg(test)]
mod tests {
use std::cell::RefCell;
use std::rc::Rc;
use std::time::Duration;
use super::*;
use crate::builtins::error::{pop_call_frame, push_call_frame};
use crate::bytecode::bytecode_generator::BytecodeGenerator;
use crate::interpreter::{Interpreter, InterpreterFrame};
use crate::parser;
// With no samples the profile contains only the synthetic root node.
#[test]
fn test_build_profile_tree_empty() {
let profile = build_profile_tree(&[], 1000, 2000);
assert_eq!(profile.nodes.len(), 1);
assert_eq!(profile.nodes[0].call_frame.function_name, "(root)");
assert!(profile.samples.is_empty());
assert!(profile.time_deltas.is_empty());
assert_eq!(profile.start_time, 1000);
assert_eq!(profile.end_time, 2000);
}
// One two-frame sample creates root -> outer -> inner and hits only the leaf.
#[test]
fn test_build_profile_tree_single_sample() {
let samples = vec![(1500u64, vec!["outer", "inner"])];
let profile = build_profile_tree(&samples, 1000, 2000);
assert_eq!(profile.nodes.len(), 3);
assert_eq!(profile.nodes[0].call_frame.function_name, "(root)");
assert_eq!(profile.nodes[1].call_frame.function_name, "outer");
assert_eq!(profile.nodes[2].call_frame.function_name, "inner");
assert_eq!(profile.nodes[2].hit_count, 1);
assert_eq!(profile.nodes[0].hit_count, 0);
// The sample points at node id 3 ("inner"); the delta is 1500 - 1000.
assert_eq!(profile.samples, vec![3u32]); assert_eq!(profile.time_deltas, vec![500u32]);
}
// Stacks sharing the prefix "outer" share one "outer" node with two children.
#[test]
fn test_build_profile_tree_merges_common_prefixes() {
let samples = vec![(1100u64, vec!["outer", "a"]), (1200u64, vec!["outer", "b"])];
let profile = build_profile_tree(&samples, 1000, 2000);
assert_eq!(profile.nodes.len(), 4);
let outer = profile
.nodes
.iter()
.find(|n| n.call_frame.function_name == "outer")
.unwrap();
assert_eq!(outer.children.len(), 2);
}
// Deltas are measured from the previous sample, the first one from start_time.
#[test]
fn test_build_profile_tree_time_deltas() {
let samples = vec![
(1100u64, vec!["f"]),
(1300u64, vec!["f"]),
(1700u64, vec!["f"]),
];
let profile = build_profile_tree(&samples, 1000, 2000);
assert_eq!(profile.time_deltas, vec![100u32, 200u32, 400u32]);
}
// Serialises the tests that touch the process-wide profiler flags. A poisoned
// lock (an earlier test panicked while holding it) is still handed out.
fn profiler_lock() -> std::sync::MutexGuard<'static, ()> {
static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
LOCK.lock().unwrap_or_else(|e| e.into_inner())
}
// `stop` without a running session yields no profile.
#[test]
fn test_profiler_stop_without_start_returns_none() {
let _g = profiler_lock();
// Reset the global flag in case an earlier test left it set.
PROFILING_ACTIVE.store(false, Ordering::SeqCst);
let mut p = CpuProfiler::new();
assert!(p.stop().is_none());
}
// Only one session may be active at a time, even across profiler instances.
#[test]
fn test_profiler_double_start_returns_error() {
let _g = profiler_lock();
PROFILING_ACTIVE.store(false, Ordering::SeqCst);
let mut p1 = CpuProfiler::new();
let mut p2 = CpuProfiler::new();
p1.start(100_000).expect("first start should succeed");
let result = p2.start(100_000);
assert!(result.is_err(), "second start must fail");
p1.stop();
}
// Smoke test: run a small script under a 1 ms sampling interval and check the
// profile is well-formed. No particular number of samples is asserted.
#[test]
fn test_profiler_collects_samples_during_js_execution() {
let _g = profiler_lock();
PROFILING_ACTIVE.store(false, Ordering::SeqCst);
let mut profiler = CpuProfiler::new();
profiler.start(1_000).expect("start");
let src = "1 + 2 + 3";
let bytecodes = parser::parse(src)
.and_then(|p| BytecodeGenerator::compile_program(&p))
.expect("compile");
let mut frame = InterpreterFrame::new_with_globals(
Rc::new(bytecodes),
vec![],
Rc::new(RefCell::new(crate::interpreter::GlobalEnv::new())),
);
Interpreter::run(&mut frame).expect("run");
let profile = profiler.stop().expect("stop returns Some");
assert!(!profile.nodes.is_empty(), "profile must have nodes");
assert!(profile.start_time <= profile.end_time);
}
// A manually requested sample taken inside a pushed call frame must show up
// as a node named after that frame.
#[test]
fn test_profiler_records_call_stack_frames() {
let _g = profiler_lock();
PROFILING_ACTIVE.store(false, Ordering::SeqCst);
SAMPLES.with(|s| s.borrow_mut().clear());
let mut profiler = CpuProfiler::new();
// Presumably the 10 s interval is chosen so the timer thread does not
// request a sample of its own during the test.
profiler.start(10_000_000).expect("start");
let _ = push_call_frame("myFunction");
SAMPLE_NEEDED.store(true, Ordering::Release);
maybe_record_sample();
pop_call_frame();
let profile = profiler.stop().expect("stop returns Some");
let has_my_fn = profile
.nodes
.iter()
.any(|n| n.call_frame.function_name == "myFunction");
assert!(has_my_fn, "profile must contain myFunction node");
assert_eq!(profile.samples.len(), 1);
}
// The serialised profile exposes the expected camelCase keys.
#[test]
fn test_profiler_serialises_to_cdp_json() {
let _g = profiler_lock();
PROFILING_ACTIVE.store(false, Ordering::SeqCst);
SAMPLES.with(|s| s.borrow_mut().clear());
let mut profiler = CpuProfiler::new();
profiler.start(10_000_000).expect("start");
let _ = push_call_frame("greet");
SAMPLE_NEEDED.store(true, Ordering::Release);
maybe_record_sample();
pop_call_frame();
let profile = profiler.stop().expect("stop returns Some");
let json = serde_json::to_value(&profile).expect("serialise");
assert!(json["nodes"].is_array());
assert!(json["startTime"].is_number());
assert!(json["endTime"].is_number());
assert!(json["samples"].is_array());
assert!(json["timeDeltas"].is_array());
for node in json["nodes"].as_array().unwrap() {
assert!(node["callFrame"]["functionName"].is_string());
}
}
// With no active session a pending sample request must not record anything.
#[test]
fn test_maybe_record_sample_noop_when_inactive() {
let _g = profiler_lock();
PROFILING_ACTIVE.store(false, Ordering::SeqCst);
SAMPLES.with(|s| s.borrow_mut().clear());
SAMPLE_NEEDED.store(true, Ordering::Release);
assert!(
SAMPLE_NEEDED.load(Ordering::Acquire),
"SAMPLE_NEEDED must be true before the call"
);
maybe_record_sample();
let count = SAMPLES.with(|s| s.borrow().len());
assert_eq!(count, 0, "no sample should have been recorded");
// Leave the global request flag clean for the next test.
SAMPLE_NEEDED.store(false, Ordering::Release);
}
// start/stop around a short sleep must yield end_time >= start_time.
#[test]
fn test_profiler_profile_has_duration() {
let _g = profiler_lock();
PROFILING_ACTIVE.store(false, Ordering::SeqCst);
let mut profiler = CpuProfiler::new();
profiler.start(10_000_000).expect("start");
std::thread::sleep(Duration::from_millis(5));
let profile = profiler.stop().expect("stop");
assert!(
profile.end_time >= profile.start_time,
"end_time ({}) must be >= start_time ({})",
profile.end_time,
profile.start_time
);
}
}