mod ml_analysis;
mod output;
mod syscall_handling;
use anyhow::{Context, Result};
use nix::sys::ptrace;
use nix::sys::wait::{waitpid, WaitStatus};
use nix::unistd::{fork, ForkResult, Pid};
use std::os::unix::process::CommandExt;
use std::process::Command;
use tracing::{info, trace, warn};
/// Event record sent through the optional `visualizer_sink` channel.
///
/// NOTE(review): the producer lives outside this file; the field descriptions
/// below are inferred from the field names and should be confirmed there.
#[derive(Debug, Clone)]
pub struct VisualizerEvent {
    /// Name attached to the event (presumably the syscall name).
    pub name: String,
    /// Duration in microseconds.
    pub duration_us: u64,
    /// Result value (presumably the raw syscall return value).
    pub result: i64,
    /// Raw process id the event belongs to.
    pub pid: i32,
}
/// All knobs that control a tracing session.
///
/// Field comments describe only how this file uses each value; several fields
/// are simply forwarded to other modules.
pub struct TracerConfig {
    /// Enables lazy per-process DWARF loading; also forwarded to CSV/HTML output.
    pub enable_source: bool,
    /// Syscall filter forwarded to syscall-entry handling.
    pub filter: crate::filter::SyscallFilter,
    /// Creates a statistics tracker and selects the CSV statistics writer.
    pub statistics_mode: bool,
    /// Enables entry timestamps; forwarded to the outputs and exit handling.
    pub timing_mode: bool,
    /// Selects which structured output writer (JSON/CSV/HTML), if any, is created.
    pub output_format: crate::cli::OutputFormat,
    /// Wait on any child and auto-trace fork/vfork/clone children.
    pub follow_forks: bool,
    /// Creates a `ProfilingContext` that measures the tracer itself.
    pub profile_self: bool,
    /// Creates a `FunctionProfiler`; also forwarded to syscall-entry handling.
    pub function_time: bool,
    /// Copied into the analysis configuration used when printing summaries.
    pub stats_extended: bool,
    /// Passed to the anomaly detector and to the analysis configuration.
    pub anomaly_threshold: f32,
    /// Creates the real-time `AnomalyDetector`.
    pub anomaly_realtime: bool,
    /// Window size passed to the anomaly detector.
    pub anomaly_window_size: usize,
    /// Copied into the analysis configuration.
    pub hpu_analysis: bool,
    /// Copied into the analysis configuration.
    pub hpu_cpu_only: bool,
    /// Forces a statistics tracker; copied into the analysis configuration.
    pub ml_anomaly: bool,
    /// Copied into the analysis configuration.
    pub ml_clusters: usize,
    /// Copied into the analysis configuration.
    pub ml_compare: bool,
    /// Forces a statistics tracker; copied into the analysis configuration.
    pub ml_outliers: bool,
    /// Copied into the analysis configuration.
    pub ml_outlier_threshold: f32,
    /// Copied into the analysis configuration.
    pub ml_outlier_trees: usize,
    /// Copied into the analysis configuration.
    pub explain: bool,
    /// Forces a statistics tracker; copied into the analysis configuration.
    pub dl_anomaly: bool,
    /// Copied into the analysis configuration.
    pub dl_threshold: f32,
    /// Copied into the analysis configuration.
    pub dl_hidden_size: usize,
    /// Copied into the analysis configuration.
    pub dl_epochs: usize,
    /// Creates a `DecisionTracer`.
    pub trace_transpiler_decisions: bool,
    /// Optional transpiler map forwarded to syscall-entry handling.
    pub transpiler_map: Option<crate::transpiler_map::TranspilerMap>,
    /// OTLP endpoint; an exporter is created only when set (feature `otlp`).
    pub otlp_endpoint: Option<String>,
    /// Service name handed to the OTLP exporter configuration.
    pub otlp_service_name: String,
    /// Optional parent trace context string; the environment is the fallback.
    pub trace_parent: Option<String>,
    /// Optional chaos settings whose limits are applied in the child before exec.
    pub chaos_config: Option<crate::chaos::ChaosConfig>,
    /// Optional channel sender cloned into the tracer state for visualizer events.
    pub visualizer_sink: Option<std::sync::mpsc::Sender<VisualizerEvent>>,
}
impl Default for TracerConfig {
    /// Builds a configuration with every optional feature switched off, the
    /// pass-everything syscall filter and the default output format.
    fn default() -> Self {
        Self {
            // Core tracing behaviour.
            enable_source: false,
            filter: crate::filter::SyscallFilter::all(),
            statistics_mode: false,
            timing_mode: false,
            output_format: crate::cli::OutputFormat::default(),
            follow_forks: false,
            profile_self: false,
            function_time: false,
            stats_extended: false,

            // Anomaly detection.
            anomaly_threshold: 2.0,
            anomaly_realtime: false,
            anomaly_window_size: 100,

            // HPU / ML / DL analysis.
            hpu_analysis: false,
            hpu_cpu_only: false,
            ml_anomaly: false,
            ml_clusters: 5,
            ml_compare: false,
            ml_outliers: false,
            ml_outlier_threshold: 0.1,
            ml_outlier_trees: 100,
            explain: false,
            dl_anomaly: false,
            dl_threshold: 2.0,
            dl_hidden_size: 8,
            dl_epochs: 50,

            // Transpiler integration.
            trace_transpiler_decisions: false,
            transpiler_map: None,

            // OTLP export.
            otlp_endpoint: None,
            otlp_service_name: String::from("renacer"),
            trace_parent: None,

            // Chaos limits and visualizer channel.
            chaos_config: None,
            visualizer_sink: None,
        }
    }
}
/// Attaches to an already running process and traces it until it exits.
///
/// The stop caused by `PTRACE_ATTACH` is deliberately *not* reaped here:
/// `trace_child` calls `setup_ptrace_options`, which performs the initial
/// `waitpid` on the tracee itself. Reaping the stop in both places would
/// consume the only pending notification here and leave that second
/// `waitpid` blocked forever, because a stopped tracee produces no further
/// state change until it is resumed.
///
/// # Errors
/// Returns an error if `PTRACE_ATTACH` fails or if the tracing loop fails.
pub fn attach_to_pid(pid: i32, config: TracerConfig) -> Result<i32> {
    let pid = Pid::from_raw(pid);
    // `with_context` builds the message only when the attach actually fails.
    ptrace::attach(pid).with_context(|| format!("Failed to attach to PID {pid}"))?;
    eprintln!("[renacer: Attached to process {pid}]");
    trace_child(pid, config)
}
#[allow(unsafe_code)]
pub fn trace_command(command: &[String], config: TracerConfig) -> Result<i32> {
if command.is_empty() {
anyhow::bail!("Command array is empty");
}
let program = &command[0];
let args = &command[1..];
let chaos_config = config.chaos_config.clone();
match unsafe { fork() }.context("Failed to fork")? {
ForkResult::Parent { child } => trace_child(child, config),
ForkResult::Child => {
ptrace::traceme().context("Failed to PTRACE_TRACEME")?;
if let Some(ref chaos) = chaos_config {
if let Err(e) = chaos.apply_limits() {
eprintln!("[renacer: Warning: Failed to apply chaos limits: {e}]");
}
}
let err = Command::new(program).args(args).exec();
eprintln!("Failed to exec {program}: {err}");
std::process::exit(1);
}
}
}
/// Mutable per-session state: every optional collector or output writer that
/// syscall handling may feed. A `None` entry means the feature is disabled.
struct Tracers {
    /// Self-profiling context (created when `profile_self` is set).
    profiling_ctx: Option<crate::profiling::ProfilingContext>,
    /// Function profiler (created when `function_time` is set).
    function_profiler: Option<crate::function_profiler::FunctionProfiler>,
    /// Statistics tracker (statistics mode or any ML/DL analysis).
    stats_tracker: Option<crate::stats::StatsTracker>,
    /// JSON writer (JSON output format).
    json_output: Option<crate::json_output::JsonOutput>,
    /// CSV writer (CSV output format without statistics mode).
    csv_output: Option<crate::csv_output::CsvOutput>,
    /// CSV statistics writer (CSV output format with statistics mode).
    csv_stats_output: Option<crate::csv_output::CsvStatsOutput>,
    /// HTML writer (HTML output format).
    html_output: Option<crate::html_output::HtmlOutput>,
    /// Real-time anomaly detector (created when `anomaly_realtime` is set).
    anomaly_detector: Option<crate::anomaly::AnomalyDetector>,
    /// Decision tracer (created when `trace_transpiler_decisions` is set).
    decision_tracer: Option<crate::decision_trace::DecisionTracer>,
    /// OTLP exporter; present only when the endpoint is set and initialization succeeded.
    #[cfg(feature = "otlp")]
    otlp_exporter: Option<crate::otlp_exporter::OtlpExporter>,
    /// Clone of the configured visualizer event channel.
    visualizer_sink: Option<std::sync::mpsc::Sender<VisualizerEvent>>,
}
/// Creates the profiling-related collectors requested by `config`.
///
/// Returns `(profiling context, function profiler, anomaly detector)`; each
/// element is `Some` only when its flag (`profile_self`, `function_time`,
/// `anomaly_realtime`) is set.
fn initialize_profiling_tracers(
    config: &TracerConfig,
) -> (
    Option<crate::profiling::ProfilingContext>,
    Option<crate::function_profiler::FunctionProfiler>,
    Option<crate::anomaly::AnomalyDetector>,
) {
    let self_profiler = config.profile_self.then(crate::profiling::ProfilingContext::new);
    let fn_profiler = config.function_time.then(crate::function_profiler::FunctionProfiler::new);
    let detector = config.anomaly_realtime.then(|| {
        crate::anomaly::AnomalyDetector::new(config.anomaly_window_size, config.anomaly_threshold)
    });
    (self_profiler, fn_profiler, detector)
}
/// Creates the structured output writer that matches `config.output_format`.
///
/// Returns `(json, csv, csv statistics, html)`. At most one element is
/// `Some`: CSV picks the statistics writer when `statistics_mode` is set and
/// the per-syscall writer otherwise.
fn initialize_output_tracers(
    config: &TracerConfig,
) -> (
    Option<crate::json_output::JsonOutput>,
    Option<crate::csv_output::CsvOutput>,
    Option<crate::csv_output::CsvStatsOutput>,
    Option<crate::html_output::HtmlOutput>,
) {
    use crate::cli::OutputFormat;

    let wants_json = matches!(config.output_format, OutputFormat::Json);
    let wants_csv = matches!(config.output_format, OutputFormat::Csv);
    let wants_html = matches!(config.output_format, OutputFormat::Html);

    let json = wants_json.then(crate::json_output::JsonOutput::new);
    let csv = (wants_csv && !config.statistics_mode)
        .then(|| crate::csv_output::CsvOutput::new(config.timing_mode, config.enable_source));
    let csv_stats =
        (wants_csv && config.statistics_mode).then(crate::csv_output::CsvStatsOutput::new);
    let html = wants_html
        .then(|| crate::html_output::HtmlOutput::new(config.timing_mode, config.enable_source));

    (json, csv, csv_stats, html)
}
/// Builds the complete [`Tracers`] state for a session from `config`.
///
/// With the `otlp` feature an exporter is created when an endpoint is
/// configured. A failure to initialize it is reported on stderr and leaves
/// the exporter disabled rather than aborting the session.
fn initialize_tracers(config: &TracerConfig) -> Tracers {
    let (profiling_ctx, function_profiler, anomaly_detector) = initialize_profiling_tracers(config);
    let (json_output, csv_output, csv_stats_output, html_output) =
        initialize_output_tracers(config);

    // Statistics are collected for statistics mode and for every ML/DL analysis.
    let needs_stats =
        config.statistics_mode || config.ml_anomaly || config.ml_outliers || config.dl_anomaly;
    let stats_tracker = needs_stats.then(crate::stats::StatsTracker::new);

    let decision_tracer =
        config.trace_transpiler_decisions.then(crate::decision_trace::DecisionTracer::new);

    #[cfg(feature = "otlp")]
    let otlp_exporter = config.otlp_endpoint.as_ref().and_then(|endpoint| {
        use crate::trace_context::TraceContext;

        // An explicitly configured parent wins; the environment is the fallback.
        let explicit_parent =
            config.trace_parent.as_ref().and_then(|raw| TraceContext::parse(raw).ok());
        let trace_context = explicit_parent.or_else(TraceContext::from_env);
        if trace_context.is_some() {
            eprintln!("[renacer: Distributed tracing enabled - joining parent trace]");
        }

        let otlp_config = crate::otlp_exporter::OtlpConfig::new(
            endpoint.clone(),
            config.otlp_service_name.clone(),
        );
        match crate::otlp_exporter::OtlpExporter::new(otlp_config, trace_context) {
            Ok(exporter) => {
                eprintln!("[renacer: OTLP export enabled to {endpoint}]");
                Some(exporter)
            }
            Err(e) => {
                eprintln!("[renacer: OTLP initialization failed: {e}]");
                None
            }
        }
    });

    Tracers {
        profiling_ctx,
        function_profiler,
        stats_tracker,
        json_output,
        csv_output,
        csv_stats_output,
        html_output,
        anomaly_detector,
        decision_tracer,
        #[cfg(feature = "otlp")]
        otlp_exporter,
        visualizer_sink: config.visualizer_sink.clone(),
    }
}
/// Waits for the tracee's initial stop, applies the ptrace options and
/// resumes it. Convenience wrapper around `setup_ptrace_options_internal`
/// with the initial wait enabled.
fn setup_ptrace_options(child: Pid, follow_forks: bool) -> Result<()> {
    let wait_first = true;
    setup_ptrace_options_internal(child, follow_forks, wait_first)
}
fn setup_ptrace_options_internal(child: Pid, follow_forks: bool, wait_first: bool) -> Result<()> {
if wait_first {
trace!(pid = %child, "waiting for initial SIGSTOP");
let status = waitpid(child, None).context("Failed to wait for child")?;
trace!(pid = %child, status = ?status, "initial wait completed");
}
let mut options = ptrace::Options::PTRACE_O_TRACESYSGOOD | ptrace::Options::PTRACE_O_EXITKILL;
if follow_forks {
options |= ptrace::Options::PTRACE_O_TRACEFORK
| ptrace::Options::PTRACE_O_TRACEVFORK
| ptrace::Options::PTRACE_O_TRACECLONE;
}
trace!(pid = %child, "setting ptrace options");
ptrace::setoptions(child, options).context("Failed to set ptrace options")?;
trace!(pid = %child, "ptrace options set");
trace!(pid = %child, "sending initial PTRACE_SYSCALL");
ptrace::syscall(child, None).context("Failed to continue child with PTRACE_SYSCALL")?;
trace!(pid = %child, "initial PTRACE_SYSCALL sent");
Ok(())
}
/// Tries to load DWARF debug information for the executable of `child`.
///
/// The executable is resolved through the `/proc/<pid>/exe` link. Returns
/// `None` silently when the link cannot be read, and `None` with a warning on
/// stderr when the DWARF data cannot be loaded.
fn load_dwarf_context(child: Pid) -> Option<crate::dwarf::DwarfContext> {
    let exe_path = std::fs::read_link(format!("/proc/{child}/exe")).ok()?;

    let ctx = crate::dwarf::DwarfContext::load(&exe_path)
        .map_err(|e| {
            eprintln!("[renacer: Warning - failed to load DWARF: {e}]");
            eprintln!("[renacer: Continuing without source correlation]");
        })
        .ok()?;

    eprintln!("[renacer: DWARF debug info loaded from {}]", exe_path.display());
    Some(ctx)
}
/// Handles a `PTRACE_EVENT_*` stop reported for `pid`.
///
/// For fork/vfork/clone events the new child's PID is read from the event
/// message, the child's first stop is awaited, the ptrace options are applied
/// and the child is registered in `processes`. All other events are ignored.
///
/// The new child is resumed exactly once, by
/// `setup_ptrace_options_internal`. Issuing another `PTRACE_SYSCALL`
/// afterwards would target a tracee that is already running: it either fails
/// with `ESRCH` (which previously caused the child to be dropped from
/// `processes` as "exited immediately") or, if the child has already reached
/// its next stop, silently skips that stop and desynchronises entry/exit
/// tracking.
///
/// # Errors
/// Fails if the event message cannot be read or is not a valid PID, or if
/// waiting for the new child fails. A failure to set up the new child is only
/// logged; the child is then left untracked.
fn handle_ptrace_event(
    pid: Pid,
    event: i32,
    processes: &mut std::collections::HashMap<Pid, ProcessState>,
    config: &TracerConfig,
) -> Result<()> {
    use nix::libc;

    match event {
        libc::PTRACE_EVENT_FORK | libc::PTRACE_EVENT_VFORK | libc::PTRACE_EVENT_CLONE => {
            let new_pid_raw = ptrace::getevent(pid)
                .context("Failed to get event message for fork/vfork/clone")?;
            // Checked conversion: never silently truncate the event message.
            let new_pid = Pid::from_raw(
                i32::try_from(new_pid_raw)
                    .context("fork/vfork/clone event message is not a valid PID")?,
            );

            let wait_status = waitpid(new_pid, None).context("Failed to wait for new child")?;
            match wait_status {
                WaitStatus::Exited(_, _) | WaitStatus::Signaled(_, _, _) => {
                    eprintln!("[renacer: Process {pid} forked child {new_pid} (already exited)]");
                }
                _ => {
                    // The child is already stopped, so skip the initial wait.
                    // This call also resumes the child with PTRACE_SYSCALL.
                    match setup_ptrace_options_internal(new_pid, config.follow_forks, false) {
                        Ok(()) => {
                            processes.insert(new_pid, ProcessState::new());
                            eprintln!("[renacer: Process {pid} forked child {new_pid}]");
                        }
                        Err(e) => {
                            warn!("Failed to setup ptrace options for child {}: {}", new_pid, e);
                        }
                    }
                }
            }
        }
        _ => {
            // Other ptrace events need no bookkeeping here.
        }
    }
    Ok(())
}
/// Processes one syscall stop of `child`, alternating between entry and exit.
///
/// `in_syscall`, `current_syscall_entry` and `syscall_entry_time` are the
/// per-process state that carries information from the entry stop to the
/// matching exit stop; they are reset once the exit has been handled.
///
/// NOTE(review): the entry timestamp is only taken for timing mode,
/// statistics mode or structured output. Other consumers of `duration_us`
/// downstream would therefore see 0 when enabled on their own — confirm this
/// is intended.
///
/// # Errors
/// Propagates failures from entry or exit handling. On an exit failure the
/// per-process state is left untouched.
fn handle_syscall_event(
    child: Pid,
    in_syscall: &mut bool,
    current_syscall_entry: &mut Option<SyscallEntry>,
    syscall_entry_time: &mut Option<std::time::Instant>,
    dwarf_ctx: Option<&crate::dwarf::DwarfContext>,
    config: &TracerConfig,
    tracers: &mut Tracers,
) -> Result<()> {
    let structured_output = tracers.json_output.is_some()
        || tracers.csv_output.is_some()
        || tracers.csv_stats_output.is_some()
        || tracers.html_output.is_some();

    // Entry stop: remember when and what was called, then flip the state.
    if !*in_syscall {
        let wants_timestamp = config.timing_mode || config.statistics_mode || structured_output;
        if wants_timestamp {
            *syscall_entry_time = Some(std::time::Instant::now());
        }
        *current_syscall_entry = process_syscall_entry(
            child,
            dwarf_ctx,
            config,
            tracers.profiling_ctx.as_mut(),
            structured_output,
        )?;
        *in_syscall = true;
        return Ok(());
    }

    // Exit stop: report the call with its elapsed time (0 if none was recorded).
    let duration_us =
        syscall_entry_time.map_or(0, |started| started.elapsed().as_micros() as u64);
    process_syscall_exit(child, current_syscall_entry, tracers, config.timing_mode, duration_us)?;

    *current_syscall_entry = None;
    *syscall_entry_time = None;
    *in_syscall = false;
    Ok(())
}
/// Runs syscall-entry handling for `child`, wrapped in the self-profiler's
/// `measure` (category `Other`) when a profiling context is supplied.
///
/// Returns whatever `syscall_handling::handle_syscall_entry` returns.
fn process_syscall_entry(
    child: Pid,
    dwarf_ctx: Option<&crate::dwarf::DwarfContext>,
    config: &TracerConfig,
    profiling_ctx: Option<&mut crate::profiling::ProfilingContext>,
    structured_output: bool,
) -> Result<Option<SyscallEntry>> {
    // One closure for both paths so the argument list exists only once.
    let run_entry = || {
        syscall_handling::handle_syscall_entry(
            child,
            dwarf_ctx,
            &config.filter,
            config.statistics_mode,
            structured_output,
            config.function_time,
            config.transpiler_map.as_ref(),
        )
    };

    match profiling_ctx {
        Some(prof) => prof.measure(crate::profiling::ProfilingCategory::Other, run_entry),
        None => run_entry(),
    }
}
/// Runs syscall-exit handling for `child`, measured by the self-profiler when
/// one is active.
///
/// The profiling context is temporarily moved out of `tracers` so that
/// `tracers` can be handed mutably to the exit handler while the profiler is
/// measuring; it is put back afterwards regardless of the handler's result.
fn process_syscall_exit(
    child: Pid,
    current_syscall_entry: &Option<SyscallEntry>,
    tracers: &mut Tracers,
    timing_mode: bool,
    duration_us: u64,
) -> Result<()> {
    let Some(mut profiler) = tracers.profiling_ctx.take() else {
        // No self-profiling: call the handler directly.
        return syscall_handling::handle_syscall_exit(
            child,
            current_syscall_entry,
            tracers,
            timing_mode,
            duration_us,
        );
    };

    let outcome = profiler.measure(crate::profiling::ProfilingCategory::Other, || {
        syscall_handling::handle_syscall_exit(
            child,
            current_syscall_entry,
            tracers,
            timing_mode,
            duration_us,
        )
    });
    profiler.record_syscall();
    tracers.profiling_ctx = Some(profiler);
    outcome
}
/// Analysis settings handed to the summary printer. Every field is a plain
/// copy of the `TracerConfig` field with the same name.
struct AnalysisConfig {
    stats_extended: bool,
    anomaly_threshold: f32,
    hpu_analysis: bool,
    hpu_cpu_only: bool,
    ml_anomaly: bool,
    ml_clusters: usize,
    ml_compare: bool,
    ml_outliers: bool,
    ml_outlier_threshold: f32,
    ml_outlier_trees: usize,
    dl_anomaly: bool,
    dl_threshold: f32,
    dl_hidden_size: usize,
    dl_epochs: usize,
    explain: bool,
}
/// Per-process bookkeeping kept by the tracer for every traced PID.
#[derive(Debug)]
struct ProcessState {
    /// True between a syscall-entry stop and the matching exit stop.
    in_syscall: bool,
    /// Entry information captured at the entry stop, consumed at the exit stop.
    current_syscall_entry: Option<SyscallEntry>,
    /// Timestamp taken at the entry stop when timing information is wanted.
    syscall_entry_time: Option<std::time::Instant>,
    /// DWARF context of the process, if it was loaded successfully.
    dwarf_ctx: Option<crate::dwarf::DwarfContext>,
    /// Set once a DWARF load has been attempted, so it is tried only once.
    dwarf_loaded: bool,
}
impl ProcessState {
    /// Creates the state of a freshly discovered process: not inside a
    /// syscall, nothing recorded, and no DWARF load attempted yet.
    fn new() -> Self {
        Self {
            in_syscall: false,
            current_syscall_entry: None,
            syscall_entry_time: None,
            dwarf_ctx: None,
            dwarf_loaded: false,
        }
    }
}
fn handle_traced_process_status(
status: WaitStatus,
processes: &mut std::collections::HashMap<Pid, ProcessState>,
main_pid: Pid,
main_exit_code: &mut i32,
config: &TracerConfig,
) -> Result<Option<Pid>> {
match status {
WaitStatus::Exited(p, code) => {
processes.remove(&p);
if p == main_pid {
*main_exit_code = code;
}
Ok(None)
}
WaitStatus::Signaled(p, sig, _) => {
eprintln!("Process {p} killed by signal: {sig:?}");
processes.remove(&p);
if p == main_pid {
*main_exit_code = 128 + sig as i32;
}
Ok(None)
}
WaitStatus::PtraceSyscall(p) => Ok(Some(p)),
WaitStatus::PtraceEvent(p, _sig, event) => {
handle_ptrace_event(p, event, processes, config)?;
ptrace::syscall(p, None).context("Failed to PTRACE_SYSCALL after event")?;
Ok(None)
}
_ => {
if let Some(p) = status.pid() {
ptrace::syscall(p, None).ok();
}
Ok(None)
}
}
}
/// Handles a syscall stop of `pid` and resumes the process.
///
/// A PID that is not registered in `processes` is resumed best-effort and
/// otherwise ignored. When source correlation is enabled, the DWARF context
/// is loaded on the first syscall stop of each process (attempted once).
///
/// # Errors
/// Fails if syscall handling fails or the process cannot be resumed.
fn process_syscall_for_pid(
    pid: Pid,
    processes: &mut std::collections::HashMap<Pid, ProcessState>,
    config: &TracerConfig,
    tracers: &mut Tracers,
) -> Result<()> {
    let Some(state) = processes.get_mut(&pid) else {
        ptrace::syscall(pid, None).ok();
        return Ok(());
    };

    let needs_dwarf_attempt = config.enable_source && !state.dwarf_loaded;
    if needs_dwarf_attempt {
        // Mark first so a failed load is not retried on every stop.
        state.dwarf_loaded = true;
        state.dwarf_ctx = load_dwarf_context(pid);
    }

    handle_syscall_event(
        pid,
        &mut state.in_syscall,
        &mut state.current_syscall_entry,
        &mut state.syscall_entry_time,
        state.dwarf_ctx.as_ref(),
        config,
        tracers,
    )?;

    ptrace::syscall(pid, None).context("Failed to PTRACE_SYSCALL")
}
/// Starts the OTLP root span for the traced process, if an exporter exists.
///
/// The span is named after the first NUL-separated element of
/// `/proc/<pid>/cmdline`; when that file cannot be read as text the name
/// falls back to `pid:<pid>`.
#[cfg(feature = "otlp")]
fn start_otlp_root_span(tracers: &mut Tracers, child: Pid) {
    let Some(exporter) = tracers.otlp_exporter.as_mut() else {
        return;
    };

    let fallback_name = || format!("pid:{child}");
    let program_name = match std::fs::read_to_string(format!("/proc/{child}/cmdline")) {
        Ok(cmdline) => match cmdline.split('\0').next() {
            Some(argv0) => argv0.to_string(),
            None => fallback_name(),
        },
        Err(_) => fallback_name(),
    };

    exporter.start_root_span(&program_name, child.as_raw());
}
fn build_analysis_config(config: &TracerConfig) -> AnalysisConfig {
AnalysisConfig {
stats_extended: config.stats_extended,
anomaly_threshold: config.anomaly_threshold,
hpu_analysis: config.hpu_analysis,
hpu_cpu_only: config.hpu_cpu_only,
ml_anomaly: config.ml_anomaly,
ml_clusters: config.ml_clusters,
ml_compare: config.ml_compare,
ml_outliers: config.ml_outliers,
ml_outlier_threshold: config.ml_outlier_threshold,
ml_outlier_trees: config.ml_outlier_trees,
dl_anomaly: config.dl_anomaly,
dl_threshold: config.dl_threshold,
dl_hidden_size: config.dl_hidden_size,
dl_epochs: config.dl_epochs,
explain: config.explain,
}
}
/// Blocks until the next status change of a traced process.
///
/// With `follow_forks` the wait covers any child (`-1`); otherwise only
/// `child`. Returns `Ok(None)` when `waitpid` fails while no process is
/// tracked any more, which tells the caller to stop.
///
/// # Errors
/// Any other `waitpid` failure is logged and returned.
fn wait_for_event(
    config: &TracerConfig,
    child: Pid,
    processes: &std::collections::HashMap<Pid, ProcessState>,
) -> Result<Option<nix::sys::wait::WaitStatus>> {
    let target = if config.follow_forks { Pid::from_raw(-1) } else { child };

    match waitpid(target, None) {
        Ok(status) => {
            trace!(status = ?status, "waitpid returned");
            Ok(Some(status))
        }
        Err(e) => {
            if processes.is_empty() {
                trace!("waitpid error but processes empty, breaking");
                return Ok(None);
            }
            warn!(error = %e, "waitpid failed");
            Err(e).context("Failed to waitpid")
        }
    }
}
/// Main tracing loop: traces `child` (and, with `follow_forks`, its
/// descendants) until no tracked process is left, then prints the summaries.
///
/// Returns the exit code recorded for `child`: its exit status, `128 +
/// signal` if it was killed by a signal, or 0 if neither was observed.
///
/// # Errors
/// Fails if the initial ptrace setup, waiting for events, or handling a
/// status/syscall stop fails.
fn trace_child(child: Pid, config: TracerConfig) -> Result<i32> {
    use std::collections::HashMap;

    info!(pid = %child, "starting trace_child");
    let mut tracers = initialize_tracers(&config);
    trace!("tracers initialized");

    #[cfg(feature = "otlp")]
    start_otlp_root_span(&mut tracers, child);

    trace!("calling setup_ptrace_options");
    setup_ptrace_options(child, config.follow_forks)?;
    trace!("ptrace options set successfully");

    let mut processes: HashMap<Pid, ProcessState> = HashMap::new();
    processes.insert(child, ProcessState::new());
    let mut main_exit_code = 0;

    info!("entering main wait loop");
    loop {
        if processes.is_empty() {
            break;
        }

        trace!(num_processes = processes.len(), "calling waitpid");
        let Some(status) = wait_for_event(&config, child, &processes)? else {
            break;
        };

        // Only syscall stops yield a PID that still needs processing.
        let syscall_stop = handle_traced_process_status(
            status,
            &mut processes,
            child,
            &mut main_exit_code,
            &config,
        )?;

        match syscall_stop {
            Some(pid) => {
                trace!(pid = %pid, "handle_traced_process_status returned pid");
                trace!(pid = %pid, "calling process_syscall_for_pid");
                process_syscall_for_pid(pid, &mut processes, &config, &mut tracers)?;
                trace!(pid = %pid, "process_syscall_for_pid completed");
            }
            None => {
                trace!("handle_traced_process_status returned None, continuing");
            }
        }
    }
    info!("exited main wait loop");

    let analysis = build_analysis_config(&config);
    output::print_summaries(tracers, config.timing_mode, main_exit_code, &analysis);
    Ok(main_exit_code)
}
/// Information captured at a syscall-entry stop and kept until the matching
/// exit stop is handled.
///
/// NOTE(review): the values are filled in by the `syscall_handling` module;
/// the per-field notes below are inferred from names and types — confirm
/// against that module.
#[derive(Debug)]
struct SyscallEntry {
    /// Syscall name.
    name: String,
    /// Arguments as strings (presumably already formatted for display).
    args: Vec<String>,
    /// Source location, when available.
    source: Option<crate::json_output::JsonSourceLocation>,
    /// Name of the function associated with the call, when known.
    function_name: Option<String>,
    /// Name of the caller, when known.
    caller_name: Option<String>,
    /// Raw value of the first argument, when captured.
    raw_arg1: Option<u64>,
    /// Raw value of the second argument, when captured.
    raw_arg2: Option<u64>,
    /// Raw value of the third argument; the underscore prefix marks it as
    /// currently unread.
    _raw_arg3: Option<u64>,
}
#[cfg(test)]
#[path = "core_tests.rs"]
mod core_tests;