use crate::{ProfileEvent, TorshResult};
use serde::{Deserialize, Serialize};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use torsh_core::TorshError;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VTuneConfig {
pub itt_enabled: bool,
pub hotspot_analysis: bool,
pub threading_analysis: bool,
pub memory_access_analysis: bool,
pub microarchitecture_analysis: bool,
pub hardware_events: bool,
pub sampling_frequency: u32,
pub output_dir: Option<String>,
pub cpu_mask: Option<u64>,
}
impl Default for VTuneConfig {
fn default() -> Self {
Self {
itt_enabled: true,
hotspot_analysis: true,
threading_analysis: true,
memory_access_analysis: false,
microarchitecture_analysis: false,
hardware_events: true,
sampling_frequency: 1000, output_dir: None,
cpu_mask: None,
}
}
}
pub struct VTuneProfiler {
config: VTuneConfig,
events: Arc<Mutex<Vec<ProfileEvent>>>,
start_time: Instant,
enabled: bool,
session_id: String,
collection_id: u64,
}
impl VTuneProfiler {
pub fn new(config: VTuneConfig) -> Self {
Self {
config,
events: Arc::new(Mutex::new(Vec::new())),
start_time: Instant::now(),
enabled: false,
session_id: format!("vtune_session_{}", chrono::Utc::now().timestamp()),
collection_id: 0,
}
}
pub fn enable(&mut self) -> TorshResult<()> {
self.enabled = true;
self.start_time = Instant::now();
self.collection_id += 1;
if let Ok(mut events) = self.events.lock() {
events.clear();
}
if self.config.itt_enabled {
self.init_itt_api()?;
}
self.start_vtune_collection()?;
Ok(())
}
pub fn disable(&mut self) -> TorshResult<()> {
self.enabled = false;
self.stop_vtune_collection()?;
if self.config.itt_enabled {
self.finalize_itt_api()?;
}
Ok(())
}
pub fn start_itt_task(&self, name: &str) -> TorshResult<ITTTask> {
if !self.enabled || !self.config.itt_enabled {
return Ok(ITTTask::new_disabled());
}
let start_time = Instant::now();
let task = ITTTask::new(name.to_string(), start_time);
Ok(task)
}
#[allow(clippy::too_many_arguments)]
pub fn record_function_execution(
&self,
function_name: &str,
module: &str,
file: &str,
line: u32,
duration: Duration,
cpu_cycles: Option<u64>,
cache_misses: Option<u64>,
branch_mispredicts: Option<u64>,
) -> TorshResult<()> {
if !self.enabled {
return Ok(());
}
let mut events = self.events.lock().map_err(|_| {
TorshError::InvalidArgument("Failed to acquire lock on events".to_string())
})?;
let start_us = self.start_time.elapsed().as_micros() as u64;
let duration_us = duration.as_micros() as u64;
let event_name = format!("{module}::{function_name}() [{file}:{line}]");
let mut metadata = format!(
"{{\"session_id\": \"{}\", \"collection_id\": {}",
self.session_id, self.collection_id
);
if let Some(cycles) = cpu_cycles {
metadata.push_str(&format!(", \"cpu_cycles\": {cycles}"));
}
if let Some(misses) = cache_misses {
metadata.push_str(&format!(", \"cache_misses\": {misses}"));
}
if let Some(mispredicts) = branch_mispredicts {
metadata.push_str(&format!(", \"branch_mispredicts\": {mispredicts}"));
}
metadata.push('}');
events.push(ProfileEvent {
name: event_name,
category: "vtune_function".to_string(),
start_us,
duration_us,
thread_id: 0, operation_count: Some(1),
flops: Some(0), bytes_transferred: Some(0), stack_trace: Some(metadata),
});
Ok(())
}
pub fn record_threading_event(
&self,
event_type: ThreadingEventType,
thread_id: usize,
synchronization_object: Option<&str>,
wait_time: Option<Duration>,
) -> TorshResult<()> {
if !self.enabled || !self.config.threading_analysis {
return Ok(());
}
let mut events = self.events.lock().map_err(|_| {
TorshError::InvalidArgument("Failed to acquire lock on events".to_string())
})?;
let start_us = self.start_time.elapsed().as_micros() as u64;
let duration_us = wait_time.map(|d| d.as_micros() as u64).unwrap_or(0);
let event_name = format!(
"{:?}{}",
event_type,
synchronization_object
.map(|obj| format!(" [{obj}]"))
.unwrap_or_default()
);
let metadata = format!(
"{{\"session_id\": \"{}\", \"collection_id\": {}, \"thread_id\": {}}}",
self.session_id, self.collection_id, thread_id
);
events.push(ProfileEvent {
name: event_name,
category: "vtune_threading".to_string(),
start_us,
duration_us,
thread_id,
operation_count: Some(1),
flops: Some(0),
bytes_transferred: Some(0),
stack_trace: Some(metadata),
});
Ok(())
}
pub fn record_memory_access(
&self,
access_type: MemoryAccessType,
address: u64,
size: usize,
latency: Option<Duration>,
cache_level: Option<u8>,
) -> TorshResult<()> {
if !self.enabled || !self.config.memory_access_analysis {
return Ok(());
}
let mut events = self.events.lock().map_err(|_| {
TorshError::InvalidArgument("Failed to acquire lock on events".to_string())
})?;
let start_us = self.start_time.elapsed().as_micros() as u64;
let duration_us = latency.map(|d| d.as_micros() as u64).unwrap_or(0);
let event_name = format!(
"{:?} [0x{:x}, {} bytes{}]",
access_type,
address,
size,
cache_level
.map(|level| format!(", L{level}"))
.unwrap_or_default()
);
let metadata = format!(
"{{\"session_id\": \"{}\", \"collection_id\": {}, \"address\": \"0x{:x}\", \"size\": {}}}",
self.session_id, self.collection_id, address, size
);
events.push(ProfileEvent {
name: event_name,
category: "vtune_memory".to_string(),
start_us,
duration_us,
thread_id: 0, operation_count: Some(1),
flops: Some(0),
bytes_transferred: Some(size as u64),
stack_trace: Some(metadata),
});
Ok(())
}
pub fn export_vtune_data(&self, filename: &str) -> TorshResult<()> {
let events = self.events.lock().map_err(|_| {
TorshError::InvalidArgument("Failed to acquire lock on events".to_string())
})?;
let vtune_data = VTuneExportData {
session_id: self.session_id.clone(),
collection_id: self.collection_id,
config: self.config.clone(),
events: events.clone(),
total_events: events.len(),
total_duration_us: events.iter().map(|e| e.duration_us).sum(),
timestamp: chrono::Utc::now(),
};
let json_data = serde_json::to_string_pretty(&vtune_data)
.map_err(|e| TorshError::InvalidArgument(format!("Failed to serialize data: {e}")))?;
std::fs::write(filename, json_data)
.map_err(|e| TorshError::InvalidArgument(format!("Failed to write file: {e}")))?;
Ok(())
}
pub fn get_vtune_stats(&self) -> TorshResult<VTuneStats> {
let events = self.events.lock().map_err(|_| {
TorshError::InvalidArgument("Failed to acquire lock on events".to_string())
})?;
let function_events: Vec<_> = events
.iter()
.filter(|e| e.category == "vtune_function")
.collect();
let threading_events: Vec<_> = events
.iter()
.filter(|e| e.category == "vtune_threading")
.collect();
let memory_events: Vec<_> = events
.iter()
.filter(|e| e.category == "vtune_memory")
.collect();
let total_function_time_us: u64 = function_events.iter().map(|e| e.duration_us).sum();
let total_thread_wait_time_us: u64 = threading_events.iter().map(|e| e.duration_us).sum();
let total_memory_accesses: usize = memory_events.len();
let avg_function_duration_us = if !function_events.is_empty() {
total_function_time_us as f64 / function_events.len() as f64
} else {
0.0
};
let unique_threads: std::collections::HashSet<_> =
events.iter().map(|e| e.thread_id).collect();
Ok(VTuneStats {
total_events: events.len(),
function_events: function_events.len(),
threading_events: threading_events.len(),
memory_events: memory_events.len(),
total_function_time_us,
total_thread_wait_time_us,
total_memory_accesses,
avg_function_duration_us,
unique_threads: unique_threads.len(),
session_id: self.session_id.clone(),
collection_id: self.collection_id,
})
}
fn init_itt_api(&self) -> TorshResult<()> {
Ok(())
}
fn finalize_itt_api(&self) -> TorshResult<()> {
Ok(())
}
fn start_vtune_collection(&self) -> TorshResult<()> {
Ok(())
}
fn stop_vtune_collection(&self) -> TorshResult<()> {
Ok(())
}
}
pub struct ITTTask {
name: String,
start_time: Instant,
enabled: bool,
}
impl ITTTask {
fn new(name: String, start_time: Instant) -> Self {
Self {
name,
start_time,
enabled: true,
}
}
fn new_disabled() -> Self {
Self {
name: String::new(),
start_time: Instant::now(),
enabled: false,
}
}
pub fn duration(&self) -> Duration {
self.start_time.elapsed()
}
pub fn name(&self) -> &str {
&self.name
}
}
impl Drop for ITTTask {
fn drop(&mut self) {
if self.enabled {
}
}
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum ThreadingEventType {
ThreadCreate,
ThreadJoin,
ThreadDestroy,
MutexLock,
MutexUnlock,
MutexWait,
ConditionWait,
ConditionSignal,
BarrierWait,
SemaphoreWait,
SemaphorePost,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum MemoryAccessType {
Read,
Write,
ReadWrite,
Prefetch,
CacheLineLoad,
CacheLineStore,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct VTuneExportData {
pub session_id: String,
pub collection_id: u64,
pub config: VTuneConfig,
pub events: Vec<ProfileEvent>,
pub total_events: usize,
pub total_duration_us: u64,
pub timestamp: chrono::DateTime<chrono::Utc>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VTuneStats {
pub total_events: usize,
pub function_events: usize,
pub threading_events: usize,
pub memory_events: usize,
pub total_function_time_us: u64,
pub total_thread_wait_time_us: u64,
pub total_memory_accesses: usize,
pub avg_function_duration_us: f64,
pub unique_threads: usize,
pub session_id: String,
pub collection_id: u64,
}
pub fn create_vtune_profiler() -> VTuneProfiler {
VTuneProfiler::new(VTuneConfig::default())
}
pub fn create_vtune_profiler_with_config(config: VTuneConfig) -> VTuneProfiler {
VTuneProfiler::new(config)
}
pub fn export_vtune_json(profiler: &VTuneProfiler, filename: &str) -> TorshResult<()> {
profiler.export_vtune_data(filename)
}
pub fn get_vtune_statistics(profiler: &VTuneProfiler) -> TorshResult<VTuneStats> {
profiler.get_vtune_stats()
}
#[cfg(test)]
mod tests {
use super::*;
use std::time::Duration;
#[test]
fn test_vtune_profiler_creation() {
let profiler = create_vtune_profiler();
assert!(!profiler.enabled);
}
#[test]
fn test_vtune_profiler_enable_disable() {
let mut profiler = create_vtune_profiler();
assert!(profiler.enable().is_ok());
assert!(profiler.enabled);
assert!(profiler.disable().is_ok());
assert!(!profiler.enabled);
}
#[test]
fn test_itt_task() {
let mut profiler = create_vtune_profiler();
profiler.enable().unwrap();
let task = profiler.start_itt_task("test_task").unwrap();
assert_eq!(task.name(), "test_task");
std::thread::sleep(std::time::Duration::from_micros(100));
assert!(task.duration().as_nanos() > 0);
}
#[test]
fn test_function_recording() {
let mut profiler = create_vtune_profiler();
profiler.enable().unwrap();
let result = profiler.record_function_execution(
"test_function",
"test_module",
"test.rs",
42,
Duration::from_micros(100),
Some(1000),
Some(10),
Some(5),
);
assert!(result.is_ok());
let stats = profiler.get_vtune_stats().unwrap();
assert_eq!(stats.function_events, 1);
assert_eq!(stats.total_function_time_us, 100);
}
#[test]
fn test_threading_recording() {
let mut profiler = create_vtune_profiler();
profiler.enable().unwrap();
let result = profiler.record_threading_event(
ThreadingEventType::MutexWait,
123,
Some("test_mutex"),
Some(Duration::from_micros(50)),
);
assert!(result.is_ok());
let stats = profiler.get_vtune_stats().unwrap();
assert_eq!(stats.threading_events, 1);
assert_eq!(stats.total_thread_wait_time_us, 50);
}
#[test]
fn test_memory_recording() {
let config = VTuneConfig {
memory_access_analysis: true,
..Default::default()
};
let mut profiler = create_vtune_profiler_with_config(config);
profiler.enable().unwrap();
let result = profiler.record_memory_access(
MemoryAccessType::Read,
0x1000,
64,
Some(Duration::from_nanos(100)),
Some(1),
);
assert!(result.is_ok());
let stats = profiler.get_vtune_stats().unwrap();
assert_eq!(stats.memory_events, 1);
assert_eq!(stats.total_memory_accesses, 1);
}
#[test]
fn test_export_vtune_data() {
let mut profiler = create_vtune_profiler();
profiler.enable().unwrap();
profiler
.record_function_execution(
"test_function",
"test_module",
"test.rs",
42,
Duration::from_micros(100),
None,
None,
None,
)
.unwrap();
let temp_file = std::env::temp_dir().join("test_vtune_export.json");
let temp_str = temp_file.display().to_string();
let result = profiler.export_vtune_data(&temp_str);
assert!(result.is_ok());
let _ = std::fs::remove_file(&temp_file);
}
#[test]
fn test_custom_config() {
let config = VTuneConfig {
itt_enabled: false,
hotspot_analysis: false,
threading_analysis: false,
memory_access_analysis: true,
microarchitecture_analysis: true,
hardware_events: false,
sampling_frequency: 2000,
output_dir: Some(std::env::temp_dir().join("vtune").display().to_string()),
cpu_mask: Some(0xFF),
};
let profiler = create_vtune_profiler_with_config(config.clone());
assert_eq!(profiler.config.sampling_frequency, 2000);
assert!(profiler.config.memory_access_analysis);
assert!(!profiler.config.itt_enabled);
}
}