Skip to main content

torsh_jit/
profiler.rs

1//! Profiler integration for JIT compilation
2//!
3//! This module provides comprehensive profiling capabilities for JIT-compiled code,
4//! including performance counters, sampling profilers, and external profiler integration.
5
6use crate::{JitError, JitResult};
7use indexmap::IndexMap;
8use std::collections::HashMap;
9use std::sync::{Arc, Mutex};
10use std::time::{Duration, Instant};
11
/// Profiler manager for JIT compilation.
///
/// Central entry point of this module: it owns the profiling sessions, the
/// global counters and statistics, the registered external profilers and the
/// optional sampling profiler.
#[derive(Debug)]
pub struct ProfilerManager {
    /// Profiling sessions keyed by session ID. `stop_session` marks an entry
    /// as completed instead of removing it, so finished sessions remain
    /// available to `get_session` and `export_session_data`.
    sessions: Arc<Mutex<IndexMap<String, ProfilingSession>>>,

    /// Global performance counters; read through `get_counters` and mutated
    /// through `update_counters`.
    counters: Arc<Mutex<PerformanceCounters>>,

    /// Profiler configuration supplied at construction time.
    config: ProfilerConfig,

    /// External profiler integrations registered via `add_external_profiler`.
    /// They are only driven when `config.enable_external_profilers` is set.
    external_profilers: Vec<Box<dyn ExternalProfiler>>,

    /// Sampling profiler; populated by `start_sampling` and taken out again
    /// by `stop_sampling`.
    sampling_profiler: Option<SamplingProfiler>,

    /// Global profiling statistics (session, event and export counts).
    stats: Arc<Mutex<ProfilerStats>>,

    /// Counter used to build session IDs of the form `session_<n>`.
    session_counter: Arc<Mutex<u64>>,
}
36
/// Profiling session for tracking execution.
///
/// A session is created by `ProfilerManager::start_session` and accumulates
/// events and call stacks until `ProfilerManager::stop_session` completes it.
#[derive(Debug, Clone)]
pub struct ProfilingSession {
    /// Session ID (generated by the manager as `session_<n>`)
    pub id: String,

    /// Caller-supplied session name
    pub name: String,

    /// Monotonic instant at which the session was started
    pub start_time: Instant,

    /// Wall time between start and stop; `None` until the session is stopped
    pub duration: Option<Duration>,

    /// Performance events collected, in recording order. The manager stops
    /// appending once `ProfilerConfig::max_events_per_session` is reached.
    pub events: Vec<PerformanceEvent>,

    /// Function call stacks recorded via `ProfilerManager::record_call_stack`
    pub call_stacks: Vec<CallStack>,

    /// Memory allocation tracking
    // NOTE(review): no code in the visible part of the file appends to this.
    pub memory_events: Vec<MemoryEvent>,

    /// Hardware performance counters (all zero for a freshly started session)
    pub hw_counters: HardwareCounters,

    /// Free-form session metadata (empty for a freshly started session)
    pub metadata: HashMap<String, String>,

    /// Session status; `Active` from creation until the session is stopped
    pub status: SessionStatus,
}
70
/// Status of a profiling session
#[derive(Debug, Clone, PartialEq)]
pub enum SessionStatus {
    /// Session is running; set by `ProfilerManager::start_session`
    Active,
    /// Session was stopped; set by `ProfilerManager::stop_session`
    Completed,
    /// Session failed; the payload is presumably a human-readable reason
    /// (no producer is visible in this part of the file)
    Failed(String),
    /// Session was cancelled (no producer is visible in this part of the file)
    Cancelled,
}
79
/// Performance event types.
///
/// Every variant carries a `timestamp` taken from the monotonic clock
/// (`Instant`). Events are stored per session and rendered by the JSON and
/// Chrome tracing exporters.
#[derive(Debug, Clone)]
pub enum PerformanceEvent {
    /// Function entry (exported to Chrome tracing as a "B" / begin event)
    FunctionEntry {
        function_name: String,
        timestamp: Instant,
        thread_id: u64,
        // presumably the entry address of the JIT-compiled function — TODO confirm
        address: u64,
    },

    /// Function exit (exported to Chrome tracing as an "E" / end event)
    FunctionExit {
        function_name: String,
        timestamp: Instant,
        thread_id: u64,
        // exported as `duration_us` in the Chrome tracing args
        duration: Duration,
    },

    /// Kernel launch (for GPU code)
    KernelLaunch {
        kernel_name: String,
        timestamp: Instant,
        // presumably (x, y, z) launch dimensions — TODO confirm
        grid_size: (u32, u32, u32),
        block_size: (u32, u32, u32),
    },

    /// Kernel completion
    KernelComplete {
        kernel_name: String,
        timestamp: Instant,
        duration: Duration,
        // NOTE(review): unit/range (fraction vs. percent) is not established here
        occupancy: f32,
    },

    /// Memory allocation
    MemoryAlloc {
        size: usize,
        address: u64,
        timestamp: Instant,
        alignment: usize,
    },

    /// Memory deallocation
    MemoryFree { address: u64, timestamp: Instant },

    /// Cache miss
    CacheMiss {
        // cache level; rendered as `L<level> Cache Miss` by the Chrome tracing export
        level: u8,
        address: u64,
        timestamp: Instant,
    },

    /// Branch misprediction
    BranchMisprediction {
        address: u64,
        timestamp: Instant,
        target_address: u64,
    },

    /// Custom user event with free-form key/value data
    Custom {
        name: String,
        timestamp: Instant,
        data: HashMap<String, String>,
    },
}
147
/// Call stack representation: one captured stack of one thread.
#[derive(Debug, Clone)]
pub struct CallStack {
    /// Timestamp when stack was captured
    pub timestamp: Instant,

    /// Thread ID the stack belongs to
    pub thread_id: u64,

    /// Stack frames (bottom to top)
    pub frames: Vec<StackFrame>,

    /// Total depth
    // NOTE(review): presumably equals `frames.len()` unless frames were
    // truncated at capture time — confirm against the producer.
    pub depth: usize,
}
163
/// Stack frame information: a single entry of a captured call stack.
#[derive(Debug, Clone)]
pub struct StackFrame {
    /// Function name
    pub function_name: String,

    /// Code address of the frame
    pub address: u64,

    /// Source location (if available)
    pub source_location: Option<SourceLocation>,

    /// Whether this frame represents an inlined function
    pub inlined: bool,

    /// Name of the module the function belongs to
    pub module_name: String,
}
182
/// Source location for profiling (file / line / column triple).
#[derive(Debug, Clone)]
pub struct SourceLocation {
    /// Source file name or path
    pub file: String,
    /// Line number (the placeholder stack trace in this file uses 0)
    pub line: u32,
    /// Column number (the placeholder stack trace in this file uses 0)
    pub column: u32,
}
190
/// Memory event tracking.
///
/// Stored in `ProfilingSession::memory_events`; separate from the
/// `PerformanceEvent::MemoryAlloc` / `MemoryFree` variants, which carry no
/// stack trace.
#[derive(Debug, Clone)]
pub enum MemoryEvent {
    /// Allocation of `size` bytes at `address`, with the allocating stack
    Alloc {
        size: usize,
        address: u64,
        timestamp: Instant,
        stack_trace: Vec<StackFrame>,
    },

    /// Deallocation of the block at `address`, with the freeing stack
    Free {
        address: u64,
        timestamp: Instant,
        stack_trace: Vec<StackFrame>,
    },

    /// Memory access (read when `is_write` is false, write otherwise)
    Access {
        address: u64,
        size: usize,
        is_write: bool,
        timestamp: Instant,
    },

    /// Page fault at `address`
    PageFault {
        address: u64,
        timestamp: Instant,
        fault_type: PageFaultType,
    },
}
224
/// Page fault types attached to `MemoryEvent::PageFault`.
// NOTE(review): the variant names match the usual OS terminology
// (major / minor / protection fault); no classification code is visible here.
#[derive(Debug, Clone)]
pub enum PageFaultType {
    Major,
    Minor,
    Protection,
}
232
/// Hardware performance counters.
///
/// `Default` yields all-zero counters and an empty custom map, which is what
/// a new session starts with.
#[derive(Debug, Clone, Default)]
pub struct HardwareCounters {
    /// CPU cycles
    pub cycles: u64,

    /// Instructions executed
    pub instructions: u64,

    /// Cache misses (L1, L2, L3), i.e. index 0 = L1, 1 = L2, 2 = L3
    pub cache_misses: [u64; 3],

    /// Cache references
    pub cache_references: u64,

    /// Branch mispredictions
    pub branch_mispredictions: u64,

    /// Branch instructions
    pub branches: u64,

    /// Page faults
    pub page_faults: u64,

    /// Context switches
    pub context_switches: u64,

    /// CPU migrations
    pub cpu_migrations: u64,

    /// Custom counters, keyed by counter name
    pub custom_counters: HashMap<String, u64>,
}
266
/// Global performance counters.
///
/// Held by `ProfilerManager` behind a mutex; callers mutate them through
/// `ProfilerManager::update_counters` and read a snapshot through
/// `ProfilerManager::get_counters`.
#[derive(Debug, Clone, Default)]
pub struct PerformanceCounters {
    /// Total compilation time
    pub total_compile_time: Duration,

    /// Total execution time
    pub total_execution_time: Duration,

    /// Number of compilations
    pub compilation_count: u64,

    /// Number of executions
    pub execution_count: u64,

    /// Memory usage statistics
    pub memory_stats: MemoryStats,

    /// Function call counts, keyed by function name (presumably — confirm
    /// against the code that updates it)
    pub function_calls: HashMap<String, u64>,

    /// Kernel launch counts, keyed by kernel name (presumably — confirm)
    pub kernel_launches: HashMap<String, u64>,

    /// Error counts; the key scheme is not established in this part of the file
    pub error_counts: HashMap<String, u64>,
}
294
/// Memory usage statistics (part of `PerformanceCounters`).
#[derive(Debug, Clone, Default)]
pub struct MemoryStats {
    /// Current memory usage
    // NOTE(review): presumably bytes, like the allocation sizes — confirm.
    pub current_usage: usize,

    /// Peak memory usage
    pub peak_usage: usize,

    /// Total number of allocations
    pub total_allocations: u64,

    /// Total number of deallocations
    pub total_deallocations: u64,

    /// Total bytes allocated
    pub total_bytes_allocated: u64,

    /// Total bytes freed
    pub total_bytes_freed: u64,

    /// Average allocation size
    pub avg_allocation_size: f64,

    /// Allocation histogram
    // NOTE(review): key is presumably an allocation size (or size bucket) and
    // the value a count; the bucketing is not visible here.
    pub allocation_histogram: HashMap<usize, u64>,
}
322
/// Profiler configuration consumed by `ProfilerManager`.
#[derive(Debug, Clone)]
pub struct ProfilerConfig {
    /// Enable profiling; when false, `start_session` refuses to create sessions
    pub enabled: bool,

    /// Sampling frequency in Hz. The sampling interval is derived from it as
    /// `1_000_000_000 / sampling_frequency` nanoseconds, so it must be non-zero.
    pub sampling_frequency: u32,

    /// Enable call stack collection; when false, `record_call_stack` is a
    /// no-op. Also forwarded to the sampling profiler's configuration.
    pub collect_call_stacks: bool,

    /// Enable memory tracking
    // NOTE(review): not read anywhere in the visible part of the file.
    pub track_memory: bool,

    /// Enable hardware counter collection
    // NOTE(review): not read anywhere in the visible part of the file.
    pub collect_hardware_counters: bool,

    /// Maximum number of events per session; `record_event` drops events
    /// once a session holds this many
    pub max_events_per_session: usize,

    /// Enable external profiler integration (start/stop of registered
    /// `ExternalProfiler`s alongside sessions)
    pub enable_external_profilers: bool,

    /// Output format for profiling data, used by `export_session_data`
    pub output_format: ProfilerOutputFormat,

    /// Output directory (defaults to `<system temp dir>/torsh_profiling`)
    pub output_directory: String,
}
353
/// Profiler output formats.
///
/// Only `Json` and `ChromeTracing` are implemented by
/// `ProfilerManager::export_session_data`; the remaining variants make it
/// return an "Unsupported export format" error.
#[derive(Debug, Clone)]
pub enum ProfilerOutputFormat {
    /// Chrome tracing format
    ChromeTracing,

    /// Linux perf format
    PerfData,

    /// Intel VTune format
    VTune,

    /// Custom JSON format
    Json,

    /// Binary format
    Binary,
}
372
/// Sampling profiler for continuous monitoring.
///
/// `start` spawns a background thread that appends one `Sample` per
/// `config.interval` to `samples` until `stop` clears the running flag.
#[derive(Debug)]
pub struct SamplingProfiler {
    /// Sampling thread handle; `None` until `start` has been called and after
    /// `stop` has joined the thread
    thread_handle: Option<std::thread::JoinHandle<()>>,

    /// Sampling configuration
    config: SamplingConfig,

    /// Collected samples, shared with the sampling thread
    samples: Arc<Mutex<Vec<Sample>>>,

    /// Running flag, polled by the sampling thread before every sample
    running: Arc<Mutex<bool>>,
}
388
/// Sampling configuration for `SamplingProfiler`.
#[derive(Debug, Clone)]
pub struct SamplingConfig {
    /// Sampling interval; the sampling thread sleeps this long between samples
    pub interval: Duration,

    /// Enable stack trace collection
    pub collect_stacks: bool,

    /// Maximum stack depth (`ProfilerManager::start_sampling` sets 64)
    // NOTE(review): the visible sampling loop does not consult this value.
    pub max_stack_depth: usize,

    /// Target threads (empty = all threads)
    // NOTE(review): the visible sampling loop does not filter by thread.
    pub target_threads: Vec<u64>,
}
404
/// Profiling sample: one observation taken by the sampling thread.
#[derive(Debug, Clone)]
pub struct Sample {
    /// Timestamp at which the sample was taken
    pub timestamp: Instant,

    /// Thread ID (the sampling loop stores a hash of the sampling thread's
    /// `ThreadId`)
    pub thread_id: u64,

    /// CPU ID (the sampling loop currently always stores 0)
    pub cpu_id: u32,

    /// Program counter (the sampling loop currently always stores 0)
    pub pc: u64,

    /// Stack trace, if one was collected
    pub stack_trace: Option<Vec<StackFrame>>,

    /// CPU utilization (the sampling loop currently stores a placeholder value)
    pub cpu_utilization: f32,

    /// Memory usage in bytes (resident set size where available, otherwise 0)
    pub memory_usage: usize,
}
429
/// External profiler trait.
///
/// Implementations are registered with
/// `ProfilerManager::add_external_profiler` and driven by the manager when
/// `ProfilerConfig::enable_external_profilers` is set. They must be
/// `Send + Sync + Debug` because the manager stores them as boxed trait
/// objects and derives `Debug`.
pub trait ExternalProfiler: Send + Sync + std::fmt::Debug {
    /// Start profiling
    fn start(&mut self) -> JitResult<()>;

    /// Stop profiling
    fn stop(&mut self) -> JitResult<()>;

    /// Add a function to profile: `name` is the symbol, `address` its start
    /// address and `size` its length (presumably code bytes — confirm)
    fn add_function(&mut self, name: &str, address: u64, size: usize) -> JitResult<()>;

    /// Remove a function from profiling, identified by its start address
    fn remove_function(&mut self, address: u64) -> JitResult<()>;

    /// Export profiling data to the file at `output_path`
    fn export_data(&self, output_path: &str) -> JitResult<()>;

    /// Get profiler name
    fn name(&self) -> &str;
}
450
/// Linux perf profiler integration.
///
/// Publishes JIT symbols through the `/tmp/perf-<pid>.map` file that `perf`
/// reads to symbolize JIT-compiled code.
#[derive(Debug)]
pub struct PerfProfiler {
    /// Perf session active (set by `start`, cleared by `stop`)
    active: bool,

    /// Function mappings: start address -> symbol name
    function_map: HashMap<u64, String>,

    /// JIT dump file
    // NOTE(review): not used by the methods visible in this part of the file.
    jit_dump_file: Option<std::fs::File>,

    /// Map file path; set by `start` on Linux, `None` otherwise
    map_file: Option<std::path::PathBuf>,
}
466
/// Intel VTune profiler integration.
// NOTE(review): its `ExternalProfiler` implementation is not visible in this
// part of the file.
#[derive(Debug)]
pub struct VTuneProfiler {
    /// VTune session active
    active: bool,

    /// Function mappings: address -> function name
    function_map: HashMap<u64, String>,
}
476
/// Profiler statistics maintained by `ProfilerManager`.
#[derive(Debug, Clone, Default)]
pub struct ProfilerStats {
    /// Total sessions created (incremented by `start_session`)
    pub total_sessions: u64,

    /// Active sessions (incremented by `start_session`, decremented with
    /// saturation by `stop_session`)
    pub active_sessions: u64,

    /// Total events collected (incremented for every event `record_event`
    /// actually stores)
    pub total_events: u64,

    /// Total samples collected
    // NOTE(review): never written in the visible part of the file.
    pub total_samples: u64,

    /// Profiling overhead percentage
    // NOTE(review): never written in the visible part of the file.
    pub overhead_percentage: f32,

    /// Data export count (incremented after each successful session export)
    pub export_count: u64,
}
498
499impl Default for ProfilerConfig {
500    fn default() -> Self {
501        Self {
502            enabled: true,
503            sampling_frequency: 1000, // 1 KHz
504            collect_call_stacks: true,
505            track_memory: true,
506            collect_hardware_counters: false, // Requires privileged access
507            max_events_per_session: 1_000_000,
508            enable_external_profilers: false,
509            output_format: ProfilerOutputFormat::Json,
510            output_directory: std::env::temp_dir()
511                .join("torsh_profiling")
512                .display()
513                .to_string(),
514        }
515    }
516}
517
518impl ProfilerManager {
519    /// Create a new profiler manager
520    pub fn new(config: ProfilerConfig) -> Self {
521        Self {
522            sessions: Arc::new(Mutex::new(IndexMap::new())),
523            counters: Arc::new(Mutex::new(PerformanceCounters::default())),
524            config,
525            external_profilers: Vec::new(),
526            sampling_profiler: None,
527            stats: Arc::new(Mutex::new(ProfilerStats::default())),
528            session_counter: Arc::new(Mutex::new(0)),
529        }
530    }
531
532    /// Create a new profiler manager with default configuration
533    pub fn with_defaults() -> Self {
534        Self::new(ProfilerConfig::default())
535    }
536
537    /// Start a new profiling session
538    pub fn start_session(&mut self, name: &str) -> JitResult<String> {
539        if !self.config.enabled {
540            return Err(JitError::RuntimeError("Profiling disabled".to_string()));
541        }
542
543        let session_id = {
544            let mut counter = self
545                .session_counter
546                .lock()
547                .expect("lock should not be poisoned");
548            *counter += 1;
549            format!("session_{}", *counter)
550        };
551        let session = ProfilingSession {
552            id: session_id.clone(),
553            name: name.to_string(),
554            start_time: Instant::now(),
555            duration: None,
556            events: Vec::new(),
557            call_stacks: Vec::new(),
558            memory_events: Vec::new(),
559            hw_counters: HardwareCounters::default(),
560            metadata: HashMap::new(),
561            status: SessionStatus::Active,
562        };
563
564        {
565            let mut sessions = self.sessions.lock().expect("lock should not be poisoned");
566            sessions.insert(session_id.clone(), session);
567        }
568
569        {
570            let mut stats = self.stats.lock().expect("lock should not be poisoned");
571            stats.total_sessions += 1;
572            stats.active_sessions += 1;
573        }
574
575        // Start external profilers if enabled
576        if self.config.enable_external_profilers {
577            for profiler in &mut self.external_profilers {
578                profiler.start()?;
579            }
580        }
581
582        Ok(session_id)
583    }
584
585    /// Stop a profiling session
586    pub fn stop_session(&mut self, session_id: &str) -> JitResult<()> {
587        let mut sessions = self.sessions.lock().expect("lock should not be poisoned");
588
589        if let Some(session) = sessions.get_mut(session_id) {
590            session.duration = Some(session.start_time.elapsed());
591            session.status = SessionStatus::Completed;
592
593            let mut stats = self.stats.lock().expect("lock should not be poisoned");
594            stats.active_sessions = stats.active_sessions.saturating_sub(1);
595        } else {
596            return Err(JitError::RuntimeError(format!(
597                "Session {} not found",
598                session_id
599            )));
600        }
601
602        // Stop external profilers if no active sessions
603        let active_count = {
604            let stats = self.stats.lock().expect("lock should not be poisoned");
605            stats.active_sessions
606        };
607
608        if active_count == 0 && self.config.enable_external_profilers {
609            for profiler in &mut self.external_profilers {
610                profiler.stop()?;
611            }
612        }
613
614        Ok(())
615    }
616
617    /// Record a performance event
618    pub fn record_event(&mut self, session_id: &str, event: PerformanceEvent) -> JitResult<()> {
619        let mut sessions = self.sessions.lock().expect("lock should not be poisoned");
620
621        if let Some(session) = sessions.get_mut(session_id) {
622            if session.events.len() < self.config.max_events_per_session {
623                session.events.push(event);
624
625                let mut stats = self.stats.lock().expect("lock should not be poisoned");
626                stats.total_events += 1;
627            }
628        }
629
630        Ok(())
631    }
632
633    /// Record a call stack
634    pub fn record_call_stack(&mut self, session_id: &str, call_stack: CallStack) -> JitResult<()> {
635        if !self.config.collect_call_stacks {
636            return Ok(());
637        }
638
639        let mut sessions = self.sessions.lock().expect("lock should not be poisoned");
640
641        if let Some(session) = sessions.get_mut(session_id) {
642            session.call_stacks.push(call_stack);
643        }
644
645        Ok(())
646    }
647
648    /// Start sampling profiler
649    pub fn start_sampling(&mut self) -> JitResult<()> {
650        if self.sampling_profiler.is_some() {
651            return Err(JitError::RuntimeError(
652                "Sampling profiler already running".to_string(),
653            ));
654        }
655
656        let config = SamplingConfig {
657            interval: Duration::from_nanos(1_000_000_000 / self.config.sampling_frequency as u64),
658            collect_stacks: self.config.collect_call_stacks,
659            max_stack_depth: 64,
660            target_threads: Vec::new(),
661        };
662
663        let mut sampling_profiler = SamplingProfiler::new(config)?;
664        sampling_profiler.start()?;
665
666        self.sampling_profiler = Some(sampling_profiler);
667        Ok(())
668    }
669
670    /// Stop sampling profiler
671    pub fn stop_sampling(&mut self) -> JitResult<()> {
672        if let Some(mut profiler) = self.sampling_profiler.take() {
673            profiler.stop()?;
674        }
675        Ok(())
676    }
677
    /// Register an external profiler.
    ///
    /// The profiler is only appended to the manager's list here; it is not
    /// started by this call. Registered profilers are driven from
    /// `start_session` / `stop_session` when
    /// `config.enable_external_profilers` is set.
    pub fn add_external_profiler(&mut self, profiler: Box<dyn ExternalProfiler>) {
        self.external_profilers.push(profiler);
    }
682
683    /// Export profiling data
684    pub fn export_session_data(&self, session_id: &str, output_path: &str) -> JitResult<()> {
685        let sessions = self.sessions.lock().expect("lock should not be poisoned");
686
687        if let Some(session) = sessions.get(session_id) {
688            match self.config.output_format {
689                ProfilerOutputFormat::Json => self.export_json(session, output_path)?,
690                ProfilerOutputFormat::ChromeTracing => {
691                    self.export_chrome_tracing(session, output_path)?
692                }
693                _ => {
694                    return Err(JitError::RuntimeError(
695                        "Unsupported export format".to_string(),
696                    ))
697                }
698            }
699
700            let mut stats = self.stats.lock().expect("lock should not be poisoned");
701            stats.export_count += 1;
702        }
703
704        Ok(())
705    }
706
707    /// Export session data as JSON
708    fn export_json(&self, session: &ProfilingSession, output_path: &str) -> JitResult<()> {
709        use serde_json::json;
710
711        // Build comprehensive JSON representation
712        let events_json: Vec<_> = session
713            .events
714            .iter()
715            .map(|event| {
716                json!({
717                    "event": format!("{:?}", event)
718                })
719            })
720            .collect();
721
722        let session_json = json!({
723            "name": session.name,
724            "id": session.id,
725            "start_time": session.start_time.elapsed().as_micros(),
726            "duration": session.duration.map(|d| d.as_micros()),
727            "events": events_json,
728            "event_count": session.events.len(),
729            "call_stack_count": session.call_stacks.len(),
730            "memory_event_count": session.memory_events.len()
731        });
732
733        std::fs::write(
734            output_path,
735            serde_json::to_string_pretty(&session_json)
736                .map_err(|e| JitError::RuntimeError(format!("JSON serialization failed: {}", e)))?,
737        )
738        .map_err(|e| JitError::RuntimeError(format!("Failed to write JSON: {}", e)))?;
739
740        Ok(())
741    }
742
743    /// Export session data as Chrome tracing format
744    fn export_chrome_tracing(
745        &self,
746        session: &ProfilingSession,
747        output_path: &str,
748    ) -> JitResult<()> {
749        use serde_json::json;
750
751        // Convert events to Chrome tracing format
752        // See https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/
753        let mut trace_events = Vec::new();
754
755        for event in &session.events {
756            match event {
757                PerformanceEvent::FunctionEntry {
758                    function_name,
759                    timestamp,
760                    thread_id,
761                    ..
762                } => {
763                    trace_events.push(json!({
764                        "name": function_name,
765                        "cat": "function",
766                        "ph": "B",  // Begin
767                        "ts": timestamp.elapsed().as_micros() as u64,
768                        "pid": 1,
769                        "tid": thread_id
770                    }));
771                }
772                PerformanceEvent::FunctionExit {
773                    function_name,
774                    timestamp,
775                    thread_id,
776                    duration,
777                } => {
778                    trace_events.push(json!({
779                        "name": function_name,
780                        "cat": "function",
781                        "ph": "E",  // End
782                        "ts": timestamp.elapsed().as_micros() as u64,
783                        "pid": 1,
784                        "tid": thread_id,
785                        "args": { "duration_us": duration.as_micros() }
786                    }));
787                }
788                PerformanceEvent::MemoryAlloc {
789                    size,
790                    timestamp,
791                    address,
792                    ..
793                } => {
794                    trace_events.push(json!({
795                        "name": "Memory Allocation",
796                        "cat": "memory",
797                        "ph": "i",  // Instant event
798                        "ts": timestamp.elapsed().as_micros() as u64,
799                        "pid": 1,
800                        "tid": 1,
801                        "s": "g",   // Global scope
802                        "args": { "size": size, "address": format!("0x{:x}", address) }
803                    }));
804                }
805                PerformanceEvent::CacheMiss {
806                    level, timestamp, ..
807                } => {
808                    trace_events.push(json!({
809                        "name": format!("L{} Cache Miss", level),
810                        "cat": "cache",
811                        "ph": "i",
812                        "ts": timestamp.elapsed().as_micros() as u64,
813                        "pid": 1,
814                        "tid": 1,
815                        "s": "t",   // Thread scope
816                    }));
817                }
818                _ => {
819                    // Generic event
820                    trace_events.push(json!({
821                        "name": format!("{:?}", event),
822                        "cat": "general",
823                        "ph": "i",
824                        "ts": 0,
825                        "pid": 1,
826                        "tid": 1
827                    }));
828                }
829            }
830        }
831
832        // Add metadata
833        let metadata = json!({
834            "process_name": { "1": session.name.clone() },
835            "thread_name": { "1": "Main Thread" }
836        });
837
838        // Build final trace
839        let trace = json!({
840            "traceEvents": trace_events,
841            "displayTimeUnit": "ms",
842            "metadata": metadata
843        });
844
845        std::fs::write(
846            output_path,
847            serde_json::to_string_pretty(&trace)
848                .map_err(|e| JitError::RuntimeError(format!("JSON serialization failed: {}", e)))?,
849        )
850        .map_err(|e| JitError::RuntimeError(format!("Failed to write tracing data: {}", e)))?;
851
852        Ok(())
853    }
854
855    /// Get session data
856    pub fn get_session(&self, session_id: &str) -> Option<ProfilingSession> {
857        let sessions = self.sessions.lock().expect("lock should not be poisoned");
858        sessions.get(session_id).cloned()
859    }
860
861    /// Get performance counters
862    pub fn get_counters(&self) -> PerformanceCounters {
863        let counters = self.counters.lock().expect("lock should not be poisoned");
864        counters.clone()
865    }
866
867    /// Get profiler statistics
868    pub fn get_stats(&self) -> ProfilerStats {
869        let stats = self.stats.lock().expect("lock should not be poisoned");
870        stats.clone()
871    }
872
873    /// Update performance counters
874    pub fn update_counters<F>(&self, update_fn: F)
875    where
876        F: FnOnce(&mut PerformanceCounters),
877    {
878        let mut counters = self.counters.lock().expect("lock should not be poisoned");
879        update_fn(&mut *counters);
880    }
881}
882
883impl SamplingProfiler {
884    /// Create a new sampling profiler
885    pub fn new(config: SamplingConfig) -> JitResult<Self> {
886        Ok(Self {
887            thread_handle: None,
888            config,
889            samples: Arc::new(Mutex::new(Vec::new())),
890            running: Arc::new(Mutex::new(false)),
891        })
892    }
893
894    /// Start sampling
895    pub fn start(&mut self) -> JitResult<()> {
896        let running = self.running.clone();
897        let samples = self.samples.clone();
898        let config = self.config.clone();
899
900        *running.lock().expect("lock should not be poisoned") = true;
901
902        let thread_handle = std::thread::spawn(move || {
903            let mut last_sample_time = Instant::now();
904
905            while *running.lock().expect("lock should not be poisoned") {
906                let now = Instant::now();
907
908                // Collect sample with actual system information
909                let sample = Sample {
910                    timestamp: now,
911                    thread_id: Self::get_thread_id(),
912                    cpu_id: Self::get_cpu_id(),
913                    pc: 0, // Program counter requires platform-specific code
914                    stack_trace: Self::collect_stack_trace(),
915                    cpu_utilization: Self::calculate_cpu_utilization(&last_sample_time, &now),
916                    memory_usage: Self::get_memory_usage(),
917                };
918
919                {
920                    let mut samples_guard = samples.lock().expect("lock should not be poisoned");
921                    samples_guard.push(sample);
922                }
923
924                last_sample_time = now;
925
926                std::thread::sleep(config.interval);
927            }
928        });
929
930        self.thread_handle = Some(thread_handle);
931        Ok(())
932    }
933
934    /// Stop sampling
935    pub fn stop(&mut self) -> JitResult<()> {
936        *self.running.lock().expect("lock should not be poisoned") = false;
937
938        if let Some(handle) = self.thread_handle.take() {
939            handle.join().map_err(|_| {
940                JitError::RuntimeError("Failed to join sampling thread".to_string())
941            })?;
942        }
943
944        Ok(())
945    }
946
947    /// Get collected samples
948    pub fn get_samples(&self) -> Vec<Sample> {
949        let samples = self.samples.lock().expect("lock should not be poisoned");
950        samples.clone()
951    }
952
953    /// Get the current thread ID
954    fn get_thread_id() -> u64 {
955        // Use thread name hash for portability
956        use std::collections::hash_map::DefaultHasher;
957        use std::hash::{Hash, Hasher};
958        let mut hasher = DefaultHasher::new();
959        std::thread::current().id().hash(&mut hasher);
960        hasher.finish()
961    }
962
963    /// Get the current CPU ID
964    fn get_cpu_id() -> u32 {
965        // Platform-specific CPU ID retrieval
966        // For cross-platform compatibility, default to 0
967        0
968    }
969
970    /// Collect stack trace
971    fn collect_stack_trace() -> Option<Vec<StackFrame>> {
972        // Use backtrace-rs or similar library in production
973        // For now, return a simple placeholder
974        Some(vec![StackFrame {
975            function_name: "sampling_thread".to_string(),
976            address: 0,
977            source_location: Some(SourceLocation {
978                file: "profiler.rs".to_string(),
979                line: 0,
980                column: 0,
981            }),
982            inlined: false,
983            module_name: "torsh_jit".to_string(),
984        }])
985    }
986
987    /// Calculate CPU utilization
988    fn calculate_cpu_utilization(_last_time: &Instant, _current_time: &Instant) -> f32 {
989        // This would require platform-specific CPU time queries
990        // Placeholder implementation
991        (num_cpus::get() as f32) * 0.5 // Assume 50% utilization
992    }
993
994    /// Get current memory usage
995    fn get_memory_usage() -> usize {
996        // Platform-specific memory usage
997        #[cfg(target_os = "linux")]
998        {
999            if let Ok(status) = std::fs::read_to_string("/proc/self/status") {
1000                for line in status.lines() {
1001                    if line.starts_with("VmRSS:") {
1002                        if let Some(kb_str) = line.split_whitespace().nth(1) {
1003                            if let Ok(kb) = kb_str.parse::<usize>() {
1004                                return kb * 1024; // Convert to bytes
1005                            }
1006                        }
1007                    }
1008                }
1009            }
1010        }
1011
1012        #[cfg(target_os = "macos")]
1013        {
1014            use std::process::Command;
1015            if let Ok(output) = Command::new("ps")
1016                .args(["-o", "rss=", "-p", &std::process::id().to_string()])
1017                .output()
1018            {
1019                if let Ok(text) = String::from_utf8(output.stdout) {
1020                    if let Ok(kb) = text.trim().parse::<usize>() {
1021                        return kb * 1024; // Convert to bytes
1022                    }
1023                }
1024            }
1025        }
1026
1027        0 // Default if not available
1028    }
1029}
1030
1031impl ExternalProfiler for PerfProfiler {
1032    fn start(&mut self) -> JitResult<()> {
1033        // Initialize perf profiling
1034        // On Linux, perf uses /tmp/perf-<pid>.map for JIT symbol maps
1035        // NOTE: This MUST remain as /tmp/ - it's a Linux perf convention requirement
1036        #[cfg(target_os = "linux")]
1037        {
1038            let pid = std::process::id();
1039            let map_file = format!("/tmp/perf-{}.map", pid);
1040
1041            // Create or truncate the perf map file
1042            std::fs::write(&map_file, "")
1043                .map_err(|e| JitError::RuntimeError(format!("Failed to create perf map: {}", e)))?;
1044
1045            self.map_file = Some(map_file.into());
1046        }
1047
1048        self.active = true;
1049        Ok(())
1050    }
1051
1052    fn stop(&mut self) -> JitResult<()> {
1053        // Finalize perf profiling
1054        self.active = false;
1055        Ok(())
1056    }
1057
1058    fn add_function(&mut self, name: &str, address: u64, size: usize) -> JitResult<()> {
1059        // Add function to perf symbol map
1060        self.function_map.insert(address, name.to_string());
1061
1062        // Write to perf map file format: <start_addr> <size> <symbol_name>
1063        #[cfg(target_os = "linux")]
1064        {
1065            if let Some(ref map_file) = self.map_file {
1066                use std::io::Write;
1067                let mut file = std::fs::OpenOptions::new()
1068                    .append(true)
1069                    .open(map_file)
1070                    .map_err(|e| {
1071                        JitError::RuntimeError(format!("Failed to open perf map: {}", e))
1072                    })?;
1073
1074                writeln!(file, "{:x} {:x} {}", address, size, name).map_err(|e| {
1075                    JitError::RuntimeError(format!("Failed to write to perf map: {}", e))
1076                })?;
1077            }
1078        }
1079
1080        Ok(())
1081    }
1082
1083    fn remove_function(&mut self, address: u64) -> JitResult<()> {
1084        self.function_map.remove(&address);
1085        // Note: perf map files are append-only, so we can't remove entries
1086        Ok(())
1087    }
1088
1089    fn export_data(&self, output_path: &str) -> JitResult<()> {
1090        // Export perf-compatible symbol map
1091        use std::io::Write;
1092        let mut file = std::fs::File::create(output_path)
1093            .map_err(|e| JitError::RuntimeError(format!("Failed to create export file: {}", e)))?;
1094
1095        // Write function map in perf format
1096        for (address, name) in &self.function_map {
1097            writeln!(file, "{:x} 0 {}", address, name)
1098                .map_err(|e| JitError::RuntimeError(format!("Failed to write perf data: {}", e)))?;
1099        }
1100
1101        Ok(())
1102    }
1103
1104    fn name(&self) -> &str {
1105        "PerfProfiler"
1106    }
1107}
1108
1109impl ExternalProfiler for VTuneProfiler {
1110    fn start(&mut self) -> JitResult<()> {
1111        // Initialize VTune profiling using Intel JIT API
1112        // VTune uses a shared library interface for JIT notification
1113        #[cfg(target_os = "linux")]
1114        {
1115            // In a full implementation, this would dynamically load libittnotify
1116            // and call iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, ...)
1117            log::info!("VTune profiler started (stub implementation)");
1118        }
1119
1120        #[cfg(target_os = "windows")]
1121        {
1122            log::info!("VTune profiler started on Windows (stub implementation)");
1123        }
1124
1125        self.active = true;
1126        Ok(())
1127    }
1128
1129    fn stop(&mut self) -> JitResult<()> {
1130        // Finalize VTune profiling
1131        self.active = false;
1132        Ok(())
1133    }
1134
1135    fn add_function(&mut self, name: &str, address: u64, size: usize) -> JitResult<()> {
1136        // Add function to VTune using JIT API
1137        self.function_map.insert(address, name.to_string());
1138
1139        if self.active {
1140            // In a full implementation, this would call:
1141            // iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, method_load_info)
1142            // where method_load_info contains: method_id, method_name,
1143            // method_load_address, method_size, line_number_table, etc.
1144
1145            log::debug!(
1146                "Registered function '{}' at 0x{:x} (size: {}) with VTune",
1147                name,
1148                address,
1149                size
1150            );
1151        }
1152
1153        Ok(())
1154    }
1155
1156    fn remove_function(&mut self, address: u64) -> JitResult<()> {
1157        self.function_map.remove(&address);
1158
1159        if self.active {
1160            // In a full implementation, this would call:
1161            // iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, method_id)
1162            log::debug!("Unregistered function at 0x{:x} from VTune", address);
1163        }
1164
1165        Ok(())
1166    }
1167
1168    fn export_data(&self, output_path: &str) -> JitResult<()> {
1169        // Export VTune-compatible symbol data
1170        use std::io::Write;
1171        let mut file = std::fs::File::create(output_path)
1172            .map_err(|e| JitError::RuntimeError(format!("Failed to create export file: {}", e)))?;
1173
1174        // Write header
1175        writeln!(file, "# VTune JIT Symbol Map")
1176            .map_err(|e| JitError::RuntimeError(format!("Write failed: {}", e)))?;
1177        writeln!(file, "# Format: <address> <size> <name>")
1178            .map_err(|e| JitError::RuntimeError(format!("Write failed: {}", e)))?;
1179        writeln!(file).map_err(|e| JitError::RuntimeError(format!("Write failed: {}", e)))?;
1180
1181        // Write function map
1182        for (address, name) in &self.function_map {
1183            writeln!(file, "{:016x} 0000 {}", address, name).map_err(|e| {
1184                JitError::RuntimeError(format!("Failed to write VTune data: {}", e))
1185            })?;
1186        }
1187
1188        Ok(())
1189    }
1190
1191    fn name(&self) -> &str {
1192        "VTuneProfiler"
1193    }
1194}
1195
1196impl PerfProfiler {
1197    /// Create a new perf profiler
1198    pub fn new() -> Self {
1199        Self {
1200            active: false,
1201            function_map: HashMap::new(),
1202            jit_dump_file: None,
1203            map_file: None,
1204        }
1205    }
1206}
1207
1208impl VTuneProfiler {
1209    /// Create a new VTune profiler
1210    pub fn new() -> Self {
1211        Self {
1212            active: false,
1213            function_map: HashMap::new(),
1214        }
1215    }
1216}
1217
#[cfg(test)]
mod tests {
    use super::*;

    /// A manager built with defaults is enabled and samples at 1000.
    #[test]
    fn test_profiler_manager_creation() {
        let profiler = ProfilerManager::with_defaults();
        let config = &profiler.config;

        assert!(config.enabled);
        assert_eq!(config.sampling_frequency, 1000);
    }

    /// A session goes from `Active` to `Completed` and gains a duration.
    #[test]
    fn test_session_lifecycle() {
        let mut profiler = ProfilerManager::with_defaults();

        // Starting a session yields a non-empty identifier.
        let id = profiler.start_session("test_session").unwrap();
        assert!(!id.is_empty());

        // While running, the session carries its name and is active.
        let running = profiler.get_session(&id).unwrap();
        assert_eq!(running.name, "test_session");
        assert_eq!(running.status, SessionStatus::Active);

        profiler.stop_session(&id).unwrap();

        // After stopping, it is completed and has a recorded duration.
        let finished = profiler.get_session(&id).unwrap();
        assert_eq!(finished.status, SessionStatus::Completed);
        assert!(finished.duration.is_some());
    }

    /// Recording one event makes it visible on the session.
    #[test]
    fn test_performance_event_recording() {
        let mut profiler = ProfilerManager::with_defaults();
        let id = profiler.start_session("test_session").unwrap();

        let entry_event = PerformanceEvent::FunctionEntry {
            function_name: "test_function".to_string(),
            timestamp: Instant::now(),
            thread_id: 1,
            address: 0x1000,
        };
        profiler.record_event(&id, entry_event).unwrap();

        let recorded = profiler.get_session(&id).unwrap();
        assert_eq!(recorded.events.len(), 1);
    }

    /// Adding an external profiler stores it on the manager.
    #[test]
    fn test_external_profiler_integration() {
        let mut profiler = ProfilerManager::with_defaults();
        let backend = Box::new(PerfProfiler::new());

        profiler.add_external_profiler(backend);

        assert_eq!(profiler.external_profilers.len(), 1);
    }
}