Skip to main content

trustformers_debug/
flame_graph_profiler.rs

1//! Advanced flame graph profiling implementation for TrustformeRS Debug
2
3use anyhow::Result;
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::path::Path;
7use std::time::{Instant, SystemTime};
8
9use crate::profiler::{ProfileEvent, Profiler};
10
11/// Flame graph node representing a stack frame
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct FlameGraphNode {
14    pub name: String,
15    pub value: u64,
16    pub delta: Option<i64>, // For differential analysis
17    pub children: HashMap<String, FlameGraphNode>,
18    pub total_value: u64,
19    pub self_value: u64,
20    pub percentage: f64,
21    pub color: Option<String>,
22    pub metadata: HashMap<String, String>,
23}
24
/// A single entry of a captured call stack.
///
/// Only `function_name` is used when samples are merged into the flame graph tree;
/// the remaining fields are optional location details.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct StackFrame {
    /// Name of the function executing in this frame.
    pub function_name: String,
    /// Module the function belongs to, when known.
    pub module_name: Option<String>,
    /// Source file of the function, when known.
    pub file_name: Option<String>,
    /// Line within `file_name`, when known.
    pub line_number: Option<u32>,
    /// Address associated with the frame, when known.
    pub address: Option<u64>,
}
34
35/// Sample data for flame graph
36#[derive(Debug, Clone)]
37pub struct FlameGraphSample {
38    pub stack: Vec<StackFrame>,
39    pub duration_ns: u64,
40    pub timestamp: u64,
41    pub thread_id: u64,
42    pub cpu_id: Option<u32>,
43    pub memory_usage: Option<usize>,
44    pub gpu_kernel: Option<String>,
45    pub metadata: HashMap<String, String>,
46}
47
48/// Configuration for flame graph generation
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct FlameGraphConfig {
51    pub sampling_rate: u32, // Samples per second
52    pub min_width: f64,     // Minimum width for node visibility
53    pub color_scheme: FlameGraphColorScheme,
54    pub direction: FlameGraphDirection,
55    pub title: String,
56    pub subtitle: Option<String>,
57    pub include_memory: bool,
58    pub include_gpu: bool,
59    pub differential_mode: bool,
60    pub merge_similar_stacks: bool,
61    pub filter_noise: bool,
62    pub noise_threshold: f64,
63}
64
65#[derive(Debug, Clone, Serialize, Deserialize)]
66pub enum FlameGraphColorScheme {
67    Hot,          // Red-orange gradient
68    Cool,         // Blue-purple gradient
69    Java,         // Java-specific colors
70    Memory,       // Memory-aware coloring
71    Differential, // Differential analysis colors
72    Random,       // Random but consistent colors
73    Custom(HashMap<String, String>),
74}
75
76#[derive(Debug, Clone, Serialize, Deserialize)]
77pub enum FlameGraphDirection {
78    TopDown,  // Traditional flame graph
79    BottomUp, // Icicle graph
80}
81
82/// Export format for flame graphs
83#[derive(Debug, Clone, Serialize, Deserialize)]
84pub enum FlameGraphExportFormat {
85    SVG,
86    InteractiveHTML,
87    JSON,
88    Speedscope,
89    D3,
90    Folded,
91}
92
93/// Advanced flame graph profiler
94#[derive(Debug)]
95#[allow(dead_code)]
96pub struct FlameGraphProfiler {
97    config: FlameGraphConfig,
98    samples: Vec<FlameGraphSample>,
99    sampling_timer: Option<Instant>,
100    root_node: Option<FlameGraphNode>,
101    baseline_samples: Option<Vec<FlameGraphSample>>, // For differential analysis
102    #[allow(dead_code)]
103    metadata: HashMap<String, String>,
104    current_cpu_usage: f64,
105    current_memory_usage: usize,
106    performance_counters: HashMap<String, u64>,
107}
108
109impl FlameGraphProfiler {
110    /// Create a new flame graph profiler
111    pub fn new(config: FlameGraphConfig) -> Self {
112        Self {
113            config,
114            samples: Vec::new(),
115            sampling_timer: None,
116            root_node: None,
117            baseline_samples: None,
118            metadata: HashMap::new(),
119            current_cpu_usage: 0.0,
120            current_memory_usage: 0,
121            performance_counters: HashMap::new(),
122        }
123    }
124
125    /// Start profiling with sampling
126    pub fn start_sampling(&mut self) -> Result<()> {
127        tracing::info!(
128            "Starting flame graph sampling at {} Hz",
129            self.config.sampling_rate
130        );
131        self.sampling_timer = Some(Instant::now());
132        self.samples.clear();
133        self.root_node = None;
134
135        // Initialize performance counters
136        self.performance_counters.insert("samples_collected".to_string(), 0);
137        self.performance_counters.insert("stack_depth_max".to_string(), 0);
138        self.performance_counters.insert("unique_functions".to_string(), 0);
139
140        Ok(())
141    }
142
143    /// Stop profiling and build flame graph
144    pub fn stop_sampling(&mut self) -> Result<()> {
145        tracing::info!(
146            "Stopping flame graph sampling, collected {} samples",
147            self.samples.len()
148        );
149        self.sampling_timer = None;
150        self.build_flame_graph()?;
151        Ok(())
152    }
153
154    /// Add a sample to the profiler
155    pub fn add_sample(&mut self, sample: FlameGraphSample) {
156        // Update performance counters
157        if let Some(counter) = self.performance_counters.get_mut("samples_collected") {
158            *counter += 1;
159        }
160
161        let stack_depth = sample.stack.len() as u64;
162        if let Some(max_depth) = self.performance_counters.get_mut("stack_depth_max") {
163            if stack_depth > *max_depth {
164                *max_depth = stack_depth;
165            }
166        }
167
168        self.samples.push(sample);
169    }
170
171    /// Add a sample from current stack trace
172    pub fn sample_current_stack(&mut self, duration_ns: u64) -> Result<()> {
173        let stack = self.capture_stack_trace()?;
174        let sample = FlameGraphSample {
175            stack,
176            duration_ns,
177            timestamp: SystemTime::now().duration_since(SystemTime::UNIX_EPOCH)?.as_nanos() as u64,
178            thread_id: self.get_current_thread_id(),
179            cpu_id: self.get_current_cpu_id(),
180            memory_usage: Some(self.current_memory_usage),
181            gpu_kernel: None,
182            metadata: HashMap::new(),
183        };
184
185        self.add_sample(sample);
186        Ok(())
187    }
188
189    /// Add GPU kernel sample
190    pub fn sample_gpu_kernel(&mut self, kernel_name: &str, duration_ns: u64) {
191        let stack = vec![StackFrame {
192            function_name: format!("GPU::{}", kernel_name),
193            module_name: Some("GPU".to_string()),
194            file_name: None,
195            line_number: None,
196            address: None,
197        }];
198
199        let sample = FlameGraphSample {
200            stack,
201            duration_ns,
202            timestamp: SystemTime::now()
203                .duration_since(SystemTime::UNIX_EPOCH)
204                .unwrap_or_default()
205                .as_nanos() as u64,
206            thread_id: 0, // GPU operations on virtual thread
207            cpu_id: None,
208            memory_usage: None,
209            gpu_kernel: Some(kernel_name.to_string()),
210            metadata: [("type".to_string(), "gpu".to_string())].into_iter().collect(),
211        };
212
213        self.add_sample(sample);
214    }
215
216    /// Set baseline for differential analysis
217    pub fn set_baseline(&mut self) {
218        self.baseline_samples = Some(self.samples.clone());
219        tracing::info!("Set baseline with {} samples", self.samples.len());
220    }
221
222    /// Build flame graph from collected samples
223    pub fn build_flame_graph(&mut self) -> Result<()> {
224        if self.samples.is_empty() {
225            return Err(anyhow::anyhow!("No samples collected"));
226        }
227
228        let mut root = FlameGraphNode {
229            name: "root".to_string(),
230            value: 0,
231            delta: None,
232            children: HashMap::new(),
233            total_value: 0,
234            self_value: 0,
235            percentage: 100.0,
236            color: None,
237            metadata: HashMap::new(),
238        };
239
240        // Merge samples into tree structure
241        for sample in &self.samples {
242            self.merge_sample_into_tree(&mut root, sample);
243        }
244
245        // Calculate totals and percentages
246        self.calculate_node_metrics(&mut root);
247
248        // Apply differential analysis if baseline exists
249        if self.config.differential_mode && self.baseline_samples.is_some() {
250            self.apply_differential_analysis(&mut root)?;
251        }
252
253        // Filter noise if enabled
254        if self.config.filter_noise {
255            self.filter_noise_nodes(&mut root);
256        }
257
258        // Update performance counters
259        let unique_functions = self.count_unique_functions(&root);
260        if let Some(counter) = self.performance_counters.get_mut("unique_functions") {
261            *counter = unique_functions;
262        }
263
264        self.root_node = Some(root);
265        tracing::info!(
266            "Built flame graph with {} unique functions",
267            unique_functions
268        );
269        Ok(())
270    }
271
272    /// Export flame graph to various formats
273    pub async fn export(&self, format: FlameGraphExportFormat, output_path: &Path) -> Result<()> {
274        let root = self
275            .root_node
276            .as_ref()
277            .ok_or_else(|| anyhow::anyhow!("Flame graph not built yet"))?;
278
279        match format {
280            FlameGraphExportFormat::SVG => self.export_svg(root, output_path).await,
281            FlameGraphExportFormat::InteractiveHTML => {
282                self.export_interactive_html(root, output_path).await
283            },
284            FlameGraphExportFormat::JSON => self.export_json(root, output_path).await,
285            FlameGraphExportFormat::Speedscope => self.export_speedscope(root, output_path).await,
286            FlameGraphExportFormat::D3 => self.export_d3(root, output_path).await,
287            FlameGraphExportFormat::Folded => self.export_folded(output_path).await,
288        }
289    }
290
291    /// Export as SVG flame graph
292    async fn export_svg(&self, root: &FlameGraphNode, output_path: &Path) -> Result<()> {
293        let mut svg_content = String::new();
294
295        // SVG header
296        svg_content.push_str(&format!(
297            r##"<?xml version="1.0" encoding="UTF-8"?>
298<svg width="1200" height="800" xmlns="http://www.w3.org/2000/svg">
299<defs>
300    <linearGradient id="background" x1="0%" y1="0%" x2="0%" y2="100%">
301        <stop offset="0%" style="stop-color:#eeeeee"/>
302        <stop offset="100%" style="stop-color:#eeeeb0"/>
303    </linearGradient>
304</defs>
305<rect width="100%" height="100%" fill="url(#background)"/>
306<text x="600" y="24" text-anchor="middle" font-size="17" font-family="Verdana">{}</text>
307<text x="600" y="44" text-anchor="middle" font-size="12" font-family="Verdana" fill="#999">
308    {} samples, {} functions
309</text>
310"##,
311            self.config.title,
312            self.samples.len(),
313            self.count_unique_functions(root)
314        ));
315
316        // Render flame graph rectangles
317        self.render_svg_node(&mut svg_content, root, 0, 0, 1200, 0)?;
318
319        svg_content.push_str("</svg>");
320
321        tokio::fs::write(output_path, svg_content).await?;
322        tracing::info!("Exported SVG flame graph to {:?}", output_path);
323        Ok(())
324    }
325
326    /// Export as interactive HTML flame graph
327    async fn export_interactive_html(
328        &self,
329        root: &FlameGraphNode,
330        output_path: &Path,
331    ) -> Result<()> {
332        let json_data = serde_json::to_string(root)?;
333
334        let html_content = format!(
335            r#"<!DOCTYPE html>
336<html>
337<head>
338    <title>{}</title>
339    <meta charset="utf-8">
340    <style>
341        body {{ font-family: Arial, sans-serif; margin: 0; padding: 20px; }}
342        .flame-graph {{ width: 100%; height: 600px; border: 1px solid #ccc; }}
343        .tooltip {{ position: absolute; background: rgba(0,0,0,0.8); color: white;
344                   padding: 10px; border-radius: 4px; pointer-events: none; z-index: 1000; }}
345        .controls {{ margin-bottom: 20px; }}
346        .info {{ margin-top: 20px; font-size: 14px; color: #666; }}
347    </style>
348    <script src="https://d3js.org/d3.v7.min.js"></script>
349</head>
350<body>
351    <h1>{}</h1>
352    <div class="controls">
353        <button onclick="resetZoom()">Reset Zoom</button>
354        <button onclick="searchFunction()">Search</button>
355        <input type="text" id="searchInput" placeholder="Function name...">
356    </div>
357    <div id="flame-graph" class="flame-graph"></div>
358    <div class="info">
359        <p>Samples: {} | Functions: {} | Total Time: {:.2}ms</p>
360        <p>Click to zoom, double-click to reset. Hover for details.</p>
361    </div>
362    <div id="tooltip" class="tooltip" style="display: none;"></div>
363
364    <script>
365        const data = {};
366        // Interactive flame graph implementation would go here
367        // This is a simplified version - full implementation would include D3.js visualization
368        console.log('Flame graph data loaded:', data);
369    </script>
370</body>
371</html>"#,
372            self.config.title,
373            self.config.title,
374            self.samples.len(),
375            self.count_unique_functions(root),
376            root.total_value as f64 / 1_000_000.0, // Convert ns to ms
377            json_data
378        );
379
380        tokio::fs::write(output_path, html_content).await?;
381        tracing::info!("Exported interactive HTML flame graph to {:?}", output_path);
382        Ok(())
383    }
384
385    /// Export as JSON
386    async fn export_json(&self, root: &FlameGraphNode, output_path: &Path) -> Result<()> {
387        let json_data = serde_json::to_string_pretty(root)?;
388        tokio::fs::write(output_path, json_data).await?;
389        tracing::info!("Exported JSON flame graph to {:?}", output_path);
390        Ok(())
391    }
392
393    /// Export as Speedscope format
394    async fn export_speedscope(&self, root: &FlameGraphNode, output_path: &Path) -> Result<()> {
395        let speedscope_data = self.convert_to_speedscope_format(root)?;
396        let json_data = serde_json::to_string_pretty(&speedscope_data)?;
397        tokio::fs::write(output_path, json_data).await?;
398        tracing::info!("Exported Speedscope format to {:?}", output_path);
399        Ok(())
400    }
401
402    /// Export as D3.js compatible format
403    async fn export_d3(&self, root: &FlameGraphNode, output_path: &Path) -> Result<()> {
404        let d3_data = self.convert_to_d3_format(root)?;
405        let json_data = serde_json::to_string_pretty(&d3_data)?;
406        tokio::fs::write(output_path, json_data).await?;
407        tracing::info!("Exported D3 format to {:?}", output_path);
408        Ok(())
409    }
410
411    /// Export as folded stack format
412    async fn export_folded(&self, output_path: &Path) -> Result<()> {
413        let mut folded_content = String::new();
414
415        for sample in &self.samples {
416            let stack_str: Vec<String> =
417                sample.stack.iter().map(|frame| frame.function_name.clone()).collect();
418            folded_content.push_str(&format!("{} {}\n", stack_str.join(";"), sample.duration_ns));
419        }
420
421        tokio::fs::write(output_path, folded_content).await?;
422        tracing::info!("Exported folded format to {:?}", output_path);
423        Ok(())
424    }
425
426    /// Get flame graph analysis report
427    pub fn get_analysis_report(&self) -> FlameGraphAnalysisReport {
428        let root = self.root_node.as_ref();
429
430        FlameGraphAnalysisReport {
431            total_samples: self.samples.len(),
432            total_duration_ns: self.samples.iter().map(|s| s.duration_ns).sum(),
433            unique_functions: root.map(|r| self.count_unique_functions(r)).unwrap_or(0),
434            max_stack_depth: self.performance_counters.get("stack_depth_max").copied().unwrap_or(0),
435            hot_functions: self.get_hot_functions(10),
436            memory_usage_stats: self.get_memory_usage_stats(),
437            gpu_kernel_stats: self.get_gpu_kernel_stats(),
438            differential_analysis: self.get_differential_analysis(),
439            performance_insights: self.generate_performance_insights(),
440        }
441    }
442
443    // Private helper methods
444
445    fn capture_stack_trace(&self) -> Result<Vec<StackFrame>> {
446        // Simplified stack trace capture
447        // In a real implementation, this would use platform-specific APIs
448        Ok(vec![StackFrame {
449            function_name: "captured_function".to_string(),
450            module_name: Some("trustformers_debug".to_string()),
451            file_name: Some("profiler.rs".to_string()),
452            line_number: Some(1800),
453            address: None,
454        }])
455    }
456
457    fn get_current_thread_id(&self) -> u64 {
458        // Simplified thread ID - would use thread::current().id() in practice
459        1
460    }
461
462    fn get_current_cpu_id(&self) -> Option<u32> {
463        // Would query current CPU ID in practice
464        Some(0)
465    }
466
467    fn merge_sample_into_tree(&self, node: &mut FlameGraphNode, sample: &FlameGraphSample) {
468        if sample.stack.is_empty() {
469            node.value += sample.duration_ns;
470            return;
471        }
472
473        let frame = &sample.stack[0];
474        let child =
475            node.children
476                .entry(frame.function_name.clone())
477                .or_insert_with(|| FlameGraphNode {
478                    name: frame.function_name.clone(),
479                    value: 0,
480                    delta: None,
481                    children: HashMap::new(),
482                    total_value: 0,
483                    self_value: 0,
484                    percentage: 0.0,
485                    color: None,
486                    metadata: HashMap::new(),
487                });
488
489        if sample.stack.len() == 1 {
490            child.value += sample.duration_ns;
491        } else {
492            let mut remaining_sample = sample.clone();
493            remaining_sample.stack = sample.stack[1..].to_vec();
494            self.merge_sample_into_tree(child, &remaining_sample);
495        }
496    }
497
498    fn calculate_node_metrics(&self, node: &mut FlameGraphNode) {
499        let mut total_children_value = 0;
500
501        for child in node.children.values_mut() {
502            self.calculate_node_metrics(child);
503            total_children_value += child.total_value;
504        }
505
506        node.total_value = node.value + total_children_value;
507        node.self_value = node.value;
508
509        if node.total_value > 0 && node.name != "root" {
510            // Get the total from root node for percentage calculation
511            let total_for_percentage = if let Some(root) = &self.root_node {
512                root.total_value
513            } else {
514                node.total_value // fallback
515            };
516
517            if total_for_percentage > 0 {
518                node.percentage = (node.total_value as f64 / total_for_percentage as f64) * 100.0;
519            }
520        }
521    }
522
523    fn apply_differential_analysis(&self, node: &mut FlameGraphNode) -> Result<()> {
524        if let Some(baseline_samples) = &self.baseline_samples {
525            // Build baseline tree
526            let mut baseline_root = FlameGraphNode {
527                name: "root".to_string(),
528                value: 0,
529                delta: None,
530                children: HashMap::new(),
531                total_value: 0,
532                self_value: 0,
533                percentage: 100.0,
534                color: None,
535                metadata: HashMap::new(),
536            };
537
538            for sample in baseline_samples {
539                self.merge_sample_into_tree(&mut baseline_root, sample);
540            }
541
542            // Calculate deltas
543            self.calculate_deltas(node, &baseline_root);
544        }
545        Ok(())
546    }
547
548    fn calculate_deltas(&self, current: &mut FlameGraphNode, baseline: &FlameGraphNode) {
549        let baseline_value =
550            baseline.children.get(&current.name).map(|n| n.total_value as i64).unwrap_or(0);
551
552        current.delta = Some(current.total_value as i64 - baseline_value);
553
554        for (name, child) in &mut current.children {
555            if let Some(baseline_child) = baseline.children.get(name) {
556                self.calculate_deltas(child, baseline_child);
557            } else {
558                child.delta = Some(child.total_value as i64);
559            }
560        }
561    }
562
563    fn filter_noise_nodes(&self, node: &mut FlameGraphNode) {
564        let threshold = (node.total_value as f64 * self.config.noise_threshold / 100.0) as u64;
565
566        node.children.retain(|_, child| {
567            self.filter_noise_nodes(child);
568            child.total_value >= threshold
569        });
570    }
571
572    fn count_unique_functions(&self, node: &FlameGraphNode) -> u64 {
573        let mut count = 1; // Count this node
574        for child in node.children.values() {
575            count += self.count_unique_functions(child);
576        }
577        count
578    }
579
580    fn render_svg_node(
581        &self,
582        svg: &mut String,
583        node: &FlameGraphNode,
584        x: i32,
585        y: i32,
586        width: i32,
587        depth: i32,
588    ) -> Result<()> {
589        if width < 1 {
590            return Ok(());
591        }
592
593        let height = 20;
594        let color = self.get_node_color(node);
595
596        svg.push_str(&format!(
597            r#"<rect x="{}" y="{}" width="{}" height="{}" fill="{}" stroke="white" stroke-width="0.5">
598<title>{}: {:.2}% ({} samples)</title>
599</rect>
600<text x="{}" y="{}" font-size="12" font-family="Verdana" fill="black">{}</text>
601"#,
602            x, y + depth * height, width, height,
603            color,
604            node.name, node.percentage, node.value,
605            x + 2, y + depth * height + 14,
606            if width > 50 { &node.name } else { "" }
607        ));
608
609        // Render children
610        let mut child_x = x;
611        for child in node.children.values() {
612            let child_width = if node.total_value > 0 {
613                (width as f64 * child.total_value as f64 / node.total_value as f64) as i32
614            } else {
615                0
616            };
617            if child_width > 0 {
618                self.render_svg_node(svg, child, child_x, y, child_width, depth + 1)?;
619                child_x += child_width;
620            }
621        }
622
623        Ok(())
624    }
625
626    fn get_node_color(&self, node: &FlameGraphNode) -> String {
627        match &self.config.color_scheme {
628            FlameGraphColorScheme::Hot => {
629                let intensity = (node.percentage / 100.0 * 255.0) as u8;
630                format!("rgb({}, {}, 0)", 255, 255 - intensity)
631            },
632            FlameGraphColorScheme::Cool => {
633                let intensity = (node.percentage / 100.0 * 255.0) as u8;
634                format!("rgb(0, {}, {})", intensity, 255)
635            },
636            FlameGraphColorScheme::Memory => {
637                if node.name.contains("alloc") || node.name.contains("malloc") {
638                    "#ff6b6b".to_string()
639                } else {
640                    "#4ecdc4".to_string()
641                }
642            },
643            FlameGraphColorScheme::Differential => {
644                match node.delta {
645                    Some(delta) if delta > 0 => "#ff4444".to_string(), // Red for increases
646                    Some(delta) if delta < 0 => "#44ff44".to_string(), // Green for decreases
647                    _ => "#cccccc".to_string(),                        // Gray for no change
648                }
649            },
650            FlameGraphColorScheme::Java => "#ff9800".to_string(),
651            FlameGraphColorScheme::Random => {
652                let hash = self.hash_string(&node.name);
653                format!("hsl({}, 70%, 60%)", hash % 360)
654            },
655            FlameGraphColorScheme::Custom(colors) => {
656                colors.get(&node.name).cloned().unwrap_or_else(|| "#cccccc".to_string())
657            },
658        }
659    }
660
661    fn hash_string(&self, s: &str) -> u32 {
662        let mut hash = 0u32;
663        for byte in s.bytes() {
664            hash = hash.wrapping_mul(31).wrapping_add(byte as u32);
665        }
666        hash
667    }
668
669    fn convert_to_speedscope_format(&self, root: &FlameGraphNode) -> Result<serde_json::Value> {
670        // Simplified Speedscope format conversion
671        Ok(serde_json::json!({
672            "version": "0.7.1",
673            "profiles": [{
674                "type": "sampled",
675                "name": self.config.title,
676                "unit": "nanoseconds",
677                "startValue": 0,
678                "endValue": root.total_value,
679                "samples": [],
680                "weights": []
681            }]
682        }))
683    }
684
685    fn convert_to_d3_format(&self, root: &FlameGraphNode) -> Result<serde_json::Value> {
686        Ok(serde_json::to_value(root)?)
687    }
688
689    fn get_hot_functions(&self, limit: usize) -> Vec<HotFunctionInfo> {
690        let mut functions = Vec::new();
691
692        if let Some(root) = &self.root_node {
693            self.collect_hot_functions(root, &mut functions);
694        }
695
696        functions.sort_by_key(|item| std::cmp::Reverse(item.total_time_ns));
697        functions.truncate(limit);
698        functions
699    }
700
701    fn collect_hot_functions(&self, node: &FlameGraphNode, functions: &mut Vec<HotFunctionInfo>) {
702        functions.push(HotFunctionInfo {
703            name: node.name.clone(),
704            total_time_ns: node.total_value,
705            self_time_ns: node.self_value,
706            percentage: node.percentage,
707            call_count: 1, // Simplified
708        });
709
710        for child in node.children.values() {
711            self.collect_hot_functions(child, functions);
712        }
713    }
714
715    fn get_memory_usage_stats(&self) -> MemoryUsageStats {
716        let memory_samples: Vec<usize> =
717            self.samples.iter().filter_map(|s| s.memory_usage).collect();
718
719        if memory_samples.is_empty() {
720            return MemoryUsageStats::default();
721        }
722
723        let total: usize = memory_samples.iter().sum();
724        let max = memory_samples.iter().max().copied().unwrap_or(0);
725        let min = memory_samples.iter().min().copied().unwrap_or(0);
726        let avg = total / memory_samples.len();
727
728        MemoryUsageStats {
729            peak_memory_bytes: max,
730            avg_memory_bytes: avg,
731            min_memory_bytes: min,
732            total_samples: memory_samples.len(),
733        }
734    }
735
736    fn get_gpu_kernel_stats(&self) -> GpuKernelStats {
737        let gpu_samples: Vec<&FlameGraphSample> =
738            self.samples.iter().filter(|s| s.gpu_kernel.is_some()).collect();
739
740        let total_gpu_time: u64 = gpu_samples.iter().map(|s| s.duration_ns).sum();
741        let unique_kernels: std::collections::HashSet<String> =
742            gpu_samples.iter().filter_map(|s| s.gpu_kernel.clone()).collect();
743
744        GpuKernelStats {
745            total_kernel_time_ns: total_gpu_time,
746            unique_kernels: unique_kernels.len(),
747            total_kernel_calls: gpu_samples.len(),
748        }
749    }
750
751    fn get_differential_analysis(&self) -> Option<DifferentialAnalysis> {
752        if !self.config.differential_mode || self.baseline_samples.is_none() {
753            return None;
754        }
755
756        let current_total: u64 = self.samples.iter().map(|s| s.duration_ns).sum();
757        let baseline_total: u64 =
758            self.baseline_samples.as_ref()?.iter().map(|s| s.duration_ns).sum();
759
760        let performance_change = if baseline_total > 0 {
761            ((current_total as f64 - baseline_total as f64) / baseline_total as f64) * 100.0
762        } else {
763            0.0
764        };
765
766        Some(DifferentialAnalysis {
767            baseline_samples: self.baseline_samples.as_ref()?.len(),
768            current_samples: self.samples.len(),
769            performance_change_percent: performance_change,
770            is_regression: performance_change > 5.0,
771            is_improvement: performance_change < -5.0,
772        })
773    }
774
775    fn generate_performance_insights(&self) -> Vec<String> {
776        let mut insights = Vec::new();
777
778        if let Some(root) = &self.root_node {
779            let hot_functions = self.get_hot_functions(3);
780
781            if let Some(hottest) = hot_functions.first() {
782                if hottest.percentage > 50.0 {
783                    insights.push(format!(
784                        "Function '{}' dominates execution time ({:.1}%)",
785                        hottest.name, hottest.percentage
786                    ));
787                }
788            }
789
790            let gpu_stats = self.get_gpu_kernel_stats();
791            if gpu_stats.total_kernel_calls > 0 {
792                let gpu_percentage =
793                    (gpu_stats.total_kernel_time_ns as f64 / root.total_value as f64) * 100.0;
794                insights.push(format!(
795                    "GPU kernels account for {:.1}% of execution time",
796                    gpu_percentage
797                ));
798            }
799
800            if let Some(diff) = self.get_differential_analysis() {
801                if diff.is_regression {
802                    insights.push(format!(
803                        "Performance regression detected: {:.1}% slower than baseline",
804                        diff.performance_change_percent
805                    ));
806                } else if diff.is_improvement {
807                    insights.push(format!(
808                        "Performance improvement: {:.1}% faster than baseline",
809                        -diff.performance_change_percent
810                    ));
811                }
812            }
813        }
814
815        if insights.is_empty() {
816            insights.push("No significant performance patterns detected".to_string());
817        }
818
819        insights
820    }
821}
822
823/// Hot function information
824#[derive(Debug, Clone, Serialize, Deserialize)]
825pub struct HotFunctionInfo {
826    pub name: String,
827    pub total_time_ns: u64,
828    pub self_time_ns: u64,
829    pub percentage: f64,
830    pub call_count: usize,
831}
832
833/// Memory usage statistics
834#[derive(Debug, Clone, Serialize, Deserialize, Default)]
835pub struct MemoryUsageStats {
836    pub peak_memory_bytes: usize,
837    pub avg_memory_bytes: usize,
838    pub min_memory_bytes: usize,
839    pub total_samples: usize,
840}
841
842/// GPU kernel statistics
843#[derive(Debug, Clone, Serialize, Deserialize)]
844pub struct GpuKernelStats {
845    pub total_kernel_time_ns: u64,
846    pub unique_kernels: usize,
847    pub total_kernel_calls: usize,
848}
849
850/// Differential analysis results
851#[derive(Debug, Clone, Serialize, Deserialize)]
852pub struct DifferentialAnalysis {
853    pub baseline_samples: usize,
854    pub current_samples: usize,
855    pub performance_change_percent: f64,
856    pub is_regression: bool,
857    pub is_improvement: bool,
858}
859
860/// Flame graph analysis report
861#[derive(Debug, Clone, Serialize, Deserialize)]
862pub struct FlameGraphAnalysisReport {
863    pub total_samples: usize,
864    pub total_duration_ns: u64,
865    pub unique_functions: u64,
866    pub max_stack_depth: u64,
867    pub hot_functions: Vec<HotFunctionInfo>,
868    pub memory_usage_stats: MemoryUsageStats,
869    pub gpu_kernel_stats: GpuKernelStats,
870    pub differential_analysis: Option<DifferentialAnalysis>,
871    pub performance_insights: Vec<String>,
872}
873
874/// Default configuration for flame graphs
875impl Default for FlameGraphConfig {
876    fn default() -> Self {
877        Self {
878            sampling_rate: 1000, // 1000 Hz
879            min_width: 0.01,
880            color_scheme: FlameGraphColorScheme::Hot,
881            direction: FlameGraphDirection::TopDown,
882            title: "Flame Graph".to_string(),
883            subtitle: None,
884            include_memory: true,
885            include_gpu: true,
886            differential_mode: false,
887            merge_similar_stacks: true,
888            filter_noise: true,
889            noise_threshold: 0.1, // 0.1%
890        }
891    }
892}
893
894/// Integration with main Profiler
895impl Profiler {
896    /// Create flame graph profiler with current configuration
897    pub fn create_flame_graph_profiler(&self) -> FlameGraphProfiler {
898        let config = FlameGraphConfig {
899            title: "TrustformeRS Debug Flame Graph".to_string(),
900            subtitle: Some("Performance Analysis".to_string()),
901            ..Default::default()
902        };
903        FlameGraphProfiler::new(config)
904    }
905
906    /// Start flame graph profiling
907    pub async fn start_flame_graph_profiling(&mut self) -> Result<()> {
908        // This would integrate with the main profiler's timing events
909        tracing::info!("Starting integrated flame graph profiling");
910        Ok(())
911    }
912
913    /// Export flame graph from current profiling data
914    pub async fn export_flame_graph(
915        &self,
916        format: FlameGraphExportFormat,
917        output_path: &Path,
918    ) -> Result<()> {
919        let mut flame_profiler = self.create_flame_graph_profiler();
920
921        // Convert existing events to flame graph samples
922        for event in self.get_events() {
923            match event {
924                ProfileEvent::FunctionCall {
925                    function_name,
926                    duration,
927                    ..
928                } => {
929                    let sample = FlameGraphSample {
930                        stack: vec![StackFrame {
931                            function_name: function_name.clone(),
932                            module_name: None,
933                            file_name: None,
934                            line_number: None,
935                            address: None,
936                        }],
937                        duration_ns: duration.as_nanos() as u64,
938                        timestamp: 0,
939                        thread_id: 0,
940                        cpu_id: None,
941                        memory_usage: None,
942                        gpu_kernel: None,
943                        metadata: HashMap::new(),
944                    };
945                    flame_profiler.add_sample(sample);
946                },
947                ProfileEvent::LayerExecution {
948                    layer_name,
949                    layer_type,
950                    forward_time,
951                    ..
952                } => {
953                    let sample = FlameGraphSample {
954                        stack: vec![
955                            StackFrame {
956                                function_name: "neural_network".to_string(),
957                                module_name: Some("trustformers".to_string()),
958                                file_name: None,
959                                line_number: None,
960                                address: None,
961                            },
962                            StackFrame {
963                                function_name: format!("{}::{}", layer_type, layer_name),
964                                module_name: Some("layers".to_string()),
965                                file_name: None,
966                                line_number: None,
967                                address: None,
968                            },
969                        ],
970                        duration_ns: forward_time.as_nanos() as u64,
971                        timestamp: 0,
972                        thread_id: 0,
973                        cpu_id: None,
974                        memory_usage: None,
975                        gpu_kernel: None,
976                        metadata: HashMap::new(),
977                    };
978                    flame_profiler.add_sample(sample);
979                },
980                _ => {}, // Handle other event types as needed
981            }
982        }
983
984        flame_profiler.build_flame_graph()?;
985        flame_profiler.export(format, output_path).await?;
986        Ok(())
987    }
988}