1use anyhow::Result;
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::path::Path;
7use std::time::{Instant, SystemTime};
8
9use crate::profiler::{ProfileEvent, Profiler};
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct FlameGraphNode {
14 pub name: String,
15 pub value: u64,
16 pub delta: Option<i64>, pub children: HashMap<String, FlameGraphNode>,
18 pub total_value: u64,
19 pub self_value: u64,
20 pub percentage: f64,
21 pub color: Option<String>,
22 pub metadata: HashMap<String, String>,
23}
24
/// A single frame of a captured call stack.
///
/// Only `function_name` is mandatory; every other field is optional location
/// information.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct StackFrame {
    /// Name of the function this frame belongs to.
    pub function_name: String,
    /// Module the function lives in, when known.
    pub module_name: Option<String>,
    /// Source file of the frame, when known.
    pub file_name: Option<String>,
    /// Source line of the frame, when known.
    pub line_number: Option<u32>,
    /// Address associated with the frame, when known.
    pub address: Option<u64>,
}
34
35#[derive(Debug, Clone)]
37pub struct FlameGraphSample {
38 pub stack: Vec<StackFrame>,
39 pub duration_ns: u64,
40 pub timestamp: u64,
41 pub thread_id: u64,
42 pub cpu_id: Option<u32>,
43 pub memory_usage: Option<usize>,
44 pub gpu_kernel: Option<String>,
45 pub metadata: HashMap<String, String>,
46}
47
48#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct FlameGraphConfig {
51 pub sampling_rate: u32, pub min_width: f64, pub color_scheme: FlameGraphColorScheme,
54 pub direction: FlameGraphDirection,
55 pub title: String,
56 pub subtitle: Option<String>,
57 pub include_memory: bool,
58 pub include_gpu: bool,
59 pub differential_mode: bool,
60 pub merge_similar_stacks: bool,
61 pub filter_noise: bool,
62 pub noise_threshold: f64,
63}
64
65#[derive(Debug, Clone, Serialize, Deserialize)]
66pub enum FlameGraphColorScheme {
67 Hot, Cool, Java, Memory, Differential, Random, Custom(HashMap<String, String>),
74}
75
76#[derive(Debug, Clone, Serialize, Deserialize)]
77pub enum FlameGraphDirection {
78 TopDown, BottomUp, }
81
82#[derive(Debug, Clone, Serialize, Deserialize)]
84pub enum FlameGraphExportFormat {
85 SVG,
86 InteractiveHTML,
87 JSON,
88 Speedscope,
89 D3,
90 Folded,
91}
92
93#[derive(Debug)]
95#[allow(dead_code)]
96pub struct FlameGraphProfiler {
97 config: FlameGraphConfig,
98 samples: Vec<FlameGraphSample>,
99 sampling_timer: Option<Instant>,
100 root_node: Option<FlameGraphNode>,
101 baseline_samples: Option<Vec<FlameGraphSample>>, #[allow(dead_code)]
103 metadata: HashMap<String, String>,
104 current_cpu_usage: f64,
105 current_memory_usage: usize,
106 performance_counters: HashMap<String, u64>,
107}
108
109impl FlameGraphProfiler {
110 pub fn new(config: FlameGraphConfig) -> Self {
112 Self {
113 config,
114 samples: Vec::new(),
115 sampling_timer: None,
116 root_node: None,
117 baseline_samples: None,
118 metadata: HashMap::new(),
119 current_cpu_usage: 0.0,
120 current_memory_usage: 0,
121 performance_counters: HashMap::new(),
122 }
123 }
124
125 pub fn start_sampling(&mut self) -> Result<()> {
127 tracing::info!(
128 "Starting flame graph sampling at {} Hz",
129 self.config.sampling_rate
130 );
131 self.sampling_timer = Some(Instant::now());
132 self.samples.clear();
133 self.root_node = None;
134
135 self.performance_counters.insert("samples_collected".to_string(), 0);
137 self.performance_counters.insert("stack_depth_max".to_string(), 0);
138 self.performance_counters.insert("unique_functions".to_string(), 0);
139
140 Ok(())
141 }
142
143 pub fn stop_sampling(&mut self) -> Result<()> {
145 tracing::info!(
146 "Stopping flame graph sampling, collected {} samples",
147 self.samples.len()
148 );
149 self.sampling_timer = None;
150 self.build_flame_graph()?;
151 Ok(())
152 }
153
154 pub fn add_sample(&mut self, sample: FlameGraphSample) {
156 if let Some(counter) = self.performance_counters.get_mut("samples_collected") {
158 *counter += 1;
159 }
160
161 let stack_depth = sample.stack.len() as u64;
162 if let Some(max_depth) = self.performance_counters.get_mut("stack_depth_max") {
163 if stack_depth > *max_depth {
164 *max_depth = stack_depth;
165 }
166 }
167
168 self.samples.push(sample);
169 }
170
171 pub fn sample_current_stack(&mut self, duration_ns: u64) -> Result<()> {
173 let stack = self.capture_stack_trace()?;
174 let sample = FlameGraphSample {
175 stack,
176 duration_ns,
177 timestamp: SystemTime::now().duration_since(SystemTime::UNIX_EPOCH)?.as_nanos() as u64,
178 thread_id: self.get_current_thread_id(),
179 cpu_id: self.get_current_cpu_id(),
180 memory_usage: Some(self.current_memory_usage),
181 gpu_kernel: None,
182 metadata: HashMap::new(),
183 };
184
185 self.add_sample(sample);
186 Ok(())
187 }
188
189 pub fn sample_gpu_kernel(&mut self, kernel_name: &str, duration_ns: u64) {
191 let stack = vec![StackFrame {
192 function_name: format!("GPU::{}", kernel_name),
193 module_name: Some("GPU".to_string()),
194 file_name: None,
195 line_number: None,
196 address: None,
197 }];
198
199 let sample = FlameGraphSample {
200 stack,
201 duration_ns,
202 timestamp: SystemTime::now()
203 .duration_since(SystemTime::UNIX_EPOCH)
204 .unwrap_or_default()
205 .as_nanos() as u64,
206 thread_id: 0, cpu_id: None,
208 memory_usage: None,
209 gpu_kernel: Some(kernel_name.to_string()),
210 metadata: [("type".to_string(), "gpu".to_string())].into_iter().collect(),
211 };
212
213 self.add_sample(sample);
214 }
215
216 pub fn set_baseline(&mut self) {
218 self.baseline_samples = Some(self.samples.clone());
219 tracing::info!("Set baseline with {} samples", self.samples.len());
220 }
221
222 pub fn build_flame_graph(&mut self) -> Result<()> {
224 if self.samples.is_empty() {
225 return Err(anyhow::anyhow!("No samples collected"));
226 }
227
228 let mut root = FlameGraphNode {
229 name: "root".to_string(),
230 value: 0,
231 delta: None,
232 children: HashMap::new(),
233 total_value: 0,
234 self_value: 0,
235 percentage: 100.0,
236 color: None,
237 metadata: HashMap::new(),
238 };
239
240 for sample in &self.samples {
242 self.merge_sample_into_tree(&mut root, sample);
243 }
244
245 self.calculate_node_metrics(&mut root);
247
248 if self.config.differential_mode && self.baseline_samples.is_some() {
250 self.apply_differential_analysis(&mut root)?;
251 }
252
253 if self.config.filter_noise {
255 self.filter_noise_nodes(&mut root);
256 }
257
258 let unique_functions = self.count_unique_functions(&root);
260 if let Some(counter) = self.performance_counters.get_mut("unique_functions") {
261 *counter = unique_functions;
262 }
263
264 self.root_node = Some(root);
265 tracing::info!(
266 "Built flame graph with {} unique functions",
267 unique_functions
268 );
269 Ok(())
270 }
271
272 pub async fn export(&self, format: FlameGraphExportFormat, output_path: &Path) -> Result<()> {
274 let root = self
275 .root_node
276 .as_ref()
277 .ok_or_else(|| anyhow::anyhow!("Flame graph not built yet"))?;
278
279 match format {
280 FlameGraphExportFormat::SVG => self.export_svg(root, output_path).await,
281 FlameGraphExportFormat::InteractiveHTML => {
282 self.export_interactive_html(root, output_path).await
283 },
284 FlameGraphExportFormat::JSON => self.export_json(root, output_path).await,
285 FlameGraphExportFormat::Speedscope => self.export_speedscope(root, output_path).await,
286 FlameGraphExportFormat::D3 => self.export_d3(root, output_path).await,
287 FlameGraphExportFormat::Folded => self.export_folded(output_path).await,
288 }
289 }
290
291 async fn export_svg(&self, root: &FlameGraphNode, output_path: &Path) -> Result<()> {
293 let mut svg_content = String::new();
294
295 svg_content.push_str(&format!(
297 r##"<?xml version="1.0" encoding="UTF-8"?>
298<svg width="1200" height="800" xmlns="http://www.w3.org/2000/svg">
299<defs>
300 <linearGradient id="background" x1="0%" y1="0%" x2="0%" y2="100%">
301 <stop offset="0%" style="stop-color:#eeeeee"/>
302 <stop offset="100%" style="stop-color:#eeeeb0"/>
303 </linearGradient>
304</defs>
305<rect width="100%" height="100%" fill="url(#background)"/>
306<text x="600" y="24" text-anchor="middle" font-size="17" font-family="Verdana">{}</text>
307<text x="600" y="44" text-anchor="middle" font-size="12" font-family="Verdana" fill="#999">
308 {} samples, {} functions
309</text>
310"##,
311 self.config.title,
312 self.samples.len(),
313 self.count_unique_functions(root)
314 ));
315
316 self.render_svg_node(&mut svg_content, root, 0, 0, 1200, 0)?;
318
319 svg_content.push_str("</svg>");
320
321 tokio::fs::write(output_path, svg_content).await?;
322 tracing::info!("Exported SVG flame graph to {:?}", output_path);
323 Ok(())
324 }
325
326 async fn export_interactive_html(
328 &self,
329 root: &FlameGraphNode,
330 output_path: &Path,
331 ) -> Result<()> {
332 let json_data = serde_json::to_string(root)?;
333
334 let html_content = format!(
335 r#"<!DOCTYPE html>
336<html>
337<head>
338 <title>{}</title>
339 <meta charset="utf-8">
340 <style>
341 body {{ font-family: Arial, sans-serif; margin: 0; padding: 20px; }}
342 .flame-graph {{ width: 100%; height: 600px; border: 1px solid #ccc; }}
343 .tooltip {{ position: absolute; background: rgba(0,0,0,0.8); color: white;
344 padding: 10px; border-radius: 4px; pointer-events: none; z-index: 1000; }}
345 .controls {{ margin-bottom: 20px; }}
346 .info {{ margin-top: 20px; font-size: 14px; color: #666; }}
347 </style>
348 <script src="https://d3js.org/d3.v7.min.js"></script>
349</head>
350<body>
351 <h1>{}</h1>
352 <div class="controls">
353 <button onclick="resetZoom()">Reset Zoom</button>
354 <button onclick="searchFunction()">Search</button>
355 <input type="text" id="searchInput" placeholder="Function name...">
356 </div>
357 <div id="flame-graph" class="flame-graph"></div>
358 <div class="info">
359 <p>Samples: {} | Functions: {} | Total Time: {:.2}ms</p>
360 <p>Click to zoom, double-click to reset. Hover for details.</p>
361 </div>
362 <div id="tooltip" class="tooltip" style="display: none;"></div>
363
364 <script>
365 const data = {};
366 // Interactive flame graph implementation would go here
367 // This is a simplified version - full implementation would include D3.js visualization
368 console.log('Flame graph data loaded:', data);
369 </script>
370</body>
371</html>"#,
372 self.config.title,
373 self.config.title,
374 self.samples.len(),
375 self.count_unique_functions(root),
376 root.total_value as f64 / 1_000_000.0, json_data
378 );
379
380 tokio::fs::write(output_path, html_content).await?;
381 tracing::info!("Exported interactive HTML flame graph to {:?}", output_path);
382 Ok(())
383 }
384
385 async fn export_json(&self, root: &FlameGraphNode, output_path: &Path) -> Result<()> {
387 let json_data = serde_json::to_string_pretty(root)?;
388 tokio::fs::write(output_path, json_data).await?;
389 tracing::info!("Exported JSON flame graph to {:?}", output_path);
390 Ok(())
391 }
392
393 async fn export_speedscope(&self, root: &FlameGraphNode, output_path: &Path) -> Result<()> {
395 let speedscope_data = self.convert_to_speedscope_format(root)?;
396 let json_data = serde_json::to_string_pretty(&speedscope_data)?;
397 tokio::fs::write(output_path, json_data).await?;
398 tracing::info!("Exported Speedscope format to {:?}", output_path);
399 Ok(())
400 }
401
402 async fn export_d3(&self, root: &FlameGraphNode, output_path: &Path) -> Result<()> {
404 let d3_data = self.convert_to_d3_format(root)?;
405 let json_data = serde_json::to_string_pretty(&d3_data)?;
406 tokio::fs::write(output_path, json_data).await?;
407 tracing::info!("Exported D3 format to {:?}", output_path);
408 Ok(())
409 }
410
411 async fn export_folded(&self, output_path: &Path) -> Result<()> {
413 let mut folded_content = String::new();
414
415 for sample in &self.samples {
416 let stack_str: Vec<String> =
417 sample.stack.iter().map(|frame| frame.function_name.clone()).collect();
418 folded_content.push_str(&format!("{} {}\n", stack_str.join(";"), sample.duration_ns));
419 }
420
421 tokio::fs::write(output_path, folded_content).await?;
422 tracing::info!("Exported folded format to {:?}", output_path);
423 Ok(())
424 }
425
426 pub fn get_analysis_report(&self) -> FlameGraphAnalysisReport {
428 let root = self.root_node.as_ref();
429
430 FlameGraphAnalysisReport {
431 total_samples: self.samples.len(),
432 total_duration_ns: self.samples.iter().map(|s| s.duration_ns).sum(),
433 unique_functions: root.map(|r| self.count_unique_functions(r)).unwrap_or(0),
434 max_stack_depth: self.performance_counters.get("stack_depth_max").copied().unwrap_or(0),
435 hot_functions: self.get_hot_functions(10),
436 memory_usage_stats: self.get_memory_usage_stats(),
437 gpu_kernel_stats: self.get_gpu_kernel_stats(),
438 differential_analysis: self.get_differential_analysis(),
439 performance_insights: self.generate_performance_insights(),
440 }
441 }
442
443 fn capture_stack_trace(&self) -> Result<Vec<StackFrame>> {
446 Ok(vec![StackFrame {
449 function_name: "captured_function".to_string(),
450 module_name: Some("trustformers_debug".to_string()),
451 file_name: Some("profiler.rs".to_string()),
452 line_number: Some(1800),
453 address: None,
454 }])
455 }
456
457 fn get_current_thread_id(&self) -> u64 {
458 1
460 }
461
462 fn get_current_cpu_id(&self) -> Option<u32> {
463 Some(0)
465 }
466
467 fn merge_sample_into_tree(&self, node: &mut FlameGraphNode, sample: &FlameGraphSample) {
468 if sample.stack.is_empty() {
469 node.value += sample.duration_ns;
470 return;
471 }
472
473 let frame = &sample.stack[0];
474 let child =
475 node.children
476 .entry(frame.function_name.clone())
477 .or_insert_with(|| FlameGraphNode {
478 name: frame.function_name.clone(),
479 value: 0,
480 delta: None,
481 children: HashMap::new(),
482 total_value: 0,
483 self_value: 0,
484 percentage: 0.0,
485 color: None,
486 metadata: HashMap::new(),
487 });
488
489 if sample.stack.len() == 1 {
490 child.value += sample.duration_ns;
491 } else {
492 let mut remaining_sample = sample.clone();
493 remaining_sample.stack = sample.stack[1..].to_vec();
494 self.merge_sample_into_tree(child, &remaining_sample);
495 }
496 }
497
498 fn calculate_node_metrics(&self, node: &mut FlameGraphNode) {
499 let mut total_children_value = 0;
500
501 for child in node.children.values_mut() {
502 self.calculate_node_metrics(child);
503 total_children_value += child.total_value;
504 }
505
506 node.total_value = node.value + total_children_value;
507 node.self_value = node.value;
508
509 if node.total_value > 0 && node.name != "root" {
510 let total_for_percentage = if let Some(root) = &self.root_node {
512 root.total_value
513 } else {
514 node.total_value };
516
517 if total_for_percentage > 0 {
518 node.percentage = (node.total_value as f64 / total_for_percentage as f64) * 100.0;
519 }
520 }
521 }
522
523 fn apply_differential_analysis(&self, node: &mut FlameGraphNode) -> Result<()> {
524 if let Some(baseline_samples) = &self.baseline_samples {
525 let mut baseline_root = FlameGraphNode {
527 name: "root".to_string(),
528 value: 0,
529 delta: None,
530 children: HashMap::new(),
531 total_value: 0,
532 self_value: 0,
533 percentage: 100.0,
534 color: None,
535 metadata: HashMap::new(),
536 };
537
538 for sample in baseline_samples {
539 self.merge_sample_into_tree(&mut baseline_root, sample);
540 }
541
542 self.calculate_deltas(node, &baseline_root);
544 }
545 Ok(())
546 }
547
548 fn calculate_deltas(&self, current: &mut FlameGraphNode, baseline: &FlameGraphNode) {
549 let baseline_value =
550 baseline.children.get(¤t.name).map(|n| n.total_value as i64).unwrap_or(0);
551
552 current.delta = Some(current.total_value as i64 - baseline_value);
553
554 for (name, child) in &mut current.children {
555 if let Some(baseline_child) = baseline.children.get(name) {
556 self.calculate_deltas(child, baseline_child);
557 } else {
558 child.delta = Some(child.total_value as i64);
559 }
560 }
561 }
562
563 fn filter_noise_nodes(&self, node: &mut FlameGraphNode) {
564 let threshold = (node.total_value as f64 * self.config.noise_threshold / 100.0) as u64;
565
566 node.children.retain(|_, child| {
567 self.filter_noise_nodes(child);
568 child.total_value >= threshold
569 });
570 }
571
572 fn count_unique_functions(&self, node: &FlameGraphNode) -> u64 {
573 let mut count = 1; for child in node.children.values() {
575 count += self.count_unique_functions(child);
576 }
577 count
578 }
579
580 fn render_svg_node(
581 &self,
582 svg: &mut String,
583 node: &FlameGraphNode,
584 x: i32,
585 y: i32,
586 width: i32,
587 depth: i32,
588 ) -> Result<()> {
589 if width < 1 {
590 return Ok(());
591 }
592
593 let height = 20;
594 let color = self.get_node_color(node);
595
596 svg.push_str(&format!(
597 r#"<rect x="{}" y="{}" width="{}" height="{}" fill="{}" stroke="white" stroke-width="0.5">
598<title>{}: {:.2}% ({} samples)</title>
599</rect>
600<text x="{}" y="{}" font-size="12" font-family="Verdana" fill="black">{}</text>
601"#,
602 x, y + depth * height, width, height,
603 color,
604 node.name, node.percentage, node.value,
605 x + 2, y + depth * height + 14,
606 if width > 50 { &node.name } else { "" }
607 ));
608
609 let mut child_x = x;
611 for child in node.children.values() {
612 let child_width = if node.total_value > 0 {
613 (width as f64 * child.total_value as f64 / node.total_value as f64) as i32
614 } else {
615 0
616 };
617 if child_width > 0 {
618 self.render_svg_node(svg, child, child_x, y, child_width, depth + 1)?;
619 child_x += child_width;
620 }
621 }
622
623 Ok(())
624 }
625
626 fn get_node_color(&self, node: &FlameGraphNode) -> String {
627 match &self.config.color_scheme {
628 FlameGraphColorScheme::Hot => {
629 let intensity = (node.percentage / 100.0 * 255.0) as u8;
630 format!("rgb({}, {}, 0)", 255, 255 - intensity)
631 },
632 FlameGraphColorScheme::Cool => {
633 let intensity = (node.percentage / 100.0 * 255.0) as u8;
634 format!("rgb(0, {}, {})", intensity, 255)
635 },
636 FlameGraphColorScheme::Memory => {
637 if node.name.contains("alloc") || node.name.contains("malloc") {
638 "#ff6b6b".to_string()
639 } else {
640 "#4ecdc4".to_string()
641 }
642 },
643 FlameGraphColorScheme::Differential => {
644 match node.delta {
645 Some(delta) if delta > 0 => "#ff4444".to_string(), Some(delta) if delta < 0 => "#44ff44".to_string(), _ => "#cccccc".to_string(), }
649 },
650 FlameGraphColorScheme::Java => "#ff9800".to_string(),
651 FlameGraphColorScheme::Random => {
652 let hash = self.hash_string(&node.name);
653 format!("hsl({}, 70%, 60%)", hash % 360)
654 },
655 FlameGraphColorScheme::Custom(colors) => {
656 colors.get(&node.name).cloned().unwrap_or_else(|| "#cccccc".to_string())
657 },
658 }
659 }
660
661 fn hash_string(&self, s: &str) -> u32 {
662 let mut hash = 0u32;
663 for byte in s.bytes() {
664 hash = hash.wrapping_mul(31).wrapping_add(byte as u32);
665 }
666 hash
667 }
668
669 fn convert_to_speedscope_format(&self, root: &FlameGraphNode) -> Result<serde_json::Value> {
670 Ok(serde_json::json!({
672 "version": "0.7.1",
673 "profiles": [{
674 "type": "sampled",
675 "name": self.config.title,
676 "unit": "nanoseconds",
677 "startValue": 0,
678 "endValue": root.total_value,
679 "samples": [],
680 "weights": []
681 }]
682 }))
683 }
684
685 fn convert_to_d3_format(&self, root: &FlameGraphNode) -> Result<serde_json::Value> {
686 Ok(serde_json::to_value(root)?)
687 }
688
689 fn get_hot_functions(&self, limit: usize) -> Vec<HotFunctionInfo> {
690 let mut functions = Vec::new();
691
692 if let Some(root) = &self.root_node {
693 self.collect_hot_functions(root, &mut functions);
694 }
695
696 functions.sort_by_key(|item| std::cmp::Reverse(item.total_time_ns));
697 functions.truncate(limit);
698 functions
699 }
700
701 fn collect_hot_functions(&self, node: &FlameGraphNode, functions: &mut Vec<HotFunctionInfo>) {
702 functions.push(HotFunctionInfo {
703 name: node.name.clone(),
704 total_time_ns: node.total_value,
705 self_time_ns: node.self_value,
706 percentage: node.percentage,
707 call_count: 1, });
709
710 for child in node.children.values() {
711 self.collect_hot_functions(child, functions);
712 }
713 }
714
715 fn get_memory_usage_stats(&self) -> MemoryUsageStats {
716 let memory_samples: Vec<usize> =
717 self.samples.iter().filter_map(|s| s.memory_usage).collect();
718
719 if memory_samples.is_empty() {
720 return MemoryUsageStats::default();
721 }
722
723 let total: usize = memory_samples.iter().sum();
724 let max = memory_samples.iter().max().copied().unwrap_or(0);
725 let min = memory_samples.iter().min().copied().unwrap_or(0);
726 let avg = total / memory_samples.len();
727
728 MemoryUsageStats {
729 peak_memory_bytes: max,
730 avg_memory_bytes: avg,
731 min_memory_bytes: min,
732 total_samples: memory_samples.len(),
733 }
734 }
735
736 fn get_gpu_kernel_stats(&self) -> GpuKernelStats {
737 let gpu_samples: Vec<&FlameGraphSample> =
738 self.samples.iter().filter(|s| s.gpu_kernel.is_some()).collect();
739
740 let total_gpu_time: u64 = gpu_samples.iter().map(|s| s.duration_ns).sum();
741 let unique_kernels: std::collections::HashSet<String> =
742 gpu_samples.iter().filter_map(|s| s.gpu_kernel.clone()).collect();
743
744 GpuKernelStats {
745 total_kernel_time_ns: total_gpu_time,
746 unique_kernels: unique_kernels.len(),
747 total_kernel_calls: gpu_samples.len(),
748 }
749 }
750
751 fn get_differential_analysis(&self) -> Option<DifferentialAnalysis> {
752 if !self.config.differential_mode || self.baseline_samples.is_none() {
753 return None;
754 }
755
756 let current_total: u64 = self.samples.iter().map(|s| s.duration_ns).sum();
757 let baseline_total: u64 =
758 self.baseline_samples.as_ref()?.iter().map(|s| s.duration_ns).sum();
759
760 let performance_change = if baseline_total > 0 {
761 ((current_total as f64 - baseline_total as f64) / baseline_total as f64) * 100.0
762 } else {
763 0.0
764 };
765
766 Some(DifferentialAnalysis {
767 baseline_samples: self.baseline_samples.as_ref()?.len(),
768 current_samples: self.samples.len(),
769 performance_change_percent: performance_change,
770 is_regression: performance_change > 5.0,
771 is_improvement: performance_change < -5.0,
772 })
773 }
774
775 fn generate_performance_insights(&self) -> Vec<String> {
776 let mut insights = Vec::new();
777
778 if let Some(root) = &self.root_node {
779 let hot_functions = self.get_hot_functions(3);
780
781 if let Some(hottest) = hot_functions.first() {
782 if hottest.percentage > 50.0 {
783 insights.push(format!(
784 "Function '{}' dominates execution time ({:.1}%)",
785 hottest.name, hottest.percentage
786 ));
787 }
788 }
789
790 let gpu_stats = self.get_gpu_kernel_stats();
791 if gpu_stats.total_kernel_calls > 0 {
792 let gpu_percentage =
793 (gpu_stats.total_kernel_time_ns as f64 / root.total_value as f64) * 100.0;
794 insights.push(format!(
795 "GPU kernels account for {:.1}% of execution time",
796 gpu_percentage
797 ));
798 }
799
800 if let Some(diff) = self.get_differential_analysis() {
801 if diff.is_regression {
802 insights.push(format!(
803 "Performance regression detected: {:.1}% slower than baseline",
804 diff.performance_change_percent
805 ));
806 } else if diff.is_improvement {
807 insights.push(format!(
808 "Performance improvement: {:.1}% faster than baseline",
809 -diff.performance_change_percent
810 ));
811 }
812 }
813 }
814
815 if insights.is_empty() {
816 insights.push("No significant performance patterns detected".to_string());
817 }
818
819 insights
820 }
821}
822
823#[derive(Debug, Clone, Serialize, Deserialize)]
825pub struct HotFunctionInfo {
826 pub name: String,
827 pub total_time_ns: u64,
828 pub self_time_ns: u64,
829 pub percentage: f64,
830 pub call_count: usize,
831}
832
833#[derive(Debug, Clone, Serialize, Deserialize, Default)]
835pub struct MemoryUsageStats {
836 pub peak_memory_bytes: usize,
837 pub avg_memory_bytes: usize,
838 pub min_memory_bytes: usize,
839 pub total_samples: usize,
840}
841
842#[derive(Debug, Clone, Serialize, Deserialize)]
844pub struct GpuKernelStats {
845 pub total_kernel_time_ns: u64,
846 pub unique_kernels: usize,
847 pub total_kernel_calls: usize,
848}
849
850#[derive(Debug, Clone, Serialize, Deserialize)]
852pub struct DifferentialAnalysis {
853 pub baseline_samples: usize,
854 pub current_samples: usize,
855 pub performance_change_percent: f64,
856 pub is_regression: bool,
857 pub is_improvement: bool,
858}
859
860#[derive(Debug, Clone, Serialize, Deserialize)]
862pub struct FlameGraphAnalysisReport {
863 pub total_samples: usize,
864 pub total_duration_ns: u64,
865 pub unique_functions: u64,
866 pub max_stack_depth: u64,
867 pub hot_functions: Vec<HotFunctionInfo>,
868 pub memory_usage_stats: MemoryUsageStats,
869 pub gpu_kernel_stats: GpuKernelStats,
870 pub differential_analysis: Option<DifferentialAnalysis>,
871 pub performance_insights: Vec<String>,
872}
873
874impl Default for FlameGraphConfig {
876 fn default() -> Self {
877 Self {
878 sampling_rate: 1000, min_width: 0.01,
880 color_scheme: FlameGraphColorScheme::Hot,
881 direction: FlameGraphDirection::TopDown,
882 title: "Flame Graph".to_string(),
883 subtitle: None,
884 include_memory: true,
885 include_gpu: true,
886 differential_mode: false,
887 merge_similar_stacks: true,
888 filter_noise: true,
889 noise_threshold: 0.1, }
891 }
892}
893
894impl Profiler {
896 pub fn create_flame_graph_profiler(&self) -> FlameGraphProfiler {
898 let config = FlameGraphConfig {
899 title: "TrustformeRS Debug Flame Graph".to_string(),
900 subtitle: Some("Performance Analysis".to_string()),
901 ..Default::default()
902 };
903 FlameGraphProfiler::new(config)
904 }
905
906 pub async fn start_flame_graph_profiling(&mut self) -> Result<()> {
908 tracing::info!("Starting integrated flame graph profiling");
910 Ok(())
911 }
912
913 pub async fn export_flame_graph(
915 &self,
916 format: FlameGraphExportFormat,
917 output_path: &Path,
918 ) -> Result<()> {
919 let mut flame_profiler = self.create_flame_graph_profiler();
920
921 for event in self.get_events() {
923 match event {
924 ProfileEvent::FunctionCall {
925 function_name,
926 duration,
927 ..
928 } => {
929 let sample = FlameGraphSample {
930 stack: vec![StackFrame {
931 function_name: function_name.clone(),
932 module_name: None,
933 file_name: None,
934 line_number: None,
935 address: None,
936 }],
937 duration_ns: duration.as_nanos() as u64,
938 timestamp: 0,
939 thread_id: 0,
940 cpu_id: None,
941 memory_usage: None,
942 gpu_kernel: None,
943 metadata: HashMap::new(),
944 };
945 flame_profiler.add_sample(sample);
946 },
947 ProfileEvent::LayerExecution {
948 layer_name,
949 layer_type,
950 forward_time,
951 ..
952 } => {
953 let sample = FlameGraphSample {
954 stack: vec![
955 StackFrame {
956 function_name: "neural_network".to_string(),
957 module_name: Some("trustformers".to_string()),
958 file_name: None,
959 line_number: None,
960 address: None,
961 },
962 StackFrame {
963 function_name: format!("{}::{}", layer_type, layer_name),
964 module_name: Some("layers".to_string()),
965 file_name: None,
966 line_number: None,
967 address: None,
968 },
969 ],
970 duration_ns: forward_time.as_nanos() as u64,
971 timestamp: 0,
972 thread_id: 0,
973 cpu_id: None,
974 memory_usage: None,
975 gpu_kernel: None,
976 metadata: HashMap::new(),
977 };
978 flame_profiler.add_sample(sample);
979 },
980 _ => {}, }
982 }
983
984 flame_profiler.build_flame_graph()?;
985 flame_profiler.export(format, output_path).await?;
986 Ok(())
987 }
988}