1#![allow(dead_code)]
73#![allow(unused_imports)]
74#![allow(unused_variables)]
75#![allow(unused_mut)]
76#![allow(static_mut_refs)]
77
78use backtrace::Backtrace;
79use once_cell::sync::Lazy;
80use parking_lot::Mutex;
81use std::sync::Arc;
82use std::time::Instant;
83use torsh_core::{Result, TorshError};
84
/// Crate-wide convenience alias for `torsh_core::Result`.
pub type TorshResult<T> = Result<T>;
87
88pub mod core;
94
95pub mod platforms;
97
98pub mod analysis;
100
101pub mod export;
103
104pub mod distributed;
106
107pub mod advanced_visualization;
112pub mod alerts;
113pub mod amd;
114pub mod attributes;
115pub mod chrome_trace;
116pub mod ci_cd;
117pub mod cloud_providers;
118pub mod cloudwatch;
119pub mod cpu;
120pub mod cross_platform;
121pub mod cuda;
122pub mod custom_export;
123pub mod custom_tools;
124pub mod dashboard;
125pub mod grafana;
126pub mod instruments;
127pub mod integrated_profiler;
128pub mod kubernetes;
129pub mod macros;
130pub mod memory;
131pub mod memory_optimization;
132pub mod ml_analysis;
133pub mod nsight;
134pub mod online_learning;
135pub mod optimization;
136pub mod power;
137pub mod prometheus;
138pub mod regression;
139pub mod reporting;
140pub mod scirs2_integration;
141pub mod streaming;
142pub mod tensorboard;
143pub mod thermal;
144pub mod vtune;
145pub mod workload_characterization;
146
147pub use core::{
153 add_global_event,
154 add_global_event as add_event,
155 clear_global_events,
156 events::*,
158 get_global_stats,
159
160 global_profiler,
161 metrics::*,
162 profile_function_with_category,
163
164 start_profiling,
165 stop_profiling,
166 MetricsScope,
167 Profiler,
169 ScopeGuard,
171};
172
173pub use export::{
175 available_format_names,
176 dashboard::*,
178 export_chrome_trace_format,
179 export_csv_format,
180
181 export_events,
182 export_global_events,
183 export_json_format,
184 formats::*,
185 parse_format,
186 reporting::*,
187 ExportFormat,
188};
189
190pub use prometheus::{PrometheusExporter, PrometheusExporterBuilder};
192
193pub use grafana::{
195 Dashboard as GrafanaDashboard, DashboardTemplates, GrafanaDashboardGenerator, GridPos, Panel,
196 Target,
197};
198
199pub use cloudwatch::{
201 CloudWatchConfig, CloudWatchPublisher, CloudWatchPublisherBuilder, Dimension, MetricDatum,
202 StatisticSet, Unit as CloudWatchUnit,
203};
204
205pub use platforms::{cpu::*, gpu::*, system::*};
207
208pub use analysis::{ml_analysis::*, optimization::*, regression::*};
210
211pub use distributed::profiling::*;
213
214pub use streaming::{
216 create_high_performance_streaming_engine, create_low_latency_streaming_engine,
217 create_streaming_engine, AdaptiveBitrateConfig, AdaptiveRateController, AdjustmentReason,
218 AdvancedFeatures, BitrateAdjustment, BufferedEvent, CompressionAlgorithm, CompressionConfig,
219 CompressionManager, ConnectionManager, ControlMessage, EnhancedStreamingEngine, EventBuffer,
220 EventPriority, ProtocolConfig, QualityConfig, QualityLevel, QualityMetricsThreshold,
221 SSEConnection, StreamConnection, StreamingConfig, StreamingProtocol, StreamingStats,
222 StreamingStatsSnapshot, TcpConnection, UdpConnection, WebSocketConnection, WebSocketMessage,
223};
224
225pub use alerts::{
231 create_alert_manager_with_config, get_alert_manager, AlertConfig, AlertManager,
232 NotificationChannel,
233};
234pub use attributes::{
235 get_registry, with_profiling, AsyncProfiler, AttributeRegistry, ConditionalProfiler,
236 ProfileAttribute, ProfiledFunction, ProfiledStruct,
237};
238pub use chrome_trace::{create_chrome_event, export, export_to_writer, phases, scopes};
239pub use ci_cd::{CiCdConfig, CiCdIntegration, CiCdPlatform};
240pub use cpu::{CpuProfiler, ProfileScope};
241pub use cuda::{
242 get_cuda_device_properties, get_cuda_memory_stats, CudaEvent, CudaMemoryStats, CudaProfiler,
243 CudaSynchronizationStats, NvtxRange,
244};
245pub use custom_export::{
246 CsvColumn, CsvFormatter, CustomExportFormat, CustomExporter, ExportSchema,
247};
248pub use dashboard::alerts::create_alert_manager;
249pub use dashboard::{
250 create_dashboard, create_dashboard_with_config, export_dashboard_html, generate_3d_landscape,
251 generate_performance_heatmap, Dashboard, DashboardAlert, DashboardAlertSeverity,
252 DashboardConfig, DashboardData, HeatmapCell, MemoryMetrics, OperationSummary,
253 PerformanceHeatmap, PerformanceLandscape, PerformanceMetrics, PerformancePoint3D,
254 SystemMetrics, VisualizationColorScheme, VisualizationConfig, WebSocketConfig,
255};
256
257pub use scirs2_integration::{
259 AdvancedProfilingConfig, BenchmarkResults, HistogramStats, MetricsSummary, PerformanceAnalysis,
260 PerformanceTargets, SamplingStrategy, ScirS2EnhancedProfiler, ScirS2ProfilingData,
261 ValidationLevel,
262};
263
264pub use instruments::{
266 create_instruments_profiler, create_instruments_profiler_with_config, export_instruments_json,
267 get_instruments_statistics, AllocationType, EnergyComponent, InstrumentsConfig,
268 InstrumentsExportData, InstrumentsProfiler, InstrumentsStats, SignpostInterval,
269};
270pub use macros::ProfileResult;
271pub use memory::{
272 FragmentationAnalysis, LeakDetectionResults, MemoryBlock, MemoryEvent, MemoryEventType,
273 MemoryLeak, MemoryProfiler, MemoryStats, MemoryTimeline, SystemMemoryInfo,
274};
275pub use memory_optimization::{
276 create_memory_optimizer, create_memory_optimizer_for_low_memory,
277 create_memory_optimizer_with_aggressive_settings, AdaptivePoolManager, AdvancedMemoryOptimizer,
278 MemoryOptimizationConfig, MemoryOptimizationStats, MemorySnapshot, MemoryStrategies,
279 MemoryUsagePredictor, OptimizationExportData, OptimizationStatsSummary,
280};
281
/// Façade that bundles the CPU, GPU and system profiling backends together
/// with a shared event collector, so callers can start/stop/export them as
/// one unit.
pub struct UnifiedProfiler {
    /// CPU profiling backend.
    pub cpu_platform: platforms::cpu::CpuProfilerPlatform,
    /// GPU profiling backend.
    pub gpu_platform: platforms::gpu::GpuProfilerPlatform,
    /// System-level (OS) profiling backend.
    pub system_platform: platforms::system::SystemProfilerPlatform,
    /// Collects the profiling events produced while the platforms run.
    pub event_collector: core::events::EventCollector,
}
293
294impl UnifiedProfiler {
295 pub fn new() -> Self {
297 Self {
298 cpu_platform: platforms::cpu::CpuProfilerPlatform::new(),
299 gpu_platform: platforms::gpu::GpuProfilerPlatform::new(),
300 system_platform: platforms::system::SystemProfilerPlatform::new(),
301 event_collector: core::events::EventCollector::new(),
302 }
303 }
304
305 pub fn with_auto_detection() -> Self {
307 let cpu_platform = platforms::cpu::CpuProfilerPlatform::new().with_cpu_profiler();
308
309 #[cfg(target_os = "macos")]
310 let cpu_platform = cpu_platform.with_instruments();
311
312 #[cfg(target_os = "linux")]
313 let cpu_platform = cpu_platform.with_vtune();
314
315 let gpu_platform = platforms::gpu::GpuProfilerPlatform::new().with_optimal_profiler();
316 let system_platform =
317 platforms::system::SystemProfilerPlatform::new().with_all_system_profiling();
318
319 Self {
320 cpu_platform,
321 gpu_platform,
322 system_platform,
323 event_collector: core::events::EventCollector::new(),
324 }
325 }
326
327 pub fn start_all(&mut self) -> TorshResult<()> {
329 self.cpu_platform.start_profiling()?;
330 self.gpu_platform.start_profiling()?;
331 self.system_platform.start_profiling()?;
332 Ok(())
333 }
334
335 pub fn stop_all(&mut self) -> TorshResult<()> {
337 self.cpu_platform.stop_profiling()?;
338 self.gpu_platform.stop_profiling()?;
339 self.system_platform.stop_profiling()?;
340 Ok(())
341 }
342
343 pub fn export_all(&self, format: export::ExportFormat, base_path: &str) -> TorshResult<()> {
345 let profiling_events = self.event_collector.get_events();
346 let events: Vec<ProfileEvent> = profiling_events
348 .iter()
349 .map(|pe| ProfileEvent {
350 name: pe.name.clone(),
351 category: pe.category.clone(),
352 start_us: pe.start_time.elapsed().as_micros() as u64,
353 duration_us: pe.duration.map(|d| d.as_micros() as u64).unwrap_or(0),
354 thread_id: pe.thread_id,
355 operation_count: None,
356 flops: None,
357 bytes_transferred: None,
358 stack_trace: None,
359 })
360 .collect();
361 export::export_events(&events, format, base_path)
362 }
363}
364
impl Default for UnifiedProfiler {
    /// Equivalent to [`UnifiedProfiler::new`]: default backends, no
    /// platform auto-detection.
    fn default() -> Self {
        Self::new()
    }
}
370
/// Creates a [`UnifiedProfiler`] with OS/platform auto-detection enabled.
pub fn create_unified_profiler() -> UnifiedProfiler {
    UnifiedProfiler::with_auto_detection()
}
379
/// Creates a [`UnifiedProfiler`] with default backends and no auto-detection.
pub fn create_basic_profiler() -> UnifiedProfiler {
    UnifiedProfiler::new()
}
384
385pub fn create_production_profiler() -> UnifiedProfiler {
387 let mut profiler = UnifiedProfiler::with_auto_detection();
388 profiler
390}
391
/// Exports the globally collected events as a Chrome trace file at `path`.
pub fn export_global_trace(path: &str) -> TorshResult<()> {
    export_global_events(export::ExportFormat::ChromeTrace, path)
}
400
/// Exports the globally collected events as JSON at `path`.
pub fn export_global_json(path: &str) -> TorshResult<()> {
    export_global_events(export::ExportFormat::Json, path)
}
404
/// Exports the globally collected events as CSV at `path`.
pub fn export_global_csv(path: &str) -> TorshResult<()> {
    export_global_events(export::ExportFormat::Csv, path)
}
408
/// Exports the global profiler's events in TensorBoard profile format,
/// writing output under `base_path`.
pub fn export_global_tensorboard(base_path: &str) -> TorshResult<()> {
    let profiler_arc = global_profiler();
    let profiler_guard = profiler_arc.lock();
    // Snapshot the events; note the guard stays alive through the export.
    // NOTE(review): consider `drop(profiler_guard)` after the snapshot, as
    // `export_global_custom` does, so the global lock isn't held during I/O.
    let events = profiler_guard.events().to_vec();

    crate::tensorboard::export_tensorboard_profile(&events, base_path)
}
416
/// Process-wide registry of user-defined custom export formats, created
/// lazily on first access.
static GLOBAL_CUSTOM_EXPORTER: Lazy<Mutex<custom_export::CustomExporter>> =
    Lazy::new(|| Mutex::new(custom_export::CustomExporter::new()));
420
/// Returns the names of all custom export formats registered globally.
pub fn get_global_custom_export_formats() -> Vec<String> {
    let exporter = GLOBAL_CUSTOM_EXPORTER.lock();
    exporter.get_format_names()
}
426
/// Registers a custom export format in the process-wide exporter registry.
pub fn register_global_custom_export_format(format: custom_export::CustomExportFormat) {
    let mut exporter = GLOBAL_CUSTOM_EXPORTER.lock();
    exporter.register_format(format);
}
432
/// Exports the global profiler's events through a registered custom format.
///
/// `format_name` must match a format previously registered via
/// [`register_global_custom_export_format`].
pub fn export_global_custom(format_name: &str, path: &str) -> TorshResult<()> {
    let profiler_arc = global_profiler();
    let profiler_guard = profiler_arc.lock();
    let events = profiler_guard.events().to_vec();
    // Release the profiler lock before taking the exporter lock, so the two
    // locks are never held simultaneously.
    drop(profiler_guard);

    let exporter = GLOBAL_CUSTOM_EXPORTER.lock();
    exporter.export(&events, format_name, path)
}
443
/// Enables or disables stack-trace capture on the global profiler.
pub fn set_global_stack_traces_enabled(enabled: bool) {
    core::profiler::set_global_stack_traces_enabled(enabled);
}
448
/// A detected performance anomaly tied to a named event.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PerformanceAnomaly {
    /// Name of the event the anomaly was observed on.
    pub event_name: String,
    /// Human-readable explanation of the anomaly.
    pub description: String,
    /// Detection confidence in `[0.0, 1.0]`.
    pub confidence: f64,
    /// Severity label (free-form string).
    pub severity: String,
}
457
/// A detected memory-behavior anomaly.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct MemoryAnomaly {
    /// Kind of anomaly (free-form string).
    pub anomaly_type: String,
    /// Detection confidence in `[0.0, 1.0]`.
    pub confidence: f64,
}
464
/// Aggregate result of a global anomaly-detection pass.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct AnomalyAnalysis {
    /// Anomalies in operation latency/behavior.
    pub performance_anomalies: Vec<PerformanceAnomaly>,
    /// Anomalies in memory usage.
    pub memory_anomalies: Vec<MemoryAnomaly>,
    /// Free-form descriptions of throughput anomalies.
    pub throughput_anomalies: Vec<String>,
    /// Free-form descriptions of time-distribution anomalies.
    pub temporal_anomalies: Vec<String>,
}
473
/// A recurring performance pattern with an associated optimization hint.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PerformancePattern {
    /// Kind of pattern (free-form string).
    pub pattern_type: String,
    /// Human-readable description of the pattern.
    pub description: String,
    /// Detection confidence in `[0.0, 1.0]`.
    pub confidence_score: f64,
    /// Suggested optimization category.
    pub optimization_type: String,
    /// Estimated improvement if applied (free-form string).
    pub potential_improvement: String,
    /// Rough effort estimate for the fix (free-form string).
    pub implementation_complexity: String,
}
484
/// Aggregate result of a global pattern-detection pass.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PatternAnalysis {
    /// General performance patterns found.
    pub performance_patterns: Vec<PerformancePattern>,
    /// Free-form descriptions of bottleneck patterns.
    pub bottleneck_patterns: Vec<String>,
    /// Free-form descriptions of resource-usage patterns.
    pub resource_patterns: Vec<String>,
    /// Free-form descriptions of temporal patterns.
    pub temporal_patterns: Vec<String>,
    /// Patterns with concrete optimization potential.
    pub optimization_patterns: Vec<PerformancePattern>,
}
494
495pub fn detect_global_anomalies() -> AnomalyAnalysis {
497 AnomalyAnalysis {
498 performance_anomalies: Vec::new(),
499 memory_anomalies: Vec::new(),
500 throughput_anomalies: Vec::new(),
501 temporal_anomalies: Vec::new(),
502 }
503}
504
505pub fn detect_global_patterns() -> PatternAnalysis {
507 PatternAnalysis {
508 performance_patterns: Vec::new(),
509 bottleneck_patterns: Vec::new(),
510 resource_patterns: Vec::new(),
511 temporal_patterns: Vec::new(),
512 optimization_patterns: Vec::new(),
513 }
514}
515
516pub fn export_global_anomaly_analysis(path: &str) -> TorshResult<()> {
518 let analysis = detect_global_anomalies();
519 let json = serde_json::to_string_pretty(&analysis).map_err(|e| {
520 TorshError::SerializationError(format!("Failed to serialize anomaly analysis: {e}"))
521 })?;
522 std::fs::write(path, json)
523 .map_err(|e| TorshError::IoError(format!("Failed to write anomaly analysis: {e}")))?;
524 Ok(())
525}
526
527pub fn export_global_pattern_analysis(path: &str) -> TorshResult<()> {
529 let analysis = detect_global_patterns();
530 let json = serde_json::to_string_pretty(&analysis).map_err(|e| {
531 TorshError::SerializationError(format!("Failed to serialize pattern analysis: {e}"))
532 })?;
533 std::fs::write(path, json)
534 .map_err(|e| TorshError::IoError(format!("Failed to write pattern analysis: {e}")))?;
535 Ok(())
536}
537
538pub use core::metrics::{
540 CorrelationAnalysis, CorrelationStrength, CorrelationSummary, CorrelationType,
541 MemoryCorrelation, OperationCorrelation, PerformanceCorrelation, TemporalCorrelation,
542};
543
/// Analyzes the global profiler's events for correlations.
///
/// Groups events by operation name, then:
/// 1. scores every operation pair by co-occurrence and temporal proximity,
/// 2. scores each operation's duration variance,
/// and finally builds a summary. Returns an all-empty analysis when fewer
/// than two events have been recorded.
pub fn analyze_global_correlations() -> CorrelationAnalysis {
    use crate::core::metrics::*;
    use std::collections::HashMap;

    // Copy the events out of the profiler. NOTE(review): the guard lives
    // until the end of this function, so the global lock is held for the
    // whole analysis — consider dropping it after the snapshot.
    let profiler_arc = global_profiler();
    let profiler_guard = profiler_arc.lock();
    let events = profiler_guard.events().to_vec();

    // Pairwise statistics need at least two events.
    if events.len() < 2 {
        return CorrelationAnalysis {
            operation_correlations: Vec::new(),
            performance_correlations: Vec::new(),
            memory_correlations: Vec::new(),
            temporal_correlations: Vec::new(),
            correlation_summary: CorrelationSummary {
                total_correlations_analyzed: 0,
                strong_correlations_found: 0,
                causal_relationships: 0,
                bottleneck_correlations: 0,
                optimization_opportunities: Vec::new(),
                key_insights: Vec::new(),
            },
        };
    }

    let mut operation_correlations = Vec::new();
    let mut performance_correlations = Vec::new();
    // Currently never populated; kept so the result shape is complete.
    let mut memory_correlations = Vec::new();
    let mut temporal_correlations = Vec::new();

    // Bucket events by operation name.
    let mut operation_groups: HashMap<String, Vec<&ProfileEvent>> = HashMap::new();
    for event in &events {
        operation_groups
            .entry(event.name.clone())
            .or_default()
            .push(event);
    }

    // Phase 1: score every unordered pair of operations (skip(i + 1) avoids
    // self-pairs and duplicates).
    let operations: Vec<String> = operation_groups.keys().cloned().collect();
    for (i, op_a) in operations.iter().enumerate() {
        for op_b in operations.iter().skip(i + 1) {
            let events_a = &operation_groups[op_a];
            let events_b = &operation_groups[op_b];

            let co_occurrence = calculate_co_occurrence(events_a, events_b);
            let temporal_proximity = calculate_temporal_proximity(events_a, events_b);

            // Only record pairs above a minimal relevance threshold.
            if co_occurrence > 0.1 || temporal_proximity > 0.5 {
                let correlation_strength = if co_occurrence > 0.8 && temporal_proximity > 0.8 {
                    CorrelationStrength::VeryStrong
                } else if co_occurrence > 0.6 || temporal_proximity > 0.6 {
                    CorrelationStrength::Strong
                } else if co_occurrence > 0.4 || temporal_proximity > 0.4 {
                    CorrelationStrength::Moderate
                } else {
                    CorrelationStrength::Weak
                };

                let insights =
                    generate_correlation_insights(op_a, op_b, co_occurrence, temporal_proximity);

                operation_correlations.push(OperationCorrelation {
                    operation_a: op_a.clone(),
                    operation_b: op_b.clone(),
                    correlation_coefficient: (co_occurrence + temporal_proximity) / 2.0,
                    co_occurrence_frequency: co_occurrence,
                    temporal_proximity,
                    correlation_strength,
                    // Proximity dominating co-occurrence is read as a
                    // sequential (one-after-the-other) relationship.
                    correlation_type: if temporal_proximity > co_occurrence {
                        CorrelationType::Sequential
                    } else {
                        CorrelationType::Complementary
                    },
                    insights,
                });
            }
        }
    }

    // Phase 2: per-operation duration variance (population variance over
    // the group's durations).
    for event_group in operation_groups.values() {
        if event_group.len() >= 2 {
            let durations: Vec<f64> = event_group.iter().map(|e| e.duration_us as f64).collect();
            let avg_duration = durations.iter().sum::<f64>() / durations.len() as f64;
            let variance = durations
                .iter()
                .map(|d| (d - avg_duration).powi(2))
                .sum::<f64>()
                / durations.len() as f64;

            if variance > 0.0 {
                performance_correlations.push(PerformanceCorrelation {
                    metric_a: "duration".to_string(),
                    metric_b: "variance".to_string(),
                    // Variance relative to the mean, clamped to 1.0.
                    correlation_coefficient: (variance / avg_duration).min(1.0),
                    significance_level: if variance > avg_duration * 0.5 {
                        0.95
                    } else {
                        0.7
                    },
                    sample_size: event_group.len(),
                    correlation_strength: if variance > avg_duration {
                        CorrelationStrength::Strong
                    } else {
                        CorrelationStrength::Moderate
                    },
                });
            }
        }
    }

    // Phase 3: summary counts over both correlation lists.
    let total_correlations = operation_correlations.len() + performance_correlations.len();
    let strong_count = operation_correlations
        .iter()
        .filter(|c| {
            matches!(
                c.correlation_strength,
                CorrelationStrength::Strong | CorrelationStrength::VeryStrong
            )
        })
        .count()
        + performance_correlations
            .iter()
            .filter(|c| {
                matches!(
                    c.correlation_strength,
                    CorrelationStrength::Strong | CorrelationStrength::VeryStrong
                )
            })
            .count();

    let correlation_summary = CorrelationSummary {
        total_correlations_analyzed: total_correlations,
        strong_correlations_found: strong_count,
        // Phase 1 only ever emits Sequential/Complementary, so these two
        // counts are currently always zero.
        causal_relationships: operation_correlations
            .iter()
            .filter(|c| matches!(c.correlation_type, CorrelationType::Causal))
            .count(),
        bottleneck_correlations: operation_correlations
            .iter()
            .filter(|c| matches!(c.correlation_type, CorrelationType::Competitive))
            .count(),
        optimization_opportunities: operation_correlations
            .iter()
            .take(3)
            .map(|c| {
                format!(
                    "{} ↔ {}: Consider optimization",
                    c.operation_a, c.operation_b
                )
            })
            .collect(),
        key_insights: vec![
            format!(
                "Found {} operation correlations with {} strong relationships",
                operation_correlations.len(),
                strong_count
            ),
            "Operations with high co-occurrence may benefit from batching".to_string(),
            "Sequential operations may benefit from pipelining optimizations".to_string(),
        ],
    };

    CorrelationAnalysis {
        operation_correlations,
        performance_correlations,
        memory_correlations,
        temporal_correlations,
        correlation_summary,
    }
}
720
721fn calculate_co_occurrence(events_a: &[&ProfileEvent], events_b: &[&ProfileEvent]) -> f64 {
723 let mut co_occurrences = 0;
724 let window_us = 10000; for event_a in events_a {
727 for event_b in events_b {
728 let time_diff = if event_a.start_us > event_b.start_us {
729 event_a.start_us - event_b.start_us
730 } else {
731 event_b.start_us - event_a.start_us
732 };
733
734 if time_diff <= window_us {
735 co_occurrences += 1;
736 break;
737 }
738 }
739 }
740
741 co_occurrences as f64 / events_a.len().max(events_b.len()) as f64
742}
743
744fn calculate_temporal_proximity(events_a: &[&ProfileEvent], events_b: &[&ProfileEvent]) -> f64 {
745 if events_a.is_empty() || events_b.is_empty() {
746 return 0.0;
747 }
748
749 let avg_gap = events_a
750 .iter()
751 .zip(events_b.iter())
752 .map(|(a, b)| {
753 if a.start_us > b.start_us {
754 a.start_us - b.start_us
755 } else {
756 b.start_us - a.start_us
757 }
758 })
759 .sum::<u64>() as f64
760 / events_a.len().min(events_b.len()) as f64;
761
762 1.0 / (1.0 + avg_gap / 1000000.0) }
765
/// Builds human-readable insight strings for a correlated operation pair.
///
/// Emits up to three messages, in fixed order, depending on how strong the
/// co-occurrence and temporal-proximity scores are.
fn generate_correlation_insights(
    op_a: &str,
    op_b: &str,
    co_occurrence: f64,
    temporal_proximity: f64,
) -> Vec<String> {
    let batching_hint = (co_occurrence > 0.8).then(|| {
        format!(
            "{} and {} frequently occur together - consider batching",
            op_a, op_b
        )
    });

    let proximity_hint = (temporal_proximity > 0.8).then(|| {
        format!(
            "{} and {} have high temporal proximity - potential for optimization",
            op_a, op_b
        )
    });

    let dependency_hint = (co_occurrence > 0.5 && temporal_proximity > 0.5)
        .then(|| "Strong correlation suggests dependency relationship".to_string());

    [batching_hint, proximity_hint, dependency_hint]
        .into_iter()
        .flatten()
        .collect()
}
794
/// Writes a placeholder "performance trends" HTML page to `path`, reporting
/// only the number of recorded events.
///
/// Takes an already-held profiler guard so the caller controls the locking
/// scope.
pub fn export_performance_trend_chart(
    profiler: &parking_lot::MutexGuard<'_, Profiler>,
    path: &str,
) -> TorshResult<()> {
    let html = format!(
        r#"<!DOCTYPE html>
<html>
<head><title>Performance Trends</title></head>
<body>
<h1>Performance Trends</h1>
<p>Total events: {}</p>
<p>Chart generation placeholder</p>
</body>
</html>"#,
        profiler.events.len()
    );
    std::fs::write(path, html)
        .map_err(|e| TorshError::IoError(format!("Failed to write performance trends: {e}")))?;
    Ok(())
}
816
/// Writes a placeholder "operation frequency" HTML page to `path`, reporting
/// only the number of recorded events.
///
/// Takes an already-held profiler guard so the caller controls the locking
/// scope.
pub fn export_operation_frequency_chart(
    profiler: &parking_lot::MutexGuard<'_, Profiler>,
    path: &str,
) -> TorshResult<()> {
    let html = format!(
        r#"<!DOCTYPE html>
<html>
<head><title>Operation Frequency</title></head>
<body>
<h1>Operation Frequency</h1>
<p>Total events: {}</p>
<p>Frequency chart generation placeholder</p>
</body>
</html>"#,
        profiler.events.len()
    );
    std::fs::write(path, html).map_err(|e| {
        TorshError::IoError(format!("Failed to write operation frequency chart: {e}"))
    })?;
    Ok(())
}
839
/// Runs the global correlation analysis and writes the result to `path` as
/// pretty-printed JSON.
pub fn export_global_correlation_analysis(path: &str) -> TorshResult<()> {
    let analysis = analyze_global_correlations();
    let json = serde_json::to_string_pretty(&analysis).map_err(|e| {
        TorshError::SerializationError(format!("Failed to serialize correlation analysis: {e}"))
    })?;
    std::fs::write(path, json)
        .map_err(|e| TorshError::IoError(format!("Failed to write correlation analysis: {e}")))?;
    Ok(())
}
850
851pub fn export_memory_scatter_plot(
853 _memory_profiler: &crate::MemoryProfiler,
854 path: &str,
855) -> TorshResult<()> {
856 let html = format!(
857 r#"<!DOCTYPE html>
858<html>
859<head><title>Memory Scatter Plot</title></head>
860<body>
861<h1>Memory Scatter Plot</h1>
862<p>Memory profiler status: active</p>
863<p>Scatter plot generation placeholder</p>
864</body>
865</html>"#
866 );
867 std::fs::write(path, html)
868 .map_err(|e| TorshError::IoError(format!("Failed to write memory scatter plot: {e}")))?;
869 Ok(())
870}
871
/// Writes a placeholder "duration histogram" HTML page to `path`, reporting
/// only the number of recorded events.
///
/// Takes an already-held profiler guard so the caller controls the locking
/// scope.
pub fn export_duration_histogram(
    profiler: &parking_lot::MutexGuard<'_, Profiler>,
    path: &str,
) -> TorshResult<()> {
    let html = format!(
        r#"<!DOCTYPE html>
<html>
<head><title>Duration Histogram</title></head>
<body>
<h1>Duration Histogram</h1>
<p>Total events: {}</p>
<p>Histogram generation placeholder</p>
</body>
</html>"#,
        profiler.events.len()
    );
    std::fs::write(path, html)
        .map_err(|e| TorshError::IoError(format!("Failed to write duration histogram: {e}")))?;
    Ok(())
}
893
/// Returns whether stack-trace capture is enabled on the global profiler.
pub fn are_global_stack_traces_enabled() -> bool {
    core::profiler::are_global_stack_traces_enabled()
}
898
/// Enables or disables self-overhead tracking on the global profiler.
pub fn set_global_overhead_tracking_enabled(enabled: bool) {
    core::profiler::set_global_overhead_tracking_enabled(enabled);
}
903
/// Returns whether self-overhead tracking is enabled on the global profiler.
pub fn is_global_overhead_tracking_enabled() -> bool {
    core::profiler::is_global_overhead_tracking_enabled()
}
907
/// Returns a snapshot of the global profiler's accumulated overhead stats.
pub fn get_global_overhead_stats() -> OverheadStats {
    core::profiler::get_global_overhead_stats()
}
911
/// Resets the global profiler's accumulated overhead stats to zero.
pub fn reset_global_overhead_stats() {
    core::profiler::reset_global_overhead_stats();
}
915
/// A single, exporter-friendly profiling event with timing flattened to
/// plain integers.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ProfileEvent {
    /// Operation name.
    pub name: String,
    /// Category label used for grouping.
    pub category: String,
    /// Start offset in microseconds (the reference epoch depends on the
    /// producer of the event).
    pub start_us: u64,
    /// Duration in microseconds; producers use 0 when no duration was
    /// recorded.
    pub duration_us: u64,
    /// Numeric id of the producing thread.
    pub thread_id: usize,
    /// Optional count of logical operations performed.
    pub operation_count: Option<u64>,
    /// Optional floating-point operation count.
    pub flops: Option<u64>,
    /// Optional number of bytes moved.
    pub bytes_transferred: Option<u64>,
    /// Optional captured stack trace.
    pub stack_trace: Option<String>,
}
933
/// Accumulated self-overhead of the profiler itself: per-activity totals in
/// nanoseconds alongside the number of times each activity was measured.
#[derive(Debug, Clone, Default)]
pub struct OverheadStats {
    /// Total time spent adding events.
    pub add_event_time_ns: u64,
    /// Number of add-event calls measured.
    pub add_event_count: u64,
    /// Total time spent capturing stack traces.
    pub stack_trace_time_ns: u64,
    /// Number of stack traces captured.
    pub stack_trace_count: u64,
    /// Total time spent exporting.
    pub export_time_ns: u64,
    /// Number of export operations measured.
    pub export_count: u64,
    /// Sum of all tracked overhead.
    pub total_overhead_ns: u64,
}
945
/// Aggregate result of a bottleneck-analysis pass.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BottleneckAnalysis {
    /// Operations ranked as the slowest.
    pub slowest_operations: Vec<BottleneckEvent>,
    /// Locations with heavy memory allocation activity.
    pub memory_hotspots: Vec<MemoryHotspot>,
    /// Observed thread-contention incidents.
    pub thread_contention: Vec<ThreadContentionEvent>,
    /// Detected efficiency problems.
    pub efficiency_issues: Vec<EfficiencyIssue>,
    /// Overall, human-readable recommendations.
    pub recommendations: Vec<String>,
}
955
/// A single operation identified as a bottleneck.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BottleneckEvent {
    /// Operation name.
    pub name: String,
    /// Category label of the operation.
    pub category: String,
    /// Observed duration in microseconds.
    pub duration_us: u64,
    /// Numeric id of the thread the operation ran on.
    pub thread_id: usize,
    /// How severe the bottleneck is.
    pub severity: BottleneckSeverity,
    /// Relative impact score.
    pub impact_score: f64,
    /// Human-readable suggestion for addressing the bottleneck.
    pub recommendation: String,
}
967
/// A code location with concentrated memory-allocation activity.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct MemoryHotspot {
    /// Where the allocations occur (free-form location string).
    pub location: String,
    /// Total number of allocations observed at this location.
    pub total_allocations: usize,
    /// Total bytes allocated at this location.
    pub total_bytes: usize,
    /// Mean allocation size in bytes.
    pub average_size: f64,
    /// Maximum number of simultaneously live allocations.
    pub peak_concurrent_allocations: usize,
    /// How severe this hotspot is.
    pub severity: BottleneckSeverity,
}
978
/// A recorded incident of threads contending on a shared resource.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ThreadContentionEvent {
    /// Numeric id of the waiting thread.
    pub thread_id: usize,
    /// Operation during which the contention occurred.
    pub operation: String,
    /// Time spent waiting, in microseconds.
    pub wait_time_us: u64,
    /// Number of contention occurrences observed.
    pub contention_count: usize,
}
987
/// A detected efficiency problem with its scope and suggested remedy.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct EfficiencyIssue {
    /// Which kind of efficiency problem this is.
    pub issue_type: EfficiencyIssueType,
    /// Human-readable description of the issue.
    pub description: String,
    /// Names of the operations affected.
    pub affected_operations: Vec<String>,
    /// Estimated performance impact.
    pub performance_impact: f64,
    /// Human-readable suggestion for addressing the issue.
    pub recommendation: String,
}
997
/// Category of efficiency problem reported in an [`EfficiencyIssue`].
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum EfficiencyIssueType {
    LowThroughput,
    HighLatency,
    MemoryWaste,
    CpuUnderutilization,
    FrequentAllocation,
    LargeAllocation,
}
1008
/// Severity ranking for bottlenecks. Derives `Ord`, so variants compare as
/// `Low < Medium < High < Critical` (declaration order).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, serde::Serialize, serde::Deserialize)]
pub enum BottleneckSeverity {
    Low,
    Medium,
    High,
    Critical,
}
1017
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;
    use std::time::Duration;

    /// End-to-end: profile a scope with metrics, then export the global
    /// events as JSON and CSV.
    #[test]
    fn test_enhanced_profiling_workflow() {
        start_profiling();

        {
            profile_scope!("test_enhanced_workflow");
            thread::sleep(Duration::from_millis(10));

            let mut metrics_scope = MetricsScope::new("computation");
            metrics_scope.set_operation_count(1000);
            metrics_scope.set_flops(5000);
            metrics_scope.set_bytes_transferred(2048);

            thread::sleep(Duration::from_millis(5));
        }

        stop_profiling();

        let json_path = std::env::temp_dir().join("test_enhanced.json");
        let json_str = json_path.display().to_string();
        let result = export_global_json(&json_str);
        assert!(result.is_ok());

        let csv_path = std::env::temp_dir().join("test_enhanced.csv");
        let csv_str = csv_path.display().to_string();
        let result = export_global_csv(&csv_str);
        assert!(result.is_ok());

        // Best-effort cleanup of the temp files.
        let _ = std::fs::remove_file(&json_path);
        let _ = std::fs::remove_file(&csv_path);
    }

    /// Smoke-test the unified profiler lifecycle: start, stop, export.
    /// Results are intentionally not asserted — backends may be
    /// unavailable in CI.
    #[test]
    fn test_unified_profiler() {
        let mut profiler = create_unified_profiler();
        let result = profiler.start_all();

        thread::sleep(Duration::from_millis(5));

        let stop_result = profiler.stop_all();
        let unified_path = std::env::temp_dir().join("test_unified.json");
        let unified_str = unified_path.display().to_string();
        let export_result = profiler.export_all(export::ExportFormat::Json, &unified_str);

        let _ = std::fs::remove_file(&unified_path);
    }

    /// Exercises every registered export format name round-trip through
    /// `parse_format` and a global export.
    #[test]
    fn test_enhanced_export_formats() {
        start_profiling();
        {
            profile_scope!("format_test");
            thread::sleep(Duration::from_millis(5));
        }
        stop_profiling();

        let formats = export::available_format_names();
        for format_name in formats {
            if let Some(format) = export::parse_format(&format_name) {
                let path = std::env::temp_dir().join(format!(
                    "test_{}.{}",
                    format_name,
                    format.extension()
                ));
                let path_str = path.display().to_string();
                let result = export_global_events(format, &path_str);

                let _ = std::fs::remove_file(&path);
            }
        }
    }

    /// Verifies overhead tracking accumulates counts while enabled.
    /// Ignored by default: relies on mutable global state shared with the
    /// other tests.
    #[test]
    #[ignore = "Flaky test - passes individually but may fail in full suite"]
    fn test_overhead_tracking() {
        set_global_overhead_tracking_enabled(true);
        start_profiling();

        {
            profile_scope!("overhead_test");
            thread::sleep(Duration::from_millis(5));
        }

        stop_profiling();

        let stats = get_global_overhead_stats();
        assert!(stats.add_event_count > 0);
        assert!(stats.total_overhead_ns > 0);

        reset_global_overhead_stats();
        set_global_overhead_tracking_enabled(false);
    }
}
1136
/// Crate version string, taken from Cargo metadata at compile time.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
// NOTE(review): the numeric components below are hard-coded and can drift
// from `VERSION` — confirm they are bumped together with Cargo.toml.
pub const VERSION_MAJOR: u32 = 0;
pub const VERSION_MINOR: u32 = 1;
pub const VERSION_PATCH: u32 = 0;
1142
/// Convenience prelude re-exporting the most commonly used profiling APIs.
///
/// Glob re-exports may shadow one another; the `allow` below acknowledges
/// that deliberately.
#[allow(ambiguous_glob_reexports)]
pub mod prelude {
    pub use crate::analysis::*;
    pub use crate::core::*;
    pub use crate::distributed::*;
    pub use crate::export::*;
    pub use crate::platforms::*;
}