Skip to main content

Crate torsh_profiler

Crate torsh_profiler 

Source
Expand description

Performance profiling for ToRSh

This crate provides comprehensive performance profiling capabilities for the ToRSh deep learning framework, including CPU, GPU, memory, and system profiling.

§Refactored Modular Structure

The profiler has been successfully refactored from a massive 9,517-line monolithic file into a clean, maintainable modular structure:

  • core: Core profiling types, event management, and profiler implementation
  • platforms: Platform-specific profiling (CPU, GPU, system)
  • analysis: Performance analysis and optimization recommendations
  • export: Export and reporting functionality with multiple format support
  • distributed: Distributed profiling coordination

§Usage Examples

§Basic Profiling

use torsh_profiler::{start_profiling, stop_profiling, profile_scope};

// Start global profiling
start_profiling();

{
    profile_scope!("computation");
    // Your code here
}

stop_profiling();

§Advanced Profiling with Metrics

use torsh_profiler::{MetricsScope, export_global_events, ExportFormat};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    {
        let mut scope = MetricsScope::new("training_step");
        scope.set_operation_count(1000);
        scope.set_flops(50000);
        scope.set_bytes_transferred(4096);
        // Training code here
    }

    // Export results
    export_global_events(ExportFormat::ChromeTrace, "profile.json")?;
    Ok(())
}

§Platform-Specific Profiling

use torsh_profiler::{UnifiedProfiler, CudaProfiler, MemoryProfiler};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut profiler = UnifiedProfiler::with_auto_detection();
    profiler.start_all()?;

    // Your GPU/CPU workload

    profiler.stop_all()?;
    Ok(())
}

Re-exports§

pub use core::add_global_event;
pub use core::add_global_event as add_event;
pub use core::clear_global_events;
pub use core::get_global_stats;
pub use core::global_profiler;
pub use core::profile_function_with_category;
pub use core::start_profiling;
pub use core::stop_profiling;
pub use core::MetricsScope;
pub use core::Profiler;
pub use core::ScopeGuard;
pub use export::available_format_names;
pub use export::export_chrome_trace_format;
pub use export::export_csv_format;
pub use export::export_events;
pub use export::export_global_events;
pub use export::export_json_format;
pub use export::parse_format;
pub use export::ExportFormat;
pub use prometheus::PrometheusExporter;
pub use prometheus::PrometheusExporterBuilder;
pub use grafana::Dashboard as GrafanaDashboard;
pub use grafana::DashboardTemplates;
pub use grafana::GrafanaDashboardGenerator;
pub use grafana::GridPos;
pub use grafana::Panel;
pub use grafana::Target;
pub use cloudwatch::CloudWatchConfig;
pub use cloudwatch::CloudWatchPublisher;
pub use cloudwatch::CloudWatchPublisherBuilder;
pub use cloudwatch::Dimension;
pub use cloudwatch::MetricDatum;
pub use cloudwatch::StatisticSet;
pub use cloudwatch::Unit as CloudWatchUnit;
pub use streaming::create_high_performance_streaming_engine;
pub use streaming::create_low_latency_streaming_engine;
pub use streaming::create_streaming_engine;
pub use streaming::AdaptiveBitrateConfig;
pub use streaming::AdaptiveRateController;
pub use streaming::AdjustmentReason;
pub use streaming::AdvancedFeatures;
pub use streaming::BitrateAdjustment;
pub use streaming::BufferedEvent;
pub use streaming::CompressionAlgorithm;
pub use streaming::CompressionConfig;
pub use streaming::CompressionManager;
pub use streaming::ConnectionManager;
pub use streaming::ControlMessage;
pub use streaming::EnhancedStreamingEngine;
pub use streaming::EventBuffer;
pub use streaming::EventPriority;
pub use streaming::ProtocolConfig;
pub use streaming::QualityConfig;
pub use streaming::QualityLevel;
pub use streaming::QualityMetricsThreshold;
pub use streaming::SSEConnection;
pub use streaming::StreamConnection;
pub use streaming::StreamingConfig;
pub use streaming::StreamingProtocol;
pub use streaming::StreamingStats;
pub use streaming::StreamingStatsSnapshot;
pub use streaming::TcpConnection;
pub use streaming::UdpConnection;
pub use streaming::WebSocketConnection;
pub use streaming::WebSocketMessage;
pub use alerts::create_alert_manager_with_config;
pub use alerts::get_alert_manager;
pub use alerts::AlertConfig;
pub use alerts::AlertManager;
pub use alerts::NotificationChannel;
pub use attributes::get_registry;
pub use attributes::with_profiling;
pub use attributes::AsyncProfiler;
pub use attributes::AttributeRegistry;
pub use attributes::ConditionalProfiler;
pub use attributes::ProfileAttribute;
pub use attributes::ProfiledFunction;
pub use attributes::ProfiledStruct;
pub use chrome_trace::create_chrome_event;
pub use chrome_trace::export;
pub use chrome_trace::export_to_writer;
pub use chrome_trace::phases;
pub use chrome_trace::scopes;
pub use ci_cd::CiCdConfig;
pub use ci_cd::CiCdIntegration;
pub use ci_cd::CiCdPlatform;
pub use cpu::CpuProfiler;
pub use cpu::ProfileScope;
pub use cuda::get_cuda_device_properties;
pub use cuda::get_cuda_memory_stats;
pub use cuda::CudaEvent;
pub use cuda::CudaMemoryStats;
pub use cuda::CudaProfiler;
pub use cuda::CudaSynchronizationStats;
pub use cuda::NvtxRange;
pub use custom_export::CsvColumn;
pub use custom_export::CsvFormatter;
pub use custom_export::CustomExportFormat;
pub use custom_export::CustomExporter;
pub use custom_export::ExportSchema;
pub use dashboard::alerts::create_alert_manager;
pub use dashboard::create_dashboard;
pub use dashboard::create_dashboard_with_config;
pub use dashboard::export_dashboard_html;
pub use dashboard::generate_3d_landscape;
pub use dashboard::generate_performance_heatmap;
pub use dashboard::Dashboard;
pub use dashboard::DashboardAlert;
pub use dashboard::DashboardAlertSeverity;
pub use dashboard::DashboardConfig;
pub use dashboard::DashboardData;
pub use dashboard::HeatmapCell;
pub use dashboard::MemoryMetrics;
pub use dashboard::OperationSummary;
pub use dashboard::PerformanceHeatmap;
pub use dashboard::PerformanceLandscape;
pub use dashboard::PerformanceMetrics;
pub use dashboard::PerformancePoint3D;
pub use dashboard::SystemMetrics;
pub use dashboard::VisualizationColorScheme;
pub use dashboard::VisualizationConfig;
pub use dashboard::WebSocketConfig;
pub use scirs2_integration::AdvancedProfilingConfig;
pub use scirs2_integration::BenchmarkResults;
pub use scirs2_integration::HistogramStats;
pub use scirs2_integration::MetricsSummary;
pub use scirs2_integration::PerformanceAnalysis;
pub use scirs2_integration::PerformanceTargets;
pub use scirs2_integration::SamplingStrategy;
pub use scirs2_integration::ScirS2EnhancedProfiler;
pub use scirs2_integration::ScirS2ProfilingData;
pub use scirs2_integration::ValidationLevel;
pub use instruments::create_instruments_profiler;
pub use instruments::create_instruments_profiler_with_config;
pub use instruments::export_instruments_json;
pub use instruments::get_instruments_statistics;
pub use instruments::AllocationType;
pub use instruments::EnergyComponent;
pub use instruments::InstrumentsConfig;
pub use instruments::InstrumentsExportData;
pub use instruments::InstrumentsProfiler;
pub use instruments::InstrumentsStats;
pub use instruments::SignpostInterval;
pub use macros::ProfileResult;
pub use memory::FragmentationAnalysis;
pub use memory::LeakDetectionResults;
pub use memory::MemoryBlock;
pub use memory::MemoryEvent;
pub use memory::MemoryEventType;
pub use memory::MemoryLeak;
pub use memory::MemoryProfiler;
pub use memory::MemoryStats;
pub use memory::MemoryTimeline;
pub use memory::SystemMemoryInfo;
pub use memory_optimization::create_memory_optimizer;
pub use memory_optimization::create_memory_optimizer_for_low_memory;
pub use memory_optimization::create_memory_optimizer_with_aggressive_settings;
pub use memory_optimization::AdaptivePoolManager;
pub use memory_optimization::AdvancedMemoryOptimizer;
pub use memory_optimization::MemoryOptimizationConfig;
pub use memory_optimization::MemoryOptimizationStats;
pub use memory_optimization::MemorySnapshot;
pub use memory_optimization::MemoryStrategies;
pub use memory_optimization::MemoryUsagePredictor;
pub use memory_optimization::OptimizationExportData;
pub use memory_optimization::OptimizationStatsSummary;
pub use core::metrics::CorrelationAnalysis;
pub use core::metrics::CorrelationStrength;
pub use core::metrics::CorrelationSummary;
pub use core::metrics::CorrelationType;
pub use core::metrics::MemoryCorrelation;
pub use core::metrics::OperationCorrelation;
pub use core::metrics::PerformanceCorrelation;
pub use core::metrics::TemporalCorrelation;
pub use core::events::*;
pub use core::metrics::*;
pub use export::dashboard::*;
pub use export::formats::*;
pub use export::reporting::*;
pub use platforms::cpu::*;
pub use platforms::gpu::*;
pub use platforms::system::*;
pub use analysis::ml_analysis::*;
pub use analysis::optimization::*;
pub use analysis::regression::*;
pub use distributed::profiling::*;

Modules§

advanced_visualization
Advanced Visualization Export
alerts
Alert system for performance monitoring
amd
AMD Tools Integration
analysis
Performance analysis and optimization recommendations
attributes
Attribute-based profiling support
chrome_trace
Chrome tracing format export
ci_cd
CI/CD integration for performance profiling
cloud_providers
Cloud Provider Integrations for ToRSh Profiler
cloudwatch
AWS CloudWatch metrics integration for torsh-profiler
core
Core profiling functionality: core profiling types and utilities
cpu
CPU profiling
cross_platform
Cross-platform Profiling Support
cuda
CUDA profiling
custom_export
Custom export formats for profiling data
custom_tools
Custom tool APIs for profiler integration
dashboard
Real-time Performance Dashboard
distributed
Distributed profiling coordination
export
Export and reporting capabilities and functionality
grafana
Grafana dashboard integration for torsh-profiler
instruments
Apple Instruments profiling integration
integrated_profiler
Integrated Profiler System
kubernetes
Kubernetes Operator for Cloud-Native Profiling
macros
Convenient macros for profiling operations
memory
Memory profiling
memory_optimization
Advanced Memory Optimization Features
ml_analysis
Machine Learning-based performance analysis
nsight
NVIDIA Nsight profiling integration
online_learning
Online Learning Module for Real-time Performance Analysis
optimization
Performance optimizations and overhead minimization
platforms
Platform-specific profiling implementations
power
Power profiling capabilities for energy-efficient performance monitoring
prelude
Prelude module for convenient imports
prometheus
Prometheus metrics integration for torsh-profiler
regression
Performance regression detection system
reporting
Comprehensive reporting system for performance profiling
scirs2_integration
SciRS2 Integration for Advanced Profiling
streaming
Enhanced Real-time Streaming Capabilities
tensorboard
TensorBoard export functionality
thermal
Thermal analysis system for performance profiling
vtune
Intel VTune profiling integration
workload_characterization
Workload Characterization

Macros§

benchmark_scirs2
Macro for advanced benchmarking with SciRS2
collect_scirs2_metrics
Macro for advanced metrics collection
cuda_nvtx_range
Macro for NVTX range profiling
profile_alloc
Profile memory allocation with tracking
profile_async
Profile async operations
profile_attribute
Attribute-like macro for profiling functions
profile_block
Profile a block of code with automatic naming
profile_closure
Profile a closure with optional name and category
profile_compare
Benchmark and profile comparison between different implementations
profile_cuda
Profile CUDA operations
profile_current_function
Profile the current function automatically
profile_function
profile_if
Conditionally profile based on a feature flag or condition
profile_loop
Profile loop iterations with automatic batching
profile_metrics
profile_sampled
Profile with sampling (only profile every N calls)
profile_scirs2_comprehensive
Enhanced macro for comprehensive metrics profiling
profile_scirs2_sampling
Convenient macros for SciRS2-enhanced profiling
profile_scirs2_validated
Advanced profiling macro with validation
profile_scope
Macros for convenient scope profiling
profile_tensor_op
Profile tensor operations with automatic FLOPS counting
profile_thread_local
Profile with thread-local storage for reduced overhead
profile_with_metadata
Profile with custom metadata
profile_with_overhead
Profile with automatic overhead measurement
profiled_fn
Helper macro for creating profiled function wrappers
profiling_scope
Create a profiling scope with automatic cleanup

Structs§

AnomalyAnalysis
Anomaly analysis result structure
BottleneckAnalysis
Bottleneck analysis results
BottleneckEvent
A performance bottleneck event
EfficiencyIssue
Efficiency issue
MemoryAnomaly
Memory anomaly data structure
MemoryHotspot
Memory hotspot information
OverheadStats
Overhead statistics for profiling operations
PatternAnalysis
Pattern analysis result structure
PerformanceAnomaly
Performance anomaly data structure
PerformancePattern
Performance pattern data structure
ProfileEvent
Core profiling event structure
ThreadContentionEvent
Thread contention event
UnifiedProfiler
Enhanced unified profiler combining all platform profilers with simplified API

Enums§

BottleneckSeverity
Severity of a bottleneck
EfficiencyIssueType
Type of efficiency issue

Constants§

VERSION
VERSION_MAJOR
VERSION_MINOR
VERSION_PATCH

Functions§

analyze_global_correlations
Analyze global correlations with proper implementation
are_global_stack_traces_enabled
Check if global stack traces are enabled
create_basic_profiler
Create a basic profiler for development
create_production_profiler
Create a profiler optimized for production use
create_unified_profiler
Create a unified profiler with automatic platform detection
detect_global_anomalies
Detect global anomalies in profiling data (stub implementation)
detect_global_patterns
Detect global patterns in profiling data (stub implementation)
export_duration_histogram
Export duration histogram (stub implementation)
export_global_anomaly_analysis
Export global anomaly analysis (stub implementation)
export_global_correlation_analysis
Export global correlation analysis (stub implementation)
export_global_csv
export_global_custom
Export using a custom format
export_global_json
export_global_pattern_analysis
Export global pattern analysis (stub implementation)
export_global_tensorboard
export_global_trace
Enhanced global export functions with multiple format support
export_memory_scatter_plot
Export memory scatter plot (stub implementation)
export_operation_frequency_chart
Export operation frequency chart (stub implementation)
export_performance_trend_chart
Export performance trend chart (stub implementation)
get_global_custom_export_formats
Get available custom export format names
get_global_overhead_stats
is_global_overhead_tracking_enabled
register_global_custom_export_format
Register a custom export format globally
reset_global_overhead_stats
set_global_overhead_tracking_enabled
Enhanced overhead tracking
set_global_stack_traces_enabled
Set global stack traces enabled with enhanced functionality

Type Aliases§

TorshResult
Convenience type alias for Results in this crate