pub struct Zero3PerformanceStats {
pub forward_passes: u64,
pub backward_passes: u64,
pub optimizer_steps: u64,
pub total_forward_time: Duration,
pub total_backward_time: Duration,
pub total_optimizer_time: Duration,
pub parameter_transfer_time: Duration,
pub gradient_sync_time: Duration,
pub layer_timings: HashMap<String, LayerTimingStats>,
pub throughput_metrics: ThroughputMetrics,
pub memory_transfer_metrics: MemoryTransferMetrics,
pub communication_stats: CommunicationStats,
pub optimization_efficiency: OptimizationEfficiency,
}
Comprehensive performance statistics for ZeRO-3 operations.
Tracks all aspects of ZeRO-3 performance, including:
- Forward and backward pass timing
- Parameter transfer and optimization statistics
- Memory management performance
- Distributed synchronization metrics
- Throughput and efficiency measurements
Fields
forward_passes: u64 — Number of forward passes completed
backward_passes: u64 — Number of backward passes completed
optimizer_steps: u64 — Number of optimizer steps completed
total_forward_time: Duration — Total time spent in forward passes
total_backward_time: Duration — Total time spent in backward passes
total_optimizer_time: Duration — Total time spent in optimizer steps
parameter_transfer_time: Duration — Time spent transferring parameters between CPU/GPU
gradient_sync_time: Duration — Time spent synchronizing gradients across ranks
layer_timings: HashMap<String, LayerTimingStats> — Per-layer execution timings
throughput_metrics: ThroughputMetrics — Throughput metrics
memory_transfer_metrics: MemoryTransferMetrics — Memory transfer performance
communication_stats: CommunicationStats — Distributed communication statistics
optimization_efficiency: OptimizationEfficiency — Optimization efficiency metrics
Implementations§
impl Zero3PerformanceStats
pub fn record_forward_pass(&mut self, duration: Duration, num_tokens: usize)
Record a completed forward pass.
pub fn record_backward_pass(&mut self, duration: Duration, num_tokens: usize)
Record a completed backward pass.
pub fn record_optimizer_step(&mut self, duration: Duration, num_params: usize)
Record a completed optimizer step.
pub fn record_layer_execution(&mut self, layer_name: String, duration: Duration)
Record layer execution timing.
pub fn record_layer_backward(&mut self, layer_name: String, duration: Duration)
Record layer backward pass timing.
pub fn record_parameter_transfer(
    &mut self,
    duration: Duration,
    bytes_transferred: usize,
    direction: TransferDirection,
)
Record a parameter transfer operation.
pub fn record_gradient_sync(
    &mut self,
    duration: Duration,
    num_gradients: usize,
    world_size: usize,
)
Record a gradient synchronization.
pub fn record_communication(
    &mut self,
    operation: CommunicationOperation,
    duration: Duration,
    bytes: usize,
)
Record a distributed communication operation.
pub fn average_forward_time(&self) -> Duration
Get the average forward pass time.
pub fn average_backward_time(&self) -> Duration
Get the average backward pass time.
pub fn average_optimizer_time(&self) -> Duration
Get the average optimizer step time.
pub fn get_tokens_per_second(&self) -> f64
Get the throughput in tokens per second.
pub fn get_memory_bandwidth_gbps(&self) -> f64
Get the memory transfer bandwidth in GB/s.
pub fn get_communication_efficiency(&self) -> f64
Get the communication efficiency metric.
pub fn get_training_efficiency(&self) -> f64
Get the overall training efficiency score (0.0 to 1.0).
pub fn get_performance_summary(&self) -> PerformanceSummary
Get a detailed performance summary.
pub fn merge(&mut self, other: &Zero3PerformanceStats)
Merge statistics from another instance (useful for distributed aggregation).
Trait Implementations§
impl Clone for Zero3PerformanceStats
fn clone(&self) -> Zero3PerformanceStats
fn clone_from(&mut self, source: &Self) (since 1.0.0)
Performs copy-assignment from source.
impl Debug for Zero3PerformanceStats
Auto Trait Implementations§
impl Freeze for Zero3PerformanceStats
impl RefUnwindSafe for Zero3PerformanceStats
impl Send for Zero3PerformanceStats
impl Sync for Zero3PerformanceStats
impl Unpin for Zero3PerformanceStats
impl UnsafeUnpin for Zero3PerformanceStats
impl UnwindSafe for Zero3PerformanceStats
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
impl<T> CloneToUninit for T
where
    T: Clone,
impl<T> Instrument for T
fn instrument(self, span: Span) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise.
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise.