/// Interface for distributed operations.
///
/// Implementors must be `Send + Sync` (supertrait bounds on the trait).
pub trait DistributedBackend: Send + Sync {
// Required methods
/// NOTE(review): no description of `rank` is visible here; presumably the
/// index of this node within the group — confirm against the implementation.
fn rank(&self) -> usize;
/// Get the total number of nodes.
fn world_size(&self) -> usize;
/// Perform an all-reduce operation on a buffer.
///
/// `buffer` contains the local gradients encoded as little-endian `f32`
/// values. On return, all nodes hold the same result:
///
/// - `ReduceOp::Sum` – element-wise sum across all nodes.
/// - `ReduceOp::Mean` – element-wise sum divided by `world_size`.
///
/// Returns a boxed, pinned, `Send` future resolving to `Result<()>`. The
/// borrows of `self` and `buffer` must both outlive `'async_trait`, so the
/// returned future may hold them until it completes.
fn all_reduce<'life0, 'life1, 'async_trait>(
&'life0 self,
buffer: &'life1 mut [u8],
op: ReduceOp,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait;
/// NOTE(review): no description of `barrier` is visible here; presumably it
/// completes once every node has reached the barrier — confirm.
///
/// Returns a boxed, pinned, `Send` future resolving to `Result<()>`; the
/// borrow of `self` must outlive `'async_trait`.
fn barrier<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
}Expand description
Interface for distributed operations.
Required Methods§
Source
fn world_size(&self) -> usize
fn world_size(&self) -> usize
Get the total number of nodes.
Source
fn all_reduce<'life0, 'life1, 'async_trait>(
&'life0 self,
buffer: &'life1 mut [u8],
op: ReduceOp,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn all_reduce<'life0, 'life1, 'async_trait>(
&'life0 self,
buffer: &'life1 mut [u8],
op: ReduceOp,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Perform an all-reduce operation on a buffer.
The input buffer contains the local gradients encoded as little-endian
f32 values. On return, all nodes hold the same result:
- `ReduceOp::Sum` – element-wise sum across all nodes.
- `ReduceOp::Mean` – element-wise sum divided by `world_size`.