Skip to main content

CommunicationBackend

Trait CommunicationBackend 

Source
pub trait CommunicationBackend: Send + Sync {
Show 13 methods
    // Required methods
    fn initialize(&mut self, config: &DistributedConfig) -> Result<(), ExecutorError>;
    fn finalize(&mut self) -> Result<(), ExecutorError>;
    fn rank(&self) -> usize;
    fn world_size(&self) -> usize;
    fn all_reduce(&self, tensor_id: &str, reduction: ReductionOp) -> Result<(), ExecutorError>;
    fn broadcast(&self, tensor_id: &str, src_rank: usize) -> Result<(), ExecutorError>;
    fn scatter(&self, tensor_id: &str, src_rank: usize) -> Result<(), ExecutorError>;
    fn gather(&self, tensor_id: &str, dst_rank: usize) -> Result<(), ExecutorError>;
    fn all_gather(&self, tensor_id: &str) -> Result<(), ExecutorError>;
    fn reduce_scatter(&self, tensor_id: &str, reduction: ReductionOp) -> Result<(), ExecutorError>;
    fn send(&self, tensor_id: &str, dst_rank: usize) -> Result<(), ExecutorError>;
    fn recv(&self, tensor_id: &str, src_rank: usize) -> Result<(), ExecutorError>;
    fn barrier(&self) -> Result<(), ExecutorError>;
}
Expand description

Abstract communication backend for device-to-device communication.

Required Methods§

Source

fn initialize( &mut self, config: &DistributedConfig, ) -> Result<(), ExecutorError>

Initialize the communication backend.

Source

fn finalize(&mut self) -> Result<(), ExecutorError>

Finalize and clean up the backend.

Source

fn rank(&self) -> usize

Get the rank of this process.

Source

fn world_size(&self) -> usize

Get the world size (total number of processes).

Source

fn all_reduce( &self, tensor_id: &str, reduction: ReductionOp, ) -> Result<(), ExecutorError>

Perform an all-reduce operation.

Source

fn broadcast( &self, tensor_id: &str, src_rank: usize, ) -> Result<(), ExecutorError>

Broadcast from source rank to all ranks.

Source

fn scatter(&self, tensor_id: &str, src_rank: usize) -> Result<(), ExecutorError>

Scatter data from source rank to all ranks.

Source

fn gather(&self, tensor_id: &str, dst_rank: usize) -> Result<(), ExecutorError>

Gather data from all ranks to destination rank.

Source

fn all_gather(&self, tensor_id: &str) -> Result<(), ExecutorError>

Perform an all-gather operation.

Source

fn reduce_scatter( &self, tensor_id: &str, reduction: ReductionOp, ) -> Result<(), ExecutorError>

Perform a reduce-scatter operation.

Source

fn send(&self, tensor_id: &str, dst_rank: usize) -> Result<(), ExecutorError>

Perform a point-to-point send to the destination rank.

Source

fn recv(&self, tensor_id: &str, src_rank: usize) -> Result<(), ExecutorError>

Perform a point-to-point receive from the source rank.

Source

fn barrier(&self) -> Result<(), ExecutorError>

Synchronize all processes.

Implementors§