pub trait CommunicationBackend: Send + Sync {
Show 13 methods
// Required methods
fn initialize(
&mut self,
config: &DistributedConfig,
) -> Result<(), ExecutorError>;
fn finalize(&mut self) -> Result<(), ExecutorError>;
fn rank(&self) -> usize;
fn world_size(&self) -> usize;
fn all_reduce(
&self,
tensor_id: &str,
reduction: ReductionOp,
) -> Result<(), ExecutorError>;
fn broadcast(
&self,
tensor_id: &str,
src_rank: usize,
) -> Result<(), ExecutorError>;
fn scatter(
&self,
tensor_id: &str,
src_rank: usize,
) -> Result<(), ExecutorError>;
fn gather(
&self,
tensor_id: &str,
dst_rank: usize,
) -> Result<(), ExecutorError>;
fn all_gather(&self, tensor_id: &str) -> Result<(), ExecutorError>;
fn reduce_scatter(
&self,
tensor_id: &str,
reduction: ReductionOp,
) -> Result<(), ExecutorError>;
fn send(
&self,
tensor_id: &str,
dst_rank: usize,
) -> Result<(), ExecutorError>;
fn recv(
&self,
tensor_id: &str,
src_rank: usize,
) -> Result<(), ExecutorError>;
fn barrier(&self) -> Result<(), ExecutorError>;
}Expand description
Abstract communication backend for device-to-device communication.
Required Methods
fn initialize(&mut self, config: &DistributedConfig) -> Result<(), ExecutorError>
Initialize the communication backend.
fn finalize(&mut self) -> Result<(), ExecutorError>
Finalize and clean up the backend.
fn rank(&self) -> usize
Get the rank of the current process.
fn world_size(&self) -> usize
Get the world size (total number of processes).
fn all_reduce(&self, tensor_id: &str, reduction: ReductionOp) -> Result<(), ExecutorError>
Perform an all-reduce operation.
fn broadcast(&self, tensor_id: &str, src_rank: usize) -> Result<(), ExecutorError>
Broadcast from source rank to all ranks.
fn scatter(&self, tensor_id: &str, src_rank: usize) -> Result<(), ExecutorError>
Scatter data from source rank to all ranks.
fn gather(&self, tensor_id: &str, dst_rank: usize) -> Result<(), ExecutorError>
Gather data from all ranks to destination rank.
fn all_gather(&self, tensor_id: &str) -> Result<(), ExecutorError>
All-gather operation.
fn reduce_scatter(&self, tensor_id: &str, reduction: ReductionOp) -> Result<(), ExecutorError>
Reduce-scatter operation.
fn send(&self, tensor_id: &str, dst_rank: usize) -> Result<(), ExecutorError>
Point-to-point send.
fn recv(&self, tensor_id: &str, src_rank: usize) -> Result<(), ExecutorError>
Point-to-point receive.
fn barrier(&self) -> Result<(), ExecutorError>
Synchronize all processes.