/// Defines functions for optimized data transfer between servers, supporting custom
/// communication mechanisms such as peer-to-peer communication or specialized
/// implementations.
///
/// This is the rendered trait synopsis: `{ ... }` marks a provided (default) method
/// body that is not shown in this listing, so what the defaults do cannot be read
/// from here.
pub trait ServerCommunication {
/// Indicates whether server-to-server communication is enabled for this implementation.
const SERVER_COMM_ENABLED: bool;
// Provided methods
/// NOTE(review): no description is given for this method; presumably it synchronizes
/// outstanding collective operations on the stream identified by `stream_id` — confirm
/// against the implementation.
///
/// # Errors
/// Returns a `ServerError` if the operation fails.
fn sync_collective(
&mut self,
stream_id: StreamId,
) -> Result<(), ServerError> { ... }
/// NOTE(review): no description is given for this method; presumably it initializes
/// communication among the devices listed in `device_ids` — confirm against the
/// implementation.
///
/// # Errors
/// Returns a `ServerError` if the operation fails.
fn comm_init(
&mut self,
device_ids: Vec<DeviceId>,
) -> Result<(), ServerError> { ... }
/// Performs an `all_reduce` operation on the input data and writes it to the output
/// buffer.
///
/// See <https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/collectives.html#allreduce>.
///
/// # Arguments
/// * `src` - The data to be reduced.
/// * `dst` - Where to write the result.
/// * `dtype` - The element type of the data being reduced.
/// * `stream_id` - The data's stream id.
/// * `op` - The reduce's aggregation operation, e.g. mean, sum, etc.
/// * `device_ids` - The list of device ids from which to `all_reduce`.
///
/// # Errors
/// Returns a `ServerError` if the operation fails.
fn all_reduce(
&mut self,
src: Binding,
dst: Binding,
dtype: ElemType,
stream_id: StreamId,
op: ReduceOperation,
device_ids: Vec<DeviceId>,
) -> Result<(), ServerError> { ... }
/// Sends data from this server to a destination server.
///
/// # Arguments
/// * `desc` - A descriptor specifying the data to be sent, including shape, strides,
///   and binding.
/// * `dtype` - The element type of the data being sent.
/// * `stream_id` - The stream ID associated with the server's operation.
/// * `device_id_dst` - ID of the device receiving the data.
///
/// # Errors
/// Returns a `ServerError` if the operation fails.
fn send(
&mut self,
desc: CopyDescriptor,
dtype: ElemType,
stream_id: StreamId,
device_id_dst: DeviceId,
) -> Result<(), ServerError> { ... }
/// Receives data from another server.
///
/// # Arguments
/// * `handle` - The handle in which the received data is written.
/// * `dtype` - The element type of the data being transferred.
/// * `stream_id` - The stream ID associated with the server's operation.
/// * `device_id_src` - ID of the device sending the data.
///
/// # Errors
/// Returns a `ServerError` if the operation fails.
fn recv(
&mut self,
handle: Handle,
dtype: ElemType,
stream_id: StreamId,
device_id_src: DeviceId,
) -> Result<(), ServerError> { ... }
}Expand description
Defines functions for optimized data transfer between servers, supporting custom communication mechanisms such as peer-to-peer communication or specialized implementations.
Required Associated Constants§
Sourceconst SERVER_COMM_ENABLED: bool
const SERVER_COMM_ENABLED: bool
Indicates whether server-to-server communication is enabled for this implementation.
Provided Methods§
Sourcefn sync_collective(&mut self, stream_id: StreamId) -> Result<(), ServerError>
fn sync_collective(&mut self, stream_id: StreamId) -> Result<(), ServerError>
Sourcefn all_reduce(
&mut self,
src: Binding,
dst: Binding,
dtype: ElemType,
stream_id: StreamId,
op: ReduceOperation,
device_ids: Vec<DeviceId>,
) -> Result<(), ServerError>
fn all_reduce( &mut self, src: Binding, dst: Binding, dtype: ElemType, stream_id: StreamId, op: ReduceOperation, device_ids: Vec<DeviceId>, ) -> Result<(), ServerError>
Performs an all_reduce operation on the input data and writes it to the output buffer.
See the NCCL documentation for AllReduce: https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/collectives.html#allreduce
§Arguments
- src - The data to be reduced.
- dst - Where to write the result.
- dtype - The element type of the data being reduced.
- stream_id - The data’s stream id.
- op - The reduce’s aggregation operation, e.g. mean, sum, etc.
- device_ids - The list of device ids from which to all_reduce.
§Returns
Returns Ok(()) on success, or a ServerError if the operation fails.
Sourcefn send(
&mut self,
desc: CopyDescriptor,
dtype: ElemType,
stream_id: StreamId,
device_id_dst: DeviceId,
) -> Result<(), ServerError>
fn send( &mut self, desc: CopyDescriptor, dtype: ElemType, stream_id: StreamId, device_id_dst: DeviceId, ) -> Result<(), ServerError>
Sends data from this server to a destination server.
§Arguments
- desc - A descriptor specifying the data to be sent, including shape, strides, and binding.
- dtype - The element type of the data being sent.
- stream_id - The stream ID associated with the server’s operation.
- device_id_dst - ID of the device receiving the data.
§Returns
Returns Ok(()) on success, or a ServerError if the operation fails.
Sourcefn recv(
&mut self,
handle: Handle,
dtype: ElemType,
stream_id: StreamId,
device_id_src: DeviceId,
) -> Result<(), ServerError>
fn recv( &mut self, handle: Handle, dtype: ElemType, stream_id: StreamId, device_id_src: DeviceId, ) -> Result<(), ServerError>
Receives data from another server.
§Arguments
- handle - The handle in which the received data is written.
- dtype - The element type of the data being received.
- stream_id - The stream ID associated with the server’s operation.
- device_id_src - ID of the device sending the data.
§Returns
Returns Ok(()) on success, or a ServerError if the operation fails.
Dyn Compatibility§
This trait is not dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety".