Skip to main content

DistributedOps

Struct DistributedOps 

Source
pub struct DistributedOps<'a> { /* private fields */ }
Expand description

Distributed tensor operations

Implementations§

Source§

impl<'a> DistributedOps<'a>

Source

pub fn new(mesh: &'a DeviceMesh) -> Self

Create a new distributed operations context

Source

pub fn all_reduce_f32( &self, data: &mut [f32], op: ReduceOp, group: &str, ) -> DistResult<()>

All-reduce an f32 tensor across all ranks in a group

After this operation, all ranks will have the same reduced values.

Source

pub fn all_reduce_i32( &self, data: &mut [i32], op: ReduceOp, group: &str, ) -> DistResult<()>

All-reduce an i32 tensor across all ranks in a group

Source

pub fn broadcast_f32( &self, data: &mut [f32], root_rank: usize, group: &str, ) -> DistResult<()>

Broadcast an f32 tensor from the root rank to all ranks in the group

Source

pub fn all_gather_f32( &self, local: &[f32], output: &mut [f32], group: &str, ) -> DistResult<()>

All-gather f32 tensors from all ranks

Each rank contributes local.len() elements, and receives local.len() * world_size elements in the output.

Source

pub fn scatter_f32( &self, data: &[f32], chunk: &mut [f32], root_rank: usize, ) -> DistResult<()>

Scatter a tensor: divide data among ranks

Only the root rank’s data is used for input. After this operation, each rank’s chunk will contain its portion of the data.

Source

pub fn gather_f32( &self, local: &[f32], output: &mut [f32], root_rank: usize, ) -> DistResult<()>

Gather tensors from all ranks to root

Each rank’s local data is gathered to the root rank’s output. Only the root rank’s output will contain the complete gathered data.

Source

pub fn reduce_scatter_f32( &self, data: &mut [f32], output: &mut [f32], op: ReduceOp, group: &str, ) -> DistResult<()>

Reduce-scatter: reduce and distribute results

Combines reduction and scatter in one operation. After this operation, each rank has a portion of the reduced result.

Source

pub fn barrier(&self, group: &str) -> DistResult<()>

Barrier synchronization across all ranks in a group

Source

pub fn send_f32(&self, data: &[f32], dest_rank: usize) -> DistResult<()>

Point-to-point send of an f32 tensor

Source

pub fn recv_f32(&self, data: &mut [f32], src_rank: usize) -> DistResult<()>

Point-to-point receive of an f32 tensor

Auto Trait Implementations§

§

impl<'a> Freeze for DistributedOps<'a>

§

impl<'a> !RefUnwindSafe for DistributedOps<'a>

§

impl<'a> Send for DistributedOps<'a>

§

impl<'a> Sync for DistributedOps<'a>

§

impl<'a> Unpin for DistributedOps<'a>

§

impl<'a> UnsafeUnpin for DistributedOps<'a>

§

impl<'a> !UnwindSafe for DistributedOps<'a>

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.