pub struct DeviceMesh {
pub world_size: usize,
pub rank: usize,
pub topology: MeshTopology,
pub comm: Option<Arc<dyn MeshComm + Send + Sync>>,
/* private fields */
}
Device Mesh for distributed ML inference
Manages a set of devices arranged in a logical mesh topology for data/tensor/pipeline parallelism. Optimized for memory efficiency.
Fields§
world_size: usize — Total number of devices in the mesh
rank: usize — This process’s global rank
topology: MeshTopology — Mesh topology configuration
comm: Option<Arc<dyn MeshComm + Send + Sync>> — Communication backend handle
Implementations§
impl DeviceMesh
pub fn new_with_mock_comm(devices: Vec<Device>, rank: usize) -> Self
Create a new device mesh with a mock communication backend for testing
pub fn with_topology(devices: Vec<Device>, topology: MeshTopology) -> Result<Self, String>
Create a mesh with specific topology
pub fn get_device(&self, rank: usize) -> Option<Arc<Device>>
Get device by rank (memory efficient - returns Arc clone)
pub fn local_device(&self) -> Option<Arc<Device>>
Get the local device for this process
pub fn all_devices(&self) -> Arc<Vec<Arc<Device>>>
Get all devices (returns Arc to avoid cloning the entire Vec)
pub fn devices_by_backend(&self, backend: DeviceBackend) -> Vec<Arc<Device>>
Get devices for a specific backend type (memory efficient)
pub fn devices_in_group(&self, group_name: &str) -> Result<Vec<Arc<Device>>, String>
Get devices in a specific process group
pub fn add_group(&mut self, name: String, ranks: Vec<usize>, backend: GroupBackend) -> Result<(), String>
Add a custom process group
pub fn get_group(&self, name: &str) -> Option<&ProcessGroup>
Get a process group by name
pub fn group_names(&self) -> Vec<String>
List all group names
pub fn group_rank(&self, group_name: &str) -> Option<usize>
Get rank within a group (local rank)
pub fn set_comm(&mut self, comm: Arc<dyn MeshComm + Send + Sync>)
Set the communication backend
pub fn total_memory_mb(&self) -> u64
Get total memory across all devices
pub fn total_compute_units(&self) -> u32
Get total compute units across all devices
Trait Implementations§
impl Clone for DeviceMesh
fn clone(&self) -> DeviceMesh
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. (Stable since Rust 1.0.0.)