use std::fmt;
/// Errors produced while configuring or running expert-parallel scheduling.
#[derive(Debug, Clone, PartialEq)]
pub enum EpError {
    /// `num_experts` does not shard evenly across the EP ranks.
    ExpertsDivisibility {
        num_experts: usize,
        num_ep_ranks: usize,
    },
    /// `num_ep_ranks` failed validation; the payload says why.
    InvalidNumRanks(String),
    /// `num_experts` failed validation; the payload says why.
    InvalidNumExperts(String),
    /// `hidden_size` failed validation; the payload says why.
    InvalidHiddenSize(String),
    /// A routing assignment referenced an expert id outside `[0, num_experts)`.
    ExpertIndexOutOfRange { index: usize, num_experts: usize },
    /// A buffer length did not match the expected element count.
    DimensionMismatch { expected: usize, got: usize },
}

impl fmt::Display for EpError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidNumRanks(msg) => write!(f, "invalid num_ep_ranks: {msg}"),
            Self::InvalidNumExperts(msg) => write!(f, "invalid num_experts: {msg}"),
            Self::InvalidHiddenSize(msg) => write!(f, "invalid hidden_size: {msg}"),
            Self::ExpertsDivisibility {
                num_experts,
                num_ep_ranks,
            } => write!(
                f,
                "num_experts ({num_experts}) must be divisible by num_ep_ranks ({num_ep_ranks})"
            ),
            Self::ExpertIndexOutOfRange { index, num_experts } => {
                write!(f, "expert index {index} is out of range [0, {num_experts})")
            }
            Self::DimensionMismatch { expected, got } => {
                write!(f, "dimension mismatch: expected {expected}, got {got}")
            }
        }
    }
}

// Marker impl: Display + Debug are enough; no error source to expose.
impl std::error::Error for EpError {}
/// Static layout description for expert-parallel (EP) execution.
#[derive(Debug, Clone)]
pub struct ExpertParallelConfig {
    // Total number of experts across all ranks.
    pub num_experts: usize,
    // Number of expert-parallel ranks the experts are sharded over.
    pub num_ep_ranks: usize,
    // Experts hosted per rank; `new` derives this as
    // `num_experts / num_ep_ranks` (0 when `num_ep_ranks` is 0).
    pub experts_per_rank: usize,
    // Width of each token's hidden-state vector.
    pub hidden_size: usize,
    // Per-expert capacity hint; `new` defaults this to 64.
    pub max_tokens_per_expert: usize,
}
impl ExpertParallelConfig {
    /// Default per-expert capacity hint used by [`ExpertParallelConfig::new`].
    // Named constant instead of a magic `64` buried in the constructor.
    pub const DEFAULT_MAX_TOKENS_PER_EXPERT: usize = 64;

    /// Builds a config, deriving `experts_per_rank` from the inputs.
    ///
    /// No validation happens here — call [`validate`](Self::validate) before
    /// use. A zero `num_ep_ranks` yields `experts_per_rank == 0` instead of
    /// panicking, so the error is reported by `validate` rather than here.
    pub fn new(num_experts: usize, num_ep_ranks: usize, hidden_size: usize) -> Self {
        let experts_per_rank = if num_ep_ranks > 0 {
            num_experts / num_ep_ranks
        } else {
            0
        };
        Self {
            num_experts,
            num_ep_ranks,
            experts_per_rank,
            hidden_size,
            max_tokens_per_expert: Self::DEFAULT_MAX_TOKENS_PER_EXPERT,
        }
    }

    /// Checks that the fields describe a usable EP layout.
    ///
    /// # Errors
    /// - [`EpError::InvalidNumRanks`] / [`EpError::InvalidNumExperts`] /
    ///   [`EpError::InvalidHiddenSize`] when the corresponding field is 0.
    /// - [`EpError::ExpertsDivisibility`] when `num_experts` is not a
    ///   multiple of `num_ep_ranks`.
    pub fn validate(&self) -> Result<(), EpError> {
        if self.num_ep_ranks == 0 {
            return Err(EpError::InvalidNumRanks(
                "num_ep_ranks must be ≥ 1".to_string(),
            ));
        }
        if self.num_experts == 0 {
            return Err(EpError::InvalidNumExperts(
                "num_experts must be ≥ 1".to_string(),
            ));
        }
        if self.hidden_size == 0 {
            return Err(EpError::InvalidHiddenSize(
                "hidden_size must be ≥ 1".to_string(),
            ));
        }
        if self.num_experts % self.num_ep_ranks != 0 {
            return Err(EpError::ExpertsDivisibility {
                num_experts: self.num_experts,
                num_ep_ranks: self.num_ep_ranks,
            });
        }
        Ok(())
    }
}
/// Per-rank send/receive token counts for one all-to-all exchange.
#[derive(Debug, Clone)]
pub struct AllToAllPlan {
    // send_counts[src][dst]: tokens rank `src` sends to rank `dst`.
    pub send_counts: Vec<Vec<usize>>,
    // recv_counts[dst][src]: tokens rank `dst` receives from rank `src`
    // (the transpose of `send_counts`).
    pub recv_counts: Vec<Vec<usize>>,
}

impl AllToAllPlan {
    /// Tokens each source rank sends to *other* ranks (diagonal excluded).
    // Extracted helper: this sum was previously duplicated verbatim in
    // `total_tokens_moved` and `max_imbalance_ratio`.
    fn cross_rank_send_totals(&self) -> Vec<usize> {
        self.send_counts
            .iter()
            .enumerate()
            .map(|(src, row)| {
                row.iter()
                    .enumerate()
                    .filter(|&(dst, _)| dst != src)
                    .map(|(_, &cnt)| cnt)
                    .sum()
            })
            .collect()
    }

    /// Total number of tokens that cross a rank boundary (local,
    /// same-rank traffic is excluded).
    pub fn total_tokens_moved(&self) -> usize {
        self.cross_rank_send_totals().iter().sum()
    }

    /// True when the busiest sender is within 2x of the mean cross-rank load.
    pub fn is_balanced(&self) -> bool {
        self.max_imbalance_ratio() <= 2.0
    }

    /// Ratio of the busiest rank's cross-rank sends to the mean across ranks;
    /// 1.0 means perfectly balanced. Degenerate plans (no ranks, or no
    /// cross-rank movement at all) report 1.0.
    pub fn max_imbalance_ratio(&self) -> f32 {
        let num_ranks = self.send_counts.len();
        if num_ranks == 0 {
            return 1.0;
        }
        let rank_totals = self.cross_rank_send_totals();
        let total: usize = rank_totals.iter().sum();
        if total == 0 {
            return 1.0;
        }
        let mean = total as f32 / num_ranks as f32;
        let max = *rank_totals.iter().max().unwrap_or(&0) as f32;
        // Defensive: with total >= 1 the mean can only approach 0 for an
        // astronomically large rank count; avoid dividing by a denormal.
        if mean < 1e-10 {
            1.0
        } else {
            max / mean
        }
    }
}
/// Aggregate byte-volume statistics for one simulated all-to-all exchange.
#[derive(Debug, Clone)]
pub struct CommunicationVolume {
    // Total cross-rank bytes summed over all source ranks.
    pub total_bytes: usize,
    // Largest cross-rank byte count sent by any single rank.
    pub max_bytes_any_rank: usize,
    // total_bytes / num_ranks (0.0 when there are no ranks).
    pub mean_bytes_per_rank: f32,
    // total_bytes / (num_ranks * max_bytes_any_rank): 1.0 when every rank
    // sends equally, 0.0 when nothing moves.
    pub bandwidth_utilization: f32,
}
/// Stateless helpers that build and cost all-to-all exchange plans.
pub struct AllToAllCommunication;

impl AllToAllCommunication {
    /// Builds an all-to-all plan from per-token expert assignments.
    ///
    /// Experts map to ranks in contiguous chunks of
    /// `ceil(num_experts / num_ranks)` experts each.
    ///
    /// NOTE(review): only `send_counts[0]` is populated — every token is
    /// modeled as originating on rank 0 (the unit tests rely on this
    /// single-source model). Confirm against callers if a multi-source
    /// exchange was intended.
    pub fn plan_all_to_all(
        assignments: &[usize],
        num_ranks: usize,
        num_experts: usize,
    ) -> AllToAllPlan {
        // Ceiling division so every expert gets a home rank even when the
        // counts do not divide evenly.
        let experts_per_rank = if num_ranks > 0 {
            (num_experts + num_ranks - 1) / num_ranks
        } else {
            1
        };
        let mut send_counts = vec![vec![0usize; num_ranks]; num_ranks];
        for &expert in assignments {
            // Clamp so an out-of-range expert id still maps to the last rank.
            let dst_rank = if experts_per_rank > 0 {
                (expert / experts_per_rank).min(num_ranks.saturating_sub(1))
            } else {
                0
            };
            send_counts[0][dst_rank] += 1;
        }
        // recv_counts is simply the transpose of send_counts.
        let mut recv_counts = vec![vec![0usize; num_ranks]; num_ranks];
        for src in 0..num_ranks {
            for dst in 0..num_ranks {
                recv_counts[dst][src] = send_counts[src][dst];
            }
        }
        AllToAllPlan {
            send_counts,
            recv_counts,
        }
    }

    /// Estimates wire traffic for `plan`, assuming each token carries
    /// `hidden_size` f32 activations. Local (diagonal) traffic is free.
    pub fn simulate_communication_volume(
        plan: &AllToAllPlan,
        hidden_size: usize,
    ) -> CommunicationVolume {
        // f32 activations per token; spell out the element size instead of a
        // magic `4` (the scheduler routes `&[f32]` hidden states).
        let bytes_per_token = hidden_size * std::mem::size_of::<f32>();
        let num_ranks = plan.send_counts.len();
        let rank_send_bytes: Vec<usize> = plan
            .send_counts
            .iter()
            .enumerate()
            .map(|(src, row)| {
                row.iter()
                    .enumerate()
                    .filter(|&(dst, _)| dst != src)
                    .map(|(_, &c)| c * bytes_per_token)
                    .sum()
            })
            .collect();
        let total_bytes: usize = rank_send_bytes.iter().sum();
        let max_bytes_any_rank = *rank_send_bytes.iter().max().unwrap_or(&0);
        let mean_bytes_per_rank = if num_ranks > 0 {
            total_bytes as f32 / num_ranks as f32
        } else {
            0.0
        };
        // 1.0 when every rank sends the same amount; approaches 1/num_ranks
        // when a single rank dominates the exchange.
        let bandwidth_utilization = if num_ranks > 0 && max_bytes_any_rank > 0 {
            total_bytes as f32 / (num_ranks as f32 * max_bytes_any_rank as f32)
        } else {
            0.0
        };
        CommunicationVolume {
            total_bytes,
            max_bytes_any_rank,
            mean_bytes_per_rank,
            bandwidth_utilization,
        }
    }
}
/// Output of `ExpertParallelScheduler::schedule` for one token batch.
#[derive(Debug, Clone)]
pub struct EpScheduleResult {
    // Planned send/recv token counts between ranks.
    pub all_to_all_plan: AllToAllPlan,
    // Estimated wire traffic for the plan.
    pub communication_volume: CommunicationVolume,
    // Tokens routed to each rank's local experts, indexed by rank.
    pub local_token_counts: Vec<usize>,
    // True when the plan's max imbalance ratio is <= 2.0.
    pub is_load_balanced: bool,
}
/// Drives expert-parallel scheduling for a validated configuration.
pub struct ExpertParallelScheduler {
    // EP layout; `new` validates it before construction. NOTE(review): the
    // field is `pub`, so callers can mutate it afterwards without re-running
    // validation.
    pub config: ExpertParallelConfig,
}
impl ExpertParallelScheduler {
    /// Creates a scheduler, rejecting invalid configurations up front.
    ///
    /// # Errors
    /// Propagates any `EpError` from `ExpertParallelConfig::validate`.
    pub fn new(config: ExpertParallelConfig) -> Result<Self, EpError> {
        config.validate()?;
        Ok(Self { config })
    }

    /// Global indices of the experts hosted on `rank`: a contiguous chunk,
    /// clamped so the trailing rank never runs past `num_experts`.
    pub fn local_expert_indices(&self, rank: usize) -> Vec<usize> {
        let first = rank * self.config.experts_per_rank;
        let last = (first + self.config.experts_per_rank).min(self.config.num_experts);
        (first..last).collect()
    }

    /// Plans the all-to-all exchange for one batch of routed tokens.
    ///
    /// `assignments[i]` is the expert chosen for token `i`; `hidden` must
    /// hold exactly `seq_len * hidden_size` activations.
    ///
    /// # Errors
    /// - `EpError::DimensionMismatch` if `hidden` has the wrong length.
    /// - `EpError::ExpertIndexOutOfRange` for the first assignment that is
    ///   `>= num_experts`.
    pub fn schedule(
        &self,
        assignments: &[usize],
        hidden: &[f32],
        seq_len: usize,
    ) -> Result<EpScheduleResult, EpError> {
        let want = seq_len * self.config.hidden_size;
        if hidden.len() != want {
            return Err(EpError::DimensionMismatch {
                expected: want,
                got: hidden.len(),
            });
        }
        // Reject the first out-of-range expert id, scanning in token order.
        if let Some(&bad) = assignments
            .iter()
            .find(|&&e| e >= self.config.num_experts)
        {
            return Err(EpError::ExpertIndexOutOfRange {
                index: bad,
                num_experts: self.config.num_experts,
            });
        }
        let all_to_all_plan = AllToAllCommunication::plan_all_to_all(
            assignments,
            self.config.num_ep_ranks,
            self.config.num_experts,
        );
        let communication_volume = AllToAllCommunication::simulate_communication_volume(
            &all_to_all_plan,
            self.config.hidden_size,
        );
        // Tally how many tokens land on each rank's local experts.
        let mut local_token_counts = vec![0usize; self.config.num_ep_ranks];
        for &expert in assignments {
            let owner = expert / self.config.experts_per_rank;
            if owner < self.config.num_ep_ranks {
                local_token_counts[owner] += 1;
            }
        }
        let is_load_balanced = all_to_all_plan.is_balanced();
        Ok(EpScheduleResult {
            all_to_all_plan,
            communication_volume,
            local_token_counts,
            is_load_balanced,
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand constructor: (num_experts, num_ep_ranks, hidden_size).
    fn make_config(
        num_experts: usize,
        num_ep_ranks: usize,
        hidden_size: usize,
    ) -> ExpertParallelConfig {
        ExpertParallelConfig::new(num_experts, num_ep_ranks, hidden_size)
    }

    #[test]
    fn test_plan_all_to_all_uniform_routing() {
        let assignments = vec![0, 1, 2, 3, 0, 1, 2, 3];
        let plan = AllToAllCommunication::plan_all_to_all(&assignments, 2, 4);
        assert_eq!(plan.send_counts[0][0], 4, "4 tokens stay local at rank 0");
        assert_eq!(plan.send_counts[0][1], 4, "4 tokens sent to rank 1");
    }

    #[test]
    fn test_plan_all_to_all_unbalanced_routing() {
        let assignments = vec![0usize; 8];
        let plan = AllToAllCommunication::plan_all_to_all(&assignments, 2, 4);
        assert_eq!(plan.send_counts[0][0], 8, "all tokens stay at rank 0");
        assert_eq!(plan.send_counts[0][1], 0, "nothing sent to rank 1");
    }

    #[test]
    fn test_total_tokens_moved_uniform() {
        let assignments = vec![0, 0, 0, 0, 3, 3, 3, 3];
        let plan = AllToAllCommunication::plan_all_to_all(&assignments, 2, 4);
        let moved = plan.total_tokens_moved();
        assert_eq!(moved, 4, "4 tokens cross rank boundaries, got {moved}");
    }

    #[test]
    fn test_total_tokens_moved_all_local() {
        let assignments = vec![0usize; 16];
        let plan = AllToAllCommunication::plan_all_to_all(&assignments, 4, 8);
        assert_eq!(
            plan.total_tokens_moved(),
            0,
            "no cross-rank movement when all tokens are local"
        );
    }

    #[test]
    fn test_communication_volume_total_bytes() {
        let assignments = vec![2, 2, 2, 2];
        let plan = AllToAllCommunication::plan_all_to_all(&assignments, 2, 4);
        let vol = AllToAllCommunication::simulate_communication_volume(&plan, 8);
        // 4 tokens * 8 floats * 4 bytes cross from rank 0 to rank 1.
        assert_eq!(
            vol.total_bytes,
            4 * 8 * 4,
            "expected 128 bytes, got {}",
            vol.total_bytes
        );
    }

    #[test]
    fn test_communication_volume_bandwidth_utilization_range() {
        let assignments = vec![0, 1, 2, 3, 0, 1, 2, 3];
        let plan = AllToAllCommunication::plan_all_to_all(&assignments, 2, 4);
        let vol = AllToAllCommunication::simulate_communication_volume(&plan, 16);
        assert!(
            vol.bandwidth_utilization >= 0.0 && vol.bandwidth_utilization <= 1.0,
            "bandwidth utilization must be in [0, 1], got {}",
            vol.bandwidth_utilization
        );
    }

    #[test]
    fn test_ep_config_valid() {
        let cfg = make_config(8, 4, 512);
        assert!(cfg.validate().is_ok());
        assert_eq!(cfg.experts_per_rank, 2);
    }

    #[test]
    fn test_ep_config_invalid_divisibility() {
        let cfg = make_config(7, 4, 512);
        let err = cfg.validate().unwrap_err();
        // FIX: `matches!` returns a bool that was previously discarded, so
        // this variant check could never fail; it must be asserted.
        assert!(matches!(err, EpError::ExpertsDivisibility { .. }));
    }

    #[test]
    fn test_ep_config_invalid_zero_ranks() {
        let cfg = make_config(8, 0, 512);
        let err = cfg.validate().unwrap_err();
        // FIX: same as above — assert the `matches!` result.
        assert!(matches!(err, EpError::InvalidNumRanks(_)));
    }

    #[test]
    fn test_local_expert_indices() {
        let cfg = make_config(8, 4, 512);
        let scheduler = ExpertParallelScheduler::new(cfg).expect("config should be valid");
        assert_eq!(scheduler.local_expert_indices(0), vec![0, 1]);
        assert_eq!(scheduler.local_expert_indices(1), vec![2, 3]);
        assert_eq!(scheduler.local_expert_indices(2), vec![4, 5]);
        assert_eq!(scheduler.local_expert_indices(3), vec![6, 7]);
    }

    #[test]
    fn test_schedule_uniform_routing() {
        let cfg = make_config(4, 2, 8);
        let scheduler = ExpertParallelScheduler::new(cfg).expect("valid config");
        let seq_len = 8;
        let assignments = vec![0, 1, 2, 3, 0, 1, 2, 3];
        let hidden = vec![0.0_f32; seq_len * 8];
        let result = scheduler
            .schedule(&assignments, &hidden, seq_len)
            .expect("schedule should succeed");
        assert_eq!(result.local_token_counts.len(), 2);
        assert_eq!(result.local_token_counts[0], 4);
        assert_eq!(result.local_token_counts[1], 4);
    }

    #[test]
    fn test_imbalance_ratio_balanced() {
        let assignments = vec![2, 2, 2, 2];
        let plan = AllToAllCommunication::plan_all_to_all(&assignments, 2, 4);
        let ratio = plan.max_imbalance_ratio();
        assert!(ratio >= 1.0, "ratio must be ≥ 1.0, got {ratio}");
    }

    #[test]
    fn test_imbalance_ratio_all_local() {
        let assignments = vec![0usize; 8];
        let plan = AllToAllCommunication::plan_all_to_all(&assignments, 2, 4);
        let ratio = plan.max_imbalance_ratio();
        assert_eq!(
            ratio, 1.0,
            "all-local routing should yield ratio 1.0, got {ratio}"
        );
    }
}