use std::collections::HashMap;
/// Rule used to choose which hot expert is dropped when the cache is full.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum EvictionPolicy {
    /// Drop the hot expert with the oldest timestamp.
    Lru,
    /// Drop the hot expert with the lowest access count.
    Lfu,
    /// Use the LFU rule when access counts among hot experts are strongly
    /// skewed (highest count at least three times the lowest, lowest
    /// non-zero); otherwise use the LRU rule.
    Adaptive,
}
/// Tunable parameters for an `ExpertCache`.
#[derive(Clone, Debug)]
pub struct ExpertCacheConfig {
    /// Maximum number of experts kept in the hot set.
    pub max_hot_experts: usize,
    /// Router weight that must be strictly exceeded before a prefetch is
    /// suggested for an expert.
    pub prefetch_threshold: f32,
    /// Rule used to pick an eviction victim when the hot set is full.
    pub eviction_policy: EvictionPolicy,
}

impl Default for ExpertCacheConfig {
    /// Four hot experts, a prefetch threshold of `0.1`, and LRU eviction.
    fn default() -> Self {
        let max_hot_experts = 4;
        let prefetch_threshold = 0.1;
        ExpertCacheConfig {
            max_hot_experts,
            prefetch_threshold,
            eviction_policy: EvictionPolicy::Lru,
        }
    }
}
/// Running counters describing how an expert cache has been used.
#[derive(Clone, Debug, Default)]
pub struct ExpertCacheStats {
    /// Accesses that found the expert already in the hot set.
    pub hits: usize,
    /// Accesses that did not find the expert in the hot set.
    pub misses: usize,
    /// Experts removed from the hot set.
    pub evictions: usize,
    /// Hits on experts that were flagged as prefetched at the time of access.
    pub prefetch_hits: usize,
}

impl ExpertCacheStats {
    /// Fraction of recorded accesses that were hits.
    ///
    /// Returns `0.0` when no access has been recorded, so the result is never
    /// the NaN a `0 / 0` division would produce.
    pub fn hit_rate(&self) -> f32 {
        match self.hits + self.misses {
            0 => 0.0,
            recorded => self.hits as f32 / recorded as f32,
        }
    }
}
/// Tracks which experts are currently "hot" and decides what to evict or
/// prefetch.
///
/// Only expert ids and bookkeeping are stored here. Ids at or above
/// `num_experts` are still admitted to the hot set, but no frequency or
/// prefetch state is kept for them.
pub struct ExpertCache {
    /// Length of the per-expert `frequency` and `prefetched` tables.
    num_experts: usize,
    /// Hot experts as `(expert_id, timestamp)` pairs. Order carries no meaning
    /// because eviction uses `swap_remove`.
    hot_set: Vec<(usize, u64)>,
    /// Number of `access` calls seen per expert id.
    frequency: Vec<usize>,
    /// `true` while an expert is hot because of a prefetch and has not been
    /// accessed since.
    prefetched: Vec<bool>,
    /// Capacity, prefetch threshold and eviction policy.
    config: ExpertCacheConfig,
    /// Hit / miss / eviction counters; cleared by `reset_stats`.
    stats: ExpertCacheStats,
    /// Logical clock: incremented once per `access` call and used as the
    /// timestamp stored in `hot_set`.
    access_counter: u64,
}

impl ExpertCache {
    /// Creates an empty cache that keeps per-expert state for ids
    /// `0..num_experts`.
    pub fn new(num_experts: usize, config: ExpertCacheConfig) -> Self {
        // Capacity is only a hint; capping it keeps an "effectively unlimited"
        // `max_hot_experts` from triggering a capacity-overflow panic.
        let reserve = config.max_hot_experts.min(num_experts);
        Self {
            num_experts,
            hot_set: Vec::with_capacity(reserve),
            frequency: vec![0; num_experts],
            prefetched: vec![false; num_experts],
            config,
            stats: ExpertCacheStats::default(),
            access_counter: 0,
        }
    }

    /// Records a use of `expert_id` and reports whether it was already hot.
    ///
    /// On a hit the expert's timestamp is refreshed and, if the expert was
    /// brought in by a prefetch and not touched since, `prefetch_hits` is
    /// incremented once. On a miss the expert is admitted (evicting a victim
    /// if the hot set is full) and `false` is returned.
    pub fn access(&mut self, expert_id: usize) -> bool {
        self.access_counter += 1;
        let timestamp = self.access_counter;

        // Out-of-range ids have no frequency slot; they are simply not counted.
        if let Some(count) = self.frequency.get_mut(expert_id) {
            *count += 1;
        }

        if let Some(entry) = self.hot_set.iter_mut().find(|entry| entry.0 == expert_id) {
            entry.1 = timestamp;
            self.stats.hits += 1;
            if let Some(flag) = self.prefetched.get_mut(expert_id) {
                if *flag {
                    // Credit the prefetch exactly once.
                    *flag = false;
                    self.stats.prefetch_hits += 1;
                }
            }
            return true;
        }

        self.stats.misses += 1;
        self.admit(expert_id);
        false
    }

    /// Returns `true` when `router_weight` is strictly above the configured
    /// prefetch threshold and `expert_id` is not already hot.
    ///
    /// A NaN weight (or NaN threshold) never triggers a prefetch.
    pub fn should_prefetch(&self, expert_id: usize, router_weight: f32) -> bool {
        router_weight > self.config.prefetch_threshold && !self.is_hot(expert_id)
    }

    /// Names the expert the configured policy would evict, or `None` while
    /// the hot set still has free capacity (or is empty).
    pub fn suggest_eviction(&self) -> Option<usize> {
        if self.hot_set.len() < self.config.max_hot_experts {
            return None;
        }
        match self.config.eviction_policy {
            EvictionPolicy::Lru => self.suggest_lru_eviction(),
            EvictionPolicy::Lfu => self.suggest_lfu_eviction(),
            EvictionPolicy::Adaptive => self.suggest_adaptive_eviction(),
        }
    }

    /// Removes `expert_id` from the hot set, if present, and counts the
    /// eviction. Does nothing when the expert is not hot.
    pub fn evict(&mut self, expert_id: usize) {
        if let Some(pos) = self.hot_set.iter().position(|&(id, _)| id == expert_id) {
            self.hot_set.swap_remove(pos);
            self.stats.evictions += 1;
            // A prefetch that was evicted before use must not be credited if
            // the expert is later loaded on demand.
            if let Some(flag) = self.prefetched.get_mut(expert_id) {
                *flag = false;
            }
        }
    }

    /// Adds `expert_id` to the hot set, evicting the policy's victim first if
    /// the set is full.
    ///
    /// No-op when the expert is already hot or when `max_hot_experts` is zero.
    pub fn admit(&mut self, expert_id: usize) {
        self.try_admit(expert_id);
    }

    /// Admits `expert_id` like `admit` and marks it as prefetched so that its
    /// first subsequent hit is counted in `prefetch_hits`.
    ///
    /// An expert that was already hot is left untouched: it was not loaded by
    /// this prefetch, so a later hit must not be attributed to it.
    pub fn prefetch_admit(&mut self, expert_id: usize) {
        if self.try_admit(expert_id) {
            if let Some(flag) = self.prefetched.get_mut(expert_id) {
                *flag = true;
            }
        }
    }

    /// Returns `true` when `expert_id` is currently in the hot set.
    pub fn is_hot(&self, expert_id: usize) -> bool {
        self.hot_set.iter().any(|&(id, _)| id == expert_id)
    }

    /// Read-only view of the accumulated counters.
    pub fn stats(&self) -> &ExpertCacheStats {
        &self.stats
    }

    /// Zeroes all counters; the hot set and frequency table are untouched.
    pub fn reset_stats(&mut self) {
        self.stats = ExpertCacheStats::default();
    }

    /// Number of experts currently hot.
    pub fn hot_count(&self) -> usize {
        self.hot_set.len()
    }

    /// Configured capacity of the hot set.
    pub fn max_hot(&self) -> usize {
        self.config.max_hot_experts
    }

    /// Ids of all hot experts, in unspecified order.
    pub fn hot_experts(&self) -> Vec<usize> {
        self.hot_set.iter().map(|&(id, _)| id).collect()
    }

    /// Like `suggest_eviction`, but blends the base policy with an affinity
    /// signal.
    ///
    /// `affinity_weight` is clamped to `[0, 1]`. Each hot expert is scored as
    /// `(1 - w) * base + w * (1 - affinity.score(id))`, where `base` is the
    /// policy's normalised eviction score; the highest score is evicted and
    /// the first such expert wins ties. A weight that is (nearly) zero or NaN
    /// defers entirely to `suggest_eviction`.
    ///
    /// NOTE(review): assumes `affinity.score` yields values in `[0, 1]` —
    /// confirm against `ExpertAffinity`.
    pub fn suggest_eviction_with_affinity(
        &self,
        affinity: &crate::moe::ExpertAffinity,
        affinity_weight: f32,
    ) -> Option<usize> {
        if self.hot_set.len() < self.config.max_hot_experts {
            return None;
        }
        let weight = affinity_weight.clamp(0.0, 1.0);
        // `clamp` passes NaN through; a NaN weight would poison every score
        // and yield no victim at all, so treat it like "no affinity".
        if weight.is_nan() || weight < 1e-6 {
            return self.suggest_eviction();
        }
        let base_scores = self.compute_base_eviction_scores();
        if base_scores.is_empty() {
            return None;
        }
        let mut best_victim: Option<usize> = None;
        let mut best_score: f32 = f32::MIN;
        for &(id, _) in &self.hot_set {
            let base_score = base_scores.get(&id).copied().unwrap_or(0.5);
            let expert_affinity = affinity.score(id);
            let combined = (1.0 - weight) * base_score + weight * (1.0 - expert_affinity);
            if combined > best_score {
                best_score = combined;
                best_victim = Some(id);
            }
        }
        best_victim
    }

    /// Prefetches up to `budget` experts that are not yet hot, in the order
    /// given by `affinity.top_k_by_affinity`, and returns the ids admitted.
    ///
    /// When the hot set is full a victim is chosen with an affinity weight of
    /// `0.5`. Prefetching stops early if no victim is available or if the
    /// victim would be an expert this same call just prefetched, so every
    /// returned id is hot when the call returns.
    ///
    /// NOTE(review): presumably `top_k_by_affinity` returns ids in descending
    /// affinity order — verify against `ExpertAffinity`.
    pub fn prefetch_by_affinity(
        &mut self,
        affinity: &crate::moe::ExpertAffinity,
        budget: usize,
    ) -> Vec<usize> {
        if budget == 0 {
            return Vec::new();
        }
        let top_experts = affinity.top_k_by_affinity(self.num_experts);
        let mut prefetched = Vec::with_capacity(budget);
        for expert_id in top_experts {
            if prefetched.len() >= budget {
                break;
            }
            if self.is_hot(expert_id) {
                continue;
            }
            if self.hot_set.len() >= self.config.max_hot_experts {
                match self.suggest_eviction_with_affinity(affinity, 0.5) {
                    // Victim choice does not depend on the candidate, so once
                    // it lands on one of our own prefetches nothing further
                    // can be admitted without undoing earlier work.
                    Some(victim) if !prefetched.contains(&victim) => self.evict(victim),
                    _ => break,
                }
            }
            self.prefetch_admit(expert_id);
            prefetched.push(expert_id);
        }
        prefetched
    }

    /// Shared admission path; returns `true` only if `expert_id` was newly
    /// inserted into the hot set.
    fn try_admit(&mut self, expert_id: usize) -> bool {
        // A zero-capacity cache has no victim to make room with, so admitting
        // would silently exceed the limit.
        if self.config.max_hot_experts == 0 || self.is_hot(expert_id) {
            return false;
        }
        if self.hot_set.len() >= self.config.max_hot_experts {
            if let Some(victim) = self.suggest_eviction() {
                self.evict(victim);
            }
        }
        let timestamp = self.access_counter;
        self.hot_set.push((expert_id, timestamp));
        true
    }

    /// Access count for `expert_id`, or `0` for ids outside the table.
    fn frequency_of(&self, expert_id: usize) -> usize {
        self.frequency.get(expert_id).copied().unwrap_or(0)
    }

    /// Whether the adaptive policy should behave like LFU right now: the
    /// lowest hot frequency is non-zero and the highest is at least three
    /// times larger.
    fn adaptive_prefers_lfu(&self) -> bool {
        let freqs: Vec<usize> = self
            .hot_set
            .iter()
            .map(|&(id, _)| self.frequency_of(id))
            .collect();
        match (freqs.iter().min(), freqs.iter().max()) {
            (Some(&lo), Some(&hi)) => lo > 0 && hi >= lo.saturating_mul(3),
            _ => false,
        }
    }

    /// Maps every hot expert to an eviction score in `[0, 1]` derived from
    /// `key(expert_id, timestamp)`: the smallest key scores `1.0`, the largest
    /// `0.0`, and every expert scores `0.5` when all keys are equal.
    fn normalized_eviction_scores<F>(&self, key: F) -> HashMap<usize, f32>
    where
        F: Fn(usize, u64) -> u64,
    {
        let keys: Vec<u64> = self
            .hot_set
            .iter()
            .map(|&(id, ts)| key(id, ts))
            .collect();
        let lo = keys.iter().copied().min().unwrap_or(0);
        let hi = keys.iter().copied().max().unwrap_or(lo);
        let range = (hi - lo) as f32;
        self.hot_set
            .iter()
            .zip(keys)
            .map(|(&(id, _), k)| {
                let score = if range > 0.0 {
                    1.0 - ((k - lo) as f32 / range)
                } else {
                    0.5
                };
                (id, score)
            })
            .collect()
    }

    /// Normalised eviction scores under the configured policy (higher means
    /// "more evictable"); empty when nothing is hot.
    fn compute_base_eviction_scores(&self) -> HashMap<usize, f32> {
        if self.hot_set.is_empty() {
            return HashMap::new();
        }
        let by_frequency = match self.config.eviction_policy {
            EvictionPolicy::Lru => false,
            EvictionPolicy::Lfu => true,
            EvictionPolicy::Adaptive => self.adaptive_prefers_lfu(),
        };
        if by_frequency {
            self.normalized_eviction_scores(|id, _| self.frequency_of(id) as u64)
        } else {
            self.normalized_eviction_scores(|_, ts| ts)
        }
    }

    /// Hot expert with the oldest timestamp (first one on ties).
    fn suggest_lru_eviction(&self) -> Option<usize> {
        self.hot_set
            .iter()
            .min_by_key(|&&(_, ts)| ts)
            .map(|&(id, _)| id)
    }

    /// Hot expert with the lowest access count (first one on ties).
    fn suggest_lfu_eviction(&self) -> Option<usize> {
        self.hot_set
            .iter()
            .min_by_key(|&&(id, _)| self.frequency_of(id))
            .map(|&(id, _)| id)
    }

    /// LFU victim when frequencies are strongly skewed, LRU victim otherwise.
    fn suggest_adaptive_eviction(&self) -> Option<usize> {
        if self.adaptive_prefers_lfu() {
            self.suggest_lfu_eviction()
        } else {
            self.suggest_lru_eviction()
        }
    }
}
/// All tokens routed to one expert, with the matching router weights.
///
/// `token_indices[i]` and `weights[i]` describe the same routing entry.
#[derive(Clone, Debug)]
pub struct ExpertBatch {
    /// Expert that should process this batch.
    pub expert_id: usize,
    /// Token indices routed to the expert, in the order they were encountered.
    pub token_indices: Vec<usize>,
    /// Router weight for each entry of `token_indices`.
    pub weights: Vec<f32>,
}

/// Stateless helper that regroups per-token routing decisions by expert.
pub struct MoeBatchScheduler;

impl MoeBatchScheduler {
    /// Turns `(token_idx, [(expert_id, weight), ..])` routing decisions into
    /// one [`ExpertBatch`] per referenced expert.
    ///
    /// Batches are returned sorted by ascending `expert_id`; inside a batch,
    /// tokens keep the order in which they appear in `routing_decisions`.
    /// Empty input yields an empty vector.
    pub fn schedule(routing_decisions: &[(usize, Vec<(usize, f32)>)]) -> Vec<ExpertBatch> {
        let mut by_expert: HashMap<usize, ExpertBatch> = HashMap::new();

        for (token_idx, choices) in routing_decisions {
            for &(expert_id, weight) in choices {
                let batch = by_expert.entry(expert_id).or_insert_with(|| ExpertBatch {
                    expert_id,
                    token_indices: Vec::new(),
                    weights: Vec::new(),
                });
                batch.token_indices.push(*token_idx);
                batch.weights.push(weight);
            }
        }

        // HashMap iteration order is arbitrary, so impose a deterministic one.
        let mut ordered: Vec<ExpertBatch> =
            by_expert.into_iter().map(|(_, batch)| batch).collect();
        ordered.sort_by_key(|batch| batch.expert_id);
        ordered
    }
}
/// Hook invoked with a byte slice plus an `offset`/`len` pair.
///
/// NOTE(review): presumably a hint that `len` bytes starting at `offset`
/// within `data` will be read soon; the trait itself does not say how
/// out-of-range values must be treated — confirm with implementors.
pub trait Prefetcher: Send + Sync {
    /// Handles one prefetch request for `data[offset..offset + len]`.
    fn prefetch(&self, data: &[u8], offset: usize, len: usize);
}

/// A [`Prefetcher`] that ignores every request.
pub struct NullPrefetcher;

impl Prefetcher for NullPrefetcher {
    /// Does nothing; the arguments are never inspected.
    #[inline(always)]
    fn prefetch(&self, _: &[u8], _: usize, _: usize) {}
}
/// Alignment granularity, in bytes, used by [`align_to_cache_line`].
/// Must be a power of two for the mask arithmetic below to be valid.
const CACHE_LINE_BYTES: usize = 64;

/// Rounds `ptr` up to the nearest multiple of [`CACHE_LINE_BYTES`].
///
/// Values that are already aligned are returned unchanged.
#[inline]
pub fn align_to_cache_line(ptr: usize) -> usize {
    let mask = CACHE_LINE_BYTES - 1;
    let bumped = ptr + mask;
    bumped & !mask
}
/// Estimates the byte size of a `rows x cols` expert matrix stored as packed
/// elements plus per-block scale data.
///
/// The result is the sum of:
/// * one byte per four elements (rounded up), and
/// * four bytes per block of `block_size` elements (block count rounded up).
///
/// NOTE(review): the four-per-byte packing implies 2-bit elements and the
/// four bytes per block look like one 32-bit scale — confirm with the storage
/// format.
///
/// # Panics
///
/// Panics if `block_size` is zero (division by zero).
#[inline]
pub fn expert_memory_footprint(rows: usize, cols: usize, block_size: usize) -> usize {
    let ceil_div = |numerator: usize, denominator: usize| {
        (numerator + denominator - 1) / denominator
    };
    let element_count = rows * cols;
    let weight_bytes = ceil_div(element_count, 4);
    let scale_bytes = ceil_div(element_count, block_size) * 4;
    weight_bytes + scale_bytes
}
// Unit tests for the expert cache, the batch scheduler and the small helpers.
#[cfg(test)]
mod tests {
use super::*;
// Builds a cache with the given expert count, capacity and policy; the
// prefetch threshold is fixed at 0.1.
fn make_cache(num_experts: usize, max_hot: usize, policy: EvictionPolicy) -> ExpertCache {
let config = ExpertCacheConfig {
max_hot_experts: max_hot,
prefetch_threshold: 0.1,
eviction_policy: policy,
};
ExpertCache::new(num_experts, config)
}
// LRU, capacity 3: re-accessing expert 0 makes expert 1 the oldest entry, so
// admitting expert 3 evicts expert 1.
#[test]
fn test_lru_eviction_order() {
let mut cache = make_cache(8, 3, EvictionPolicy::Lru);
cache.access(0);
cache.access(1);
cache.access(2);
assert!(cache.is_hot(0));
assert!(cache.is_hot(1));
assert!(cache.is_hot(2));
cache.access(0);
cache.access(3);
assert!(
cache.is_hot(0),
"Expert 0 was refreshed, should still be hot"
);
assert!(!cache.is_hot(1), "Expert 1 should have been evicted (LRU)");
assert!(
cache.is_hot(2),
"Expert 2 was accessed after 1, should survive"
);
assert!(cache.is_hot(3), "Expert 3 was just admitted");
}
// LFU, capacity 3: access counts are 3 / 1 / 2, so admitting expert 3 evicts
// expert 1 (the least frequently accessed).
#[test]
fn test_lfu_eviction_order() {
let mut cache = make_cache(8, 3, EvictionPolicy::Lfu);
cache.access(0);
cache.access(0);
cache.access(0);
cache.access(1);
cache.access(2);
cache.access(2);
assert!(cache.is_hot(0));
assert!(cache.is_hot(1));
assert!(cache.is_hot(2));
cache.access(3);
assert!(cache.is_hot(0), "Expert 0 (freq=3) should survive");
assert!(
!cache.is_hot(1),
"Expert 1 (freq=1) should be evicted by LFU"
);
assert!(cache.is_hot(2), "Expert 2 (freq=2) should survive");
assert!(cache.is_hot(3), "Expert 3 was just admitted");
}
// Accessing ten distinct experts with capacity 4 leaves exactly four hot.
#[test]
fn test_hot_set_respects_limit() {
let mut cache = make_cache(16, 4, EvictionPolicy::Lru);
for i in 0..10 {
cache.access(i);
}
assert!(
cache.hot_count() <= 4,
"Hot count {} exceeds max of 4",
cache.hot_count()
);
assert_eq!(cache.hot_count(), 4);
}
// The first access to an expert is a miss; later accesses are hits.
#[test]
fn test_access_returns_hit_for_hot() {
let mut cache = make_cache(8, 4, EvictionPolicy::Lru);
assert!(!cache.access(3));
assert!(cache.access(3));
assert!(cache.access(3));
}
// Capacity 2: new experts miss, and expert 0 misses again once it has been
// evicted to make room for them.
#[test]
fn test_access_returns_miss_for_cold() {
let mut cache = make_cache(8, 2, EvictionPolicy::Lru);
cache.access(0);
cache.access(1);
assert!(!cache.access(2));
assert!(!cache.access(3));
assert!(!cache.access(0));
}
// hit_rate is 0 with no accesses, then tracks hits / (hits + misses).
#[test]
fn test_hit_rate_calculation() {
let mut cache = make_cache(8, 4, EvictionPolicy::Lru);
assert_eq!(cache.stats().hit_rate(), 0.0);
cache.access(0);
assert_eq!(cache.stats().hits, 0);
assert_eq!(cache.stats().misses, 1);
assert_eq!(cache.stats().hit_rate(), 0.0);
cache.access(0);
assert_eq!(cache.stats().hits, 1);
assert_eq!(cache.stats().misses, 1);
assert!((cache.stats().hit_rate() - 0.5).abs() < 1e-6);
cache.access(0);
cache.access(0);
assert!((cache.stats().hit_rate() - 0.75).abs() < 1e-6);
}
// Prefetch is suggested only for weights strictly above the threshold, and
// never for an expert that is already hot.
#[test]
fn test_prefetch_threshold() {
let config = ExpertCacheConfig {
max_hot_experts: 4,
prefetch_threshold: 0.15,
eviction_policy: EvictionPolicy::Lru,
};
let mut cache = ExpertCache::new(8, config);
assert!(cache.should_prefetch(0, 0.2));
assert!(cache.should_prefetch(0, 0.16));
assert!(!cache.should_prefetch(0, 0.15)); assert!(!cache.should_prefetch(0, 0.1));
assert!(!cache.should_prefetch(0, 0.0));
cache.access(0);
assert!(!cache.should_prefetch(0, 0.5));
}
// Three tokens routed to two experts each produce four batches, sorted by
// expert id, with tokens and weights kept in input order.
#[test]
fn test_batch_scheduler_groups_by_expert() {
let routing = vec![
(0, vec![(2, 0.6), (5, 0.4)]),
(1, vec![(5, 0.7), (3, 0.3)]),
(2, vec![(2, 0.7), (7, 0.3)]),
];
let batches = MoeBatchScheduler::schedule(&routing);
assert_eq!(batches.len(), 4);
let expert_ids: Vec<usize> = batches.iter().map(|b| b.expert_id).collect();
assert_eq!(expert_ids, vec![2, 3, 5, 7]);
let batch_2 = &batches[0];
assert_eq!(batch_2.expert_id, 2);
assert_eq!(batch_2.token_indices, vec![0, 2]);
assert_eq!(batch_2.weights, vec![0.6, 0.7]);
let batch_3 = &batches[1];
assert_eq!(batch_3.expert_id, 3);
assert_eq!(batch_3.token_indices, vec![1]);
assert_eq!(batch_3.weights, vec![0.3]);
let batch_5 = &batches[2];
assert_eq!(batch_5.expert_id, 5);
assert_eq!(batch_5.token_indices, vec![0, 1]);
assert_eq!(batch_5.weights, vec![0.4, 0.7]);
let batch_7 = &batches[3];
assert_eq!(batch_7.expert_id, 7);
assert_eq!(batch_7.token_indices, vec![2]);
assert_eq!(batch_7.weights, vec![0.3]);
}
// A single token routed to experts 4 and 1 yields two one-token batches,
// ordered by expert id (1 before 4).
#[test]
fn test_batch_scheduler_single_token() {
let routing = vec![(0, vec![(4, 0.65), (1, 0.35)])];
let batches = MoeBatchScheduler::schedule(&routing);
assert_eq!(batches.len(), 2);
assert_eq!(batches[0].expert_id, 1);
assert_eq!(batches[0].token_indices, vec![0]);
assert_eq!(batches[0].weights, vec![0.35]);
assert_eq!(batches[1].expert_id, 4);
assert_eq!(batches[1].token_indices, vec![0]);
assert_eq!(batches[1].weights, vec![0.65]);
}
// Hits, misses and evictions accumulate across accesses and are all zeroed
// by reset_stats.
#[test]
fn test_cache_stats_accumulate() {
let mut cache = make_cache(8, 2, EvictionPolicy::Lru);
cache.access(0); cache.access(1); assert_eq!(cache.stats().misses, 2);
assert_eq!(cache.stats().hits, 0);
assert_eq!(cache.stats().evictions, 0);
cache.access(0); assert_eq!(cache.stats().hits, 1);
cache.access(2); assert_eq!(cache.stats().misses, 3);
assert_eq!(cache.stats().evictions, 1);
cache.access(0); assert_eq!(cache.stats().hits, 2);
cache.reset_stats();
assert_eq!(cache.stats().hits, 0);
assert_eq!(cache.stats().misses, 0);
assert_eq!(cache.stats().evictions, 0);
assert_eq!(cache.stats().prefetch_hits, 0);
}
// Admitting a fourth expert into a full capacity-3 LRU cache evicts the
// oldest entry (expert 0) and keeps the hot count at 3.
#[test]
fn test_eviction_when_full() {
let mut cache = make_cache(8, 3, EvictionPolicy::Lru);
cache.access(0);
cache.access(1);
cache.access(2);
assert_eq!(cache.hot_count(), 3);
assert_eq!(cache.stats().evictions, 0);
cache.access(3);
assert_eq!(cache.hot_count(), 3);
assert_eq!(cache.stats().evictions, 1);
assert!(!cache.is_hot(0), "Expert 0 (oldest) should be evicted");
assert!(cache.is_hot(3));
}
// Checks the footprint formula against two hand-computed values and against
// the same formula re-derived inline for an 11008 x 4096 matrix.
#[test]
fn test_memory_footprint_calculation() {
let footprint = expert_memory_footprint(256, 256, 256);
assert_eq!(footprint, 17408);
let footprint_small = expert_memory_footprint(1, 4, 256);
assert_eq!(footprint_small, 5);
let rows = 11008usize;
let cols = 4096usize;
let total = rows * cols; let packed = (total + 3) / 4; let blocks = (total + 255) / 256; let scales_bytes = blocks * 4; let expected = packed + scales_bytes; assert_eq!(expert_memory_footprint(rows, cols, 256), expected);
}
// Values are rounded up to the next multiple of 64; aligned values are
// returned unchanged.
#[test]
fn test_align_to_cache_line() {
assert_eq!(align_to_cache_line(0), 0);
assert_eq!(align_to_cache_line(1), 64);
assert_eq!(align_to_cache_line(63), 64);
assert_eq!(align_to_cache_line(64), 64);
assert_eq!(align_to_cache_line(65), 128);
assert_eq!(align_to_cache_line(128), 128);
assert_eq!(align_to_cache_line(129), 192);
}
// NullPrefetcher must not panic for any request, including an offset past
// the end of the buffer and an empty slice.
#[test]
fn test_null_prefetcher_noop() {
let prefetcher = NullPrefetcher;
let data = vec![0u8; 1024];
prefetcher.prefetch(&data, 0, 64);
prefetcher.prefetch(&data, 512, 256);
prefetcher.prefetch(&data, 2000, 100); prefetcher.prefetch(&[], 0, 0);
}
// Adaptive policy with access counts 9 / 3 / 1: the spread is large enough
// for the LFU rule, so expert 2 is evicted when expert 3 arrives.
#[test]
fn test_adaptive_eviction_policy() {
let mut cache = make_cache(8, 3, EvictionPolicy::Adaptive);
for _ in 0..9 {
cache.access(0);
}
for _ in 0..3 {
cache.access(1);
}
cache.access(2);
cache.access(3);
assert!(
cache.is_hot(0),
"Expert 0 (freq=9) should survive adaptive LFU"
);
assert!(
cache.is_hot(1),
"Expert 1 (freq=3) should survive adaptive LFU"
);
assert!(
!cache.is_hot(2),
"Expert 2 (freq=1) should be evicted by adaptive LFU"
);
assert!(cache.is_hot(3), "Expert 3 was just admitted");
}
// The first access after prefetch_admit counts as a prefetch hit; later
// accesses to the same expert do not.
#[test]
fn test_prefetch_admit_tracks_hits() {
let mut cache = make_cache(8, 4, EvictionPolicy::Lru);
cache.prefetch_admit(5);
assert!(cache.is_hot(5));
assert_eq!(cache.stats().prefetch_hits, 0);
let hit = cache.access(5);
assert!(hit, "Expert 5 is in hot set via prefetch");
assert_eq!(cache.stats().prefetch_hits, 1);
cache.access(5);
assert_eq!(cache.stats().prefetch_hits, 1);
}
// Scheduling an empty routing list yields no batches.
#[test]
fn test_batch_scheduler_empty() {
let routing: Vec<(usize, Vec<(usize, f32)>)> = vec![];
let batches = MoeBatchScheduler::schedule(&routing);
assert!(batches.is_empty());
}
// Default config: 4 hot experts, threshold 0.1, LRU.
#[test]
fn test_config_defaults() {
let config = ExpertCacheConfig::default();
assert_eq!(config.max_hot_experts, 4);
assert!((config.prefetch_threshold - 0.1).abs() < 1e-6);
assert_eq!(config.eviction_policy, EvictionPolicy::Lru);
}
// suggest_eviction returns None until the hot set reaches capacity (4).
#[test]
fn test_suggest_eviction_none_when_not_full() {
let mut cache = make_cache(8, 4, EvictionPolicy::Lru);
assert!(cache.suggest_eviction().is_none());
cache.access(0);
assert!(cache.suggest_eviction().is_none());
cache.access(1);
cache.access(2);
assert!(cache.suggest_eviction().is_none());
cache.access(3);
assert!(cache.suggest_eviction().is_some());
}
// Admitting an expert that is already hot does not add a duplicate entry.
#[test]
fn test_admit_idempotent() {
let mut cache = make_cache(8, 4, EvictionPolicy::Lru);
cache.admit(0);
cache.admit(1);
assert_eq!(cache.hot_count(), 2);
cache.admit(0);
cache.admit(1);
assert_eq!(cache.hot_count(), 2);
}
// hot_experts lists exactly the experts that were accessed.
#[test]
fn test_hot_experts_list() {
let mut cache = make_cache(8, 4, EvictionPolicy::Lru);
cache.access(2);
cache.access(5);
cache.access(7);
let hot = cache.hot_experts();
assert_eq!(hot.len(), 3);
assert!(hot.contains(&2));
assert!(hot.contains(&5));
assert!(hot.contains(&7));
assert!(!hot.contains(&0));
}
// With affinity weight 1.0 only the affinity term matters. Expert 2 receives
// the fewest affinity updates and is expected to be the victim.
// NOTE(review): relies on ExpertAffinity (defined elsewhere) scoring experts
// with fewer updates lower — confirm.
#[test]
fn test_eviction_with_affinity_prefers_low_affinity() {
use crate::moe::{AffinityConfig, ExpertAffinity};
let mut cache = make_cache(8, 3, EvictionPolicy::Lru);
let mut affinity = ExpertAffinity::new(
AffinityConfig::with_num_experts(8)
.with_decay(1.0)
.with_activation_boost(0.1),
);
cache.access(0);
cache.access(1);
cache.access(2);
for _ in 0..10 {
affinity.update(&[0]);
}
affinity.update(&[2]);
for _ in 0..5 {
affinity.update(&[1]);
}
let victim = cache.suggest_eviction_with_affinity(&affinity, 1.0);
assert_eq!(victim, Some(2), "Should evict lowest affinity expert");
}
// prefetch_by_affinity admits at least one and at most `budget` experts, and
// every returned expert is hot afterwards.
#[test]
fn test_prefetch_by_affinity_respects_budget() {
use crate::moe::{AffinityConfig, ExpertAffinity};
let config = ExpertCacheConfig {
max_hot_experts: 6,
prefetch_threshold: 0.1,
eviction_policy: EvictionPolicy::Lru,
};
let mut cache = ExpertCache::new(8, config);
let mut affinity = ExpertAffinity::new(AffinityConfig::with_num_experts(8).with_decay(1.0));
for _ in 0..5 {
affinity.update(&[3, 5, 7]);
}
let prefetched = cache.prefetch_by_affinity(&affinity, 2);
assert!(prefetched.len() <= 2, "Should respect budget");
assert!(
prefetched.len() >= 1,
"Should prefetch at least 1 high-affinity expert"
);
for &id in &prefetched {
assert!(cache.is_hot(id), "Prefetched expert should be hot");
}
}
// An expert that is already hot (3) is skipped; the next candidate (5) is
// prefetched instead.
#[test]
fn test_prefetch_skips_already_hot() {
use crate::moe::{AffinityConfig, ExpertAffinity};
let config = ExpertCacheConfig {
max_hot_experts: 4,
prefetch_threshold: 0.1,
eviction_policy: EvictionPolicy::Lru,
};
let mut cache = ExpertCache::new(8, config);
let mut affinity = ExpertAffinity::new(AffinityConfig::with_num_experts(8).with_decay(1.0));
cache.access(3);
for _ in 0..10 {
affinity.update(&[3]);
}
for _ in 0..5 {
affinity.update(&[5]);
}
let prefetched = cache.prefetch_by_affinity(&affinity, 2);
assert!(
!prefetched.contains(&3),
"Should not prefetch already-hot expert"
);
assert!(
prefetched.contains(&5),
"Should prefetch next highest affinity expert"
);
}
// Weight 0 reproduces plain LRU (expert 0 is oldest); weights 0.5 and 1.0
// must not pick expert 0, the only one that received affinity updates.
#[test]
fn test_affinity_weighted_eviction() {
use crate::moe::{AffinityConfig, ExpertAffinity};
let mut cache = make_cache(8, 3, EvictionPolicy::Lru);
let mut affinity = ExpertAffinity::new(AffinityConfig::with_num_experts(8).with_decay(1.0));
cache.access(0);
cache.access(1);
cache.access(2);
for _ in 0..20 {
affinity.update(&[0]);
}
let victim_lru = cache.suggest_eviction_with_affinity(&affinity, 0.0);
assert_eq!(victim_lru, Some(0), "Weight 0 should use pure LRU");
let victim_affinity = cache.suggest_eviction_with_affinity(&affinity, 1.0);
assert!(
victim_affinity == Some(1) || victim_affinity == Some(2),
"Weight 1.0 should evict lowest affinity"
);
let victim_balanced = cache.suggest_eviction_with_affinity(&affinity, 0.5);
assert_ne!(
victim_balanced,
Some(0),
"Balanced weight should protect high-affinity expert"
);
}
// With affinity weight 0 the affinity-aware suggestion equals the base LFU
// suggestion (expert 0, accessed once).
#[test]
fn test_zero_affinity_weight_fallback() {
use crate::moe::{AffinityConfig, ExpertAffinity};
let mut cache = make_cache(8, 3, EvictionPolicy::Lfu);
let affinity = ExpertAffinity::new(AffinityConfig::with_num_experts(8));
cache.access(0);
cache.access(1);
cache.access(1);
cache.access(1);
cache.access(2);
cache.access(2);
let victim_base = cache.suggest_eviction();
let victim_zero_weight = cache.suggest_eviction_with_affinity(&affinity, 0.0);
assert_eq!(
victim_base, victim_zero_weight,
"Zero weight should match base policy"
);
assert_eq!(victim_base, Some(0), "LFU should evict lowest frequency");
}
}