kwaai_distributed/lib.rs
1//! # kwaai-distributed
2//!
3//! Distributed ML operations for KwaaiNet, implementing Hivemind patterns.
4//!
5//! This crate provides:
6//!
7//! - **Mixture of Experts (MoE)**: Distributed model layers across network
8//! - **Decentralized Averaging**: Parameter sync without master node
9//! - **Fault Tolerance**: Graceful handling of node failures
10//!
11//! ## Architecture
12//!
13//! ```text
//! ┌─────────────────────────────────────────────────────┐
//! │                  kwaai-distributed                  │
//! ├─────────────────┬─────────────────┬─────────────────┤
//! │    MoE Layer    │    Averaging    │ Fault Tolerance │
//! │ (Expert Routing)│ (Gradient Sync) │ (Retry/Fallback)│
//! ├─────────────────┴─────────────────┴─────────────────┤
//! │                      kwaai-p2p                      │
//! │               (P2P Networking / DHT)                │
//! └─────────────────────────────────────────────────────┘
23//! ```
24
25pub mod averaging;
26pub mod coordinator;
27pub mod error;
28pub mod expert;
29pub mod moe;
30
31pub use averaging::{AveragingResult, DecentralizedAverager, ParameterAverager};
32pub use coordinator::DistributedCoordinator;
33pub use error::{DistributedError, DistributedResult};
34pub use expert::{Expert, ExpertId, ExpertRegistry};
35pub use moe::{ExpertRouter, MixtureOfExperts, Routing};
36
/// Configuration for distributed operations.
///
/// Bundles the feature toggles and tuning values for the crate's
/// Mixture-of-Experts routing, parameter averaging, and remote-call limits.
/// All fields are public, so a value can be built directly or derived from
/// [`Default`] with struct-update syntax:
///
/// ```ignore
/// let cfg = DistributedConfig { moe_top_k: 4, ..Default::default() };
/// ```
///
/// The type is plain data (`bool` / `usize` / `u64` only), so it derives
/// `PartialEq` and `Eq` to let callers compare configurations directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DistributedConfig {
    /// Enable MoE distributed layers. Defaults to `true`.
    pub enable_moe: bool,
    /// Enable parameter averaging. Defaults to `true`.
    pub enable_averaging: bool,
    /// Number of experts to route to (top-k). Defaults to `2`.
    pub moe_top_k: usize,
    /// Target averaging group size. Defaults to `4`.
    pub averaging_group_size: usize,
    /// Timeout for remote operations, in milliseconds. Defaults to `5000`.
    pub timeout_ms: u64,
    /// Maximum retry attempts. Defaults to `3`.
    pub max_retries: usize,
}
53
54impl Default for DistributedConfig {
55 fn default() -> Self {
56 Self {
57 enable_moe: true,
58 enable_averaging: true,
59 moe_top_k: 2,
60 averaging_group_size: 4,
61 timeout_ms: 5000,
62 max_retries: 3,
63 }
64 }
65}