Skip to main content

kwaai_distributed/
lib.rs

1//! # kwaai-distributed
2//!
3//! Distributed ML operations for KwaaiNet, implementing Hivemind patterns.
4//!
5//! This crate provides:
6//!
7//! - **Mixture of Experts (MoE)**: Distributed model layers across network
8//! - **Decentralized Averaging**: Parameter sync without master node
9//! - **Fault Tolerance**: Graceful handling of node failures
10//!
11//! ## Architecture
12//!
13//! ```text
//! ┌────────────────────────────────────────────────────────┐
//! │                   kwaai-distributed                    │
//! ├──────────────────┬──────────────────┬──────────────────┤
//! │    MoE Layer     │    Averaging     │ Fault Tolerance  │
//! │ (Expert Routing) │ (Gradient Sync)  │ (Retry/Fallback) │
//! ├──────────────────┴──────────────────┴──────────────────┤
//! │                       kwaai-p2p                        │
//! │                 (P2P Networking / DHT)                 │
//! └────────────────────────────────────────────────────────┘
23//! ```
24
25pub mod averaging;
26pub mod coordinator;
27pub mod error;
28pub mod expert;
29pub mod moe;
30
31pub use averaging::{AveragingResult, DecentralizedAverager, ParameterAverager};
32pub use coordinator::DistributedCoordinator;
33pub use error::{DistributedError, DistributedResult};
34pub use expert::{Expert, ExpertId, ExpertRegistry};
35pub use moe::{ExpertRouter, MixtureOfExperts, Routing};
36
/// Configuration for distributed operations.
///
/// All fields are public plain values, so a configuration can be built with a
/// struct literal or by overriding selected fields of
/// `DistributedConfig::default()` using struct-update syntax.
///
/// The type implements `PartialEq`/`Eq` so two configurations can be compared
/// directly (for example in tests or when detecting a configuration change).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DistributedConfig {
    /// Enable MoE (Mixture of Experts) distributed layers.
    pub enable_moe: bool,
    /// Enable parameter averaging.
    pub enable_averaging: bool,
    /// Number of experts to route to (the `k` in top-k routing).
    pub moe_top_k: usize,
    /// Target averaging group size.
    pub averaging_group_size: usize,
    /// Timeout for remote operations, in milliseconds.
    pub timeout_ms: u64,
    /// Maximum retry attempts.
    pub max_retries: usize,
}
53
54impl Default for DistributedConfig {
55    fn default() -> Self {
56        Self {
57            enable_moe: true,
58            enable_averaging: true,
59            moe_top_k: 2,
60            averaging_group_size: 4,
61            timeout_ms: 5000,
62            max_retries: 3,
63        }
64    }
65}