use clap::{Args, Parser, Subcommand};
// Root of the `kwaainet` command line; clap derives the parser from this struct.
// `about` is the short description, while `long_about` is the extended text
// used for the long help. The `long_about` literal spans many source lines and
// its line breaks are part of the string, so it must not be re-indented.
// The bare `version` key asks clap to take the version from the crate metadata.
#[derive(Parser)]
#[command(
name = "kwaainet",
about = "KwaaiNet – Distributed AI node CLI",
long_about = "KwaaiNet — Sovereign AI Infrastructure
─── Install & first run ──────────────────────────────────────────────
kwaainet setup create config dirs and identity
kwaainet setup --get-deps download p2pd (if not bundled)
kwaainet benchmark measure GPU/CPU throughput
─── Join the network ─────────────────────────────────────────────────
kwaainet config set public_name \"alice-m4\" shown on map.kwaai.ai
kwaainet start --daemon start node in background
kwaainet status verify node is online
kwaainet logs --follow tail the daemon log
─── Configuration ────────────────────────────────────────────────────
kwaainet config show current config
kwaainet config set KEY VALUE update a value
kwaainet config set blocks 8 transformer blocks to host
kwaainet config set use_gpu true enable GPU acceleration
─── Direct vs Relay connections ──────────────────────────────────────
By default nodes connect via relay (no port forwarding required).
For direct connections (lower latency, better throughput):
kwaainet config set public_ip <YOUR_PUBLIC_IP>
kwaainet config set announce_addr /ip4/<IP>/tcp/<PORT>
• Forward the chosen TCP port in your router
• Verify: kwaainet status → look for \"using_relay: false\"
─── Local inference (30+ tok/s on Apple Silicon) ────────────────────
kwaainet shard run \"What is the capital of France?\" --local
kwaainet benchmark measure local throughput
─── Distributed inference ────────────────────────────────────────────
kwaainet shard circuit create pre-form a peer path
kwaainet shard run \"Hello\" --circuit ID use the circuit
kwaainet shard run \"Hello\" --stats show per-token timing
# Multi-machine — split the model
# Machine A Machine B
shard serve --blocks 28 shard serve --start-block 28 --blocks 4
shard chain --total-blocks 32 # verify full coverage
─── OpenAI-compatible API ────────────────────────────────────────────
kwaainet shard api --port 8080
curl http://localhost:8080/v1/chat/completions \\
-d '{\"model\":\"default\",\"messages\":[{\"role\":\"user\",\"content\":\"Hello\"}]}'
Learn more: https://github.com/Kwaai-AI-Lab/KwaaiNet",
version
)]
pub struct Cli {
// The selected top-level subcommand. The field is not an `Option`, so clap
// rejects an invocation that names no subcommand.
#[command(subcommand)]
pub command: Command,
}
// Top-level subcommands of `kwaainet`.
// clap derives each subcommand name from the variant name in kebab-case
// (e.g. `HealthStatus` -> `health-status`, `LoadModel` -> `load-model`).
// Unit variants take no arguments; variants wrapping a `*Args` struct take
// their flags and positionals from that struct.
// Declaration order is the order subcommands are listed in help output.
#[derive(Subcommand)]
pub enum Command {
Start(StartArgs),
Stop,
Restart,
Status,
Logs(LogsArgs),
Config(ConfigArgs),
HealthStatus,
HealthEnable,
HealthDisable,
Service(ServiceArgs),
Reconnect,
Monitor(MonitorArgs),
Update(UpdateArgs),
Calibrate(CalibrateArgs),
LoadModel(LoadModelArgs),
Generate(GenerateArgs),
Benchmark(BenchmarkArgs),
Serve(ServeArgs),
Setup(SetupArgs),
Identity(IdentityArgs),
Vpk(VpkArgs),
Uninstall(UninstallArgs),
// Extended help for `shard`. The literal's line breaks are part of the
// displayed text, so its continuation lines must stay as written.
#[command(long_about = "Distributed transformer block sharding (Petals-style)
Each machine loads a slice of the model and registers an RPC handler.
A coordinator discovers the chain via DHT and orchestrates inference hop-by-hop.
shard serve Load and serve a range of transformer blocks (run on each node)
shard run Coordinate inference across all serving nodes
shard chain Show block coverage across all online peers
shard api OpenAI-compatible HTTP server for distributed inference
shard download Download a HuggingFace SafeTensors model (no huggingface-cli needed)")]
Shard(ShardArgs),
// `hide = true` keeps `run-node` out of the help listing; it is still parsed.
// NOTE(review): presumably an internal entry point (e.g. used when the node
// re-executes itself as a daemon) — confirm against the dispatcher.
#[command(hide = true)]
RunNode,
}
// Arguments of the `start` subcommand.
// Every field is optional on the command line: `Option<_>` fields are flags
// that take a value, `bool` fields are presence-only switches. clap derives
// each flag's spelling from the field name in kebab-case.
#[derive(Args)]
pub struct StartArgs {
    // --model <MODEL>
    #[arg(long)]
    pub model: Option<String>,

    // --blocks <BLOCKS>
    #[arg(long)]
    pub blocks: Option<u32>,

    // --port <PORT>; clap rejects values outside the u16 range.
    #[arg(long)]
    pub port: Option<u16>,

    // --no-gpu
    #[arg(long)]
    pub no_gpu: bool,

    // --public-name <PUBLIC_NAME>
    #[arg(long)]
    pub public_name: Option<String>,

    // --public-ip <PUBLIC_IP>; kept as a raw string, not validated here.
    #[arg(long)]
    pub public_ip: Option<String>,

    // --announce-addr <ANNOUNCE_ADDR>; kept as a raw string, not validated here.
    #[arg(long)]
    pub announce_addr: Option<String>,

    // --no-relay
    #[arg(long)]
    pub no_relay: bool,

    // --daemon
    #[arg(long)]
    pub daemon: bool,

    // --concurrent
    #[arg(long)]
    pub concurrent: bool,

    // --shard
    #[arg(long)]
    pub shard: bool,
}
// Arguments of the `logs` subcommand.
#[derive(Args)]
pub struct LogsArgs {
    // -n / --lines <LINES>; falls back to 50 when the flag is omitted.
    #[arg(short = 'n', long, default_value_t = 50)]
    pub lines: usize,

    // -f / --follow
    #[arg(short = 'f', long)]
    pub follow: bool,

    // --shard
    #[arg(long)]
    pub shard: bool,
}
// Arguments of the `config` subcommand.
#[derive(Args)]
pub struct ConfigArgs {
    // Nested action. Because the field is an `Option`, a bare `config`
    // with no action parses successfully and yields `None`.
    #[command(subcommand)]
    pub action: Option<ConfigAction>,
}
// Actions available under `config`.
#[derive(Subcommand)]
pub enum ConfigAction {
    // `config show` — takes no arguments.
    Show,
    // `config set <KEY> <VALUE>` — two required positionals, in this order.
    // Both are kept as raw strings; no validation happens at parse time.
    Set { key: String, value: String },
}
// Arguments of the `service` subcommand.
#[derive(Args)]
pub struct ServiceArgs {
    // Required nested action (the field is not an `Option`).
    #[command(subcommand)]
    pub action: ServiceAction,
}
// Actions available under `service`; none of them takes arguments.
#[derive(Subcommand)]
pub enum ServiceAction {
    // `service install`
    Install,
    // `service uninstall`
    Uninstall,
    // `service status`
    Status,
    // `service restart`
    Restart,
}
// Arguments of the `monitor` subcommand.
#[derive(Args)]
pub struct MonitorArgs {
    // Required nested action (the field is not an `Option`).
    #[command(subcommand)]
    pub action: MonitorAction,
}
// Actions available under `monitor`.
#[derive(Subcommand)]
pub enum MonitorAction {
    // `monitor stats` — takes no arguments.
    Stats,
    // `monitor alert` — flags come from `AlertArgs`.
    Alert(AlertArgs),
}
// Arguments of `monitor alert`.
// All flags are optional. `--enable` and `--disable` are declared mutually
// exclusive so a contradictory invocation is rejected by the parser with a
// usage error instead of reaching the handler with both switches set.
#[derive(Args)]
pub struct AlertArgs {
    // --enable; cannot be combined with --disable. The conflict is declared
    // once here; clap enforces it in both directions.
    #[arg(long, conflicts_with = "disable")]
    pub enable: bool,

    // --disable; cannot be combined with --enable.
    #[arg(long)]
    pub disable: bool,

    // --threshold <MINUTES>
    #[arg(long, value_name = "MINUTES")]
    pub threshold: Option<u32>,

    // --webhook <URL>; kept as a raw string, not validated at parse time.
    #[arg(long, value_name = "URL")]
    pub webhook: Option<String>,

    // --min-connections <MIN_CONNECTIONS>
    #[arg(long)]
    pub min_connections: Option<u32>,
}
// Arguments of the `update` subcommand; both are presence-only switches.
#[derive(Args)]
pub struct UpdateArgs {
    // --check
    #[arg(long)]
    pub check: bool,

    // --force
    #[arg(long)]
    pub force: bool,
}
// Arguments of the `load-model` subcommand.
#[derive(Args)]
pub struct LoadModelArgs {
    // Required positional <MODEL> (no `#[arg]`, so clap treats it as positional).
    pub model: String,
}
// Arguments of the `generate` subcommand: two required positionals whose
// command-line order follows the field order, i.e. `<MODEL> <PROMPT>`.
#[derive(Args)]
pub struct GenerateArgs {
    // First positional.
    pub model: String,
    // Second positional.
    pub prompt: String,
}
// Arguments of the `benchmark` subcommand.
#[derive(Args)]
pub struct BenchmarkArgs {
    // Optional positional [MODEL].
    pub model: Option<String>,

    // --steps <STEPS>; falls back to 20 when the flag is omitted.
    #[arg(long, default_value_t = 20)]
    pub steps: usize,

    // --gpu
    #[arg(long)]
    pub gpu: bool,

    // --model-path <PATH>
    #[arg(long, value_name = "PATH")]
    pub model_path: Option<std::path::PathBuf>,
}
// Arguments of the `serve` subcommand.
#[derive(Args)]
pub struct ServeArgs {
    // Optional positional [MODEL].
    pub model: Option<String>,

    // --port <PORT>; falls back to 11435 when the flag is omitted.
    #[arg(long, default_value_t = 11435)]
    pub port: u16,
}
// Arguments of the `identity` subcommand.
#[derive(Args)]
pub struct IdentityArgs {
    // Required nested action (the field is not an `Option`).
    #[command(subcommand)]
    pub action: IdentityAction,
}
// Actions available under `identity`. Subcommand names are the kebab-case
// form of the variant names (`ImportVc` -> `import-vc`, and so on).
#[derive(Subcommand)]
pub enum IdentityAction {
    // `identity show` — takes no arguments.
    Show,

    // `identity import-vc <FILE>` — one required positional path.
    ImportVc {
        #[arg(value_name = "FILE")]
        path: std::path::PathBuf,
    },

    // `identity list-vcs` — takes no arguments.
    ListVcs,

    // `identity verify-vc <FILE>` — one required positional path.
    VerifyVc {
        #[arg(value_name = "FILE")]
        path: std::path::PathBuf,
    },
}
// Arguments of the `vpk` subcommand.
#[derive(Args)]
pub struct VpkArgs {
    // Required nested action (the field is not an `Option`).
    #[command(subcommand)]
    pub action: VpkAction,
}
// Actions available under `vpk`.
#[derive(Subcommand)]
pub enum VpkAction {
    // `vpk enable --mode <MODE> [--endpoint <URL>] [--port <PORT>]`
    Enable {
        // Required flag (plain `String`, no default). The accepted values are
        // not constrained at parse time.
        #[arg(long, value_name = "MODE")]
        mode: String,

        // Optional flag.
        #[arg(long, value_name = "URL")]
        endpoint: Option<String>,

        // Falls back to 7432 when the flag is omitted.
        #[arg(long, default_value_t = 7432)]
        port: u16,
    },

    // `vpk disable` — takes no arguments.
    Disable,

    // `vpk status` — takes no arguments.
    Status,

    // `vpk discover` — takes no arguments.
    Discover,

    // `vpk shard --kb-id <NAME> [--eve-count <N>]`
    Shard {
        // Required flag.
        #[arg(long, value_name = "NAME")]
        kb_id: String,

        // Falls back to 1 when the flag is omitted.
        #[arg(long, value_name = "N", default_value_t = 1)]
        eve_count: usize,
    },

    // `vpk resolve --kb-id <NAME>`
    Resolve {
        // Required flag.
        #[arg(long, value_name = "NAME")]
        kb_id: String,
    },
}
// Arguments of the `uninstall` subcommand; both are presence-only switches.
#[derive(Args)]
pub struct UninstallArgs {
    // -y / --yes
    #[arg(short = 'y', long)]
    pub yes: bool,

    // --keep-data
    #[arg(long)]
    pub keep_data: bool,
}
// Arguments of the `shard` subcommand.
#[derive(Args)]
pub struct ShardArgs {
    // Required nested action (the field is not an `Option`).
    #[command(subcommand)]
    pub action: ShardAction,
}
// Actions available under `shard`.
#[derive(Subcommand)]
pub enum ShardAction {
    // `shard serve` — flags from `ShardServeArgs`.
    Serve(ShardServeArgs),

    // `shard run` — arguments from `ShardRunArgs`.
    Run(ShardRunArgs),

    // `shard status` — takes no arguments.
    Status,

    // `shard chain` — flags from `ShardChainArgs`.
    Chain(ShardChainArgs),

    // `shard api` — flags from `ShardApiArgs`.
    Api(ShardApiArgs),

    // `shard download` — arguments from `ShardDownloadArgs`.
    Download(ShardDownloadArgs),

    // `shard gap` — takes no arguments.
    Gap,

    // `shard circuit <ACTION>` — the payload is itself a subcommand enum, so
    // its variants become a further level of subcommands.
    #[command(subcommand)]
    Circuit(CircuitAction),
}
// Actions available under `shard circuit`.
#[derive(Subcommand)]
pub enum CircuitAction {
    // `circuit create` — flags from `CircuitCreateArgs`.
    Create(CircuitCreateArgs),

    // `circuit list` — takes no arguments.
    List,

    // `circuit close` — arguments from `CircuitCloseArgs`.
    Close(CircuitCloseArgs),
}
// Arguments of `shard circuit create`.
#[derive(Args)]
pub struct CircuitCreateArgs {
    // --name-filter <SUBSTR>
    #[arg(long, value_name = "SUBSTR")]
    pub name_filter: Option<String>,

    // --ttl-minutes <TTL_MINUTES>; falls back to 30 when the flag is omitted.
    #[arg(long, default_value_t = 30)]
    pub ttl_minutes: u64,
}
// Arguments of `shard circuit close`.
#[derive(Args)]
pub struct CircuitCloseArgs {
    // Required positional <ID> (no `#[arg]`, so clap treats it as positional).
    pub id: String,
}
// Arguments of `shard serve`. Every flag is optional; the struct is `Clone`
// so a parsed set of arguments can be duplicated by callers.
// NOTE(review): `no_gpu`/`use_gpu` and `auto`/`no_auto` are independent
// switches as far as the parser is concerned — nothing here stops both members
// of a pair from being set, so the handler must decide which one wins.
#[derive(Clone, Args)]
pub struct ShardServeArgs {
    // --model-path <PATH>
    #[arg(long, value_name = "PATH")]
    pub model_path: Option<std::path::PathBuf>,

    // --start-block <START_BLOCK>
    #[arg(long)]
    pub start_block: Option<u32>,

    // --blocks <BLOCKS>
    #[arg(long)]
    pub blocks: Option<u32>,

    // --no-gpu
    #[arg(long)]
    pub no_gpu: bool,

    // --use-gpu
    #[arg(long)]
    pub use_gpu: bool,

    // --auto
    #[arg(long)]
    pub auto: bool,

    // --no-auto
    #[arg(long)]
    pub no_auto: bool,

    // --auto-rebalance
    #[arg(long)]
    pub auto_rebalance: bool,

    // --hf-token <TOKEN>
    #[arg(long, value_name = "TOKEN")]
    pub hf_token: Option<String>,
}
// Arguments of `shard run`: one required positional prompt plus optional flags.
// Integer defaults are written as typed literals. The float defaults stay as
// strings on purpose so the help output keeps printing them as "1.0".
#[derive(Args)]
pub struct ShardRunArgs {
    // Required positional <PROMPT>.
    pub prompt: String,

    // --model <MODEL>
    #[arg(long)]
    pub model: Option<String>,

    // --total-blocks <TOTAL_BLOCKS>
    #[arg(long)]
    pub total_blocks: Option<usize>,

    // --max-tokens <MAX_TOKENS>; falls back to 200 when omitted.
    #[arg(long, default_value_t = 200)]
    pub max_tokens: usize,

    // --session-id <SESSION_ID>
    #[arg(long)]
    pub session_id: Option<u64>,

    // --name-filter <SUBSTR>
    #[arg(long, value_name = "SUBSTR")]
    pub name_filter: Option<String>,

    // --temperature <TEMPERATURE>; falls back to 1.0. No range check at parse time.
    #[arg(long, default_value = "1.0")]
    pub temperature: f32,

    // --top-p <TOP_P>; falls back to 1.0. No range check at parse time.
    #[arg(long, default_value = "1.0")]
    pub top_p: f32,

    // --top-k <TOP_K>; falls back to 0 when omitted.
    #[arg(long, default_value_t = 0)]
    pub top_k: usize,

    // --model-path <PATH>
    #[arg(long, value_name = "PATH")]
    pub model_path: Option<std::path::PathBuf>,

    // --local
    #[arg(long)]
    pub local: bool,

    // --no-gpu
    #[arg(long)]
    pub no_gpu: bool,

    // --stats
    #[arg(long)]
    pub stats: bool,

    // --circuit <ID>
    #[arg(long, value_name = "ID")]
    pub circuit: Option<String>,
}
// Arguments of `shard chain`.
#[derive(Args)]
pub struct ShardChainArgs {
    // --dht-prefix <DHT_PREFIX>
    #[arg(long)]
    pub dht_prefix: Option<String>,

    // --total-blocks <TOTAL_BLOCKS>; falls back to 32 when the flag is omitted.
    #[arg(long, default_value_t = 32)]
    pub total_blocks: usize,
}
// Arguments of `shard api`.
#[derive(Args)]
pub struct ShardApiArgs {
    // --port <PORT>; falls back to 8080 when the flag is omitted.
    #[arg(long, default_value_t = 8080)]
    pub port: u16,

    // --total-blocks <TOTAL_BLOCKS>
    #[arg(long)]
    pub total_blocks: Option<usize>,

    // --model <MODEL>
    #[arg(long)]
    pub model: Option<String>,

    // --model-path <PATH>
    #[arg(long, value_name = "PATH")]
    pub model_path: Option<std::path::PathBuf>,

    // --temperature <TEMPERATURE>; falls back to 0.7. The default is kept as a
    // string so its rendering in help output is unchanged.
    #[arg(long, default_value = "0.7")]
    pub temperature: f32,
}
// Arguments of `shard download`; everything is optional.
#[derive(Args)]
pub struct ShardDownloadArgs {
    // Optional positional [MODEL].
    pub model: Option<String>,

    // --hf-token <TOKEN>
    #[arg(long, value_name = "TOKEN")]
    pub hf_token: Option<String>,

    // --start-block <START_BLOCK>
    #[arg(long)]
    pub start_block: Option<usize>,

    // --blocks <BLOCKS>
    #[arg(long)]
    pub blocks: Option<usize>,
}
// Arguments of the `calibrate` subcommand.
#[derive(Args)]
pub struct CalibrateArgs {
    // --model <MODEL>
    #[arg(long)]
    pub model: Option<String>,

    // --force
    #[arg(long)]
    pub force: bool,

    // --quick[=<true|false>]; true when omitted.
    //
    // A plain `bool` flag uses clap's `SetTrue` action, so pairing it with a
    // default of "true" made the field true whether or not the flag was given
    // and left no way to turn it off. Parsing an optional explicit value fixes
    // that while staying backward compatible:
    //   (flag absent)   -> true   (unchanged default)
    //   --quick         -> true   (unchanged; `default_missing_value`)
    //   --quick=false   -> false  (new)
    // `require_equals` keeps a bare `--quick` from swallowing the next token.
    #[arg(
        long,
        action = clap::ArgAction::Set,
        num_args = 0..=1,
        require_equals = true,
        default_value_t = true,
        default_missing_value = "true"
    )]
    pub quick: bool,

    // --apply <PROFILE>; the profile name is not validated at parse time.
    #[arg(long, value_name = "PROFILE")]
    pub apply: Option<String>,
}
// Arguments of the `setup` subcommand.
#[derive(Args)]
pub struct SetupArgs {
    // --get-deps
    #[arg(long)]
    pub get_deps: bool,
}