use clap::{Parser, Subcommand};
#[derive(Parser)]
#[command(name = "octoroute")]
#[command(version)]
#[command(about = "Intelligent multi-model router for self-hosted LLMs")]
#[command(
long_about = "Octoroute intelligently routes LLM requests to optimal model endpoints \
based on task complexity, using rule-based, LLM-based, or hybrid routing strategies."
)]
pub struct Cli {
#[arg(short, long, default_value = "config.toml", global = true)]
pub config: String,
#[command(subcommand)]
pub command: Option<Command>,
}
#[derive(Subcommand)]
pub enum Command {
Config {
#[arg(short, long)]
output: Option<String>,
},
}
pub fn generate_config_template() -> &'static str {
r#"# Octoroute Configuration
# =========================
#
# This file configures the HTTP server, model endpoints, routing strategy,
# and observability settings for Octoroute.
#
# For full documentation, see: https://github.com/slb350/octoroute
# ─────────────────────────────────────────────────────────────────────────────
# SERVER CONFIGURATION
# ─────────────────────────────────────────────────────────────────────────────
[server]
# IP address to bind to (0.0.0.0 for all interfaces, 127.0.0.1 for localhost only)
host = "0.0.0.0"
# Port to listen on
port = 3000
# Default request timeout in seconds (can be overridden per-tier in [timeouts])
request_timeout_seconds = 30
# ─────────────────────────────────────────────────────────────────────────────
# MODEL TIERS
# ─────────────────────────────────────────────────────────────────────────────
#
# Configure endpoints for each model tier. Each tier can have multiple endpoints
# for load balancing and failover. Octoroute will route requests based on:
#
# - FAST (8B models): Simple tasks, casual chat, quick Q&A
# - BALANCED (30B models): Coding, analysis, explanations
# - DEEP (120B+ models): Complex reasoning, creative writing, research
#
# Endpoint fields:
# - name: Model identifier (for OpenAI-compatible APIs)
# - base_url: API base URL (must end with /v1 for OpenAI-compatible APIs)
# - max_tokens: Maximum tokens for generation
# - temperature: Sampling temperature (0.0-2.0)
# - weight: Load balancing weight (higher = more traffic)
# - priority: Selection priority (higher = tried first)
# Fast tier - 8B class models
[[models.fast]]
name = "your-8b-model"
base_url = "http://your-server:port/v1"
max_tokens = 4096
temperature = 0.7
weight = 1.0
priority = 1
# Add additional fast endpoints for load balancing:
# [[models.fast]]
# name = "your-8b-model"
# base_url = "http://another-server:port/v1"
# max_tokens = 4096
# temperature = 0.7
# weight = 1.0
# priority = 1
# Balanced tier - 30B class models
[[models.balanced]]
name = "your-30b-model"
base_url = "http://your-server:port/v1"
max_tokens = 8192
temperature = 0.7
weight = 1.0
priority = 1
# Deep tier - 120B+ class models
[[models.deep]]
name = "your-120b-model"
base_url = "http://your-server:port/v1"
max_tokens = 16384
temperature = 0.7
weight = 1.0
priority = 1
# ─────────────────────────────────────────────────────────────────────────────
# ROUTING CONFIGURATION
# ─────────────────────────────────────────────────────────────────────────────
[routing]
# Routing strategy:
# - "rule": Fast pattern-based routing (~<1ms latency)
# - "llm": Intelligent LLM-powered routing (~250ms latency)
# - "hybrid": Rule-based first, LLM fallback (recommended)
strategy = "hybrid"
# Default importance level for requests that don't specify one
# Options: "low", "normal", "high", "critical"
default_importance = "normal"
# Which tier to use for LLM-based routing decisions
# The router model analyzes requests and selects the optimal target tier
router_tier = "balanced"
# ─────────────────────────────────────────────────────────────────────────────
# OBSERVABILITY
# ─────────────────────────────────────────────────────────────────────────────
[observability]
# Log level: "trace", "debug", "info", "warn", "error"
log_level = "info"
# Prometheus metrics are always available at /metrics on the server port
# For production, consider using a reverse proxy to restrict access
# ─────────────────────────────────────────────────────────────────────────────
# TIMEOUTS (Optional)
# ─────────────────────────────────────────────────────────────────────────────
#
# Per-tier timeout overrides in seconds.
# If not specified, server.request_timeout_seconds is used.
[timeouts]
fast = 15 # 8B models respond quickly
balanced = 30 # 30B models - moderate speed
deep = 60 # 120B+ models need more time
"#
}
#[cfg(test)]
mod tests {
use super::*;
use clap::CommandFactory;
#[test]
fn verify_cli() {
Cli::command().debug_assert();
}
#[test]
fn default_config_path() {
let cli = Cli::parse_from(["octoroute"]);
assert_eq!(cli.config, "config.toml");
assert!(cli.command.is_none());
}
#[test]
fn custom_config_path() {
let cli = Cli::parse_from(["octoroute", "--config", "custom.toml"]);
assert_eq!(cli.config, "custom.toml");
}
#[test]
fn config_subcommand() {
let cli = Cli::parse_from(["octoroute", "config"]);
assert!(matches!(
cli.command,
Some(Command::Config { output: None })
));
}
#[test]
fn config_subcommand_with_output() {
let cli = Cli::parse_from(["octoroute", "config", "-o", "my-config.toml"]);
assert!(matches!(
cli.command,
Some(Command::Config { output: Some(ref path) }) if path == "my-config.toml"
));
}
#[test]
fn template_is_valid_toml() {
let template = generate_config_template();
let result: Result<toml::Value, _> = toml::from_str(template);
assert!(
result.is_ok(),
"Template should be valid TOML: {:?}",
result.err()
);
}
#[test]
fn template_has_all_sections() {
let template = generate_config_template();
assert!(template.contains("[server]"));
assert!(template.contains("[[models.fast]]"));
assert!(template.contains("[[models.balanced]]"));
assert!(template.contains("[[models.deep]]"));
assert!(template.contains("[routing]"));
assert!(template.contains("[observability]"));
assert!(template.contains("[timeouts]"));
}
#[test]
fn template_deserializes_to_valid_config() {
use crate::config::Config;
let template = generate_config_template();
let config: Config =
toml::from_str(template).expect("Template must deserialize to valid Config struct");
assert!(
!config.models.fast.is_empty(),
"Template must include fast tier"
);
assert!(
!config.models.balanced.is_empty(),
"Template must include balanced tier"
);
assert!(
!config.models.deep.is_empty(),
"Template must include deep tier"
);
}
}