pawan/config/routing.rs
1use serde::{Deserialize, Serialize};
2
3use super::provider::LlmProvider;
4
5/// Task-type model routing — use different models for different task categories.
6///
7/// # Example (pawan.toml)
8/// ```toml
9/// [models]
10/// code = "qwen/qwen3.5-122b-a10b" # best for code generation
11/// orchestrate = "minimaxai/minimax-m2.5" # best for tool calling
12/// execute = "mlx-community/Qwen3.5-9B-OptiQ-4bit" # fast local execution
13/// ```
14#[derive(Debug, Clone, Default, Serialize, Deserialize)]
15pub struct ModelRouting {
16 /// Model for code generation tasks (implement, refactor, write tests)
17 pub code: Option<String>,
18 /// Model for orchestration tasks (multi-step tool chains, analysis)
19 pub orchestrate: Option<String>,
20 /// Model for simple execution tasks (bash, write_file, cargo test)
21 pub execute: Option<String>,
22}
23
24impl ModelRouting {
25 /// Select the best model for a given task based on keyword analysis.
26 /// Returns None if no routing matches (use default model).
27 pub fn route(&self, query: &str) -> Option<&str> {
28 let q = query.to_lowercase();
29
30 // Code generation patterns
31 if self.code.is_some() {
32 let code_signals = [
33 "implement",
34 "write",
35 "create",
36 "refactor",
37 "fix",
38 "add test",
39 "add function",
40 "struct",
41 "enum",
42 "trait",
43 "algorithm",
44 "data structure",
45 ];
46 if code_signals.iter().any(|s| q.contains(s)) {
47 return self.code.as_deref();
48 }
49 }
50
51 // Orchestration patterns
52 if self.orchestrate.is_some() {
53 let orch_signals = [
54 "search", "find", "analyze", "review", "explain", "compare", "list", "check",
55 "verify", "diagnose", "audit",
56 ];
57 if orch_signals.iter().any(|s| q.contains(s)) {
58 return self.orchestrate.as_deref();
59 }
60 }
61
62 // Execution patterns
63 if self.execute.is_some() {
64 let exec_signals = [
65 "run", "execute", "bash", "cargo", "test", "build", "deploy", "install", "commit",
66 ];
67 if exec_signals.iter().any(|s| q.contains(s)) {
68 return self.execute.as_deref();
69 }
70 }
71
72 None
73 }
74}
75
76/// Cloud fallback configuration for hybrid local+cloud model routing.
77///
78/// When the primary provider (typically a local model via OpenAI-compatible API)
79/// fails or is unavailable, pawan automatically falls back to this cloud provider.
80/// This enables zero-cost local inference with cloud reliability as a safety net.
81///
82/// # Example (pawan.toml)
83/// ```toml
84/// provider = "openai"
85/// model = "Qwen3.5-9B-Q4_K_M"
86///
87/// [cloud]
88/// provider = "nvidia"
89/// model = "mistralai/devstral-2-123b-instruct-2512"
90/// ```
91#[derive(Debug, Clone, Serialize, Deserialize)]
92pub struct CloudConfig {
93 /// Cloud LLM provider to fall back to (nvidia or openai)
94 pub provider: LlmProvider,
95 /// Primary cloud model to try first on fallback
96 pub model: String,
97 /// Additional cloud models to try if the primary cloud model also fails
98 #[serde(default)]
99 pub fallback_models: Vec<String>,
100}