zoey_core/planner/
optimization.rs1use crate::planner::*;
4use crate::Result;
5use serde::{Deserialize, Serialize};
6
7#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
9#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
10pub enum Optimization {
11 ModelDowngrade,
13 ReducedContext,
15 ReducedOutput,
17 SimplifiedPrompt,
19 CachedResponse,
21 BatchedRequest,
23}
24
/// Stateless helper that adapts an execution plan to budget constraints and
/// produces token/cost related warnings and suggestions.
///
/// The type carries no data, so it is cheap to create and to clone.
#[derive(Debug, Clone)]
pub struct PlanOptimizer;
27
28impl PlanOptimizer {
29 pub fn new() -> Self {
31 Self
32 }
33
34 pub async fn optimize(
36 &self,
37 mut plan: ExecutionPlan,
38 budget_check: &budget::BudgetCheckResult,
39 cost_calculator: &cost::CostCalculator,
40 ) -> Result<ExecutionPlan> {
41 if budget_check.approved {
42 return Ok(plan);
44 }
45
46 let mut optimizations = Vec::new();
47
48 match budget_check.action {
50 budget::BudgetAction::Warn => {
51 plan.warnings.push(format!(
53 "Budget warning: {} (${:.4} available, ${:.4} required)",
54 budget_check.reason,
55 budget_check.available_budget,
56 budget_check.required_budget
57 ));
58 }
59
60 budget::BudgetAction::SwitchToSmaller => {
61 if let Some(cheaper_model) = cost_calculator.find_cheaper_model(
63 &plan.cost_estimate.model_used,
64 plan.token_estimate.input_tokens,
65 ) {
66 let new_cost = cost_calculator.calculate_cost(
68 &cheaper_model,
69 plan.cost_estimate.input_tokens,
70 plan.cost_estimate.output_tokens,
71 )?;
72
73 if new_cost.estimated_cost_usd <= budget_check.available_budget {
74 plan.cost_estimate = new_cost;
75 plan.response_strategy.model_selection = cheaper_model;
76 optimizations.push(Optimization::ModelDowngrade);
77 }
78 }
79
80 if plan.cost_estimate.estimated_cost_usd > budget_check.available_budget {
82 let reduction_factor =
83 budget_check.available_budget / plan.cost_estimate.estimated_cost_usd;
84
85 let new_output_tokens =
86 (plan.cost_estimate.output_tokens as f64 * reduction_factor * 0.9) as usize;
87
88 if new_output_tokens > 50 {
89 let new_cost = cost_calculator.calculate_cost(
91 &plan.cost_estimate.model_used,
92 plan.cost_estimate.input_tokens,
93 new_output_tokens,
94 )?;
95
96 plan.cost_estimate = new_cost;
97 plan.response_strategy.max_tokens = new_output_tokens;
98 optimizations.push(Optimization::ReducedOutput);
99 }
100 }
101 }
102
103 budget::BudgetAction::Block => {
104 return Err(crate::ZoeyError::Other(format!(
106 "Budget exceeded and action is BLOCK: {}",
107 budget_check.reason
108 )));
109 }
110
111 budget::BudgetAction::RequireApproval => {
112 plan.warnings
113 .push(format!("User approval required: {}", budget_check.reason));
114 plan.requires_approval = true;
115 }
116 }
117
118 plan.optimizations_applied.extend(optimizations);
119
120 Ok(plan)
121 }
122
123 pub fn optimize_tokens(&self, plan: &mut ExecutionPlan) -> Vec<Optimization> {
125 let optimizations = Vec::new();
126
127 if plan.token_estimate.total_tokens > 100000 {
129 plan.warnings.push(
130 "High token usage detected. Consider reducing context or output length."
131 .to_string(),
132 );
133 }
134
135 let expected_output = match plan.complexity.level {
137 complexity::ComplexityLevel::Trivial => 100,
138 complexity::ComplexityLevel::Simple => 300,
139 complexity::ComplexityLevel::Moderate => 600,
140 complexity::ComplexityLevel::Complex => 1000,
141 complexity::ComplexityLevel::VeryComplex => 2000,
142 };
143
144 if plan.token_estimate.output_tokens > expected_output * 2 {
145 plan.warnings.push(format!(
146 "Output tokens ({}) seem high for {} complexity. Expected ~{}.",
147 plan.token_estimate.output_tokens, plan.complexity.level, expected_output
148 ));
149 }
150
151 optimizations
152 }
153
154 pub fn suggest_optimizations(&self, plan: &ExecutionPlan) -> Vec<String> {
156 let mut suggestions = Vec::new();
157
158 if plan.cost_estimate.estimated_cost_usd > 0.10 {
160 suggestions.push(
161 "Consider using a smaller model for cost savings (e.g., GPT-3.5 instead of GPT-4)"
162 .to_string(),
163 );
164 }
165
166 if plan.token_estimate.input_tokens > 10000 {
168 suggestions.push(
169 "High input tokens detected. Consider summarizing context or using RAG."
170 .to_string(),
171 );
172 }
173
174 if matches!(
176 plan.complexity.level,
177 complexity::ComplexityLevel::Trivial | complexity::ComplexityLevel::Simple
178 ) && plan.cost_estimate.model_used.contains("gpt-4")
179 {
180 suggestions.push(
181 "Simple task detected. A smaller model like GPT-3.5 may be sufficient.".to_string(),
182 );
183 }
184
185 if !plan.knowledge.unknown_gaps.is_empty() {
187 let critical_gaps = plan
188 .knowledge
189 .unknown_gaps
190 .iter()
191 .filter(|g| g.priority == knowledge::Priority::Critical)
192 .count();
193
194 if critical_gaps > 0 {
195 suggestions.push(format!(
196 "{} critical knowledge gaps detected. Consider gathering more context first.",
197 critical_gaps
198 ));
199 }
200 }
201
202 suggestions
203 }
204}
205
206impl Default for PlanOptimizer {
207 fn default() -> Self {
208 Self::new()
209 }
210}
211
#[cfg(test)]
mod tests {
    use super::*;

    // TODO: both tests are construction smoke tests only. Exercising
    // `optimize` / `suggest_optimizations` needs an `ExecutionPlan` fixture
    // (and a budget check / cost calculator), which this module does not
    // provide yet.

    /// Smoke test: the optimizer can be constructed inside an async test.
    #[tokio::test]
    async fn test_model_downgrade() {
        let _optimizer = PlanOptimizer::new();
    }

    /// Smoke test: the optimizer can be constructed.
    #[test]
    fn test_suggestions() {
        let _optimizer = PlanOptimizer::new();
    }
}