1use std::fmt::Write as _;
7use std::sync::Arc;
8
9use super::{Agent, error::AgentError};
10use crate::channel::Channel;
11use crate::experiments::{
12 BenchmarkSet, Evaluator, ExperimentEngine, ExperimentSource, GridStep, SearchSpace,
13};
14
15impl<C: Channel> Agent<C> {
16 pub async fn handle_experiment_command(&mut self, input: &str) -> Result<(), AgentError> {
22 let args = input.strip_prefix("/experiment").unwrap_or("").trim();
23
24 match args {
25 "" | "status" => return self.handle_experiment_status().await,
26 "stop" => return self.handle_experiment_stop().await,
27 "report" => return self.handle_experiment_report().await,
28 "best" => return self.handle_experiment_best().await,
29 _ => {}
30 }
31
32 if args == "start" || args.starts_with("start") {
33 let max_override = args
34 .strip_prefix("start")
35 .and_then(|s| s.trim().parse::<u32>().ok());
36 return self.handle_experiment_start(max_override).await;
37 }
38
39 self.channel
40 .send(
41 "Unknown /experiment subcommand. Available: /experiment start [N], \
42 /experiment stop, /experiment status, /experiment report, /experiment best",
43 )
44 .await?;
45 Ok(())
46 }
47
48 async fn handle_experiment_start(
49 &mut self,
50 max_override: Option<u32>,
51 ) -> Result<(), AgentError> {
52 if self
54 .experiments
55 .cancel
56 .as_ref()
57 .is_some_and(|t| !t.is_cancelled())
58 {
59 self.channel
60 .send("Experiment already running. Use /experiment stop to cancel.")
61 .await?;
62 return Ok(());
63 }
64
65 let mut config = self.experiments.config.clone();
66
67 if !config.enabled {
68 self.channel
69 .send(
70 "Experiments are disabled. Set `experiments.enabled = true` in config \
71 and restart.",
72 )
73 .await?;
74 return Ok(());
75 }
76
77 if let Some(n) = max_override {
78 config.max_experiments = n;
79 }
80
81 if let Err(e) = config.validate() {
82 self.channel
83 .send(&format!("Experiment config is invalid: {e}"))
84 .await?;
85 return Ok(());
86 }
87
88 let max_n = config.max_experiments;
89 let engine = match self.build_experiment_engine(config) {
90 Ok(e) => e,
91 Err(msg) => {
92 self.channel.send(&msg).await?;
93 return Ok(());
94 }
95 };
96
97 self.run_experiment_engine(engine, max_n).await
98 }
99
100 fn build_experiment_engine(
102 &mut self,
103 config: crate::config::ExperimentConfig,
104 ) -> Result<ExperimentEngine, String> {
105 let benchmark_path = if let Some(p) = &config.benchmark_file {
106 p.clone()
107 } else {
108 return Err("experiments.benchmark_file is not set in config.".to_owned());
109 };
110
111 let benchmark = BenchmarkSet::from_file(&benchmark_path)
112 .map_err(|e| format!("Failed to load benchmark: {e}"))?;
113
114 let provider_arc = Arc::new(self.provider.clone());
115 let judge_arc = self
118 .experiments
119 .eval_provider
120 .as_ref()
121 .map_or_else(|| Arc::clone(&provider_arc), |p| Arc::new(p.clone()));
122 let evaluator = Evaluator::new(judge_arc, benchmark, config.eval_budget_tokens)
123 .map_err(|e| format!("Failed to create evaluator: {e}"))?;
124
125 let generator = Box::new(GridStep::new(SearchSpace::default()));
126 let baseline = self.experiments.baseline.clone();
130 let memory = self.memory_state.memory.clone();
131
132 Ok(
133 ExperimentEngine::new(evaluator, generator, provider_arc, baseline, config, memory)
134 .with_source(ExperimentSource::Manual),
135 )
136 }
137
138 async fn run_experiment_engine(
139 &mut self,
140 mut engine: ExperimentEngine,
141 max_n: u32,
142 ) -> Result<(), AgentError> {
143 let cancel = engine.cancel_token();
144 self.experiments.cancel = Some(cancel);
145
146 self.channel
147 .send(&format!(
148 "Experiment session starting (max {max_n} experiments). \
149 Use /experiment stop to cancel. Results will be shown when complete.",
150 ))
151 .await?;
152
153 let notify_tx = self.experiments.notify_tx.clone();
157 tokio::spawn(async move {
158 let msg = match engine.run().await {
159 Ok(report) => {
160 let accepted = report.results.iter().filter(|r| r.accepted).count();
161 let wall_secs =
162 f64::from(u32::try_from(report.wall_time_ms).unwrap_or(u32::MAX)) / 1000.0;
163 format!(
164 "Experiment session `{}` complete: {}/{} accepted, \
165 baseline {:.3} → {:.3} (improvement {:.3}), {wall_secs:.1}s{}",
166 &report.session_id[..report.session_id.len().min(8)],
167 accepted,
168 report.results.len(),
169 report.baseline_score,
170 report.final_score,
171 report.total_improvement,
172 if report.cancelled { " [cancelled]" } else { "" },
173 )
174 }
175 Err(e) => format!("Experiment session failed: {e}"),
176 };
177 let _ = notify_tx.send(msg).await;
179 });
180
181 Ok(())
182 }
183
184 async fn handle_experiment_stop(&mut self) -> Result<(), AgentError> {
185 match &self.experiments.cancel {
186 Some(token) if !token.is_cancelled() => {
187 token.cancel();
188 self.channel
189 .send("Experiment session cancelled. Results so far are saved.")
190 .await?;
191 }
192 _ => {
193 self.channel
194 .send("No experiment is currently running.")
195 .await?;
196 }
197 }
198 Ok(())
199 }
200
201 async fn handle_experiment_status(&mut self) -> Result<(), AgentError> {
202 let running = self
203 .experiments
204 .cancel
205 .as_ref()
206 .is_some_and(|t| !t.is_cancelled());
207
208 let mut msg = if running {
209 String::from("Experiment: **running**. Use `/experiment stop` to cancel.")
210 } else {
211 String::from("Experiment: **idle**. Use `/experiment start [N]` to begin.")
212 };
213
214 if let Some(memory) = &self.memory_state.memory {
215 let rows = memory.sqlite().list_experiment_results(None, 1).await?;
216 if let Some(latest) = rows.first()
217 && let Some(summary) = memory
218 .sqlite()
219 .experiment_session_summary(&latest.session_id)
220 .await?
221 {
222 let sid_len = summary.session_id.len().min(11);
223 let _ = write!(
224 msg,
225 "\nLast session: `{}` | {} experiments | {} accepted | \
226 best delta: {:.3}",
227 &summary.session_id[..sid_len],
228 summary.total,
229 summary.accepted_count,
230 summary.best_delta,
231 );
232 }
233 }
234
235 self.channel.send(&msg).await?;
236 Ok(())
237 }
238
239 async fn handle_experiment_report(&mut self) -> Result<(), AgentError> {
240 let Some(memory) = &self.memory_state.memory else {
241 self.channel
242 .send("Memory is not enabled — cannot query experiment results.")
243 .await?;
244 return Ok(());
245 };
246
247 let rows = memory.sqlite().list_experiment_results(None, 50).await?;
248
249 if rows.is_empty() {
250 self.channel.send("No experiment results found.").await?;
251 return Ok(());
252 }
253
254 let mut out = String::from("**Experiment Results** (last 50, newest first)\n\n```\n");
255 let _ = writeln!(
256 out,
257 "{:<8} {:<12} {:<20} {:<8} {:<8} {:<8} {:<8}",
258 "ID", "Session", "Parameter", "Delta", "Baseline", "Candidate", "Accepted"
259 );
260 for r in &rows {
261 let sid_len = r.session_id.len().min(11);
262 let _ = writeln!(
263 out,
264 "{:<8} {:<12} {:<20} {:<8.3} {:<8.3} {:<8.3} {:<8}",
265 r.id,
266 &r.session_id[..sid_len],
267 &r.parameter,
268 r.delta,
269 r.baseline_score,
270 r.candidate_score,
271 if r.accepted { "yes" } else { "no" },
272 );
273 }
274 out.push_str("```");
275 self.channel.send(&out).await?;
276 Ok(())
277 }
278
279 async fn handle_experiment_best(&mut self) -> Result<(), AgentError> {
280 let Some(memory) = &self.memory_state.memory else {
281 self.channel
282 .send("Memory is not enabled — cannot query experiment results.")
283 .await?;
284 return Ok(());
285 };
286
287 let row = memory.sqlite().best_experiment_result(None).await?;
288
289 match row {
290 None => {
291 self.channel
292 .send("No accepted experiment results found yet.")
293 .await?;
294 }
295 Some(r) => {
296 let sid_len = r.session_id.len().min(11);
297 let msg = format!(
298 "**Best experiment result**\n\
299 - Session: `{}`\n\
300 - Parameter: `{}`\n\
301 - Delta: `{:.3}` ({:.3} → {:.3})\n\
302 - Source: `{}`\n\
303 - At: {}",
304 &r.session_id[..sid_len],
305 r.parameter,
306 r.delta,
307 r.baseline_score,
308 r.candidate_score,
309 r.source,
310 r.created_at,
311 );
312 self.channel.send(&msg).await?;
313 }
314 }
315 Ok(())
316 }
317}
318
#[cfg(test)]
mod tests {
    use super::super::agent_tests::{
        MockChannel, MockToolExecutor, create_test_registry, mock_provider,
    };
    use super::*;

    /// Builds an agent backed entirely by mock collaborators, leaving experiment state
    /// and memory at whatever `Agent::new` sets up by default.
    fn make_agent() -> Agent<MockChannel> {
        let provider = mock_provider(vec![]);
        let channel = MockChannel::new(vec![]);
        let registry = create_test_registry();
        Agent::new(
            provider,
            channel,
            registry,
            None,
            5,
            MockToolExecutor::no_tools(),
        )
    }

    /// Runs `command` through the `/experiment` dispatcher and asserts that at least one
    /// message sent on the mock channel contains `needle`. `what` names the expected
    /// reply in the failure text.
    async fn assert_reply_contains(
        agent: &mut Agent<MockChannel>,
        command: &str,
        needle: &str,
        what: &str,
    ) {
        agent.handle_experiment_command(command).await.unwrap();
        let msgs = agent.channel.sent_messages();
        assert!(
            msgs.iter().any(|s| s.contains(needle)),
            "expected {what}, got: {msgs:?}"
        );
    }

    #[tokio::test]
    async fn unknown_subcommand_returns_help() {
        let mut agent = make_agent();
        assert_reply_contains(
            &mut agent,
            "/experiment foobar",
            "Unknown /experiment",
            "help text",
        )
        .await;
    }

    #[tokio::test]
    async fn start_when_disabled_returns_error() {
        let mut agent = make_agent();
        assert_reply_contains(
            &mut agent,
            "/experiment start",
            "disabled",
            "disabled message",
        )
        .await;
    }

    #[tokio::test]
    async fn stop_when_not_running_returns_not_running() {
        let mut agent = make_agent();
        assert_reply_contains(
            &mut agent,
            "/experiment stop",
            "No experiment is currently running",
            "not-running message",
        )
        .await;
    }

    #[tokio::test]
    async fn status_when_idle_returns_idle() {
        let mut agent = make_agent();
        assert_reply_contains(&mut agent, "/experiment status", "idle", "idle status").await;
    }

    #[tokio::test]
    async fn empty_subcommand_returns_status() {
        let mut agent = make_agent();
        assert_reply_contains(
            &mut agent,
            "/experiment",
            "idle",
            "idle status for empty subcommand",
        )
        .await;
    }

    #[tokio::test]
    async fn start_while_running_returns_already_running() {
        use tokio_util::sync::CancellationToken;

        let mut agent = make_agent();
        // A stored, un-cancelled token is what the start handler treats as "running".
        agent.experiments.cancel = Some(CancellationToken::new());

        assert_reply_contains(
            &mut agent,
            "/experiment start",
            "already running",
            "already-running guard",
        )
        .await;
    }

    #[tokio::test]
    async fn report_without_memory_returns_error() {
        let mut agent = make_agent();
        assert_reply_contains(
            &mut agent,
            "/experiment report",
            "Memory is not enabled",
            "no-memory message",
        )
        .await;
    }

    #[tokio::test]
    async fn best_without_memory_returns_error() {
        let mut agent = make_agent();
        assert_reply_contains(
            &mut agent,
            "/experiment best",
            "Memory is not enabled",
            "no-memory message",
        )
        .await;
    }
}