1use async_trait::async_trait;
4use chrono::{DateTime, Utc};
5use rustant_core::error::ToolError;
6use rustant_core::types::{RiskLevel, ToolOutput};
7use serde::{Deserialize, Serialize};
8use serde_json::{Value, json};
9use std::path::PathBuf;
10
11use crate::registry::Tool;
12
13#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
18enum HypothesisStatus {
19 Proposed,
20 Testing,
21 Supported,
22 Refuted,
23 Inconclusive,
24}
25
26impl std::fmt::Display for HypothesisStatus {
27 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28 match self {
29 Self::Proposed => write!(f, "Proposed"),
30 Self::Testing => write!(f, "Testing"),
31 Self::Supported => write!(f, "Supported"),
32 Self::Refuted => write!(f, "Refuted"),
33 Self::Inconclusive => write!(f, "Inconclusive"),
34 }
35 }
36}
37
/// A single finding recorded against a hypothesis.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct Evidence {
    /// ID of the experiment the finding is attributed to.
    experiment_id: String,
    /// Free-text statement of what was observed.
    finding: String,
    /// `true` when the finding supports the hypothesis, `false` when it counts against it.
    supports: bool,
    /// Confidence in the finding; `action_record_evidence` clamps it to `0.0..=1.0`.
    confidence: f64,
    /// Timestamp taken when the evidence was recorded.
    recorded_at: DateTime<Utc>,
}
46
/// A claim under investigation, together with the evidence recorded for it.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct Hypothesis {
    /// Identifier of the form `h<N>`, assigned by `action_add_hypothesis`.
    id: String,
    /// Short title; `action_add_hypothesis` stores it trimmed and rejects an empty one.
    title: String,
    /// Longer description; empty when none was supplied.
    description: String,
    /// Current lifecycle state; a new hypothesis starts as `Proposed`.
    status: HypothesisStatus,
    /// Findings appended by `action_record_evidence`.
    evidence: Vec<Evidence>,
    /// Free-form labels matched by the `tag` filter of `action_list_hypotheses`.
    tags: Vec<String>,
    /// Timestamp taken when the hypothesis was added.
    created_at: DateTime<Utc>,
}
57
58#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
59enum ExperimentStatus {
60 Planned,
61 Running,
62 Completed,
63 Failed,
64 Cancelled,
65}
66
67impl std::fmt::Display for ExperimentStatus {
68 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69 match self {
70 Self::Planned => write!(f, "Planned"),
71 Self::Running => write!(f, "Running"),
72 Self::Completed => write!(f, "Completed"),
73 Self::Failed => write!(f, "Failed"),
74 Self::Cancelled => write!(f, "Cancelled"),
75 }
76 }
77}
78
/// One experiment run, optionally linked to a hypothesis.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct Experiment {
    /// Identifier of the form `e<N>`, assigned by `action_add_experiment`.
    id: String,
    /// ID of the hypothesis this experiment tests, if any.
    hypothesis_id: Option<String>,
    /// Short name; `action_add_experiment` stores it trimmed and rejects an empty one.
    name: String,
    /// Longer description; empty when none was supplied.
    description: String,
    /// Caller-supplied configuration JSON; `{}` when omitted at creation.
    config: Value,
    /// Result metrics JSON; `{}` until `action_complete_experiment` replaces it.
    metrics: Value,
    /// Current lifecycle state; a new experiment starts as `Planned`.
    status: ExperimentStatus,
    /// Free-text notes set when the experiment is completed or failed.
    notes: String,
    /// Free-form labels matched by the `tag` filter of `action_list_experiments`.
    tags: Vec<String>,
    /// Timestamp taken when the experiment was added.
    created_at: DateTime<Utc>,
    /// Set when the experiment moves to `Running`.
    started_at: Option<DateTime<Utc>>,
    /// Set when the experiment moves to `Completed` or `Failed`.
    completed_at: Option<DateTime<Utc>>,
}
94
/// Everything the tracker persists, serialized as one JSON document.
///
/// Note: the derived `Default` sets both ID counters to 0, whereas
/// `load_state` starts a brand-new tracker with both counters at 1.
#[derive(Debug, Default, Serialize, Deserialize)]
struct ExperimentState {
    /// All hypotheses, in insertion order.
    hypotheses: Vec<Hypothesis>,
    /// All experiments, in insertion order.
    experiments: Vec<Experiment>,
    /// Number used for the next `h<N>` id; incremented on every add.
    next_hypothesis_id: usize,
    /// Number used for the next `e<N>` id; incremented on every add.
    next_experiment_id: usize,
}
102
/// Tool that tracks hypotheses, experiments and evidence in a JSON file
/// stored under the workspace directory.
pub struct ExperimentTrackerTool {
    /// Root directory under which `.rustant/experiments/tracker.json` is kept.
    workspace: PathBuf,
}
110
111impl ExperimentTrackerTool {
112 pub fn new(workspace: PathBuf) -> Self {
113 Self { workspace }
114 }
115
116 fn state_path(&self) -> PathBuf {
117 self.workspace
118 .join(".rustant")
119 .join("experiments")
120 .join("tracker.json")
121 }
122
123 fn load_state(&self) -> ExperimentState {
124 let path = self.state_path();
125 if path.exists() {
126 std::fs::read_to_string(&path)
127 .ok()
128 .and_then(|s| serde_json::from_str(&s).ok())
129 .unwrap_or_default()
130 } else {
131 ExperimentState {
132 hypotheses: Vec::new(),
133 experiments: Vec::new(),
134 next_hypothesis_id: 1,
135 next_experiment_id: 1,
136 }
137 }
138 }
139
140 fn save_state(&self, state: &ExperimentState) -> Result<(), ToolError> {
141 let path = self.state_path();
142 if let Some(parent) = path.parent() {
143 std::fs::create_dir_all(parent).map_err(|e| ToolError::ExecutionFailed {
144 name: "experiment_tracker".to_string(),
145 message: format!("Failed to create state dir: {}", e),
146 })?;
147 }
148 let json = serde_json::to_string_pretty(state).map_err(|e| ToolError::ExecutionFailed {
149 name: "experiment_tracker".to_string(),
150 message: format!("Failed to serialize state: {}", e),
151 })?;
152 let tmp = path.with_extension("json.tmp");
153 std::fs::write(&tmp, &json).map_err(|e| ToolError::ExecutionFailed {
154 name: "experiment_tracker".to_string(),
155 message: format!("Failed to write state: {}", e),
156 })?;
157 std::fs::rename(&tmp, &path).map_err(|e| ToolError::ExecutionFailed {
158 name: "experiment_tracker".to_string(),
159 message: format!("Failed to rename state file: {}", e),
160 })?;
161 Ok(())
162 }
163
164 fn action_add_hypothesis(&self, args: &Value) -> Result<ToolOutput, ToolError> {
167 let title = args
168 .get("title")
169 .and_then(|v| v.as_str())
170 .unwrap_or("")
171 .trim();
172 if title.is_empty() {
173 return Ok(ToolOutput::text(
174 "Please provide a title for the hypothesis.",
175 ));
176 }
177 let description = args
178 .get("description")
179 .and_then(|v| v.as_str())
180 .unwrap_or("")
181 .to_string();
182 let tags = parse_tags(args);
183
184 let mut state = self.load_state();
185 let id = format!("h{}", state.next_hypothesis_id);
186 state.next_hypothesis_id += 1;
187 state.hypotheses.push(Hypothesis {
188 id: id.clone(),
189 title: title.to_string(),
190 description,
191 status: HypothesisStatus::Proposed,
192 evidence: Vec::new(),
193 tags,
194 created_at: Utc::now(),
195 });
196 self.save_state(&state)?;
197 Ok(ToolOutput::text(format!(
198 "Added hypothesis {} — '{}'.",
199 id, title
200 )))
201 }
202
203 fn action_update_hypothesis(&self, args: &Value) -> Result<ToolOutput, ToolError> {
204 let id = args.get("id").and_then(|v| v.as_str()).unwrap_or("");
205 if id.is_empty() {
206 return Ok(ToolOutput::text("Please provide a hypothesis id."));
207 }
208 let mut state = self.load_state();
209 let hyp = state.hypotheses.iter_mut().find(|h| h.id == id);
210 match hyp {
211 Some(h) => {
212 if let Some(title) = args.get("title").and_then(|v| v.as_str()) {
213 h.title = title.to_string();
214 }
215 if let Some(status_str) = args.get("status").and_then(|v| v.as_str())
216 && let Some(status) = parse_hypothesis_status(status_str)
217 {
218 h.status = status;
219 }
220 if let Some(tags_val) = args.get("tags")
221 && let Some(arr) = tags_val.as_array()
222 {
223 h.tags = arr
224 .iter()
225 .filter_map(|v| v.as_str().map(|s| s.to_string()))
226 .collect();
227 }
228 let title = h.title.clone();
229 let status = h.status.clone();
230 self.save_state(&state)?;
231 Ok(ToolOutput::text(format!(
232 "Updated hypothesis {} — '{}' [{}].",
233 id, title, status
234 )))
235 }
236 None => Ok(ToolOutput::text(format!("Hypothesis {} not found.", id))),
237 }
238 }
239
240 fn action_list_hypotheses(&self, args: &Value) -> Result<ToolOutput, ToolError> {
241 let state = self.load_state();
242 let status_filter = args.get("status").and_then(|v| v.as_str());
243 let tag_filter = args.get("tag").and_then(|v| v.as_str());
244
245 let filtered: Vec<&Hypothesis> = state
246 .hypotheses
247 .iter()
248 .filter(|h| {
249 if let Some(sf) = status_filter
250 && let Some(parsed) = parse_hypothesis_status(sf)
251 && h.status != parsed
252 {
253 return false;
254 }
255 if let Some(tf) = tag_filter
256 && !h.tags.iter().any(|t| t == tf)
257 {
258 return false;
259 }
260 true
261 })
262 .collect();
263
264 if filtered.is_empty() {
265 return Ok(ToolOutput::text("No hypotheses found."));
266 }
267 let lines: Vec<String> = filtered
268 .iter()
269 .map(|h| {
270 let tags = if h.tags.is_empty() {
271 String::new()
272 } else {
273 format!(" [{}]", h.tags.join(", "))
274 };
275 format!(
276 " {} — {} [{}] ({} evidence){}",
277 h.id,
278 h.title,
279 h.status,
280 h.evidence.len(),
281 tags
282 )
283 })
284 .collect();
285 Ok(ToolOutput::text(format!(
286 "Hypotheses ({}):\n{}",
287 filtered.len(),
288 lines.join("\n")
289 )))
290 }
291
292 fn action_get_hypothesis(&self, args: &Value) -> Result<ToolOutput, ToolError> {
293 let id = args.get("id").and_then(|v| v.as_str()).unwrap_or("");
294 if id.is_empty() {
295 return Ok(ToolOutput::text("Please provide a hypothesis id."));
296 }
297 let state = self.load_state();
298 let hyp = state.hypotheses.iter().find(|h| h.id == id);
299 match hyp {
300 Some(h) => {
301 let linked_experiments: Vec<&Experiment> = state
302 .experiments
303 .iter()
304 .filter(|e| e.hypothesis_id.as_deref() == Some(&h.id))
305 .collect();
306
307 let mut out = format!(
308 "Hypothesis: {} — {}\nStatus: {}\nDescription: {}\nTags: {}\nCreated: {}\n",
309 h.id,
310 h.title,
311 h.status,
312 if h.description.is_empty() {
313 "(none)"
314 } else {
315 &h.description
316 },
317 if h.tags.is_empty() {
318 "(none)".to_string()
319 } else {
320 h.tags.join(", ")
321 },
322 h.created_at.format("%Y-%m-%d %H:%M UTC"),
323 );
324
325 if !h.evidence.is_empty() {
326 out.push_str(&format!("\nEvidence ({}):\n", h.evidence.len()));
327 for ev in &h.evidence {
328 out.push_str(&format!(
329 " [{}] {} (confidence: {:.2}, supports: {})\n",
330 ev.experiment_id, ev.finding, ev.confidence, ev.supports
331 ));
332 }
333 }
334
335 if !linked_experiments.is_empty() {
336 out.push_str(&format!(
337 "\nLinked experiments ({}):\n",
338 linked_experiments.len()
339 ));
340 for exp in &linked_experiments {
341 out.push_str(&format!(" {} — {} [{}]\n", exp.id, exp.name, exp.status));
342 }
343 }
344
345 Ok(ToolOutput::text(out))
346 }
347 None => Ok(ToolOutput::text(format!("Hypothesis {} not found.", id))),
348 }
349 }
350
351 fn action_add_experiment(&self, args: &Value) -> Result<ToolOutput, ToolError> {
352 let name = args
353 .get("name")
354 .and_then(|v| v.as_str())
355 .unwrap_or("")
356 .trim();
357 if name.is_empty() {
358 return Ok(ToolOutput::text(
359 "Please provide a name for the experiment.",
360 ));
361 }
362 let description = args
363 .get("description")
364 .and_then(|v| v.as_str())
365 .unwrap_or("")
366 .to_string();
367 let hypothesis_id = args
368 .get("hypothesis_id")
369 .and_then(|v| v.as_str())
370 .map(|s| s.to_string());
371 let config = args.get("config").cloned().unwrap_or(json!({}));
372 let tags = parse_tags(args);
373
374 let mut state = self.load_state();
375
376 if let Some(ref hid) = hypothesis_id
378 && !state.hypotheses.iter().any(|h| h.id == *hid)
379 {
380 return Ok(ToolOutput::text(format!("Hypothesis {} not found.", hid)));
381 }
382
383 let id = format!("e{}", state.next_experiment_id);
384 state.next_experiment_id += 1;
385 state.experiments.push(Experiment {
386 id: id.clone(),
387 hypothesis_id,
388 name: name.to_string(),
389 description,
390 config,
391 metrics: json!({}),
392 status: ExperimentStatus::Planned,
393 notes: String::new(),
394 tags,
395 created_at: Utc::now(),
396 started_at: None,
397 completed_at: None,
398 });
399 self.save_state(&state)?;
400 Ok(ToolOutput::text(format!(
401 "Added experiment {} — '{}'.",
402 id, name
403 )))
404 }
405
406 fn action_start_experiment(&self, args: &Value) -> Result<ToolOutput, ToolError> {
407 let id = args.get("id").and_then(|v| v.as_str()).unwrap_or("");
408 if id.is_empty() {
409 return Ok(ToolOutput::text("Please provide an experiment id."));
410 }
411 let mut state = self.load_state();
412 let exp = state.experiments.iter_mut().find(|e| e.id == id);
413 match exp {
414 Some(e) => {
415 if e.status != ExperimentStatus::Planned {
416 return Ok(ToolOutput::text(format!(
417 "Experiment {} cannot be started — current status is {}.",
418 id, e.status
419 )));
420 }
421 e.status = ExperimentStatus::Running;
422 e.started_at = Some(Utc::now());
423 let name = e.name.clone();
424 self.save_state(&state)?;
425 Ok(ToolOutput::text(format!(
426 "Experiment {} '{}' is now running.",
427 id, name
428 )))
429 }
430 None => Ok(ToolOutput::text(format!("Experiment {} not found.", id))),
431 }
432 }
433
434 fn action_complete_experiment(&self, args: &Value) -> Result<ToolOutput, ToolError> {
435 let id = args.get("id").and_then(|v| v.as_str()).unwrap_or("");
436 if id.is_empty() {
437 return Ok(ToolOutput::text("Please provide an experiment id."));
438 }
439 let mut state = self.load_state();
440 let exp = state.experiments.iter_mut().find(|e| e.id == id);
441 match exp {
442 Some(e) => {
443 if e.status != ExperimentStatus::Running {
444 return Ok(ToolOutput::text(format!(
445 "Experiment {} cannot be completed — current status is {}.",
446 id, e.status
447 )));
448 }
449 e.status = ExperimentStatus::Completed;
450 e.completed_at = Some(Utc::now());
451 if let Some(metrics) = args.get("metrics") {
452 e.metrics = metrics.clone();
453 }
454 if let Some(notes) = args.get("notes").and_then(|v| v.as_str()) {
455 e.notes = notes.to_string();
456 }
457 let name = e.name.clone();
458 self.save_state(&state)?;
459 Ok(ToolOutput::text(format!(
460 "Experiment {} '{}' completed.",
461 id, name
462 )))
463 }
464 None => Ok(ToolOutput::text(format!("Experiment {} not found.", id))),
465 }
466 }
467
468 fn action_fail_experiment(&self, args: &Value) -> Result<ToolOutput, ToolError> {
469 let id = args.get("id").and_then(|v| v.as_str()).unwrap_or("");
470 if id.is_empty() {
471 return Ok(ToolOutput::text("Please provide an experiment id."));
472 }
473 let mut state = self.load_state();
474 let exp = state.experiments.iter_mut().find(|e| e.id == id);
475 match exp {
476 Some(e) => {
477 if e.status != ExperimentStatus::Running {
478 return Ok(ToolOutput::text(format!(
479 "Experiment {} cannot be failed — current status is {}.",
480 id, e.status
481 )));
482 }
483 e.status = ExperimentStatus::Failed;
484 e.completed_at = Some(Utc::now());
485 if let Some(notes) = args.get("notes").and_then(|v| v.as_str()) {
486 e.notes = notes.to_string();
487 }
488 let name = e.name.clone();
489 self.save_state(&state)?;
490 Ok(ToolOutput::text(format!(
491 "Experiment {} '{}' failed.",
492 id, name
493 )))
494 }
495 None => Ok(ToolOutput::text(format!("Experiment {} not found.", id))),
496 }
497 }
498
499 fn action_get_experiment(&self, args: &Value) -> Result<ToolOutput, ToolError> {
500 let id = args.get("id").and_then(|v| v.as_str()).unwrap_or("");
501 if id.is_empty() {
502 return Ok(ToolOutput::text("Please provide an experiment id."));
503 }
504 let state = self.load_state();
505 let exp = state.experiments.iter().find(|e| e.id == id);
506 match exp {
507 Some(e) => {
508 let mut out = format!(
509 "Experiment: {} — {}\nStatus: {}\nDescription: {}\nHypothesis: {}\nTags: {}\nConfig: {}\nMetrics: {}\nNotes: {}\nCreated: {}\nStarted: {}\nCompleted: {}\n",
510 e.id,
511 e.name,
512 e.status,
513 if e.description.is_empty() {
514 "(none)"
515 } else {
516 &e.description
517 },
518 e.hypothesis_id.as_deref().unwrap_or("(none)"),
519 if e.tags.is_empty() {
520 "(none)".to_string()
521 } else {
522 e.tags.join(", ")
523 },
524 e.config,
525 e.metrics,
526 if e.notes.is_empty() {
527 "(none)"
528 } else {
529 &e.notes
530 },
531 e.created_at.format("%Y-%m-%d %H:%M UTC"),
532 e.started_at
533 .map(|t| t.format("%Y-%m-%d %H:%M UTC").to_string())
534 .unwrap_or_else(|| "(not started)".to_string()),
535 e.completed_at
536 .map(|t| t.format("%Y-%m-%d %H:%M UTC").to_string())
537 .unwrap_or_else(|| "(not completed)".to_string()),
538 );
539
540 if let Some(ref hid) = e.hypothesis_id
542 && let Some(hyp) = state.hypotheses.iter().find(|h| h.id == *hid)
543 {
544 let related: Vec<&Evidence> = hyp
545 .evidence
546 .iter()
547 .filter(|ev| ev.experiment_id == e.id)
548 .collect();
549 if !related.is_empty() {
550 out.push_str(&format!(
551 "\nEvidence from this experiment ({}):\n",
552 related.len()
553 ));
554 for ev in &related {
555 out.push_str(&format!(
556 " {} (confidence: {:.2}, supports: {})\n",
557 ev.finding, ev.confidence, ev.supports
558 ));
559 }
560 }
561 }
562
563 Ok(ToolOutput::text(out))
564 }
565 None => Ok(ToolOutput::text(format!("Experiment {} not found.", id))),
566 }
567 }
568
569 fn action_list_experiments(&self, args: &Value) -> Result<ToolOutput, ToolError> {
570 let state = self.load_state();
571 let hypothesis_id_filter = args.get("hypothesis_id").and_then(|v| v.as_str());
572 let status_filter = args.get("status").and_then(|v| v.as_str());
573 let tag_filter = args.get("tag").and_then(|v| v.as_str());
574
575 let filtered: Vec<&Experiment> = state
576 .experiments
577 .iter()
578 .filter(|e| {
579 if let Some(hid) = hypothesis_id_filter
580 && e.hypothesis_id.as_deref() != Some(hid)
581 {
582 return false;
583 }
584 if let Some(sf) = status_filter
585 && let Some(parsed) = parse_experiment_status(sf)
586 && e.status != parsed
587 {
588 return false;
589 }
590 if let Some(tf) = tag_filter
591 && !e.tags.iter().any(|t| t == tf)
592 {
593 return false;
594 }
595 true
596 })
597 .collect();
598
599 if filtered.is_empty() {
600 return Ok(ToolOutput::text("No experiments found."));
601 }
602 let lines: Vec<String> = filtered
603 .iter()
604 .map(|e| {
605 let hyp = e
606 .hypothesis_id
607 .as_deref()
608 .map(|h| format!(" ({})", h))
609 .unwrap_or_default();
610 let tags = if e.tags.is_empty() {
611 String::new()
612 } else {
613 format!(" [{}]", e.tags.join(", "))
614 };
615 format!(" {} — {} [{}]{}{}", e.id, e.name, e.status, hyp, tags)
616 })
617 .collect();
618 Ok(ToolOutput::text(format!(
619 "Experiments ({}):\n{}",
620 filtered.len(),
621 lines.join("\n")
622 )))
623 }
624
625 fn action_record_evidence(&self, args: &Value) -> Result<ToolOutput, ToolError> {
626 let hypothesis_id = args
627 .get("hypothesis_id")
628 .and_then(|v| v.as_str())
629 .unwrap_or("");
630 let experiment_id = args
631 .get("experiment_id")
632 .and_then(|v| v.as_str())
633 .unwrap_or("");
634 let finding = args.get("finding").and_then(|v| v.as_str()).unwrap_or("");
635 let supports = args
636 .get("supports")
637 .and_then(|v| v.as_bool())
638 .unwrap_or(false);
639 let confidence = args
640 .get("confidence")
641 .and_then(|v| v.as_f64())
642 .unwrap_or(0.5)
643 .clamp(0.0, 1.0);
644
645 if hypothesis_id.is_empty() || experiment_id.is_empty() || finding.is_empty() {
646 return Ok(ToolOutput::text(
647 "Please provide hypothesis_id, experiment_id, and finding.",
648 ));
649 }
650
651 let mut state = self.load_state();
652
653 if !state.experiments.iter().any(|e| e.id == experiment_id) {
655 return Ok(ToolOutput::text(format!(
656 "Experiment {} not found.",
657 experiment_id
658 )));
659 }
660
661 let hyp = state.hypotheses.iter_mut().find(|h| h.id == hypothesis_id);
662 match hyp {
663 Some(h) => {
664 h.evidence.push(Evidence {
665 experiment_id: experiment_id.to_string(),
666 finding: finding.to_string(),
667 supports,
668 confidence,
669 recorded_at: Utc::now(),
670 });
671 self.save_state(&state)?;
672 Ok(ToolOutput::text(format!(
673 "Recorded evidence for {} from {} (supports: {}, confidence: {:.2}).",
674 hypothesis_id, experiment_id, supports, confidence
675 )))
676 }
677 None => Ok(ToolOutput::text(format!(
678 "Hypothesis {} not found.",
679 hypothesis_id
680 ))),
681 }
682 }
683
684 fn action_compare_experiments(&self, args: &Value) -> Result<ToolOutput, ToolError> {
685 let ids = args
686 .get("ids")
687 .and_then(|v| v.as_array())
688 .map(|arr| {
689 arr.iter()
690 .filter_map(|v| v.as_str().map(|s| s.to_string()))
691 .collect::<Vec<_>>()
692 })
693 .unwrap_or_default();
694
695 if ids.len() < 2 {
696 return Ok(ToolOutput::text(
697 "Please provide at least 2 experiment ids to compare.",
698 ));
699 }
700
701 let state = self.load_state();
702 let experiments: Vec<&Experiment> = ids
703 .iter()
704 .filter_map(|id| state.experiments.iter().find(|e| e.id == *id))
705 .collect();
706
707 if experiments.is_empty() {
708 return Ok(ToolOutput::text("No matching experiments found."));
709 }
710
711 let mut out = format!("Comparison of {} experiments:\n\n", experiments.len());
712 for exp in &experiments {
713 out.push_str(&format!("--- {} ---\n", exp.id));
714 out.push_str(&format!(" Name: {}\n", exp.name));
715 out.push_str(&format!(" Status: {}\n", exp.status));
716 out.push_str(&format!(
717 " Hypothesis: {}\n",
718 exp.hypothesis_id.as_deref().unwrap_or("(none)")
719 ));
720 out.push_str(&format!(" Config: {}\n", exp.config));
721 out.push_str(&format!(" Metrics: {}\n", exp.metrics));
722 if !exp.notes.is_empty() {
723 out.push_str(&format!(" Notes: {}\n", exp.notes));
724 }
725 out.push('\n');
726 }
727
728 Ok(ToolOutput::text(out))
729 }
730
731 fn action_summary(&self, args: &Value) -> Result<ToolOutput, ToolError> {
732 let state = self.load_state();
733 let hypothesis_id_filter = args.get("hypothesis_id").and_then(|v| v.as_str());
734
735 let hypotheses: Vec<&Hypothesis> = if let Some(hid) = hypothesis_id_filter {
736 state.hypotheses.iter().filter(|h| h.id == hid).collect()
737 } else {
738 state.hypotheses.iter().collect()
739 };
740
741 let experiments: Vec<&Experiment> = if let Some(hid) = hypothesis_id_filter {
742 state
743 .experiments
744 .iter()
745 .filter(|e| e.hypothesis_id.as_deref() == Some(hid))
746 .collect()
747 } else {
748 state.experiments.iter().collect()
749 };
750
751 if hypotheses.is_empty() && experiments.is_empty() {
752 return Ok(ToolOutput::text("No data to summarize."));
753 }
754
755 let mut out = String::from("Summary:\n\n");
756
757 out.push_str(&format!("Hypotheses: {}\n", hypotheses.len()));
759 let proposed = hypotheses
760 .iter()
761 .filter(|h| h.status == HypothesisStatus::Proposed)
762 .count();
763 let testing = hypotheses
764 .iter()
765 .filter(|h| h.status == HypothesisStatus::Testing)
766 .count();
767 let supported = hypotheses
768 .iter()
769 .filter(|h| h.status == HypothesisStatus::Supported)
770 .count();
771 let refuted = hypotheses
772 .iter()
773 .filter(|h| h.status == HypothesisStatus::Refuted)
774 .count();
775 let inconclusive = hypotheses
776 .iter()
777 .filter(|h| h.status == HypothesisStatus::Inconclusive)
778 .count();
779 out.push_str(&format!(
780 " Proposed: {}, Testing: {}, Supported: {}, Refuted: {}, Inconclusive: {}\n",
781 proposed, testing, supported, refuted, inconclusive
782 ));
783
784 let total_evidence: usize = hypotheses.iter().map(|h| h.evidence.len()).sum();
786 let supporting: usize = hypotheses
787 .iter()
788 .flat_map(|h| h.evidence.iter())
789 .filter(|e| e.supports)
790 .count();
791 let opposing = total_evidence - supporting;
792 out.push_str(&format!(
793 "\nEvidence: {} total ({} supporting, {} opposing)\n",
794 total_evidence, supporting, opposing
795 ));
796
797 if total_evidence > 0 {
798 let avg_confidence: f64 = hypotheses
799 .iter()
800 .flat_map(|h| h.evidence.iter())
801 .map(|e| e.confidence)
802 .sum::<f64>()
803 / total_evidence as f64;
804 out.push_str(&format!(" Average confidence: {:.2}\n", avg_confidence));
805 }
806
807 out.push_str(&format!("\nExperiments: {}\n", experiments.len()));
809 let completed = experiments
810 .iter()
811 .filter(|e| e.status == ExperimentStatus::Completed)
812 .count();
813 let failed = experiments
814 .iter()
815 .filter(|e| e.status == ExperimentStatus::Failed)
816 .count();
817 let running = experiments
818 .iter()
819 .filter(|e| e.status == ExperimentStatus::Running)
820 .count();
821 let planned = experiments
822 .iter()
823 .filter(|e| e.status == ExperimentStatus::Planned)
824 .count();
825 out.push_str(&format!(
826 " Planned: {}, Running: {}, Completed: {}, Failed: {}\n",
827 planned, running, completed, failed
828 ));
829
830 if completed + failed > 0 {
831 let success_rate = completed as f64 / (completed + failed) as f64 * 100.0;
832 out.push_str(&format!(" Success rate: {:.0}%\n", success_rate));
833 }
834
835 Ok(ToolOutput::text(out))
836 }
837
838 fn action_export_markdown(&self, args: &Value) -> Result<ToolOutput, ToolError> {
839 let state = self.load_state();
840 let hypothesis_id_filter = args.get("hypothesis_id").and_then(|v| v.as_str());
841
842 let hypotheses: Vec<&Hypothesis> = if let Some(hid) = hypothesis_id_filter {
843 state.hypotheses.iter().filter(|h| h.id == hid).collect()
844 } else {
845 state.hypotheses.iter().collect()
846 };
847
848 let mut md = String::from("# Experiment Tracker Report\n\n");
849
850 if hypotheses.is_empty() && state.experiments.is_empty() {
851 md.push_str("No data to export.\n");
852 return Ok(ToolOutput::text(md));
853 }
854
855 for hyp in &hypotheses {
856 md.push_str(&format!("## {} — {}\n\n", hyp.id, hyp.title));
857 md.push_str(&format!("**Status:** {}\n\n", hyp.status));
858 if !hyp.description.is_empty() {
859 md.push_str(&format!("{}\n\n", hyp.description));
860 }
861 if !hyp.tags.is_empty() {
862 md.push_str(&format!("**Tags:** {}\n\n", hyp.tags.join(", ")));
863 }
864
865 if !hyp.evidence.is_empty() {
867 md.push_str("### Evidence\n\n");
868 md.push_str("| Experiment | Finding | Supports | Confidence |\n");
869 md.push_str("|---|---|---|---|\n");
870 for ev in &hyp.evidence {
871 md.push_str(&format!(
872 "| {} | {} | {} | {:.2} |\n",
873 ev.experiment_id, ev.finding, ev.supports, ev.confidence
874 ));
875 }
876 md.push('\n');
877 }
878
879 let linked: Vec<&Experiment> = state
881 .experiments
882 .iter()
883 .filter(|e| e.hypothesis_id.as_deref() == Some(&hyp.id))
884 .collect();
885 if !linked.is_empty() {
886 md.push_str("### Experiments\n\n");
887 for exp in &linked {
888 md.push_str(&format!(
889 "- **{}** — {} [{}]\n",
890 exp.id, exp.name, exp.status
891 ));
892 }
893 md.push('\n');
894 }
895 }
896
897 let unlinked: Vec<&Experiment> = state
899 .experiments
900 .iter()
901 .filter(|e| {
902 if hypothesis_id_filter.is_some() {
903 return false;
904 }
905 e.hypothesis_id.is_none()
906 })
907 .collect();
908 if !unlinked.is_empty() {
909 md.push_str("## Unlinked Experiments\n\n");
910 for exp in &unlinked {
911 md.push_str(&format!(
912 "- **{}** — {} [{}]\n",
913 exp.id, exp.name, exp.status
914 ));
915 }
916 md.push('\n');
917 }
918
919 Ok(ToolOutput::text(md))
920 }
921}
922
923fn parse_tags(args: &Value) -> Vec<String> {
928 args.get("tags")
929 .and_then(|v| v.as_array())
930 .map(|arr| {
931 arr.iter()
932 .filter_map(|v| v.as_str().map(|s| s.to_string()))
933 .collect()
934 })
935 .unwrap_or_default()
936}
937
938fn parse_hypothesis_status(s: &str) -> Option<HypothesisStatus> {
939 match s.to_lowercase().as_str() {
940 "proposed" => Some(HypothesisStatus::Proposed),
941 "testing" => Some(HypothesisStatus::Testing),
942 "supported" => Some(HypothesisStatus::Supported),
943 "refuted" => Some(HypothesisStatus::Refuted),
944 "inconclusive" => Some(HypothesisStatus::Inconclusive),
945 _ => None,
946 }
947}
948
949fn parse_experiment_status(s: &str) -> Option<ExperimentStatus> {
950 match s.to_lowercase().as_str() {
951 "planned" => Some(ExperimentStatus::Planned),
952 "running" => Some(ExperimentStatus::Running),
953 "completed" => Some(ExperimentStatus::Completed),
954 "failed" => Some(ExperimentStatus::Failed),
955 "cancelled" => Some(ExperimentStatus::Cancelled),
956 _ => None,
957 }
958}
959
960#[async_trait]
965impl Tool for ExperimentTrackerTool {
966 fn name(&self) -> &str {
967 "experiment_tracker"
968 }
969
970 fn description(&self) -> &str {
971 "Track scientific hypotheses, experiments, results, and evidence. Actions: add_hypothesis, update_hypothesis, list_hypotheses, get_hypothesis, add_experiment, start_experiment, complete_experiment, fail_experiment, get_experiment, list_experiments, record_evidence, compare_experiments, summary, export_markdown."
972 }
973
974 fn parameters_schema(&self) -> Value {
975 json!({
976 "type": "object",
977 "properties": {
978 "action": {
979 "type": "string",
980 "enum": [
981 "add_hypothesis", "update_hypothesis", "list_hypotheses", "get_hypothesis",
982 "add_experiment", "start_experiment", "complete_experiment", "fail_experiment",
983 "get_experiment", "list_experiments",
984 "record_evidence", "compare_experiments", "summary", "export_markdown"
985 ],
986 "description": "Action to perform"
987 },
988 "id": { "type": "string", "description": "Hypothesis or experiment ID" },
989 "title": { "type": "string", "description": "Hypothesis title" },
990 "name": { "type": "string", "description": "Experiment name" },
991 "description": { "type": "string", "description": "Description text" },
992 "status": { "type": "string", "description": "Status to set (for update_hypothesis)" },
993 "hypothesis_id": { "type": "string", "description": "Linked hypothesis ID" },
994 "experiment_id": { "type": "string", "description": "Experiment ID (for record_evidence)" },
995 "finding": { "type": "string", "description": "Evidence finding text" },
996 "supports": { "type": "boolean", "description": "Whether evidence supports the hypothesis" },
997 "confidence": { "type": "number", "description": "Confidence level 0.0-1.0 (default 0.5)" },
998 "config": { "type": "object", "description": "Experiment configuration" },
999 "metrics": { "type": "object", "description": "Experiment result metrics" },
1000 "notes": { "type": "string", "description": "Experiment notes" },
1001 "tags": {
1002 "type": "array",
1003 "items": { "type": "string" },
1004 "description": "Tags for filtering"
1005 },
1006 "ids": {
1007 "type": "array",
1008 "items": { "type": "string" },
1009 "description": "Experiment IDs (for compare_experiments)"
1010 },
1011 "tag": { "type": "string", "description": "Filter by tag" }
1012 },
1013 "required": ["action"]
1014 })
1015 }
1016
1017 fn risk_level(&self) -> RiskLevel {
1018 RiskLevel::Write
1019 }
1020
1021 async fn execute(&self, args: Value) -> Result<ToolOutput, ToolError> {
1022 let action = args.get("action").and_then(|v| v.as_str()).unwrap_or("");
1023
1024 match action {
1025 "add_hypothesis" => self.action_add_hypothesis(&args),
1026 "update_hypothesis" => self.action_update_hypothesis(&args),
1027 "list_hypotheses" => self.action_list_hypotheses(&args),
1028 "get_hypothesis" => self.action_get_hypothesis(&args),
1029 "add_experiment" => self.action_add_experiment(&args),
1030 "start_experiment" => self.action_start_experiment(&args),
1031 "complete_experiment" => self.action_complete_experiment(&args),
1032 "fail_experiment" => self.action_fail_experiment(&args),
1033 "get_experiment" => self.action_get_experiment(&args),
1034 "list_experiments" => self.action_list_experiments(&args),
1035 "record_evidence" => self.action_record_evidence(&args),
1036 "compare_experiments" => self.action_compare_experiments(&args),
1037 "summary" => self.action_summary(&args),
1038 "export_markdown" => self.action_export_markdown(&args),
1039 _ => Ok(ToolOutput::text(format!(
1040 "Unknown action: '{}'. Use: add_hypothesis, update_hypothesis, list_hypotheses, get_hypothesis, add_experiment, start_experiment, complete_experiment, fail_experiment, get_experiment, list_experiments, record_evidence, compare_experiments, summary, export_markdown",
1041 action
1042 ))),
1043 }
1044 }
1045}
1046
#[cfg(test)]
mod tests {
    //! Tests for `ExperimentTrackerTool`, driven through its `execute` entry point.
    //!
    //! Every test builds its own tool via `make_tool`, so the `h1` / `e1` style
    //! identifiers referenced below always belong to the items created in that test.

    use super::*;
    use tempfile::TempDir;

    /// Builds a tracker whose workspace is a newly created temporary directory.
    ///
    /// The `TempDir` is returned alongside the tool so callers can keep it bound
    /// while the tool is in use.
    fn make_tool() -> (TempDir, ExperimentTrackerTool) {
        let scratch = TempDir::new().unwrap();
        let root = scratch.path().canonicalize().unwrap();
        (scratch, ExperimentTrackerTool::new(root))
    }

    /// Runs one action against `tool` and unwraps the outcome, panicking on `Err`.
    async fn run(tool: &ExperimentTrackerTool, args: Value) -> ToolOutput {
        tool.execute(args).await.unwrap()
    }

    /// The tool reports its name, risk level, and a description naming both entity kinds.
    #[test]
    fn test_tool_properties() {
        let (_dir, tool) = make_tool();

        assert_eq!(tool.name(), "experiment_tracker");
        assert_eq!(tool.risk_level(), RiskLevel::Write);

        let description = tool.description();
        assert!(description.contains("hypotheses"));
        assert!(description.contains("experiments"));
    }

    /// The parameter schema is an object whose `action` property enumerates 14 actions.
    #[test]
    fn test_schema_validation() {
        let (_dir, tool) = make_tool();
        let schema = tool.parameters_schema();

        assert!(schema.is_object());
        assert!(schema.get("properties").is_some());

        let action_spec = &schema["properties"]["action"];
        let variants = action_spec.get("enum");
        assert!(variants.is_some());
        assert_eq!(variants.unwrap().as_array().unwrap().len(), 14);
    }

    /// A newly added hypothesis is reported as `h1` and is listed as `Proposed`.
    #[tokio::test]
    async fn test_add_hypothesis() {
        let (_dir, tool) = make_tool();

        let created = run(
            &tool,
            json!({
                "action": "add_hypothesis",
                "title": "Caching improves latency",
                "tags": ["performance"]
            }),
        )
        .await;
        assert!(created.content.contains("h1"));
        assert!(created.content.contains("Caching improves latency"));

        let listing = run(&tool, json!({"action": "list_hypotheses"})).await;
        assert!(listing.content.contains("Caching improves latency"));
        assert!(listing.content.contains("Proposed"));
    }

    /// Create, update, then fetch a hypothesis and check each step's output.
    #[tokio::test]
    async fn test_hypothesis_crud() {
        let (_dir, tool) = make_tool();

        run(
            &tool,
            json!({
                "action": "add_hypothesis",
                "title": "Batch size matters",
                "description": "Larger batches reduce overhead"
            }),
        )
        .await;

        let updated = run(
            &tool,
            json!({
                "action": "update_hypothesis",
                "id": "h1",
                "status": "testing"
            }),
        )
        .await;
        assert!(updated.content.contains("Testing"));

        let fetched = run(&tool, json!({"action": "get_hypothesis", "id": "h1"})).await;
        assert!(fetched.content.contains("Batch size matters"));
        assert!(fetched.content.contains("Testing"));
        assert!(fetched.content.contains("Larger batches reduce overhead"));
    }

    /// An experiment linked to a hypothesis is reported as `e1` and listed as `Planned`.
    #[tokio::test]
    async fn test_add_experiment() {
        let (_dir, tool) = make_tool();

        run(
            &tool,
            json!({
                "action": "add_hypothesis",
                "title": "Test hyp"
            }),
        )
        .await;

        let created = run(
            &tool,
            json!({
                "action": "add_experiment",
                "name": "Run A",
                "hypothesis_id": "h1",
                "config": {"learning_rate": 0.01}
            }),
        )
        .await;
        assert!(created.content.contains("e1"));
        assert!(created.content.contains("Run A"));

        let listing = run(&tool, json!({"action": "list_experiments"})).await;
        assert!(listing.content.contains("Run A"));
        assert!(listing.content.contains("Planned"));
    }

    /// Start then complete an experiment; the stored status and notes are readable afterwards.
    #[tokio::test]
    async fn test_experiment_lifecycle_complete() {
        let (_dir, tool) = make_tool();

        run(
            &tool,
            json!({
                "action": "add_experiment",
                "name": "Exp Alpha"
            }),
        )
        .await;

        let started = run(&tool, json!({"action": "start_experiment", "id": "e1"})).await;
        assert!(started.content.contains("running"));

        let finished = run(
            &tool,
            json!({
                "action": "complete_experiment",
                "id": "e1",
                "metrics": {"accuracy": 0.95},
                "notes": "Good results"
            }),
        )
        .await;
        assert!(finished.content.contains("completed"));

        let fetched = run(&tool, json!({"action": "get_experiment", "id": "e1"})).await;
        assert!(fetched.content.contains("Completed"));
        assert!(fetched.content.contains("Good results"));
    }

    /// Start then fail an experiment; the stored status and notes are readable afterwards.
    #[tokio::test]
    async fn test_experiment_lifecycle_fail() {
        let (_dir, tool) = make_tool();

        run(
            &tool,
            json!({
                "action": "add_experiment",
                "name": "Exp Beta"
            }),
        )
        .await;
        run(&tool, json!({"action": "start_experiment", "id": "e1"})).await;

        let aborted = run(
            &tool,
            json!({
                "action": "fail_experiment",
                "id": "e1",
                "notes": "OOM error"
            }),
        )
        .await;
        assert!(aborted.content.contains("failed"));

        let fetched = run(&tool, json!({"action": "get_experiment", "id": "e1"})).await;
        assert!(fetched.content.contains("Failed"));
        assert!(fetched.content.contains("OOM error"));
    }

    /// Evidence recorded against a hypothesis shows up when that hypothesis is fetched.
    #[tokio::test]
    async fn test_record_evidence() {
        let (_dir, tool) = make_tool();

        run(
            &tool,
            json!({
                "action": "add_hypothesis",
                "title": "Evidence test"
            }),
        )
        .await;
        run(
            &tool,
            json!({
                "action": "add_experiment",
                "name": "Trial 1",
                "hypothesis_id": "h1"
            }),
        )
        .await;

        let recorded = run(
            &tool,
            json!({
                "action": "record_evidence",
                "hypothesis_id": "h1",
                "experiment_id": "e1",
                "finding": "Latency reduced by 40%",
                "supports": true,
                "confidence": 0.85
            }),
        )
        .await;
        assert!(recorded.content.contains("Recorded evidence"));
        assert!(recorded.content.contains("h1"));
        assert!(recorded.content.contains("0.85"));

        let fetched = run(&tool, json!({"action": "get_hypothesis", "id": "h1"})).await;
        assert!(fetched.content.contains("Latency reduced by 40%"));
        assert!(fetched.content.contains("0.85"));
    }

    /// Comparing two experiments names both of them and states how many were compared.
    #[tokio::test]
    async fn test_compare_experiments() {
        let (_dir, tool) = make_tool();

        run(
            &tool,
            json!({
                "action": "add_experiment",
                "name": "Config A",
                "config": {"batch_size": 32}
            }),
        )
        .await;
        run(
            &tool,
            json!({
                "action": "add_experiment",
                "name": "Config B",
                "config": {"batch_size": 64}
            }),
        )
        .await;

        let comparison = run(
            &tool,
            json!({
                "action": "compare_experiments",
                "ids": ["e1", "e2"]
            }),
        )
        .await;
        assert!(comparison.content.contains("Config A"));
        assert!(comparison.content.contains("Config B"));
        assert!(comparison.content.contains("Comparison of 2 experiments"));
    }

    /// With nothing stored, `summary` reports that there is no data.
    #[tokio::test]
    async fn test_summary_empty() {
        let (_dir, tool) = make_tool();

        let summary = run(&tool, json!({"action": "summary"})).await;
        assert!(summary.content.contains("No data to summarize"));
    }

    /// With one completed experiment and one supporting finding, `summary` reports the totals.
    #[tokio::test]
    async fn test_summary_with_data() {
        let (_dir, tool) = make_tool();

        run(
            &tool,
            json!({
                "action": "add_hypothesis",
                "title": "H1"
            }),
        )
        .await;
        run(
            &tool,
            json!({
                "action": "add_experiment",
                "name": "E1",
                "hypothesis_id": "h1"
            }),
        )
        .await;
        run(&tool, json!({"action": "start_experiment", "id": "e1"})).await;
        run(&tool, json!({"action": "complete_experiment", "id": "e1"})).await;
        run(
            &tool,
            json!({
                "action": "record_evidence",
                "hypothesis_id": "h1",
                "experiment_id": "e1",
                "finding": "Positive result",
                "supports": true,
                "confidence": 0.9
            }),
        )
        .await;

        let summary = run(&tool, json!({"action": "summary"})).await;
        assert!(summary.content.contains("Hypotheses: 1"));
        assert!(summary.content.contains("Experiments: 1"));
        assert!(summary.content.contains("1 supporting"));
        assert!(summary.content.contains("Success rate: 100%"));
    }

    /// The markdown export has the report heading plus the stored hypothesis and experiment.
    #[tokio::test]
    async fn test_export_markdown() {
        let (_dir, tool) = make_tool();

        run(
            &tool,
            json!({
                "action": "add_hypothesis",
                "title": "Cache hypothesis",
                "description": "Caching reduces latency"
            }),
        )
        .await;
        run(
            &tool,
            json!({
                "action": "add_experiment",
                "name": "Cache test",
                "hypothesis_id": "h1"
            }),
        )
        .await;

        let report = run(&tool, json!({"action": "export_markdown"})).await;
        assert!(report.content.contains("# Experiment Tracker Report"));
        assert!(report.content.contains("Cache hypothesis"));
        assert!(report.content.contains("**Status:** Proposed"));
        assert!(report.content.contains("Cache test"));
    }

    /// Listing hypotheses with a `status` filter returns only the matching ones.
    #[tokio::test]
    async fn test_list_hypotheses_filter_status() {
        let (_dir, tool) = make_tool();

        run(
            &tool,
            json!({
                "action": "add_hypothesis",
                "title": "Hyp A"
            }),
        )
        .await;
        run(
            &tool,
            json!({
                "action": "add_hypothesis",
                "title": "Hyp B"
            }),
        )
        .await;
        // Move only the second hypothesis out of its initial status.
        run(
            &tool,
            json!({
                "action": "update_hypothesis",
                "id": "h2",
                "status": "testing"
            }),
        )
        .await;

        let proposed_only = run(
            &tool,
            json!({"action": "list_hypotheses", "status": "proposed"}),
        )
        .await;
        assert!(proposed_only.content.contains("Hyp A"));
        assert!(!proposed_only.content.contains("Hyp B"));

        let testing_only = run(
            &tool,
            json!({"action": "list_hypotheses", "status": "testing"}),
        )
        .await;
        assert!(testing_only.content.contains("Hyp B"));
        assert!(!testing_only.content.contains("Hyp A"));
    }

    /// Listing experiments with a `hypothesis_id` filter returns only the linked ones.
    #[tokio::test]
    async fn test_list_experiments_filter() {
        let (_dir, tool) = make_tool();

        run(
            &tool,
            json!({
                "action": "add_hypothesis",
                "title": "H1"
            }),
        )
        .await;
        run(
            &tool,
            json!({
                "action": "add_hypothesis",
                "title": "H2"
            }),
        )
        .await;
        run(
            &tool,
            json!({
                "action": "add_experiment",
                "name": "Exp for H1",
                "hypothesis_id": "h1"
            }),
        )
        .await;
        run(
            &tool,
            json!({
                "action": "add_experiment",
                "name": "Exp for H2",
                "hypothesis_id": "h2"
            }),
        )
        .await;

        let linked_to_h1 = run(
            &tool,
            json!({"action": "list_experiments", "hypothesis_id": "h1"}),
        )
        .await;
        assert!(linked_to_h1.content.contains("Exp for H1"));
        assert!(!linked_to_h1.content.contains("Exp for H2"));
    }

    /// State read back through `load_state` reflects what the actions stored.
    #[tokio::test]
    async fn test_state_roundtrip() {
        let (_dir, tool) = make_tool();

        run(
            &tool,
            json!({
                "action": "add_hypothesis",
                "title": "Persist me",
                "tags": ["tag1"]
            }),
        )
        .await;
        run(
            &tool,
            json!({
                "action": "add_experiment",
                "name": "Saved exp",
                "hypothesis_id": "h1"
            }),
        )
        .await;

        let state = tool.load_state();
        assert_eq!(state.hypotheses.len(), 1);
        assert_eq!(state.experiments.len(), 1);

        let hypothesis = &state.hypotheses[0];
        assert_eq!(hypothesis.title, "Persist me");
        assert_eq!(hypothesis.tags, vec!["tag1"]);

        let experiment = &state.experiments[0];
        assert_eq!(experiment.name, "Saved exp");
        assert_eq!(experiment.hypothesis_id.as_deref(), Some("h1"));

        // One item of each kind was created, so both counters have advanced to 2.
        assert_eq!(state.next_hypothesis_id, 2);
        assert_eq!(state.next_experiment_id, 2);
    }

    /// Out-of-range confidence values end up as 1.0 and 0.0 in output and stored state.
    #[tokio::test]
    async fn test_evidence_confidence_clamping() {
        let (_dir, tool) = make_tool();

        run(
            &tool,
            json!({
                "action": "add_hypothesis",
                "title": "Clamp test"
            }),
        )
        .await;
        run(
            &tool,
            json!({
                "action": "add_experiment",
                "name": "Clamp exp"
            }),
        )
        .await;

        let too_high = run(
            &tool,
            json!({
                "action": "record_evidence",
                "hypothesis_id": "h1",
                "experiment_id": "e1",
                "finding": "Over confident",
                "supports": true,
                "confidence": 1.5
            }),
        )
        .await;
        assert!(too_high.content.contains("1.00"));

        let too_low = run(
            &tool,
            json!({
                "action": "record_evidence",
                "hypothesis_id": "h1",
                "experiment_id": "e1",
                "finding": "Under confident",
                "supports": false,
                "confidence": -0.5
            }),
        )
        .await;
        assert!(too_low.content.contains("0.00"));

        let state = tool.load_state();
        let evidence = &state.hypotheses[0].evidence;
        assert_eq!(evidence.len(), 2);
        assert!((evidence[0].confidence - 1.0).abs() < f64::EPSILON);
        assert!((evidence[1].confidence - 0.0).abs() < f64::EPSILON);
    }

    /// An unrecognised action returns `Ok` output that names the offending action.
    #[tokio::test]
    async fn test_unknown_action() {
        let (_dir, tool) = make_tool();

        let reply = run(&tool, json!({"action": "nonexistent"})).await;
        assert!(reply.content.contains("Unknown action"));
        assert!(reply.content.contains("nonexistent"));
    }
}