Skip to main content

rustant_tools/
experiment_tracker.rs

1//! Experiment tracker tool — track scientific hypotheses, experiments, results, and evidence.
2
3use async_trait::async_trait;
4use chrono::{DateTime, Utc};
5use rustant_core::error::ToolError;
6use rustant_core::types::{RiskLevel, ToolOutput};
7use serde::{Deserialize, Serialize};
8use serde_json::{Value, json};
9use std::path::PathBuf;
10
11use crate::registry::Tool;
12
13// ---------------------------------------------------------------------------
14// Data models
15// ---------------------------------------------------------------------------
16
17#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
18enum HypothesisStatus {
19    Proposed,
20    Testing,
21    Supported,
22    Refuted,
23    Inconclusive,
24}
25
26impl std::fmt::Display for HypothesisStatus {
27    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28        match self {
29            Self::Proposed => write!(f, "Proposed"),
30            Self::Testing => write!(f, "Testing"),
31            Self::Supported => write!(f, "Supported"),
32            Self::Refuted => write!(f, "Refuted"),
33            Self::Inconclusive => write!(f, "Inconclusive"),
34        }
35    }
36}
37
38#[derive(Debug, Clone, Serialize, Deserialize)]
39struct Evidence {
40    experiment_id: String,
41    finding: String,
42    supports: bool,
43    confidence: f64,
44    recorded_at: DateTime<Utc>,
45}
46
47#[derive(Debug, Clone, Serialize, Deserialize)]
48struct Hypothesis {
49    id: String,
50    title: String,
51    description: String,
52    status: HypothesisStatus,
53    evidence: Vec<Evidence>,
54    tags: Vec<String>,
55    created_at: DateTime<Utc>,
56}
57
58#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
59enum ExperimentStatus {
60    Planned,
61    Running,
62    Completed,
63    Failed,
64    Cancelled,
65}
66
67impl std::fmt::Display for ExperimentStatus {
68    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69        match self {
70            Self::Planned => write!(f, "Planned"),
71            Self::Running => write!(f, "Running"),
72            Self::Completed => write!(f, "Completed"),
73            Self::Failed => write!(f, "Failed"),
74            Self::Cancelled => write!(f, "Cancelled"),
75        }
76    }
77}
78
79#[derive(Debug, Clone, Serialize, Deserialize)]
80struct Experiment {
81    id: String,
82    hypothesis_id: Option<String>,
83    name: String,
84    description: String,
85    config: Value,
86    metrics: Value,
87    status: ExperimentStatus,
88    notes: String,
89    tags: Vec<String>,
90    created_at: DateTime<Utc>,
91    started_at: Option<DateTime<Utc>>,
92    completed_at: Option<DateTime<Utc>>,
93}
94
95#[derive(Debug, Default, Serialize, Deserialize)]
96struct ExperimentState {
97    hypotheses: Vec<Hypothesis>,
98    experiments: Vec<Experiment>,
99    next_hypothesis_id: usize,
100    next_experiment_id: usize,
101}
102
103// ---------------------------------------------------------------------------
104// Tool struct
105// ---------------------------------------------------------------------------
106
/// Tool that tracks hypotheses, experiments and evidence, persisting them to a
/// JSON file inside the workspace.
pub struct ExperimentTrackerTool {
    /// Directory under which `.rustant/experiments/tracker.json` is stored.
    workspace: PathBuf,
}
110
111impl ExperimentTrackerTool {
112    pub fn new(workspace: PathBuf) -> Self {
113        Self { workspace }
114    }
115
116    fn state_path(&self) -> PathBuf {
117        self.workspace
118            .join(".rustant")
119            .join("experiments")
120            .join("tracker.json")
121    }
122
123    fn load_state(&self) -> ExperimentState {
124        let path = self.state_path();
125        if path.exists() {
126            std::fs::read_to_string(&path)
127                .ok()
128                .and_then(|s| serde_json::from_str(&s).ok())
129                .unwrap_or_default()
130        } else {
131            ExperimentState {
132                hypotheses: Vec::new(),
133                experiments: Vec::new(),
134                next_hypothesis_id: 1,
135                next_experiment_id: 1,
136            }
137        }
138    }
139
140    fn save_state(&self, state: &ExperimentState) -> Result<(), ToolError> {
141        let path = self.state_path();
142        if let Some(parent) = path.parent() {
143            std::fs::create_dir_all(parent).map_err(|e| ToolError::ExecutionFailed {
144                name: "experiment_tracker".to_string(),
145                message: format!("Failed to create state dir: {}", e),
146            })?;
147        }
148        let json = serde_json::to_string_pretty(state).map_err(|e| ToolError::ExecutionFailed {
149            name: "experiment_tracker".to_string(),
150            message: format!("Failed to serialize state: {}", e),
151        })?;
152        let tmp = path.with_extension("json.tmp");
153        std::fs::write(&tmp, &json).map_err(|e| ToolError::ExecutionFailed {
154            name: "experiment_tracker".to_string(),
155            message: format!("Failed to write state: {}", e),
156        })?;
157        std::fs::rename(&tmp, &path).map_err(|e| ToolError::ExecutionFailed {
158            name: "experiment_tracker".to_string(),
159            message: format!("Failed to rename state file: {}", e),
160        })?;
161        Ok(())
162    }
163
164    // --- action helpers ---
165
166    fn action_add_hypothesis(&self, args: &Value) -> Result<ToolOutput, ToolError> {
167        let title = args
168            .get("title")
169            .and_then(|v| v.as_str())
170            .unwrap_or("")
171            .trim();
172        if title.is_empty() {
173            return Ok(ToolOutput::text(
174                "Please provide a title for the hypothesis.",
175            ));
176        }
177        let description = args
178            .get("description")
179            .and_then(|v| v.as_str())
180            .unwrap_or("")
181            .to_string();
182        let tags = parse_tags(args);
183
184        let mut state = self.load_state();
185        let id = format!("h{}", state.next_hypothesis_id);
186        state.next_hypothesis_id += 1;
187        state.hypotheses.push(Hypothesis {
188            id: id.clone(),
189            title: title.to_string(),
190            description,
191            status: HypothesisStatus::Proposed,
192            evidence: Vec::new(),
193            tags,
194            created_at: Utc::now(),
195        });
196        self.save_state(&state)?;
197        Ok(ToolOutput::text(format!(
198            "Added hypothesis {} — '{}'.",
199            id, title
200        )))
201    }
202
203    fn action_update_hypothesis(&self, args: &Value) -> Result<ToolOutput, ToolError> {
204        let id = args.get("id").and_then(|v| v.as_str()).unwrap_or("");
205        if id.is_empty() {
206            return Ok(ToolOutput::text("Please provide a hypothesis id."));
207        }
208        let mut state = self.load_state();
209        let hyp = state.hypotheses.iter_mut().find(|h| h.id == id);
210        match hyp {
211            Some(h) => {
212                if let Some(title) = args.get("title").and_then(|v| v.as_str()) {
213                    h.title = title.to_string();
214                }
215                if let Some(status_str) = args.get("status").and_then(|v| v.as_str())
216                    && let Some(status) = parse_hypothesis_status(status_str)
217                {
218                    h.status = status;
219                }
220                if let Some(tags_val) = args.get("tags")
221                    && let Some(arr) = tags_val.as_array()
222                {
223                    h.tags = arr
224                        .iter()
225                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
226                        .collect();
227                }
228                let title = h.title.clone();
229                let status = h.status.clone();
230                self.save_state(&state)?;
231                Ok(ToolOutput::text(format!(
232                    "Updated hypothesis {} — '{}' [{}].",
233                    id, title, status
234                )))
235            }
236            None => Ok(ToolOutput::text(format!("Hypothesis {} not found.", id))),
237        }
238    }
239
240    fn action_list_hypotheses(&self, args: &Value) -> Result<ToolOutput, ToolError> {
241        let state = self.load_state();
242        let status_filter = args.get("status").and_then(|v| v.as_str());
243        let tag_filter = args.get("tag").and_then(|v| v.as_str());
244
245        let filtered: Vec<&Hypothesis> = state
246            .hypotheses
247            .iter()
248            .filter(|h| {
249                if let Some(sf) = status_filter
250                    && let Some(parsed) = parse_hypothesis_status(sf)
251                    && h.status != parsed
252                {
253                    return false;
254                }
255                if let Some(tf) = tag_filter
256                    && !h.tags.iter().any(|t| t == tf)
257                {
258                    return false;
259                }
260                true
261            })
262            .collect();
263
264        if filtered.is_empty() {
265            return Ok(ToolOutput::text("No hypotheses found."));
266        }
267        let lines: Vec<String> = filtered
268            .iter()
269            .map(|h| {
270                let tags = if h.tags.is_empty() {
271                    String::new()
272                } else {
273                    format!(" [{}]", h.tags.join(", "))
274                };
275                format!(
276                    "  {} — {} [{}] ({} evidence){}",
277                    h.id,
278                    h.title,
279                    h.status,
280                    h.evidence.len(),
281                    tags
282                )
283            })
284            .collect();
285        Ok(ToolOutput::text(format!(
286            "Hypotheses ({}):\n{}",
287            filtered.len(),
288            lines.join("\n")
289        )))
290    }
291
292    fn action_get_hypothesis(&self, args: &Value) -> Result<ToolOutput, ToolError> {
293        let id = args.get("id").and_then(|v| v.as_str()).unwrap_or("");
294        if id.is_empty() {
295            return Ok(ToolOutput::text("Please provide a hypothesis id."));
296        }
297        let state = self.load_state();
298        let hyp = state.hypotheses.iter().find(|h| h.id == id);
299        match hyp {
300            Some(h) => {
301                let linked_experiments: Vec<&Experiment> = state
302                    .experiments
303                    .iter()
304                    .filter(|e| e.hypothesis_id.as_deref() == Some(&h.id))
305                    .collect();
306
307                let mut out = format!(
308                    "Hypothesis: {} — {}\nStatus: {}\nDescription: {}\nTags: {}\nCreated: {}\n",
309                    h.id,
310                    h.title,
311                    h.status,
312                    if h.description.is_empty() {
313                        "(none)"
314                    } else {
315                        &h.description
316                    },
317                    if h.tags.is_empty() {
318                        "(none)".to_string()
319                    } else {
320                        h.tags.join(", ")
321                    },
322                    h.created_at.format("%Y-%m-%d %H:%M UTC"),
323                );
324
325                if !h.evidence.is_empty() {
326                    out.push_str(&format!("\nEvidence ({}):\n", h.evidence.len()));
327                    for ev in &h.evidence {
328                        out.push_str(&format!(
329                            "  [{}] {} (confidence: {:.2}, supports: {})\n",
330                            ev.experiment_id, ev.finding, ev.confidence, ev.supports
331                        ));
332                    }
333                }
334
335                if !linked_experiments.is_empty() {
336                    out.push_str(&format!(
337                        "\nLinked experiments ({}):\n",
338                        linked_experiments.len()
339                    ));
340                    for exp in &linked_experiments {
341                        out.push_str(&format!("  {} — {} [{}]\n", exp.id, exp.name, exp.status));
342                    }
343                }
344
345                Ok(ToolOutput::text(out))
346            }
347            None => Ok(ToolOutput::text(format!("Hypothesis {} not found.", id))),
348        }
349    }
350
351    fn action_add_experiment(&self, args: &Value) -> Result<ToolOutput, ToolError> {
352        let name = args
353            .get("name")
354            .and_then(|v| v.as_str())
355            .unwrap_or("")
356            .trim();
357        if name.is_empty() {
358            return Ok(ToolOutput::text(
359                "Please provide a name for the experiment.",
360            ));
361        }
362        let description = args
363            .get("description")
364            .and_then(|v| v.as_str())
365            .unwrap_or("")
366            .to_string();
367        let hypothesis_id = args
368            .get("hypothesis_id")
369            .and_then(|v| v.as_str())
370            .map(|s| s.to_string());
371        let config = args.get("config").cloned().unwrap_or(json!({}));
372        let tags = parse_tags(args);
373
374        let mut state = self.load_state();
375
376        // Validate hypothesis_id if provided
377        if let Some(ref hid) = hypothesis_id
378            && !state.hypotheses.iter().any(|h| h.id == *hid)
379        {
380            return Ok(ToolOutput::text(format!("Hypothesis {} not found.", hid)));
381        }
382
383        let id = format!("e{}", state.next_experiment_id);
384        state.next_experiment_id += 1;
385        state.experiments.push(Experiment {
386            id: id.clone(),
387            hypothesis_id,
388            name: name.to_string(),
389            description,
390            config,
391            metrics: json!({}),
392            status: ExperimentStatus::Planned,
393            notes: String::new(),
394            tags,
395            created_at: Utc::now(),
396            started_at: None,
397            completed_at: None,
398        });
399        self.save_state(&state)?;
400        Ok(ToolOutput::text(format!(
401            "Added experiment {} — '{}'.",
402            id, name
403        )))
404    }
405
406    fn action_start_experiment(&self, args: &Value) -> Result<ToolOutput, ToolError> {
407        let id = args.get("id").and_then(|v| v.as_str()).unwrap_or("");
408        if id.is_empty() {
409            return Ok(ToolOutput::text("Please provide an experiment id."));
410        }
411        let mut state = self.load_state();
412        let exp = state.experiments.iter_mut().find(|e| e.id == id);
413        match exp {
414            Some(e) => {
415                if e.status != ExperimentStatus::Planned {
416                    return Ok(ToolOutput::text(format!(
417                        "Experiment {} cannot be started — current status is {}.",
418                        id, e.status
419                    )));
420                }
421                e.status = ExperimentStatus::Running;
422                e.started_at = Some(Utc::now());
423                let name = e.name.clone();
424                self.save_state(&state)?;
425                Ok(ToolOutput::text(format!(
426                    "Experiment {} '{}' is now running.",
427                    id, name
428                )))
429            }
430            None => Ok(ToolOutput::text(format!("Experiment {} not found.", id))),
431        }
432    }
433
434    fn action_complete_experiment(&self, args: &Value) -> Result<ToolOutput, ToolError> {
435        let id = args.get("id").and_then(|v| v.as_str()).unwrap_or("");
436        if id.is_empty() {
437            return Ok(ToolOutput::text("Please provide an experiment id."));
438        }
439        let mut state = self.load_state();
440        let exp = state.experiments.iter_mut().find(|e| e.id == id);
441        match exp {
442            Some(e) => {
443                if e.status != ExperimentStatus::Running {
444                    return Ok(ToolOutput::text(format!(
445                        "Experiment {} cannot be completed — current status is {}.",
446                        id, e.status
447                    )));
448                }
449                e.status = ExperimentStatus::Completed;
450                e.completed_at = Some(Utc::now());
451                if let Some(metrics) = args.get("metrics") {
452                    e.metrics = metrics.clone();
453                }
454                if let Some(notes) = args.get("notes").and_then(|v| v.as_str()) {
455                    e.notes = notes.to_string();
456                }
457                let name = e.name.clone();
458                self.save_state(&state)?;
459                Ok(ToolOutput::text(format!(
460                    "Experiment {} '{}' completed.",
461                    id, name
462                )))
463            }
464            None => Ok(ToolOutput::text(format!("Experiment {} not found.", id))),
465        }
466    }
467
468    fn action_fail_experiment(&self, args: &Value) -> Result<ToolOutput, ToolError> {
469        let id = args.get("id").and_then(|v| v.as_str()).unwrap_or("");
470        if id.is_empty() {
471            return Ok(ToolOutput::text("Please provide an experiment id."));
472        }
473        let mut state = self.load_state();
474        let exp = state.experiments.iter_mut().find(|e| e.id == id);
475        match exp {
476            Some(e) => {
477                if e.status != ExperimentStatus::Running {
478                    return Ok(ToolOutput::text(format!(
479                        "Experiment {} cannot be failed — current status is {}.",
480                        id, e.status
481                    )));
482                }
483                e.status = ExperimentStatus::Failed;
484                e.completed_at = Some(Utc::now());
485                if let Some(notes) = args.get("notes").and_then(|v| v.as_str()) {
486                    e.notes = notes.to_string();
487                }
488                let name = e.name.clone();
489                self.save_state(&state)?;
490                Ok(ToolOutput::text(format!(
491                    "Experiment {} '{}' failed.",
492                    id, name
493                )))
494            }
495            None => Ok(ToolOutput::text(format!("Experiment {} not found.", id))),
496        }
497    }
498
499    fn action_get_experiment(&self, args: &Value) -> Result<ToolOutput, ToolError> {
500        let id = args.get("id").and_then(|v| v.as_str()).unwrap_or("");
501        if id.is_empty() {
502            return Ok(ToolOutput::text("Please provide an experiment id."));
503        }
504        let state = self.load_state();
505        let exp = state.experiments.iter().find(|e| e.id == id);
506        match exp {
507            Some(e) => {
508                let mut out = format!(
509                    "Experiment: {} — {}\nStatus: {}\nDescription: {}\nHypothesis: {}\nTags: {}\nConfig: {}\nMetrics: {}\nNotes: {}\nCreated: {}\nStarted: {}\nCompleted: {}\n",
510                    e.id,
511                    e.name,
512                    e.status,
513                    if e.description.is_empty() {
514                        "(none)"
515                    } else {
516                        &e.description
517                    },
518                    e.hypothesis_id.as_deref().unwrap_or("(none)"),
519                    if e.tags.is_empty() {
520                        "(none)".to_string()
521                    } else {
522                        e.tags.join(", ")
523                    },
524                    e.config,
525                    e.metrics,
526                    if e.notes.is_empty() {
527                        "(none)"
528                    } else {
529                        &e.notes
530                    },
531                    e.created_at.format("%Y-%m-%d %H:%M UTC"),
532                    e.started_at
533                        .map(|t| t.format("%Y-%m-%d %H:%M UTC").to_string())
534                        .unwrap_or_else(|| "(not started)".to_string()),
535                    e.completed_at
536                        .map(|t| t.format("%Y-%m-%d %H:%M UTC").to_string())
537                        .unwrap_or_else(|| "(not completed)".to_string()),
538                );
539
540                // Show related evidence if linked to a hypothesis
541                if let Some(ref hid) = e.hypothesis_id
542                    && let Some(hyp) = state.hypotheses.iter().find(|h| h.id == *hid)
543                {
544                    let related: Vec<&Evidence> = hyp
545                        .evidence
546                        .iter()
547                        .filter(|ev| ev.experiment_id == e.id)
548                        .collect();
549                    if !related.is_empty() {
550                        out.push_str(&format!(
551                            "\nEvidence from this experiment ({}):\n",
552                            related.len()
553                        ));
554                        for ev in &related {
555                            out.push_str(&format!(
556                                "  {} (confidence: {:.2}, supports: {})\n",
557                                ev.finding, ev.confidence, ev.supports
558                            ));
559                        }
560                    }
561                }
562
563                Ok(ToolOutput::text(out))
564            }
565            None => Ok(ToolOutput::text(format!("Experiment {} not found.", id))),
566        }
567    }
568
569    fn action_list_experiments(&self, args: &Value) -> Result<ToolOutput, ToolError> {
570        let state = self.load_state();
571        let hypothesis_id_filter = args.get("hypothesis_id").and_then(|v| v.as_str());
572        let status_filter = args.get("status").and_then(|v| v.as_str());
573        let tag_filter = args.get("tag").and_then(|v| v.as_str());
574
575        let filtered: Vec<&Experiment> = state
576            .experiments
577            .iter()
578            .filter(|e| {
579                if let Some(hid) = hypothesis_id_filter
580                    && e.hypothesis_id.as_deref() != Some(hid)
581                {
582                    return false;
583                }
584                if let Some(sf) = status_filter
585                    && let Some(parsed) = parse_experiment_status(sf)
586                    && e.status != parsed
587                {
588                    return false;
589                }
590                if let Some(tf) = tag_filter
591                    && !e.tags.iter().any(|t| t == tf)
592                {
593                    return false;
594                }
595                true
596            })
597            .collect();
598
599        if filtered.is_empty() {
600            return Ok(ToolOutput::text("No experiments found."));
601        }
602        let lines: Vec<String> = filtered
603            .iter()
604            .map(|e| {
605                let hyp = e
606                    .hypothesis_id
607                    .as_deref()
608                    .map(|h| format!(" ({})", h))
609                    .unwrap_or_default();
610                let tags = if e.tags.is_empty() {
611                    String::new()
612                } else {
613                    format!(" [{}]", e.tags.join(", "))
614                };
615                format!("  {} — {} [{}]{}{}", e.id, e.name, e.status, hyp, tags)
616            })
617            .collect();
618        Ok(ToolOutput::text(format!(
619            "Experiments ({}):\n{}",
620            filtered.len(),
621            lines.join("\n")
622        )))
623    }
624
625    fn action_record_evidence(&self, args: &Value) -> Result<ToolOutput, ToolError> {
626        let hypothesis_id = args
627            .get("hypothesis_id")
628            .and_then(|v| v.as_str())
629            .unwrap_or("");
630        let experiment_id = args
631            .get("experiment_id")
632            .and_then(|v| v.as_str())
633            .unwrap_or("");
634        let finding = args.get("finding").and_then(|v| v.as_str()).unwrap_or("");
635        let supports = args
636            .get("supports")
637            .and_then(|v| v.as_bool())
638            .unwrap_or(false);
639        let confidence = args
640            .get("confidence")
641            .and_then(|v| v.as_f64())
642            .unwrap_or(0.5)
643            .clamp(0.0, 1.0);
644
645        if hypothesis_id.is_empty() || experiment_id.is_empty() || finding.is_empty() {
646            return Ok(ToolOutput::text(
647                "Please provide hypothesis_id, experiment_id, and finding.",
648            ));
649        }
650
651        let mut state = self.load_state();
652
653        // Validate experiment exists
654        if !state.experiments.iter().any(|e| e.id == experiment_id) {
655            return Ok(ToolOutput::text(format!(
656                "Experiment {} not found.",
657                experiment_id
658            )));
659        }
660
661        let hyp = state.hypotheses.iter_mut().find(|h| h.id == hypothesis_id);
662        match hyp {
663            Some(h) => {
664                h.evidence.push(Evidence {
665                    experiment_id: experiment_id.to_string(),
666                    finding: finding.to_string(),
667                    supports,
668                    confidence,
669                    recorded_at: Utc::now(),
670                });
671                self.save_state(&state)?;
672                Ok(ToolOutput::text(format!(
673                    "Recorded evidence for {} from {} (supports: {}, confidence: {:.2}).",
674                    hypothesis_id, experiment_id, supports, confidence
675                )))
676            }
677            None => Ok(ToolOutput::text(format!(
678                "Hypothesis {} not found.",
679                hypothesis_id
680            ))),
681        }
682    }
683
684    fn action_compare_experiments(&self, args: &Value) -> Result<ToolOutput, ToolError> {
685        let ids = args
686            .get("ids")
687            .and_then(|v| v.as_array())
688            .map(|arr| {
689                arr.iter()
690                    .filter_map(|v| v.as_str().map(|s| s.to_string()))
691                    .collect::<Vec<_>>()
692            })
693            .unwrap_or_default();
694
695        if ids.len() < 2 {
696            return Ok(ToolOutput::text(
697                "Please provide at least 2 experiment ids to compare.",
698            ));
699        }
700
701        let state = self.load_state();
702        let experiments: Vec<&Experiment> = ids
703            .iter()
704            .filter_map(|id| state.experiments.iter().find(|e| e.id == *id))
705            .collect();
706
707        if experiments.is_empty() {
708            return Ok(ToolOutput::text("No matching experiments found."));
709        }
710
711        let mut out = format!("Comparison of {} experiments:\n\n", experiments.len());
712        for exp in &experiments {
713            out.push_str(&format!("--- {} ---\n", exp.id));
714            out.push_str(&format!("  Name: {}\n", exp.name));
715            out.push_str(&format!("  Status: {}\n", exp.status));
716            out.push_str(&format!(
717                "  Hypothesis: {}\n",
718                exp.hypothesis_id.as_deref().unwrap_or("(none)")
719            ));
720            out.push_str(&format!("  Config: {}\n", exp.config));
721            out.push_str(&format!("  Metrics: {}\n", exp.metrics));
722            if !exp.notes.is_empty() {
723                out.push_str(&format!("  Notes: {}\n", exp.notes));
724            }
725            out.push('\n');
726        }
727
728        Ok(ToolOutput::text(out))
729    }
730
731    fn action_summary(&self, args: &Value) -> Result<ToolOutput, ToolError> {
732        let state = self.load_state();
733        let hypothesis_id_filter = args.get("hypothesis_id").and_then(|v| v.as_str());
734
735        let hypotheses: Vec<&Hypothesis> = if let Some(hid) = hypothesis_id_filter {
736            state.hypotheses.iter().filter(|h| h.id == hid).collect()
737        } else {
738            state.hypotheses.iter().collect()
739        };
740
741        let experiments: Vec<&Experiment> = if let Some(hid) = hypothesis_id_filter {
742            state
743                .experiments
744                .iter()
745                .filter(|e| e.hypothesis_id.as_deref() == Some(hid))
746                .collect()
747        } else {
748            state.experiments.iter().collect()
749        };
750
751        if hypotheses.is_empty() && experiments.is_empty() {
752            return Ok(ToolOutput::text("No data to summarize."));
753        }
754
755        let mut out = String::from("Summary:\n\n");
756
757        // Hypothesis stats
758        out.push_str(&format!("Hypotheses: {}\n", hypotheses.len()));
759        let proposed = hypotheses
760            .iter()
761            .filter(|h| h.status == HypothesisStatus::Proposed)
762            .count();
763        let testing = hypotheses
764            .iter()
765            .filter(|h| h.status == HypothesisStatus::Testing)
766            .count();
767        let supported = hypotheses
768            .iter()
769            .filter(|h| h.status == HypothesisStatus::Supported)
770            .count();
771        let refuted = hypotheses
772            .iter()
773            .filter(|h| h.status == HypothesisStatus::Refuted)
774            .count();
775        let inconclusive = hypotheses
776            .iter()
777            .filter(|h| h.status == HypothesisStatus::Inconclusive)
778            .count();
779        out.push_str(&format!(
780            "  Proposed: {}, Testing: {}, Supported: {}, Refuted: {}, Inconclusive: {}\n",
781            proposed, testing, supported, refuted, inconclusive
782        ));
783
784        // Evidence balance
785        let total_evidence: usize = hypotheses.iter().map(|h| h.evidence.len()).sum();
786        let supporting: usize = hypotheses
787            .iter()
788            .flat_map(|h| h.evidence.iter())
789            .filter(|e| e.supports)
790            .count();
791        let opposing = total_evidence - supporting;
792        out.push_str(&format!(
793            "\nEvidence: {} total ({} supporting, {} opposing)\n",
794            total_evidence, supporting, opposing
795        ));
796
797        if total_evidence > 0 {
798            let avg_confidence: f64 = hypotheses
799                .iter()
800                .flat_map(|h| h.evidence.iter())
801                .map(|e| e.confidence)
802                .sum::<f64>()
803                / total_evidence as f64;
804            out.push_str(&format!("  Average confidence: {:.2}\n", avg_confidence));
805        }
806
807        // Experiment stats
808        out.push_str(&format!("\nExperiments: {}\n", experiments.len()));
809        let completed = experiments
810            .iter()
811            .filter(|e| e.status == ExperimentStatus::Completed)
812            .count();
813        let failed = experiments
814            .iter()
815            .filter(|e| e.status == ExperimentStatus::Failed)
816            .count();
817        let running = experiments
818            .iter()
819            .filter(|e| e.status == ExperimentStatus::Running)
820            .count();
821        let planned = experiments
822            .iter()
823            .filter(|e| e.status == ExperimentStatus::Planned)
824            .count();
825        out.push_str(&format!(
826            "  Planned: {}, Running: {}, Completed: {}, Failed: {}\n",
827            planned, running, completed, failed
828        ));
829
830        if completed + failed > 0 {
831            let success_rate = completed as f64 / (completed + failed) as f64 * 100.0;
832            out.push_str(&format!("  Success rate: {:.0}%\n", success_rate));
833        }
834
835        Ok(ToolOutput::text(out))
836    }
837
838    fn action_export_markdown(&self, args: &Value) -> Result<ToolOutput, ToolError> {
839        let state = self.load_state();
840        let hypothesis_id_filter = args.get("hypothesis_id").and_then(|v| v.as_str());
841
842        let hypotheses: Vec<&Hypothesis> = if let Some(hid) = hypothesis_id_filter {
843            state.hypotheses.iter().filter(|h| h.id == hid).collect()
844        } else {
845            state.hypotheses.iter().collect()
846        };
847
848        let mut md = String::from("# Experiment Tracker Report\n\n");
849
850        if hypotheses.is_empty() && state.experiments.is_empty() {
851            md.push_str("No data to export.\n");
852            return Ok(ToolOutput::text(md));
853        }
854
855        for hyp in &hypotheses {
856            md.push_str(&format!("## {} — {}\n\n", hyp.id, hyp.title));
857            md.push_str(&format!("**Status:** {}\n\n", hyp.status));
858            if !hyp.description.is_empty() {
859                md.push_str(&format!("{}\n\n", hyp.description));
860            }
861            if !hyp.tags.is_empty() {
862                md.push_str(&format!("**Tags:** {}\n\n", hyp.tags.join(", ")));
863            }
864
865            // Evidence
866            if !hyp.evidence.is_empty() {
867                md.push_str("### Evidence\n\n");
868                md.push_str("| Experiment | Finding | Supports | Confidence |\n");
869                md.push_str("|---|---|---|---|\n");
870                for ev in &hyp.evidence {
871                    md.push_str(&format!(
872                        "| {} | {} | {} | {:.2} |\n",
873                        ev.experiment_id, ev.finding, ev.supports, ev.confidence
874                    ));
875                }
876                md.push('\n');
877            }
878
879            // Linked experiments
880            let linked: Vec<&Experiment> = state
881                .experiments
882                .iter()
883                .filter(|e| e.hypothesis_id.as_deref() == Some(&hyp.id))
884                .collect();
885            if !linked.is_empty() {
886                md.push_str("### Experiments\n\n");
887                for exp in &linked {
888                    md.push_str(&format!(
889                        "- **{}** — {} [{}]\n",
890                        exp.id, exp.name, exp.status
891                    ));
892                }
893                md.push('\n');
894            }
895        }
896
897        // Unlinked experiments
898        let unlinked: Vec<&Experiment> = state
899            .experiments
900            .iter()
901            .filter(|e| {
902                if hypothesis_id_filter.is_some() {
903                    return false;
904                }
905                e.hypothesis_id.is_none()
906            })
907            .collect();
908        if !unlinked.is_empty() {
909            md.push_str("## Unlinked Experiments\n\n");
910            for exp in &unlinked {
911                md.push_str(&format!(
912                    "- **{}** — {} [{}]\n",
913                    exp.id, exp.name, exp.status
914                ));
915            }
916            md.push('\n');
917        }
918
919        Ok(ToolOutput::text(md))
920    }
921}
922
923// ---------------------------------------------------------------------------
924// Helpers
925// ---------------------------------------------------------------------------
926
927fn parse_tags(args: &Value) -> Vec<String> {
928    args.get("tags")
929        .and_then(|v| v.as_array())
930        .map(|arr| {
931            arr.iter()
932                .filter_map(|v| v.as_str().map(|s| s.to_string()))
933                .collect()
934        })
935        .unwrap_or_default()
936}
937
938fn parse_hypothesis_status(s: &str) -> Option<HypothesisStatus> {
939    match s.to_lowercase().as_str() {
940        "proposed" => Some(HypothesisStatus::Proposed),
941        "testing" => Some(HypothesisStatus::Testing),
942        "supported" => Some(HypothesisStatus::Supported),
943        "refuted" => Some(HypothesisStatus::Refuted),
944        "inconclusive" => Some(HypothesisStatus::Inconclusive),
945        _ => None,
946    }
947}
948
949fn parse_experiment_status(s: &str) -> Option<ExperimentStatus> {
950    match s.to_lowercase().as_str() {
951        "planned" => Some(ExperimentStatus::Planned),
952        "running" => Some(ExperimentStatus::Running),
953        "completed" => Some(ExperimentStatus::Completed),
954        "failed" => Some(ExperimentStatus::Failed),
955        "cancelled" => Some(ExperimentStatus::Cancelled),
956        _ => None,
957    }
958}
959
960// ---------------------------------------------------------------------------
961// Tool trait implementation
962// ---------------------------------------------------------------------------
963
964#[async_trait]
965impl Tool for ExperimentTrackerTool {
966    fn name(&self) -> &str {
967        "experiment_tracker"
968    }
969
970    fn description(&self) -> &str {
971        "Track scientific hypotheses, experiments, results, and evidence. Actions: add_hypothesis, update_hypothesis, list_hypotheses, get_hypothesis, add_experiment, start_experiment, complete_experiment, fail_experiment, get_experiment, list_experiments, record_evidence, compare_experiments, summary, export_markdown."
972    }
973
974    fn parameters_schema(&self) -> Value {
975        json!({
976            "type": "object",
977            "properties": {
978                "action": {
979                    "type": "string",
980                    "enum": [
981                        "add_hypothesis", "update_hypothesis", "list_hypotheses", "get_hypothesis",
982                        "add_experiment", "start_experiment", "complete_experiment", "fail_experiment",
983                        "get_experiment", "list_experiments",
984                        "record_evidence", "compare_experiments", "summary", "export_markdown"
985                    ],
986                    "description": "Action to perform"
987                },
988                "id": { "type": "string", "description": "Hypothesis or experiment ID" },
989                "title": { "type": "string", "description": "Hypothesis title" },
990                "name": { "type": "string", "description": "Experiment name" },
991                "description": { "type": "string", "description": "Description text" },
992                "status": { "type": "string", "description": "Status to set (for update_hypothesis)" },
993                "hypothesis_id": { "type": "string", "description": "Linked hypothesis ID" },
994                "experiment_id": { "type": "string", "description": "Experiment ID (for record_evidence)" },
995                "finding": { "type": "string", "description": "Evidence finding text" },
996                "supports": { "type": "boolean", "description": "Whether evidence supports the hypothesis" },
997                "confidence": { "type": "number", "description": "Confidence level 0.0-1.0 (default 0.5)" },
998                "config": { "type": "object", "description": "Experiment configuration" },
999                "metrics": { "type": "object", "description": "Experiment result metrics" },
1000                "notes": { "type": "string", "description": "Experiment notes" },
1001                "tags": {
1002                    "type": "array",
1003                    "items": { "type": "string" },
1004                    "description": "Tags for filtering"
1005                },
1006                "ids": {
1007                    "type": "array",
1008                    "items": { "type": "string" },
1009                    "description": "Experiment IDs (for compare_experiments)"
1010                },
1011                "tag": { "type": "string", "description": "Filter by tag" }
1012            },
1013            "required": ["action"]
1014        })
1015    }
1016
1017    fn risk_level(&self) -> RiskLevel {
1018        RiskLevel::Write
1019    }
1020
1021    async fn execute(&self, args: Value) -> Result<ToolOutput, ToolError> {
1022        let action = args.get("action").and_then(|v| v.as_str()).unwrap_or("");
1023
1024        match action {
1025            "add_hypothesis" => self.action_add_hypothesis(&args),
1026            "update_hypothesis" => self.action_update_hypothesis(&args),
1027            "list_hypotheses" => self.action_list_hypotheses(&args),
1028            "get_hypothesis" => self.action_get_hypothesis(&args),
1029            "add_experiment" => self.action_add_experiment(&args),
1030            "start_experiment" => self.action_start_experiment(&args),
1031            "complete_experiment" => self.action_complete_experiment(&args),
1032            "fail_experiment" => self.action_fail_experiment(&args),
1033            "get_experiment" => self.action_get_experiment(&args),
1034            "list_experiments" => self.action_list_experiments(&args),
1035            "record_evidence" => self.action_record_evidence(&args),
1036            "compare_experiments" => self.action_compare_experiments(&args),
1037            "summary" => self.action_summary(&args),
1038            "export_markdown" => self.action_export_markdown(&args),
1039            _ => Ok(ToolOutput::text(format!(
1040                "Unknown action: '{}'. Use: add_hypothesis, update_hypothesis, list_hypotheses, get_hypothesis, add_experiment, start_experiment, complete_experiment, fail_experiment, get_experiment, list_experiments, record_evidence, compare_experiments, summary, export_markdown",
1041                action
1042            ))),
1043        }
1044    }
1045}
1046
1047// ---------------------------------------------------------------------------
1048// Tests
1049// ---------------------------------------------------------------------------
1050
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates a tool whose workspace is a fresh temporary directory. The
    /// `TempDir` handle is returned too so each test can hold on to it.
    fn make_tool() -> (TempDir, ExperimentTrackerTool) {
        let dir = TempDir::new().unwrap();
        let tool = ExperimentTrackerTool::new(dir.path().canonicalize().unwrap());
        (dir, tool)
    }

    /// Executes a single action and unwraps its output, panicking on `Err`.
    async fn run(tool: &ExperimentTrackerTool, args: Value) -> ToolOutput {
        tool.execute(args).await.unwrap()
    }

    #[test]
    fn test_tool_properties() {
        let (_workspace, tracker) = make_tool();
        assert_eq!(tracker.name(), "experiment_tracker");
        assert_eq!(tracker.risk_level(), RiskLevel::Write);
        let description = tracker.description();
        assert!(description.contains("hypotheses"));
        assert!(description.contains("experiments"));
    }

    #[test]
    fn test_schema_validation() {
        let (_workspace, tracker) = make_tool();
        let schema = tracker.parameters_schema();
        assert!(schema.is_object());
        assert!(schema.get("properties").is_some());
        let action = &schema["properties"]["action"];
        assert!(action.get("enum").is_some());
        assert_eq!(action["enum"].as_array().unwrap().len(), 14);
    }

    #[tokio::test]
    async fn test_add_hypothesis() {
        let (_workspace, tracker) = make_tool();

        let added = run(
            &tracker,
            json!({
                "action": "add_hypothesis",
                "title": "Caching improves latency",
                "tags": ["performance"]
            }),
        )
        .await;
        assert!(added.content.contains("h1"));
        assert!(added.content.contains("Caching improves latency"));

        let listing = run(&tracker, json!({"action": "list_hypotheses"})).await;
        assert!(listing.content.contains("Caching improves latency"));
        assert!(listing.content.contains("Proposed"));
    }

    #[tokio::test]
    async fn test_hypothesis_crud() {
        let (_workspace, tracker) = make_tool();

        // Create
        run(
            &tracker,
            json!({
                "action": "add_hypothesis",
                "title": "Batch size matters",
                "description": "Larger batches reduce overhead"
            }),
        )
        .await;

        // Change the status
        let updated = run(
            &tracker,
            json!({
                "action": "update_hypothesis",
                "id": "h1",
                "status": "testing"
            }),
        )
        .await;
        assert!(updated.content.contains("Testing"));

        // Read back the full record
        let detail = run(&tracker, json!({"action": "get_hypothesis", "id": "h1"})).await;
        assert!(detail.content.contains("Batch size matters"));
        assert!(detail.content.contains("Testing"));
        assert!(detail.content.contains("Larger batches reduce overhead"));
    }

    #[tokio::test]
    async fn test_add_experiment() {
        let (_workspace, tracker) = make_tool();

        // A hypothesis for the experiment to link to
        run(
            &tracker,
            json!({
                "action": "add_hypothesis",
                "title": "Test hyp"
            }),
        )
        .await;

        // Experiment linked to that hypothesis
        let added = run(
            &tracker,
            json!({
                "action": "add_experiment",
                "name": "Run A",
                "hypothesis_id": "h1",
                "config": {"learning_rate": 0.01}
            }),
        )
        .await;
        assert!(added.content.contains("e1"));
        assert!(added.content.contains("Run A"));

        // The experiment shows up in the listing
        let listing = run(&tracker, json!({"action": "list_experiments"})).await;
        assert!(listing.content.contains("Run A"));
        assert!(listing.content.contains("Planned"));
    }

    #[tokio::test]
    async fn test_experiment_lifecycle_complete() {
        let (_workspace, tracker) = make_tool();

        run(
            &tracker,
            json!({
                "action": "add_experiment",
                "name": "Exp Alpha"
            }),
        )
        .await;

        // Start
        let started = run(&tracker, json!({"action": "start_experiment", "id": "e1"})).await;
        assert!(started.content.contains("running"));

        // Complete
        let finished = run(
            &tracker,
            json!({
                "action": "complete_experiment",
                "id": "e1",
                "metrics": {"accuracy": 0.95},
                "notes": "Good results"
            }),
        )
        .await;
        assert!(finished.content.contains("completed"));

        // Confirm through get_experiment
        let detail = run(&tracker, json!({"action": "get_experiment", "id": "e1"})).await;
        assert!(detail.content.contains("Completed"));
        assert!(detail.content.contains("Good results"));
    }

    #[tokio::test]
    async fn test_experiment_lifecycle_fail() {
        let (_workspace, tracker) = make_tool();

        run(
            &tracker,
            json!({
                "action": "add_experiment",
                "name": "Exp Beta"
            }),
        )
        .await;

        run(&tracker, json!({"action": "start_experiment", "id": "e1"})).await;

        let failed = run(
            &tracker,
            json!({
                "action": "fail_experiment",
                "id": "e1",
                "notes": "OOM error"
            }),
        )
        .await;
        assert!(failed.content.contains("failed"));

        let detail = run(&tracker, json!({"action": "get_experiment", "id": "e1"})).await;
        assert!(detail.content.contains("Failed"));
        assert!(detail.content.contains("OOM error"));
    }

    #[tokio::test]
    async fn test_record_evidence() {
        let (_workspace, tracker) = make_tool();

        run(
            &tracker,
            json!({
                "action": "add_hypothesis",
                "title": "Evidence test"
            }),
        )
        .await;

        run(
            &tracker,
            json!({
                "action": "add_experiment",
                "name": "Trial 1",
                "hypothesis_id": "h1"
            }),
        )
        .await;

        let recorded = run(
            &tracker,
            json!({
                "action": "record_evidence",
                "hypothesis_id": "h1",
                "experiment_id": "e1",
                "finding": "Latency reduced by 40%",
                "supports": true,
                "confidence": 0.85
            }),
        )
        .await;
        assert!(recorded.content.contains("Recorded evidence"));
        assert!(recorded.content.contains("h1"));
        assert!(recorded.content.contains("0.85"));

        // The evidence is visible on the hypothesis
        let detail = run(&tracker, json!({"action": "get_hypothesis", "id": "h1"})).await;
        assert!(detail.content.contains("Latency reduced by 40%"));
        assert!(detail.content.contains("0.85"));
    }

    #[tokio::test]
    async fn test_compare_experiments() {
        let (_workspace, tracker) = make_tool();

        run(
            &tracker,
            json!({
                "action": "add_experiment",
                "name": "Config A",
                "config": {"batch_size": 32}
            }),
        )
        .await;

        run(
            &tracker,
            json!({
                "action": "add_experiment",
                "name": "Config B",
                "config": {"batch_size": 64}
            }),
        )
        .await;

        let comparison = run(
            &tracker,
            json!({
                "action": "compare_experiments",
                "ids": ["e1", "e2"]
            }),
        )
        .await;
        assert!(comparison.content.contains("Config A"));
        assert!(comparison.content.contains("Config B"));
        assert!(comparison.content.contains("Comparison of 2 experiments"));
    }

    #[tokio::test]
    async fn test_summary_empty() {
        let (_workspace, tracker) = make_tool();

        let summary = run(&tracker, json!({"action": "summary"})).await;
        assert!(summary.content.contains("No data to summarize"));
    }

    #[tokio::test]
    async fn test_summary_with_data() {
        let (_workspace, tracker) = make_tool();

        run(
            &tracker,
            json!({
                "action": "add_hypothesis",
                "title": "H1"
            }),
        )
        .await;

        run(
            &tracker,
            json!({
                "action": "add_experiment",
                "name": "E1",
                "hypothesis_id": "h1"
            }),
        )
        .await;

        run(&tracker, json!({"action": "start_experiment", "id": "e1"})).await;

        run(&tracker, json!({"action": "complete_experiment", "id": "e1"})).await;

        run(
            &tracker,
            json!({
                "action": "record_evidence",
                "hypothesis_id": "h1",
                "experiment_id": "e1",
                "finding": "Positive result",
                "supports": true,
                "confidence": 0.9
            }),
        )
        .await;

        let summary = run(&tracker, json!({"action": "summary"})).await;
        assert!(summary.content.contains("Hypotheses: 1"));
        assert!(summary.content.contains("Experiments: 1"));
        assert!(summary.content.contains("1 supporting"));
        assert!(summary.content.contains("Success rate: 100%"));
    }

    #[tokio::test]
    async fn test_export_markdown() {
        let (_workspace, tracker) = make_tool();

        run(
            &tracker,
            json!({
                "action": "add_hypothesis",
                "title": "Cache hypothesis",
                "description": "Caching reduces latency"
            }),
        )
        .await;

        run(
            &tracker,
            json!({
                "action": "add_experiment",
                "name": "Cache test",
                "hypothesis_id": "h1"
            }),
        )
        .await;

        let report = run(&tracker, json!({"action": "export_markdown"})).await;
        assert!(report.content.contains("# Experiment Tracker Report"));
        assert!(report.content.contains("Cache hypothesis"));
        assert!(report.content.contains("**Status:** Proposed"));
        assert!(report.content.contains("Cache test"));
    }

    #[tokio::test]
    async fn test_list_hypotheses_filter_status() {
        let (_workspace, tracker) = make_tool();

        run(
            &tracker,
            json!({
                "action": "add_hypothesis",
                "title": "Hyp A"
            }),
        )
        .await;

        run(
            &tracker,
            json!({
                "action": "add_hypothesis",
                "title": "Hyp B"
            }),
        )
        .await;

        run(
            &tracker,
            json!({
                "action": "update_hypothesis",
                "id": "h2",
                "status": "testing"
            }),
        )
        .await;

        // Proposed filter: h1 only
        let proposed = run(
            &tracker,
            json!({"action": "list_hypotheses", "status": "proposed"}),
        )
        .await;
        assert!(proposed.content.contains("Hyp A"));
        assert!(!proposed.content.contains("Hyp B"));

        // Testing filter: h2 only
        let testing = run(
            &tracker,
            json!({"action": "list_hypotheses", "status": "testing"}),
        )
        .await;
        assert!(testing.content.contains("Hyp B"));
        assert!(!testing.content.contains("Hyp A"));
    }

    #[tokio::test]
    async fn test_list_experiments_filter() {
        let (_workspace, tracker) = make_tool();

        run(
            &tracker,
            json!({
                "action": "add_hypothesis",
                "title": "H1"
            }),
        )
        .await;

        run(
            &tracker,
            json!({
                "action": "add_hypothesis",
                "title": "H2"
            }),
        )
        .await;

        run(
            &tracker,
            json!({
                "action": "add_experiment",
                "name": "Exp for H1",
                "hypothesis_id": "h1"
            }),
        )
        .await;

        run(
            &tracker,
            json!({
                "action": "add_experiment",
                "name": "Exp for H2",
                "hypothesis_id": "h2"
            }),
        )
        .await;

        // Restrict the listing to one hypothesis
        let listing = run(
            &tracker,
            json!({"action": "list_experiments", "hypothesis_id": "h1"}),
        )
        .await;
        assert!(listing.content.contains("Exp for H1"));
        assert!(!listing.content.contains("Exp for H2"));
    }

    #[tokio::test]
    async fn test_state_roundtrip() {
        let (_workspace, tracker) = make_tool();

        run(
            &tracker,
            json!({
                "action": "add_hypothesis",
                "title": "Persist me",
                "tags": ["tag1"]
            }),
        )
        .await;

        run(
            &tracker,
            json!({
                "action": "add_experiment",
                "name": "Saved exp",
                "hypothesis_id": "h1"
            }),
        )
        .await;

        // Load the state directly and inspect it
        let state = tracker.load_state();
        assert_eq!(state.hypotheses.len(), 1);
        assert_eq!(state.experiments.len(), 1);
        assert_eq!(state.hypotheses[0].title, "Persist me");
        assert_eq!(state.hypotheses[0].tags, vec!["tag1"]);
        assert_eq!(state.experiments[0].name, "Saved exp");
        assert_eq!(state.experiments[0].hypothesis_id, Some("h1".to_string()));
        assert_eq!(state.next_hypothesis_id, 2);
        assert_eq!(state.next_experiment_id, 2);
    }

    #[tokio::test]
    async fn test_evidence_confidence_clamping() {
        let (_workspace, tracker) = make_tool();

        run(
            &tracker,
            json!({
                "action": "add_hypothesis",
                "title": "Clamp test"
            }),
        )
        .await;

        run(
            &tracker,
            json!({
                "action": "add_experiment",
                "name": "Clamp exp"
            }),
        )
        .await;

        // A confidence above 1.0 is expected to be reported as 1.00
        let too_high = run(
            &tracker,
            json!({
                "action": "record_evidence",
                "hypothesis_id": "h1",
                "experiment_id": "e1",
                "finding": "Over confident",
                "supports": true,
                "confidence": 1.5
            }),
        )
        .await;
        assert!(too_high.content.contains("1.00"));

        // A confidence below 0.0 is expected to be reported as 0.00
        let too_low = run(
            &tracker,
            json!({
                "action": "record_evidence",
                "hypothesis_id": "h1",
                "experiment_id": "e1",
                "finding": "Under confident",
                "supports": false,
                "confidence": -0.5
            }),
        )
        .await;
        assert!(too_low.content.contains("0.00"));

        // The stored values are clamped as well
        let state = tracker.load_state();
        let hyp = &state.hypotheses[0];
        assert_eq!(hyp.evidence.len(), 2);
        assert!((hyp.evidence[0].confidence - 1.0).abs() < f64::EPSILON);
        assert!((hyp.evidence[1].confidence - 0.0).abs() < f64::EPSILON);
    }

    #[tokio::test]
    async fn test_unknown_action() {
        let (_workspace, tracker) = make_tool();

        let reply = run(&tracker, json!({"action": "nonexistent"})).await;
        assert!(reply.content.contains("Unknown action"));
        assert!(reply.content.contains("nonexistent"));
    }
}