Skip to main content

harn_vm/orchestration/
mod.rs

1use std::path::PathBuf;
2use std::{cell::RefCell, thread_local};
3
4use serde::{Deserialize, Serialize};
5
6use crate::llm::vm_value_to_json;
7use crate::value::{VmError, VmValue};
8
/// Returns the current wall-clock time as whole seconds since the Unix epoch,
/// rendered as a decimal string.
///
/// NOTE: despite the name, the result is *not* an RFC 3339 timestamp; it is a
/// plain integer such as `"1700000000"`. If the system clock reports a time
/// before the epoch, the duration falls back to zero and `"0"` is returned.
pub(crate) fn now_rfc3339() -> String {
    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default();
    since_epoch.as_secs().to_string()
}
17
18pub(crate) fn new_id(prefix: &str) -> String {
19    format!("{prefix}_{}", uuid::Uuid::now_v7())
20}
21
/// Returns the directory used for run output.
///
/// The `HARN_RUN_DIR` environment variable takes precedence when it is set;
/// otherwise the relative path `.harn-runs` is returned.
///
/// The variable is read with `var_os` so that a value that is not valid
/// Unicode (legal for paths on many platforms) is still honoured instead of
/// being silently replaced by the default.
pub(crate) fn default_run_dir() -> PathBuf {
    match std::env::var_os("HARN_RUN_DIR") {
        Some(configured) => PathBuf::from(configured),
        None => PathBuf::from(".harn-runs"),
    }
}
27
28mod hooks;
29pub use hooks::*;
30
31mod compaction;
32pub use compaction::*;
33
34mod artifacts;
35pub use artifacts::*;
36
37mod policy;
38pub use policy::*;
39
40mod workflow;
41pub use workflow::*;
42
43mod records;
44pub use records::*;
45
// Per-thread slot holding the mutation session that is currently installed,
// if any. It starts out as `None`; `install_current_mutation_session` writes
// it and `current_mutation_session` reads a clone of it.
thread_local! {
    static CURRENT_MUTATION_SESSION: RefCell<Option<MutationSessionRecord>> = const { RefCell::new(None) };
}
49
/// Serializable record describing a mutation session.
///
/// Because of `#[serde(default)]`, fields missing from the serialized form
/// take their `Default` values (empty strings / `None`); call
/// [`MutationSessionRecord::normalize`] to replace empty required fields with
/// their fallbacks.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct MutationSessionRecord {
    /// Session identifier; `normalize` generates a `session_`-prefixed id
    /// when this is empty.
    pub session_id: String,
    /// Presumably the id of the session this one was derived from — TODO confirm.
    pub parent_session_id: Option<String>,
    /// Presumably the id of the run this session belongs to — TODO confirm.
    pub run_id: Option<String>,
    /// Presumably the id of the worker executing the session — TODO confirm.
    pub worker_id: Option<String>,
    /// Free-form execution kind label; semantics are not visible in this file.
    pub execution_kind: Option<String>,
    /// Mutation scope label; `normalize` falls back to `"read_only"` when empty.
    pub mutation_scope: String,
    /// Approval mode label; `normalize` falls back to `"host_enforced"` when empty.
    pub approval_mode: String,
}
61
62impl MutationSessionRecord {
63    pub fn normalize(mut self) -> Self {
64        if self.session_id.is_empty() {
65            self.session_id = new_id("session");
66        }
67        if self.mutation_scope.is_empty() {
68            self.mutation_scope = "read_only".to_string();
69        }
70        if self.approval_mode.is_empty() {
71            self.approval_mode = "host_enforced".to_string();
72        }
73        self
74    }
75}
76
77pub fn install_current_mutation_session(session: Option<MutationSessionRecord>) {
78    CURRENT_MUTATION_SESSION.with(|slot| {
79        *slot.borrow_mut() = session.map(MutationSessionRecord::normalize);
80    });
81}
82
83pub fn current_mutation_session() -> Option<MutationSessionRecord> {
84    CURRENT_MUTATION_SESSION.with(|slot| slot.borrow().clone())
85}
86pub(crate) fn parse_json_payload<T: for<'de> Deserialize<'de>>(
87    json: serde_json::Value,
88    label: &str,
89) -> Result<T, VmError> {
90    let payload = json.to_string();
91    let mut deserializer = serde_json::Deserializer::from_str(&payload);
92    let mut tracker = serde_path_to_error::Track::new();
93    let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
94    T::deserialize(path_deserializer).map_err(|error| {
95        let snippet = if payload.len() > 600 {
96            format!("{}...", &payload[..600])
97        } else {
98            payload.clone()
99        };
100        VmError::Runtime(format!(
101            "{label} parse error at {}: {} | payload={}",
102            tracker.path(),
103            error,
104            snippet
105        ))
106    })
107}
108
109pub(crate) fn parse_json_value<T: for<'de> Deserialize<'de>>(
110    value: &VmValue,
111) -> Result<T, VmError> {
112    parse_json_payload(vm_value_to_json(value), "orchestration")
113}
114
115#[cfg(test)]
116mod tests {
117    use super::*;
118    use std::collections::BTreeMap;
119    use std::rc::Rc;
120
121    #[test]
122    fn capability_intersection_rejects_privilege_expansion() {
123        let ceiling = CapabilityPolicy {
124            tools: vec!["read".to_string()],
125            side_effect_level: Some("read_only".to_string()),
126            recursion_limit: Some(2),
127            ..Default::default()
128        };
129        let requested = CapabilityPolicy {
130            tools: vec!["read".to_string(), "edit".to_string()],
131            ..Default::default()
132        };
133        let error = ceiling.intersect(&requested).unwrap_err();
134        assert!(error.contains("host ceiling"));
135    }
136
137    #[test]
138    fn mutation_session_normalize_fills_defaults() {
139        let normalized = MutationSessionRecord::default().normalize();
140        assert!(normalized.session_id.starts_with("session_"));
141        assert_eq!(normalized.mutation_scope, "read_only");
142        assert_eq!(normalized.approval_mode, "host_enforced");
143    }
144
145    #[test]
146    fn install_current_mutation_session_round_trips() {
147        install_current_mutation_session(Some(MutationSessionRecord {
148            session_id: "session_test".to_string(),
149            mutation_scope: "apply_workspace".to_string(),
150            approval_mode: "explicit".to_string(),
151            ..Default::default()
152        }));
153        let current = current_mutation_session().expect("session installed");
154        assert_eq!(current.session_id, "session_test");
155        assert_eq!(current.mutation_scope, "apply_workspace");
156        assert_eq!(current.approval_mode, "explicit");
157
158        install_current_mutation_session(None);
159        assert!(current_mutation_session().is_none());
160    }
161
162    #[test]
163    fn active_execution_policy_rejects_unknown_bridge_builtin() {
164        push_execution_policy(CapabilityPolicy {
165            tools: vec!["read".to_string()],
166            capabilities: BTreeMap::from([(
167                "workspace".to_string(),
168                vec!["read_text".to_string()],
169            )]),
170            side_effect_level: Some("read_only".to_string()),
171            recursion_limit: Some(1),
172            ..Default::default()
173        });
174        let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
175        pop_execution_policy();
176        assert!(matches!(
177            error,
178            VmError::CategorizedError {
179                category: crate::value::ErrorCategory::ToolRejected,
180                ..
181            }
182        ));
183    }
184
185    #[test]
186    fn active_execution_policy_rejects_mcp_escape_hatch() {
187        push_execution_policy(CapabilityPolicy {
188            tools: vec!["read".to_string()],
189            capabilities: BTreeMap::from([(
190                "workspace".to_string(),
191                vec!["read_text".to_string()],
192            )]),
193            side_effect_level: Some("read_only".to_string()),
194            recursion_limit: Some(1),
195            ..Default::default()
196        });
197        let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
198        pop_execution_policy();
199        assert!(matches!(
200            error,
201            VmError::CategorizedError {
202                category: crate::value::ErrorCategory::ToolRejected,
203                ..
204            }
205        ));
206    }
207
208    #[test]
209    fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
210        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
211            "name": "legacy",
212            "act": {"mode": "llm"},
213            "verify": {"kind": "verify"},
214            "repair": {"mode": "agent"},
215        }));
216        let graph = normalize_workflow_value(&value).unwrap();
217        assert_eq!(graph.type_name, "workflow_graph");
218        assert!(graph.nodes.contains_key("act"));
219        assert!(graph.nodes.contains_key("verify"));
220        assert!(graph.nodes.contains_key("repair"));
221        assert_eq!(graph.entry, "act");
222    }
223
224    #[test]
225    fn workflow_normalization_accepts_tool_registry_nodes() {
226        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
227            "name": "registry_tools",
228            "entry": "implement",
229            "nodes": {
230                "implement": {
231                    "kind": "stage",
232                    "mode": "agent",
233                    "tools": {
234                        "_type": "tool_registry",
235                        "tools": [
236                            {"name": "read", "description": "Read files"},
237                            {"name": "run", "description": "Run commands"}
238                        ]
239                    }
240                }
241            },
242            "edges": []
243        }));
244        let graph = normalize_workflow_value(&value).unwrap();
245        let node = graph.nodes.get("implement").unwrap();
246        assert_eq!(workflow_tool_names(&node.tools), vec!["read", "run"]);
247    }
248
249    #[test]
250    fn artifact_selection_honors_budget_and_priority() {
251        let policy = ContextPolicy {
252            max_artifacts: Some(2),
253            max_tokens: Some(30),
254            prefer_recent: true,
255            prefer_fresh: true,
256            prioritize_kinds: vec!["verification_result".to_string()],
257            ..Default::default()
258        };
259        let artifacts = vec![
260            ArtifactRecord {
261                type_name: "artifact".to_string(),
262                id: "a".to_string(),
263                kind: "summary".to_string(),
264                text: Some("short".to_string()),
265                relevance: Some(0.9),
266                created_at: now_rfc3339(),
267                ..Default::default()
268            }
269            .normalize(),
270            ArtifactRecord {
271                type_name: "artifact".to_string(),
272                id: "b".to_string(),
273                kind: "summary".to_string(),
274                text: Some("this is a much larger artifact body".to_string()),
275                relevance: Some(1.0),
276                created_at: now_rfc3339(),
277                ..Default::default()
278            }
279            .normalize(),
280            ArtifactRecord {
281                type_name: "artifact".to_string(),
282                id: "c".to_string(),
283                kind: "summary".to_string(),
284                text: Some("tiny".to_string()),
285                relevance: Some(0.5),
286                created_at: now_rfc3339(),
287                ..Default::default()
288            }
289            .normalize(),
290        ];
291        let selected = select_artifacts(artifacts, &policy);
292        assert_eq!(selected.len(), 2);
293        assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
294    }
295
296    #[test]
297    fn workflow_validation_rejects_condition_without_true_false_edges() {
298        let graph = WorkflowGraph {
299            entry: "gate".to_string(),
300            nodes: BTreeMap::from([(
301                "gate".to_string(),
302                WorkflowNode {
303                    id: Some("gate".to_string()),
304                    kind: "condition".to_string(),
305                    ..Default::default()
306                },
307            )]),
308            edges: vec![WorkflowEdge {
309                from: "gate".to_string(),
310                to: "next".to_string(),
311                branch: Some("true".to_string()),
312                label: None,
313            }],
314            ..Default::default()
315        };
316        let report = validate_workflow(&graph, None);
317        assert!(!report.valid);
318        assert!(report
319            .errors
320            .iter()
321            .any(|error| error.contains("true") && error.contains("false")));
322    }
323
324    #[test]
325    fn replay_fixture_round_trip_passes() {
326        let run = RunRecord {
327            type_name: "run_record".to_string(),
328            id: "run_1".to_string(),
329            workflow_id: "wf".to_string(),
330            workflow_name: Some("demo".to_string()),
331            task: "demo".to_string(),
332            status: "completed".to_string(),
333            started_at: "1".to_string(),
334            finished_at: Some("2".to_string()),
335            parent_run_id: None,
336            root_run_id: Some("run_1".to_string()),
337            stages: vec![RunStageRecord {
338                id: "stage_1".to_string(),
339                node_id: "act".to_string(),
340                kind: "stage".to_string(),
341                status: "completed".to_string(),
342                outcome: "success".to_string(),
343                branch: Some("success".to_string()),
344                started_at: "1".to_string(),
345                finished_at: Some("2".to_string()),
346                visible_text: Some("done".to_string()),
347                private_reasoning: None,
348                transcript: None,
349                verification: None,
350                usage: None,
351                artifacts: vec![ArtifactRecord {
352                    type_name: "artifact".to_string(),
353                    id: "a1".to_string(),
354                    kind: "summary".to_string(),
355                    text: Some("done".to_string()),
356                    created_at: "1".to_string(),
357                    ..Default::default()
358                }
359                .normalize()],
360                consumed_artifact_ids: vec![],
361                produced_artifact_ids: vec!["a1".to_string()],
362                attempts: vec![],
363                metadata: BTreeMap::new(),
364            }],
365            transitions: vec![],
366            checkpoints: vec![],
367            pending_nodes: vec![],
368            completed_nodes: vec!["act".to_string()],
369            child_runs: vec![],
370            artifacts: vec![],
371            policy: CapabilityPolicy::default(),
372            execution: None,
373            transcript: None,
374            usage: None,
375            replay_fixture: None,
376            trace_spans: vec![],
377            tool_recordings: vec![],
378            metadata: BTreeMap::new(),
379            persisted_path: None,
380        };
381        let fixture = replay_fixture_from_run(&run);
382        let report = evaluate_run_against_fixture(&run, &fixture);
383        assert!(report.pass);
384        assert!(report.failures.is_empty());
385    }
386
387    #[test]
388    fn replay_eval_suite_reports_failed_case() {
389        let good = RunRecord {
390            id: "run_good".to_string(),
391            workflow_id: "wf".to_string(),
392            status: "completed".to_string(),
393            stages: vec![RunStageRecord {
394                node_id: "act".to_string(),
395                status: "completed".to_string(),
396                outcome: "success".to_string(),
397                ..Default::default()
398            }],
399            ..Default::default()
400        };
401        let bad = RunRecord {
402            id: "run_bad".to_string(),
403            workflow_id: "wf".to_string(),
404            status: "failed".to_string(),
405            stages: vec![RunStageRecord {
406                node_id: "act".to_string(),
407                status: "failed".to_string(),
408                outcome: "error".to_string(),
409                ..Default::default()
410            }],
411            ..Default::default()
412        };
413        let suite = evaluate_run_suite(vec![
414            (
415                good.clone(),
416                replay_fixture_from_run(&good),
417                Some("good.json".to_string()),
418            ),
419            (
420                bad.clone(),
421                replay_fixture_from_run(&good),
422                Some("bad.json".to_string()),
423            ),
424        ]);
425        assert!(!suite.pass);
426        assert_eq!(suite.total, 2);
427        assert_eq!(suite.failed, 1);
428        assert!(suite.cases.iter().any(|case| !case.pass));
429    }
430
431    #[test]
432    fn run_diff_reports_changed_stage() {
433        let left = RunRecord {
434            id: "left".to_string(),
435            workflow_id: "wf".to_string(),
436            status: "completed".to_string(),
437            stages: vec![RunStageRecord {
438                node_id: "act".to_string(),
439                status: "completed".to_string(),
440                outcome: "success".to_string(),
441                ..Default::default()
442            }],
443            ..Default::default()
444        };
445        let right = RunRecord {
446            id: "right".to_string(),
447            workflow_id: "wf".to_string(),
448            status: "failed".to_string(),
449            stages: vec![RunStageRecord {
450                node_id: "act".to_string(),
451                status: "failed".to_string(),
452                outcome: "error".to_string(),
453                ..Default::default()
454            }],
455            ..Default::default()
456        };
457        let diff = diff_run_records(&left, &right);
458        assert!(diff.status_changed);
459        assert!(!diff.identical);
460        assert_eq!(diff.stage_diffs.len(), 1);
461    }
462
463    #[test]
464    fn eval_suite_manifest_can_fail_on_baseline_diff() {
465        let temp_dir =
466            std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
467        std::fs::create_dir_all(&temp_dir).unwrap();
468        let baseline_path = temp_dir.join("baseline.json");
469        let candidate_path = temp_dir.join("candidate.json");
470
471        let baseline = RunRecord {
472            id: "baseline".to_string(),
473            workflow_id: "wf".to_string(),
474            status: "completed".to_string(),
475            stages: vec![RunStageRecord {
476                node_id: "act".to_string(),
477                status: "completed".to_string(),
478                outcome: "success".to_string(),
479                ..Default::default()
480            }],
481            ..Default::default()
482        };
483        let candidate = RunRecord {
484            id: "candidate".to_string(),
485            workflow_id: "wf".to_string(),
486            status: "failed".to_string(),
487            stages: vec![RunStageRecord {
488                node_id: "act".to_string(),
489                status: "failed".to_string(),
490                outcome: "error".to_string(),
491                ..Default::default()
492            }],
493            ..Default::default()
494        };
495
496        save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
497        save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();
498
499        let manifest = EvalSuiteManifest {
500            base_dir: Some(temp_dir.display().to_string()),
501            cases: vec![EvalSuiteCase {
502                label: Some("candidate".to_string()),
503                run_path: "candidate.json".to_string(),
504                fixture_path: None,
505                compare_to: Some("baseline.json".to_string()),
506            }],
507            ..Default::default()
508        };
509        let suite = evaluate_run_suite_manifest(&manifest).unwrap();
510        assert!(!suite.pass);
511        assert_eq!(suite.failed, 1);
512        assert!(suite.cases[0].comparison.is_some());
513        assert!(suite.cases[0]
514            .failures
515            .iter()
516            .any(|failure| failure.contains("baseline")));
517    }
518
519    #[test]
520    fn render_unified_diff_marks_removed_and_added_lines() {
521        let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
522        assert!(diff.contains("--- a/src/main.rs"));
523        assert!(diff.contains("+++ b/src/main.rs"));
524        assert!(diff.contains("-old"));
525        assert!(diff.contains("+new"));
526        assert!(diff.contains(" same"));
527    }
528
529    #[test]
530    fn render_unified_diff_identical_inputs() {
531        let text = "line1\nline2\nline3";
532        let diff = render_unified_diff(None, text, text);
533        assert!(diff.contains("--- a/artifact"));
534        let body: Vec<&str> = diff.lines().skip(2).collect();
535        assert!(!body.iter().any(|l| l.starts_with('-')));
536        assert!(!body.iter().any(|l| l.starts_with('+')));
537        assert_eq!(body.len(), 3);
538    }
539
540    #[test]
541    fn render_unified_diff_empty_before() {
542        let diff = render_unified_diff(None, "", "new1\nnew2");
543        assert!(diff.contains("+new1"));
544        assert!(diff.contains("+new2"));
545        let body: Vec<&str> = diff.lines().skip(2).collect();
546        assert!(!body.iter().any(|l| l.starts_with('-')));
547    }
548
549    #[test]
550    fn render_unified_diff_empty_after() {
551        let diff = render_unified_diff(None, "old1\nold2", "");
552        assert!(diff.contains("-old1"));
553        assert!(diff.contains("-old2"));
554        let body: Vec<&str> = diff.lines().skip(2).collect();
555        assert!(!body.iter().any(|l| l.starts_with('+')));
556    }
557
558    #[test]
559    fn render_unified_diff_both_empty() {
560        let diff = render_unified_diff(None, "", "");
561        assert!(diff.contains("--- a/artifact"));
562        assert!(diff.contains("+++ b/artifact"));
563        // No content lines
564        let body: String = diff.lines().skip(2).collect();
565        assert!(body.is_empty());
566    }
567
568    #[test]
569    fn render_unified_diff_all_changed() {
570        let diff = render_unified_diff(None, "a\nb", "x\ny");
571        assert!(diff.contains("-a"));
572        assert!(diff.contains("-b"));
573        assert!(diff.contains("+x"));
574        assert!(diff.contains("+y"));
575    }
576
577    #[test]
578    fn render_unified_diff_insertion_in_middle() {
579        let diff = render_unified_diff(None, "a\nc", "a\nb\nc");
580        assert!(diff.contains(" a"));
581        assert!(diff.contains("+b"));
582        assert!(diff.contains(" c"));
583        let body: Vec<&str> = diff.lines().skip(2).collect();
584        assert!(!body.iter().any(|l| l.starts_with('-')));
585    }
586
587    #[test]
588    fn render_unified_diff_deletion_from_middle() {
589        let diff = render_unified_diff(None, "a\nb\nc", "a\nc");
590        assert!(diff.contains(" a"));
591        assert!(diff.contains("-b"));
592        assert!(diff.contains(" c"));
593        let body: Vec<&str> = diff.lines().skip(2).collect();
594        assert!(!body.iter().any(|l| l.starts_with('+')));
595    }
596
597    #[test]
598    fn render_unified_diff_default_path() {
599        let diff = render_unified_diff(None, "a", "b");
600        assert!(diff.contains("--- a/artifact"));
601        assert!(diff.contains("+++ b/artifact"));
602    }
603
604    #[test]
605    fn render_unified_diff_large_similar() {
606        // Test performance: 1000 lines with one change in the middle
607        let mut before = Vec::new();
608        let mut after = Vec::new();
609        for i in 0..1000 {
610            before.push(format!("line {i}"));
611            after.push(format!("line {i}"));
612        }
613        before[500] = "OLD LINE 500".to_string();
614        after[500] = "NEW LINE 500".to_string();
615        let before_str = before.join("\n");
616        let after_str = after.join("\n");
617        let diff = render_unified_diff(None, &before_str, &after_str);
618        assert!(diff.contains("-OLD LINE 500"));
619        assert!(diff.contains("+NEW LINE 500"));
620        // Context lines should be present
621        assert!(diff.contains(" line 499"));
622        assert!(diff.contains(" line 501"));
623    }
624
625    #[test]
626    fn myers_diff_empty_sequences() {
627        let ops = myers_diff(&[], &[]);
628        assert!(ops.is_empty());
629    }
630
631    #[test]
632    fn myers_diff_insert_only() {
633        let ops = myers_diff(&[], &["a", "b"]);
634        assert_eq!(ops.len(), 2);
635        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Insert));
636    }
637
638    #[test]
639    fn myers_diff_delete_only() {
640        let ops = myers_diff(&["a", "b"], &[]);
641        assert_eq!(ops.len(), 2);
642        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Delete));
643    }
644
645    #[test]
646    fn myers_diff_equal() {
647        let ops = myers_diff(&["a", "b", "c"], &["a", "b", "c"]);
648        assert_eq!(ops.len(), 3);
649        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Equal));
650    }
651
652    #[test]
653    fn execution_policy_rejects_process_exec_when_read_only() {
654        push_execution_policy(CapabilityPolicy {
655            side_effect_level: Some("read_only".to_string()),
656            capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
657            ..Default::default()
658        });
659        let result = enforce_current_policy_for_builtin("exec", &[]);
660        pop_execution_policy();
661        assert!(result.is_err());
662    }
663
664    #[test]
665    fn execution_policy_rejects_unlisted_tool() {
666        push_execution_policy(CapabilityPolicy {
667            tools: vec!["read".to_string()],
668            ..Default::default()
669        });
670        let result = enforce_current_policy_for_tool("edit");
671        pop_execution_policy();
672        assert!(result.is_err());
673    }
674
675    #[test]
676    fn normalize_run_record_preserves_trace_spans() {
677        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
678            "_type": "run_record",
679            "id": "run_trace",
680            "workflow_id": "wf",
681            "status": "completed",
682            "started_at": "1",
683            "trace_spans": [
684                {
685                    "span_id": 1,
686                    "parent_id": null,
687                    "kind": "pipeline",
688                    "name": "workflow",
689                    "start_ms": 0,
690                    "duration_ms": 42,
691                    "metadata": {"model": "demo"}
692                }
693            ]
694        }));
695
696        let run = normalize_run_record(&value).unwrap();
697        assert_eq!(run.trace_spans.len(), 1);
698        assert_eq!(run.trace_spans[0].kind, "pipeline");
699        assert_eq!(
700            run.trace_spans[0].metadata["model"],
701            serde_json::json!("demo")
702        );
703    }
704
705    // ── Tool hook tests ──────────────────────────────────────────────
706
707    #[test]
708    fn pre_tool_hook_deny_blocks_execution() {
709        clear_tool_hooks();
710        register_tool_hook(ToolHook {
711            pattern: "dangerous_*".to_string(),
712            pre: Some(Rc::new(|_name, _args| {
713                PreToolAction::Deny("blocked by policy".to_string())
714            })),
715            post: None,
716        });
717        let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
718        clear_tool_hooks();
719        assert!(matches!(result, PreToolAction::Deny(_)));
720    }
721
722    #[test]
723    fn pre_tool_hook_allow_passes_through() {
724        clear_tool_hooks();
725        register_tool_hook(ToolHook {
726            pattern: "safe_*".to_string(),
727            pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
728            post: None,
729        });
730        let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
731        clear_tool_hooks();
732        assert!(matches!(result, PreToolAction::Allow));
733    }
734
735    #[test]
736    fn pre_tool_hook_modify_rewrites_args() {
737        clear_tool_hooks();
738        register_tool_hook(ToolHook {
739            pattern: "*".to_string(),
740            pre: Some(Rc::new(|_name, _args| {
741                PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
742            })),
743            post: None,
744        });
745        let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
746        clear_tool_hooks();
747        match result {
748            PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
749            _ => panic!("expected Modify"),
750        }
751    }
752
753    #[test]
754    fn post_tool_hook_modifies_result() {
755        clear_tool_hooks();
756        register_tool_hook(ToolHook {
757            pattern: "exec".to_string(),
758            pre: None,
759            post: Some(Rc::new(|_name, result| {
760                if result.contains("SECRET") {
761                    PostToolAction::Modify("[REDACTED]".to_string())
762                } else {
763                    PostToolAction::Pass
764                }
765            })),
766        });
767        let result = run_post_tool_hooks("exec", "output with SECRET data");
768        let clean = run_post_tool_hooks("exec", "clean output");
769        clear_tool_hooks();
770        assert_eq!(result, "[REDACTED]");
771        assert_eq!(clean, "clean output");
772    }
773
774    #[test]
775    fn unmatched_hook_pattern_does_not_fire() {
776        clear_tool_hooks();
777        register_tool_hook(ToolHook {
778            pattern: "exec".to_string(),
779            pre: Some(Rc::new(|_name, _args| {
780                PreToolAction::Deny("should not match".to_string())
781            })),
782            post: None,
783        });
784        let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
785        clear_tool_hooks();
786        assert!(matches!(result, PreToolAction::Allow));
787    }
788
789    #[test]
790    fn glob_match_patterns() {
791        assert!(glob_match("*", "anything"));
792        assert!(glob_match("exec*", "exec_at"));
793        assert!(glob_match("*_file", "read_file"));
794        assert!(!glob_match("exec*", "read_file"));
795        assert!(glob_match("read_file", "read_file"));
796        assert!(!glob_match("read_file", "write_file"));
797    }
798
799    // ── Auto-compaction tests ────────────────────────────────────────
800
801    #[test]
802    fn microcompact_snips_large_output() {
803        let large = "x".repeat(50_000);
804        let result = microcompact_tool_output(&large, 10_000);
805        assert!(result.len() < 15_000);
806        assert!(result.contains("snipped"));
807    }
808
809    #[test]
810    fn microcompact_preserves_small_output() {
811        let small = "hello world";
812        let result = microcompact_tool_output(small, 10_000);
813        assert_eq!(result, small);
814    }
815
816    #[test]
817    fn microcompact_preserves_strong_keyword_lines_without_file_line() {
818        // Regression: diagnostic extraction used to require both a
819        // file:line reference AND a keyword. Strong keywords like "FAIL"
820        // and "panic" should preserve the line on their own, because they
821        // carry signal even when they appear on narrative lines (Go's
822        // "--- FAIL: TestName", Rust's "thread '...' panicked at ...",
823        // pytest's "FAILED tests/..."). The exact patterns are language-
824        // specific and don't belong in the VM — but the generic rule
825        // "strong keywords count even without file:line" does.
826        let mut output = String::new();
827        for i in 0..100 {
828            output.push_str(&format!("verbose progress line {i}\n"));
829        }
830        output.push_str("--- FAIL: TestEmpty (0.00s)\n");
831        output.push_str("thread 'tests::test_foo' panicked at src/lib.rs:42:5\n");
832        output.push_str("FAILED tests/test_parser.py::test_empty\n");
833        for i in 0..100 {
834            output.push_str(&format!("more output after failures {i}\n"));
835        }
836        let result = microcompact_tool_output(&output, 2_000);
837        assert!(
838            result.contains("--- FAIL: TestEmpty"),
839            "strong 'FAIL' keyword should preserve the line:\n{result}"
840        );
841        assert!(
842            result.contains("panicked at"),
843            "strong 'panic' keyword should preserve the line:\n{result}"
844        );
845        assert!(
846            result.contains("FAILED tests/test_parser.py"),
847            "strong 'FAIL' keyword should preserve pytest-style lines too:\n{result}"
848        );
849    }
850
851    #[test]
852    fn auto_compact_messages_reduces_count() {
853        let mut messages: Vec<serde_json::Value> = (0..20)
854            .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
855            .collect();
856        let runtime = tokio::runtime::Builder::new_current_thread()
857            .enable_all()
858            .build()
859            .unwrap();
860        let compacted = runtime.block_on(auto_compact_messages(
861            &mut messages,
862            &AutoCompactConfig {
863                compact_strategy: CompactStrategy::Truncate,
864                keep_last: 6,
865                ..Default::default()
866            },
867            None,
868        ));
869        let summary = compacted.unwrap();
870        assert!(summary.is_some());
871        assert!(messages.len() <= 7); // 6 kept + 1 summary
872        assert!(messages[0]["content"]
873            .as_str()
874            .unwrap()
875            .contains("auto-compacted"));
876    }
877
878    #[test]
879    fn auto_compact_noop_when_under_threshold() {
880        let mut messages: Vec<serde_json::Value> = (0..4)
881            .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
882            .collect();
883        let runtime = tokio::runtime::Builder::new_current_thread()
884            .enable_all()
885            .build()
886            .unwrap();
887        let compacted = runtime.block_on(auto_compact_messages(
888            &mut messages,
889            &AutoCompactConfig {
890                compact_strategy: CompactStrategy::Truncate,
891                keep_last: 6,
892                ..Default::default()
893            },
894            None,
895        ));
896        assert!(compacted.unwrap().is_none());
897        assert_eq!(messages.len(), 4);
898    }
899
900    #[test]
901    fn observation_mask_preserves_errors_masks_verbose_output() {
902        // Build a verbose output string (>500 chars) that should be masked
903        let verbose_lines: Vec<String> = (0..60)
904            .map(|i| format!("// source line {} of the generated file", i))
905            .collect();
906        let verbose_content = format!(
907            "File created: a.go\npackage main\n{}",
908            verbose_lines.join("\n")
909        );
910        let mut messages = vec![
911            serde_json::json!({"role": "assistant", "content": "I'll create the file now."}),
912            serde_json::json!({"role": "user", "content": verbose_content}),
913            serde_json::json!({"role": "assistant", "content": "Now let me run the tests."}),
914            serde_json::json!({"role": "user", "content": "error: cannot find module\nexit code 1\nfailed to compile"}),
915            serde_json::json!({"role": "assistant", "content": "I see the issue. Let me fix it."}),
916            serde_json::json!({"role": "user", "content": "File patched successfully."}),
917            // These last 2 will be kept verbatim (keep_last)
918            serde_json::json!({"role": "assistant", "content": "Running tests again."}),
919            serde_json::json!({"role": "user", "content": "All tests passed."}),
920        ];
921        let runtime = tokio::runtime::Builder::new_current_thread()
922            .enable_all()
923            .build()
924            .unwrap();
925        let compacted = runtime.block_on(auto_compact_messages(
926            &mut messages,
927            &AutoCompactConfig {
928                compact_strategy: CompactStrategy::ObservationMask,
929                keep_last: 2,
930                ..Default::default()
931            },
932            None,
933        ));
934        let summary = compacted.unwrap().unwrap();
935        // Assistant messages preserved verbatim
936        assert!(summary.contains("I'll create the file now."));
937        assert!(summary.contains("Now let me run the tests."));
938        assert!(summary.contains("I see the issue. Let me fix it."));
939        // Short error output preserved verbatim (under 500 chars)
940        assert!(summary.contains("error: cannot find module"));
941        assert!(summary.contains("exit code 1"));
942        // Verbose tool output masked (over 500 chars)
943        assert!(summary.contains("masked]"));
944        assert!(summary.contains("File created: a.go"));
945        // Short tool output in kept portion (boundary adjustment moves split_at to user msg)
946        assert!(!summary.contains("File patched successfully."));
947        // Kept messages not in summary
948        assert!(!summary.contains("Running tests again."));
949        assert!(!summary.contains("All tests passed."));
950        // 3 kept (split moved backward to user boundary) + 1 summary = 4
951        assert_eq!(messages.len(), 4);
952    }
953
954    #[test]
955    fn observation_mask_keeps_short_tool_output() {
956        let messages = vec![
957            serde_json::json!({"role": "user", "content": "OK"}),
958            serde_json::json!({"role": "user", "content": "Done."}),
959        ];
960        let summary = observation_mask_compaction(&messages, 2);
961        assert!(summary.contains("[user] OK"));
962        assert!(summary.contains("[user] Done."));
963        assert!(!summary.contains("masked"));
964    }
965
966    #[test]
967    fn estimate_message_tokens_basic() {
968        let messages = vec![
969            serde_json::json!({"role": "user", "content": "a".repeat(400)}),
970            serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
971        ];
972        let tokens = estimate_message_tokens(&messages);
973        assert_eq!(tokens, 200); // 800 chars / 4
974    }
975
976    // ── Artifact dedup and microcompaction tests ─────────────────────
977
978    #[test]
979    fn dedup_artifacts_removes_duplicates() {
980        let mut artifacts = vec![
981            ArtifactRecord {
982                id: "a1".to_string(),
983                kind: "test".to_string(),
984                text: Some("duplicate content".to_string()),
985                ..Default::default()
986            },
987            ArtifactRecord {
988                id: "a2".to_string(),
989                kind: "test".to_string(),
990                text: Some("duplicate content".to_string()),
991                ..Default::default()
992            },
993            ArtifactRecord {
994                id: "a3".to_string(),
995                kind: "test".to_string(),
996                text: Some("unique content".to_string()),
997                ..Default::default()
998            },
999        ];
1000        dedup_artifacts(&mut artifacts);
1001        assert_eq!(artifacts.len(), 2);
1002    }
1003
1004    #[test]
1005    fn microcompact_artifact_snips_oversized() {
1006        let mut artifact = ArtifactRecord {
1007            id: "a1".to_string(),
1008            kind: "test".to_string(),
1009            text: Some("x".repeat(10_000)),
1010            estimated_tokens: Some(2_500),
1011            ..Default::default()
1012        };
1013        microcompact_artifact(&mut artifact, 500);
1014        assert!(artifact.text.as_ref().unwrap().len() < 5_000);
1015        assert_eq!(artifact.estimated_tokens, Some(500));
1016    }
1017
1018    // ── Tool argument constraint tests ───────────────────────────────
1019
1020    #[test]
1021    fn arg_constraint_allows_matching_pattern() {
1022        let policy = CapabilityPolicy {
1023            tool_arg_constraints: vec![ToolArgConstraint {
1024                tool: "exec".to_string(),
1025                arg_patterns: vec!["cargo *".to_string()],
1026            }],
1027            ..Default::default()
1028        };
1029        let result = enforce_tool_arg_constraints(
1030            &policy,
1031            "exec",
1032            &serde_json::json!({"command": "cargo test"}),
1033        );
1034        assert!(result.is_ok());
1035    }
1036
1037    #[test]
1038    fn arg_constraint_rejects_non_matching_pattern() {
1039        let policy = CapabilityPolicy {
1040            tool_arg_constraints: vec![ToolArgConstraint {
1041                tool: "exec".to_string(),
1042                arg_patterns: vec!["cargo *".to_string()],
1043            }],
1044            ..Default::default()
1045        };
1046        let result = enforce_tool_arg_constraints(
1047            &policy,
1048            "exec",
1049            &serde_json::json!({"command": "rm -rf /"}),
1050        );
1051        assert!(result.is_err());
1052    }
1053
1054    #[test]
1055    fn arg_constraint_ignores_unmatched_tool() {
1056        let policy = CapabilityPolicy {
1057            tool_arg_constraints: vec![ToolArgConstraint {
1058                tool: "exec".to_string(),
1059                arg_patterns: vec!["cargo *".to_string()],
1060            }],
1061            ..Default::default()
1062        };
1063        let result = enforce_tool_arg_constraints(
1064            &policy,
1065            "read_file",
1066            &serde_json::json!({"path": "/etc/passwd"}),
1067        );
1068        assert!(result.is_ok());
1069    }
1070
1071    #[test]
1072    fn microcompact_handles_multibyte_utf8() {
1073        // Emoji are 4 bytes each — slicing at arbitrary byte offsets would panic
1074        let emoji_output = "🔥".repeat(500); // 2000 bytes, 500 chars
1075        let result = microcompact_tool_output(&emoji_output, 400);
1076        // Should not panic and should contain the snip marker
1077        assert!(result.contains("snipped"));
1078
1079        // Mixed ASCII + multi-byte
1080        let mixed = format!("{}{}{}", "a".repeat(300), "é".repeat(500), "b".repeat(300));
1081        let result2 = microcompact_tool_output(&mixed, 400);
1082        assert!(result2.contains("snipped"));
1083
1084        // CJK characters (3 bytes each)
1085        let cjk = "中文".repeat(500);
1086        let result3 = microcompact_tool_output(&cjk, 400);
1087        assert!(result3.contains("snipped"));
1088    }
1089}