harn_vm/orchestration/
mod.rs

1use std::path::PathBuf;
2use std::{cell::RefCell, thread_local};
3
4use serde::{Deserialize, Serialize};
5
6use crate::llm::vm_value_to_json;
7use crate::value::{VmError, VmValue};
8
/// Returns the current wall-clock time as a decimal string of whole seconds
/// elapsed since the Unix epoch.
///
/// NOTE(review): despite the name, the value is not RFC 3339 formatted; it is
/// a bare seconds count. If the system clock reads earlier than the epoch, the
/// duration falls back to its default (zero) and the result is `"0"`.
pub(crate) fn now_rfc3339() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default();
    since_epoch.as_secs().to_string()
}
17
18pub(crate) fn new_id(prefix: &str) -> String {
19    format!("{prefix}_{}", uuid::Uuid::now_v7())
20}
21
22pub(crate) fn default_run_dir() -> PathBuf {
23    let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
24    crate::runtime_paths::run_root(&base)
25}
26
27mod hooks;
28pub use hooks::*;
29
30mod compaction;
31pub use compaction::*;
32
33mod artifacts;
34pub use artifacts::*;
35
36mod policy;
37pub use policy::*;
38
39mod workflow;
40pub use workflow::*;
41
42mod records;
43pub use records::*;
44
45thread_local! {
46    static CURRENT_MUTATION_SESSION: RefCell<Option<MutationSessionRecord>> = const { RefCell::new(None) };
47}
48
49#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
50#[serde(default)]
51pub struct MutationSessionRecord {
52    pub session_id: String,
53    pub parent_session_id: Option<String>,
54    pub run_id: Option<String>,
55    pub worker_id: Option<String>,
56    pub execution_kind: Option<String>,
57    pub mutation_scope: String,
58    /// Declarative per-tool approval policy for this session. When `None`,
59    /// the host drives approval out-of-band via `tool/pre_use` (legacy path).
60    pub approval_policy: Option<ToolApprovalPolicy>,
61}
62
63impl MutationSessionRecord {
64    pub fn normalize(mut self) -> Self {
65        if self.session_id.is_empty() {
66            self.session_id = new_id("session");
67        }
68        if self.mutation_scope.is_empty() {
69            self.mutation_scope = "read_only".to_string();
70        }
71        self
72    }
73}
74
75pub fn install_current_mutation_session(session: Option<MutationSessionRecord>) {
76    CURRENT_MUTATION_SESSION.with(|slot| {
77        *slot.borrow_mut() = session.map(MutationSessionRecord::normalize);
78    });
79}
80
81pub fn current_mutation_session() -> Option<MutationSessionRecord> {
82    CURRENT_MUTATION_SESSION.with(|slot| slot.borrow().clone())
83}
84pub(crate) fn parse_json_payload<T: for<'de> Deserialize<'de>>(
85    json: serde_json::Value,
86    label: &str,
87) -> Result<T, VmError> {
88    let payload = json.to_string();
89    let mut deserializer = serde_json::Deserializer::from_str(&payload);
90    let mut tracker = serde_path_to_error::Track::new();
91    let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
92    T::deserialize(path_deserializer).map_err(|error| {
93        let snippet = if payload.len() > 600 {
94            format!("{}...", &payload[..600])
95        } else {
96            payload.clone()
97        };
98        VmError::Runtime(format!(
99            "{label} parse error at {}: {} | payload={}",
100            tracker.path(),
101            error,
102            snippet
103        ))
104    })
105}
106
107pub(crate) fn parse_json_value<T: for<'de> Deserialize<'de>>(
108    value: &VmValue,
109) -> Result<T, VmError> {
110    parse_json_payload(vm_value_to_json(value), "orchestration")
111}
112
113#[cfg(test)]
114mod tests {
115    use super::*;
116    use std::collections::BTreeMap;
117    use std::rc::Rc;
118
119    #[test]
120    fn capability_intersection_rejects_privilege_expansion() {
121        let ceiling = CapabilityPolicy {
122            tools: vec!["read".to_string()],
123            side_effect_level: Some("read_only".to_string()),
124            recursion_limit: Some(2),
125            ..Default::default()
126        };
127        let requested = CapabilityPolicy {
128            tools: vec!["read".to_string(), "edit".to_string()],
129            ..Default::default()
130        };
131        let error = ceiling.intersect(&requested).unwrap_err();
132        assert!(error.contains("host ceiling"));
133    }
134
135    #[test]
136    fn mutation_session_normalize_fills_defaults() {
137        let normalized = MutationSessionRecord::default().normalize();
138        assert!(normalized.session_id.starts_with("session_"));
139        assert_eq!(normalized.mutation_scope, "read_only");
140        assert!(normalized.approval_policy.is_none());
141    }
142
143    #[test]
144    fn install_current_mutation_session_round_trips() {
145        let policy = ToolApprovalPolicy {
146            require_approval: vec!["edit*".to_string()],
147            ..Default::default()
148        };
149        install_current_mutation_session(Some(MutationSessionRecord {
150            session_id: "session_test".to_string(),
151            mutation_scope: "apply_workspace".to_string(),
152            approval_policy: Some(policy.clone()),
153            ..Default::default()
154        }));
155        let current = current_mutation_session().expect("session installed");
156        assert_eq!(current.session_id, "session_test");
157        assert_eq!(current.mutation_scope, "apply_workspace");
158        assert_eq!(current.approval_policy.as_ref(), Some(&policy));
159
160        install_current_mutation_session(None);
161        assert!(current_mutation_session().is_none());
162    }
163
164    #[test]
165    fn active_execution_policy_rejects_unknown_bridge_builtin() {
166        push_execution_policy(CapabilityPolicy {
167            tools: vec!["read".to_string()],
168            capabilities: BTreeMap::from([(
169                "workspace".to_string(),
170                vec!["read_text".to_string()],
171            )]),
172            side_effect_level: Some("read_only".to_string()),
173            recursion_limit: Some(1),
174            ..Default::default()
175        });
176        let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
177        pop_execution_policy();
178        assert!(matches!(
179            error,
180            VmError::CategorizedError {
181                category: crate::value::ErrorCategory::ToolRejected,
182                ..
183            }
184        ));
185    }
186
187    #[test]
188    fn active_execution_policy_rejects_mcp_escape_hatch() {
189        push_execution_policy(CapabilityPolicy {
190            tools: vec!["read".to_string()],
191            capabilities: BTreeMap::from([(
192                "workspace".to_string(),
193                vec!["read_text".to_string()],
194            )]),
195            side_effect_level: Some("read_only".to_string()),
196            recursion_limit: Some(1),
197            ..Default::default()
198        });
199        let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
200        pop_execution_policy();
201        assert!(matches!(
202            error,
203            VmError::CategorizedError {
204                category: crate::value::ErrorCategory::ToolRejected,
205                ..
206            }
207        ));
208    }
209
210    #[test]
211    fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
212        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
213            "name": "legacy",
214            "act": {"mode": "llm"},
215            "verify": {"kind": "verify"},
216            "repair": {"mode": "agent"},
217        }));
218        let graph = normalize_workflow_value(&value).unwrap();
219        assert_eq!(graph.type_name, "workflow_graph");
220        assert!(graph.nodes.contains_key("act"));
221        assert!(graph.nodes.contains_key("verify"));
222        assert!(graph.nodes.contains_key("repair"));
223        assert_eq!(graph.entry, "act");
224    }
225
226    #[test]
227    fn workflow_normalization_accepts_tool_registry_nodes() {
228        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
229            "name": "registry_tools",
230            "entry": "implement",
231            "nodes": {
232                "implement": {
233                    "kind": "stage",
234                    "mode": "agent",
235                    "tools": {
236                        "_type": "tool_registry",
237                        "tools": [
238                            {"name": "read", "description": "Read files"},
239                            {"name": "run", "description": "Run commands"}
240                        ]
241                    }
242                }
243            },
244            "edges": []
245        }));
246        let graph = normalize_workflow_value(&value).unwrap();
247        let node = graph.nodes.get("implement").unwrap();
248        assert_eq!(workflow_tool_names(&node.tools), vec!["read", "run"]);
249    }
250
251    #[test]
252    fn artifact_selection_honors_budget_and_priority() {
253        let policy = ContextPolicy {
254            max_artifacts: Some(2),
255            max_tokens: Some(30),
256            prefer_recent: true,
257            prefer_fresh: true,
258            prioritize_kinds: vec!["verification_result".to_string()],
259            ..Default::default()
260        };
261        let artifacts = vec![
262            ArtifactRecord {
263                type_name: "artifact".to_string(),
264                id: "a".to_string(),
265                kind: "summary".to_string(),
266                text: Some("short".to_string()),
267                relevance: Some(0.9),
268                created_at: now_rfc3339(),
269                ..Default::default()
270            }
271            .normalize(),
272            ArtifactRecord {
273                type_name: "artifact".to_string(),
274                id: "b".to_string(),
275                kind: "summary".to_string(),
276                text: Some("this is a much larger artifact body".to_string()),
277                relevance: Some(1.0),
278                created_at: now_rfc3339(),
279                ..Default::default()
280            }
281            .normalize(),
282            ArtifactRecord {
283                type_name: "artifact".to_string(),
284                id: "c".to_string(),
285                kind: "summary".to_string(),
286                text: Some("tiny".to_string()),
287                relevance: Some(0.5),
288                created_at: now_rfc3339(),
289                ..Default::default()
290            }
291            .normalize(),
292        ];
293        let selected = select_artifacts(artifacts, &policy);
294        assert_eq!(selected.len(), 2);
295        assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
296    }
297
298    #[test]
299    fn workflow_validation_rejects_condition_without_true_false_edges() {
300        let graph = WorkflowGraph {
301            entry: "gate".to_string(),
302            nodes: BTreeMap::from([(
303                "gate".to_string(),
304                WorkflowNode {
305                    id: Some("gate".to_string()),
306                    kind: "condition".to_string(),
307                    ..Default::default()
308                },
309            )]),
310            edges: vec![WorkflowEdge {
311                from: "gate".to_string(),
312                to: "next".to_string(),
313                branch: Some("true".to_string()),
314                label: None,
315            }],
316            ..Default::default()
317        };
318        let report = validate_workflow(&graph, None);
319        assert!(!report.valid);
320        assert!(report
321            .errors
322            .iter()
323            .any(|error| error.contains("true") && error.contains("false")));
324    }
325
326    #[test]
327    fn replay_fixture_round_trip_passes() {
328        let run = RunRecord {
329            type_name: "run_record".to_string(),
330            id: "run_1".to_string(),
331            workflow_id: "wf".to_string(),
332            workflow_name: Some("demo".to_string()),
333            task: "demo".to_string(),
334            status: "completed".to_string(),
335            started_at: "1".to_string(),
336            finished_at: Some("2".to_string()),
337            parent_run_id: None,
338            root_run_id: Some("run_1".to_string()),
339            stages: vec![RunStageRecord {
340                id: "stage_1".to_string(),
341                node_id: "act".to_string(),
342                kind: "stage".to_string(),
343                status: "completed".to_string(),
344                outcome: "success".to_string(),
345                branch: Some("success".to_string()),
346                started_at: "1".to_string(),
347                finished_at: Some("2".to_string()),
348                visible_text: Some("done".to_string()),
349                private_reasoning: None,
350                transcript: None,
351                verification: None,
352                usage: None,
353                artifacts: vec![ArtifactRecord {
354                    type_name: "artifact".to_string(),
355                    id: "a1".to_string(),
356                    kind: "summary".to_string(),
357                    text: Some("done".to_string()),
358                    created_at: "1".to_string(),
359                    ..Default::default()
360                }
361                .normalize()],
362                consumed_artifact_ids: vec![],
363                produced_artifact_ids: vec!["a1".to_string()],
364                attempts: vec![],
365                metadata: BTreeMap::new(),
366            }],
367            transitions: vec![],
368            checkpoints: vec![],
369            pending_nodes: vec![],
370            completed_nodes: vec!["act".to_string()],
371            child_runs: vec![],
372            artifacts: vec![],
373            policy: CapabilityPolicy::default(),
374            execution: None,
375            transcript: None,
376            usage: None,
377            replay_fixture: None,
378            trace_spans: vec![],
379            tool_recordings: vec![],
380            metadata: BTreeMap::new(),
381            persisted_path: None,
382        };
383        let fixture = replay_fixture_from_run(&run);
384        let report = evaluate_run_against_fixture(&run, &fixture);
385        assert!(report.pass);
386        assert!(report.failures.is_empty());
387    }
388
389    #[test]
390    fn replay_eval_suite_reports_failed_case() {
391        let good = RunRecord {
392            id: "run_good".to_string(),
393            workflow_id: "wf".to_string(),
394            status: "completed".to_string(),
395            stages: vec![RunStageRecord {
396                node_id: "act".to_string(),
397                status: "completed".to_string(),
398                outcome: "success".to_string(),
399                ..Default::default()
400            }],
401            ..Default::default()
402        };
403        let bad = RunRecord {
404            id: "run_bad".to_string(),
405            workflow_id: "wf".to_string(),
406            status: "failed".to_string(),
407            stages: vec![RunStageRecord {
408                node_id: "act".to_string(),
409                status: "failed".to_string(),
410                outcome: "error".to_string(),
411                ..Default::default()
412            }],
413            ..Default::default()
414        };
415        let suite = evaluate_run_suite(vec![
416            (
417                good.clone(),
418                replay_fixture_from_run(&good),
419                Some("good.json".to_string()),
420            ),
421            (
422                bad.clone(),
423                replay_fixture_from_run(&good),
424                Some("bad.json".to_string()),
425            ),
426        ]);
427        assert!(!suite.pass);
428        assert_eq!(suite.total, 2);
429        assert_eq!(suite.failed, 1);
430        assert!(suite.cases.iter().any(|case| !case.pass));
431    }
432
433    #[test]
434    fn run_diff_reports_changed_stage() {
435        let left = RunRecord {
436            id: "left".to_string(),
437            workflow_id: "wf".to_string(),
438            status: "completed".to_string(),
439            stages: vec![RunStageRecord {
440                node_id: "act".to_string(),
441                status: "completed".to_string(),
442                outcome: "success".to_string(),
443                ..Default::default()
444            }],
445            ..Default::default()
446        };
447        let right = RunRecord {
448            id: "right".to_string(),
449            workflow_id: "wf".to_string(),
450            status: "failed".to_string(),
451            stages: vec![RunStageRecord {
452                node_id: "act".to_string(),
453                status: "failed".to_string(),
454                outcome: "error".to_string(),
455                ..Default::default()
456            }],
457            ..Default::default()
458        };
459        let diff = diff_run_records(&left, &right);
460        assert!(diff.status_changed);
461        assert!(!diff.identical);
462        assert_eq!(diff.stage_diffs.len(), 1);
463    }
464
465    #[test]
466    fn eval_suite_manifest_can_fail_on_baseline_diff() {
467        let temp_dir =
468            std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
469        std::fs::create_dir_all(&temp_dir).unwrap();
470        let baseline_path = temp_dir.join("baseline.json");
471        let candidate_path = temp_dir.join("candidate.json");
472
473        let baseline = RunRecord {
474            id: "baseline".to_string(),
475            workflow_id: "wf".to_string(),
476            status: "completed".to_string(),
477            stages: vec![RunStageRecord {
478                node_id: "act".to_string(),
479                status: "completed".to_string(),
480                outcome: "success".to_string(),
481                ..Default::default()
482            }],
483            ..Default::default()
484        };
485        let candidate = RunRecord {
486            id: "candidate".to_string(),
487            workflow_id: "wf".to_string(),
488            status: "failed".to_string(),
489            stages: vec![RunStageRecord {
490                node_id: "act".to_string(),
491                status: "failed".to_string(),
492                outcome: "error".to_string(),
493                ..Default::default()
494            }],
495            ..Default::default()
496        };
497
498        save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
499        save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();
500
501        let manifest = EvalSuiteManifest {
502            base_dir: Some(temp_dir.display().to_string()),
503            cases: vec![EvalSuiteCase {
504                label: Some("candidate".to_string()),
505                run_path: "candidate.json".to_string(),
506                fixture_path: None,
507                compare_to: Some("baseline.json".to_string()),
508            }],
509            ..Default::default()
510        };
511        let suite = evaluate_run_suite_manifest(&manifest).unwrap();
512        assert!(!suite.pass);
513        assert_eq!(suite.failed, 1);
514        assert!(suite.cases[0].comparison.is_some());
515        assert!(suite.cases[0]
516            .failures
517            .iter()
518            .any(|failure| failure.contains("baseline")));
519    }
520
521    #[test]
522    fn render_unified_diff_marks_removed_and_added_lines() {
523        let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
524        assert!(diff.contains("--- a/src/main.rs"));
525        assert!(diff.contains("+++ b/src/main.rs"));
526        assert!(diff.contains("-old"));
527        assert!(diff.contains("+new"));
528        assert!(diff.contains(" same"));
529    }
530
531    #[test]
532    fn render_unified_diff_identical_inputs() {
533        let text = "line1\nline2\nline3";
534        let diff = render_unified_diff(None, text, text);
535        assert!(diff.contains("--- a/artifact"));
536        let body: Vec<&str> = diff.lines().skip(2).collect();
537        assert!(!body.iter().any(|l| l.starts_with('-')));
538        assert!(!body.iter().any(|l| l.starts_with('+')));
539        assert_eq!(body.len(), 3);
540    }
541
542    #[test]
543    fn render_unified_diff_empty_before() {
544        let diff = render_unified_diff(None, "", "new1\nnew2");
545        assert!(diff.contains("+new1"));
546        assert!(diff.contains("+new2"));
547        let body: Vec<&str> = diff.lines().skip(2).collect();
548        assert!(!body.iter().any(|l| l.starts_with('-')));
549    }
550
551    #[test]
552    fn render_unified_diff_empty_after() {
553        let diff = render_unified_diff(None, "old1\nold2", "");
554        assert!(diff.contains("-old1"));
555        assert!(diff.contains("-old2"));
556        let body: Vec<&str> = diff.lines().skip(2).collect();
557        assert!(!body.iter().any(|l| l.starts_with('+')));
558    }
559
560    #[test]
561    fn render_unified_diff_both_empty() {
562        let diff = render_unified_diff(None, "", "");
563        assert!(diff.contains("--- a/artifact"));
564        assert!(diff.contains("+++ b/artifact"));
565        // No content lines
566        let body: String = diff.lines().skip(2).collect();
567        assert!(body.is_empty());
568    }
569
570    #[test]
571    fn render_unified_diff_all_changed() {
572        let diff = render_unified_diff(None, "a\nb", "x\ny");
573        assert!(diff.contains("-a"));
574        assert!(diff.contains("-b"));
575        assert!(diff.contains("+x"));
576        assert!(diff.contains("+y"));
577    }
578
579    #[test]
580    fn render_unified_diff_insertion_in_middle() {
581        let diff = render_unified_diff(None, "a\nc", "a\nb\nc");
582        assert!(diff.contains(" a"));
583        assert!(diff.contains("+b"));
584        assert!(diff.contains(" c"));
585        let body: Vec<&str> = diff.lines().skip(2).collect();
586        assert!(!body.iter().any(|l| l.starts_with('-')));
587    }
588
589    #[test]
590    fn render_unified_diff_deletion_from_middle() {
591        let diff = render_unified_diff(None, "a\nb\nc", "a\nc");
592        assert!(diff.contains(" a"));
593        assert!(diff.contains("-b"));
594        assert!(diff.contains(" c"));
595        let body: Vec<&str> = diff.lines().skip(2).collect();
596        assert!(!body.iter().any(|l| l.starts_with('+')));
597    }
598
599    #[test]
600    fn render_unified_diff_default_path() {
601        let diff = render_unified_diff(None, "a", "b");
602        assert!(diff.contains("--- a/artifact"));
603        assert!(diff.contains("+++ b/artifact"));
604    }
605
606    #[test]
607    fn render_unified_diff_large_similar() {
608        // Test performance: 1000 lines with one change in the middle
609        let mut before = Vec::new();
610        let mut after = Vec::new();
611        for i in 0..1000 {
612            before.push(format!("line {i}"));
613            after.push(format!("line {i}"));
614        }
615        before[500] = "OLD LINE 500".to_string();
616        after[500] = "NEW LINE 500".to_string();
617        let before_str = before.join("\n");
618        let after_str = after.join("\n");
619        let diff = render_unified_diff(None, &before_str, &after_str);
620        assert!(diff.contains("-OLD LINE 500"));
621        assert!(diff.contains("+NEW LINE 500"));
622        // Context lines should be present
623        assert!(diff.contains(" line 499"));
624        assert!(diff.contains(" line 501"));
625    }
626
627    #[test]
628    fn myers_diff_empty_sequences() {
629        let ops = myers_diff(&[], &[]);
630        assert!(ops.is_empty());
631    }
632
633    #[test]
634    fn myers_diff_insert_only() {
635        let ops = myers_diff(&[], &["a", "b"]);
636        assert_eq!(ops.len(), 2);
637        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Insert));
638    }
639
640    #[test]
641    fn myers_diff_delete_only() {
642        let ops = myers_diff(&["a", "b"], &[]);
643        assert_eq!(ops.len(), 2);
644        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Delete));
645    }
646
647    #[test]
648    fn myers_diff_equal() {
649        let ops = myers_diff(&["a", "b", "c"], &["a", "b", "c"]);
650        assert_eq!(ops.len(), 3);
651        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Equal));
652    }
653
654    #[test]
655    fn execution_policy_rejects_process_exec_when_read_only() {
656        push_execution_policy(CapabilityPolicy {
657            side_effect_level: Some("read_only".to_string()),
658            capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
659            ..Default::default()
660        });
661        let result = enforce_current_policy_for_builtin("exec", &[]);
662        pop_execution_policy();
663        assert!(result.is_err());
664    }
665
666    #[test]
667    fn execution_policy_rejects_unlisted_tool() {
668        push_execution_policy(CapabilityPolicy {
669            tools: vec!["read".to_string()],
670            ..Default::default()
671        });
672        let result = enforce_current_policy_for_tool("edit");
673        pop_execution_policy();
674        assert!(result.is_err());
675    }
676
677    #[test]
678    fn normalize_run_record_preserves_trace_spans() {
679        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
680            "_type": "run_record",
681            "id": "run_trace",
682            "workflow_id": "wf",
683            "status": "completed",
684            "started_at": "1",
685            "trace_spans": [
686                {
687                    "span_id": 1,
688                    "parent_id": null,
689                    "kind": "pipeline",
690                    "name": "workflow",
691                    "start_ms": 0,
692                    "duration_ms": 42,
693                    "metadata": {"model": "demo"}
694                }
695            ]
696        }));
697
698        let run = normalize_run_record(&value).unwrap();
699        assert_eq!(run.trace_spans.len(), 1);
700        assert_eq!(run.trace_spans[0].kind, "pipeline");
701        assert_eq!(
702            run.trace_spans[0].metadata["model"],
703            serde_json::json!("demo")
704        );
705    }
706
707    // ── Tool hook tests ──────────────────────────────────────────────
708
709    #[test]
710    fn pre_tool_hook_deny_blocks_execution() {
711        clear_tool_hooks();
712        register_tool_hook(ToolHook {
713            pattern: "dangerous_*".to_string(),
714            pre: Some(Rc::new(|_name, _args| {
715                PreToolAction::Deny("blocked by policy".to_string())
716            })),
717            post: None,
718        });
719        let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
720        clear_tool_hooks();
721        assert!(matches!(result, PreToolAction::Deny(_)));
722    }
723
724    #[test]
725    fn pre_tool_hook_allow_passes_through() {
726        clear_tool_hooks();
727        register_tool_hook(ToolHook {
728            pattern: "safe_*".to_string(),
729            pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
730            post: None,
731        });
732        let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
733        clear_tool_hooks();
734        assert!(matches!(result, PreToolAction::Allow));
735    }
736
737    #[test]
738    fn pre_tool_hook_modify_rewrites_args() {
739        clear_tool_hooks();
740        register_tool_hook(ToolHook {
741            pattern: "*".to_string(),
742            pre: Some(Rc::new(|_name, _args| {
743                PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
744            })),
745            post: None,
746        });
747        let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
748        clear_tool_hooks();
749        match result {
750            PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
751            _ => panic!("expected Modify"),
752        }
753    }
754
755    #[test]
756    fn post_tool_hook_modifies_result() {
757        clear_tool_hooks();
758        register_tool_hook(ToolHook {
759            pattern: "exec".to_string(),
760            pre: None,
761            post: Some(Rc::new(|_name, result| {
762                if result.contains("SECRET") {
763                    PostToolAction::Modify("[REDACTED]".to_string())
764                } else {
765                    PostToolAction::Pass
766                }
767            })),
768        });
769        let result = run_post_tool_hooks("exec", "output with SECRET data");
770        let clean = run_post_tool_hooks("exec", "clean output");
771        clear_tool_hooks();
772        assert_eq!(result, "[REDACTED]");
773        assert_eq!(clean, "clean output");
774    }
775
776    #[test]
777    fn unmatched_hook_pattern_does_not_fire() {
778        clear_tool_hooks();
779        register_tool_hook(ToolHook {
780            pattern: "exec".to_string(),
781            pre: Some(Rc::new(|_name, _args| {
782                PreToolAction::Deny("should not match".to_string())
783            })),
784            post: None,
785        });
786        let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
787        clear_tool_hooks();
788        assert!(matches!(result, PreToolAction::Allow));
789    }
790
791    #[test]
792    fn glob_match_patterns() {
793        assert!(glob_match("*", "anything"));
794        assert!(glob_match("exec*", "exec_at"));
795        assert!(glob_match("*_file", "read_file"));
796        assert!(!glob_match("exec*", "read_file"));
797        assert!(glob_match("read_file", "read_file"));
798        assert!(!glob_match("read_file", "write_file"));
799    }
800
801    // ── Auto-compaction tests ────────────────────────────────────────
802
803    #[test]
804    fn microcompact_snips_large_output() {
805        let large = "x".repeat(50_000);
806        let result = microcompact_tool_output(&large, 10_000);
807        assert!(result.len() < 15_000);
808        assert!(result.contains("snipped"));
809    }
810
811    #[test]
812    fn microcompact_preserves_small_output() {
813        let small = "hello world";
814        let result = microcompact_tool_output(small, 10_000);
815        assert_eq!(result, small);
816    }
817
818    #[test]
819    fn microcompact_preserves_strong_keyword_lines_without_file_line() {
820        // Regression: diagnostic extraction used to require both a
821        // file:line reference AND a keyword. Strong keywords like "FAIL"
822        // and "panic" should preserve the line on their own, because they
823        // carry signal even when they appear on narrative lines (Go's
824        // "--- FAIL: TestName", Rust's "thread '...' panicked at ...",
825        // pytest's "FAILED tests/..."). The exact patterns are language-
826        // specific and don't belong in the VM — but the generic rule
827        // "strong keywords count even without file:line" does.
828        let mut output = String::new();
829        for i in 0..100 {
830            output.push_str(&format!("verbose progress line {i}\n"));
831        }
832        output.push_str("--- FAIL: TestEmpty (0.00s)\n");
833        output.push_str("thread 'tests::test_foo' panicked at src/lib.rs:42:5\n");
834        output.push_str("FAILED tests/test_parser.py::test_empty\n");
835        for i in 0..100 {
836            output.push_str(&format!("more output after failures {i}\n"));
837        }
838        let result = microcompact_tool_output(&output, 2_000);
839        assert!(
840            result.contains("--- FAIL: TestEmpty"),
841            "strong 'FAIL' keyword should preserve the line:\n{result}"
842        );
843        assert!(
844            result.contains("panicked at"),
845            "strong 'panic' keyword should preserve the line:\n{result}"
846        );
847        assert!(
848            result.contains("FAILED tests/test_parser.py"),
849            "strong 'FAIL' keyword should preserve pytest-style lines too:\n{result}"
850        );
851    }
852
853    #[test]
854    fn auto_compact_messages_reduces_count() {
855        let mut messages: Vec<serde_json::Value> = (0..20)
856            .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
857            .collect();
858        let runtime = tokio::runtime::Builder::new_current_thread()
859            .enable_all()
860            .build()
861            .unwrap();
862        let compacted = runtime.block_on(auto_compact_messages(
863            &mut messages,
864            &AutoCompactConfig {
865                compact_strategy: CompactStrategy::Truncate,
866                keep_last: 6,
867                ..Default::default()
868            },
869            None,
870        ));
871        let summary = compacted.unwrap();
872        assert!(summary.is_some());
873        assert!(messages.len() <= 7); // 6 kept + 1 summary
874        assert!(messages[0]["content"]
875            .as_str()
876            .unwrap()
877            .contains("auto-compacted"));
878    }
879
880    #[test]
881    fn auto_compact_noop_when_under_threshold() {
882        let mut messages: Vec<serde_json::Value> = (0..4)
883            .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
884            .collect();
885        let runtime = tokio::runtime::Builder::new_current_thread()
886            .enable_all()
887            .build()
888            .unwrap();
889        let compacted = runtime.block_on(auto_compact_messages(
890            &mut messages,
891            &AutoCompactConfig {
892                compact_strategy: CompactStrategy::Truncate,
893                keep_last: 6,
894                ..Default::default()
895            },
896            None,
897        ));
898        assert!(compacted.unwrap().is_none());
899        assert_eq!(messages.len(), 4);
900    }
901
902    #[test]
903    fn observation_mask_preserves_errors_masks_verbose_output() {
904        // Build a verbose output string (>500 chars) that should be masked
905        let verbose_lines: Vec<String> = (0..60)
906            .map(|i| format!("// source line {} of the generated file", i))
907            .collect();
908        let verbose_content = format!(
909            "File created: a.go\npackage main\n{}",
910            verbose_lines.join("\n")
911        );
912        let mut messages = vec![
913            serde_json::json!({"role": "assistant", "content": "I'll create the file now."}),
914            serde_json::json!({"role": "user", "content": verbose_content}),
915            serde_json::json!({"role": "assistant", "content": "Now let me run the tests."}),
916            serde_json::json!({"role": "user", "content": "error: cannot find module\nexit code 1\nfailed to compile"}),
917            serde_json::json!({"role": "assistant", "content": "I see the issue. Let me fix it."}),
918            serde_json::json!({"role": "user", "content": "File patched successfully."}),
919            // These last 2 will be kept verbatim (keep_last)
920            serde_json::json!({"role": "assistant", "content": "Running tests again."}),
921            serde_json::json!({"role": "user", "content": "All tests passed."}),
922        ];
923        let runtime = tokio::runtime::Builder::new_current_thread()
924            .enable_all()
925            .build()
926            .unwrap();
927        let compacted = runtime.block_on(auto_compact_messages(
928            &mut messages,
929            &AutoCompactConfig {
930                compact_strategy: CompactStrategy::ObservationMask,
931                keep_last: 2,
932                ..Default::default()
933            },
934            None,
935        ));
936        let summary = compacted.unwrap().unwrap();
937        // Assistant messages preserved verbatim
938        assert!(summary.contains("I'll create the file now."));
939        assert!(summary.contains("Now let me run the tests."));
940        assert!(summary.contains("I see the issue. Let me fix it."));
941        // Short error output preserved verbatim (under 500 chars)
942        assert!(summary.contains("error: cannot find module"));
943        assert!(summary.contains("exit code 1"));
944        // Verbose tool output masked (over 500 chars)
945        assert!(summary.contains("masked]"));
946        assert!(summary.contains("File created: a.go"));
947        // Short tool output in kept portion (boundary adjustment moves split_at to user msg)
948        assert!(!summary.contains("File patched successfully."));
949        // Kept messages not in summary
950        assert!(!summary.contains("Running tests again."));
951        assert!(!summary.contains("All tests passed."));
952        // 3 kept (split moved backward to user boundary) + 1 summary = 4
953        assert_eq!(messages.len(), 4);
954    }
955
956    #[test]
957    fn observation_mask_keeps_short_tool_output() {
958        let messages = vec![
959            serde_json::json!({"role": "user", "content": "OK"}),
960            serde_json::json!({"role": "user", "content": "Done."}),
961        ];
962        let summary = observation_mask_compaction(&messages, 2);
963        assert!(summary.contains("[user] OK"));
964        assert!(summary.contains("[user] Done."));
965        assert!(!summary.contains("masked"));
966    }
967
968    #[test]
969    fn estimate_message_tokens_basic() {
970        let messages = vec![
971            serde_json::json!({"role": "user", "content": "a".repeat(400)}),
972            serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
973        ];
974        let tokens = estimate_message_tokens(&messages);
975        assert_eq!(tokens, 200); // 800 chars / 4
976    }
977
978    // ── Artifact dedup and microcompaction tests ─────────────────────
979
980    #[test]
981    fn dedup_artifacts_removes_duplicates() {
982        let mut artifacts = vec![
983            ArtifactRecord {
984                id: "a1".to_string(),
985                kind: "test".to_string(),
986                text: Some("duplicate content".to_string()),
987                ..Default::default()
988            },
989            ArtifactRecord {
990                id: "a2".to_string(),
991                kind: "test".to_string(),
992                text: Some("duplicate content".to_string()),
993                ..Default::default()
994            },
995            ArtifactRecord {
996                id: "a3".to_string(),
997                kind: "test".to_string(),
998                text: Some("unique content".to_string()),
999                ..Default::default()
1000            },
1001        ];
1002        dedup_artifacts(&mut artifacts);
1003        assert_eq!(artifacts.len(), 2);
1004    }
1005
1006    #[test]
1007    fn microcompact_artifact_snips_oversized() {
1008        let mut artifact = ArtifactRecord {
1009            id: "a1".to_string(),
1010            kind: "test".to_string(),
1011            text: Some("x".repeat(10_000)),
1012            estimated_tokens: Some(2_500),
1013            ..Default::default()
1014        };
1015        microcompact_artifact(&mut artifact, 500);
1016        assert!(artifact.text.as_ref().unwrap().len() < 5_000);
1017        assert_eq!(artifact.estimated_tokens, Some(500));
1018    }
1019
1020    // ── Tool argument constraint tests ───────────────────────────────
1021
1022    #[test]
1023    fn arg_constraint_allows_matching_pattern() {
1024        let policy = CapabilityPolicy {
1025            tool_arg_constraints: vec![ToolArgConstraint {
1026                tool: "exec".to_string(),
1027                arg_patterns: vec!["cargo *".to_string()],
1028            }],
1029            ..Default::default()
1030        };
1031        let result = enforce_tool_arg_constraints(
1032            &policy,
1033            "exec",
1034            &serde_json::json!({"command": "cargo test"}),
1035        );
1036        assert!(result.is_ok());
1037    }
1038
1039    #[test]
1040    fn arg_constraint_rejects_non_matching_pattern() {
1041        let policy = CapabilityPolicy {
1042            tool_arg_constraints: vec![ToolArgConstraint {
1043                tool: "exec".to_string(),
1044                arg_patterns: vec!["cargo *".to_string()],
1045            }],
1046            ..Default::default()
1047        };
1048        let result = enforce_tool_arg_constraints(
1049            &policy,
1050            "exec",
1051            &serde_json::json!({"command": "rm -rf /"}),
1052        );
1053        assert!(result.is_err());
1054    }
1055
1056    #[test]
1057    fn arg_constraint_ignores_unmatched_tool() {
1058        let policy = CapabilityPolicy {
1059            tool_arg_constraints: vec![ToolArgConstraint {
1060                tool: "exec".to_string(),
1061                arg_patterns: vec!["cargo *".to_string()],
1062            }],
1063            ..Default::default()
1064        };
1065        let result = enforce_tool_arg_constraints(
1066            &policy,
1067            "read_file",
1068            &serde_json::json!({"path": "/etc/passwd"}),
1069        );
1070        assert!(result.is_ok());
1071    }
1072
1073    #[test]
1074    fn arg_constraint_prefers_declared_path_param_metadata() {
1075        let mut tool_metadata = std::collections::BTreeMap::new();
1076        tool_metadata.insert(
1077            "edit".to_string(),
1078            ToolRuntimePolicyMetadata {
1079                path_params: vec!["path".to_string()],
1080                ..Default::default()
1081            },
1082        );
1083        let policy = CapabilityPolicy {
1084            tool_arg_constraints: vec![ToolArgConstraint {
1085                tool: "edit".to_string(),
1086                arg_patterns: vec!["tests/*".to_string()],
1087            }],
1088            tool_metadata,
1089            ..Default::default()
1090        };
1091        let result = enforce_tool_arg_constraints(
1092            &policy,
1093            "edit",
1094            &serde_json::json!({
1095                "action": "replace_range",
1096                "path": "tests/unit/test_experiment_service.py",
1097                "content": "..."
1098            }),
1099        );
1100        assert!(result.is_ok());
1101    }
1102
1103    #[test]
1104    fn microcompact_handles_multibyte_utf8() {
1105        // Emoji are 4 bytes each — slicing at arbitrary byte offsets would panic
1106        let emoji_output = "🔥".repeat(500); // 2000 bytes, 500 chars
1107        let result = microcompact_tool_output(&emoji_output, 400);
1108        // Should not panic and should contain the snip marker
1109        assert!(result.contains("snipped"));
1110
1111        // Mixed ASCII + multi-byte
1112        let mixed = format!("{}{}{}", "a".repeat(300), "é".repeat(500), "b".repeat(300));
1113        let result2 = microcompact_tool_output(&mixed, 400);
1114        assert!(result2.contains("snipped"));
1115
1116        // CJK characters (3 bytes each)
1117        let cjk = "中文".repeat(500);
1118        let result3 = microcompact_tool_output(&cjk, 400);
1119        assert!(result3.contains("snipped"));
1120    }
1121
1122    #[test]
1123    fn workflow_node_defaults_exit_when_verified_to_false() {
1124        let node = WorkflowNode::default();
1125        assert!(!node.exit_when_verified);
1126    }
1127
1128    #[test]
1129    fn workflow_node_exit_when_verified_round_trips_through_serde() {
1130        let node = WorkflowNode {
1131            id: Some("execute".to_string()),
1132            kind: "stage".to_string(),
1133            exit_when_verified: true,
1134            ..Default::default()
1135        };
1136        let encoded = serde_json::to_value(&node).expect("serialize");
1137        assert_eq!(
1138            encoded.get("exit_when_verified"),
1139            Some(&serde_json::json!(true))
1140        );
1141        let decoded: WorkflowNode = serde_json::from_value(encoded).expect("deserialize");
1142        assert!(decoded.exit_when_verified);
1143    }
1144
1145    #[test]
1146    fn workflow_node_exit_when_verified_accepts_missing_field_for_backcompat() {
1147        let encoded = serde_json::json!({
1148            "id": "legacy_stage",
1149            "kind": "stage",
1150        });
1151        let decoded: WorkflowNode = serde_json::from_value(encoded).expect("deserialize");
1152        assert!(
1153            !decoded.exit_when_verified,
1154            "nodes serialized before this field was added must deserialize with the default"
1155        );
1156    }
1157}
harn_vm/orchestration/mod.rs

harn_vm/orchestration/
mod.rs