harn_vm/orchestration/
mod.rs

1use std::path::PathBuf;
2use std::{cell::RefCell, thread_local};
3
4use serde::{Deserialize, Serialize};
5
6use crate::llm::vm_value_to_json;
7use crate::value::{VmError, VmValue};
8
/// Returns the current wall-clock time as a decimal string of whole seconds
/// elapsed since the Unix epoch.
///
/// NOTE(review): despite the name, the returned text is an epoch-seconds
/// number, not an RFC 3339 timestamp. Confirm with callers before changing
/// the format.
pub(crate) fn now_rfc3339() -> String {
    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        // A clock reading earlier than the epoch falls back to a zero duration
        // instead of panicking.
        .unwrap_or_default();
    since_epoch.as_secs().to_string()
}
17
18pub(crate) fn new_id(prefix: &str) -> String {
19    format!("{prefix}_{}", uuid::Uuid::now_v7())
20}
21
22pub(crate) fn default_run_dir() -> PathBuf {
23    let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
24    crate::runtime_paths::run_root(&base)
25}
26
27mod hooks;
28pub use hooks::*;
29
30mod compaction;
31pub use compaction::*;
32
33mod artifacts;
34pub use artifacts::*;
35
36mod policy;
37pub use policy::*;
38
39mod workflow;
40pub use workflow::*;
41
42mod records;
43pub use records::*;
44
thread_local! {
    // Per-thread slot holding the mutation session most recently installed on
    // this thread, or `None` when nothing is installed. Written by
    // `install_current_mutation_session` and read by `current_mutation_session`.
    static CURRENT_MUTATION_SESSION: RefCell<Option<MutationSessionRecord>> = const { RefCell::new(None) };
}
48
/// Serializable description of a mutation session.
///
/// Because of `#[serde(default)]`, fields missing from a serialized payload
/// take their `Default` value (empty string / `None`). Call
/// [`MutationSessionRecord::normalize`] to replace the empty required strings
/// with usable defaults.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct MutationSessionRecord {
    /// Session identifier; `normalize` generates a `session_…` id when empty.
    pub session_id: String,
    /// Identifier of a parent session, when one is recorded.
    pub parent_session_id: Option<String>,
    /// Identifier of the associated run, when one is recorded.
    pub run_id: Option<String>,
    /// Identifier of the associated worker, when one is recorded.
    pub worker_id: Option<String>,
    /// Free-form execution kind label, when one is recorded.
    pub execution_kind: Option<String>,
    /// Mutation scope label; `normalize` sets it to `"read_only"` when empty.
    pub mutation_scope: String,
    /// Approval mode label; `normalize` sets it to `"host_enforced"` when empty.
    pub approval_mode: String,
}
60
61impl MutationSessionRecord {
62    pub fn normalize(mut self) -> Self {
63        if self.session_id.is_empty() {
64            self.session_id = new_id("session");
65        }
66        if self.mutation_scope.is_empty() {
67            self.mutation_scope = "read_only".to_string();
68        }
69        if self.approval_mode.is_empty() {
70            self.approval_mode = "host_enforced".to_string();
71        }
72        self
73    }
74}
75
76pub fn install_current_mutation_session(session: Option<MutationSessionRecord>) {
77    CURRENT_MUTATION_SESSION.with(|slot| {
78        *slot.borrow_mut() = session.map(MutationSessionRecord::normalize);
79    });
80}
81
82pub fn current_mutation_session() -> Option<MutationSessionRecord> {
83    CURRENT_MUTATION_SESSION.with(|slot| slot.borrow().clone())
84}
85pub(crate) fn parse_json_payload<T: for<'de> Deserialize<'de>>(
86    json: serde_json::Value,
87    label: &str,
88) -> Result<T, VmError> {
89    let payload = json.to_string();
90    let mut deserializer = serde_json::Deserializer::from_str(&payload);
91    let mut tracker = serde_path_to_error::Track::new();
92    let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
93    T::deserialize(path_deserializer).map_err(|error| {
94        let snippet = if payload.len() > 600 {
95            format!("{}...", &payload[..600])
96        } else {
97            payload.clone()
98        };
99        VmError::Runtime(format!(
100            "{label} parse error at {}: {} | payload={}",
101            tracker.path(),
102            error,
103            snippet
104        ))
105    })
106}
107
108pub(crate) fn parse_json_value<T: for<'de> Deserialize<'de>>(
109    value: &VmValue,
110) -> Result<T, VmError> {
111    parse_json_payload(vm_value_to_json(value), "orchestration")
112}
113
114#[cfg(test)]
115mod tests {
116    use super::*;
117    use std::collections::BTreeMap;
118    use std::rc::Rc;
119
120    #[test]
121    fn capability_intersection_rejects_privilege_expansion() {
122        let ceiling = CapabilityPolicy {
123            tools: vec!["read".to_string()],
124            side_effect_level: Some("read_only".to_string()),
125            recursion_limit: Some(2),
126            ..Default::default()
127        };
128        let requested = CapabilityPolicy {
129            tools: vec!["read".to_string(), "edit".to_string()],
130            ..Default::default()
131        };
132        let error = ceiling.intersect(&requested).unwrap_err();
133        assert!(error.contains("host ceiling"));
134    }
135
136    #[test]
137    fn mutation_session_normalize_fills_defaults() {
138        let normalized = MutationSessionRecord::default().normalize();
139        assert!(normalized.session_id.starts_with("session_"));
140        assert_eq!(normalized.mutation_scope, "read_only");
141        assert_eq!(normalized.approval_mode, "host_enforced");
142    }
143
144    #[test]
145    fn install_current_mutation_session_round_trips() {
146        install_current_mutation_session(Some(MutationSessionRecord {
147            session_id: "session_test".to_string(),
148            mutation_scope: "apply_workspace".to_string(),
149            approval_mode: "explicit".to_string(),
150            ..Default::default()
151        }));
152        let current = current_mutation_session().expect("session installed");
153        assert_eq!(current.session_id, "session_test");
154        assert_eq!(current.mutation_scope, "apply_workspace");
155        assert_eq!(current.approval_mode, "explicit");
156
157        install_current_mutation_session(None);
158        assert!(current_mutation_session().is_none());
159    }
160
161    #[test]
162    fn active_execution_policy_rejects_unknown_bridge_builtin() {
163        push_execution_policy(CapabilityPolicy {
164            tools: vec!["read".to_string()],
165            capabilities: BTreeMap::from([(
166                "workspace".to_string(),
167                vec!["read_text".to_string()],
168            )]),
169            side_effect_level: Some("read_only".to_string()),
170            recursion_limit: Some(1),
171            ..Default::default()
172        });
173        let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
174        pop_execution_policy();
175        assert!(matches!(
176            error,
177            VmError::CategorizedError {
178                category: crate::value::ErrorCategory::ToolRejected,
179                ..
180            }
181        ));
182    }
183
184    #[test]
185    fn active_execution_policy_rejects_mcp_escape_hatch() {
186        push_execution_policy(CapabilityPolicy {
187            tools: vec!["read".to_string()],
188            capabilities: BTreeMap::from([(
189                "workspace".to_string(),
190                vec!["read_text".to_string()],
191            )]),
192            side_effect_level: Some("read_only".to_string()),
193            recursion_limit: Some(1),
194            ..Default::default()
195        });
196        let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
197        pop_execution_policy();
198        assert!(matches!(
199            error,
200            VmError::CategorizedError {
201                category: crate::value::ErrorCategory::ToolRejected,
202                ..
203            }
204        ));
205    }
206
207    #[test]
208    fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
209        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
210            "name": "legacy",
211            "act": {"mode": "llm"},
212            "verify": {"kind": "verify"},
213            "repair": {"mode": "agent"},
214        }));
215        let graph = normalize_workflow_value(&value).unwrap();
216        assert_eq!(graph.type_name, "workflow_graph");
217        assert!(graph.nodes.contains_key("act"));
218        assert!(graph.nodes.contains_key("verify"));
219        assert!(graph.nodes.contains_key("repair"));
220        assert_eq!(graph.entry, "act");
221    }
222
223    #[test]
224    fn workflow_normalization_accepts_tool_registry_nodes() {
225        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
226            "name": "registry_tools",
227            "entry": "implement",
228            "nodes": {
229                "implement": {
230                    "kind": "stage",
231                    "mode": "agent",
232                    "tools": {
233                        "_type": "tool_registry",
234                        "tools": [
235                            {"name": "read", "description": "Read files"},
236                            {"name": "run", "description": "Run commands"}
237                        ]
238                    }
239                }
240            },
241            "edges": []
242        }));
243        let graph = normalize_workflow_value(&value).unwrap();
244        let node = graph.nodes.get("implement").unwrap();
245        assert_eq!(workflow_tool_names(&node.tools), vec!["read", "run"]);
246    }
247
    /// Selecting from three `summary` artifacts under a policy capped at two
    /// artifacts must return exactly two, all of kind `summary`.
    #[test]
    fn artifact_selection_honors_budget_and_priority() {
        // The policy prioritizes `verification_result`, a kind that none of
        // the candidate artifacts below has.
        let policy = ContextPolicy {
            max_artifacts: Some(2),
            max_tokens: Some(30),
            prefer_recent: true,
            prefer_fresh: true,
            prioritize_kinds: vec!["verification_result".to_string()],
            ..Default::default()
        };
        let artifacts = vec![
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "a".to_string(),
                kind: "summary".to_string(),
                text: Some("short".to_string()),
                relevance: Some(0.9),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            // Highest relevance, but by far the longest text of the three.
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "b".to_string(),
                kind: "summary".to_string(),
                text: Some("this is a much larger artifact body".to_string()),
                relevance: Some(1.0),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "c".to_string(),
                kind: "summary".to_string(),
                text: Some("tiny".to_string()),
                relevance: Some(0.5),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
        ];
        let selected = select_artifacts(artifacts, &policy);
        assert_eq!(selected.len(), 2);
        assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
    }
294
295    #[test]
296    fn workflow_validation_rejects_condition_without_true_false_edges() {
297        let graph = WorkflowGraph {
298            entry: "gate".to_string(),
299            nodes: BTreeMap::from([(
300                "gate".to_string(),
301                WorkflowNode {
302                    id: Some("gate".to_string()),
303                    kind: "condition".to_string(),
304                    ..Default::default()
305                },
306            )]),
307            edges: vec![WorkflowEdge {
308                from: "gate".to_string(),
309                to: "next".to_string(),
310                branch: Some("true".to_string()),
311                label: None,
312            }],
313            ..Default::default()
314        };
315        let report = validate_workflow(&graph, None);
316        assert!(!report.valid);
317        assert!(report
318            .errors
319            .iter()
320            .any(|error| error.contains("true") && error.contains("false")));
321    }
322
    /// A fixture derived from a run must evaluate as passing, with no
    /// failures, when checked against that same run.
    #[test]
    fn replay_fixture_round_trip_passes() {
        // Every field is spelled out explicitly (no `..Default::default()`),
        // so this literal must be updated whenever the record types change.
        let run = RunRecord {
            type_name: "run_record".to_string(),
            id: "run_1".to_string(),
            workflow_id: "wf".to_string(),
            workflow_name: Some("demo".to_string()),
            task: "demo".to_string(),
            status: "completed".to_string(),
            started_at: "1".to_string(),
            finished_at: Some("2".to_string()),
            parent_run_id: None,
            root_run_id: Some("run_1".to_string()),
            stages: vec![RunStageRecord {
                id: "stage_1".to_string(),
                node_id: "act".to_string(),
                kind: "stage".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                branch: Some("success".to_string()),
                started_at: "1".to_string(),
                finished_at: Some("2".to_string()),
                visible_text: Some("done".to_string()),
                private_reasoning: None,
                transcript: None,
                verification: None,
                usage: None,
                artifacts: vec![ArtifactRecord {
                    type_name: "artifact".to_string(),
                    id: "a1".to_string(),
                    kind: "summary".to_string(),
                    text: Some("done".to_string()),
                    created_at: "1".to_string(),
                    ..Default::default()
                }
                .normalize()],
                consumed_artifact_ids: vec![],
                produced_artifact_ids: vec!["a1".to_string()],
                attempts: vec![],
                metadata: BTreeMap::new(),
            }],
            transitions: vec![],
            checkpoints: vec![],
            pending_nodes: vec![],
            completed_nodes: vec!["act".to_string()],
            child_runs: vec![],
            artifacts: vec![],
            policy: CapabilityPolicy::default(),
            execution: None,
            transcript: None,
            usage: None,
            replay_fixture: None,
            trace_spans: vec![],
            tool_recordings: vec![],
            metadata: BTreeMap::new(),
            persisted_path: None,
        };
        // Derive the fixture from the run, then evaluate the same run against it.
        let fixture = replay_fixture_from_run(&run);
        let report = evaluate_run_against_fixture(&run, &fixture);
        assert!(report.pass);
        assert!(report.failures.is_empty());
    }
385
386    #[test]
387    fn replay_eval_suite_reports_failed_case() {
388        let good = RunRecord {
389            id: "run_good".to_string(),
390            workflow_id: "wf".to_string(),
391            status: "completed".to_string(),
392            stages: vec![RunStageRecord {
393                node_id: "act".to_string(),
394                status: "completed".to_string(),
395                outcome: "success".to_string(),
396                ..Default::default()
397            }],
398            ..Default::default()
399        };
400        let bad = RunRecord {
401            id: "run_bad".to_string(),
402            workflow_id: "wf".to_string(),
403            status: "failed".to_string(),
404            stages: vec![RunStageRecord {
405                node_id: "act".to_string(),
406                status: "failed".to_string(),
407                outcome: "error".to_string(),
408                ..Default::default()
409            }],
410            ..Default::default()
411        };
412        let suite = evaluate_run_suite(vec![
413            (
414                good.clone(),
415                replay_fixture_from_run(&good),
416                Some("good.json".to_string()),
417            ),
418            (
419                bad.clone(),
420                replay_fixture_from_run(&good),
421                Some("bad.json".to_string()),
422            ),
423        ]);
424        assert!(!suite.pass);
425        assert_eq!(suite.total, 2);
426        assert_eq!(suite.failed, 1);
427        assert!(suite.cases.iter().any(|case| !case.pass));
428    }
429
430    #[test]
431    fn run_diff_reports_changed_stage() {
432        let left = RunRecord {
433            id: "left".to_string(),
434            workflow_id: "wf".to_string(),
435            status: "completed".to_string(),
436            stages: vec![RunStageRecord {
437                node_id: "act".to_string(),
438                status: "completed".to_string(),
439                outcome: "success".to_string(),
440                ..Default::default()
441            }],
442            ..Default::default()
443        };
444        let right = RunRecord {
445            id: "right".to_string(),
446            workflow_id: "wf".to_string(),
447            status: "failed".to_string(),
448            stages: vec![RunStageRecord {
449                node_id: "act".to_string(),
450                status: "failed".to_string(),
451                outcome: "error".to_string(),
452                ..Default::default()
453            }],
454            ..Default::default()
455        };
456        let diff = diff_run_records(&left, &right);
457        assert!(diff.status_changed);
458        assert!(!diff.identical);
459        assert_eq!(diff.stage_diffs.len(), 1);
460    }
461
462    #[test]
463    fn eval_suite_manifest_can_fail_on_baseline_diff() {
464        let temp_dir =
465            std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
466        std::fs::create_dir_all(&temp_dir).unwrap();
467        let baseline_path = temp_dir.join("baseline.json");
468        let candidate_path = temp_dir.join("candidate.json");
469
470        let baseline = RunRecord {
471            id: "baseline".to_string(),
472            workflow_id: "wf".to_string(),
473            status: "completed".to_string(),
474            stages: vec![RunStageRecord {
475                node_id: "act".to_string(),
476                status: "completed".to_string(),
477                outcome: "success".to_string(),
478                ..Default::default()
479            }],
480            ..Default::default()
481        };
482        let candidate = RunRecord {
483            id: "candidate".to_string(),
484            workflow_id: "wf".to_string(),
485            status: "failed".to_string(),
486            stages: vec![RunStageRecord {
487                node_id: "act".to_string(),
488                status: "failed".to_string(),
489                outcome: "error".to_string(),
490                ..Default::default()
491            }],
492            ..Default::default()
493        };
494
495        save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
496        save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();
497
498        let manifest = EvalSuiteManifest {
499            base_dir: Some(temp_dir.display().to_string()),
500            cases: vec![EvalSuiteCase {
501                label: Some("candidate".to_string()),
502                run_path: "candidate.json".to_string(),
503                fixture_path: None,
504                compare_to: Some("baseline.json".to_string()),
505            }],
506            ..Default::default()
507        };
508        let suite = evaluate_run_suite_manifest(&manifest).unwrap();
509        assert!(!suite.pass);
510        assert_eq!(suite.failed, 1);
511        assert!(suite.cases[0].comparison.is_some());
512        assert!(suite.cases[0]
513            .failures
514            .iter()
515            .any(|failure| failure.contains("baseline")));
516    }
517
518    #[test]
519    fn render_unified_diff_marks_removed_and_added_lines() {
520        let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
521        assert!(diff.contains("--- a/src/main.rs"));
522        assert!(diff.contains("+++ b/src/main.rs"));
523        assert!(diff.contains("-old"));
524        assert!(diff.contains("+new"));
525        assert!(diff.contains(" same"));
526    }
527
528    #[test]
529    fn render_unified_diff_identical_inputs() {
530        let text = "line1\nline2\nline3";
531        let diff = render_unified_diff(None, text, text);
532        assert!(diff.contains("--- a/artifact"));
533        let body: Vec<&str> = diff.lines().skip(2).collect();
534        assert!(!body.iter().any(|l| l.starts_with('-')));
535        assert!(!body.iter().any(|l| l.starts_with('+')));
536        assert_eq!(body.len(), 3);
537    }
538
539    #[test]
540    fn render_unified_diff_empty_before() {
541        let diff = render_unified_diff(None, "", "new1\nnew2");
542        assert!(diff.contains("+new1"));
543        assert!(diff.contains("+new2"));
544        let body: Vec<&str> = diff.lines().skip(2).collect();
545        assert!(!body.iter().any(|l| l.starts_with('-')));
546    }
547
548    #[test]
549    fn render_unified_diff_empty_after() {
550        let diff = render_unified_diff(None, "old1\nold2", "");
551        assert!(diff.contains("-old1"));
552        assert!(diff.contains("-old2"));
553        let body: Vec<&str> = diff.lines().skip(2).collect();
554        assert!(!body.iter().any(|l| l.starts_with('+')));
555    }
556
557    #[test]
558    fn render_unified_diff_both_empty() {
559        let diff = render_unified_diff(None, "", "");
560        assert!(diff.contains("--- a/artifact"));
561        assert!(diff.contains("+++ b/artifact"));
562        // No content lines
563        let body: String = diff.lines().skip(2).collect();
564        assert!(body.is_empty());
565    }
566
567    #[test]
568    fn render_unified_diff_all_changed() {
569        let diff = render_unified_diff(None, "a\nb", "x\ny");
570        assert!(diff.contains("-a"));
571        assert!(diff.contains("-b"));
572        assert!(diff.contains("+x"));
573        assert!(diff.contains("+y"));
574    }
575
576    #[test]
577    fn render_unified_diff_insertion_in_middle() {
578        let diff = render_unified_diff(None, "a\nc", "a\nb\nc");
579        assert!(diff.contains(" a"));
580        assert!(diff.contains("+b"));
581        assert!(diff.contains(" c"));
582        let body: Vec<&str> = diff.lines().skip(2).collect();
583        assert!(!body.iter().any(|l| l.starts_with('-')));
584    }
585
586    #[test]
587    fn render_unified_diff_deletion_from_middle() {
588        let diff = render_unified_diff(None, "a\nb\nc", "a\nc");
589        assert!(diff.contains(" a"));
590        assert!(diff.contains("-b"));
591        assert!(diff.contains(" c"));
592        let body: Vec<&str> = diff.lines().skip(2).collect();
593        assert!(!body.iter().any(|l| l.starts_with('+')));
594    }
595
596    #[test]
597    fn render_unified_diff_default_path() {
598        let diff = render_unified_diff(None, "a", "b");
599        assert!(diff.contains("--- a/artifact"));
600        assert!(diff.contains("+++ b/artifact"));
601    }
602
603    #[test]
604    fn render_unified_diff_large_similar() {
605        // Test performance: 1000 lines with one change in the middle
606        let mut before = Vec::new();
607        let mut after = Vec::new();
608        for i in 0..1000 {
609            before.push(format!("line {i}"));
610            after.push(format!("line {i}"));
611        }
612        before[500] = "OLD LINE 500".to_string();
613        after[500] = "NEW LINE 500".to_string();
614        let before_str = before.join("\n");
615        let after_str = after.join("\n");
616        let diff = render_unified_diff(None, &before_str, &after_str);
617        assert!(diff.contains("-OLD LINE 500"));
618        assert!(diff.contains("+NEW LINE 500"));
619        // Context lines should be present
620        assert!(diff.contains(" line 499"));
621        assert!(diff.contains(" line 501"));
622    }
623
624    #[test]
625    fn myers_diff_empty_sequences() {
626        let ops = myers_diff(&[], &[]);
627        assert!(ops.is_empty());
628    }
629
630    #[test]
631    fn myers_diff_insert_only() {
632        let ops = myers_diff(&[], &["a", "b"]);
633        assert_eq!(ops.len(), 2);
634        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Insert));
635    }
636
637    #[test]
638    fn myers_diff_delete_only() {
639        let ops = myers_diff(&["a", "b"], &[]);
640        assert_eq!(ops.len(), 2);
641        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Delete));
642    }
643
644    #[test]
645    fn myers_diff_equal() {
646        let ops = myers_diff(&["a", "b", "c"], &["a", "b", "c"]);
647        assert_eq!(ops.len(), 3);
648        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Equal));
649    }
650
651    #[test]
652    fn execution_policy_rejects_process_exec_when_read_only() {
653        push_execution_policy(CapabilityPolicy {
654            side_effect_level: Some("read_only".to_string()),
655            capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
656            ..Default::default()
657        });
658        let result = enforce_current_policy_for_builtin("exec", &[]);
659        pop_execution_policy();
660        assert!(result.is_err());
661    }
662
663    #[test]
664    fn execution_policy_rejects_unlisted_tool() {
665        push_execution_policy(CapabilityPolicy {
666            tools: vec!["read".to_string()],
667            ..Default::default()
668        });
669        let result = enforce_current_policy_for_tool("edit");
670        pop_execution_policy();
671        assert!(result.is_err());
672    }
673
674    #[test]
675    fn normalize_run_record_preserves_trace_spans() {
676        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
677            "_type": "run_record",
678            "id": "run_trace",
679            "workflow_id": "wf",
680            "status": "completed",
681            "started_at": "1",
682            "trace_spans": [
683                {
684                    "span_id": 1,
685                    "parent_id": null,
686                    "kind": "pipeline",
687                    "name": "workflow",
688                    "start_ms": 0,
689                    "duration_ms": 42,
690                    "metadata": {"model": "demo"}
691                }
692            ]
693        }));
694
695        let run = normalize_run_record(&value).unwrap();
696        assert_eq!(run.trace_spans.len(), 1);
697        assert_eq!(run.trace_spans[0].kind, "pipeline");
698        assert_eq!(
699            run.trace_spans[0].metadata["model"],
700            serde_json::json!("demo")
701        );
702    }
703
704    // ── Tool hook tests ──────────────────────────────────────────────
705
706    #[test]
707    fn pre_tool_hook_deny_blocks_execution() {
708        clear_tool_hooks();
709        register_tool_hook(ToolHook {
710            pattern: "dangerous_*".to_string(),
711            pre: Some(Rc::new(|_name, _args| {
712                PreToolAction::Deny("blocked by policy".to_string())
713            })),
714            post: None,
715        });
716        let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
717        clear_tool_hooks();
718        assert!(matches!(result, PreToolAction::Deny(_)));
719    }
720
721    #[test]
722    fn pre_tool_hook_allow_passes_through() {
723        clear_tool_hooks();
724        register_tool_hook(ToolHook {
725            pattern: "safe_*".to_string(),
726            pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
727            post: None,
728        });
729        let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
730        clear_tool_hooks();
731        assert!(matches!(result, PreToolAction::Allow));
732    }
733
734    #[test]
735    fn pre_tool_hook_modify_rewrites_args() {
736        clear_tool_hooks();
737        register_tool_hook(ToolHook {
738            pattern: "*".to_string(),
739            pre: Some(Rc::new(|_name, _args| {
740                PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
741            })),
742            post: None,
743        });
744        let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
745        clear_tool_hooks();
746        match result {
747            PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
748            _ => panic!("expected Modify"),
749        }
750    }
751
752    #[test]
753    fn post_tool_hook_modifies_result() {
754        clear_tool_hooks();
755        register_tool_hook(ToolHook {
756            pattern: "exec".to_string(),
757            pre: None,
758            post: Some(Rc::new(|_name, result| {
759                if result.contains("SECRET") {
760                    PostToolAction::Modify("[REDACTED]".to_string())
761                } else {
762                    PostToolAction::Pass
763                }
764            })),
765        });
766        let result = run_post_tool_hooks("exec", "output with SECRET data");
767        let clean = run_post_tool_hooks("exec", "clean output");
768        clear_tool_hooks();
769        assert_eq!(result, "[REDACTED]");
770        assert_eq!(clean, "clean output");
771    }
772
773    #[test]
774    fn unmatched_hook_pattern_does_not_fire() {
775        clear_tool_hooks();
776        register_tool_hook(ToolHook {
777            pattern: "exec".to_string(),
778            pre: Some(Rc::new(|_name, _args| {
779                PreToolAction::Deny("should not match".to_string())
780            })),
781            post: None,
782        });
783        let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
784        clear_tool_hooks();
785        assert!(matches!(result, PreToolAction::Allow));
786    }
787
788    #[test]
789    fn glob_match_patterns() {
790        assert!(glob_match("*", "anything"));
791        assert!(glob_match("exec*", "exec_at"));
792        assert!(glob_match("*_file", "read_file"));
793        assert!(!glob_match("exec*", "read_file"));
794        assert!(glob_match("read_file", "read_file"));
795        assert!(!glob_match("read_file", "write_file"));
796    }
797
798    // ── Auto-compaction tests ────────────────────────────────────────
799
800    #[test]
801    fn microcompact_snips_large_output() {
802        let large = "x".repeat(50_000);
803        let result = microcompact_tool_output(&large, 10_000);
804        assert!(result.len() < 15_000);
805        assert!(result.contains("snipped"));
806    }
807
808    #[test]
809    fn microcompact_preserves_small_output() {
810        let small = "hello world";
811        let result = microcompact_tool_output(small, 10_000);
812        assert_eq!(result, small);
813    }
814
815    #[test]
816    fn microcompact_preserves_strong_keyword_lines_without_file_line() {
817        // Regression: diagnostic extraction used to require both a
818        // file:line reference AND a keyword. Strong keywords like "FAIL"
819        // and "panic" should preserve the line on their own, because they
820        // carry signal even when they appear on narrative lines (Go's
821        // "--- FAIL: TestName", Rust's "thread '...' panicked at ...",
822        // pytest's "FAILED tests/..."). The exact patterns are language-
823        // specific and don't belong in the VM — but the generic rule
824        // "strong keywords count even without file:line" does.
825        let mut output = String::new();
826        for i in 0..100 {
827            output.push_str(&format!("verbose progress line {i}\n"));
828        }
829        output.push_str("--- FAIL: TestEmpty (0.00s)\n");
830        output.push_str("thread 'tests::test_foo' panicked at src/lib.rs:42:5\n");
831        output.push_str("FAILED tests/test_parser.py::test_empty\n");
832        for i in 0..100 {
833            output.push_str(&format!("more output after failures {i}\n"));
834        }
835        let result = microcompact_tool_output(&output, 2_000);
836        assert!(
837            result.contains("--- FAIL: TestEmpty"),
838            "strong 'FAIL' keyword should preserve the line:\n{result}"
839        );
840        assert!(
841            result.contains("panicked at"),
842            "strong 'panic' keyword should preserve the line:\n{result}"
843        );
844        assert!(
845            result.contains("FAILED tests/test_parser.py"),
846            "strong 'FAIL' keyword should preserve pytest-style lines too:\n{result}"
847        );
848    }
849
850    #[test]
851    fn auto_compact_messages_reduces_count() {
852        let mut messages: Vec<serde_json::Value> = (0..20)
853            .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
854            .collect();
855        let runtime = tokio::runtime::Builder::new_current_thread()
856            .enable_all()
857            .build()
858            .unwrap();
859        let compacted = runtime.block_on(auto_compact_messages(
860            &mut messages,
861            &AutoCompactConfig {
862                compact_strategy: CompactStrategy::Truncate,
863                keep_last: 6,
864                ..Default::default()
865            },
866            None,
867        ));
868        let summary = compacted.unwrap();
869        assert!(summary.is_some());
870        assert!(messages.len() <= 7); // 6 kept + 1 summary
871        assert!(messages[0]["content"]
872            .as_str()
873            .unwrap()
874            .contains("auto-compacted"));
875    }
876
877    #[test]
878    fn auto_compact_noop_when_under_threshold() {
879        let mut messages: Vec<serde_json::Value> = (0..4)
880            .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
881            .collect();
882        let runtime = tokio::runtime::Builder::new_current_thread()
883            .enable_all()
884            .build()
885            .unwrap();
886        let compacted = runtime.block_on(auto_compact_messages(
887            &mut messages,
888            &AutoCompactConfig {
889                compact_strategy: CompactStrategy::Truncate,
890                keep_last: 6,
891                ..Default::default()
892            },
893            None,
894        ));
895        assert!(compacted.unwrap().is_none());
896        assert_eq!(messages.len(), 4);
897    }
898
899    #[test]
900    fn observation_mask_preserves_errors_masks_verbose_output() {
901        // Build a verbose output string (>500 chars) that should be masked
902        let verbose_lines: Vec<String> = (0..60)
903            .map(|i| format!("// source line {} of the generated file", i))
904            .collect();
905        let verbose_content = format!(
906            "File created: a.go\npackage main\n{}",
907            verbose_lines.join("\n")
908        );
909        let mut messages = vec![
910            serde_json::json!({"role": "assistant", "content": "I'll create the file now."}),
911            serde_json::json!({"role": "user", "content": verbose_content}),
912            serde_json::json!({"role": "assistant", "content": "Now let me run the tests."}),
913            serde_json::json!({"role": "user", "content": "error: cannot find module\nexit code 1\nfailed to compile"}),
914            serde_json::json!({"role": "assistant", "content": "I see the issue. Let me fix it."}),
915            serde_json::json!({"role": "user", "content": "File patched successfully."}),
916            // These last 2 will be kept verbatim (keep_last)
917            serde_json::json!({"role": "assistant", "content": "Running tests again."}),
918            serde_json::json!({"role": "user", "content": "All tests passed."}),
919        ];
920        let runtime = tokio::runtime::Builder::new_current_thread()
921            .enable_all()
922            .build()
923            .unwrap();
924        let compacted = runtime.block_on(auto_compact_messages(
925            &mut messages,
926            &AutoCompactConfig {
927                compact_strategy: CompactStrategy::ObservationMask,
928                keep_last: 2,
929                ..Default::default()
930            },
931            None,
932        ));
933        let summary = compacted.unwrap().unwrap();
934        // Assistant messages preserved verbatim
935        assert!(summary.contains("I'll create the file now."));
936        assert!(summary.contains("Now let me run the tests."));
937        assert!(summary.contains("I see the issue. Let me fix it."));
938        // Short error output preserved verbatim (under 500 chars)
939        assert!(summary.contains("error: cannot find module"));
940        assert!(summary.contains("exit code 1"));
941        // Verbose tool output masked (over 500 chars)
942        assert!(summary.contains("masked]"));
943        assert!(summary.contains("File created: a.go"));
944        // Short tool output in kept portion (boundary adjustment moves split_at to user msg)
945        assert!(!summary.contains("File patched successfully."));
946        // Kept messages not in summary
947        assert!(!summary.contains("Running tests again."));
948        assert!(!summary.contains("All tests passed."));
949        // 3 kept (split moved backward to user boundary) + 1 summary = 4
950        assert_eq!(messages.len(), 4);
951    }
952
953    #[test]
954    fn observation_mask_keeps_short_tool_output() {
955        let messages = vec![
956            serde_json::json!({"role": "user", "content": "OK"}),
957            serde_json::json!({"role": "user", "content": "Done."}),
958        ];
959        let summary = observation_mask_compaction(&messages, 2);
960        assert!(summary.contains("[user] OK"));
961        assert!(summary.contains("[user] Done."));
962        assert!(!summary.contains("masked"));
963    }
964
965    #[test]
966    fn estimate_message_tokens_basic() {
967        let messages = vec![
968            serde_json::json!({"role": "user", "content": "a".repeat(400)}),
969            serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
970        ];
971        let tokens = estimate_message_tokens(&messages);
972        assert_eq!(tokens, 200); // 800 chars / 4
973    }
974
975    // ── Artifact dedup and microcompaction tests ─────────────────────
976
977    #[test]
978    fn dedup_artifacts_removes_duplicates() {
979        let mut artifacts = vec![
980            ArtifactRecord {
981                id: "a1".to_string(),
982                kind: "test".to_string(),
983                text: Some("duplicate content".to_string()),
984                ..Default::default()
985            },
986            ArtifactRecord {
987                id: "a2".to_string(),
988                kind: "test".to_string(),
989                text: Some("duplicate content".to_string()),
990                ..Default::default()
991            },
992            ArtifactRecord {
993                id: "a3".to_string(),
994                kind: "test".to_string(),
995                text: Some("unique content".to_string()),
996                ..Default::default()
997            },
998        ];
999        dedup_artifacts(&mut artifacts);
1000        assert_eq!(artifacts.len(), 2);
1001    }
1002
1003    #[test]
1004    fn microcompact_artifact_snips_oversized() {
1005        let mut artifact = ArtifactRecord {
1006            id: "a1".to_string(),
1007            kind: "test".to_string(),
1008            text: Some("x".repeat(10_000)),
1009            estimated_tokens: Some(2_500),
1010            ..Default::default()
1011        };
1012        microcompact_artifact(&mut artifact, 500);
1013        assert!(artifact.text.as_ref().unwrap().len() < 5_000);
1014        assert_eq!(artifact.estimated_tokens, Some(500));
1015    }
1016
1017    // ── Tool argument constraint tests ───────────────────────────────
1018
1019    #[test]
1020    fn arg_constraint_allows_matching_pattern() {
1021        let policy = CapabilityPolicy {
1022            tool_arg_constraints: vec![ToolArgConstraint {
1023                tool: "exec".to_string(),
1024                arg_patterns: vec!["cargo *".to_string()],
1025            }],
1026            ..Default::default()
1027        };
1028        let result = enforce_tool_arg_constraints(
1029            &policy,
1030            "exec",
1031            &serde_json::json!({"command": "cargo test"}),
1032        );
1033        assert!(result.is_ok());
1034    }
1035
1036    #[test]
1037    fn arg_constraint_rejects_non_matching_pattern() {
1038        let policy = CapabilityPolicy {
1039            tool_arg_constraints: vec![ToolArgConstraint {
1040                tool: "exec".to_string(),
1041                arg_patterns: vec!["cargo *".to_string()],
1042            }],
1043            ..Default::default()
1044        };
1045        let result = enforce_tool_arg_constraints(
1046            &policy,
1047            "exec",
1048            &serde_json::json!({"command": "rm -rf /"}),
1049        );
1050        assert!(result.is_err());
1051    }
1052
1053    #[test]
1054    fn arg_constraint_ignores_unmatched_tool() {
1055        let policy = CapabilityPolicy {
1056            tool_arg_constraints: vec![ToolArgConstraint {
1057                tool: "exec".to_string(),
1058                arg_patterns: vec!["cargo *".to_string()],
1059            }],
1060            ..Default::default()
1061        };
1062        let result = enforce_tool_arg_constraints(
1063            &policy,
1064            "read_file",
1065            &serde_json::json!({"path": "/etc/passwd"}),
1066        );
1067        assert!(result.is_ok());
1068    }
1069
1070    #[test]
1071    fn arg_constraint_prefers_declared_path_param_metadata() {
1072        let mut tool_metadata = std::collections::BTreeMap::new();
1073        tool_metadata.insert(
1074            "edit".to_string(),
1075            ToolRuntimePolicyMetadata {
1076                path_params: vec!["path".to_string()],
1077                ..Default::default()
1078            },
1079        );
1080        let policy = CapabilityPolicy {
1081            tool_arg_constraints: vec![ToolArgConstraint {
1082                tool: "edit".to_string(),
1083                arg_patterns: vec!["tests/*".to_string()],
1084            }],
1085            tool_metadata,
1086            ..Default::default()
1087        };
1088        let result = enforce_tool_arg_constraints(
1089            &policy,
1090            "edit",
1091            &serde_json::json!({
1092                "action": "replace_range",
1093                "path": "tests/unit/test_experiment_service.py",
1094                "content": "..."
1095            }),
1096        );
1097        assert!(result.is_ok());
1098    }
1099
1100    #[test]
1101    fn microcompact_handles_multibyte_utf8() {
1102        // Emoji are 4 bytes each — slicing at arbitrary byte offsets would panic
1103        let emoji_output = "🔥".repeat(500); // 2000 bytes, 500 chars
1104        let result = microcompact_tool_output(&emoji_output, 400);
1105        // Should not panic and should contain the snip marker
1106        assert!(result.contains("snipped"));
1107
1108        // Mixed ASCII + multi-byte
1109        let mixed = format!("{}{}{}", "a".repeat(300), "é".repeat(500), "b".repeat(300));
1110        let result2 = microcompact_tool_output(&mixed, 400);
1111        assert!(result2.contains("snipped"));
1112
1113        // CJK characters (3 bytes each)
1114        let cjk = "中文".repeat(500);
1115        let result3 = microcompact_tool_output(&cjk, 400);
1116        assert!(result3.contains("snipped"));
1117    }
1118}
harn_vm/orchestration/mod.rs

harn_vm/orchestration/
mod.rs