1use std::path::PathBuf;
2use std::{cell::RefCell, thread_local};
3
4use serde::{Deserialize, Serialize};
5
6use crate::llm::vm_value_to_json;
7use crate::value::{VmError, VmValue};
8
/// Current wall-clock time as an RFC 3339 UTC timestamp
/// (`YYYY-MM-DDTHH:MM:SSZ`); falls back to the epoch instant if the system
/// clock reports a time before 1970.
///
/// Fix: the previous implementation returned raw epoch seconds despite the
/// `rfc3339` name; timestamps are now actually RFC 3339 (which also sorts
/// lexicographically in chronological order).
pub(crate) fn now_rfc3339() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    unix_secs_to_rfc3339(secs)
}

/// Formats seconds since the Unix epoch as `YYYY-MM-DDTHH:MM:SSZ` using
/// Howard Hinnant's `civil_from_days` date algorithm (std-only, no chrono).
/// Valid for any timestamp at or after 1970-01-01.
fn unix_secs_to_rfc3339(secs: u64) -> String {
    let days = (secs / 86_400) as i64;
    let rem = secs % 86_400;
    let (hour, min, sec) = (rem / 3_600, (rem % 3_600) / 60, rem % 60);
    // civil_from_days: shift epoch to 0000-03-01 so leap days fall at the
    // end of the "computational year".
    let z = days + 719_468;
    let era = z / 146_097;
    let doe = z - era * 146_097;
    let yoe = (doe - doe / 1_460 + doe / 36_524 - doe / 146_096) / 365;
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
    let mp = (5 * doy + 2) / 153;
    let day = doy - (153 * mp + 2) / 5 + 1;
    let month = if mp < 10 { mp + 3 } else { mp - 9 };
    let mut year = yoe + era * 400;
    if month <= 2 {
        year += 1;
    }
    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{min:02}:{sec:02}Z")
}
17
18pub(crate) fn new_id(prefix: &str) -> String {
19 format!("{prefix}_{}", uuid::Uuid::now_v7())
20}
21
/// Directory where run records are persisted: `$HARN_RUN_DIR` when set,
/// otherwise the relative `.harn-runs` fallback.
pub(crate) fn default_run_dir() -> PathBuf {
    match std::env::var("HARN_RUN_DIR") {
        Ok(dir) => PathBuf::from(dir),
        Err(_) => PathBuf::from(".harn-runs"),
    }
}
27
28mod hooks;
29pub use hooks::*;
30
31mod compaction;
32pub use compaction::*;
33
34mod artifacts;
35pub use artifacts::*;
36
37mod policy;
38pub use policy::*;
39
40mod workflow;
41pub use workflow::*;
42
43mod records;
44pub use records::*;
45
// Per-thread slot holding the mutation session attributed to work running on
// this thread; `None` until `install_current_mutation_session` installs one.
thread_local! {
    static CURRENT_MUTATION_SESSION: RefCell<Option<MutationSessionRecord>> = const { RefCell::new(None) };
}
49
/// Record describing the mutation session active for a unit of work.
///
/// `#[serde(default)]` lets every field deserialize from a missing key, so
/// records written by older versions stay readable.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct MutationSessionRecord {
    // Unique id; `normalize` generates one ("session_…") when empty.
    pub session_id: String,
    // Optional link to a parent session, if any.
    pub parent_session_id: Option<String>,
    // Optional association with a run / worker / execution kind.
    // NOTE(review): semantics of these fields are defined by callers
    // elsewhere in the crate — confirm before documenting further.
    pub run_id: Option<String>,
    pub worker_id: Option<String>,
    pub execution_kind: Option<String>,
    // Defaulted to "read_only" by `normalize` when empty.
    pub mutation_scope: String,
    // Defaulted to "host_enforced" by `normalize` when empty.
    pub approval_mode: String,
}
61
62impl MutationSessionRecord {
63 pub fn normalize(mut self) -> Self {
64 if self.session_id.is_empty() {
65 self.session_id = new_id("session");
66 }
67 if self.mutation_scope.is_empty() {
68 self.mutation_scope = "read_only".to_string();
69 }
70 if self.approval_mode.is_empty() {
71 self.approval_mode = "host_enforced".to_string();
72 }
73 self
74 }
75}
76
77pub fn install_current_mutation_session(session: Option<MutationSessionRecord>) {
78 CURRENT_MUTATION_SESSION.with(|slot| {
79 *slot.borrow_mut() = session.map(MutationSessionRecord::normalize);
80 });
81}
82
83pub fn current_mutation_session() -> Option<MutationSessionRecord> {
84 CURRENT_MUTATION_SESSION.with(|slot| slot.borrow().clone())
85}
86pub(crate) fn parse_json_payload<T: for<'de> Deserialize<'de>>(
87 json: serde_json::Value,
88 label: &str,
89) -> Result<T, VmError> {
90 let payload = json.to_string();
91 let mut deserializer = serde_json::Deserializer::from_str(&payload);
92 let mut tracker = serde_path_to_error::Track::new();
93 let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
94 T::deserialize(path_deserializer).map_err(|error| {
95 let snippet = if payload.len() > 600 {
96 format!("{}...", &payload[..600])
97 } else {
98 payload.clone()
99 };
100 VmError::Runtime(format!(
101 "{label} parse error at {}: {} | payload={}",
102 tracker.path(),
103 error,
104 snippet
105 ))
106 })
107}
108
109pub(crate) fn parse_json_value<T: for<'de> Deserialize<'de>>(
110 value: &VmValue,
111) -> Result<T, VmError> {
112 parse_json_payload(vm_value_to_json(value), "orchestration")
113}
114
115#[cfg(test)]
116mod tests {
117 use super::*;
118 use std::collections::BTreeMap;
119 use std::rc::Rc;
120
    // Requesting a tool ("edit") absent from the host ceiling must fail the
    // intersection with an error naming the ceiling.
    #[test]
    fn capability_intersection_rejects_privilege_expansion() {
        let ceiling = CapabilityPolicy {
            tools: vec!["read".to_string()],
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(2),
            ..Default::default()
        };
        let requested = CapabilityPolicy {
            tools: vec!["read".to_string(), "edit".to_string()],
            ..Default::default()
        };
        let error = ceiling.intersect(&requested).unwrap_err();
        assert!(error.contains("host ceiling"));
    }
136
    // A default (all-empty) record gains a generated id plus the fallback
    // scope/approval values after normalize().
    #[test]
    fn mutation_session_normalize_fills_defaults() {
        let normalized = MutationSessionRecord::default().normalize();
        assert!(normalized.session_id.starts_with("session_"));
        assert_eq!(normalized.mutation_scope, "read_only");
        assert_eq!(normalized.approval_mode, "host_enforced");
    }
144
    // Installing a session makes it readable via current_mutation_session();
    // installing None clears the slot again.
    #[test]
    fn install_current_mutation_session_round_trips() {
        install_current_mutation_session(Some(MutationSessionRecord {
            session_id: "session_test".to_string(),
            mutation_scope: "apply_workspace".to_string(),
            approval_mode: "explicit".to_string(),
            ..Default::default()
        }));
        let current = current_mutation_session().expect("session installed");
        assert_eq!(current.session_id, "session_test");
        assert_eq!(current.mutation_scope, "apply_workspace");
        assert_eq!(current.approval_mode, "explicit");

        install_current_mutation_session(None);
        assert!(current_mutation_session().is_none());
    }
161
    // A bridge builtin not covered by the pushed policy is rejected with a
    // ToolRejected categorized error.
    #[test]
    fn active_execution_policy_rejects_unknown_bridge_builtin() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
        // Pop before asserting so a failed assert doesn't leak the policy.
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }
184
    // mcp_connect must not bypass the policy: it is rejected as ToolRejected
    // under a read-only workspace policy.
    #[test]
    fn active_execution_policy_rejects_mcp_escape_hatch() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
        // Pop before asserting so a failed assert doesn't leak the policy.
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }
207
    // Legacy flat {act, verify, repair} documents are upgraded into a
    // workflow_graph with those three nodes and "act" as the entry.
    #[test]
    fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "legacy",
            "act": {"mode": "llm"},
            "verify": {"kind": "verify"},
            "repair": {"mode": "agent"},
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        assert_eq!(graph.type_name, "workflow_graph");
        assert!(graph.nodes.contains_key("act"));
        assert!(graph.nodes.contains_key("verify"));
        assert!(graph.nodes.contains_key("repair"));
        assert_eq!(graph.entry, "act");
    }
223
    // A node's `tools` may be a tool_registry object; normalization keeps the
    // registry's tool names in declaration order.
    #[test]
    fn workflow_normalization_accepts_tool_registry_nodes() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "registry_tools",
            "entry": "implement",
            "nodes": {
                "implement": {
                    "kind": "stage",
                    "mode": "agent",
                    "tools": {
                        "_type": "tool_registry",
                        "tools": [
                            {"name": "read", "description": "Read files"},
                            {"name": "run", "description": "Run commands"}
                        ]
                    }
                }
            },
            "edges": []
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        let node = graph.nodes.get("implement").unwrap();
        assert_eq!(workflow_tool_names(&node.tools), vec!["read", "run"]);
    }
248
    // With max_artifacts=2 and a small token budget, selection keeps two of
    // the three candidates; all inputs share the "summary" kind.
    #[test]
    fn artifact_selection_honors_budget_and_priority() {
        let policy = ContextPolicy {
            max_artifacts: Some(2),
            max_tokens: Some(30),
            prefer_recent: true,
            prefer_fresh: true,
            prioritize_kinds: vec!["verification_result".to_string()],
            ..Default::default()
        };
        let artifacts = vec![
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "a".to_string(),
                kind: "summary".to_string(),
                text: Some("short".to_string()),
                relevance: Some(0.9),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "b".to_string(),
                kind: "summary".to_string(),
                text: Some("this is a much larger artifact body".to_string()),
                relevance: Some(1.0),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "c".to_string(),
                kind: "summary".to_string(),
                text: Some("tiny".to_string()),
                relevance: Some(0.5),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
        ];
        let selected = select_artifacts(artifacts, &policy);
        assert_eq!(selected.len(), 2);
        assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
    }
295
    // A condition node with only a "true" branch edge fails validation; the
    // error message must mention both required branches.
    #[test]
    fn workflow_validation_rejects_condition_without_true_false_edges() {
        let graph = WorkflowGraph {
            entry: "gate".to_string(),
            nodes: BTreeMap::from([(
                "gate".to_string(),
                WorkflowNode {
                    id: Some("gate".to_string()),
                    kind: "condition".to_string(),
                    ..Default::default()
                },
            )]),
            edges: vec![WorkflowEdge {
                from: "gate".to_string(),
                to: "next".to_string(),
                branch: Some("true".to_string()),
                label: None,
            }],
            ..Default::default()
        };
        let report = validate_workflow(&graph, None);
        assert!(!report.valid);
        assert!(report
            .errors
            .iter()
            .any(|error| error.contains("true") && error.contains("false")));
    }
323
    // A fixture derived from a run must evaluate cleanly against that same
    // run (round trip: no failures, pass=true).
    #[test]
    fn replay_fixture_round_trip_passes() {
        let run = RunRecord {
            type_name: "run_record".to_string(),
            id: "run_1".to_string(),
            workflow_id: "wf".to_string(),
            workflow_name: Some("demo".to_string()),
            task: "demo".to_string(),
            status: "completed".to_string(),
            started_at: "1".to_string(),
            finished_at: Some("2".to_string()),
            parent_run_id: None,
            root_run_id: Some("run_1".to_string()),
            stages: vec![RunStageRecord {
                id: "stage_1".to_string(),
                node_id: "act".to_string(),
                kind: "stage".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                branch: Some("success".to_string()),
                started_at: "1".to_string(),
                finished_at: Some("2".to_string()),
                visible_text: Some("done".to_string()),
                private_reasoning: None,
                transcript: None,
                verification: None,
                usage: None,
                artifacts: vec![ArtifactRecord {
                    type_name: "artifact".to_string(),
                    id: "a1".to_string(),
                    kind: "summary".to_string(),
                    text: Some("done".to_string()),
                    created_at: "1".to_string(),
                    ..Default::default()
                }
                .normalize()],
                consumed_artifact_ids: vec![],
                produced_artifact_ids: vec!["a1".to_string()],
                attempts: vec![],
                metadata: BTreeMap::new(),
            }],
            transitions: vec![],
            checkpoints: vec![],
            pending_nodes: vec![],
            completed_nodes: vec!["act".to_string()],
            child_runs: vec![],
            artifacts: vec![],
            policy: CapabilityPolicy::default(),
            execution: None,
            transcript: None,
            usage: None,
            replay_fixture: None,
            trace_spans: vec![],
            tool_recordings: vec![],
            metadata: BTreeMap::new(),
            persisted_path: None,
        };
        let fixture = replay_fixture_from_run(&run);
        let report = evaluate_run_against_fixture(&run, &fixture);
        assert!(report.pass);
        assert!(report.failures.is_empty());
    }
386
    // A suite with one matching run and one run evaluated against the wrong
    // fixture reports total=2, failed=1, pass=false.
    #[test]
    fn replay_eval_suite_reports_failed_case() {
        let good = RunRecord {
            id: "run_good".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let bad = RunRecord {
            id: "run_bad".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let suite = evaluate_run_suite(vec![
            (
                good.clone(),
                replay_fixture_from_run(&good),
                Some("good.json".to_string()),
            ),
            (
                bad.clone(),
                // Deliberately pairs the failing run with the good fixture.
                replay_fixture_from_run(&good),
                Some("bad.json".to_string()),
            ),
        ]);
        assert!(!suite.pass);
        assert_eq!(suite.total, 2);
        assert_eq!(suite.failed, 1);
        assert!(suite.cases.iter().any(|case| !case.pass));
    }
430
    // Diffing a completed run against a failed run flags the status change
    // and one differing stage.
    #[test]
    fn run_diff_reports_changed_stage() {
        let left = RunRecord {
            id: "left".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let right = RunRecord {
            id: "right".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let diff = diff_run_records(&left, &right);
        assert!(diff.status_changed);
        assert!(!diff.identical);
        assert_eq!(diff.stage_diffs.len(), 1);
    }
462
    // End-to-end manifest flow: persists two runs to a temp dir, then a case
    // comparing candidate to baseline fails with a "baseline" message.
    #[test]
    fn eval_suite_manifest_can_fail_on_baseline_diff() {
        let temp_dir =
            std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
        std::fs::create_dir_all(&temp_dir).unwrap();
        let baseline_path = temp_dir.join("baseline.json");
        let candidate_path = temp_dir.join("candidate.json");

        let baseline = RunRecord {
            id: "baseline".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let candidate = RunRecord {
            id: "candidate".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };

        save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
        save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();

        let manifest = EvalSuiteManifest {
            base_dir: Some(temp_dir.display().to_string()),
            cases: vec![EvalSuiteCase {
                label: Some("candidate".to_string()),
                run_path: "candidate.json".to_string(),
                fixture_path: None,
                compare_to: Some("baseline.json".to_string()),
            }],
            ..Default::default()
        };
        let suite = evaluate_run_suite_manifest(&manifest).unwrap();
        assert!(!suite.pass);
        assert_eq!(suite.failed, 1);
        assert!(suite.cases[0].comparison.is_some());
        assert!(suite.cases[0]
            .failures
            .iter()
            .any(|failure| failure.contains("baseline")));
    }
518
    // Basic unified-diff shape: a/b headers, -removed, +added, and a leading
    // space on unchanged lines.
    #[test]
    fn render_unified_diff_marks_removed_and_added_lines() {
        let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
        assert!(diff.contains("--- a/src/main.rs"));
        assert!(diff.contains("+++ b/src/main.rs"));
        assert!(diff.contains("-old"));
        assert!(diff.contains("+new"));
        assert!(diff.contains(" same"));
    }
528
    // Identical inputs produce headers plus context-only body (no +/- lines).
    #[test]
    fn render_unified_diff_identical_inputs() {
        let text = "line1\nline2\nline3";
        let diff = render_unified_diff(None, text, text);
        assert!(diff.contains("--- a/artifact"));
        // Skip the two header lines and inspect the hunk body.
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('-')));
        assert!(!body.iter().any(|l| l.starts_with('+')));
        assert_eq!(body.len(), 3);
    }
539
    // Empty "before" means every line is an addition; no deletions appear.
    #[test]
    fn render_unified_diff_empty_before() {
        let diff = render_unified_diff(None, "", "new1\nnew2");
        assert!(diff.contains("+new1"));
        assert!(diff.contains("+new2"));
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('-')));
    }
548
    // Empty "after" means every line is a deletion; no additions appear.
    #[test]
    fn render_unified_diff_empty_after() {
        let diff = render_unified_diff(None, "old1\nold2", "");
        assert!(diff.contains("-old1"));
        assert!(diff.contains("-old2"));
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('+')));
    }
557
    // Two empty inputs still emit the a/b headers but an empty body.
    #[test]
    fn render_unified_diff_both_empty() {
        let diff = render_unified_diff(None, "", "");
        assert!(diff.contains("--- a/artifact"));
        assert!(diff.contains("+++ b/artifact"));
        let body: String = diff.lines().skip(2).collect();
        assert!(body.is_empty());
    }
567
    // Fully disjoint inputs: every before-line removed, every after-line added.
    #[test]
    fn render_unified_diff_all_changed() {
        let diff = render_unified_diff(None, "a\nb", "x\ny");
        assert!(diff.contains("-a"));
        assert!(diff.contains("-b"));
        assert!(diff.contains("+x"));
        assert!(diff.contains("+y"));
    }
576
    // Pure insertion: surrounding lines stay as context, nothing is deleted.
    #[test]
    fn render_unified_diff_insertion_in_middle() {
        let diff = render_unified_diff(None, "a\nc", "a\nb\nc");
        assert!(diff.contains(" a"));
        assert!(diff.contains("+b"));
        assert!(diff.contains(" c"));
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('-')));
    }
586
    // Pure deletion: surrounding lines stay as context, nothing is added.
    #[test]
    fn render_unified_diff_deletion_from_middle() {
        let diff = render_unified_diff(None, "a\nb\nc", "a\nc");
        assert!(diff.contains(" a"));
        assert!(diff.contains("-b"));
        assert!(diff.contains(" c"));
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('+')));
    }
596
    // Without a path, headers fall back to the "artifact" placeholder name.
    #[test]
    fn render_unified_diff_default_path() {
        let diff = render_unified_diff(None, "a", "b");
        assert!(diff.contains("--- a/artifact"));
        assert!(diff.contains("+++ b/artifact"));
    }
603
    // 1000 mostly-identical lines with one change: the diff shows the single
    // -/+ pair and keeps the immediate neighbors as context.
    #[test]
    fn render_unified_diff_large_similar() {
        let mut before = Vec::new();
        let mut after = Vec::new();
        for i in 0..1000 {
            before.push(format!("line {i}"));
            after.push(format!("line {i}"));
        }
        before[500] = "OLD LINE 500".to_string();
        after[500] = "NEW LINE 500".to_string();
        let before_str = before.join("\n");
        let after_str = after.join("\n");
        let diff = render_unified_diff(None, &before_str, &after_str);
        assert!(diff.contains("-OLD LINE 500"));
        assert!(diff.contains("+NEW LINE 500"));
        assert!(diff.contains(" line 499"));
        assert!(diff.contains(" line 501"));
    }
624
    // Two empty sequences diff to no operations at all.
    #[test]
    fn myers_diff_empty_sequences() {
        let ops = myers_diff(&[], &[]);
        assert!(ops.is_empty());
    }
630
    // Empty -> ["a","b"]: exactly two Insert operations.
    #[test]
    fn myers_diff_insert_only() {
        let ops = myers_diff(&[], &["a", "b"]);
        assert_eq!(ops.len(), 2);
        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Insert));
    }
637
    // ["a","b"] -> empty: exactly two Delete operations.
    #[test]
    fn myers_diff_delete_only() {
        let ops = myers_diff(&["a", "b"], &[]);
        assert_eq!(ops.len(), 2);
        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Delete));
    }
644
    // Identical sequences produce one Equal op per element.
    #[test]
    fn myers_diff_equal() {
        let ops = myers_diff(&["a", "b", "c"], &["a", "b", "c"]);
        assert_eq!(ops.len(), 3);
        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Equal));
    }
651
    // Even with a process/exec capability listed, a read_only side-effect
    // level blocks the exec builtin.
    #[test]
    fn execution_policy_rejects_process_exec_when_read_only() {
        push_execution_policy(CapabilityPolicy {
            side_effect_level: Some("read_only".to_string()),
            capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
            ..Default::default()
        });
        let result = enforce_current_policy_for_builtin("exec", &[]);
        pop_execution_policy();
        assert!(result.is_err());
    }
663
    // A tool absent from the policy's tool list ("edit") is rejected.
    #[test]
    fn execution_policy_rejects_unlisted_tool() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            ..Default::default()
        });
        let result = enforce_current_policy_for_tool("edit");
        pop_execution_policy();
        assert!(result.is_err());
    }
674
    // trace_spans survive VmValue -> RunRecord normalization, including the
    // per-span metadata map.
    #[test]
    fn normalize_run_record_preserves_trace_spans() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "_type": "run_record",
            "id": "run_trace",
            "workflow_id": "wf",
            "status": "completed",
            "started_at": "1",
            "trace_spans": [
                {
                    "span_id": 1,
                    "parent_id": null,
                    "kind": "pipeline",
                    "name": "workflow",
                    "start_ms": 0,
                    "duration_ms": 42,
                    "metadata": {"model": "demo"}
                }
            ]
        }));

        let run = normalize_run_record(&value).unwrap();
        assert_eq!(run.trace_spans.len(), 1);
        assert_eq!(run.trace_spans[0].kind, "pipeline");
        assert_eq!(
            run.trace_spans[0].metadata["model"],
            serde_json::json!("demo")
        );
    }
704
    // A pre-hook matching "dangerous_*" returning Deny blocks the call.
    #[test]
    fn pre_tool_hook_deny_blocks_execution() {
        // Hooks are global state: clear before and after to isolate the test.
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "dangerous_*".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Deny("blocked by policy".to_string())
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Deny(_)));
    }
721
    // A matching pre-hook returning Allow leaves the call untouched.
    #[test]
    fn pre_tool_hook_allow_passes_through() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "safe_*".to_string(),
            pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
            post: None,
        });
        let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Allow));
    }
734
    // A wildcard pre-hook can rewrite the tool's arguments via Modify.
    #[test]
    fn pre_tool_hook_modify_rewrites_args() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "*".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
        clear_tool_hooks();
        match result {
            PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
            _ => panic!("expected Modify"),
        }
    }
752
    // A post-hook can rewrite matching output (redaction) and pass the rest
    // through unchanged.
    #[test]
    fn post_tool_hook_modifies_result() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "exec".to_string(),
            pre: None,
            post: Some(Rc::new(|_name, result| {
                if result.contains("SECRET") {
                    PostToolAction::Modify("[REDACTED]".to_string())
                } else {
                    PostToolAction::Pass
                }
            })),
        });
        let result = run_post_tool_hooks("exec", "output with SECRET data");
        let clean = run_post_tool_hooks("exec", "clean output");
        clear_tool_hooks();
        assert_eq!(result, "[REDACTED]");
        assert_eq!(clean, "clean output");
    }
773
    // A hook registered for "exec" must not fire for "read_file"; the default
    // action is Allow.
    #[test]
    fn unmatched_hook_pattern_does_not_fire() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "exec".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Deny("should not match".to_string())
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Allow));
    }
788
    // Glob semantics: bare "*", prefix "x*", suffix "*x", and exact match.
    #[test]
    fn glob_match_patterns() {
        assert!(glob_match("*", "anything"));
        assert!(glob_match("exec*", "exec_at"));
        assert!(glob_match("*_file", "read_file"));
        assert!(!glob_match("exec*", "read_file"));
        assert!(glob_match("read_file", "read_file"));
        assert!(!glob_match("read_file", "write_file"));
    }
798
    // A 50k-char output under a 10k budget is shrunk and tagged "snipped".
    #[test]
    fn microcompact_snips_large_output() {
        let large = "x".repeat(50_000);
        let result = microcompact_tool_output(&large, 10_000);
        assert!(result.len() < 15_000);
        assert!(result.contains("snipped"));
    }
808
    // Output already under budget is returned verbatim.
    #[test]
    fn microcompact_preserves_small_output() {
        let small = "hello world";
        let result = microcompact_tool_output(small, 10_000);
        assert_eq!(result, small);
    }
815
    // Lines carrying strong failure keywords (FAIL / panic / FAILED) survive
    // compaction even when buried in verbose surrounding output.
    #[test]
    fn microcompact_preserves_strong_keyword_lines_without_file_line() {
        let mut output = String::new();
        for i in 0..100 {
            output.push_str(&format!("verbose progress line {i}\n"));
        }
        output.push_str("--- FAIL: TestEmpty (0.00s)\n");
        output.push_str("thread 'tests::test_foo' panicked at src/lib.rs:42:5\n");
        output.push_str("FAILED tests/test_parser.py::test_empty\n");
        for i in 0..100 {
            output.push_str(&format!("more output after failures {i}\n"));
        }
        let result = microcompact_tool_output(&output, 2_000);
        assert!(
            result.contains("--- FAIL: TestEmpty"),
            "strong 'FAIL' keyword should preserve the line:\n{result}"
        );
        assert!(
            result.contains("panicked at"),
            "strong 'panic' keyword should preserve the line:\n{result}"
        );
        assert!(
            result.contains("FAILED tests/test_parser.py"),
            "strong 'FAIL' keyword should preserve pytest-style lines too:\n{result}"
        );
    }
850
851 #[test]
852 fn auto_compact_messages_reduces_count() {
853 let mut messages: Vec<serde_json::Value> = (0..20)
854 .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
855 .collect();
856 let runtime = tokio::runtime::Builder::new_current_thread()
857 .enable_all()
858 .build()
859 .unwrap();
860 let compacted = runtime.block_on(auto_compact_messages(
861 &mut messages,
862 &AutoCompactConfig {
863 compact_strategy: CompactStrategy::Truncate,
864 keep_last: 6,
865 ..Default::default()
866 },
867 None,
868 ));
869 let summary = compacted.unwrap();
870 assert!(summary.is_some());
871 assert!(messages.len() <= 7); assert!(messages[0]["content"]
873 .as_str()
874 .unwrap()
875 .contains("auto-compacted"));
876 }
877
    // Only four messages with keep_last=6: compaction is a no-op (returns
    // None) and the history is untouched.
    #[test]
    fn auto_compact_noop_when_under_threshold() {
        let mut messages: Vec<serde_json::Value> = (0..4)
            .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
            .collect();
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();
        let compacted = runtime.block_on(auto_compact_messages(
            &mut messages,
            &AutoCompactConfig {
                compact_strategy: CompactStrategy::Truncate,
                keep_last: 6,
                ..Default::default()
            },
            None,
        ));
        assert!(compacted.unwrap().is_none());
        assert_eq!(messages.len(), 4);
    }
899
    // ObservationMask strategy: assistant turns and error-bearing tool output
    // survive in the summary; verbose tool output is masked (first line kept);
    // the last keep_last=2 messages drop out of the summary entirely.
    #[test]
    fn observation_mask_preserves_errors_masks_verbose_output() {
        let verbose_lines: Vec<String> = (0..60)
            .map(|i| format!("// source line {} of the generated file", i))
            .collect();
        let verbose_content = format!(
            "File created: a.go\npackage main\n{}",
            verbose_lines.join("\n")
        );
        let mut messages = vec![
            serde_json::json!({"role": "assistant", "content": "I'll create the file now."}),
            serde_json::json!({"role": "user", "content": verbose_content}),
            serde_json::json!({"role": "assistant", "content": "Now let me run the tests."}),
            serde_json::json!({"role": "user", "content": "error: cannot find module\nexit code 1\nfailed to compile"}),
            serde_json::json!({"role": "assistant", "content": "I see the issue. Let me fix it."}),
            serde_json::json!({"role": "user", "content": "File patched successfully."}),
            serde_json::json!({"role": "assistant", "content": "Running tests again."}),
            serde_json::json!({"role": "user", "content": "All tests passed."}),
        ];
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();
        let compacted = runtime.block_on(auto_compact_messages(
            &mut messages,
            &AutoCompactConfig {
                compact_strategy: CompactStrategy::ObservationMask,
                keep_last: 2,
                ..Default::default()
            },
            None,
        ));
        let summary = compacted.unwrap().unwrap();
        assert!(summary.contains("I'll create the file now."));
        assert!(summary.contains("Now let me run the tests."));
        assert!(summary.contains("I see the issue. Let me fix it."));
        assert!(summary.contains("error: cannot find module"));
        assert!(summary.contains("exit code 1"));
        assert!(summary.contains("masked]"));
        assert!(summary.contains("File created: a.go"));
        assert!(!summary.contains("File patched successfully."));
        assert!(!summary.contains("Running tests again."));
        assert!(!summary.contains("All tests passed."));
        assert_eq!(messages.len(), 4);
    }
953
    // Short tool outputs are kept verbatim — no masking marker appears.
    #[test]
    fn observation_mask_keeps_short_tool_output() {
        let messages = vec![
            serde_json::json!({"role": "user", "content": "OK"}),
            serde_json::json!({"role": "user", "content": "Done."}),
        ];
        let summary = observation_mask_compaction(&messages, 2);
        assert!(summary.contains("[user] OK"));
        assert!(summary.contains("[user] Done."));
        assert!(!summary.contains("masked"));
    }
965
966 #[test]
967 fn estimate_message_tokens_basic() {
968 let messages = vec![
969 serde_json::json!({"role": "user", "content": "a".repeat(400)}),
970 serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
971 ];
972 let tokens = estimate_message_tokens(&messages);
973 assert_eq!(tokens, 200); }
975
    // Two artifacts with identical text collapse to one; the unique third
    // survives, leaving two records.
    #[test]
    fn dedup_artifacts_removes_duplicates() {
        let mut artifacts = vec![
            ArtifactRecord {
                id: "a1".to_string(),
                kind: "test".to_string(),
                text: Some("duplicate content".to_string()),
                ..Default::default()
            },
            ArtifactRecord {
                id: "a2".to_string(),
                kind: "test".to_string(),
                text: Some("duplicate content".to_string()),
                ..Default::default()
            },
            ArtifactRecord {
                id: "a3".to_string(),
                kind: "test".to_string(),
                text: Some("unique content".to_string()),
                ..Default::default()
            },
        ];
        dedup_artifacts(&mut artifacts);
        assert_eq!(artifacts.len(), 2);
    }
1003
    // An oversized artifact is snipped in place and its token estimate is
    // clamped to the given budget.
    #[test]
    fn microcompact_artifact_snips_oversized() {
        let mut artifact = ArtifactRecord {
            id: "a1".to_string(),
            kind: "test".to_string(),
            text: Some("x".repeat(10_000)),
            estimated_tokens: Some(2_500),
            ..Default::default()
        };
        microcompact_artifact(&mut artifact, 500);
        assert!(artifact.text.as_ref().unwrap().len() < 5_000);
        assert_eq!(artifact.estimated_tokens, Some(500));
    }
1017
    // "cargo test" matches the "cargo *" pattern for exec, so it passes.
    #[test]
    fn arg_constraint_allows_matching_pattern() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "exec",
            &serde_json::json!({"command": "cargo test"}),
        );
        assert!(result.is_ok());
    }
1036
    // "rm -rf /" does not match the "cargo *" allow-pattern, so it is
    // rejected.
    #[test]
    fn arg_constraint_rejects_non_matching_pattern() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "exec",
            &serde_json::json!({"command": "rm -rf /"}),
        );
        assert!(result.is_err());
    }
1053
    // Constraints scoped to "exec" do not apply to other tools; read_file
    // passes regardless of its arguments.
    #[test]
    fn arg_constraint_ignores_unmatched_tool() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "read_file",
            &serde_json::json!({"path": "/etc/passwd"}),
        );
        assert!(result.is_ok());
    }
1070
1071 #[test]
1072 fn microcompact_handles_multibyte_utf8() {
1073 let emoji_output = "🔥".repeat(500); let result = microcompact_tool_output(&emoji_output, 400);
1076 assert!(result.contains("snipped"));
1078
1079 let mixed = format!("{}{}{}", "a".repeat(300), "é".repeat(500), "b".repeat(300));
1081 let result2 = microcompact_tool_output(&mixed, 400);
1082 assert!(result2.contains("snipped"));
1083
1084 let cjk = "中文".repeat(500);
1086 let result3 = microcompact_tool_output(&cjk, 400);
1087 assert!(result3.contains("snipped"));
1088 }
1089}