1use std::path::PathBuf;
2use std::{cell::RefCell, thread_local};
3
4use serde::{Deserialize, Serialize};
5
6use crate::llm::vm_value_to_json;
7use crate::value::{VmError, VmValue};
8
/// Returns the current wall-clock time as a decimal string of whole seconds
/// since the Unix epoch.
///
/// NOTE: despite the name, the value is *not* RFC 3339 formatted; it is the
/// plain second count (e.g. `"1700000000"`). If the system clock reads earlier
/// than the epoch, the duration falls back to its default and `"0"` is returned.
pub(crate) fn now_rfc3339() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default();
    since_epoch.as_secs().to_string()
}
17
18pub(crate) fn new_id(prefix: &str) -> String {
19 format!("{prefix}_{}", uuid::Uuid::now_v7())
20}
21
22pub(crate) fn default_run_dir() -> PathBuf {
23 let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
24 crate::runtime_paths::run_root(&base)
25}
26
27mod hooks;
28pub use hooks::*;
29
30mod compaction;
31pub use compaction::*;
32
33mod artifacts;
34pub use artifacts::*;
35
36mod policy;
37pub use policy::*;
38
39mod workflow;
40pub use workflow::*;
41
42mod records;
43pub use records::*;
44
thread_local! {
    // Per-thread slot holding the currently installed mutation session.
    // It starts out as `None`; `install_current_mutation_session` overwrites it
    // and `current_mutation_session` returns a clone of its contents.
    static CURRENT_MUTATION_SESSION: RefCell<Option<MutationSessionRecord>> = const { RefCell::new(None) };
}
48
/// Serializable description of a mutation session.
///
/// Because of `#[serde(default)]`, any field absent from the input is filled
/// from this struct's `Default` value during deserialization. Call
/// `normalize` to replace empty required strings with their fallbacks.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct MutationSessionRecord {
    /// Session identifier; `normalize` generates a `session_…` id when empty.
    pub session_id: String,
    /// Optional parent session id (NOTE(review): exact semantics are defined by callers).
    pub parent_session_id: Option<String>,
    /// Optional id of the associated run (NOTE(review): confirm against callers).
    pub run_id: Option<String>,
    /// Optional id of the associated worker (NOTE(review): confirm against callers).
    pub worker_id: Option<String>,
    /// Optional execution-kind label (NOTE(review): allowed values not visible here).
    pub execution_kind: Option<String>,
    /// Mutation scope label; `normalize` sets `"read_only"` when empty.
    pub mutation_scope: String,
    /// Approval mode label; `normalize` sets `"host_enforced"` when empty.
    pub approval_mode: String,
}
60
61impl MutationSessionRecord {
62 pub fn normalize(mut self) -> Self {
63 if self.session_id.is_empty() {
64 self.session_id = new_id("session");
65 }
66 if self.mutation_scope.is_empty() {
67 self.mutation_scope = "read_only".to_string();
68 }
69 if self.approval_mode.is_empty() {
70 self.approval_mode = "host_enforced".to_string();
71 }
72 self
73 }
74}
75
76pub fn install_current_mutation_session(session: Option<MutationSessionRecord>) {
77 CURRENT_MUTATION_SESSION.with(|slot| {
78 *slot.borrow_mut() = session.map(MutationSessionRecord::normalize);
79 });
80}
81
82pub fn current_mutation_session() -> Option<MutationSessionRecord> {
83 CURRENT_MUTATION_SESSION.with(|slot| slot.borrow().clone())
84}
85pub(crate) fn parse_json_payload<T: for<'de> Deserialize<'de>>(
86 json: serde_json::Value,
87 label: &str,
88) -> Result<T, VmError> {
89 let payload = json.to_string();
90 let mut deserializer = serde_json::Deserializer::from_str(&payload);
91 let mut tracker = serde_path_to_error::Track::new();
92 let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
93 T::deserialize(path_deserializer).map_err(|error| {
94 let snippet = if payload.len() > 600 {
95 format!("{}...", &payload[..600])
96 } else {
97 payload.clone()
98 };
99 VmError::Runtime(format!(
100 "{label} parse error at {}: {} | payload={}",
101 tracker.path(),
102 error,
103 snippet
104 ))
105 })
106}
107
108pub(crate) fn parse_json_value<T: for<'de> Deserialize<'de>>(
109 value: &VmValue,
110) -> Result<T, VmError> {
111 parse_json_payload(vm_value_to_json(value), "orchestration")
112}
113
114#[cfg(test)]
115mod tests {
116 use super::*;
117 use std::collections::BTreeMap;
118 use std::rc::Rc;
119
120 #[test]
121 fn capability_intersection_rejects_privilege_expansion() {
122 let ceiling = CapabilityPolicy {
123 tools: vec!["read".to_string()],
124 side_effect_level: Some("read_only".to_string()),
125 recursion_limit: Some(2),
126 ..Default::default()
127 };
128 let requested = CapabilityPolicy {
129 tools: vec!["read".to_string(), "edit".to_string()],
130 ..Default::default()
131 };
132 let error = ceiling.intersect(&requested).unwrap_err();
133 assert!(error.contains("host ceiling"));
134 }
135
136 #[test]
137 fn mutation_session_normalize_fills_defaults() {
138 let normalized = MutationSessionRecord::default().normalize();
139 assert!(normalized.session_id.starts_with("session_"));
140 assert_eq!(normalized.mutation_scope, "read_only");
141 assert_eq!(normalized.approval_mode, "host_enforced");
142 }
143
144 #[test]
145 fn install_current_mutation_session_round_trips() {
146 install_current_mutation_session(Some(MutationSessionRecord {
147 session_id: "session_test".to_string(),
148 mutation_scope: "apply_workspace".to_string(),
149 approval_mode: "explicit".to_string(),
150 ..Default::default()
151 }));
152 let current = current_mutation_session().expect("session installed");
153 assert_eq!(current.session_id, "session_test");
154 assert_eq!(current.mutation_scope, "apply_workspace");
155 assert_eq!(current.approval_mode, "explicit");
156
157 install_current_mutation_session(None);
158 assert!(current_mutation_session().is_none());
159 }
160
161 #[test]
162 fn active_execution_policy_rejects_unknown_bridge_builtin() {
163 push_execution_policy(CapabilityPolicy {
164 tools: vec!["read".to_string()],
165 capabilities: BTreeMap::from([(
166 "workspace".to_string(),
167 vec!["read_text".to_string()],
168 )]),
169 side_effect_level: Some("read_only".to_string()),
170 recursion_limit: Some(1),
171 ..Default::default()
172 });
173 let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
174 pop_execution_policy();
175 assert!(matches!(
176 error,
177 VmError::CategorizedError {
178 category: crate::value::ErrorCategory::ToolRejected,
179 ..
180 }
181 ));
182 }
183
184 #[test]
185 fn active_execution_policy_rejects_mcp_escape_hatch() {
186 push_execution_policy(CapabilityPolicy {
187 tools: vec!["read".to_string()],
188 capabilities: BTreeMap::from([(
189 "workspace".to_string(),
190 vec!["read_text".to_string()],
191 )]),
192 side_effect_level: Some("read_only".to_string()),
193 recursion_limit: Some(1),
194 ..Default::default()
195 });
196 let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
197 pop_execution_policy();
198 assert!(matches!(
199 error,
200 VmError::CategorizedError {
201 category: crate::value::ErrorCategory::ToolRejected,
202 ..
203 }
204 ));
205 }
206
207 #[test]
208 fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
209 let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
210 "name": "legacy",
211 "act": {"mode": "llm"},
212 "verify": {"kind": "verify"},
213 "repair": {"mode": "agent"},
214 }));
215 let graph = normalize_workflow_value(&value).unwrap();
216 assert_eq!(graph.type_name, "workflow_graph");
217 assert!(graph.nodes.contains_key("act"));
218 assert!(graph.nodes.contains_key("verify"));
219 assert!(graph.nodes.contains_key("repair"));
220 assert_eq!(graph.entry, "act");
221 }
222
223 #[test]
224 fn workflow_normalization_accepts_tool_registry_nodes() {
225 let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
226 "name": "registry_tools",
227 "entry": "implement",
228 "nodes": {
229 "implement": {
230 "kind": "stage",
231 "mode": "agent",
232 "tools": {
233 "_type": "tool_registry",
234 "tools": [
235 {"name": "read", "description": "Read files"},
236 {"name": "run", "description": "Run commands"}
237 ]
238 }
239 }
240 },
241 "edges": []
242 }));
243 let graph = normalize_workflow_value(&value).unwrap();
244 let node = graph.nodes.get("implement").unwrap();
245 assert_eq!(workflow_tool_names(&node.tools), vec!["read", "run"]);
246 }
247
248 #[test]
249 fn artifact_selection_honors_budget_and_priority() {
250 let policy = ContextPolicy {
251 max_artifacts: Some(2),
252 max_tokens: Some(30),
253 prefer_recent: true,
254 prefer_fresh: true,
255 prioritize_kinds: vec!["verification_result".to_string()],
256 ..Default::default()
257 };
258 let artifacts = vec![
259 ArtifactRecord {
260 type_name: "artifact".to_string(),
261 id: "a".to_string(),
262 kind: "summary".to_string(),
263 text: Some("short".to_string()),
264 relevance: Some(0.9),
265 created_at: now_rfc3339(),
266 ..Default::default()
267 }
268 .normalize(),
269 ArtifactRecord {
270 type_name: "artifact".to_string(),
271 id: "b".to_string(),
272 kind: "summary".to_string(),
273 text: Some("this is a much larger artifact body".to_string()),
274 relevance: Some(1.0),
275 created_at: now_rfc3339(),
276 ..Default::default()
277 }
278 .normalize(),
279 ArtifactRecord {
280 type_name: "artifact".to_string(),
281 id: "c".to_string(),
282 kind: "summary".to_string(),
283 text: Some("tiny".to_string()),
284 relevance: Some(0.5),
285 created_at: now_rfc3339(),
286 ..Default::default()
287 }
288 .normalize(),
289 ];
290 let selected = select_artifacts(artifacts, &policy);
291 assert_eq!(selected.len(), 2);
292 assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
293 }
294
295 #[test]
296 fn workflow_validation_rejects_condition_without_true_false_edges() {
297 let graph = WorkflowGraph {
298 entry: "gate".to_string(),
299 nodes: BTreeMap::from([(
300 "gate".to_string(),
301 WorkflowNode {
302 id: Some("gate".to_string()),
303 kind: "condition".to_string(),
304 ..Default::default()
305 },
306 )]),
307 edges: vec![WorkflowEdge {
308 from: "gate".to_string(),
309 to: "next".to_string(),
310 branch: Some("true".to_string()),
311 label: None,
312 }],
313 ..Default::default()
314 };
315 let report = validate_workflow(&graph, None);
316 assert!(!report.valid);
317 assert!(report
318 .errors
319 .iter()
320 .any(|error| error.contains("true") && error.contains("false")));
321 }
322
323 #[test]
324 fn replay_fixture_round_trip_passes() {
325 let run = RunRecord {
326 type_name: "run_record".to_string(),
327 id: "run_1".to_string(),
328 workflow_id: "wf".to_string(),
329 workflow_name: Some("demo".to_string()),
330 task: "demo".to_string(),
331 status: "completed".to_string(),
332 started_at: "1".to_string(),
333 finished_at: Some("2".to_string()),
334 parent_run_id: None,
335 root_run_id: Some("run_1".to_string()),
336 stages: vec![RunStageRecord {
337 id: "stage_1".to_string(),
338 node_id: "act".to_string(),
339 kind: "stage".to_string(),
340 status: "completed".to_string(),
341 outcome: "success".to_string(),
342 branch: Some("success".to_string()),
343 started_at: "1".to_string(),
344 finished_at: Some("2".to_string()),
345 visible_text: Some("done".to_string()),
346 private_reasoning: None,
347 transcript: None,
348 verification: None,
349 usage: None,
350 artifacts: vec![ArtifactRecord {
351 type_name: "artifact".to_string(),
352 id: "a1".to_string(),
353 kind: "summary".to_string(),
354 text: Some("done".to_string()),
355 created_at: "1".to_string(),
356 ..Default::default()
357 }
358 .normalize()],
359 consumed_artifact_ids: vec![],
360 produced_artifact_ids: vec!["a1".to_string()],
361 attempts: vec![],
362 metadata: BTreeMap::new(),
363 }],
364 transitions: vec![],
365 checkpoints: vec![],
366 pending_nodes: vec![],
367 completed_nodes: vec!["act".to_string()],
368 child_runs: vec![],
369 artifacts: vec![],
370 policy: CapabilityPolicy::default(),
371 execution: None,
372 transcript: None,
373 usage: None,
374 replay_fixture: None,
375 trace_spans: vec![],
376 tool_recordings: vec![],
377 metadata: BTreeMap::new(),
378 persisted_path: None,
379 };
380 let fixture = replay_fixture_from_run(&run);
381 let report = evaluate_run_against_fixture(&run, &fixture);
382 assert!(report.pass);
383 assert!(report.failures.is_empty());
384 }
385
386 #[test]
387 fn replay_eval_suite_reports_failed_case() {
388 let good = RunRecord {
389 id: "run_good".to_string(),
390 workflow_id: "wf".to_string(),
391 status: "completed".to_string(),
392 stages: vec![RunStageRecord {
393 node_id: "act".to_string(),
394 status: "completed".to_string(),
395 outcome: "success".to_string(),
396 ..Default::default()
397 }],
398 ..Default::default()
399 };
400 let bad = RunRecord {
401 id: "run_bad".to_string(),
402 workflow_id: "wf".to_string(),
403 status: "failed".to_string(),
404 stages: vec![RunStageRecord {
405 node_id: "act".to_string(),
406 status: "failed".to_string(),
407 outcome: "error".to_string(),
408 ..Default::default()
409 }],
410 ..Default::default()
411 };
412 let suite = evaluate_run_suite(vec![
413 (
414 good.clone(),
415 replay_fixture_from_run(&good),
416 Some("good.json".to_string()),
417 ),
418 (
419 bad.clone(),
420 replay_fixture_from_run(&good),
421 Some("bad.json".to_string()),
422 ),
423 ]);
424 assert!(!suite.pass);
425 assert_eq!(suite.total, 2);
426 assert_eq!(suite.failed, 1);
427 assert!(suite.cases.iter().any(|case| !case.pass));
428 }
429
430 #[test]
431 fn run_diff_reports_changed_stage() {
432 let left = RunRecord {
433 id: "left".to_string(),
434 workflow_id: "wf".to_string(),
435 status: "completed".to_string(),
436 stages: vec![RunStageRecord {
437 node_id: "act".to_string(),
438 status: "completed".to_string(),
439 outcome: "success".to_string(),
440 ..Default::default()
441 }],
442 ..Default::default()
443 };
444 let right = RunRecord {
445 id: "right".to_string(),
446 workflow_id: "wf".to_string(),
447 status: "failed".to_string(),
448 stages: vec![RunStageRecord {
449 node_id: "act".to_string(),
450 status: "failed".to_string(),
451 outcome: "error".to_string(),
452 ..Default::default()
453 }],
454 ..Default::default()
455 };
456 let diff = diff_run_records(&left, &right);
457 assert!(diff.status_changed);
458 assert!(!diff.identical);
459 assert_eq!(diff.stage_diffs.len(), 1);
460 }
461
462 #[test]
463 fn eval_suite_manifest_can_fail_on_baseline_diff() {
464 let temp_dir =
465 std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
466 std::fs::create_dir_all(&temp_dir).unwrap();
467 let baseline_path = temp_dir.join("baseline.json");
468 let candidate_path = temp_dir.join("candidate.json");
469
470 let baseline = RunRecord {
471 id: "baseline".to_string(),
472 workflow_id: "wf".to_string(),
473 status: "completed".to_string(),
474 stages: vec![RunStageRecord {
475 node_id: "act".to_string(),
476 status: "completed".to_string(),
477 outcome: "success".to_string(),
478 ..Default::default()
479 }],
480 ..Default::default()
481 };
482 let candidate = RunRecord {
483 id: "candidate".to_string(),
484 workflow_id: "wf".to_string(),
485 status: "failed".to_string(),
486 stages: vec![RunStageRecord {
487 node_id: "act".to_string(),
488 status: "failed".to_string(),
489 outcome: "error".to_string(),
490 ..Default::default()
491 }],
492 ..Default::default()
493 };
494
495 save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
496 save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();
497
498 let manifest = EvalSuiteManifest {
499 base_dir: Some(temp_dir.display().to_string()),
500 cases: vec![EvalSuiteCase {
501 label: Some("candidate".to_string()),
502 run_path: "candidate.json".to_string(),
503 fixture_path: None,
504 compare_to: Some("baseline.json".to_string()),
505 }],
506 ..Default::default()
507 };
508 let suite = evaluate_run_suite_manifest(&manifest).unwrap();
509 assert!(!suite.pass);
510 assert_eq!(suite.failed, 1);
511 assert!(suite.cases[0].comparison.is_some());
512 assert!(suite.cases[0]
513 .failures
514 .iter()
515 .any(|failure| failure.contains("baseline")));
516 }
517
518 #[test]
519 fn render_unified_diff_marks_removed_and_added_lines() {
520 let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
521 assert!(diff.contains("--- a/src/main.rs"));
522 assert!(diff.contains("+++ b/src/main.rs"));
523 assert!(diff.contains("-old"));
524 assert!(diff.contains("+new"));
525 assert!(diff.contains(" same"));
526 }
527
528 #[test]
529 fn render_unified_diff_identical_inputs() {
530 let text = "line1\nline2\nline3";
531 let diff = render_unified_diff(None, text, text);
532 assert!(diff.contains("--- a/artifact"));
533 let body: Vec<&str> = diff.lines().skip(2).collect();
534 assert!(!body.iter().any(|l| l.starts_with('-')));
535 assert!(!body.iter().any(|l| l.starts_with('+')));
536 assert_eq!(body.len(), 3);
537 }
538
539 #[test]
540 fn render_unified_diff_empty_before() {
541 let diff = render_unified_diff(None, "", "new1\nnew2");
542 assert!(diff.contains("+new1"));
543 assert!(diff.contains("+new2"));
544 let body: Vec<&str> = diff.lines().skip(2).collect();
545 assert!(!body.iter().any(|l| l.starts_with('-')));
546 }
547
548 #[test]
549 fn render_unified_diff_empty_after() {
550 let diff = render_unified_diff(None, "old1\nold2", "");
551 assert!(diff.contains("-old1"));
552 assert!(diff.contains("-old2"));
553 let body: Vec<&str> = diff.lines().skip(2).collect();
554 assert!(!body.iter().any(|l| l.starts_with('+')));
555 }
556
557 #[test]
558 fn render_unified_diff_both_empty() {
559 let diff = render_unified_diff(None, "", "");
560 assert!(diff.contains("--- a/artifact"));
561 assert!(diff.contains("+++ b/artifact"));
562 let body: String = diff.lines().skip(2).collect();
564 assert!(body.is_empty());
565 }
566
567 #[test]
568 fn render_unified_diff_all_changed() {
569 let diff = render_unified_diff(None, "a\nb", "x\ny");
570 assert!(diff.contains("-a"));
571 assert!(diff.contains("-b"));
572 assert!(diff.contains("+x"));
573 assert!(diff.contains("+y"));
574 }
575
576 #[test]
577 fn render_unified_diff_insertion_in_middle() {
578 let diff = render_unified_diff(None, "a\nc", "a\nb\nc");
579 assert!(diff.contains(" a"));
580 assert!(diff.contains("+b"));
581 assert!(diff.contains(" c"));
582 let body: Vec<&str> = diff.lines().skip(2).collect();
583 assert!(!body.iter().any(|l| l.starts_with('-')));
584 }
585
586 #[test]
587 fn render_unified_diff_deletion_from_middle() {
588 let diff = render_unified_diff(None, "a\nb\nc", "a\nc");
589 assert!(diff.contains(" a"));
590 assert!(diff.contains("-b"));
591 assert!(diff.contains(" c"));
592 let body: Vec<&str> = diff.lines().skip(2).collect();
593 assert!(!body.iter().any(|l| l.starts_with('+')));
594 }
595
596 #[test]
597 fn render_unified_diff_default_path() {
598 let diff = render_unified_diff(None, "a", "b");
599 assert!(diff.contains("--- a/artifact"));
600 assert!(diff.contains("+++ b/artifact"));
601 }
602
603 #[test]
604 fn render_unified_diff_large_similar() {
605 let mut before = Vec::new();
607 let mut after = Vec::new();
608 for i in 0..1000 {
609 before.push(format!("line {i}"));
610 after.push(format!("line {i}"));
611 }
612 before[500] = "OLD LINE 500".to_string();
613 after[500] = "NEW LINE 500".to_string();
614 let before_str = before.join("\n");
615 let after_str = after.join("\n");
616 let diff = render_unified_diff(None, &before_str, &after_str);
617 assert!(diff.contains("-OLD LINE 500"));
618 assert!(diff.contains("+NEW LINE 500"));
619 assert!(diff.contains(" line 499"));
621 assert!(diff.contains(" line 501"));
622 }
623
624 #[test]
625 fn myers_diff_empty_sequences() {
626 let ops = myers_diff(&[], &[]);
627 assert!(ops.is_empty());
628 }
629
630 #[test]
631 fn myers_diff_insert_only() {
632 let ops = myers_diff(&[], &["a", "b"]);
633 assert_eq!(ops.len(), 2);
634 assert!(ops.iter().all(|(op, _)| *op == DiffOp::Insert));
635 }
636
637 #[test]
638 fn myers_diff_delete_only() {
639 let ops = myers_diff(&["a", "b"], &[]);
640 assert_eq!(ops.len(), 2);
641 assert!(ops.iter().all(|(op, _)| *op == DiffOp::Delete));
642 }
643
644 #[test]
645 fn myers_diff_equal() {
646 let ops = myers_diff(&["a", "b", "c"], &["a", "b", "c"]);
647 assert_eq!(ops.len(), 3);
648 assert!(ops.iter().all(|(op, _)| *op == DiffOp::Equal));
649 }
650
651 #[test]
652 fn execution_policy_rejects_process_exec_when_read_only() {
653 push_execution_policy(CapabilityPolicy {
654 side_effect_level: Some("read_only".to_string()),
655 capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
656 ..Default::default()
657 });
658 let result = enforce_current_policy_for_builtin("exec", &[]);
659 pop_execution_policy();
660 assert!(result.is_err());
661 }
662
663 #[test]
664 fn execution_policy_rejects_unlisted_tool() {
665 push_execution_policy(CapabilityPolicy {
666 tools: vec!["read".to_string()],
667 ..Default::default()
668 });
669 let result = enforce_current_policy_for_tool("edit");
670 pop_execution_policy();
671 assert!(result.is_err());
672 }
673
674 #[test]
675 fn normalize_run_record_preserves_trace_spans() {
676 let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
677 "_type": "run_record",
678 "id": "run_trace",
679 "workflow_id": "wf",
680 "status": "completed",
681 "started_at": "1",
682 "trace_spans": [
683 {
684 "span_id": 1,
685 "parent_id": null,
686 "kind": "pipeline",
687 "name": "workflow",
688 "start_ms": 0,
689 "duration_ms": 42,
690 "metadata": {"model": "demo"}
691 }
692 ]
693 }));
694
695 let run = normalize_run_record(&value).unwrap();
696 assert_eq!(run.trace_spans.len(), 1);
697 assert_eq!(run.trace_spans[0].kind, "pipeline");
698 assert_eq!(
699 run.trace_spans[0].metadata["model"],
700 serde_json::json!("demo")
701 );
702 }
703
704 #[test]
707 fn pre_tool_hook_deny_blocks_execution() {
708 clear_tool_hooks();
709 register_tool_hook(ToolHook {
710 pattern: "dangerous_*".to_string(),
711 pre: Some(Rc::new(|_name, _args| {
712 PreToolAction::Deny("blocked by policy".to_string())
713 })),
714 post: None,
715 });
716 let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
717 clear_tool_hooks();
718 assert!(matches!(result, PreToolAction::Deny(_)));
719 }
720
721 #[test]
722 fn pre_tool_hook_allow_passes_through() {
723 clear_tool_hooks();
724 register_tool_hook(ToolHook {
725 pattern: "safe_*".to_string(),
726 pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
727 post: None,
728 });
729 let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
730 clear_tool_hooks();
731 assert!(matches!(result, PreToolAction::Allow));
732 }
733
734 #[test]
735 fn pre_tool_hook_modify_rewrites_args() {
736 clear_tool_hooks();
737 register_tool_hook(ToolHook {
738 pattern: "*".to_string(),
739 pre: Some(Rc::new(|_name, _args| {
740 PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
741 })),
742 post: None,
743 });
744 let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
745 clear_tool_hooks();
746 match result {
747 PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
748 _ => panic!("expected Modify"),
749 }
750 }
751
752 #[test]
753 fn post_tool_hook_modifies_result() {
754 clear_tool_hooks();
755 register_tool_hook(ToolHook {
756 pattern: "exec".to_string(),
757 pre: None,
758 post: Some(Rc::new(|_name, result| {
759 if result.contains("SECRET") {
760 PostToolAction::Modify("[REDACTED]".to_string())
761 } else {
762 PostToolAction::Pass
763 }
764 })),
765 });
766 let result = run_post_tool_hooks("exec", "output with SECRET data");
767 let clean = run_post_tool_hooks("exec", "clean output");
768 clear_tool_hooks();
769 assert_eq!(result, "[REDACTED]");
770 assert_eq!(clean, "clean output");
771 }
772
773 #[test]
774 fn unmatched_hook_pattern_does_not_fire() {
775 clear_tool_hooks();
776 register_tool_hook(ToolHook {
777 pattern: "exec".to_string(),
778 pre: Some(Rc::new(|_name, _args| {
779 PreToolAction::Deny("should not match".to_string())
780 })),
781 post: None,
782 });
783 let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
784 clear_tool_hooks();
785 assert!(matches!(result, PreToolAction::Allow));
786 }
787
788 #[test]
789 fn glob_match_patterns() {
790 assert!(glob_match("*", "anything"));
791 assert!(glob_match("exec*", "exec_at"));
792 assert!(glob_match("*_file", "read_file"));
793 assert!(!glob_match("exec*", "read_file"));
794 assert!(glob_match("read_file", "read_file"));
795 assert!(!glob_match("read_file", "write_file"));
796 }
797
798 #[test]
801 fn microcompact_snips_large_output() {
802 let large = "x".repeat(50_000);
803 let result = microcompact_tool_output(&large, 10_000);
804 assert!(result.len() < 15_000);
805 assert!(result.contains("snipped"));
806 }
807
808 #[test]
809 fn microcompact_preserves_small_output() {
810 let small = "hello world";
811 let result = microcompact_tool_output(small, 10_000);
812 assert_eq!(result, small);
813 }
814
815 #[test]
816 fn microcompact_preserves_strong_keyword_lines_without_file_line() {
817 let mut output = String::new();
826 for i in 0..100 {
827 output.push_str(&format!("verbose progress line {i}\n"));
828 }
829 output.push_str("--- FAIL: TestEmpty (0.00s)\n");
830 output.push_str("thread 'tests::test_foo' panicked at src/lib.rs:42:5\n");
831 output.push_str("FAILED tests/test_parser.py::test_empty\n");
832 for i in 0..100 {
833 output.push_str(&format!("more output after failures {i}\n"));
834 }
835 let result = microcompact_tool_output(&output, 2_000);
836 assert!(
837 result.contains("--- FAIL: TestEmpty"),
838 "strong 'FAIL' keyword should preserve the line:\n{result}"
839 );
840 assert!(
841 result.contains("panicked at"),
842 "strong 'panic' keyword should preserve the line:\n{result}"
843 );
844 assert!(
845 result.contains("FAILED tests/test_parser.py"),
846 "strong 'FAIL' keyword should preserve pytest-style lines too:\n{result}"
847 );
848 }
849
850 #[test]
851 fn auto_compact_messages_reduces_count() {
852 let mut messages: Vec<serde_json::Value> = (0..20)
853 .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
854 .collect();
855 let runtime = tokio::runtime::Builder::new_current_thread()
856 .enable_all()
857 .build()
858 .unwrap();
859 let compacted = runtime.block_on(auto_compact_messages(
860 &mut messages,
861 &AutoCompactConfig {
862 compact_strategy: CompactStrategy::Truncate,
863 keep_last: 6,
864 ..Default::default()
865 },
866 None,
867 ));
868 let summary = compacted.unwrap();
869 assert!(summary.is_some());
870 assert!(messages.len() <= 7); assert!(messages[0]["content"]
872 .as_str()
873 .unwrap()
874 .contains("auto-compacted"));
875 }
876
877 #[test]
878 fn auto_compact_noop_when_under_threshold() {
879 let mut messages: Vec<serde_json::Value> = (0..4)
880 .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
881 .collect();
882 let runtime = tokio::runtime::Builder::new_current_thread()
883 .enable_all()
884 .build()
885 .unwrap();
886 let compacted = runtime.block_on(auto_compact_messages(
887 &mut messages,
888 &AutoCompactConfig {
889 compact_strategy: CompactStrategy::Truncate,
890 keep_last: 6,
891 ..Default::default()
892 },
893 None,
894 ));
895 assert!(compacted.unwrap().is_none());
896 assert_eq!(messages.len(), 4);
897 }
898
899 #[test]
900 fn observation_mask_preserves_errors_masks_verbose_output() {
901 let verbose_lines: Vec<String> = (0..60)
903 .map(|i| format!("// source line {} of the generated file", i))
904 .collect();
905 let verbose_content = format!(
906 "File created: a.go\npackage main\n{}",
907 verbose_lines.join("\n")
908 );
909 let mut messages = vec![
910 serde_json::json!({"role": "assistant", "content": "I'll create the file now."}),
911 serde_json::json!({"role": "user", "content": verbose_content}),
912 serde_json::json!({"role": "assistant", "content": "Now let me run the tests."}),
913 serde_json::json!({"role": "user", "content": "error: cannot find module\nexit code 1\nfailed to compile"}),
914 serde_json::json!({"role": "assistant", "content": "I see the issue. Let me fix it."}),
915 serde_json::json!({"role": "user", "content": "File patched successfully."}),
916 serde_json::json!({"role": "assistant", "content": "Running tests again."}),
918 serde_json::json!({"role": "user", "content": "All tests passed."}),
919 ];
920 let runtime = tokio::runtime::Builder::new_current_thread()
921 .enable_all()
922 .build()
923 .unwrap();
924 let compacted = runtime.block_on(auto_compact_messages(
925 &mut messages,
926 &AutoCompactConfig {
927 compact_strategy: CompactStrategy::ObservationMask,
928 keep_last: 2,
929 ..Default::default()
930 },
931 None,
932 ));
933 let summary = compacted.unwrap().unwrap();
934 assert!(summary.contains("I'll create the file now."));
936 assert!(summary.contains("Now let me run the tests."));
937 assert!(summary.contains("I see the issue. Let me fix it."));
938 assert!(summary.contains("error: cannot find module"));
940 assert!(summary.contains("exit code 1"));
941 assert!(summary.contains("masked]"));
943 assert!(summary.contains("File created: a.go"));
944 assert!(!summary.contains("File patched successfully."));
946 assert!(!summary.contains("Running tests again."));
948 assert!(!summary.contains("All tests passed."));
949 assert_eq!(messages.len(), 4);
951 }
952
953 #[test]
954 fn observation_mask_keeps_short_tool_output() {
955 let messages = vec![
956 serde_json::json!({"role": "user", "content": "OK"}),
957 serde_json::json!({"role": "user", "content": "Done."}),
958 ];
959 let summary = observation_mask_compaction(&messages, 2);
960 assert!(summary.contains("[user] OK"));
961 assert!(summary.contains("[user] Done."));
962 assert!(!summary.contains("masked"));
963 }
964
965 #[test]
966 fn estimate_message_tokens_basic() {
967 let messages = vec![
968 serde_json::json!({"role": "user", "content": "a".repeat(400)}),
969 serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
970 ];
971 let tokens = estimate_message_tokens(&messages);
972 assert_eq!(tokens, 200); }
974
975 #[test]
978 fn dedup_artifacts_removes_duplicates() {
979 let mut artifacts = vec![
980 ArtifactRecord {
981 id: "a1".to_string(),
982 kind: "test".to_string(),
983 text: Some("duplicate content".to_string()),
984 ..Default::default()
985 },
986 ArtifactRecord {
987 id: "a2".to_string(),
988 kind: "test".to_string(),
989 text: Some("duplicate content".to_string()),
990 ..Default::default()
991 },
992 ArtifactRecord {
993 id: "a3".to_string(),
994 kind: "test".to_string(),
995 text: Some("unique content".to_string()),
996 ..Default::default()
997 },
998 ];
999 dedup_artifacts(&mut artifacts);
1000 assert_eq!(artifacts.len(), 2);
1001 }
1002
1003 #[test]
1004 fn microcompact_artifact_snips_oversized() {
1005 let mut artifact = ArtifactRecord {
1006 id: "a1".to_string(),
1007 kind: "test".to_string(),
1008 text: Some("x".repeat(10_000)),
1009 estimated_tokens: Some(2_500),
1010 ..Default::default()
1011 };
1012 microcompact_artifact(&mut artifact, 500);
1013 assert!(artifact.text.as_ref().unwrap().len() < 5_000);
1014 assert_eq!(artifact.estimated_tokens, Some(500));
1015 }
1016
1017 #[test]
1020 fn arg_constraint_allows_matching_pattern() {
1021 let policy = CapabilityPolicy {
1022 tool_arg_constraints: vec![ToolArgConstraint {
1023 tool: "exec".to_string(),
1024 arg_patterns: vec!["cargo *".to_string()],
1025 }],
1026 ..Default::default()
1027 };
1028 let result = enforce_tool_arg_constraints(
1029 &policy,
1030 "exec",
1031 &serde_json::json!({"command": "cargo test"}),
1032 );
1033 assert!(result.is_ok());
1034 }
1035
1036 #[test]
1037 fn arg_constraint_rejects_non_matching_pattern() {
1038 let policy = CapabilityPolicy {
1039 tool_arg_constraints: vec![ToolArgConstraint {
1040 tool: "exec".to_string(),
1041 arg_patterns: vec!["cargo *".to_string()],
1042 }],
1043 ..Default::default()
1044 };
1045 let result = enforce_tool_arg_constraints(
1046 &policy,
1047 "exec",
1048 &serde_json::json!({"command": "rm -rf /"}),
1049 );
1050 assert!(result.is_err());
1051 }
1052
1053 #[test]
1054 fn arg_constraint_ignores_unmatched_tool() {
1055 let policy = CapabilityPolicy {
1056 tool_arg_constraints: vec![ToolArgConstraint {
1057 tool: "exec".to_string(),
1058 arg_patterns: vec!["cargo *".to_string()],
1059 }],
1060 ..Default::default()
1061 };
1062 let result = enforce_tool_arg_constraints(
1063 &policy,
1064 "read_file",
1065 &serde_json::json!({"path": "/etc/passwd"}),
1066 );
1067 assert!(result.is_ok());
1068 }
1069
1070 #[test]
1071 fn arg_constraint_prefers_declared_path_param_metadata() {
1072 let mut tool_metadata = std::collections::BTreeMap::new();
1073 tool_metadata.insert(
1074 "edit".to_string(),
1075 ToolRuntimePolicyMetadata {
1076 path_params: vec!["path".to_string()],
1077 ..Default::default()
1078 },
1079 );
1080 let policy = CapabilityPolicy {
1081 tool_arg_constraints: vec![ToolArgConstraint {
1082 tool: "edit".to_string(),
1083 arg_patterns: vec!["tests/*".to_string()],
1084 }],
1085 tool_metadata,
1086 ..Default::default()
1087 };
1088 let result = enforce_tool_arg_constraints(
1089 &policy,
1090 "edit",
1091 &serde_json::json!({
1092 "action": "replace_range",
1093 "path": "tests/unit/test_experiment_service.py",
1094 "content": "..."
1095 }),
1096 );
1097 assert!(result.is_ok());
1098 }
1099
1100 #[test]
1101 fn microcompact_handles_multibyte_utf8() {
1102 let emoji_output = "🔥".repeat(500); let result = microcompact_tool_output(&emoji_output, 400);
1105 assert!(result.contains("snipped"));
1107
1108 let mixed = format!("{}{}{}", "a".repeat(300), "é".repeat(500), "b".repeat(300));
1110 let result2 = microcompact_tool_output(&mixed, 400);
1111 assert!(result2.contains("snipped"));
1112
1113 let cjk = "中文".repeat(500);
1115 let result3 = microcompact_tool_output(&cjk, 400);
1116 assert!(result3.contains("snipped"));
1117 }
1118}