1use std::path::PathBuf;
2use std::{cell::RefCell, thread_local};
3
4use serde::{Deserialize, Serialize};
5
6use crate::llm::vm_value_to_json;
7use crate::value::{VmError, VmValue};
8
/// Current wall-clock time as an RFC 3339 UTC timestamp (`YYYY-MM-DDTHH:MM:SSZ`).
///
/// Previously this returned the raw epoch-seconds count despite the name;
/// callers treating the value as an opaque, lexicographically sortable string
/// are unaffected (RFC 3339 UTC strings sort chronologically).
pub(crate) fn now_rfc3339() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        // Clock before the epoch is treated as the epoch itself.
        .unwrap_or_default()
        .as_secs();
    format_rfc3339_utc(secs)
}

/// Format `secs` since the Unix epoch as an RFC 3339 UTC timestamp.
///
/// Uses Howard Hinnant's civil-from-days algorithm so no external date crate
/// is needed.
pub(crate) fn format_rfc3339_utc(secs: u64) -> String {
    let days = secs / 86_400;
    let rem = secs % 86_400;
    let (hour, min, sec) = (rem / 3_600, (rem % 3_600) / 60, rem % 60);

    // Civil-from-days: shift the epoch to 0000-03-01 so leap days land at the
    // end of the "computational year".
    let z = days as i64 + 719_468;
    let era = z.div_euclid(146_097);
    let doe = z.rem_euclid(146_097); // day-of-era [0, 146096]
    let yoe = (doe - doe / 1_460 + doe / 36_524 - doe / 146_096) / 365; // year-of-era [0, 399]
    let mut year = yoe + era * 400;
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day-of-year [0, 365]
    let mp = (5 * doy + 2) / 153; // month from March [0, 11]
    let day = doy - (153 * mp + 2) / 5 + 1;
    let month = if mp < 10 { mp + 3 } else { mp - 9 };
    if month <= 2 {
        year += 1;
    }

    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{min:02}:{sec:02}Z")
}
17
18pub(crate) fn new_id(prefix: &str) -> String {
19 format!("{prefix}_{}", uuid::Uuid::now_v7())
20}
21
22pub(crate) fn default_run_dir() -> PathBuf {
23 let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
24 crate::runtime_paths::run_root(&base)
25}
26
27mod hooks;
28pub use hooks::*;
29
30mod compaction;
31pub use compaction::*;
32
33mod artifacts;
34pub use artifacts::*;
35
36mod policy;
37pub use policy::*;
38
39mod workflow;
40pub use workflow::*;
41
42mod records;
43pub use records::*;
44
// The active mutation session is tracked per thread: `install_current_mutation_session`
// writes this slot and `current_mutation_session` reads it. `None` means no
// session is installed on this thread.
thread_local! {
    static CURRENT_MUTATION_SESSION: RefCell<Option<MutationSessionRecord>> = const { RefCell::new(None) };
}
48
/// Record describing the mutation session associated with the current
/// execution. All fields default via `#[serde(default)]`, so records
/// serialized before a field existed still deserialize.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct MutationSessionRecord {
    /// Session identifier; `normalize` generates one when this is empty.
    pub session_id: String,
    /// Identifier of the parent session, when nested. NOTE(review): semantics
    /// inferred from the name — confirm against the installer's callers.
    pub parent_session_id: Option<String>,
    pub run_id: Option<String>,
    pub worker_id: Option<String>,
    pub execution_kind: Option<String>,
    /// Mutation scope label; `normalize` defaults this to `"read_only"`.
    pub mutation_scope: String,
    /// Optional tool-approval policy attached to the session.
    pub approval_policy: Option<ToolApprovalPolicy>,
}
62
63impl MutationSessionRecord {
64 pub fn normalize(mut self) -> Self {
65 if self.session_id.is_empty() {
66 self.session_id = new_id("session");
67 }
68 if self.mutation_scope.is_empty() {
69 self.mutation_scope = "read_only".to_string();
70 }
71 self
72 }
73}
74
75pub fn install_current_mutation_session(session: Option<MutationSessionRecord>) {
76 CURRENT_MUTATION_SESSION.with(|slot| {
77 *slot.borrow_mut() = session.map(MutationSessionRecord::normalize);
78 });
79}
80
81pub fn current_mutation_session() -> Option<MutationSessionRecord> {
82 CURRENT_MUTATION_SESSION.with(|slot| slot.borrow().clone())
83}
84pub(crate) fn parse_json_payload<T: for<'de> Deserialize<'de>>(
85 json: serde_json::Value,
86 label: &str,
87) -> Result<T, VmError> {
88 let payload = json.to_string();
89 let mut deserializer = serde_json::Deserializer::from_str(&payload);
90 let mut tracker = serde_path_to_error::Track::new();
91 let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
92 T::deserialize(path_deserializer).map_err(|error| {
93 let snippet = if payload.len() > 600 {
94 format!("{}...", &payload[..600])
95 } else {
96 payload.clone()
97 };
98 VmError::Runtime(format!(
99 "{label} parse error at {}: {} | payload={}",
100 tracker.path(),
101 error,
102 snippet
103 ))
104 })
105}
106
107pub(crate) fn parse_json_value<T: for<'de> Deserialize<'de>>(
108 value: &VmValue,
109) -> Result<T, VmError> {
110 parse_json_payload(vm_value_to_json(value), "orchestration")
111}
112
// Unit tests for the orchestration helpers re-exported above: capability
// policies, workflow normalization/validation, replay fixtures, run diffs,
// tool hooks, compaction, and artifact selection.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;
    use std::rc::Rc;

    // --- capability policy & execution policy enforcement ---

    #[test]
    fn capability_intersection_rejects_privilege_expansion() {
        let ceiling = CapabilityPolicy {
            tools: vec!["read".to_string()],
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(2),
            ..Default::default()
        };
        let requested = CapabilityPolicy {
            tools: vec!["read".to_string(), "edit".to_string()],
            ..Default::default()
        };
        // Requesting "edit" beyond the ceiling's tool list must be rejected.
        let error = ceiling.intersect(&requested).unwrap_err();
        assert!(error.contains("host ceiling"));
    }

    #[test]
    fn mutation_session_normalize_fills_defaults() {
        let normalized = MutationSessionRecord::default().normalize();
        assert!(normalized.session_id.starts_with("session_"));
        assert_eq!(normalized.mutation_scope, "read_only");
        assert!(normalized.approval_policy.is_none());
    }

    #[test]
    fn install_current_mutation_session_round_trips() {
        let policy = ToolApprovalPolicy {
            require_approval: vec!["edit*".to_string()],
            ..Default::default()
        };
        install_current_mutation_session(Some(MutationSessionRecord {
            session_id: "session_test".to_string(),
            mutation_scope: "apply_workspace".to_string(),
            approval_policy: Some(policy.clone()),
            ..Default::default()
        }));
        let current = current_mutation_session().expect("session installed");
        assert_eq!(current.session_id, "session_test");
        assert_eq!(current.mutation_scope, "apply_workspace");
        assert_eq!(current.approval_policy.as_ref(), Some(&policy));

        // Clearing must leave no session behind on this thread.
        install_current_mutation_session(None);
        assert!(current_mutation_session().is_none());
    }

    #[test]
    fn active_execution_policy_rejects_unknown_bridge_builtin() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
        // Pop before asserting so a failed assert doesn't leak the policy.
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }

    #[test]
    fn active_execution_policy_rejects_mcp_escape_hatch() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        // mcp_connect would bypass the declared capability set entirely.
        let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }

    // --- workflow normalization & validation ---

    #[test]
    fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "legacy",
            "act": {"mode": "llm"},
            "verify": {"kind": "verify"},
            "repair": {"mode": "agent"},
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        assert_eq!(graph.type_name, "workflow_graph");
        assert!(graph.nodes.contains_key("act"));
        assert!(graph.nodes.contains_key("verify"));
        assert!(graph.nodes.contains_key("repair"));
        assert_eq!(graph.entry, "act");
    }

    #[test]
    fn workflow_normalization_accepts_tool_registry_nodes() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "registry_tools",
            "entry": "implement",
            "nodes": {
                "implement": {
                    "kind": "stage",
                    "mode": "agent",
                    "tools": {
                        "_type": "tool_registry",
                        "tools": [
                            {"name": "read", "description": "Read files"},
                            {"name": "run", "description": "Run commands"}
                        ]
                    }
                }
            },
            "edges": []
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        let node = graph.nodes.get("implement").unwrap();
        assert_eq!(workflow_tool_names(&node.tools), vec!["read", "run"]);
    }

    #[test]
    fn artifact_selection_honors_budget_and_priority() {
        let policy = ContextPolicy {
            max_artifacts: Some(2),
            max_tokens: Some(30),
            prefer_recent: true,
            prefer_fresh: true,
            prioritize_kinds: vec!["verification_result".to_string()],
            ..Default::default()
        };
        let artifacts = vec![
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "a".to_string(),
                kind: "summary".to_string(),
                text: Some("short".to_string()),
                relevance: Some(0.9),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "b".to_string(),
                kind: "summary".to_string(),
                text: Some("this is a much larger artifact body".to_string()),
                relevance: Some(1.0),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "c".to_string(),
                kind: "summary".to_string(),
                text: Some("tiny".to_string()),
                relevance: Some(0.5),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
        ];
        // max_artifacts=2 caps the selection regardless of relevance order.
        let selected = select_artifacts(artifacts, &policy);
        assert_eq!(selected.len(), 2);
        assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
    }

    #[test]
    fn workflow_validation_rejects_condition_without_true_false_edges() {
        // Condition node with only a "true" branch — validation must demand both.
        let graph = WorkflowGraph {
            entry: "gate".to_string(),
            nodes: BTreeMap::from([(
                "gate".to_string(),
                WorkflowNode {
                    id: Some("gate".to_string()),
                    kind: "condition".to_string(),
                    ..Default::default()
                },
            )]),
            edges: vec![WorkflowEdge {
                from: "gate".to_string(),
                to: "next".to_string(),
                branch: Some("true".to_string()),
                label: None,
            }],
            ..Default::default()
        };
        let report = validate_workflow(&graph, None);
        assert!(!report.valid);
        assert!(report
            .errors
            .iter()
            .any(|error| error.contains("true") && error.contains("false")));
    }

    // --- replay fixtures, eval suites, run diffs ---

    #[test]
    fn replay_fixture_round_trip_passes() {
        // A fixture derived from a run must evaluate that same run as passing.
        let run = RunRecord {
            type_name: "run_record".to_string(),
            id: "run_1".to_string(),
            workflow_id: "wf".to_string(),
            workflow_name: Some("demo".to_string()),
            task: "demo".to_string(),
            status: "completed".to_string(),
            started_at: "1".to_string(),
            finished_at: Some("2".to_string()),
            parent_run_id: None,
            root_run_id: Some("run_1".to_string()),
            stages: vec![RunStageRecord {
                id: "stage_1".to_string(),
                node_id: "act".to_string(),
                kind: "stage".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                branch: Some("success".to_string()),
                started_at: "1".to_string(),
                finished_at: Some("2".to_string()),
                visible_text: Some("done".to_string()),
                private_reasoning: None,
                transcript: None,
                verification: None,
                usage: None,
                artifacts: vec![ArtifactRecord {
                    type_name: "artifact".to_string(),
                    id: "a1".to_string(),
                    kind: "summary".to_string(),
                    text: Some("done".to_string()),
                    created_at: "1".to_string(),
                    ..Default::default()
                }
                .normalize()],
                consumed_artifact_ids: vec![],
                produced_artifact_ids: vec!["a1".to_string()],
                attempts: vec![],
                metadata: BTreeMap::new(),
            }],
            transitions: vec![],
            checkpoints: vec![],
            pending_nodes: vec![],
            completed_nodes: vec!["act".to_string()],
            child_runs: vec![],
            artifacts: vec![],
            policy: CapabilityPolicy::default(),
            execution: None,
            transcript: None,
            usage: None,
            replay_fixture: None,
            trace_spans: vec![],
            tool_recordings: vec![],
            metadata: BTreeMap::new(),
            persisted_path: None,
        };
        let fixture = replay_fixture_from_run(&run);
        let report = evaluate_run_against_fixture(&run, &fixture);
        assert!(report.pass);
        assert!(report.failures.is_empty());
    }

    #[test]
    fn replay_eval_suite_reports_failed_case() {
        let good = RunRecord {
            id: "run_good".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let bad = RunRecord {
            id: "run_bad".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        // Both cases use the fixture derived from `good`, so `bad` must fail.
        let suite = evaluate_run_suite(vec![
            (
                good.clone(),
                replay_fixture_from_run(&good),
                Some("good.json".to_string()),
            ),
            (
                bad.clone(),
                replay_fixture_from_run(&good),
                Some("bad.json".to_string()),
            ),
        ]);
        assert!(!suite.pass);
        assert_eq!(suite.total, 2);
        assert_eq!(suite.failed, 1);
        assert!(suite.cases.iter().any(|case| !case.pass));
    }

    #[test]
    fn run_diff_reports_changed_stage() {
        let left = RunRecord {
            id: "left".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let right = RunRecord {
            id: "right".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let diff = diff_run_records(&left, &right);
        assert!(diff.status_changed);
        assert!(!diff.identical);
        assert_eq!(diff.stage_diffs.len(), 1);
    }

    #[test]
    fn eval_suite_manifest_can_fail_on_baseline_diff() {
        // Writes two run records to a temp dir and compares them via manifest.
        let temp_dir =
            std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
        std::fs::create_dir_all(&temp_dir).unwrap();
        let baseline_path = temp_dir.join("baseline.json");
        let candidate_path = temp_dir.join("candidate.json");

        let baseline = RunRecord {
            id: "baseline".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let candidate = RunRecord {
            id: "candidate".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };

        save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
        save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();

        let manifest = EvalSuiteManifest {
            base_dir: Some(temp_dir.display().to_string()),
            cases: vec![EvalSuiteCase {
                label: Some("candidate".to_string()),
                run_path: "candidate.json".to_string(),
                fixture_path: None,
                compare_to: Some("baseline.json".to_string()),
            }],
            ..Default::default()
        };
        let suite = evaluate_run_suite_manifest(&manifest).unwrap();
        assert!(!suite.pass);
        assert_eq!(suite.failed, 1);
        assert!(suite.cases[0].comparison.is_some());
        assert!(suite.cases[0]
            .failures
            .iter()
            .any(|failure| failure.contains("baseline")));
    }

    // --- unified diff rendering & Myers diff primitives ---

    #[test]
    fn render_unified_diff_marks_removed_and_added_lines() {
        let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
        assert!(diff.contains("--- a/src/main.rs"));
        assert!(diff.contains("+++ b/src/main.rs"));
        assert!(diff.contains("-old"));
        assert!(diff.contains("+new"));
        assert!(diff.contains(" same"));
    }

    #[test]
    fn render_unified_diff_identical_inputs() {
        let text = "line1\nline2\nline3";
        let diff = render_unified_diff(None, text, text);
        assert!(diff.contains("--- a/artifact"));
        // Skip the two header lines; the body must be all-context.
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('-')));
        assert!(!body.iter().any(|l| l.starts_with('+')));
        assert_eq!(body.len(), 3);
    }

    #[test]
    fn render_unified_diff_empty_before() {
        let diff = render_unified_diff(None, "", "new1\nnew2");
        assert!(diff.contains("+new1"));
        assert!(diff.contains("+new2"));
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('-')));
    }

    #[test]
    fn render_unified_diff_empty_after() {
        let diff = render_unified_diff(None, "old1\nold2", "");
        assert!(diff.contains("-old1"));
        assert!(diff.contains("-old2"));
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('+')));
    }

    #[test]
    fn render_unified_diff_both_empty() {
        let diff = render_unified_diff(None, "", "");
        assert!(diff.contains("--- a/artifact"));
        assert!(diff.contains("+++ b/artifact"));
        let body: String = diff.lines().skip(2).collect();
        assert!(body.is_empty());
    }

    #[test]
    fn render_unified_diff_all_changed() {
        let diff = render_unified_diff(None, "a\nb", "x\ny");
        assert!(diff.contains("-a"));
        assert!(diff.contains("-b"));
        assert!(diff.contains("+x"));
        assert!(diff.contains("+y"));
    }

    #[test]
    fn render_unified_diff_insertion_in_middle() {
        let diff = render_unified_diff(None, "a\nc", "a\nb\nc");
        assert!(diff.contains(" a"));
        assert!(diff.contains("+b"));
        assert!(diff.contains(" c"));
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('-')));
    }

    #[test]
    fn render_unified_diff_deletion_from_middle() {
        let diff = render_unified_diff(None, "a\nb\nc", "a\nc");
        assert!(diff.contains(" a"));
        assert!(diff.contains("-b"));
        assert!(diff.contains(" c"));
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('+')));
    }

    #[test]
    fn render_unified_diff_default_path() {
        // With no path, headers fall back to the "artifact" placeholder.
        let diff = render_unified_diff(None, "a", "b");
        assert!(diff.contains("--- a/artifact"));
        assert!(diff.contains("+++ b/artifact"));
    }

    #[test]
    fn render_unified_diff_large_similar() {
        // 1000 lines with a single change: context around the edit must survive.
        let mut before = Vec::new();
        let mut after = Vec::new();
        for i in 0..1000 {
            before.push(format!("line {i}"));
            after.push(format!("line {i}"));
        }
        before[500] = "OLD LINE 500".to_string();
        after[500] = "NEW LINE 500".to_string();
        let before_str = before.join("\n");
        let after_str = after.join("\n");
        let diff = render_unified_diff(None, &before_str, &after_str);
        assert!(diff.contains("-OLD LINE 500"));
        assert!(diff.contains("+NEW LINE 500"));
        assert!(diff.contains(" line 499"));
        assert!(diff.contains(" line 501"));
    }

    #[test]
    fn myers_diff_empty_sequences() {
        let ops = myers_diff(&[], &[]);
        assert!(ops.is_empty());
    }

    #[test]
    fn myers_diff_insert_only() {
        let ops = myers_diff(&[], &["a", "b"]);
        assert_eq!(ops.len(), 2);
        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Insert));
    }

    #[test]
    fn myers_diff_delete_only() {
        let ops = myers_diff(&["a", "b"], &[]);
        assert_eq!(ops.len(), 2);
        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Delete));
    }

    #[test]
    fn myers_diff_equal() {
        let ops = myers_diff(&["a", "b", "c"], &["a", "b", "c"]);
        assert_eq!(ops.len(), 3);
        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Equal));
    }

    #[test]
    fn execution_policy_rejects_process_exec_when_read_only() {
        // Even with the "process:exec" capability, read_only blocks exec.
        push_execution_policy(CapabilityPolicy {
            side_effect_level: Some("read_only".to_string()),
            capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
            ..Default::default()
        });
        let result = enforce_current_policy_for_builtin("exec", &[]);
        pop_execution_policy();
        assert!(result.is_err());
    }

    #[test]
    fn execution_policy_rejects_unlisted_tool() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            ..Default::default()
        });
        let result = enforce_current_policy_for_tool("edit");
        pop_execution_policy();
        assert!(result.is_err());
    }

    #[test]
    fn normalize_run_record_preserves_trace_spans() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "_type": "run_record",
            "id": "run_trace",
            "workflow_id": "wf",
            "status": "completed",
            "started_at": "1",
            "trace_spans": [
                {
                    "span_id": 1,
                    "parent_id": null,
                    "kind": "pipeline",
                    "name": "workflow",
                    "start_ms": 0,
                    "duration_ms": 42,
                    "metadata": {"model": "demo"}
                }
            ]
        }));

        let run = normalize_run_record(&value).unwrap();
        assert_eq!(run.trace_spans.len(), 1);
        assert_eq!(run.trace_spans[0].kind, "pipeline");
        assert_eq!(
            run.trace_spans[0].metadata["model"],
            serde_json::json!("demo")
        );
    }

    // --- tool hooks (pre/post) and glob matching ---
    // Each hook test clears registered hooks before and after so tests on the
    // same thread cannot observe each other's hooks.

    #[test]
    fn pre_tool_hook_deny_blocks_execution() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "dangerous_*".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Deny("blocked by policy".to_string())
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Deny(_)));
    }

    #[test]
    fn pre_tool_hook_allow_passes_through() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "safe_*".to_string(),
            pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
            post: None,
        });
        let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Allow));
    }

    #[test]
    fn pre_tool_hook_modify_rewrites_args() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "*".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
        clear_tool_hooks();
        match result {
            PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
            _ => panic!("expected Modify"),
        }
    }

    #[test]
    fn post_tool_hook_modifies_result() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "exec".to_string(),
            pre: None,
            post: Some(Rc::new(|_name, result| {
                if result.contains("SECRET") {
                    PostToolAction::Modify("[REDACTED]".to_string())
                } else {
                    PostToolAction::Pass
                }
            })),
        });
        let result = run_post_tool_hooks("exec", "output with SECRET data");
        let clean = run_post_tool_hooks("exec", "clean output");
        clear_tool_hooks();
        assert_eq!(result, "[REDACTED]");
        assert_eq!(clean, "clean output");
    }

    #[test]
    fn unmatched_hook_pattern_does_not_fire() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "exec".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Deny("should not match".to_string())
            })),
            post: None,
        });
        // Hook pattern "exec" should not match "read_file" → default Allow.
        let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Allow));
    }

    #[test]
    fn glob_match_patterns() {
        assert!(glob_match("*", "anything"));
        assert!(glob_match("exec*", "exec_at"));
        assert!(glob_match("*_file", "read_file"));
        assert!(!glob_match("exec*", "read_file"));
        assert!(glob_match("read_file", "read_file"));
        assert!(!glob_match("read_file", "write_file"));
    }

    // --- compaction: microcompaction, auto-compaction, observation masking ---

    #[test]
    fn microcompact_snips_large_output() {
        let large = "x".repeat(50_000);
        let result = microcompact_tool_output(&large, 10_000);
        assert!(result.len() < 15_000);
        assert!(result.contains("snipped"));
    }

    #[test]
    fn microcompact_preserves_small_output() {
        let small = "hello world";
        let result = microcompact_tool_output(small, 10_000);
        assert_eq!(result, small);
    }

    #[test]
    fn microcompact_preserves_strong_keyword_lines_without_file_line() {
        // Bury failure lines in verbose noise; keyword heuristics must keep them.
        let mut output = String::new();
        for i in 0..100 {
            output.push_str(&format!("verbose progress line {i}\n"));
        }
        output.push_str("--- FAIL: TestEmpty (0.00s)\n");
        output.push_str("thread 'tests::test_foo' panicked at src/lib.rs:42:5\n");
        output.push_str("FAILED tests/test_parser.py::test_empty\n");
        for i in 0..100 {
            output.push_str(&format!("more output after failures {i}\n"));
        }
        let result = microcompact_tool_output(&output, 2_000);
        assert!(
            result.contains("--- FAIL: TestEmpty"),
            "strong 'FAIL' keyword should preserve the line:\n{result}"
        );
        assert!(
            result.contains("panicked at"),
            "strong 'panic' keyword should preserve the line:\n{result}"
        );
        assert!(
            result.contains("FAILED tests/test_parser.py"),
            "strong 'FAIL' keyword should preserve pytest-style lines too:\n{result}"
        );
    }

    #[test]
    fn auto_compact_messages_reduces_count() {
        let mut messages: Vec<serde_json::Value> = (0..20)
            .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
            .collect();
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();
        let compacted = runtime.block_on(auto_compact_messages(
            &mut messages,
            &AutoCompactConfig {
                compact_strategy: CompactStrategy::Truncate,
                keep_last: 6,
                ..Default::default()
            },
            None,
        ));
        let summary = compacted.unwrap();
        assert!(summary.is_some());
        // keep_last=6 plus the injected summary message.
        assert!(messages.len() <= 7);
        assert!(messages[0]["content"]
            .as_str()
            .unwrap()
            .contains("auto-compacted"));
    }

    #[test]
    fn auto_compact_noop_when_under_threshold() {
        let mut messages: Vec<serde_json::Value> = (0..4)
            .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
            .collect();
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();
        let compacted = runtime.block_on(auto_compact_messages(
            &mut messages,
            &AutoCompactConfig {
                compact_strategy: CompactStrategy::Truncate,
                keep_last: 6,
                ..Default::default()
            },
            None,
        ));
        assert!(compacted.unwrap().is_none());
        assert_eq!(messages.len(), 4);
    }

    #[test]
    fn observation_mask_preserves_errors_masks_verbose_output() {
        let verbose_lines: Vec<String> = (0..60)
            .map(|i| format!("// source line {} of the generated file", i))
            .collect();
        let verbose_content = format!(
            "File created: a.go\npackage main\n{}",
            verbose_lines.join("\n")
        );
        let mut messages = vec![
            serde_json::json!({"role": "assistant", "content": "I'll create the file now."}),
            serde_json::json!({"role": "user", "content": verbose_content}),
            serde_json::json!({"role": "assistant", "content": "Now let me run the tests."}),
            serde_json::json!({"role": "user", "content": "error: cannot find module\nexit code 1\nfailed to compile"}),
            serde_json::json!({"role": "assistant", "content": "I see the issue. Let me fix it."}),
            serde_json::json!({"role": "user", "content": "File patched successfully."}),
            serde_json::json!({"role": "assistant", "content": "Running tests again."}),
            serde_json::json!({"role": "user", "content": "All tests passed."}),
        ];
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();
        let compacted = runtime.block_on(auto_compact_messages(
            &mut messages,
            &AutoCompactConfig {
                compact_strategy: CompactStrategy::ObservationMask,
                keep_last: 2,
                ..Default::default()
            },
            None,
        ));
        let summary = compacted.unwrap().unwrap();
        // Assistant turns survive; error output survives; verbose tool output
        // is masked down to its leading line; the last keep_last=2 messages
        // stay out of the summary because they remain in `messages`.
        assert!(summary.contains("I'll create the file now."));
        assert!(summary.contains("Now let me run the tests."));
        assert!(summary.contains("I see the issue. Let me fix it."));
        assert!(summary.contains("error: cannot find module"));
        assert!(summary.contains("exit code 1"));
        assert!(summary.contains("masked]"));
        assert!(summary.contains("File created: a.go"));
        assert!(!summary.contains("File patched successfully."));
        assert!(!summary.contains("Running tests again."));
        assert!(!summary.contains("All tests passed."));
        assert_eq!(messages.len(), 4);
    }

    #[test]
    fn observation_mask_keeps_short_tool_output() {
        let messages = vec![
            serde_json::json!({"role": "user", "content": "OK"}),
            serde_json::json!({"role": "user", "content": "Done."}),
        ];
        let summary = observation_mask_compaction(&messages, 2);
        assert!(summary.contains("[user] OK"));
        assert!(summary.contains("[user] Done."));
        assert!(!summary.contains("masked"));
    }

    #[test]
    fn estimate_message_tokens_basic() {
        // 800 content chars total at ~4 chars/token → 200.
        let messages = vec![
            serde_json::json!({"role": "user", "content": "a".repeat(400)}),
            serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
        ];
        let tokens = estimate_message_tokens(&messages);
        assert_eq!(tokens, 200);
    }

    #[test]
    fn dedup_artifacts_removes_duplicates() {
        let mut artifacts = vec![
            ArtifactRecord {
                id: "a1".to_string(),
                kind: "test".to_string(),
                text: Some("duplicate content".to_string()),
                ..Default::default()
            },
            ArtifactRecord {
                id: "a2".to_string(),
                kind: "test".to_string(),
                text: Some("duplicate content".to_string()),
                ..Default::default()
            },
            ArtifactRecord {
                id: "a3".to_string(),
                kind: "test".to_string(),
                text: Some("unique content".to_string()),
                ..Default::default()
            },
        ];
        dedup_artifacts(&mut artifacts);
        assert_eq!(artifacts.len(), 2);
    }

    #[test]
    fn microcompact_artifact_snips_oversized() {
        let mut artifact = ArtifactRecord {
            id: "a1".to_string(),
            kind: "test".to_string(),
            text: Some("x".repeat(10_000)),
            estimated_tokens: Some(2_500),
            ..Default::default()
        };
        microcompact_artifact(&mut artifact, 500);
        assert!(artifact.text.as_ref().unwrap().len() < 5_000);
        assert_eq!(artifact.estimated_tokens, Some(500));
    }

    // --- tool argument constraints ---

    #[test]
    fn arg_constraint_allows_matching_pattern() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "exec",
            &serde_json::json!({"command": "cargo test"}),
        );
        assert!(result.is_ok());
    }

    #[test]
    fn arg_constraint_rejects_non_matching_pattern() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "exec",
            &serde_json::json!({"command": "rm -rf /"}),
        );
        assert!(result.is_err());
    }

    #[test]
    fn arg_constraint_ignores_unmatched_tool() {
        // Constraints apply per-tool; other tools pass through unchecked.
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "read_file",
            &serde_json::json!({"path": "/etc/passwd"}),
        );
        assert!(result.is_ok());
    }

    #[test]
    fn arg_constraint_prefers_declared_path_param_metadata() {
        // With path_params metadata, only the declared "path" arg is matched
        // against the pattern — not unrelated args like "content".
        let mut tool_metadata = std::collections::BTreeMap::new();
        tool_metadata.insert(
            "edit".to_string(),
            ToolRuntimePolicyMetadata {
                path_params: vec!["path".to_string()],
                ..Default::default()
            },
        );
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "edit".to_string(),
                arg_patterns: vec!["tests/*".to_string()],
            }],
            tool_metadata,
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "edit",
            &serde_json::json!({
                "action": "replace_range",
                "path": "tests/unit/test_experiment_service.py",
                "content": "..."
            }),
        );
        assert!(result.is_ok());
    }

    #[test]
    fn microcompact_handles_multibyte_utf8() {
        // Snipping must not split a multi-byte character (would panic).
        let emoji_output = "🔥".repeat(500);
        let result = microcompact_tool_output(&emoji_output, 400);
        assert!(result.contains("snipped"));

        let mixed = format!("{}{}{}", "a".repeat(300), "é".repeat(500), "b".repeat(300));
        let result2 = microcompact_tool_output(&mixed, 400);
        assert!(result2.contains("snipped"));

        let cjk = "中文".repeat(500);
        let result3 = microcompact_tool_output(&cjk, 400);
        assert!(result3.contains("snipped"));
    }

    // --- WorkflowNode serde back-compat ---

    #[test]
    fn workflow_node_defaults_exit_when_verified_to_false() {
        let node = WorkflowNode::default();
        assert!(!node.exit_when_verified);
    }

    #[test]
    fn workflow_node_exit_when_verified_round_trips_through_serde() {
        let node = WorkflowNode {
            id: Some("execute".to_string()),
            kind: "stage".to_string(),
            exit_when_verified: true,
            ..Default::default()
        };
        let encoded = serde_json::to_value(&node).expect("serialize");
        assert_eq!(
            encoded.get("exit_when_verified"),
            Some(&serde_json::json!(true))
        );
        let decoded: WorkflowNode = serde_json::from_value(encoded).expect("deserialize");
        assert!(decoded.exit_when_verified);
    }

    #[test]
    fn workflow_node_exit_when_verified_accepts_missing_field_for_backcompat() {
        let encoded = serde_json::json!({
            "id": "legacy_stage",
            "kind": "stage",
        });
        let decoded: WorkflowNode = serde_json::from_value(encoded).expect("deserialize");
        assert!(
            !decoded.exit_when_verified,
            "nodes serialized before this field was added must deserialize with the default"
        );
    }
}