1use std::collections::{BTreeMap, BTreeSet};
2use std::path::{Path, PathBuf};
3use std::rc::Rc;
4use std::{cell::RefCell, thread_local};
5
6use serde::{Deserialize, Serialize};
7
8use crate::llm::{extract_llm_options, vm_call_llm_full, vm_value_to_json};
9use crate::value::{VmError, VmValue};
10
/// Returns the current UTC time as an RFC 3339 timestamp
/// (`YYYY-MM-DDTHH:MM:SSZ`).
///
/// The previous implementation returned bare unix seconds despite the
/// function name; this converts the epoch seconds to a civil date using
/// Howard Hinnant's `civil_from_days` algorithm so stored `created_at`
/// values actually match the promised format. A clock before 1970 falls
/// back to the epoch itself (`duration_since` error -> default duration).
fn now_rfc3339() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let ts = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    // Split into whole days since the epoch and seconds within the day.
    let days = (ts / 86_400) as i64;
    let secs_of_day = ts % 86_400;
    let hour = secs_of_day / 3_600;
    let minute = (secs_of_day % 3_600) / 60;
    let second = secs_of_day % 60;
    // civil_from_days (Hinnant): day count -> (year, month, day), proleptic Gregorian.
    let z = days + 719_468;
    let era = z.div_euclid(146_097);
    let doe = z.rem_euclid(146_097); // day-of-era in [0, 146096]
    let yoe = (doe - doe / 1_460 + doe / 36_524 - doe / 146_096) / 365;
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
    let mp = (5 * doy + 2) / 153;
    let day = doy - (153 * mp + 2) / 5 + 1;
    let month = if mp < 10 { mp + 3 } else { mp - 9 };
    let year = yoe + era * 400 + i64::from(month <= 2);
    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z")
}
19
20fn new_id(prefix: &str) -> String {
21 format!("{prefix}_{}", uuid::Uuid::now_v7())
22}
23
/// Resolves the run directory: the `HARN_RUN_DIR` environment variable when
/// set (and valid UTF-8), otherwise the relative default `.harn-runs`.
fn default_run_dir() -> PathBuf {
    match std::env::var("HARN_RUN_DIR") {
        Ok(dir) => PathBuf::from(dir),
        Err(_) => PathBuf::from(".harn-runs"),
    }
}
29
// Per-thread VM runtime state. Thread-local (not global) so each interpreter
// thread owns its own policy stack, hook list, and mutation session.
thread_local! {
    // Stack of capability policies pushed by nested executions; innermost last.
    static EXECUTION_POLICY_STACK: RefCell<Vec<CapabilityPolicy>> = const { RefCell::new(Vec::new()) };
    // Tool hooks consulted before/after tool calls, in registration order.
    static TOOL_HOOKS: RefCell<Vec<ToolHook>> = const { RefCell::new(Vec::new()) };
    // Mutation session attached to the current execution, if any.
    static CURRENT_MUTATION_SESSION: RefCell<Option<MutationSessionRecord>> = const { RefCell::new(None) };
}
35
/// Describes the mutation session under which an execution runs (scope and
/// approval mode). `#[serde(default)]` lets missing fields deserialize to
/// their defaults; `normalize` then fills the empty strings.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct MutationSessionRecord {
    /// Unique session id; generated by `normalize` when empty.
    pub session_id: String,
    /// Id of the session that spawned this one, if any.
    pub parent_session_id: Option<String>,
    /// Run this session belongs to, if known.
    pub run_id: Option<String>,
    /// Worker executing under this session, if known.
    pub worker_id: Option<String>,
    /// Kind of execution (free-form label; semantics defined by callers).
    pub execution_kind: Option<String>,
    /// Mutation scope label; defaults to "read_only" via `normalize`.
    pub mutation_scope: String,
    /// Approval mode label; defaults to "host_enforced" via `normalize`.
    pub approval_mode: String,
}
47
48impl MutationSessionRecord {
49 pub fn normalize(mut self) -> Self {
50 if self.session_id.is_empty() {
51 self.session_id = new_id("session");
52 }
53 if self.mutation_scope.is_empty() {
54 self.mutation_scope = "read_only".to_string();
55 }
56 if self.approval_mode.is_empty() {
57 self.approval_mode = "host_enforced".to_string();
58 }
59 self
60 }
61}
62
63pub fn install_current_mutation_session(session: Option<MutationSessionRecord>) {
64 CURRENT_MUTATION_SESSION.with(|slot| {
65 *slot.borrow_mut() = session.map(MutationSessionRecord::normalize);
66 });
67}
68
69pub fn current_mutation_session() -> Option<MutationSessionRecord> {
70 CURRENT_MUTATION_SESSION.with(|slot| slot.borrow().clone())
71}
72
/// Verdict a pre-tool hook returns for a pending tool call.
#[derive(Clone, Debug)]
pub enum PreToolAction {
    /// Let the call proceed with the current arguments.
    Allow,
    /// Block the call; the payload is the denial reason.
    Deny(String),
    /// Let the call proceed with these replacement arguments.
    Modify(serde_json::Value),
}
85
/// Verdict a post-tool hook returns for a completed tool call's result text.
#[derive(Clone, Debug)]
pub enum PostToolAction {
    /// Keep the result unchanged.
    Pass,
    /// Replace the result with this text.
    Modify(String),
}
94
/// Pre-tool hook: `(tool_name, args) -> action`. `Rc` because hooks are
/// stored in (single-threaded) thread-local storage and cloned on dispatch.
pub type PreToolHookFn = Rc<dyn Fn(&str, &serde_json::Value) -> PreToolAction>;
/// Post-tool hook: `(tool_name, result_text) -> action`.
pub type PostToolHookFn = Rc<dyn Fn(&str, &str) -> PostToolAction>;
98
/// A registered tool hook: a glob pattern selecting tool names plus optional
/// pre- and post-call callbacks.
#[derive(Clone)]
pub struct ToolHook {
    /// Glob pattern matched against tool names (see `glob_match`).
    pub pattern: String,
    /// Called before a matching tool runs; may allow, deny, or rewrite args.
    pub pre: Option<PreToolHookFn>,
    /// Called after a matching tool runs; may rewrite the result text.
    pub post: Option<PostToolHookFn>,
}
109
110impl std::fmt::Debug for ToolHook {
111 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112 f.debug_struct("ToolHook")
113 .field("pattern", &self.pattern)
114 .field("has_pre", &self.pre.is_some())
115 .field("has_post", &self.post.is_some())
116 .finish()
117 }
118}
119
/// Minimal glob matcher for tool-name patterns.
///
/// Supported forms: `*` (everything), `prefix*`, `*suffix`, `*infix*`
/// (substring), and exact match. The `*infix*` case is new: previously a
/// pattern such as `*read*` fell into the `strip_suffix` branch and was
/// treated as the literal prefix `*read`, which could never match.
fn glob_match(pattern: &str, name: &str) -> bool {
    if pattern == "*" {
        return true;
    }
    // `*sub*` => substring containment; check before the one-sided cases so
    // neither of them mis-handles the leftover `*`.
    if let Some(inner) = pattern
        .strip_prefix('*')
        .and_then(|p| p.strip_suffix('*'))
    {
        return name.contains(inner);
    }
    if let Some(prefix) = pattern.strip_suffix('*') {
        return name.starts_with(prefix);
    }
    if let Some(suffix) = pattern.strip_prefix('*') {
        return name.ends_with(suffix);
    }
    pattern == name
}
132
133pub fn register_tool_hook(hook: ToolHook) {
134 TOOL_HOOKS.with(|hooks| hooks.borrow_mut().push(hook));
135}
136
137pub fn clear_tool_hooks() {
138 TOOL_HOOKS.with(|hooks| hooks.borrow_mut().clear());
139}
140
141pub fn run_pre_tool_hooks(tool_name: &str, args: &serde_json::Value) -> PreToolAction {
143 TOOL_HOOKS.with(|hooks| {
144 let hooks = hooks.borrow();
145 let mut current_args = args.clone();
146 for hook in hooks.iter() {
147 if !glob_match(&hook.pattern, tool_name) {
148 continue;
149 }
150 if let Some(ref pre) = hook.pre {
151 match pre(tool_name, ¤t_args) {
152 PreToolAction::Allow => {}
153 PreToolAction::Deny(reason) => return PreToolAction::Deny(reason),
154 PreToolAction::Modify(new_args) => {
155 current_args = new_args;
156 }
157 }
158 }
159 }
160 if current_args != *args {
161 PreToolAction::Modify(current_args)
162 } else {
163 PreToolAction::Allow
164 }
165 })
166}
167
168pub fn run_post_tool_hooks(tool_name: &str, result: &str) -> String {
170 TOOL_HOOKS.with(|hooks| {
171 let hooks = hooks.borrow();
172 let mut current = result.to_string();
173 for hook in hooks.iter() {
174 if !glob_match(&hook.pattern, tool_name) {
175 continue;
176 }
177 if let Some(ref post) = hook.post {
178 match post(tool_name, ¤t) {
179 PostToolAction::Pass => {}
180 PostToolAction::Modify(new_result) => {
181 current = new_result;
182 }
183 }
184 }
185 }
186 current
187 })
188}
189
/// How archived transcript messages are condensed into a summary.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CompactStrategy {
    /// Ask the model to summarize (falls back to truncation on empty output).
    Llm,
    /// Mechanical truncation, no model call.
    Truncate,
    /// Delegate to a user-supplied closure (`AutoCompactConfig::custom_compactor`).
    Custom,
}
198
199pub fn parse_compact_strategy(value: &str) -> Result<CompactStrategy, VmError> {
200 match value {
201 "llm" => Ok(CompactStrategy::Llm),
202 "truncate" => Ok(CompactStrategy::Truncate),
203 "custom" => Ok(CompactStrategy::Custom),
204 other => Err(VmError::Runtime(format!(
205 "unknown compact_strategy '{other}' (expected 'llm', 'truncate', or 'custom')"
206 ))),
207 }
208}
209
/// Tuning knobs for automatic transcript compaction.
#[derive(Clone, Debug)]
pub struct AutoCompactConfig {
    // Estimated-token budget above which compaction triggers.
    pub token_threshold: usize,
    // Per-tool-output character cap applied by microcompaction.
    pub tool_output_max_chars: usize,
    // Number of newest messages preserved verbatim during compaction.
    pub keep_last: usize,
    // Strategy used to summarize archived messages.
    pub compact_strategy: CompactStrategy,
    // Closure used when `compact_strategy` is `Custom`; must be a VM closure.
    pub custom_compactor: Option<VmValue>,
}
224
225impl Default for AutoCompactConfig {
226 fn default() -> Self {
227 Self {
228 token_threshold: 80_000,
229 tool_output_max_chars: 20_000,
230 keep_last: 8,
231 compact_strategy: CompactStrategy::Llm,
232 custom_compactor: None,
233 }
234 }
235}
236
237pub fn estimate_message_tokens(messages: &[serde_json::Value]) -> usize {
239 messages
240 .iter()
241 .map(|m| {
242 m.get("content")
243 .and_then(|c| c.as_str())
244 .map(|s| s.len())
245 .unwrap_or(0)
246 })
247 .sum::<usize>()
248 / 4
249}
250
/// Shrinks an oversized tool output to roughly `max_chars` characters,
/// keeping the head, the tail, and (when present) up to 32 lines that look
/// like compiler/test diagnostics.
///
/// Outputs already within budget — or budgets under 200 chars, too small to
/// be useful — are returned unchanged.
///
/// Fix: all slice offsets are now snapped to UTF-8 character boundaries;
/// the previous version sliced at raw byte offsets and panicked when the
/// cut landed inside a multi-byte character. The "snipped" count now
/// reports the characters actually removed.
pub fn microcompact_tool_output(output: &str, max_chars: usize) -> String {
    // Snap a byte index down to the nearest char boundary so slicing is safe.
    fn snap(text: &str, mut index: usize) -> usize {
        while index > 0 && !text.is_char_boundary(index) {
            index -= 1;
        }
        index
    }
    if output.len() <= max_chars || max_chars < 200 {
        return output.to_string();
    }
    let diagnostic_lines = output
        .lines()
        .filter(|line| {
            let trimmed = line.trim();
            let lower = trimmed.to_lowercase();
            // "path:123"-style location: first ':' immediately followed by a digit.
            let has_file_line = trimmed
                .find(':')
                .map_or(false, |i| {
                    trimmed.as_bytes().get(i + 1).map_or(false, |b| b.is_ascii_digit())
                });
            let has_keyword = trimmed.contains("error")
                || trimmed.contains("FAIL")
                || trimmed.contains("panic")
                || trimmed.contains("undefined")
                || trimmed.contains("expected")
                || trimmed.contains("got")
                || lower.contains("cannot find")
                || lower.contains("not found")
                || lower.contains("no such")
                || lower.contains("unresolved")
                || lower.contains("missing")
                || lower.contains("declared but not used")
                || lower.contains("unused")
                || lower.contains("mismatch");
            let positional = lower.contains(" error ")
                || lower.starts_with("error:")
                || lower.starts_with("fail")
                || lower.contains("panic:");
            (has_file_line && has_keyword) || positional
        })
        .take(32)
        .collect::<Vec<_>>();
    if !diagnostic_lines.is_empty() {
        let diagnostics = diagnostic_lines.join("\n");
        // Budget for head+tail after reserving room for diagnostics + markers.
        let budget = max_chars.saturating_sub(diagnostics.len() + 64);
        let keep = budget / 2;
        if keep >= 80 && output.len() > keep * 2 {
            let head = &output[..snap(output, keep)];
            let tail = &output[snap(output, output.len() - keep)..];
            return format!(
                "{head}\n\n[diagnostic lines preserved]\n{diagnostics}\n\n[... output compacted ...]\n\n{tail}"
            );
        }
    }
    let keep = max_chars / 2;
    let head = &output[..snap(output, keep)];
    let tail = &output[snap(output, output.len() - keep)..];
    let snipped = output.len() - head.len() - tail.len();
    format!("{head}\n\n[... {snipped} characters snipped ...]\n\n{tail}")
}
317
318fn format_compaction_messages(messages: &[serde_json::Value]) -> String {
319 messages
320 .iter()
321 .map(|msg| {
322 let role = msg
323 .get("role")
324 .and_then(|v| v.as_str())
325 .unwrap_or("user")
326 .to_uppercase();
327 let content = msg
328 .get("content")
329 .and_then(|v| v.as_str())
330 .unwrap_or_default();
331 format!("{role}: {content}")
332 })
333 .collect::<Vec<_>>()
334 .join("\n")
335}
336
337fn truncate_compaction_summary(
338 old_messages: &[serde_json::Value],
339 archived_count: usize,
340) -> String {
341 let summary_parts: Vec<String> = old_messages
342 .iter()
343 .filter_map(|m| {
344 let role = m.get("role")?.as_str()?;
345 let content = m.get("content")?.as_str()?;
346 if content.is_empty() {
347 return None;
348 }
349 let truncated = if content.len() > 500 {
350 format!("{}...", &content[..500])
351 } else {
352 content.to_string()
353 };
354 Some(format!("[{role}] {truncated}"))
355 })
356 .take(15)
357 .collect();
358 format!(
359 "[auto-compacted {archived_count} older messages]\n{}{}",
360 summary_parts.join("\n"),
361 if archived_count > 15 {
362 format!("\n... and {} more", archived_count - 15)
363 } else {
364 String::new()
365 }
366 )
367}
368
369fn compact_summary_text_from_value(value: &VmValue) -> Result<String, VmError> {
370 if let Some(map) = value.as_dict() {
371 if let Some(summary) = map.get("summary").or_else(|| map.get("text")) {
372 return Ok(summary.display());
373 }
374 }
375 match value {
376 VmValue::String(text) => Ok(text.to_string()),
377 VmValue::Nil => Ok(String::new()),
378 _ => serde_json::to_string_pretty(&vm_value_to_json(value))
379 .map_err(|e| VmError::Runtime(format!("custom compactor encode error: {e}"))),
380 }
381}
382
383async fn llm_compaction_summary(
384 old_messages: &[serde_json::Value],
385 archived_count: usize,
386 llm_opts: &crate::llm::api::LlmCallOptions,
387) -> Result<String, VmError> {
388 let mut compact_opts = llm_opts.clone();
389 let formatted = format_compaction_messages(old_messages);
390 compact_opts.system = None;
391 compact_opts.transcript_id = None;
392 compact_opts.transcript_summary = None;
393 compact_opts.transcript_metadata = None;
394 compact_opts.native_tools = None;
395 compact_opts.tool_choice = None;
396 compact_opts.response_format = None;
397 compact_opts.json_schema = None;
398 compact_opts.messages = vec![serde_json::json!({
399 "role": "user",
400 "content": format!(
401 "Summarize these archived conversation messages for a follow-on coding agent. Preserve goals, constraints, decisions, completed tool work, unresolved issues, and next actions. Output only the summary text.\n\nArchived message count: {archived_count}\n\nConversation:\n{formatted}"
402 ),
403 })];
404 let result = vm_call_llm_full(&compact_opts).await?;
405 let summary = result.text.trim();
406 if summary.is_empty() {
407 Ok(truncate_compaction_summary(old_messages, archived_count))
408 } else {
409 Ok(format!(
410 "[auto-compacted {archived_count} older messages]\n{summary}"
411 ))
412 }
413}
414
415async fn custom_compaction_summary(
416 old_messages: &[serde_json::Value],
417 archived_count: usize,
418 callback: &VmValue,
419) -> Result<String, VmError> {
420 let Some(VmValue::Closure(closure)) = Some(callback.clone()) else {
421 return Err(VmError::Runtime(
422 "compact_callback must be a closure when compact_strategy is 'custom'".to_string(),
423 ));
424 };
425 let mut vm = crate::vm::take_async_builtin_child_vm().ok_or_else(|| {
426 VmError::Runtime(
427 "custom transcript compaction requires an async builtin VM context".to_string(),
428 )
429 })?;
430 let messages_vm = VmValue::List(Rc::new(
431 old_messages
432 .iter()
433 .map(crate::stdlib::json_to_vm_value)
434 .collect(),
435 ));
436 let result = vm.call_closure_pub(&closure, &[messages_vm], &[]).await;
437 crate::vm::restore_async_builtin_child_vm(vm);
438 let summary = compact_summary_text_from_value(&result?)?;
439 if summary.trim().is_empty() {
440 Ok(truncate_compaction_summary(old_messages, archived_count))
441 } else {
442 Ok(format!(
443 "[auto-compacted {archived_count} older messages]\n{summary}"
444 ))
445 }
446}
447
/// Compacts `messages` in place when it holds more than `config.keep_last`
/// entries: older messages are drained, summarized according to
/// `config.compact_strategy`, and the summary is re-inserted as the first
/// (user-role) message.
///
/// Returns `Ok(true)` if compaction ran, `Ok(false)` if the transcript was
/// already short enough. Errors when the chosen strategy is missing its
/// prerequisite (`llm_opts` for `Llm`, `custom_compactor` for `Custom`).
pub(crate) async fn auto_compact_messages(
    messages: &mut Vec<serde_json::Value>,
    config: &AutoCompactConfig,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
) -> Result<bool, VmError> {
    if messages.len() <= config.keep_last {
        return Ok(false);
    }
    // Keep the newest `keep_last` messages; everything before is archived.
    let split_at = messages.len().saturating_sub(config.keep_last);
    let old_messages: Vec<_> = messages.drain(..split_at).collect();
    let archived_count = old_messages.len();
    let summary = match config.compact_strategy {
        CompactStrategy::Truncate => truncate_compaction_summary(&old_messages, archived_count),
        CompactStrategy::Llm => {
            llm_compaction_summary(
                &old_messages,
                archived_count,
                llm_opts.ok_or_else(|| {
                    VmError::Runtime(
                        "LLM transcript compaction requires active LLM call options".to_string(),
                    )
                })?,
            )
            .await?
        }
        CompactStrategy::Custom => {
            custom_compaction_summary(
                &old_messages,
                archived_count,
                config.custom_compactor.as_ref().ok_or_else(|| {
                    VmError::Runtime(
                        "compact_callback is required when compact_strategy is 'custom'"
                            .to_string(),
                    )
                })?,
            )
            .await?
        }
    };
    // The summary becomes the new oldest message so later turns see it first.
    messages.insert(
        0,
        serde_json::json!({
            "role": "user",
            "content": summary,
        }),
    );
    Ok(true)
}
498
499pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
503 let max_chars = max_tokens * 4;
504 if let Some(ref text) = artifact.text {
505 if text.len() > max_chars && max_chars >= 200 {
506 artifact.text = Some(microcompact_tool_output(text, max_chars));
507 artifact.estimated_tokens = Some(max_tokens);
508 }
509 }
510}
511
512pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
515 let mut seen_hashes: BTreeSet<u64> = BTreeSet::new();
516 artifacts.retain(|artifact| {
517 let text = artifact.text.as_deref().unwrap_or("");
518 if text.is_empty() {
519 return true;
520 }
521 let hash = {
523 use std::hash::{Hash, Hasher};
524 let mut hasher = std::collections::hash_map::DefaultHasher::new();
525 text.hash(&mut hasher);
526 hasher.finish()
527 };
528 seen_hashes.insert(hash)
529 });
530}
531
532pub fn select_artifacts_adaptive(
535 mut artifacts: Vec<ArtifactRecord>,
536 policy: &ContextPolicy,
537) -> Vec<ArtifactRecord> {
538 dedup_artifacts(&mut artifacts);
540
541 if let Some(max_tokens) = policy.max_tokens {
545 let count = artifacts.len().max(1);
546 let per_artifact_budget = max_tokens / count;
547 let cap = per_artifact_budget.max(500).min(max_tokens);
549 for artifact in &mut artifacts {
550 let est = artifact.estimated_tokens.unwrap_or(0);
551 if est > cap * 2 {
552 microcompact_artifact(artifact, cap);
553 }
554 }
555 }
556
557 select_artifacts(artifacts, policy)
559}
560
/// Restricts the first argument of matching tools to a set of glob patterns
/// (e.g. confining a file tool to particular paths).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolArgConstraint {
    /// Glob pattern matched against tool names (see `glob_match`).
    pub tool: String,
    /// Allowed glob patterns for the tool's first argument; an empty list
    /// means the constraint is skipped.
    pub arg_patterns: Vec<String>,
}
573
574pub fn enforce_tool_arg_constraints(
576 policy: &CapabilityPolicy,
577 tool_name: &str,
578 args: &serde_json::Value,
579) -> Result<(), VmError> {
580 for constraint in &policy.tool_arg_constraints {
581 if !glob_match(&constraint.tool, tool_name) {
582 continue;
583 }
584 if constraint.arg_patterns.is_empty() {
585 continue;
586 }
587 let first_arg = args
589 .as_object()
590 .and_then(|o| o.values().next())
591 .and_then(|v| v.as_str())
592 .or_else(|| args.as_str())
593 .unwrap_or("");
594 let matches = constraint
595 .arg_patterns
596 .iter()
597 .any(|pattern| glob_match(pattern, first_arg));
598 if !matches {
599 return reject_policy(format!(
600 "tool '{tool_name}' argument '{first_arg}' does not match allowed patterns: {:?}",
601 constraint.arg_patterns
602 ));
603 }
604 }
605 Ok(())
606}
607
/// Canonicalizes an artifact kind label.
///
/// Known shorthand aliases map to their canonical names, a blank kind
/// becomes "artifact", and every other label passes through unchanged.
/// (The original spelled out the full canonical-kind list, but canonical
/// kinds and unknown kinds are both returned as-is, so only the aliases
/// and the blank case need special handling.)
fn normalize_artifact_kind(kind: &str) -> String {
    let canonical = match kind {
        "file" => "workspace_file",
        "transcript" => "transcript_summary",
        "verification" => "verification_result",
        "test" => "test_result",
        blank if blank.trim().is_empty() => "artifact",
        other => other,
    };
    canonical.to_string()
}
641
/// Default context-assembly priority for an artifact kind; higher values are
/// preferred. Verification/test results rank highest, unknown kinds lowest.
fn default_artifact_priority(kind: &str) -> i64 {
    if matches!(kind, "verification_result" | "test_result") {
        100
    } else if kind == "verification_bundle" {
        95
    } else if matches!(
        kind,
        "diff"
            | "git_diff"
            | "patch"
            | "patch_set"
            | "patch_proposal"
            | "diff_review"
            | "review_decision"
            | "apply_intent"
    ) {
        90
    } else if kind == "plan" {
        80
    } else if matches!(
        kind,
        "workspace_file" | "workspace_snapshot" | "editor_selection" | "resource"
    ) {
        70
    } else if matches!(kind, "summary" | "transcript_summary") {
        60
    } else if kind == "command_result" {
        50
    } else {
        40
    }
}
655
/// Ranks a freshness label for artifact ordering: "fresh"/"live" -> 3,
/// "recent" -> 2, "stale" -> 0; anything else (including `None`) -> 1.
fn freshness_rank(value: Option<&str>) -> i64 {
    let label = value.unwrap_or("");
    if label == "fresh" || label == "live" {
        3
    } else if label == "recent" {
        2
    } else if label == "stale" {
        0
    } else {
        1
    }
}
664
/// Per-tool policy metadata parsed from a tool definition's `policy` object.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolRuntimePolicyMetadata {
    /// Capability name -> allowed operations declared by the tool.
    pub capabilities: BTreeMap<String, Vec<String>>,
    /// Tool's declared side-effect level (e.g. "read_only", "network").
    pub side_effect_level: Option<String>,
    /// Names of arguments that carry filesystem paths.
    pub path_params: Vec<String>,
    /// Tool's declared mutation classification, if any.
    pub mutation_classification: Option<String>,
}
673
/// A capability ceiling/grant for an execution. Empty collections mean
/// "unrestricted" for that dimension (see `intersect`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CapabilityPolicy {
    /// Tool names allowed; empty = all tools.
    pub tools: Vec<String>,
    /// Capability name -> allowed operations; empty = all capabilities.
    pub capabilities: BTreeMap<String, Vec<String>>,
    /// Workspace roots accessible; empty = unrestricted.
    pub workspace_roots: Vec<String>,
    /// Maximum side-effect level ("none" < "read_only" < "workspace_write"
    /// < "process_exec" < "network"); `None` = unspecified.
    pub side_effect_level: Option<String>,
    /// Maximum nesting depth for recursive executions, if bounded.
    pub recursion_limit: Option<usize>,
    #[serde(default)]
    /// Per-tool argument restrictions (see `enforce_tool_arg_constraints`).
    pub tool_arg_constraints: Vec<ToolArgConstraint>,
    #[serde(default)]
    /// Per-tool policy metadata keyed by tool name.
    pub tool_metadata: BTreeMap<String, ToolRuntimePolicyMetadata>,
}
688
impl CapabilityPolicy {
    /// Intersects a requested policy with this host-side ceiling, producing
    /// the effective policy for a child execution.
    ///
    /// An empty list/map on either side means "no restriction" for that
    /// dimension, so the other side's value is taken wholesale. Explicitly
    /// requesting tools or capability operations outside a non-empty ceiling
    /// is a hard error (rather than being silently narrowed away), so the
    /// caller learns exactly what was denied.
    pub fn intersect(&self, requested: &CapabilityPolicy) -> Result<CapabilityPolicy, String> {
        // Side-effect levels combine to the lower (more restrictive) one.
        let side_effect_level = match (&self.side_effect_level, &requested.side_effect_level) {
            (Some(a), Some(b)) => Some(min_side_effect(a, b).to_string()),
            (Some(a), None) => Some(a.clone()),
            (None, Some(b)) => Some(b.clone()),
            (None, None) => None,
        };

        // Requested tools outside a non-empty ceiling are rejected up front.
        if !self.tools.is_empty() {
            let denied: Vec<String> = requested
                .tools
                .iter()
                .filter(|tool| !self.tools.contains(*tool))
                .cloned()
                .collect();
            if !denied.is_empty() {
                return Err(format!(
                    "requested tools exceed host ceiling: {}",
                    denied.join(", ")
                ));
            }
        }

        // Same for capability operations: every requested op must fall inside
        // the ceiling's op list for that capability (when a ceiling exists).
        for (capability, requested_ops) in &requested.capabilities {
            if let Some(allowed_ops) = self.capabilities.get(capability) {
                let denied: Vec<String> = requested_ops
                    .iter()
                    .filter(|op| !allowed_ops.contains(*op))
                    .cloned()
                    .collect();
                if !denied.is_empty() {
                    return Err(format!(
                        "requested capability operations exceed host ceiling: {}.{}",
                        capability,
                        denied.join(",")
                    ));
                }
            } else if !self.capabilities.is_empty() {
                return Err(format!(
                    "requested capability exceeds host ceiling: {capability}"
                ));
            }
        }

        // Tools: empty side = unrestricted; otherwise set intersection.
        let tools = if self.tools.is_empty() {
            requested.tools.clone()
        } else if requested.tools.is_empty() {
            self.tools.clone()
        } else {
            requested
                .tools
                .iter()
                .filter(|tool| self.tools.contains(*tool))
                .cloned()
                .collect()
        };

        // Capabilities: same empty-means-unrestricted rule, intersected
        // per-capability and per-operation.
        let capabilities = if self.capabilities.is_empty() {
            requested.capabilities.clone()
        } else if requested.capabilities.is_empty() {
            self.capabilities.clone()
        } else {
            requested
                .capabilities
                .iter()
                .filter_map(|(capability, requested_ops)| {
                    self.capabilities.get(capability).map(|allowed_ops| {
                        (
                            capability.clone(),
                            requested_ops
                                .iter()
                                .filter(|op| allowed_ops.contains(*op))
                                .cloned()
                                .collect::<Vec<_>>(),
                        )
                    })
                })
                .collect()
        };

        // Workspace roots: same empty-means-unrestricted intersection.
        let workspace_roots = if self.workspace_roots.is_empty() {
            requested.workspace_roots.clone()
        } else if requested.workspace_roots.is_empty() {
            self.workspace_roots.clone()
        } else {
            requested
                .workspace_roots
                .iter()
                .filter(|root| self.workspace_roots.contains(*root))
                .cloned()
                .collect()
        };

        // Recursion limit: the smaller of the two bounds, when both exist.
        let recursion_limit = match (self.recursion_limit, requested.recursion_limit) {
            (Some(a), Some(b)) => Some(a.min(b)),
            (Some(a), None) => Some(a),
            (None, Some(b)) => Some(b),
            (None, None) => None,
        };

        // Constraints accumulate from both sides (all must hold).
        let mut tool_arg_constraints = self.tool_arg_constraints.clone();
        tool_arg_constraints.extend(requested.tool_arg_constraints.clone());

        // Metadata for each effective tool, preferring the requested side's
        // entry over the ceiling's.
        let tool_metadata = tools
            .iter()
            .filter_map(|tool| {
                requested
                    .tool_metadata
                    .get(tool)
                    .or_else(|| self.tool_metadata.get(tool))
                    .cloned()
                    .map(|metadata| (tool.clone(), metadata))
            })
            .collect();

        Ok(CapabilityPolicy {
            tools,
            capabilities,
            workspace_roots,
            side_effect_level,
            recursion_limit,
            tool_arg_constraints,
            tool_metadata,
        })
    }
}
817
/// Returns the more restrictive (lower-ranked) of two side-effect levels.
/// Unknown labels rank highest (least restrictive); on a tie the first
/// argument wins, matching `std::cmp::min_by_key` semantics.
fn min_side_effect<'a>(a: &'a str, b: &'a str) -> &'a str {
    fn rank(level: &str) -> usize {
        match level {
            "none" => 0,
            "read_only" => 1,
            "workspace_write" => 2,
            "process_exec" => 3,
            "network" => 4,
            _ => 5,
        }
    }
    std::cmp::min_by_key(a, b, |level| rank(level))
}
835
/// Model-selection overrides for a workflow node; `None` means "inherit".
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ModelPolicy {
    pub provider: Option<String>,
    pub model: Option<String>,
    /// Abstract tier label used when no concrete model is pinned.
    pub model_tier: Option<String>,
    pub temperature: Option<f64>,
    pub max_tokens: Option<i64>,
}
845
/// Controls how a node's transcript is kept, exposed, and condensed.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct TranscriptPolicy {
    pub mode: Option<String>,
    pub visibility: Option<String>,
    /// Whether to produce a transcript summary.
    pub summarize: bool,
    /// Whether to auto-compact the transcript.
    pub compact: bool,
    /// Newest messages kept verbatim when compacting.
    pub keep_last: Option<usize>,
}
855
/// Controls which artifacts are assembled into a node's context and within
/// what budget.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ContextPolicy {
    /// Maximum number of artifacts to include.
    pub max_artifacts: Option<usize>,
    /// Overall token budget for included artifacts.
    pub max_tokens: Option<usize>,
    /// Tokens held back from the budget for other content.
    pub reserve_tokens: Option<usize>,
    pub include_kinds: Vec<String>,
    pub exclude_kinds: Vec<String>,
    /// Kinds ranked ahead of the default priority order.
    pub prioritize_kinds: Vec<String>,
    /// Artifact ids that are always included.
    pub pinned_ids: Vec<String>,
    pub include_stages: Vec<String>,
    pub prefer_recent: bool,
    pub prefer_fresh: bool,
    /// Rendering style for the assembled context, if overridden.
    pub render: Option<String>,
}
871
/// Retry behavior for a workflow node.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RetryPolicy {
    pub max_attempts: usize,
    /// Run verification between attempts.
    pub verify: bool,
    /// Attempt automatic repair on failure.
    pub repair: bool,
}
879
/// Input/output contract for a workflow stage: which artifact kinds flow in
/// and out, and in what quantity.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct StageContract {
    pub input_kinds: Vec<String>,
    pub output_kinds: Vec<String>,
    pub min_inputs: Option<usize>,
    pub max_inputs: Option<usize>,
    /// Whether the stage requires a transcript to be present.
    pub require_transcript: bool,
    /// Optional JSON schema constraining the stage's output.
    pub schema: Option<serde_json::Value>,
}
890
/// Maps a node's possible outcomes to branch labels used when choosing the
/// next edge; each field names the branch taken for that outcome.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BranchSemantics {
    pub success: Option<String>,
    pub failure: Option<String>,
    pub verify_pass: Option<String>,
    pub verify_fail: Option<String>,
    pub condition_true: Option<String>,
    pub condition_false: Option<String>,
    pub loop_continue: Option<String>,
    pub loop_exit: Option<String>,
    pub escalation: Option<String>,
}
904
/// Fan-out configuration for a map node: the items to iterate and the
/// artifact kinds used for per-item inputs and outputs.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct MapPolicy {
    pub items: Vec<serde_json::Value>,
    pub item_artifact_kind: Option<String>,
    pub output_kind: Option<String>,
    pub max_items: Option<usize>,
}
913
/// Fan-in configuration for a join node.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct JoinPolicy {
    /// Join strategy label (free-form; interpreted by the executor).
    pub strategy: String,
    /// Whether every incoming branch must complete before joining.
    pub require_all_inputs: bool,
    /// Minimum number of completed branches required, if set.
    pub min_completed: Option<usize>,
}
921
/// Configuration for a reduce node combining multiple results into one.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReducePolicy {
    /// Reduce strategy label (free-form; interpreted by the executor).
    pub strategy: String,
    /// Separator inserted between combined pieces, when applicable.
    pub separator: Option<String>,
    /// Artifact kind assigned to the reduced output.
    pub output_kind: Option<String>,
}
929
/// Where and why a node escalates for human/host attention.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EscalationPolicy {
    pub level: Option<String>,
    /// Destination queue for the escalation, if routed.
    pub queue: Option<String>,
    pub reason: Option<String>,
}
937
/// A unit of context produced or consumed by workflow stages.
/// `normalize` fills the identity/bookkeeping fields when empty.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
    // Serialized as "_type"; tag identifying the record type in JSON.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    /// Canonicalized kind label (see `normalize_artifact_kind`).
    pub kind: String,
    pub title: Option<String>,
    /// Textual payload; basis for dedup and token estimation.
    pub text: Option<String>,
    /// Structured payload, when the artifact is not purely textual.
    pub data: Option<serde_json::Value>,
    pub source: Option<String>,
    pub created_at: String,
    /// Freshness label ("fresh"/"live"/"recent"/"stale"); see `freshness_rank`.
    pub freshness: Option<String>,
    /// Selection priority; defaulted by kind via `default_artifact_priority`.
    pub priority: Option<i64>,
    /// Ids of artifacts this one was derived from.
    pub lineage: Vec<String>,
    pub relevance: Option<f64>,
    /// Estimated token count; derived from `text` length when unset.
    pub estimated_tokens: Option<usize>,
    /// Workflow stage that produced this artifact, if known.
    pub stage: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
958
959impl ArtifactRecord {
960 pub fn normalize(mut self) -> Self {
961 if self.type_name.is_empty() {
962 self.type_name = "artifact".to_string();
963 }
964 if self.id.is_empty() {
965 self.id = new_id("artifact");
966 }
967 if self.created_at.is_empty() {
968 self.created_at = now_rfc3339();
969 }
970 if self.kind.is_empty() {
971 self.kind = "artifact".to_string();
972 }
973 self.kind = normalize_artifact_kind(&self.kind);
974 if self.estimated_tokens.is_none() {
975 self.estimated_tokens = self
976 .text
977 .as_ref()
978 .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
979 }
980 if self.priority.is_none() {
981 self.priority = Some(default_artifact_priority(&self.kind));
982 }
983 self
984 }
985}
986
/// A single node in a workflow graph: its prompt/tooling plus the full set
/// of policies governing how it executes.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowNode {
    pub id: Option<String>,
    /// Node kind (free-form label interpreted by the executor).
    pub kind: String,
    pub mode: Option<String>,
    pub prompt: Option<String>,
    pub system: Option<String>,
    pub task_label: Option<String>,
    /// Marker string that signals the node's task is done, if used.
    pub done_sentinel: Option<String>,
    /// Tool definitions/registry; parsed by `workflow_tool_names` and
    /// `workflow_tool_metadata`.
    pub tools: serde_json::Value,
    pub model_policy: ModelPolicy,
    pub transcript_policy: TranscriptPolicy,
    pub context_policy: ContextPolicy,
    pub retry_policy: RetryPolicy,
    pub capability_policy: CapabilityPolicy,
    pub input_contract: StageContract,
    pub output_contract: StageContract,
    pub branch_semantics: BranchSemantics,
    pub map_policy: MapPolicy,
    pub join_policy: JoinPolicy,
    pub reduce_policy: ReducePolicy,
    pub escalation_policy: EscalationPolicy,
    /// Verification spec, when the node's output is checked.
    pub verify: Option<serde_json::Value>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1013
1014pub fn workflow_tool_names(value: &serde_json::Value) -> Vec<String> {
1015 match value {
1016 serde_json::Value::Null => Vec::new(),
1017 serde_json::Value::Array(items) => items
1018 .iter()
1019 .filter_map(|item| match item {
1020 serde_json::Value::Object(map) => map
1021 .get("name")
1022 .and_then(|value| value.as_str())
1023 .filter(|name| !name.is_empty())
1024 .map(|name| name.to_string()),
1025 _ => None,
1026 })
1027 .collect(),
1028 serde_json::Value::Object(map) => {
1029 if map.get("_type").and_then(|value| value.as_str()) == Some("tool_registry") {
1030 return map
1031 .get("tools")
1032 .map(workflow_tool_names)
1033 .unwrap_or_default();
1034 }
1035 map.get("name")
1036 .and_then(|value| value.as_str())
1037 .filter(|name| !name.is_empty())
1038 .map(|name| vec![name.to_string()])
1039 .unwrap_or_default()
1040 }
1041 _ => Vec::new(),
1042 }
1043}
1044
/// Returns the least restrictive (highest-ranked) side-effect level among
/// `levels`, or `None` for an empty iterator. Unknown labels rank above all
/// known ones; ties keep the last maximal element (`max_by_key` semantics).
fn max_side_effect_level(levels: impl Iterator<Item = String>) -> Option<String> {
    const ORDER: [&str; 5] = [
        "none",
        "read_only",
        "workspace_write",
        "process_exec",
        "network",
    ];
    fn rank(order: &[&str], level: &str) -> usize {
        order
            .iter()
            .position(|known| *known == level)
            .unwrap_or(order.len())
    }
    levels.max_by_key(|level| rank(&ORDER, level))
}
1058
1059fn parse_tool_runtime_policy(
1060 map: &serde_json::Map<String, serde_json::Value>,
1061) -> ToolRuntimePolicyMetadata {
1062 let Some(policy) = map.get("policy").and_then(|value| value.as_object()) else {
1063 return ToolRuntimePolicyMetadata::default();
1064 };
1065
1066 let capabilities = policy
1067 .get("capabilities")
1068 .and_then(|value| value.as_object())
1069 .map(|caps| {
1070 caps.iter()
1071 .map(|(capability, ops)| {
1072 let values = ops
1073 .as_array()
1074 .map(|items| {
1075 items
1076 .iter()
1077 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1078 .collect::<Vec<_>>()
1079 })
1080 .unwrap_or_default();
1081 (capability.clone(), values)
1082 })
1083 .collect::<BTreeMap<_, _>>()
1084 })
1085 .unwrap_or_default();
1086
1087 let path_params = policy
1088 .get("path_params")
1089 .and_then(|value| value.as_array())
1090 .map(|items| {
1091 items
1092 .iter()
1093 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1094 .collect::<Vec<_>>()
1095 })
1096 .unwrap_or_default();
1097
1098 ToolRuntimePolicyMetadata {
1099 capabilities,
1100 side_effect_level: policy
1101 .get("side_effect_level")
1102 .and_then(|value| value.as_str())
1103 .map(|s| s.to_string()),
1104 path_params,
1105 mutation_classification: policy
1106 .get("mutation_classification")
1107 .and_then(|value| value.as_str())
1108 .map(|s| s.to_string()),
1109 }
1110}
1111
1112pub fn workflow_tool_metadata(
1113 value: &serde_json::Value,
1114) -> BTreeMap<String, ToolRuntimePolicyMetadata> {
1115 match value {
1116 serde_json::Value::Null => BTreeMap::new(),
1117 serde_json::Value::Array(items) => items
1118 .iter()
1119 .filter_map(|item| match item {
1120 serde_json::Value::Object(map) => map
1121 .get("name")
1122 .and_then(|value| value.as_str())
1123 .filter(|name| !name.is_empty())
1124 .map(|name| (name.to_string(), parse_tool_runtime_policy(map))),
1125 _ => None,
1126 })
1127 .collect(),
1128 serde_json::Value::Object(map) => {
1129 if map.get("_type").and_then(|value| value.as_str()) == Some("tool_registry") {
1130 return map
1131 .get("tools")
1132 .map(workflow_tool_metadata)
1133 .unwrap_or_default();
1134 }
1135 map.get("name")
1136 .and_then(|value| value.as_str())
1137 .filter(|name| !name.is_empty())
1138 .map(|name| {
1139 let mut metadata = BTreeMap::new();
1140 metadata.insert(name.to_string(), parse_tool_runtime_policy(map));
1141 metadata
1142 })
1143 .unwrap_or_default()
1144 }
1145 _ => BTreeMap::new(),
1146 }
1147}
1148
1149pub fn workflow_tool_policy_from_tools(value: &serde_json::Value) -> CapabilityPolicy {
1150 let tools = workflow_tool_names(value);
1151 let tool_metadata = workflow_tool_metadata(value);
1152 let mut capabilities: BTreeMap<String, Vec<String>> = BTreeMap::new();
1153 for metadata in tool_metadata.values() {
1154 for (capability, ops) in &metadata.capabilities {
1155 let entry = capabilities.entry(capability.clone()).or_default();
1156 for op in ops {
1157 if !entry.contains(op) {
1158 entry.push(op.clone());
1159 }
1160 }
1161 entry.sort();
1162 }
1163 }
1164 let side_effect_level = max_side_effect_level(
1165 tool_metadata
1166 .values()
1167 .filter_map(|metadata| metadata.side_effect_level.clone()),
1168 );
1169 CapabilityPolicy {
1170 tools,
1171 capabilities,
1172 workspace_roots: Vec::new(),
1173 side_effect_level,
1174 recursion_limit: None,
1175 tool_arg_constraints: Vec::new(),
1176 tool_metadata,
1177 }
1178}
1179
/// A directed edge between two workflow nodes, optionally taken only for a
/// specific branch outcome.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowEdge {
    /// Source node id.
    pub from: String,
    /// Destination node id.
    pub to: String,
    /// Branch label this edge is taken for; `None` = unconditional.
    pub branch: Option<String>,
    pub label: Option<String>,
}
1188
/// A complete workflow definition: nodes, edges, and attached policies.
///
/// Normalization (`normalize_workflow_value`) fills defaults: `_type`
/// becomes "workflow_graph", `version` defaults to 1, and `entry` falls
/// back to the first node id.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowGraph {
    /// Serialized as `_type`; tag identifying this payload shape.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    pub version: usize,
    /// Id of the node execution starts from; must exist in `nodes`.
    pub entry: String,
    pub nodes: BTreeMap<String, WorkflowNode>,
    pub edges: Vec<WorkflowEdge>,
    /// Graph-wide capability ceiling, validated against any outer ceiling.
    pub capability_policy: CapabilityPolicy,
    pub metadata: BTreeMap<String, serde_json::Value>,
    pub audit_log: Vec<WorkflowAuditEntry>,
}
1204
/// One entry in a workflow's audit log: an operation, when it happened,
/// and optionally which node it applied to and why.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowAuditEntry {
    pub id: String,
    /// Name of the audited operation.
    pub op: String,
    pub node_id: Option<String>,
    pub timestamp: String,
    pub reason: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1215
/// Aggregated LLM usage counters attached to a run or stage.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct LlmUsageRecord {
    pub input_tokens: i64,
    pub output_tokens: i64,
    pub total_duration_ms: i64,
    pub call_count: i64,
    pub total_cost: f64,
    /// Model identifiers that contributed to these totals.
    pub models: Vec<String>,
}
1226
/// Record of one executed workflow node (stage) within a run, including
/// its result, transcript, artifacts, and per-attempt history.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageRecord {
    pub id: String,
    /// Workflow node this stage executed; replay assertions match on it.
    pub node_id: String,
    pub kind: String,
    /// Compared against `ReplayStageAssertion::expected_status` in replay.
    pub status: String,
    pub outcome: String,
    /// Branch the stage routed to, if any.
    pub branch: Option<String>,
    pub started_at: String,
    pub finished_at: Option<String>,
    /// User-visible output text; replay can assert on a snippet of it.
    pub visible_text: Option<String>,
    pub private_reasoning: Option<String>,
    pub transcript: Option<serde_json::Value>,
    pub verification: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    /// Artifacts produced by this stage; their kinds feed replay fixtures.
    pub artifacts: Vec<ArtifactRecord>,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
    /// One entry per retry attempt of this stage.
    pub attempts: Vec<RunStageAttemptRecord>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1249
/// One attempt (out of possibly several retries) at executing a stage.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageAttemptRecord {
    /// 1-based attempt counter — TODO confirm base against the executor.
    pub attempt: usize,
    pub status: String,
    pub outcome: String,
    pub branch: Option<String>,
    pub error: Option<String>,
    pub verification: Option<serde_json::Value>,
    pub started_at: String,
    pub finished_at: Option<String>,
}
1262
/// Record of one traversal from a stage/node to the next node in a run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunTransitionRecord {
    pub id: String,
    /// `None` for the initial transition into the entry node — TODO confirm.
    pub from_stage_id: Option<String>,
    pub from_node_id: Option<String>,
    pub to_node_id: String,
    /// Branch label the transition followed, if the edge was branched.
    pub branch: Option<String>,
    pub timestamp: String,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
}
1275
/// Snapshot of run progress (which nodes are ready/done) persisted so a
/// run can be resumed.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunCheckpointRecord {
    pub id: String,
    pub ready_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub last_stage_id: Option<String>,
    pub persisted_at: String,
    /// Why this checkpoint was written.
    pub reason: String,
}
1286
/// Expected-results fixture derived from a run (see
/// `replay_fixture_from_run`), used to check a later run for regressions
/// via `evaluate_run_against_fixture`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayFixture {
    /// Serialized as `_type`; set to "replay_fixture" when generated.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    /// Id of the run this fixture was captured from.
    pub source_run_id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    pub created_at: String,
    /// Run-level status the evaluated run must match.
    pub expected_status: String,
    pub stage_assertions: Vec<ReplayStageAssertion>,
}
1300
/// Per-stage expectations inside a [`ReplayFixture`]; matched by `node_id`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayStageAssertion {
    pub node_id: String,
    pub expected_status: String,
    pub expected_outcome: String,
    pub expected_branch: Option<String>,
    /// Artifact kinds the stage must have produced (each checked with `any`).
    pub required_artifact_kinds: Vec<String>,
    /// Substring the stage's visible text must contain; when generated
    /// from a run this is the first 80 chars of the original text.
    pub visible_text_contains: Option<String>,
}
1311
/// Result of evaluating one run against a fixture: pass/fail plus the
/// individual failure messages.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalReport {
    /// True iff `failures` is empty.
    pub pass: bool,
    pub failures: Vec<String>,
    /// Number of stages in the evaluated run.
    pub stage_count: usize,
}
1319
/// One case's result within a replay evaluation suite.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalCaseReport {
    pub run_id: String,
    pub workflow_id: String,
    /// Case label from the suite manifest, when run from one.
    pub label: Option<String>,
    pub pass: bool,
    pub failures: Vec<String>,
    pub stage_count: usize,
    /// Path the run record was loaded from, when run from a manifest.
    pub source_path: Option<String>,
    /// Diff against a baseline run, when the case configured `compare_to`.
    pub comparison: Option<RunDiffReport>,
}
1332
/// Aggregate result over a suite of replay-eval cases.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalSuiteReport {
    /// True iff no case failed.
    pub pass: bool,
    pub total: usize,
    pub passed: usize,
    pub failed: usize,
    pub cases: Vec<ReplayEvalCaseReport>,
}
1342
/// Difference for one stage between two runs being compared.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunStageDiffRecord {
    pub node_id: String,
    /// Kind of difference: "added" / "removed" (emitted by
    /// `diff_run_records`); other values may exist — TODO confirm full set.
    pub change: String,
    pub details: Vec<String>,
}
1350
/// Comparison between two run records (left = baseline, right = candidate).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunDiffReport {
    pub left_run_id: String,
    pub right_run_id: String,
    /// True when no differences were detected; used by suite evaluation
    /// to fail a case whose run diverged from its baseline.
    pub identical: bool,
    pub status_changed: bool,
    pub left_status: String,
    pub right_status: String,
    pub stage_diffs: Vec<RunStageDiffRecord>,
    /// Count deltas are right-minus-left — TODO confirm sign convention.
    pub transition_count_delta: isize,
    pub artifact_count_delta: isize,
    pub checkpoint_count_delta: isize,
}
1365
/// Manifest describing a suite of replay-eval cases loaded from disk.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteManifest {
    /// Serialized as `_type`; normalized to "eval_suite_manifest".
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    /// Directory that relative case paths are resolved against.
    pub base_dir: Option<String>,
    pub cases: Vec<EvalSuiteCase>,
}
1376
/// One case in an eval-suite manifest. Paths may be absolute or relative
/// to the manifest's `base_dir`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteCase {
    pub label: Option<String>,
    /// Path to the run record JSON to evaluate.
    pub run_path: String,
    /// Path to a fixture JSON; when absent, the run's own embedded fixture
    /// is used (or one is derived from the run).
    pub fixture_path: Option<String>,
    /// Path to a baseline run record to diff the run against.
    pub compare_to: Option<String>,
}
1385
/// Full persisted record of one workflow run: identity, status, stage
/// history, transitions, checkpoints, artifacts, policy, and usage.
///
/// Normalization (`normalize_run_record`) fills defaults: `_type`
/// "run_record", a generated id, `started_at`, status "running",
/// `root_run_id` = own id, and a derived replay fixture.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunRecord {
    /// Serialized as `_type`; tag identifying this payload shape.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    pub task: String,
    /// Compared against `ReplayFixture::expected_status` during replay.
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub parent_run_id: Option<String>,
    /// Top-most ancestor run; defaults to this run's own id.
    pub root_run_id: Option<String>,
    pub stages: Vec<RunStageRecord>,
    pub transitions: Vec<RunTransitionRecord>,
    pub checkpoints: Vec<RunCheckpointRecord>,
    pub pending_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub child_runs: Vec<RunChildRecord>,
    pub artifacts: Vec<ArtifactRecord>,
    pub policy: CapabilityPolicy,
    pub execution: Option<RunExecutionRecord>,
    pub transcript: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    /// Expected-results fixture derived from this run, for later replay.
    pub replay_fixture: Option<ReplayFixture>,
    pub metadata: BTreeMap<String, serde_json::Value>,
    /// Where this record was last saved, if it has been persisted.
    pub persisted_path: Option<String>,
}
1415
/// Record of a child (worker) run spawned from a parent run's stage.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunChildRecord {
    pub worker_id: String,
    pub worker_name: String,
    /// Stage in the parent run that spawned this child.
    pub parent_stage_id: Option<String>,
    /// Mutation-session linkage — see `MutationSessionRecord`.
    pub session_id: Option<String>,
    pub parent_session_id: Option<String>,
    pub mutation_scope: Option<String>,
    pub approval_mode: Option<String>,
    pub task: String,
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub run_id: Option<String>,
    /// Path to the child's persisted run record, if saved.
    pub run_path: Option<String>,
    pub snapshot_path: Option<String>,
    pub execution: Option<RunExecutionRecord>,
}
1435
/// Execution-environment details captured for a run: working directory,
/// environment variables, and (when applicable) git worktree settings.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunExecutionRecord {
    pub cwd: Option<String>,
    pub source_dir: Option<String>,
    pub env: BTreeMap<String, String>,
    /// Execution adapter name — semantics defined by the executor; TODO confirm.
    pub adapter: Option<String>,
    pub repo_path: Option<String>,
    pub worktree_path: Option<String>,
    pub branch: Option<String>,
    pub base_ref: Option<String>,
    /// Cleanup strategy for the worktree — TODO confirm accepted values.
    pub cleanup: Option<String>,
}
1449
/// Output of `validate_workflow`: hard errors, advisory warnings, and the
/// set of nodes reachable from the entry.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowValidationReport {
    /// True iff `errors` is empty; warnings do not affect validity.
    pub valid: bool,
    pub errors: Vec<String>,
    pub warnings: Vec<String>,
    pub reachable_nodes: Vec<String>,
}
1458
1459fn parse_json_payload<T: for<'de> Deserialize<'de>>(
1460 json: serde_json::Value,
1461 label: &str,
1462) -> Result<T, VmError> {
1463 let payload = json.to_string();
1464 let mut deserializer = serde_json::Deserializer::from_str(&payload);
1465 let mut tracker = serde_path_to_error::Track::new();
1466 let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
1467 T::deserialize(path_deserializer).map_err(|error| {
1468 let snippet = if payload.len() > 600 {
1469 format!("{}...", &payload[..600])
1470 } else {
1471 payload.clone()
1472 };
1473 VmError::Runtime(format!(
1474 "{label} parse error at {}: {} | payload={}",
1475 tracker.path(),
1476 error,
1477 snippet
1478 ))
1479 })
1480}
1481
1482fn parse_json_value<T: for<'de> Deserialize<'de>>(value: &VmValue) -> Result<T, VmError> {
1483 parse_json_payload(vm_value_to_json(value), "orchestration")
1484}
1485
1486pub fn parse_workflow_node_value(value: &VmValue, label: &str) -> Result<WorkflowNode, VmError> {
1487 parse_json_payload(vm_value_to_json(value), label)
1488}
1489
1490pub fn parse_workflow_node_json(
1491 json: serde_json::Value,
1492 label: &str,
1493) -> Result<WorkflowNode, VmError> {
1494 parse_json_payload(json, label)
1495}
1496
1497pub fn parse_workflow_edge_json(
1498 json: serde_json::Value,
1499 label: &str,
1500) -> Result<WorkflowEdge, VmError> {
1501 parse_json_payload(json, label)
1502}
1503
/// Coerce a loosely-shaped VM value into a fully-defaulted [`WorkflowGraph`].
///
/// Accepts either a full graph dict (with a `nodes` map) or a legacy flat
/// shape whose top-level keys are `act` / `verify` / `repair` node dicts
/// plus shared model settings (`provider`, `model`, `model_tier`/`tier`,
/// `temperature`, `max_tokens`, `mode`, `done_sentinel`). After lifting the
/// legacy shape, every missing default is filled in so downstream code can
/// rely on non-empty type/id/version/entry and per-node kind/policies.
pub fn normalize_workflow_value(value: &VmValue) -> Result<WorkflowGraph, VmError> {
    let mut graph: WorkflowGraph = parse_json_value(value)?;
    let as_dict = value.as_dict().cloned().unwrap_or_default();

    // Legacy flat form: no `nodes` map. Lift each of the well-known keys
    // into a graph node, inheriting shared model settings from the top level
    // wherever the node did not specify its own.
    if graph.nodes.is_empty() {
        for key in ["act", "verify", "repair"] {
            if let Some(node_value) = as_dict.get(key) {
                let mut node: WorkflowNode = parse_json_value(node_value)?;
                let raw_node = node_value.as_dict().cloned().unwrap_or_default();
                node.id = Some(key.to_string());
                // The "verify" key implies a verify node; everything else
                // defaults to a plain stage.
                if node.kind.is_empty() {
                    node.kind = if key == "verify" {
                        "verify".to_string()
                    } else {
                        "stage".to_string()
                    };
                }
                // Per-field inheritance: only fill from the top-level dict
                // when the node left the field unset, and ignore empty strings.
                if node.model_policy.provider.is_none() {
                    node.model_policy.provider = as_dict
                        .get("provider")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model.is_none() {
                    node.model_policy.model = as_dict
                        .get("model")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model_tier.is_none() {
                    node.model_policy.model_tier = as_dict
                        .get("model_tier")
                        .or_else(|| as_dict.get("tier"))
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.temperature.is_none() {
                    // Accept both float and int temperatures (int is widened).
                    node.model_policy.temperature = as_dict.get("temperature").and_then(|value| {
                        if let VmValue::Float(number) = value {
                            Some(*number)
                        } else {
                            value.as_int().map(|number| number as f64)
                        }
                    });
                }
                if node.model_policy.max_tokens.is_none() {
                    node.model_policy.max_tokens =
                        as_dict.get("max_tokens").and_then(|value| value.as_int());
                }
                if node.mode.is_none() {
                    node.mode = as_dict
                        .get("mode")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.done_sentinel.is_none() {
                    node.done_sentinel = as_dict
                        .get("done_sentinel")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                // Synthesize a `verify` spec from flat assertion keys on the
                // verify node when none was given explicitly.
                if key == "verify"
                    && node.verify.is_none()
                    && (raw_node.contains_key("assert_text")
                        || raw_node.contains_key("command")
                        || raw_node.contains_key("expect_status")
                        || raw_node.contains_key("expect_text"))
                {
                    node.verify = Some(serde_json::json!({
                        "assert_text": raw_node.get("assert_text").map(vm_value_to_json),
                        "command": raw_node.get("command").map(vm_value_to_json),
                        "expect_status": raw_node.get("expect_status").map(vm_value_to_json),
                        "expect_text": raw_node.get("expect_text").map(vm_value_to_json),
                    }));
                }
                graph.nodes.insert(key.to_string(), node);
            }
        }
        if graph.entry.is_empty() && graph.nodes.contains_key("act") {
            graph.entry = "act".to_string();
        }
        // Default wiring: act -> verify, with a verify <-> repair retry loop.
        // NOTE(review): if "repair" exists without "verify", these edges
        // reference a nonexistent "verify" node; validate_workflow will then
        // report them — confirm that is the intended failure mode.
        if graph.edges.is_empty() && graph.nodes.contains_key("act") {
            if graph.nodes.contains_key("verify") {
                graph.edges.push(WorkflowEdge {
                    from: "act".to_string(),
                    to: "verify".to_string(),
                    branch: None,
                    label: None,
                });
            }
            if graph.nodes.contains_key("repair") {
                graph.edges.push(WorkflowEdge {
                    from: "verify".to_string(),
                    to: "repair".to_string(),
                    branch: Some("failed".to_string()),
                    label: None,
                });
                graph.edges.push(WorkflowEdge {
                    from: "repair".to_string(),
                    to: "verify".to_string(),
                    branch: Some("retry".to_string()),
                    label: None,
                });
            }
        }
    }

    // Graph-level defaults.
    if graph.type_name.is_empty() {
        graph.type_name = "workflow_graph".to_string();
    }
    if graph.id.is_empty() {
        graph.id = new_id("workflow");
    }
    if graph.version == 0 {
        graph.version = 1;
    }
    if graph.entry.is_empty() {
        // Fall back to the first node id (BTreeMap order), else "act".
        graph.entry = graph
            .nodes
            .keys()
            .next()
            .cloned()
            .unwrap_or_else(|| "act".to_string());
    }
    // Per-node defaults: id, kind, join/reduce strategies, output kinds,
    // and at least one retry attempt.
    for (node_id, node) in &mut graph.nodes {
        if node.id.is_none() {
            node.id = Some(node_id.clone());
        }
        if node.kind.is_empty() {
            node.kind = "stage".to_string();
        }
        if node.join_policy.strategy.is_empty() {
            node.join_policy.strategy = "all".to_string();
        }
        if node.reduce_policy.strategy.is_empty() {
            node.reduce_policy.strategy = "concat".to_string();
        }
        if node.output_contract.output_kinds.is_empty() {
            // Default output kind is derived from the node kind.
            node.output_contract.output_kinds = vec![match node.kind.as_str() {
                "verify" => "verification_result".to_string(),
                "reduce" => node
                    .reduce_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "summary".to_string()),
                "map" => node
                    .map_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "artifact".to_string()),
                "escalation" => "plan".to_string(),
                _ => "artifact".to_string(),
            }];
        }
        if node.retry_policy.max_attempts == 0 {
            node.retry_policy.max_attempts = 1;
        }
    }
    Ok(graph)
}
1664
/// Validate a workflow graph's structure and (optionally) its capability
/// policies against a ceiling.
///
/// Errors (invalid graph): missing entry node, edges referencing unknown
/// nodes, malformed condition/fork/map nodes, inverted input contracts,
/// and policy-ceiling violations. Warnings (advisory only): unreachable
/// nodes, under-connected join nodes, and unconstrained reduce nodes.
pub fn validate_workflow(
    graph: &WorkflowGraph,
    ceiling: Option<&CapabilityPolicy>,
) -> WorkflowValidationReport {
    let mut errors = Vec::new();
    let mut warnings = Vec::new();

    if !graph.nodes.contains_key(&graph.entry) {
        errors.push(format!("entry node does not exist: {}", graph.entry));
    }

    // Every edge endpoint must name an existing node.
    let node_ids: BTreeSet<String> = graph.nodes.keys().cloned().collect();
    for edge in &graph.edges {
        if !node_ids.contains(&edge.from) {
            errors.push(format!("edge.from references unknown node: {}", edge.from));
        }
        if !node_ids.contains(&edge.to) {
            errors.push(format!("edge.to references unknown node: {}", edge.to));
        }
    }

    // Unreachable nodes are allowed but flagged.
    let reachable_nodes = reachable_nodes(graph);
    for node_id in &node_ids {
        if !reachable_nodes.contains(node_id) {
            warnings.push(format!("node is unreachable: {node_id}"));
        }
    }

    // Per-node structural checks based on node kind and degree.
    for (node_id, node) in &graph.nodes {
        let incoming = graph
            .edges
            .iter()
            .filter(|edge| edge.to == *node_id)
            .count();
        let outgoing: Vec<&WorkflowEdge> = graph
            .edges
            .iter()
            .filter(|edge| edge.from == *node_id)
            .collect();
        if let Some(min_inputs) = node.input_contract.min_inputs {
            if let Some(max_inputs) = node.input_contract.max_inputs {
                if min_inputs > max_inputs {
                    errors.push(format!(
                        "node {node_id}: input contract min_inputs exceeds max_inputs"
                    ));
                }
            }
        }
        match node.kind.as_str() {
            "condition" => {
                // Conditions must be able to route both outcomes.
                let has_true = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("true"));
                let has_false = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("false"));
                if !has_true || !has_false {
                    errors.push(format!(
                        "node {node_id}: condition nodes require both 'true' and 'false' branch edges"
                    ));
                }
            }
            "fork" => {
                if outgoing.len() < 2 {
                    errors.push(format!(
                        "node {node_id}: fork nodes require at least two outgoing edges"
                    ));
                }
            }
            "join" => {
                // A 1-input join is pointless but not fatal.
                if incoming < 2 {
                    warnings.push(format!(
                        "node {node_id}: join node has fewer than two incoming edges"
                    ));
                }
            }
            "map" => {
                // A map node needs some source of items to iterate over.
                if node.map_policy.items.is_empty()
                    && node.map_policy.item_artifact_kind.is_none()
                    && node.input_contract.input_kinds.is_empty()
                {
                    errors.push(format!(
                        "node {node_id}: map nodes require items, item_artifact_kind, or input_contract.input_kinds"
                    ));
                }
            }
            "reduce" => {
                if node.input_contract.input_kinds.is_empty() {
                    warnings.push(format!(
                        "node {node_id}: reduce node has no input_contract.input_kinds; it will consume all available artifacts"
                    ));
                }
            }
            _ => {}
        }
    }

    // Both the graph-wide policy and every node policy must fit under the
    // supplied ceiling.
    if let Some(ceiling) = ceiling {
        if let Err(error) = ceiling.intersect(&graph.capability_policy) {
            errors.push(error);
        }
        for (node_id, node) in &graph.nodes {
            if let Err(error) = ceiling.intersect(&node.capability_policy) {
                errors.push(format!("node {node_id}: {error}"));
            }
        }
    }

    WorkflowValidationReport {
        valid: errors.is_empty(),
        errors,
        warnings,
        reachable_nodes: reachable_nodes.into_iter().collect(),
    }
}
1780
1781fn reachable_nodes(graph: &WorkflowGraph) -> BTreeSet<String> {
1782 let mut seen = BTreeSet::new();
1783 let mut stack = vec![graph.entry.clone()];
1784 while let Some(node_id) = stack.pop() {
1785 if !seen.insert(node_id.clone()) {
1786 continue;
1787 }
1788 for edge in graph.edges.iter().filter(|edge| edge.from == node_id) {
1789 stack.push(edge.to.clone());
1790 }
1791 }
1792 seen
1793}
1794
/// Filter and rank artifacts per a [`ContextPolicy`], then greedily pack
/// them into the policy's artifact-count and token budgets.
///
/// Ranking (all descending precedence): pinned ids, prioritized kinds,
/// explicit priority, freshness (only if `prefer_fresh`), recency (only if
/// `prefer_recent`), relevance, and finally smaller token estimate first.
pub fn select_artifacts(
    mut artifacts: Vec<ArtifactRecord>,
    policy: &ContextPolicy,
) -> Vec<ArtifactRecord> {
    // Drop artifacts excluded by kind or stage filters. Empty include lists
    // mean "include everything".
    artifacts.retain(|artifact| {
        (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
            && !policy.exclude_kinds.contains(&artifact.kind)
            && (policy.include_stages.is_empty()
                || artifact
                    .stage
                    .as_ref()
                    .is_some_and(|stage| policy.include_stages.contains(stage)))
    });
    // Comparisons are written b-vs-a so that "better" artifacts sort first.
    artifacts.sort_by(|a, b| {
        let b_pinned = policy.pinned_ids.contains(&b.id);
        let a_pinned = policy.pinned_ids.contains(&a.id);
        b_pinned
            .cmp(&a_pinned)
            .then_with(|| {
                let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
                let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
                b_prio_kind.cmp(&a_prio_kind)
            })
            .then_with(|| {
                b.priority
                    .unwrap_or_default()
                    .cmp(&a.priority.unwrap_or_default())
            })
            .then_with(|| {
                if policy.prefer_fresh {
                    freshness_rank(b.freshness.as_deref())
                        .cmp(&freshness_rank(a.freshness.as_deref()))
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                if policy.prefer_recent {
                    b.created_at.cmp(&a.created_at)
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                b.relevance
                    .partial_cmp(&a.relevance)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
            .then_with(|| {
                // Tie-break: cheaper artifacts first; missing estimates last.
                a.estimated_tokens
                    .unwrap_or(usize::MAX)
                    .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
            })
    });

    // Greedy knapsack: take artifacts in rank order while they fit. Note
    // `continue` (not `break`) on budget overflow — a smaller artifact later
    // in the ranking may still fit.
    let mut selected = Vec::new();
    let mut used_tokens = 0usize;
    let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
    let effective_max_tokens = policy
        .max_tokens
        .map(|max| max.saturating_sub(reserve_tokens));
    for artifact in artifacts {
        if let Some(max_artifacts) = policy.max_artifacts {
            if selected.len() >= max_artifacts {
                break;
            }
        }
        let next_tokens = artifact.estimated_tokens.unwrap_or(0);
        if let Some(max_tokens) = effective_max_tokens {
            if used_tokens + next_tokens > max_tokens {
                continue;
            }
        }
        used_tokens += next_tokens;
        selected.push(artifact);
    }
    selected
}
1873
1874pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
1875 let mut parts = Vec::new();
1876 for artifact in artifacts {
1877 let title = artifact
1878 .title
1879 .clone()
1880 .unwrap_or_else(|| format!("{} {}", artifact.kind, artifact.id));
1881 let body = artifact
1882 .text
1883 .clone()
1884 .or_else(|| artifact.data.as_ref().map(|v| v.to_string()))
1885 .unwrap_or_default();
1886 match policy.render.as_deref() {
1887 Some("json") => {
1888 parts.push(
1889 serde_json::json!({
1890 "id": artifact.id,
1891 "kind": artifact.kind,
1892 "title": title,
1893 "source": artifact.source,
1894 "freshness": artifact.freshness,
1895 "priority": artifact.priority,
1896 "text": body,
1897 })
1898 .to_string(),
1899 );
1900 }
1901 _ => parts.push(format!(
1902 "[{title}] kind={} source={} freshness={} priority={}\n{}",
1903 artifact.kind,
1904 artifact
1905 .source
1906 .clone()
1907 .unwrap_or_else(|| "unknown".to_string()),
1908 artifact
1909 .freshness
1910 .clone()
1911 .unwrap_or_else(|| "normal".to_string()),
1912 artifact.priority.unwrap_or_default(),
1913 body
1914 )),
1915 }
1916 }
1917 parts.join("\n\n")
1918}
1919
1920pub fn normalize_artifact(value: &VmValue) -> Result<ArtifactRecord, VmError> {
1921 let artifact: ArtifactRecord = parse_json_value(value)?;
1922 Ok(artifact.normalize())
1923}
1924
1925pub fn normalize_run_record(value: &VmValue) -> Result<RunRecord, VmError> {
1926 let json = vm_value_to_json(value);
1927 let payload = json.to_string();
1928 let mut deserializer = serde_json::Deserializer::from_str(&payload);
1929 let mut tracker = serde_path_to_error::Track::new();
1930 let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
1931 let mut run: RunRecord = RunRecord::deserialize(path_deserializer).map_err(|error| {
1932 let snippet = if payload.len() > 600 {
1933 format!("{}...", &payload[..600])
1934 } else {
1935 payload.clone()
1936 };
1937 VmError::Runtime(format!(
1938 "orchestration parse error at {}: {} | payload={}",
1939 tracker.path(),
1940 error,
1941 snippet
1942 ))
1943 })?;
1944 if run.type_name.is_empty() {
1945 run.type_name = "run_record".to_string();
1946 }
1947 if run.id.is_empty() {
1948 run.id = new_id("run");
1949 }
1950 if run.started_at.is_empty() {
1951 run.started_at = now_rfc3339();
1952 }
1953 if run.status.is_empty() {
1954 run.status = "running".to_string();
1955 }
1956 if run.root_run_id.is_none() {
1957 run.root_run_id = Some(run.id.clone());
1958 }
1959 if run.replay_fixture.is_none() {
1960 run.replay_fixture = Some(replay_fixture_from_run(&run));
1961 }
1962 Ok(run)
1963}
1964
1965pub fn normalize_eval_suite_manifest(value: &VmValue) -> Result<EvalSuiteManifest, VmError> {
1966 let mut manifest: EvalSuiteManifest = parse_json_value(value)?;
1967 if manifest.type_name.is_empty() {
1968 manifest.type_name = "eval_suite_manifest".to_string();
1969 }
1970 if manifest.id.is_empty() {
1971 manifest.id = new_id("eval_suite");
1972 }
1973 Ok(manifest)
1974}
1975
1976fn load_replay_fixture(path: &Path) -> Result<ReplayFixture, VmError> {
1977 let content = std::fs::read_to_string(path)
1978 .map_err(|e| VmError::Runtime(format!("failed to read replay fixture: {e}")))?;
1979 serde_json::from_str(&content)
1980 .map_err(|e| VmError::Runtime(format!("failed to parse replay fixture: {e}")))
1981}
1982
/// Resolve a manifest-relative path: absolute paths pass through
/// unchanged; relative ones are joined onto `base_dir` when one is given.
fn resolve_manifest_path(base_dir: Option<&Path>, path: &str) -> PathBuf {
    let candidate = PathBuf::from(path);
    match base_dir {
        Some(base) if !candidate.is_absolute() => base.join(candidate),
        _ => candidate,
    }
}
1993
/// Evaluate every case of an [`EvalSuiteManifest`] and aggregate results.
///
/// Per case: load the run record, pick a fixture (explicit `fixture_path`,
/// else the run's embedded fixture, else one derived from the run itself),
/// evaluate the run against it, and — when `compare_to` is set — diff the
/// run against the baseline, failing the case if they differ.
///
/// # Errors
/// Propagates `VmError::Runtime` from any file load/parse failure; a case
/// that merely fails its assertions does not error, it is reported.
pub fn evaluate_run_suite_manifest(
    manifest: &EvalSuiteManifest,
) -> Result<ReplayEvalSuiteReport, VmError> {
    let base_dir = manifest.base_dir.as_deref().map(Path::new);
    let mut reports = Vec::new();
    for case in &manifest.cases {
        let run_path = resolve_manifest_path(base_dir, &case.run_path);
        let run = load_run_record(&run_path)?;
        // Fixture source priority: explicit path > embedded > derived.
        let fixture = match &case.fixture_path {
            Some(path) => load_replay_fixture(&resolve_manifest_path(base_dir, path))?,
            None => run
                .replay_fixture
                .clone()
                .unwrap_or_else(|| replay_fixture_from_run(&run)),
        };
        let eval = evaluate_run_against_fixture(&run, &fixture);
        let mut pass = eval.pass;
        let mut failures = eval.failures;
        // Optional regression check against a baseline run.
        let comparison = match &case.compare_to {
            Some(path) => {
                let baseline_path = resolve_manifest_path(base_dir, path);
                let baseline = load_run_record(&baseline_path)?;
                let diff = diff_run_records(&baseline, &run);
                if !diff.identical {
                    pass = false;
                    failures.push(format!(
                        "run differs from baseline {} with {} stage changes",
                        baseline_path.display(),
                        diff.stage_diffs.len()
                    ));
                }
                Some(diff)
            }
            None => None,
        };
        reports.push(ReplayEvalCaseReport {
            run_id: run.id.clone(),
            workflow_id: run.workflow_id.clone(),
            label: case.label.clone(),
            pass,
            failures,
            stage_count: eval.stage_count,
            source_path: Some(run_path.display().to_string()),
            comparison,
        });
    }
    let total = reports.len();
    let passed = reports.iter().filter(|report| report.pass).count();
    let failed = total.saturating_sub(passed);
    Ok(ReplayEvalSuiteReport {
        pass: failed == 0,
        total,
        passed,
        failed,
        cases: reports,
    })
}
2051
/// Render a minimal unified-style diff (file header, then ` `/`-`/`+`
/// prefixed lines) between `before` and `after`, using an LCS table to
/// align common lines. No `@@` hunk headers are emitted; every common
/// line appears as context. `path` defaults to "artifact" in the header.
pub fn render_unified_diff(path: Option<&str>, before: &str, after: &str) -> String {
    let old: Vec<&str> = before.lines().collect();
    let new: Vec<&str> = after.lines().collect();

    // lcs[i][j] = length of the longest common subsequence of
    // old[i..] and new[j..], filled back-to-front.
    let mut lcs = vec![vec![0usize; new.len() + 1]; old.len() + 1];
    for i in (0..old.len()).rev() {
        for j in (0..new.len()).rev() {
            lcs[i][j] = if old[i] == new[j] {
                lcs[i + 1][j + 1] + 1
            } else {
                lcs[i + 1][j].max(lcs[i][j + 1])
            };
        }
    }

    let label = path.unwrap_or("artifact");
    let mut out = format!("--- a/{label}\n+++ b/{label}\n");
    let mut emit = |prefix: char, line: &str| {
        out.push(prefix);
        out.push_str(line);
        out.push('\n');
    };

    // Walk the table: equal lines are context; otherwise prefer the side
    // with the longer remaining LCS (ties favor deletion, as before).
    let (mut i, mut j) = (0, 0);
    while i < old.len() && j < new.len() {
        if old[i] == new[j] {
            emit(' ', old[i]);
            i += 1;
            j += 1;
        } else if lcs[i + 1][j] >= lcs[i][j + 1] {
            emit('-', old[i]);
            i += 1;
        } else {
            emit('+', new[j]);
            j += 1;
        }
    }
    // Flush whichever side has lines remaining.
    for line in &old[i..] {
        emit('-', line);
    }
    for line in &new[j..] {
        emit('+', line);
    }
    out
}
2094
2095pub fn save_run_record(run: &RunRecord, path: Option<&str>) -> Result<String, VmError> {
2096 let path = path
2097 .map(PathBuf::from)
2098 .unwrap_or_else(|| default_run_dir().join(format!("{}.json", run.id)));
2099 if let Some(parent) = path.parent() {
2100 std::fs::create_dir_all(parent)
2101 .map_err(|e| VmError::Runtime(format!("failed to create run directory: {e}")))?;
2102 }
2103 let json = serde_json::to_string_pretty(run)
2104 .map_err(|e| VmError::Runtime(format!("failed to encode run record: {e}")))?;
2105 let tmp_path = path.with_extension("json.tmp");
2107 std::fs::write(&tmp_path, &json)
2108 .map_err(|e| VmError::Runtime(format!("failed to persist run record: {e}")))?;
2109 std::fs::rename(&tmp_path, &path).map_err(|e| {
2110 let _ = std::fs::write(&path, &json);
2112 VmError::Runtime(format!("failed to finalize run record: {e}"))
2113 })?;
2114 Ok(path.to_string_lossy().to_string())
2115}
2116
2117pub fn load_run_record(path: &Path) -> Result<RunRecord, VmError> {
2118 let content = std::fs::read_to_string(path)
2119 .map_err(|e| VmError::Runtime(format!("failed to read run record: {e}")))?;
2120 serde_json::from_str(&content)
2121 .map_err(|e| VmError::Runtime(format!("failed to parse run record: {e}")))
2122}
2123
2124pub fn replay_fixture_from_run(run: &RunRecord) -> ReplayFixture {
2125 ReplayFixture {
2126 type_name: "replay_fixture".to_string(),
2127 id: new_id("fixture"),
2128 source_run_id: run.id.clone(),
2129 workflow_id: run.workflow_id.clone(),
2130 workflow_name: run.workflow_name.clone(),
2131 created_at: now_rfc3339(),
2132 expected_status: run.status.clone(),
2133 stage_assertions: run
2134 .stages
2135 .iter()
2136 .map(|stage| ReplayStageAssertion {
2137 node_id: stage.node_id.clone(),
2138 expected_status: stage.status.clone(),
2139 expected_outcome: stage.outcome.clone(),
2140 expected_branch: stage.branch.clone(),
2141 required_artifact_kinds: stage
2142 .artifacts
2143 .iter()
2144 .map(|artifact| artifact.kind.clone())
2145 .collect(),
2146 visible_text_contains: stage
2147 .visible_text
2148 .as_ref()
2149 .filter(|text| !text.is_empty())
2150 .map(|text| text.chars().take(80).collect()),
2151 })
2152 .collect(),
2153 }
2154}
2155
/// Check a run against a fixture's expectations, collecting every mismatch
/// as a human-readable failure string.
///
/// Checks, in order: run-level status; then per assertion (matched by
/// `node_id`) stage presence, status, outcome, branch, required artifact
/// kinds, and the visible-text snippet. Extra stages in the run that have
/// no assertion are not flagged.
pub fn evaluate_run_against_fixture(run: &RunRecord, fixture: &ReplayFixture) -> ReplayEvalReport {
    let mut failures = Vec::new();
    if run.status != fixture.expected_status {
        failures.push(format!(
            "run status mismatch: expected {}, got {}",
            fixture.expected_status, run.status
        ));
    }
    for assertion in &fixture.stage_assertions {
        // Match the first stage with this node id; a missing stage fails
        // the assertion outright and skips the field checks.
        let Some(stage) = run
            .stages
            .iter()
            .find(|stage| stage.node_id == assertion.node_id)
        else {
            failures.push(format!("missing stage {}", assertion.node_id));
            continue;
        };
        if stage.status != assertion.expected_status {
            failures.push(format!(
                "stage {} status mismatch: expected {}, got {}",
                assertion.node_id, assertion.expected_status, stage.status
            ));
        }
        if stage.outcome != assertion.expected_outcome {
            failures.push(format!(
                "stage {} outcome mismatch: expected {}, got {}",
                assertion.node_id, assertion.expected_outcome, stage.outcome
            ));
        }
        if stage.branch != assertion.expected_branch {
            failures.push(format!(
                "stage {} branch mismatch: expected {:?}, got {:?}",
                assertion.node_id, assertion.expected_branch, stage.branch
            ));
        }
        // Every required artifact kind must appear at least once.
        for required_kind in &assertion.required_artifact_kinds {
            if !stage
                .artifacts
                .iter()
                .any(|artifact| &artifact.kind == required_kind)
            {
                failures.push(format!(
                    "stage {} missing artifact kind {}",
                    assertion.node_id, required_kind
                ));
            }
        }
        // Substring check; an absent visible_text is treated as "".
        if let Some(snippet) = &assertion.visible_text_contains {
            let actual = stage.visible_text.clone().unwrap_or_default();
            if !actual.contains(snippet) {
                failures.push(format!(
                    "stage {} visible text does not contain expected snippet {:?}",
                    assertion.node_id, snippet
                ));
            }
        }
    }

    ReplayEvalReport {
        pass: failures.is_empty(),
        failures,
        stage_count: run.stages.len(),
    }
}
2220
2221pub fn evaluate_run_suite(
2222 cases: Vec<(RunRecord, ReplayFixture, Option<String>)>,
2223) -> ReplayEvalSuiteReport {
2224 let mut reports = Vec::new();
2225 for (run, fixture, source_path) in cases {
2226 let report = evaluate_run_against_fixture(&run, &fixture);
2227 reports.push(ReplayEvalCaseReport {
2228 run_id: run.id.clone(),
2229 workflow_id: run.workflow_id.clone(),
2230 label: None,
2231 pass: report.pass,
2232 failures: report.failures,
2233 stage_count: report.stage_count,
2234 source_path,
2235 comparison: None,
2236 });
2237 }
2238 let total = reports.len();
2239 let passed = reports.iter().filter(|report| report.pass).count();
2240 let failed = total.saturating_sub(passed);
2241 ReplayEvalSuiteReport {
2242 pass: failed == 0,
2243 total,
2244 passed,
2245 failed,
2246 cases: reports,
2247 }
2248}
2249
/// Produces a structural diff between two run records.
///
/// Stages are matched by node id across both runs: a stage present only on
/// the left is reported as "removed", only on the right as "added", and a
/// stage present in both is reported as "changed" when status, outcome,
/// branch, or artifact counts differ. Run-level counters (transitions,
/// artifacts, checkpoints) are compared by length only.
pub fn diff_run_records(left: &RunRecord, right: &RunRecord) -> RunDiffReport {
    let mut stage_diffs = Vec::new();
    // Union of node ids from both runs; BTreeSet keeps the output ordering
    // stable and deterministic.
    let mut all_node_ids = BTreeSet::new();
    all_node_ids.extend(left.stages.iter().map(|stage| stage.node_id.clone()));
    all_node_ids.extend(right.stages.iter().map(|stage| stage.node_id.clone()));

    for node_id in all_node_ids {
        let left_stage = left.stages.iter().find(|stage| stage.node_id == node_id);
        let right_stage = right.stages.iter().find(|stage| stage.node_id == node_id);
        match (left_stage, right_stage) {
            (Some(_), None) => stage_diffs.push(RunStageDiffRecord {
                node_id,
                change: "removed".to_string(),
                details: vec!["stage missing from right run".to_string()],
            }),
            (None, Some(_)) => stage_diffs.push(RunStageDiffRecord {
                node_id,
                change: "added".to_string(),
                details: vec!["stage missing from left run".to_string()],
            }),
            (Some(left_stage), Some(right_stage)) => {
                // Collect one human-readable detail line per changed field.
                let mut details = Vec::new();
                if left_stage.status != right_stage.status {
                    details.push(format!(
                        "status: {} -> {}",
                        left_stage.status, right_stage.status
                    ));
                }
                if left_stage.outcome != right_stage.outcome {
                    details.push(format!(
                        "outcome: {} -> {}",
                        left_stage.outcome, right_stage.outcome
                    ));
                }
                if left_stage.branch != right_stage.branch {
                    details.push(format!(
                        "branch: {:?} -> {:?}",
                        left_stage.branch, right_stage.branch
                    ));
                }
                if left_stage.produced_artifact_ids.len() != right_stage.produced_artifact_ids.len()
                {
                    details.push(format!(
                        "produced_artifacts: {} -> {}",
                        left_stage.produced_artifact_ids.len(),
                        right_stage.produced_artifact_ids.len()
                    ));
                }
                if left_stage.artifacts.len() != right_stage.artifacts.len() {
                    details.push(format!(
                        "artifact_records: {} -> {}",
                        left_stage.artifacts.len(),
                        right_stage.artifacts.len()
                    ));
                }
                // Only emit a diff record when something actually changed.
                if !details.is_empty() {
                    stage_diffs.push(RunStageDiffRecord {
                        node_id,
                        change: "changed".to_string(),
                        details,
                    });
                }
            }
            (None, None) => {}
        }
    }

    let status_changed = left.status != right.status;
    // "Identical" here means: same status, no per-stage diffs, and matching
    // transition/artifact/checkpoint counts (contents are not compared).
    let identical = !status_changed
        && stage_diffs.is_empty()
        && left.transitions.len() == right.transitions.len()
        && left.artifacts.len() == right.artifacts.len()
        && left.checkpoints.len() == right.checkpoints.len();

    RunDiffReport {
        left_run_id: left.id.clone(),
        right_run_id: right.id.clone(),
        identical,
        status_changed,
        left_status: left.status.clone(),
        right_status: right.status.clone(),
        stage_diffs,
        transition_count_delta: right.transitions.len() as isize - left.transitions.len() as isize,
        artifact_count_delta: right.artifacts.len() as isize - left.artifacts.len() as isize,
        checkpoint_count_delta: right.checkpoints.len() as isize - left.checkpoints.len() as isize,
    }
}
2337
2338pub fn push_execution_policy(policy: CapabilityPolicy) {
2339 EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
2340}
2341
2342pub fn pop_execution_policy() {
2343 EXECUTION_POLICY_STACK.with(|stack| {
2344 stack.borrow_mut().pop();
2345 });
2346}
2347
2348pub fn current_execution_policy() -> Option<CapabilityPolicy> {
2349 EXECUTION_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
2350}
2351
2352pub fn current_tool_metadata(tool: &str) -> Option<ToolRuntimePolicyMetadata> {
2353 current_execution_policy().and_then(|policy| policy.tool_metadata.get(tool).cloned())
2354}
2355
2356fn policy_allows_tool(policy: &CapabilityPolicy, tool: &str) -> bool {
2357 policy.tools.is_empty() || policy.tools.iter().any(|allowed| allowed == tool)
2358}
2359
2360fn policy_allows_capability(policy: &CapabilityPolicy, capability: &str, op: &str) -> bool {
2361 policy.capabilities.is_empty()
2362 || policy
2363 .capabilities
2364 .get(capability)
2365 .is_some_and(|ops| ops.is_empty() || ops.iter().any(|allowed| allowed == op))
2366}
2367
2368fn policy_allows_side_effect(policy: &CapabilityPolicy, requested: &str) -> bool {
2369 fn rank(v: &str) -> usize {
2370 match v {
2371 "none" => 0,
2372 "read_only" => 1,
2373 "workspace_write" => 2,
2374 "process_exec" => 3,
2375 "network" => 4,
2376 _ => 5,
2377 }
2378 }
2379 policy
2380 .side_effect_level
2381 .as_ref()
2382 .map(|allowed| rank(allowed) >= rank(requested))
2383 .unwrap_or(true)
2384}
2385
2386fn reject_policy(reason: String) -> Result<(), VmError> {
2387 Err(VmError::CategorizedError {
2388 message: reason,
2389 category: crate::value::ErrorCategory::ToolRejected,
2390 })
2391}
2392
/// Heuristically classifies a tool by name when no explicit policy metadata
/// declares its mutation class. Checks run from most to least specific:
/// MCP bridge, process execution, destructive file operations, workspace
/// writes, and finally a read-only default.
fn fallback_mutation_classification(tool_name: &str) -> String {
    let name = tool_name.to_ascii_lowercase();
    let classification = if name.starts_with("mcp_") {
        "host_defined"
    } else if matches!(
        name.as_str(),
        "exec" | "shell" | "exec_at" | "shell_at" | "run"
    ) || name.starts_with("run_")
    {
        "ambient_side_effect"
    } else if ["delete", "remove", "move", "rename"]
        .iter()
        .any(|prefix| name.starts_with(prefix))
    {
        "destructive"
    } else if ["write", "edit", "patch", "create", "scaffold"]
        .iter()
        .any(|needle| name.contains(needle))
        || name.starts_with("insert")
        || name.starts_with("replace")
        || name == "add_import"
    {
        "apply_workspace"
    } else {
        "read_only"
    };
    classification.to_string()
}
2427
2428pub fn current_tool_mutation_classification(tool_name: &str) -> String {
2429 current_tool_metadata(tool_name)
2430 .and_then(|metadata| metadata.mutation_classification)
2431 .unwrap_or_else(|| fallback_mutation_classification(tool_name))
2432}
2433
2434pub fn current_tool_declared_paths(tool_name: &str, args: &serde_json::Value) -> Vec<String> {
2435 let Some(map) = args.as_object() else {
2436 return Vec::new();
2437 };
2438 let path_keys = current_tool_metadata(tool_name)
2439 .map(|metadata| metadata.path_params)
2440 .filter(|keys| !keys.is_empty())
2441 .unwrap_or_else(|| {
2442 vec![
2443 "path".to_string(),
2444 "file".to_string(),
2445 "cwd".to_string(),
2446 "repo".to_string(),
2447 "target".to_string(),
2448 "destination".to_string(),
2449 ]
2450 });
2451 let mut paths = Vec::new();
2452 for key in path_keys {
2453 if let Some(value) = map.get(&key).and_then(|value| value.as_str()) {
2454 if !value.is_empty() {
2455 paths.push(value.to_string());
2456 }
2457 }
2458 }
2459 if let Some(items) = map.get("paths").and_then(|value| value.as_array()) {
2460 for item in items {
2461 if let Some(value) = item.as_str() {
2462 if !value.is_empty() {
2463 paths.push(value.to_string());
2464 }
2465 }
2466 }
2467 }
2468 paths.sort();
2469 paths.dedup();
2470 paths
2471}
2472
/// Enforces the active execution policy (if any) before a builtin runs,
/// mapping each builtin name onto the tool, capability, and side-effect
/// ceilings it must satisfy.
///
/// Builtins not matched below are allowed unconditionally. Rejections are
/// `ToolRejected` categorized errors produced via `reject_policy`.
pub fn enforce_current_policy_for_builtin(name: &str, args: &[VmValue]) -> Result<(), VmError> {
    // No active policy means nothing to enforce.
    let Some(policy) = current_execution_policy() else {
        return Ok(());
    };
    match name {
        // Read-style builtins: tool allowance plus workspace read capability.
        "read" | "read_file" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "read_text")
            {
                return reject_policy(format!(
                    "builtin '{name}' exceeds workspace.read_text ceiling"
                ));
            }
        }
        "search" | "list_dir" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "list")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
            }
        }
        // Existence checks only require the capability, not the tool gate.
        "file_exists" | "stat" => {
            if !policy_allows_capability(&policy, "workspace", "exists") {
                return reject_policy(format!("builtin '{name}' exceeds workspace.exists ceiling"));
            }
        }
        // Write-style builtins are all gated behind the 'edit' tool plus the
        // workspace_write side-effect level.
        "edit" | "write_file" | "append_file" | "mkdir" | "copy_file" => {
            if !policy_allows_tool(&policy, "edit")
                || !policy_allows_capability(&policy, "workspace", "write_text")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace write ceiling"));
            }
        }
        "delete_file" => {
            if !policy_allows_capability(&policy, "workspace", "delete")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'delete_file' exceeds workspace.delete ceiling".to_string(),
                );
            }
        }
        "apply_edit" => {
            if !policy_allows_capability(&policy, "workspace", "apply_edit")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'apply_edit' exceeds workspace.apply_edit ceiling".to_string(),
                );
            }
        }
        // Process execution: 'run' tool, process.exec capability, and the
        // process_exec side-effect level must all be granted.
        "exec" | "exec_at" | "shell" | "shell_at" | "run_command" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        // HTTP builtins are gated solely by the network side-effect level.
        "http_get" | "http_post" | "http_put" | "http_patch" | "http_delete" | "http_request" => {
            if !policy_allows_side_effect(&policy, "network") {
                return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
            }
        }
        // All MCP builtins are treated like process execution so they cannot
        // be used as an escape hatch around the process ceiling.
        "mcp_connect"
        | "mcp_call"
        | "mcp_list_tools"
        | "mcp_list_resources"
        | "mcp_list_resource_templates"
        | "mcp_read_resource"
        | "mcp_list_prompts"
        | "mcp_get_prompt"
        | "mcp_server_info"
        | "mcp_disconnect" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        // host_invoke's capability and op come from its first two arguments
        // (stringified via display); missing args default to empty strings.
        "host_invoke" => {
            let capability = args.first().map(|v| v.display()).unwrap_or_default();
            let op = args.get(1).map(|v| v.display()).unwrap_or_default();
            if !policy_allows_capability(&policy, &capability, &op) {
                return reject_policy(format!(
                    "host_invoke {capability}.{op} exceeds capability ceiling"
                ));
            }
            // Map known capability/op pairs to the side-effect level they
            // imply; everything else is treated as read-only.
            let requested_side_effect = match (capability.as_str(), op.as_str()) {
                ("workspace", "write_text" | "apply_edit" | "delete") => "workspace_write",
                ("process", "exec") => "process_exec",
                _ => "read_only",
            };
            if !policy_allows_side_effect(&policy, requested_side_effect) {
                return reject_policy(format!(
                    "host_invoke {capability}.{op} exceeds side-effect ceiling"
                ));
            }
        }
        // Builtins not listed here carry no policy-relevant effects.
        _ => {}
    }
    Ok(())
}
2578
2579pub fn enforce_current_policy_for_bridge_builtin(name: &str) -> Result<(), VmError> {
2580 if current_execution_policy().is_some() {
2581 return reject_policy(format!(
2582 "bridged builtin '{name}' exceeds execution policy; declare an explicit capability/tool surface instead"
2583 ));
2584 }
2585 Ok(())
2586}
2587
2588pub fn enforce_current_policy_for_tool(tool_name: &str) -> Result<(), VmError> {
2589 let Some(policy) = current_execution_policy() else {
2590 return Ok(());
2591 };
2592 if !policy_allows_tool(&policy, tool_name) {
2593 return reject_policy(format!("tool '{tool_name}' exceeds tool ceiling"));
2594 }
2595 if let Some(metadata) = policy.tool_metadata.get(tool_name) {
2596 for (capability, ops) in &metadata.capabilities {
2597 for op in ops {
2598 if !policy_allows_capability(&policy, capability, op) {
2599 return reject_policy(format!(
2600 "tool '{tool_name}' exceeds capability ceiling: {capability}.{op}"
2601 ));
2602 }
2603 }
2604 }
2605 if let Some(side_effect_level) = metadata.side_effect_level.as_deref() {
2606 if !policy_allows_side_effect(&policy, side_effect_level) {
2607 return reject_policy(format!(
2608 "tool '{tool_name}' exceeds side-effect ceiling: {side_effect_level}"
2609 ));
2610 }
2611 }
2612 }
2613 Ok(())
2614}
2615
2616fn compact_transcript(transcript: &VmValue, keep_last: usize) -> Option<VmValue> {
2617 let dict = transcript.as_dict()?;
2618 let messages = match dict.get("messages") {
2619 Some(VmValue::List(list)) => list.iter().cloned().collect::<Vec<_>>(),
2620 _ => Vec::new(),
2621 };
2622 let retained = messages
2623 .into_iter()
2624 .rev()
2625 .take(keep_last)
2626 .collect::<Vec<_>>()
2627 .into_iter()
2628 .rev()
2629 .collect::<Vec<_>>();
2630 let mut compacted = dict.clone();
2631 compacted.insert(
2632 "messages".to_string(),
2633 VmValue::List(Rc::new(retained.clone())),
2634 );
2635 compacted.insert(
2636 "events".to_string(),
2637 VmValue::List(Rc::new(
2638 crate::llm::helpers::transcript_events_from_messages(&retained),
2639 )),
2640 );
2641 Some(VmValue::Dict(Rc::new(compacted)))
2642}
2643
2644fn redact_transcript_visibility(transcript: &VmValue, visibility: Option<&str>) -> Option<VmValue> {
2645 let Some(visibility) = visibility else {
2646 return Some(transcript.clone());
2647 };
2648 if visibility != "public" && visibility != "public_only" {
2649 return Some(transcript.clone());
2650 }
2651 let dict = transcript.as_dict()?;
2652 let public_messages = match dict.get("messages") {
2653 Some(VmValue::List(list)) => list
2654 .iter()
2655 .filter(|message| {
2656 message
2657 .as_dict()
2658 .and_then(|d| d.get("role"))
2659 .map(|v| v.display())
2660 .map(|role| role != "tool_result")
2661 .unwrap_or(true)
2662 })
2663 .cloned()
2664 .collect::<Vec<_>>(),
2665 _ => Vec::new(),
2666 };
2667 let public_events = match dict.get("events") {
2668 Some(VmValue::List(list)) => list
2669 .iter()
2670 .filter(|event| {
2671 event
2672 .as_dict()
2673 .and_then(|d| d.get("visibility"))
2674 .map(|v| v.display())
2675 .map(|value| value == "public")
2676 .unwrap_or(true)
2677 })
2678 .cloned()
2679 .collect::<Vec<_>>(),
2680 _ => Vec::new(),
2681 };
2682 let mut redacted = dict.clone();
2683 redacted.insert(
2684 "messages".to_string(),
2685 VmValue::List(Rc::new(public_messages)),
2686 );
2687 redacted.insert("events".to_string(), VmValue::List(Rc::new(public_events)));
2688 Some(VmValue::Dict(Rc::new(redacted)))
2689}
2690
2691pub(crate) fn apply_input_transcript_policy(
2692 transcript: Option<VmValue>,
2693 policy: &TranscriptPolicy,
2694) -> Option<VmValue> {
2695 let mut transcript = transcript;
2696 match policy.mode.as_deref() {
2697 Some("reset") => return None,
2698 Some("fork") => {
2699 if let Some(VmValue::Dict(dict)) = transcript.as_ref() {
2700 let mut forked = dict.as_ref().clone();
2701 forked.insert(
2702 "id".to_string(),
2703 VmValue::String(Rc::from(new_id("transcript"))),
2704 );
2705 transcript = Some(VmValue::Dict(Rc::new(forked)));
2706 }
2707 }
2708 _ => {}
2709 }
2710 if policy.compact {
2711 let keep_last = policy.keep_last.unwrap_or(6);
2712 transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
2713 }
2714 transcript
2715}
2716
2717fn apply_output_transcript_policy(
2718 transcript: Option<VmValue>,
2719 policy: &TranscriptPolicy,
2720) -> Option<VmValue> {
2721 let mut transcript = transcript;
2722 if policy.compact {
2723 let keep_last = policy.keep_last.unwrap_or(6);
2724 transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
2725 }
2726 transcript.and_then(|value| redact_transcript_visibility(&value, policy.visibility.as_deref()))
2727}
2728
/// Executes a single workflow stage node.
///
/// Pipeline: select input artifacts under the node's context policy, apply
/// the transcript input policy, enforce the node's input contract, then run
/// one of three execution modes — a shell verification command (for
/// `verify` nodes with a command), an agent loop (agent mode or tools
/// present), or a single LLM call — and finally package the result as one
/// output artifact plus a post-policy transcript.
///
/// Returns the raw result payload (JSON), the produced artifacts, and the
/// compacted/redacted transcript.
pub async fn execute_stage_node(
    node_id: &str,
    node: &WorkflowNode,
    task: &str,
    artifacts: &[ArtifactRecord],
    transcript: Option<VmValue>,
) -> Result<(serde_json::Value, Vec<ArtifactRecord>, Option<VmValue>), VmError> {
    // When the context policy names no kinds, fall back to the node's
    // declared input kinds so selection and contract agree.
    let mut selection_policy = node.context_policy.clone();
    if selection_policy.include_kinds.is_empty() && !node.input_contract.input_kinds.is_empty() {
        selection_policy.include_kinds = node.input_contract.input_kinds.clone();
    }
    let selected = select_artifacts_adaptive(artifacts.to_vec(), &selection_policy);
    // NOTE(review): rendering uses the node's original context policy, not
    // the augmented selection policy above — confirm the asymmetry is
    // intentional (it is fine if rendering ignores include_kinds).
    let rendered_context = render_artifacts_context(&selected, &node.context_policy);
    let transcript = apply_input_transcript_policy(transcript, &node.transcript_policy);
    // Input-contract enforcement: transcript requirement and artifact bounds.
    if node.input_contract.require_transcript && transcript.is_none() {
        return Err(VmError::Runtime(format!(
            "workflow stage {node_id} requires transcript input"
        )));
    }
    if let Some(min_inputs) = node.input_contract.min_inputs {
        if selected.len() < min_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} requires at least {min_inputs} input artifacts"
            )));
        }
    }
    if let Some(max_inputs) = node.input_contract.max_inputs {
        if selected.len() > max_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} accepts at most {max_inputs} input artifacts"
            )));
        }
    }
    // The prompt is the task alone, or rendered context + labeled task.
    let prompt = if rendered_context.is_empty() {
        task.to_string()
    } else {
        format!(
            "{rendered_context}\n\n{}:\n{task}",
            node.task_label
                .clone()
                .unwrap_or_else(|| "Task".to_string())
        )
    };

    // Tool-calling wire format; defaults to "text" when unset/blank.
    let tool_format = std::env::var("HARN_AGENT_TOOL_FORMAT")
        .ok()
        .filter(|value| !value.trim().is_empty())
        .unwrap_or_else(|| "text".to_string());
    let mut llm_result = if node.kind == "verify" {
        // Verify nodes with a non-empty "command" run it via the platform
        // shell instead of calling the LLM.
        if let Some(command) = node
            .verify
            .as_ref()
            .and_then(|verify| verify.as_object())
            .and_then(|verify| verify.get("command"))
            .and_then(|value| value.as_str())
            .map(str::trim)
            .filter(|value| !value.is_empty())
        {
            let mut process = if cfg!(target_os = "windows") {
                let mut cmd = tokio::process::Command::new("cmd");
                cmd.arg("/C").arg(command);
                cmd
            } else {
                let mut cmd = tokio::process::Command::new("/bin/sh");
                cmd.arg("-lc").arg(command);
                cmd
            };
            process.stdin(std::process::Stdio::null());
            // Inherit cwd/env from the current execution context, if any.
            if let Some(context) = crate::stdlib::process::current_execution_context() {
                if let Some(cwd) = context.cwd.filter(|cwd| !cwd.is_empty()) {
                    process.current_dir(cwd);
                }
                if !context.env.is_empty() {
                    process.envs(context.env);
                }
            }
            let output = process
                .output()
                .await
                .map_err(|e| VmError::Runtime(format!("workflow verify exec failed: {e}")))?;
            let stdout = String::from_utf8_lossy(&output.stdout).to_string();
            let stderr = String::from_utf8_lossy(&output.stderr).to_string();
            // Combine streams for display, skipping whichever is empty.
            let combined = if stderr.is_empty() {
                stdout.clone()
            } else if stdout.is_empty() {
                stderr.clone()
            } else {
                format!("{stdout}\n{stderr}")
            };
            serde_json::json!({
                "status": "completed",
                "text": combined,
                "visible_text": combined,
                "command": command,
                "stdout": stdout,
                "stderr": stderr,
                "exit_status": output.status.code().unwrap_or(-1),
                "success": output.status.success(),
            })
        } else {
            // A verify node without a command completes trivially.
            serde_json::json!({
                "status": "completed",
                "text": "",
                "visible_text": "",
            })
        }
    } else {
        // Non-verify nodes call the LLM; build options from the node's
        // model policy, tools, and (optionally) the incoming transcript.
        let mut options = BTreeMap::new();
        if let Some(provider) = &node.model_policy.provider {
            options.insert(
                "provider".to_string(),
                VmValue::String(Rc::from(provider.clone())),
            );
        }
        if let Some(model) = &node.model_policy.model {
            options.insert(
                "model".to_string(),
                VmValue::String(Rc::from(model.clone())),
            );
        }
        if let Some(model_tier) = &node.model_policy.model_tier {
            options.insert(
                "model_tier".to_string(),
                VmValue::String(Rc::from(model_tier.clone())),
            );
        }
        if let Some(temperature) = node.model_policy.temperature {
            options.insert("temperature".to_string(), VmValue::Float(temperature));
        }
        if let Some(max_tokens) = node.model_policy.max_tokens {
            options.insert("max_tokens".to_string(), VmValue::Int(max_tokens));
        }
        let tool_names = workflow_tool_names(&node.tools);
        if !matches!(node.tools, serde_json::Value::Null) && !tool_names.is_empty() {
            options.insert(
                "tools".to_string(),
                crate::stdlib::json_to_vm_value(&node.tools),
            );
        }
        if let Some(transcript) = transcript.clone() {
            options.insert("transcript".to_string(), transcript);
        }

        // Positional args: prompt, optional system prompt, options dict.
        let args = vec![
            VmValue::String(Rc::from(prompt.clone())),
            node.system
                .clone()
                .map(|s| VmValue::String(Rc::from(s)))
                .unwrap_or(VmValue::Nil),
            VmValue::Dict(Rc::new(options)),
        ];
        let mut opts = extract_llm_options(&args)?;

        // Agent mode (explicit, or implied by having tools) runs the full
        // agent loop; otherwise a single LLM call suffices.
        if node.mode.as_deref() == Some("agent") || !tool_names.is_empty() {
            crate::llm::run_agent_loop_internal(
                &mut opts,
                crate::llm::AgentLoopConfig {
                    persistent: true,
                    max_iterations: 12,
                    max_nudges: 3,
                    nudge: None,
                    done_sentinel: node.done_sentinel.clone(),
                    tool_retries: 0,
                    tool_backoff_ms: 1000,
                    tool_format: tool_format.clone(),
                    auto_compact: None,
                    policy: None,
                    daemon: false,
                    llm_retries: 2,
                    llm_backoff_ms: 2000,
                },
            )
            .await?
        } else {
            let result = vm_call_llm_full(&opts).await?;
            crate::llm::agent_loop_result_from_llm(&result, opts)
        }
    };
    // Enrich the payload with provenance: prompt, context, and selection.
    if let Some(payload) = llm_result.as_object_mut() {
        payload.insert("prompt".to_string(), serde_json::json!(prompt));
        payload.insert(
            "system_prompt".to_string(),
            serde_json::json!(node.system.clone().unwrap_or_default()),
        );
        payload.insert(
            "rendered_context".to_string(),
            serde_json::json!(rendered_context),
        );
        payload.insert(
            "selected_artifact_ids".to_string(),
            serde_json::json!(selected
                .iter()
                .map(|artifact| artifact.id.clone())
                .collect::<Vec<_>>()),
        );
        payload.insert(
            "selected_artifact_titles".to_string(),
            serde_json::json!(selected
                .iter()
                .map(|artifact| artifact.title.clone())
                .collect::<Vec<_>>()),
        );
        payload.insert(
            "tool_calling_mode".to_string(),
            serde_json::json!(tool_format.clone()),
        );
    }

    let visible_text = llm_result["text"].as_str().unwrap_or_default().to_string();
    // Convert the result transcript back into a VM value and apply the
    // output-side transcript policy (compaction + redaction).
    let transcript = llm_result
        .get("transcript")
        .cloned()
        .map(|value| crate::stdlib::json_to_vm_value(&value));
    let transcript = apply_output_transcript_policy(transcript, &node.transcript_policy);
    // Output kind: first declared output kind, or a kind-based default.
    let output_kind = node
        .output_contract
        .output_kinds
        .first()
        .cloned()
        .unwrap_or_else(|| {
            if node.kind == "verify" {
                "verification_result".to_string()
            } else {
                "artifact".to_string()
            }
        });
    let mut metadata = BTreeMap::new();
    metadata.insert(
        "input_artifact_ids".to_string(),
        serde_json::json!(selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect::<Vec<_>>()),
    );
    metadata.insert("node_kind".to_string(), serde_json::json!(node.kind));
    // Package the stage result as a single artifact with lineage back to
    // the selected inputs.
    let artifact = ArtifactRecord {
        type_name: "artifact".to_string(),
        id: new_id("artifact"),
        kind: output_kind,
        title: Some(format!("stage {node_id} output")),
        text: Some(visible_text),
        data: Some(llm_result.clone()),
        source: Some(node_id.to_string()),
        created_at: now_rfc3339(),
        freshness: Some("fresh".to_string()),
        priority: None,
        lineage: selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect(),
        relevance: Some(1.0),
        estimated_tokens: None,
        stage: Some(node_id.to_string()),
        metadata,
    }
    .normalize();

    Ok((llm_result, vec![artifact], transcript))
}
2988
2989pub fn next_nodes_for(
2990 graph: &WorkflowGraph,
2991 current: &str,
2992 branch: Option<&str>,
2993) -> Vec<WorkflowEdge> {
2994 let mut matching: Vec<WorkflowEdge> = graph
2995 .edges
2996 .iter()
2997 .filter(|edge| edge.from == current && edge.branch.as_deref() == branch)
2998 .cloned()
2999 .collect();
3000 if matching.is_empty() {
3001 matching = graph
3002 .edges
3003 .iter()
3004 .filter(|edge| edge.from == current && edge.branch.is_none())
3005 .cloned()
3006 .collect();
3007 }
3008 matching
3009}
3010
3011pub fn next_node_for(graph: &WorkflowGraph, current: &str, branch: &str) -> Option<String> {
3012 next_nodes_for(graph, current, Some(branch))
3013 .into_iter()
3014 .next()
3015 .map(|edge| edge.to)
3016}
3017
3018pub fn append_audit_entry(
3019 graph: &mut WorkflowGraph,
3020 op: &str,
3021 node_id: Option<String>,
3022 reason: Option<String>,
3023 metadata: BTreeMap<String, serde_json::Value>,
3024) {
3025 graph.audit_log.push(WorkflowAuditEntry {
3026 id: new_id("audit"),
3027 op: op.to_string(),
3028 node_id,
3029 timestamp: now_rfc3339(),
3030 reason,
3031 metadata,
3032 });
3033}
3034
3035pub fn builtin_ceiling() -> CapabilityPolicy {
3036 CapabilityPolicy {
3037 tools: Vec::new(),
3040 capabilities: BTreeMap::from([
3041 (
3042 "workspace".to_string(),
3043 vec![
3044 "read_text".to_string(),
3045 "write_text".to_string(),
3046 "apply_edit".to_string(),
3047 "delete".to_string(),
3048 "exists".to_string(),
3049 "list".to_string(),
3050 ],
3051 ),
3052 ("process".to_string(), vec!["exec".to_string()]),
3053 ]),
3054 workspace_roots: Vec::new(),
3055 side_effect_level: Some("network".to_string()),
3056 recursion_limit: Some(8),
3057 tool_arg_constraints: Vec::new(),
3058 tool_metadata: BTreeMap::new(),
3059 }
3060}
3061
3062#[cfg(test)]
3063mod tests {
3064 use super::*;
3065
    /// The ceiling intersection must reject a request whose tool list
    /// expands beyond the host ceiling's allow-list.
    #[test]
    fn capability_intersection_rejects_privilege_expansion() {
        let ceiling = CapabilityPolicy {
            tools: vec!["read".to_string()],
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(2),
            ..Default::default()
        };
        let requested = CapabilityPolicy {
            tools: vec!["read".to_string(), "edit".to_string()],
            ..Default::default()
        };
        let error = ceiling.intersect(&requested).unwrap_err();
        assert!(error.contains("host ceiling"));
    }
3081
    /// `normalize` must backfill a generated session id and the default
    /// mutation scope / approval mode on an empty record.
    #[test]
    fn mutation_session_normalize_fills_defaults() {
        let normalized = MutationSessionRecord::default().normalize();
        assert!(normalized.session_id.starts_with("session_"));
        assert_eq!(normalized.mutation_scope, "read_only");
        assert_eq!(normalized.approval_mode, "host_enforced");
    }
3089
    /// Installing a session into the thread-local slot must be readable
    /// back unchanged, and installing `None` must clear the slot.
    #[test]
    fn install_current_mutation_session_round_trips() {
        install_current_mutation_session(Some(MutationSessionRecord {
            session_id: "session_test".to_string(),
            mutation_scope: "apply_workspace".to_string(),
            approval_mode: "explicit".to_string(),
            ..Default::default()
        }));
        let current = current_mutation_session().expect("session installed");
        assert_eq!(current.session_id, "session_test");
        assert_eq!(current.mutation_scope, "apply_workspace");
        assert_eq!(current.approval_mode, "explicit");

        install_current_mutation_session(None);
        assert!(current_mutation_session().is_none());
    }
3106
    /// With any policy on the thread-local stack, a bridged builtin with no
    /// declared capability surface must be rejected as ToolRejected.
    /// Push/pop order around the assertion keeps the thread-local stack
    /// balanced for subsequent tests.
    #[test]
    fn active_execution_policy_rejects_unknown_bridge_builtin() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }
3129
    /// A read-only policy must reject MCP builtins, since they are gated
    /// like process execution and cannot bypass the process ceiling.
    #[test]
    fn active_execution_policy_rejects_mcp_escape_hatch() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }
3152
    /// Legacy top-level act/verify/repair workflow definitions must be
    /// normalized into a graph with those three nodes and "act" as entry.
    #[test]
    fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "legacy",
            "act": {"mode": "llm"},
            "verify": {"kind": "verify"},
            "repair": {"mode": "agent"},
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        assert_eq!(graph.type_name, "workflow_graph");
        assert!(graph.nodes.contains_key("act"));
        assert!(graph.nodes.contains_key("verify"));
        assert!(graph.nodes.contains_key("repair"));
        assert_eq!(graph.entry, "act");
    }
3168
    /// A node whose tools are given as a `tool_registry` object must expose
    /// the registry's tool names via `workflow_tool_names`.
    #[test]
    fn workflow_normalization_accepts_tool_registry_nodes() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "registry_tools",
            "entry": "implement",
            "nodes": {
                "implement": {
                    "kind": "stage",
                    "mode": "agent",
                    "tools": {
                        "_type": "tool_registry",
                        "tools": [
                            {"name": "read", "description": "Read files"},
                            {"name": "run", "description": "Run commands"}
                        ]
                    }
                }
            },
            "edges": []
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        let node = graph.nodes.get("implement").unwrap();
        assert_eq!(workflow_tool_names(&node.tools), vec!["read", "run"]);
    }
3193
    /// Selection must respect both the artifact-count cap and the token
    /// budget: of three candidates, only two fit the configured policy.
    #[test]
    fn artifact_selection_honors_budget_and_priority() {
        let policy = ContextPolicy {
            max_artifacts: Some(2),
            max_tokens: Some(30),
            prefer_recent: true,
            prefer_fresh: true,
            prioritize_kinds: vec!["verification_result".to_string()],
            ..Default::default()
        };
        let artifacts = vec![
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "a".to_string(),
                kind: "summary".to_string(),
                text: Some("short".to_string()),
                relevance: Some(0.9),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "b".to_string(),
                kind: "summary".to_string(),
                text: Some("this is a much larger artifact body".to_string()),
                relevance: Some(1.0),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "c".to_string(),
                kind: "summary".to_string(),
                text: Some("tiny".to_string()),
                relevance: Some(0.5),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
        ];
        let selected = select_artifacts(artifacts, &policy);
        assert_eq!(selected.len(), 2);
        assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
    }
3240
3241 #[test]
3242 fn workflow_validation_rejects_condition_without_true_false_edges() {
3243 let graph = WorkflowGraph {
3244 entry: "gate".to_string(),
3245 nodes: BTreeMap::from([(
3246 "gate".to_string(),
3247 WorkflowNode {
3248 id: Some("gate".to_string()),
3249 kind: "condition".to_string(),
3250 ..Default::default()
3251 },
3252 )]),
3253 edges: vec![WorkflowEdge {
3254 from: "gate".to_string(),
3255 to: "next".to_string(),
3256 branch: Some("true".to_string()),
3257 label: None,
3258 }],
3259 ..Default::default()
3260 };
3261 let report = validate_workflow(&graph, None);
3262 assert!(!report.valid);
3263 assert!(report
3264 .errors
3265 .iter()
3266 .any(|error| error.contains("true") && error.contains("false")));
3267 }
3268
3269 #[test]
3270 fn replay_fixture_round_trip_passes() {
3271 let run = RunRecord {
3272 type_name: "run_record".to_string(),
3273 id: "run_1".to_string(),
3274 workflow_id: "wf".to_string(),
3275 workflow_name: Some("demo".to_string()),
3276 task: "demo".to_string(),
3277 status: "completed".to_string(),
3278 started_at: "1".to_string(),
3279 finished_at: Some("2".to_string()),
3280 parent_run_id: None,
3281 root_run_id: Some("run_1".to_string()),
3282 stages: vec![RunStageRecord {
3283 id: "stage_1".to_string(),
3284 node_id: "act".to_string(),
3285 kind: "stage".to_string(),
3286 status: "completed".to_string(),
3287 outcome: "success".to_string(),
3288 branch: Some("success".to_string()),
3289 started_at: "1".to_string(),
3290 finished_at: Some("2".to_string()),
3291 visible_text: Some("done".to_string()),
3292 private_reasoning: None,
3293 transcript: None,
3294 verification: None,
3295 usage: None,
3296 artifacts: vec![ArtifactRecord {
3297 type_name: "artifact".to_string(),
3298 id: "a1".to_string(),
3299 kind: "summary".to_string(),
3300 text: Some("done".to_string()),
3301 created_at: "1".to_string(),
3302 ..Default::default()
3303 }
3304 .normalize()],
3305 consumed_artifact_ids: vec![],
3306 produced_artifact_ids: vec!["a1".to_string()],
3307 attempts: vec![],
3308 metadata: BTreeMap::new(),
3309 }],
3310 transitions: vec![],
3311 checkpoints: vec![],
3312 pending_nodes: vec![],
3313 completed_nodes: vec!["act".to_string()],
3314 child_runs: vec![],
3315 artifacts: vec![],
3316 policy: CapabilityPolicy::default(),
3317 execution: None,
3318 transcript: None,
3319 usage: None,
3320 replay_fixture: None,
3321 metadata: BTreeMap::new(),
3322 persisted_path: None,
3323 };
3324 let fixture = replay_fixture_from_run(&run);
3325 let report = evaluate_run_against_fixture(&run, &fixture);
3326 assert!(report.pass);
3327 assert!(report.failures.is_empty());
3328 }
3329
3330 #[test]
3331 fn replay_eval_suite_reports_failed_case() {
3332 let good = RunRecord {
3333 id: "run_good".to_string(),
3334 workflow_id: "wf".to_string(),
3335 status: "completed".to_string(),
3336 stages: vec![RunStageRecord {
3337 node_id: "act".to_string(),
3338 status: "completed".to_string(),
3339 outcome: "success".to_string(),
3340 ..Default::default()
3341 }],
3342 ..Default::default()
3343 };
3344 let bad = RunRecord {
3345 id: "run_bad".to_string(),
3346 workflow_id: "wf".to_string(),
3347 status: "failed".to_string(),
3348 stages: vec![RunStageRecord {
3349 node_id: "act".to_string(),
3350 status: "failed".to_string(),
3351 outcome: "error".to_string(),
3352 ..Default::default()
3353 }],
3354 ..Default::default()
3355 };
3356 let suite = evaluate_run_suite(vec![
3357 (
3358 good.clone(),
3359 replay_fixture_from_run(&good),
3360 Some("good.json".to_string()),
3361 ),
3362 (
3363 bad.clone(),
3364 replay_fixture_from_run(&good),
3365 Some("bad.json".to_string()),
3366 ),
3367 ]);
3368 assert!(!suite.pass);
3369 assert_eq!(suite.total, 2);
3370 assert_eq!(suite.failed, 1);
3371 assert!(suite.cases.iter().any(|case| !case.pass));
3372 }
3373
3374 #[test]
3375 fn run_diff_reports_changed_stage() {
3376 let left = RunRecord {
3377 id: "left".to_string(),
3378 workflow_id: "wf".to_string(),
3379 status: "completed".to_string(),
3380 stages: vec![RunStageRecord {
3381 node_id: "act".to_string(),
3382 status: "completed".to_string(),
3383 outcome: "success".to_string(),
3384 ..Default::default()
3385 }],
3386 ..Default::default()
3387 };
3388 let right = RunRecord {
3389 id: "right".to_string(),
3390 workflow_id: "wf".to_string(),
3391 status: "failed".to_string(),
3392 stages: vec![RunStageRecord {
3393 node_id: "act".to_string(),
3394 status: "failed".to_string(),
3395 outcome: "error".to_string(),
3396 ..Default::default()
3397 }],
3398 ..Default::default()
3399 };
3400 let diff = diff_run_records(&left, &right);
3401 assert!(diff.status_changed);
3402 assert!(!diff.identical);
3403 assert_eq!(diff.stage_diffs.len(), 1);
3404 }
3405
3406 #[test]
3407 fn eval_suite_manifest_can_fail_on_baseline_diff() {
3408 let temp_dir =
3409 std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
3410 std::fs::create_dir_all(&temp_dir).unwrap();
3411 let baseline_path = temp_dir.join("baseline.json");
3412 let candidate_path = temp_dir.join("candidate.json");
3413
3414 let baseline = RunRecord {
3415 id: "baseline".to_string(),
3416 workflow_id: "wf".to_string(),
3417 status: "completed".to_string(),
3418 stages: vec![RunStageRecord {
3419 node_id: "act".to_string(),
3420 status: "completed".to_string(),
3421 outcome: "success".to_string(),
3422 ..Default::default()
3423 }],
3424 ..Default::default()
3425 };
3426 let candidate = RunRecord {
3427 id: "candidate".to_string(),
3428 workflow_id: "wf".to_string(),
3429 status: "failed".to_string(),
3430 stages: vec![RunStageRecord {
3431 node_id: "act".to_string(),
3432 status: "failed".to_string(),
3433 outcome: "error".to_string(),
3434 ..Default::default()
3435 }],
3436 ..Default::default()
3437 };
3438
3439 save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
3440 save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();
3441
3442 let manifest = EvalSuiteManifest {
3443 base_dir: Some(temp_dir.display().to_string()),
3444 cases: vec![EvalSuiteCase {
3445 label: Some("candidate".to_string()),
3446 run_path: "candidate.json".to_string(),
3447 fixture_path: None,
3448 compare_to: Some("baseline.json".to_string()),
3449 }],
3450 ..Default::default()
3451 };
3452 let suite = evaluate_run_suite_manifest(&manifest).unwrap();
3453 assert!(!suite.pass);
3454 assert_eq!(suite.failed, 1);
3455 assert!(suite.cases[0].comparison.is_some());
3456 assert!(suite.cases[0]
3457 .failures
3458 .iter()
3459 .any(|failure| failure.contains("baseline")));
3460 }
3461
3462 #[test]
3463 fn render_unified_diff_marks_removed_and_added_lines() {
3464 let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
3465 assert!(diff.contains("--- a/src/main.rs"));
3466 assert!(diff.contains("+++ b/src/main.rs"));
3467 assert!(diff.contains("-old"));
3468 assert!(diff.contains("+new"));
3469 assert!(diff.contains(" same"));
3470 }
3471
3472 #[test]
3473 fn execution_policy_rejects_process_exec_when_read_only() {
3474 push_execution_policy(CapabilityPolicy {
3475 side_effect_level: Some("read_only".to_string()),
3476 capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
3477 ..Default::default()
3478 });
3479 let result = enforce_current_policy_for_builtin("exec", &[]);
3480 pop_execution_policy();
3481 assert!(result.is_err());
3482 }
3483
3484 #[test]
3485 fn execution_policy_rejects_unlisted_tool() {
3486 push_execution_policy(CapabilityPolicy {
3487 tools: vec!["read".to_string()],
3488 ..Default::default()
3489 });
3490 let result = enforce_current_policy_for_tool("edit");
3491 pop_execution_policy();
3492 assert!(result.is_err());
3493 }
3494
3495 #[test]
3498 fn pre_tool_hook_deny_blocks_execution() {
3499 clear_tool_hooks();
3500 register_tool_hook(ToolHook {
3501 pattern: "dangerous_*".to_string(),
3502 pre: Some(Rc::new(|_name, _args| {
3503 PreToolAction::Deny("blocked by policy".to_string())
3504 })),
3505 post: None,
3506 });
3507 let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
3508 clear_tool_hooks();
3509 assert!(matches!(result, PreToolAction::Deny(_)));
3510 }
3511
3512 #[test]
3513 fn pre_tool_hook_allow_passes_through() {
3514 clear_tool_hooks();
3515 register_tool_hook(ToolHook {
3516 pattern: "safe_*".to_string(),
3517 pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
3518 post: None,
3519 });
3520 let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
3521 clear_tool_hooks();
3522 assert!(matches!(result, PreToolAction::Allow));
3523 }
3524
3525 #[test]
3526 fn pre_tool_hook_modify_rewrites_args() {
3527 clear_tool_hooks();
3528 register_tool_hook(ToolHook {
3529 pattern: "*".to_string(),
3530 pre: Some(Rc::new(|_name, _args| {
3531 PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
3532 })),
3533 post: None,
3534 });
3535 let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
3536 clear_tool_hooks();
3537 match result {
3538 PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
3539 _ => panic!("expected Modify"),
3540 }
3541 }
3542
3543 #[test]
3544 fn post_tool_hook_modifies_result() {
3545 clear_tool_hooks();
3546 register_tool_hook(ToolHook {
3547 pattern: "exec".to_string(),
3548 pre: None,
3549 post: Some(Rc::new(|_name, result| {
3550 if result.contains("SECRET") {
3551 PostToolAction::Modify("[REDACTED]".to_string())
3552 } else {
3553 PostToolAction::Pass
3554 }
3555 })),
3556 });
3557 let result = run_post_tool_hooks("exec", "output with SECRET data");
3558 let clean = run_post_tool_hooks("exec", "clean output");
3559 clear_tool_hooks();
3560 assert_eq!(result, "[REDACTED]");
3561 assert_eq!(clean, "clean output");
3562 }
3563
3564 #[test]
3565 fn unmatched_hook_pattern_does_not_fire() {
3566 clear_tool_hooks();
3567 register_tool_hook(ToolHook {
3568 pattern: "exec".to_string(),
3569 pre: Some(Rc::new(|_name, _args| {
3570 PreToolAction::Deny("should not match".to_string())
3571 })),
3572 post: None,
3573 });
3574 let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
3575 clear_tool_hooks();
3576 assert!(matches!(result, PreToolAction::Allow));
3577 }
3578
3579 #[test]
3580 fn glob_match_patterns() {
3581 assert!(glob_match("*", "anything"));
3582 assert!(glob_match("exec*", "exec_at"));
3583 assert!(glob_match("*_file", "read_file"));
3584 assert!(!glob_match("exec*", "read_file"));
3585 assert!(glob_match("read_file", "read_file"));
3586 assert!(!glob_match("read_file", "write_file"));
3587 }
3588
3589 #[test]
3592 fn microcompact_snips_large_output() {
3593 let large = "x".repeat(50_000);
3594 let result = microcompact_tool_output(&large, 10_000);
3595 assert!(result.len() < 15_000);
3596 assert!(result.contains("snipped"));
3597 }
3598
3599 #[test]
3600 fn microcompact_preserves_small_output() {
3601 let small = "hello world";
3602 let result = microcompact_tool_output(small, 10_000);
3603 assert_eq!(result, small);
3604 }
3605
3606 #[test]
3607 fn auto_compact_messages_reduces_count() {
3608 let mut messages: Vec<serde_json::Value> = (0..20)
3609 .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
3610 .collect();
3611 let runtime = tokio::runtime::Builder::new_current_thread()
3612 .enable_all()
3613 .build()
3614 .unwrap();
3615 let compacted = runtime.block_on(auto_compact_messages(
3616 &mut messages,
3617 &AutoCompactConfig {
3618 compact_strategy: CompactStrategy::Truncate,
3619 keep_last: 6,
3620 ..Default::default()
3621 },
3622 None,
3623 ));
3624 assert!(compacted.unwrap());
3625 assert!(messages.len() <= 7); assert!(messages[0]["content"]
3627 .as_str()
3628 .unwrap()
3629 .contains("auto-compacted"));
3630 }
3631
3632 #[test]
3633 fn auto_compact_noop_when_under_threshold() {
3634 let mut messages: Vec<serde_json::Value> = (0..4)
3635 .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
3636 .collect();
3637 let runtime = tokio::runtime::Builder::new_current_thread()
3638 .enable_all()
3639 .build()
3640 .unwrap();
3641 let compacted = runtime.block_on(auto_compact_messages(
3642 &mut messages,
3643 &AutoCompactConfig {
3644 compact_strategy: CompactStrategy::Truncate,
3645 keep_last: 6,
3646 ..Default::default()
3647 },
3648 None,
3649 ));
3650 assert!(!compacted.unwrap());
3651 assert_eq!(messages.len(), 4);
3652 }
3653
3654 #[test]
3655 fn estimate_message_tokens_basic() {
3656 let messages = vec![
3657 serde_json::json!({"role": "user", "content": "a".repeat(400)}),
3658 serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
3659 ];
3660 let tokens = estimate_message_tokens(&messages);
3661 assert_eq!(tokens, 200); }
3663
3664 #[test]
3667 fn dedup_artifacts_removes_duplicates() {
3668 let mut artifacts = vec![
3669 ArtifactRecord {
3670 id: "a1".to_string(),
3671 kind: "test".to_string(),
3672 text: Some("duplicate content".to_string()),
3673 ..Default::default()
3674 },
3675 ArtifactRecord {
3676 id: "a2".to_string(),
3677 kind: "test".to_string(),
3678 text: Some("duplicate content".to_string()),
3679 ..Default::default()
3680 },
3681 ArtifactRecord {
3682 id: "a3".to_string(),
3683 kind: "test".to_string(),
3684 text: Some("unique content".to_string()),
3685 ..Default::default()
3686 },
3687 ];
3688 dedup_artifacts(&mut artifacts);
3689 assert_eq!(artifacts.len(), 2);
3690 }
3691
3692 #[test]
3693 fn microcompact_artifact_snips_oversized() {
3694 let mut artifact = ArtifactRecord {
3695 id: "a1".to_string(),
3696 kind: "test".to_string(),
3697 text: Some("x".repeat(10_000)),
3698 estimated_tokens: Some(2_500),
3699 ..Default::default()
3700 };
3701 microcompact_artifact(&mut artifact, 500);
3702 assert!(artifact.text.as_ref().unwrap().len() < 5_000);
3703 assert_eq!(artifact.estimated_tokens, Some(500));
3704 }
3705
3706 #[test]
3709 fn arg_constraint_allows_matching_pattern() {
3710 let policy = CapabilityPolicy {
3711 tool_arg_constraints: vec![ToolArgConstraint {
3712 tool: "exec".to_string(),
3713 arg_patterns: vec!["cargo *".to_string()],
3714 }],
3715 ..Default::default()
3716 };
3717 let result = enforce_tool_arg_constraints(
3718 &policy,
3719 "exec",
3720 &serde_json::json!({"command": "cargo test"}),
3721 );
3722 assert!(result.is_ok());
3723 }
3724
3725 #[test]
3726 fn arg_constraint_rejects_non_matching_pattern() {
3727 let policy = CapabilityPolicy {
3728 tool_arg_constraints: vec![ToolArgConstraint {
3729 tool: "exec".to_string(),
3730 arg_patterns: vec!["cargo *".to_string()],
3731 }],
3732 ..Default::default()
3733 };
3734 let result = enforce_tool_arg_constraints(
3735 &policy,
3736 "exec",
3737 &serde_json::json!({"command": "rm -rf /"}),
3738 );
3739 assert!(result.is_err());
3740 }
3741
3742 #[test]
3743 fn arg_constraint_ignores_unmatched_tool() {
3744 let policy = CapabilityPolicy {
3745 tool_arg_constraints: vec![ToolArgConstraint {
3746 tool: "exec".to_string(),
3747 arg_patterns: vec!["cargo *".to_string()],
3748 }],
3749 ..Default::default()
3750 };
3751 let result = enforce_tool_arg_constraints(
3752 &policy,
3753 "read_file",
3754 &serde_json::json!({"path": "/etc/passwd"}),
3755 );
3756 assert!(result.is_ok());
3757 }
3758}