1use std::collections::{BTreeMap, BTreeSet};
2use std::path::{Path, PathBuf};
3use std::rc::Rc;
4use std::{cell::RefCell, thread_local};
5
6use serde::{Deserialize, Serialize};
7
8use crate::llm::{extract_llm_options, vm_call_llm_full, vm_value_to_json};
9use crate::value::{VmError, VmValue};
10
/// Returns the current wall-clock time as an RFC 3339 UTC timestamp
/// (e.g. `2023-11-14T22:13:20Z`).
///
/// Fix: the previous implementation returned bare epoch seconds despite the
/// RFC 3339 name, so serialized `created_at`/`timestamp` fields carried a
/// format their name (and any downstream parser) did not expect.
fn now_rfc3339() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    format_rfc3339_utc(secs)
}

/// Formats whole seconds since the Unix epoch as `YYYY-MM-DDTHH:MM:SSZ`.
fn format_rfc3339_utc(unix_secs: u64) -> String {
    let days = (unix_secs / 86_400) as i64;
    let rem = unix_secs % 86_400;
    let (hour, minute, second) = (rem / 3_600, (rem % 3_600) / 60, rem % 60);
    // Howard Hinnant's `civil_from_days` algorithm: convert a day count
    // (epoch-relative, shifted so the 400-year era starts on 0000-03-01)
    // into a Gregorian calendar date without any date-library dependency.
    let z = days + 719_468;
    let era = z.div_euclid(146_097);
    let doe = z.rem_euclid(146_097); // day-of-era, [0, 146096]
    let yoe = (doe - doe / 1_460 + doe / 36_524 - doe / 146_096) / 365; // year-of-era
    let year = yoe + era * 400;
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day-of-year, March-based
    let mp = (5 * doy + 2) / 153; // month index, March = 0
    let day = doy - (153 * mp + 2) / 5 + 1;
    let month = if mp < 10 { mp + 3 } else { mp - 9 };
    // Jan/Feb belong to the following civil year in the March-based scheme.
    let year = if month <= 2 { year + 1 } else { year };
    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z")
}
19
20fn new_id(prefix: &str) -> String {
21 format!("{prefix}_{}", uuid::Uuid::now_v7())
22}
23
/// Directory for run records: `$HARN_RUN_DIR` if set, else `.harn-runs`
/// relative to the working directory.
fn default_run_dir() -> PathBuf {
    match std::env::var("HARN_RUN_DIR") {
        Ok(dir) => PathBuf::from(dir),
        Err(_) => PathBuf::from(".harn-runs"),
    }
}
29
thread_local! {
    // NOTE(review): state is per-thread, which assumes each interpreter/worker
    // runs on its own thread — confirm against the VM's threading model.
    /// Stack of capability ceilings pushed by nested executions.
    static EXECUTION_POLICY_STACK: RefCell<Vec<CapabilityPolicy>> = const { RefCell::new(Vec::new()) };
    /// Registered tool-call hooks; run in registration order (see `run_pre_tool_hooks`).
    static TOOL_HOOKS: RefCell<Vec<ToolHook>> = const { RefCell::new(Vec::new()) };
    /// Mutation session attached to the currently running execution, if any.
    static CURRENT_MUTATION_SESSION: RefCell<Option<MutationSessionRecord>> = const { RefCell::new(None) };
}
35
/// Identity and policy metadata for a mutation session (an execution that may
/// modify workspace state). All fields have serde defaults so partial JSON
/// deserializes cleanly; `normalize` backfills the required ones.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct MutationSessionRecord {
    /// Unique id; generated by `normalize` when empty.
    pub session_id: String,
    /// Id of the spawning session, if this one was nested.
    pub parent_session_id: Option<String>,
    /// Workflow run this session belongs to, if any.
    pub run_id: Option<String>,
    /// Worker executing the session, if any.
    pub worker_id: Option<String>,
    /// Free-form execution kind label — NOTE(review): values not visible here.
    pub execution_kind: Option<String>,
    /// Mutation scope; defaults to "read_only" via `normalize`.
    pub mutation_scope: String,
    /// Approval mode; defaults to "host_enforced" via `normalize`.
    pub approval_mode: String,
}
47
48impl MutationSessionRecord {
49 pub fn normalize(mut self) -> Self {
50 if self.session_id.is_empty() {
51 self.session_id = new_id("session");
52 }
53 if self.mutation_scope.is_empty() {
54 self.mutation_scope = "read_only".to_string();
55 }
56 if self.approval_mode.is_empty() {
57 self.approval_mode = "host_enforced".to_string();
58 }
59 self
60 }
61}
62
63pub fn install_current_mutation_session(session: Option<MutationSessionRecord>) {
64 CURRENT_MUTATION_SESSION.with(|slot| {
65 *slot.borrow_mut() = session.map(MutationSessionRecord::normalize);
66 });
67}
68
/// Returns a clone of the mutation session installed on this thread, if any.
pub fn current_mutation_session() -> Option<MutationSessionRecord> {
    CURRENT_MUTATION_SESSION.with(|slot| slot.borrow().clone())
}
72
/// Decision produced by pre-tool hooks for a pending tool invocation.
#[derive(Clone, Debug)]
pub enum PreToolAction {
    /// Proceed with the current (possibly already rewritten) arguments.
    Allow,
    /// Block the call; the payload is a human-readable denial reason.
    Deny(String),
    /// Proceed, but with these replacement arguments.
    Modify(serde_json::Value),
}
85
/// Decision produced by post-tool hooks for a completed tool invocation.
#[derive(Clone, Debug)]
pub enum PostToolAction {
    /// Leave the tool result unchanged.
    Pass,
    /// Replace the tool result with this string.
    Modify(String),
}
94
/// Pre-call hook: given the tool name and its JSON args, may allow, deny, or rewrite the args.
pub type PreToolHookFn = Rc<dyn Fn(&str, &serde_json::Value) -> PreToolAction>;
/// Post-call hook: given the tool name and its string result, may pass it through or rewrite it.
pub type PostToolHookFn = Rc<dyn Fn(&str, &str) -> PostToolAction>;
98
/// Interception hook applied to tool calls whose name matches `pattern`
/// (glob syntax as implemented by `glob_match`).
#[derive(Clone)]
pub struct ToolHook {
    /// Tool-name glob this hook applies to.
    pub pattern: String,
    /// Optional callback run before the tool executes.
    pub pre: Option<PreToolHookFn>,
    /// Optional callback run on the tool's string result.
    pub post: Option<PostToolHookFn>,
}
109
110impl std::fmt::Debug for ToolHook {
111 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112 f.debug_struct("ToolHook")
113 .field("pattern", &self.pattern)
114 .field("has_pre", &self.pre.is_some())
115 .field("has_post", &self.post.is_some())
116 .finish()
117 }
118}
119
/// Minimal glob matcher used for tool-name patterns.
///
/// Supported forms: `*` (match all), `prefix*`, `*suffix`, `*infix*`
/// (substring), and exact match.
///
/// Fix: `*x*` patterns previously fell into the `strip_suffix('*')` branch
/// and tested `name.starts_with("*x")`, which can never match an ordinary
/// tool name; they are now handled as substring matches (checked first so a
/// double wildcard is not misread as a prefix/suffix pattern).
fn glob_match(pattern: &str, name: &str) -> bool {
    if pattern == "*" {
        return true;
    }
    if let Some(inner) = pattern
        .strip_prefix('*')
        .and_then(|p| p.strip_suffix('*'))
    {
        return name.contains(inner);
    }
    if let Some(prefix) = pattern.strip_suffix('*') {
        return name.starts_with(prefix);
    }
    if let Some(suffix) = pattern.strip_prefix('*') {
        return name.ends_with(suffix);
    }
    pattern == name
}
132
/// Appends a hook to this thread's hook list; hooks run in registration order.
pub fn register_tool_hook(hook: ToolHook) {
    TOOL_HOOKS.with(|hooks| hooks.borrow_mut().push(hook));
}
136
/// Removes every hook registered on this thread.
pub fn clear_tool_hooks() {
    TOOL_HOOKS.with(|hooks| hooks.borrow_mut().clear());
}
140
141pub fn run_pre_tool_hooks(tool_name: &str, args: &serde_json::Value) -> PreToolAction {
143 TOOL_HOOKS.with(|hooks| {
144 let hooks = hooks.borrow();
145 let mut current_args = args.clone();
146 for hook in hooks.iter() {
147 if !glob_match(&hook.pattern, tool_name) {
148 continue;
149 }
150 if let Some(ref pre) = hook.pre {
151 match pre(tool_name, ¤t_args) {
152 PreToolAction::Allow => {}
153 PreToolAction::Deny(reason) => return PreToolAction::Deny(reason),
154 PreToolAction::Modify(new_args) => {
155 current_args = new_args;
156 }
157 }
158 }
159 }
160 if current_args != *args {
161 PreToolAction::Modify(current_args)
162 } else {
163 PreToolAction::Allow
164 }
165 })
166}
167
168pub fn run_post_tool_hooks(tool_name: &str, result: &str) -> String {
170 TOOL_HOOKS.with(|hooks| {
171 let hooks = hooks.borrow();
172 let mut current = result.to_string();
173 for hook in hooks.iter() {
174 if !glob_match(&hook.pattern, tool_name) {
175 continue;
176 }
177 if let Some(ref post) = hook.post {
178 match post(tool_name, ¤t) {
179 PostToolAction::Pass => {}
180 PostToolAction::Modify(new_result) => {
181 current = new_result;
182 }
183 }
184 }
185 }
186 current
187 })
188}
189
/// How archived transcript messages are condensed during auto-compaction.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CompactStrategy {
    /// Ask the model to summarize the archived messages (extra LLM call).
    Llm,
    /// Keep truncated snippets of the archived messages; no LLM call.
    Truncate,
    /// Delegate summarization to a user-provided VM closure.
    Custom,
}
198
199pub fn parse_compact_strategy(value: &str) -> Result<CompactStrategy, VmError> {
200 match value {
201 "llm" => Ok(CompactStrategy::Llm),
202 "truncate" => Ok(CompactStrategy::Truncate),
203 "custom" => Ok(CompactStrategy::Custom),
204 other => Err(VmError::Runtime(format!(
205 "unknown compact_strategy '{other}' (expected 'llm', 'truncate', or 'custom')"
206 ))),
207 }
208}
209
/// Tuning knobs for transcript auto-compaction.
#[derive(Clone, Debug)]
pub struct AutoCompactConfig {
    /// Estimated-token level that triggers compaction — NOTE(review): not
    /// referenced in this chunk; confirm semantics at the call site.
    pub token_threshold: usize,
    /// Per-tool-output character cap used for microcompaction.
    pub tool_output_max_chars: usize,
    /// Number of most-recent messages kept verbatim by `auto_compact_messages`.
    pub keep_last: usize,
    /// Which summarization strategy to apply to archived messages.
    pub compact_strategy: CompactStrategy,
    /// VM closure used when `compact_strategy` is `Custom`.
    pub custom_compactor: Option<VmValue>,
}
224
impl Default for AutoCompactConfig {
    /// Defaults: LLM-based compaction above ~80k estimated tokens, tool
    /// outputs capped at 20k chars, newest 8 messages kept verbatim.
    fn default() -> Self {
        Self {
            token_threshold: 80_000,
            tool_output_max_chars: 20_000,
            keep_last: 8,
            compact_strategy: CompactStrategy::Llm,
            custom_compactor: None,
        }
    }
}
236
237pub fn estimate_message_tokens(messages: &[serde_json::Value]) -> usize {
239 messages
240 .iter()
241 .map(|m| {
242 m.get("content")
243 .and_then(|c| c.as_str())
244 .map(|s| s.len())
245 .unwrap_or(0)
246 })
247 .sum::<usize>()
248 / 4
249}
250
/// Shrinks an oversized tool output to roughly `max_chars` while trying to
/// preserve the most useful content: the head, the tail, and (when present)
/// lines that look like compiler/test diagnostics.
///
/// Returns the output unchanged when it already fits or when `max_chars` is
/// too small (< 200) to compact meaningfully.
/// NOTE(review): uses `floor_char_boundary`/`ceil_char_boundary`, which are
/// unstable std APIs — this crate presumably builds on nightly; confirm.
pub fn microcompact_tool_output(output: &str, max_chars: usize) -> String {
    if output.len() <= max_chars || max_chars < 200 {
        return output.to_string();
    }
    // Heuristically collect up to 32 diagnostic-looking lines so error
    // details survive compaction.
    let diagnostic_lines = output
        .lines()
        .filter(|line| {
            let trimmed = line.trim();
            let lower = trimmed.to_lowercase();
            // "file:line"-shaped: a ':' immediately followed by a digit.
            let has_file_line = {
                let bytes = trimmed.as_bytes();
                let mut i = 0;
                let mut found_colon = false;
                while i < bytes.len() {
                    if bytes[i] == b':' {
                        found_colon = true;
                        break;
                    }
                    i += 1;
                }
                found_colon && i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit()
            };
            // Keyword screen: mixes case-sensitive checks on the raw line with
            // case-insensitive checks on the lowercased copy.
            let has_keyword = trimmed.contains("error")
                || trimmed.contains("FAIL")
                || trimmed.contains("panic")
                || trimmed.contains("undefined")
                || trimmed.contains("expected")
                || trimmed.contains("got")
                || lower.contains("cannot find")
                || lower.contains("not found")
                || lower.contains("no such")
                || lower.contains("unresolved")
                || lower.contains("missing")
                || lower.contains("declared but not used")
                || lower.contains("unused")
                || lower.contains("mismatch");
            // Lines that read like errors even without a file:line prefix.
            let positional = lower.contains(" error ")
                || lower.starts_with("error:")
                || lower.starts_with("fail")
                || lower.contains("panic:");
            (has_file_line && has_keyword) || positional
        })
        .take(32)
        .collect::<Vec<_>>();
    if !diagnostic_lines.is_empty() {
        let diagnostics = diagnostic_lines.join("\n");
        // Split the remaining character budget (minus separator overhead)
        // evenly between head and tail.
        let budget = max_chars.saturating_sub(diagnostics.len() + 64);
        let keep = budget / 2;
        if keep >= 80 && output.len() > keep * 2 {
            // Char-boundary-safe slicing of head and tail.
            let head_end = output.floor_char_boundary(keep);
            let tail_start = output.ceil_char_boundary(output.len() - keep);
            let head = &output[..head_end];
            let tail = &output[tail_start..];
            return format!(
                "{head}\n\n[diagnostic lines preserved]\n{diagnostics}\n\n[... output compacted ...]\n\n{tail}"
            );
        }
    }
    // Fallback: plain head+tail split with a snip marker.
    let keep = max_chars / 2;
    let head_end = output.floor_char_boundary(keep);
    let tail_start = output.ceil_char_boundary(output.len() - keep);
    let head = &output[..head_end];
    let tail = &output[tail_start..];
    let snipped = output.len() - max_chars;
    format!("{head}\n\n[... {snipped} characters snipped ...]\n\n{tail}")
}
321
322fn format_compaction_messages(messages: &[serde_json::Value]) -> String {
323 messages
324 .iter()
325 .map(|msg| {
326 let role = msg
327 .get("role")
328 .and_then(|v| v.as_str())
329 .unwrap_or("user")
330 .to_uppercase();
331 let content = msg
332 .get("content")
333 .and_then(|v| v.as_str())
334 .unwrap_or_default();
335 format!("{role}: {content}")
336 })
337 .collect::<Vec<_>>()
338 .join("\n")
339}
340
/// LLM-free fallback summary: up to 15 `[role] snippet` lines (each snippet
/// capped at ~500 bytes on a char boundary) under an archived-count header.
///
/// NOTE(review): the "... and N more" line uses `archived_count - 15`, while
/// the 15 shown parts already skip empty-content messages — the remainder
/// count can overstate when empty messages were filtered out; confirm intent.
fn truncate_compaction_summary(
    old_messages: &[serde_json::Value],
    archived_count: usize,
) -> String {
    let summary_parts: Vec<String> = old_messages
        .iter()
        .filter_map(|m| {
            let role = m.get("role")?.as_str()?;
            let content = m.get("content")?.as_str()?;
            if content.is_empty() {
                return None;
            }
            // Cap long messages; floor_char_boundary keeps the cut UTF-8-safe.
            let truncated = if content.len() > 500 {
                format!("{}...", &content[..content.floor_char_boundary(500)])
            } else {
                content.to_string()
            };
            Some(format!("[{role}] {truncated}"))
        })
        .take(15)
        .collect();
    format!(
        "[auto-compacted {archived_count} older messages]\n{}{}",
        summary_parts.join("\n"),
        if archived_count > 15 {
            format!("\n... and {} more", archived_count - 15)
        } else {
            String::new()
        }
    )
}
372
373fn compact_summary_text_from_value(value: &VmValue) -> Result<String, VmError> {
374 if let Some(map) = value.as_dict() {
375 if let Some(summary) = map.get("summary").or_else(|| map.get("text")) {
376 return Ok(summary.display());
377 }
378 }
379 match value {
380 VmValue::String(text) => Ok(text.to_string()),
381 VmValue::Nil => Ok(String::new()),
382 _ => serde_json::to_string_pretty(&vm_value_to_json(value))
383 .map_err(|e| VmError::Runtime(format!("custom compactor encode error: {e}"))),
384 }
385}
386
/// Summarizes archived messages with a one-shot LLM call derived from the
/// active call options.
///
/// Strips transcript/system/tool fields from the cloned options so the
/// compaction call is a clean single-turn request, then prefixes the model's
/// summary with the archived-count header. Falls back to
/// `truncate_compaction_summary` when the model returns empty text.
async fn llm_compaction_summary(
    old_messages: &[serde_json::Value],
    archived_count: usize,
    llm_opts: &crate::llm::api::LlmCallOptions,
) -> Result<String, VmError> {
    let mut compact_opts = llm_opts.clone();
    let formatted = format_compaction_messages(old_messages);
    // Reset everything conversational/tool-related; only provider/model
    // settings carry over from the original options.
    compact_opts.system = None;
    compact_opts.transcript_id = None;
    compact_opts.transcript_summary = None;
    compact_opts.transcript_metadata = None;
    compact_opts.native_tools = None;
    compact_opts.tool_choice = None;
    compact_opts.response_format = None;
    compact_opts.json_schema = None;
    compact_opts.messages = vec![serde_json::json!({
        "role": "user",
        "content": format!(
            "Summarize these archived conversation messages for a follow-on coding agent. Preserve goals, constraints, decisions, completed tool work, unresolved issues, and next actions. Output only the summary text.\n\nArchived message count: {archived_count}\n\nConversation:\n{formatted}"
        ),
    })];
    let result = vm_call_llm_full(&compact_opts).await?;
    let summary = result.text.trim();
    if summary.is_empty() {
        // Empty model output: degrade gracefully instead of losing history.
        Ok(truncate_compaction_summary(old_messages, archived_count))
    } else {
        Ok(format!(
            "[auto-compacted {archived_count} older messages]\n{summary}"
        ))
    }
}
418
419async fn custom_compaction_summary(
420 old_messages: &[serde_json::Value],
421 archived_count: usize,
422 callback: &VmValue,
423) -> Result<String, VmError> {
424 let Some(VmValue::Closure(closure)) = Some(callback.clone()) else {
425 return Err(VmError::Runtime(
426 "compact_callback must be a closure when compact_strategy is 'custom'".to_string(),
427 ));
428 };
429 let mut vm = crate::vm::take_async_builtin_child_vm().ok_or_else(|| {
430 VmError::Runtime(
431 "custom transcript compaction requires an async builtin VM context".to_string(),
432 )
433 })?;
434 let messages_vm = VmValue::List(Rc::new(
435 old_messages
436 .iter()
437 .map(crate::stdlib::json_to_vm_value)
438 .collect(),
439 ));
440 let result = vm.call_closure_pub(&closure, &[messages_vm], &[]).await;
441 crate::vm::restore_async_builtin_child_vm(vm);
442 let summary = compact_summary_text_from_value(&result?)?;
443 if summary.trim().is_empty() {
444 Ok(truncate_compaction_summary(old_messages, archived_count))
445 } else {
446 Ok(format!(
447 "[auto-compacted {archived_count} older messages]\n{summary}"
448 ))
449 }
450}
451
/// Archives all but the newest `config.keep_last` messages, replacing them
/// with a single summary message prepended as a `user` turn.
///
/// Returns `Ok(Some(summary))` when compaction happened and `Ok(None)` when
/// the transcript was already short enough. `llm_opts` is required only for
/// the `Llm` strategy; `config.custom_compactor` only for `Custom`.
/// NOTE(review): messages are drained before summarization — if the summary
/// step errors, the drained history is dropped; confirm callers accept that.
pub(crate) async fn auto_compact_messages(
    messages: &mut Vec<serde_json::Value>,
    config: &AutoCompactConfig,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
) -> Result<Option<String>, VmError> {
    if messages.len() <= config.keep_last {
        return Ok(None);
    }
    // Drain the oldest messages; the newest `keep_last` remain in place.
    let split_at = messages.len().saturating_sub(config.keep_last);
    let old_messages: Vec<_> = messages.drain(..split_at).collect();
    let archived_count = old_messages.len();
    let summary = match config.compact_strategy {
        CompactStrategy::Truncate => truncate_compaction_summary(&old_messages, archived_count),
        CompactStrategy::Llm => {
            llm_compaction_summary(
                &old_messages,
                archived_count,
                llm_opts.ok_or_else(|| {
                    VmError::Runtime(
                        "LLM transcript compaction requires active LLM call options".to_string(),
                    )
                })?,
            )
            .await?
        }
        CompactStrategy::Custom => {
            custom_compaction_summary(
                &old_messages,
                archived_count,
                config.custom_compactor.as_ref().ok_or_else(|| {
                    VmError::Runtime(
                        "compact_callback is required when compact_strategy is 'custom'"
                            .to_string(),
                    )
                })?,
            )
            .await?
        }
    };
    // The summary becomes the oldest message so later turns still read in order.
    messages.insert(
        0,
        serde_json::json!({
            "role": "user",
            "content": summary,
        }),
    );
    Ok(Some(summary))
}
502
503pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
507 let max_chars = max_tokens * 4;
508 if let Some(ref text) = artifact.text {
509 if text.len() > max_chars && max_chars >= 200 {
510 artifact.text = Some(microcompact_tool_output(text, max_chars));
511 artifact.estimated_tokens = Some(max_tokens);
512 }
513 }
514}
515
516pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
519 let mut seen_hashes: BTreeSet<u64> = BTreeSet::new();
520 artifacts.retain(|artifact| {
521 let text = artifact.text.as_deref().unwrap_or("");
522 if text.is_empty() {
523 return true;
524 }
525 let hash = {
527 use std::hash::{Hash, Hasher};
528 let mut hasher = std::collections::hash_map::DefaultHasher::new();
529 text.hash(&mut hasher);
530 hasher.finish()
531 };
532 seen_hashes.insert(hash)
533 });
534}
535
536pub fn select_artifacts_adaptive(
539 mut artifacts: Vec<ArtifactRecord>,
540 policy: &ContextPolicy,
541) -> Vec<ArtifactRecord> {
542 dedup_artifacts(&mut artifacts);
544
545 if let Some(max_tokens) = policy.max_tokens {
549 let count = artifacts.len().max(1);
550 let per_artifact_budget = max_tokens / count;
551 let cap = per_artifact_budget.max(500).min(max_tokens);
553 for artifact in &mut artifacts {
554 let est = artifact.estimated_tokens.unwrap_or(0);
555 if est > cap * 2 {
556 microcompact_artifact(artifact, cap);
557 }
558 }
559 }
560
561 select_artifacts(artifacts, policy)
563}
564
/// Restricts which argument values a tool may be called with.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolArgConstraint {
    /// Tool-name glob this constraint applies to (see `glob_match`).
    pub tool: String,
    /// Globs the tool's first argument must match; empty list imposes nothing.
    pub arg_patterns: Vec<String>,
}
577
/// Validates a tool call against any matching arg-pattern constraints in
/// `policy`, rejecting (via `reject_policy`, defined elsewhere in this
/// module) when no pattern matches.
///
/// NOTE(review): only the FIRST argument value — the first object field, or
/// the bare string form of `args` — is checked; confirm multi-argument
/// constraints are intentionally out of scope.
pub fn enforce_tool_arg_constraints(
    policy: &CapabilityPolicy,
    tool_name: &str,
    args: &serde_json::Value,
) -> Result<(), VmError> {
    for constraint in &policy.tool_arg_constraints {
        // Constraint applies only to tools matching its glob.
        if !glob_match(&constraint.tool, tool_name) {
            continue;
        }
        // An empty pattern list imposes no restriction.
        if constraint.arg_patterns.is_empty() {
            continue;
        }
        // First object value, else the raw string form, else "".
        let first_arg = args
            .as_object()
            .and_then(|o| o.values().next())
            .and_then(|v| v.as_str())
            .or_else(|| args.as_str())
            .unwrap_or("");
        let matches = constraint
            .arg_patterns
            .iter()
            .any(|pattern| glob_match(pattern, first_arg));
        if !matches {
            return reject_policy(format!(
                "tool '{tool_name}' argument '{first_arg}' does not match allowed patterns: {:?}",
                constraint.arg_patterns
            ));
        }
    }
    Ok(())
}
611
/// Maps an artifact kind to its canonical name: known kinds pass through,
/// common aliases are rewritten, blank kinds become "artifact", and unknown
/// kinds are preserved as-is.
fn normalize_artifact_kind(kind: &str) -> String {
    // Kinds already in canonical form.
    const CANONICAL: &[&str] = &[
        "resource",
        "workspace_file",
        "editor_selection",
        "workspace_snapshot",
        "transcript_summary",
        "summary",
        "plan",
        "diff",
        "git_diff",
        "patch",
        "patch_set",
        "patch_proposal",
        "diff_review",
        "review_decision",
        "verification_bundle",
        "apply_intent",
        "verification_result",
        "test_result",
        "command_result",
        "provider_payload",
        "worker_result",
        "worker_notification",
        "artifact",
    ];
    if CANONICAL.contains(&kind) {
        return kind.to_string();
    }
    // Aliases, then the blank/unknown fallbacks.
    match kind {
        "file" => "workspace_file".to_string(),
        "transcript" => "transcript_summary".to_string(),
        "verification" => "verification_result".to_string(),
        "test" => "test_result".to_string(),
        other if other.trim().is_empty() => "artifact".to_string(),
        other => other.to_string(),
    }
}
645
/// Default selection priority for a canonical artifact kind (higher wins):
/// verification/test results rank highest, then review/patch material, plans,
/// workspace content, summaries, command output, and finally everything else.
fn default_artifact_priority(kind: &str) -> i64 {
    if matches!(kind, "verification_result" | "test_result") {
        100
    } else if kind == "verification_bundle" {
        95
    } else if matches!(
        kind,
        "diff" | "git_diff" | "patch" | "patch_set" | "patch_proposal" | "diff_review"
            | "review_decision" | "apply_intent"
    ) {
        90
    } else if kind == "plan" {
        80
    } else if matches!(
        kind,
        "workspace_file" | "workspace_snapshot" | "editor_selection" | "resource"
    ) {
        70
    } else if matches!(kind, "summary" | "transcript_summary") {
        60
    } else if kind == "command_result" {
        50
    } else {
        40
    }
}
659
/// Orders freshness labels for artifact selection: "fresh"/"live" (3) beat
/// "recent" (2), which beats unknown/missing labels (1); "stale" ranks last (0).
fn freshness_rank(value: Option<&str>) -> i64 {
    match value {
        Some("fresh") | Some("live") => 3,
        Some("recent") => 2,
        Some("stale") => 0,
        // None and unrecognized labels share a neutral middle rank.
        _ => 1,
    }
}
668
/// Per-tool policy metadata parsed from a tool definition's `policy` object
/// (see `parse_tool_runtime_policy`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolRuntimePolicyMetadata {
    /// Capability name → allowed operations for that capability.
    pub capabilities: BTreeMap<String, Vec<String>>,
    /// Declared side-effect level (e.g. "read_only", "network"), if any.
    pub side_effect_level: Option<String>,
    /// Names of parameters that carry filesystem paths.
    pub path_params: Vec<String>,
    /// Mutation classification label — NOTE(review): values not visible here.
    pub mutation_classification: Option<String>,
}
677
/// What an execution is allowed to do. Empty collections mean "unrestricted"
/// for that dimension (see `intersect`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CapabilityPolicy {
    /// Allowed tool names; empty = all tools.
    pub tools: Vec<String>,
    /// Capability name → allowed operations; empty = all capabilities.
    pub capabilities: BTreeMap<String, Vec<String>>,
    /// Filesystem roots the execution may touch; empty = unrestricted.
    pub workspace_roots: Vec<String>,
    /// Maximum side-effect level (ranked by `min_side_effect`).
    pub side_effect_level: Option<String>,
    /// Limit on nested execution depth, if any.
    pub recursion_limit: Option<usize>,
    /// Per-tool argument constraints (see `enforce_tool_arg_constraints`).
    #[serde(default)]
    pub tool_arg_constraints: Vec<ToolArgConstraint>,
    /// Per-tool runtime policy metadata, keyed by tool name.
    #[serde(default)]
    pub tool_metadata: BTreeMap<String, ToolRuntimePolicyMetadata>,
}
692
impl CapabilityPolicy {
    /// Narrows a requested policy against `self`, which acts as the host
    /// ceiling.
    ///
    /// Errors (with a human-readable reason) when the request names tools or
    /// capability operations the ceiling does not grant. Otherwise returns the
    /// intersection: for tools, capabilities, and workspace roots an empty
    /// list on either side means "unrestricted" and defers to the other side;
    /// side-effect levels and recursion limits take the minimum; tool-arg
    /// constraints from both sides are concatenated (both are enforced); tool
    /// metadata is kept for the surviving tools, preferring the requested
    /// side's entry.
    pub fn intersect(&self, requested: &CapabilityPolicy) -> Result<CapabilityPolicy, String> {
        // Side effects narrow to the less-permissive of the two levels.
        let side_effect_level = match (&self.side_effect_level, &requested.side_effect_level) {
            (Some(a), Some(b)) => Some(min_side_effect(a, b).to_string()),
            (Some(a), None) => Some(a.clone()),
            (None, Some(b)) => Some(b.clone()),
            (None, None) => None,
        };

        // Requesting a tool outside a non-empty ceiling is a hard error, not
        // a silent narrowing.
        if !self.tools.is_empty() {
            let denied: Vec<String> = requested
                .tools
                .iter()
                .filter(|tool| !self.tools.contains(*tool))
                .cloned()
                .collect();
            if !denied.is_empty() {
                return Err(format!(
                    "requested tools exceed host ceiling: {}",
                    denied.join(", ")
                ));
            }
        }

        // Same hard-error treatment for capability operations; an empty
        // ceiling capability map means unrestricted.
        for (capability, requested_ops) in &requested.capabilities {
            if let Some(allowed_ops) = self.capabilities.get(capability) {
                let denied: Vec<String> = requested_ops
                    .iter()
                    .filter(|op| !allowed_ops.contains(*op))
                    .cloned()
                    .collect();
                if !denied.is_empty() {
                    return Err(format!(
                        "requested capability operations exceed host ceiling: {}.{}",
                        capability,
                        denied.join(",")
                    ));
                }
            } else if !self.capabilities.is_empty() {
                return Err(format!(
                    "requested capability exceeds host ceiling: {capability}"
                ));
            }
        }

        // Element-wise intersection; empty side defers to the other.
        let tools = if self.tools.is_empty() {
            requested.tools.clone()
        } else if requested.tools.is_empty() {
            self.tools.clone()
        } else {
            requested
                .tools
                .iter()
                .filter(|tool| self.tools.contains(*tool))
                .cloned()
                .collect()
        };

        let capabilities = if self.capabilities.is_empty() {
            requested.capabilities.clone()
        } else if requested.capabilities.is_empty() {
            self.capabilities.clone()
        } else {
            requested
                .capabilities
                .iter()
                .filter_map(|(capability, requested_ops)| {
                    self.capabilities.get(capability).map(|allowed_ops| {
                        (
                            capability.clone(),
                            requested_ops
                                .iter()
                                .filter(|op| allowed_ops.contains(*op))
                                .cloned()
                                .collect::<Vec<_>>(),
                        )
                    })
                })
                .collect()
        };

        let workspace_roots = if self.workspace_roots.is_empty() {
            requested.workspace_roots.clone()
        } else if requested.workspace_roots.is_empty() {
            self.workspace_roots.clone()
        } else {
            requested
                .workspace_roots
                .iter()
                .filter(|root| self.workspace_roots.contains(*root))
                .cloned()
                .collect()
        };

        // Most restrictive recursion limit wins.
        let recursion_limit = match (self.recursion_limit, requested.recursion_limit) {
            (Some(a), Some(b)) => Some(a.min(b)),
            (Some(a), None) => Some(a),
            (None, Some(b)) => Some(b),
            (None, None) => None,
        };

        // Constraints accumulate — the result enforces both sides'.
        let mut tool_arg_constraints = self.tool_arg_constraints.clone();
        tool_arg_constraints.extend(requested.tool_arg_constraints.clone());

        // Keep metadata only for surviving tools; prefer the requested side's
        // entry when both define one.
        let tool_metadata = tools
            .iter()
            .filter_map(|tool| {
                requested
                    .tool_metadata
                    .get(tool)
                    .or_else(|| self.tool_metadata.get(tool))
                    .cloned()
                    .map(|metadata| (tool.clone(), metadata))
            })
            .collect();

        Ok(CapabilityPolicy {
            tools,
            capabilities,
            workspace_roots,
            side_effect_level,
            recursion_limit,
            tool_arg_constraints,
            tool_metadata,
        })
    }
}
821
/// Returns the less-permissive of two side-effect levels, ordered
/// none < read_only < workspace_write < process_exec < network; unrecognized
/// labels rank above everything (i.e. they never win the minimum). Ties
/// return `a`.
fn min_side_effect<'a>(a: &'a str, b: &'a str) -> &'a str {
    fn rank(level: &str) -> usize {
        ["none", "read_only", "workspace_write", "process_exec", "network"]
            .iter()
            .position(|known| *known == level)
            .unwrap_or(5)
    }
    if rank(b) < rank(a) {
        b
    } else {
        a
    }
}
839
/// Per-stage model selection and sampling overrides; `None` fields defer to
/// the runtime's defaults.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ModelPolicy {
    /// Provider override (e.g. vendor name), if any.
    pub provider: Option<String>,
    /// Exact model override, if any.
    pub model: Option<String>,
    /// Abstract tier label resolved elsewhere — NOTE(review): values not visible here.
    pub model_tier: Option<String>,
    /// Sampling temperature override.
    pub temperature: Option<f64>,
    /// Response token limit override.
    pub max_tokens: Option<i64>,
}
849
/// Per-stage transcript handling configuration.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct TranscriptPolicy {
    /// Transcript mode label — NOTE(review): values interpreted elsewhere.
    pub mode: Option<String>,
    /// Visibility label — NOTE(review): values interpreted elsewhere.
    pub visibility: Option<String>,
    /// Whether to summarize the transcript.
    pub summarize: bool,
    /// Whether to auto-compact the transcript.
    pub compact: bool,
    /// Number of recent messages to keep when compacting, if set.
    pub keep_last: Option<usize>,
}
859
/// Controls which artifacts are assembled into a stage's context window.
/// Consumed by `select_artifacts` (defined elsewhere) and
/// `select_artifacts_adaptive`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ContextPolicy {
    /// Cap on the number of selected artifacts.
    pub max_artifacts: Option<usize>,
    /// Total token budget; also drives per-artifact compaction in
    /// `select_artifacts_adaptive`.
    pub max_tokens: Option<usize>,
    /// Tokens reserved out of the budget — NOTE(review): applied elsewhere.
    pub reserve_tokens: Option<usize>,
    /// Only artifacts of these kinds are eligible (empty = all).
    pub include_kinds: Vec<String>,
    /// Artifacts of these kinds are excluded.
    pub exclude_kinds: Vec<String>,
    /// Kinds to rank ahead of others.
    pub prioritize_kinds: Vec<String>,
    /// Artifact ids that must always be included.
    pub pinned_ids: Vec<String>,
    /// Only artifacts from these stages are eligible (empty = all).
    pub include_stages: Vec<String>,
    /// Prefer newer artifacts when ranking.
    pub prefer_recent: bool,
    /// Prefer fresher artifacts (see `freshness_rank`) when ranking.
    pub prefer_fresh: bool,
    /// Rendering mode label — NOTE(review): interpreted elsewhere.
    pub render: Option<String>,
}
875
/// Per-stage retry behavior.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RetryPolicy {
    /// Maximum attempts (0 via `Default` — NOTE(review): confirm how the
    /// executor treats zero).
    pub max_attempts: usize,
    /// Run verification between attempts.
    pub verify: bool,
    /// Attempt automated repair on failure.
    pub repair: bool,
}
883
/// Declares what a stage consumes or produces, for contract checking.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct StageContract {
    /// Artifact kinds accepted as inputs.
    pub input_kinds: Vec<String>,
    /// Artifact kinds the stage emits.
    pub output_kinds: Vec<String>,
    /// Minimum number of input artifacts required, if bounded.
    pub min_inputs: Option<usize>,
    /// Maximum number of input artifacts allowed, if bounded.
    pub max_inputs: Option<usize>,
    /// Whether a transcript must be present.
    pub require_transcript: bool,
    /// JSON schema the stage output must satisfy, if any.
    pub schema: Option<serde_json::Value>,
}
894
/// Maps a node's possible outcomes to the branch labels used on outgoing
/// edges (see `WorkflowEdge::branch`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BranchSemantics {
    /// Branch taken on success.
    pub success: Option<String>,
    /// Branch taken on failure.
    pub failure: Option<String>,
    /// Branch taken when verification passes.
    pub verify_pass: Option<String>,
    /// Branch taken when verification fails.
    pub verify_fail: Option<String>,
    /// Branch for a true condition result.
    pub condition_true: Option<String>,
    /// Branch for a false condition result.
    pub condition_false: Option<String>,
    /// Branch taken to continue a loop.
    pub loop_continue: Option<String>,
    /// Branch taken to exit a loop.
    pub loop_exit: Option<String>,
    /// Branch taken when escalating to a human/queue.
    pub escalation: Option<String>,
}
908
/// Fan-out configuration for map-style nodes.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct MapPolicy {
    /// Items to fan out over.
    pub items: Vec<serde_json::Value>,
    /// Artifact kind wrapping each item, if any.
    pub item_artifact_kind: Option<String>,
    /// Artifact kind of each branch's output, if any.
    pub output_kind: Option<String>,
    /// Cap on the number of items processed, if bounded.
    pub max_items: Option<usize>,
}
917
/// Fan-in configuration for join-style nodes.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct JoinPolicy {
    /// Join strategy name — NOTE(review): values interpreted elsewhere.
    pub strategy: String,
    /// Whether every incoming branch must complete before joining.
    pub require_all_inputs: bool,
    /// Minimum completed branches required to join, if bounded.
    pub min_completed: Option<usize>,
}
925
/// Configuration for reduce-style nodes combining multiple inputs.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReducePolicy {
    /// Reduce strategy name — NOTE(review): values interpreted elsewhere.
    pub strategy: String,
    /// Separator used when concatenating inputs, if applicable.
    pub separator: Option<String>,
    /// Artifact kind of the reduced output, if any.
    pub output_kind: Option<String>,
}
933
/// Configuration for escalating a node's work to a human or another queue.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EscalationPolicy {
    /// Escalation severity level, if any.
    pub level: Option<String>,
    /// Target queue name, if any.
    pub queue: Option<String>,
    /// Human-readable reason for the escalation, if any.
    pub reason: Option<String>,
}
941
/// A unit of context (file, diff, summary, result, …) passed between stages.
/// `normalize` backfills id, timestamps, canonical kind, token estimate, and
/// priority.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
    /// Type tag serialized as `_type`; "artifact" after `normalize`.
    #[serde(rename = "_type")]
    pub type_name: String,
    /// Unique id; generated by `normalize` when empty.
    pub id: String,
    /// Canonical kind (see `normalize_artifact_kind`).
    pub kind: String,
    /// Human-readable title, if any.
    pub title: Option<String>,
    /// Text payload; drives token estimation, dedup, and compaction.
    pub text: Option<String>,
    /// Structured payload, if any.
    pub data: Option<serde_json::Value>,
    /// Where the artifact came from, if recorded.
    pub source: Option<String>,
    /// Creation timestamp; filled by `normalize` when empty.
    pub created_at: String,
    /// Freshness label ("fresh"/"live"/"recent"/"stale"); see `freshness_rank`.
    pub freshness: Option<String>,
    /// Selection priority; defaulted per kind by `normalize`.
    pub priority: Option<i64>,
    /// Ids of artifacts this one was derived from.
    pub lineage: Vec<String>,
    /// Relevance score — NOTE(review): scoring happens elsewhere.
    pub relevance: Option<f64>,
    /// Token estimate (~len/4, rounded up); filled by `normalize` when text exists.
    pub estimated_tokens: Option<usize>,
    /// Stage that produced the artifact, if recorded.
    pub stage: Option<String>,
    /// Free-form extra metadata.
    pub metadata: BTreeMap<String, serde_json::Value>,
}
962
963impl ArtifactRecord {
964 pub fn normalize(mut self) -> Self {
965 if self.type_name.is_empty() {
966 self.type_name = "artifact".to_string();
967 }
968 if self.id.is_empty() {
969 self.id = new_id("artifact");
970 }
971 if self.created_at.is_empty() {
972 self.created_at = now_rfc3339();
973 }
974 if self.kind.is_empty() {
975 self.kind = "artifact".to_string();
976 }
977 self.kind = normalize_artifact_kind(&self.kind);
978 if self.estimated_tokens.is_none() {
979 self.estimated_tokens = self
980 .text
981 .as_ref()
982 .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
983 }
984 if self.priority.is_none() {
985 self.priority = Some(default_artifact_priority(&self.kind));
986 }
987 self
988 }
989}
990
/// A single node in a workflow graph, bundling its prompt and every
/// per-stage policy.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowNode {
    /// Node id; `None` presumably keyed by the graph's node map — confirm.
    pub id: Option<String>,
    /// Node kind label — NOTE(review): values interpreted by the executor.
    pub kind: String,
    /// Execution mode label, if any.
    pub mode: Option<String>,
    /// User prompt for LLM-backed nodes.
    pub prompt: Option<String>,
    /// System prompt for LLM-backed nodes.
    pub system: Option<String>,
    /// Human-readable task label.
    pub task_label: Option<String>,
    /// Sentinel string signaling completion, if used.
    pub done_sentinel: Option<String>,
    /// Tool definitions/registry (see `workflow_tool_names` for accepted shapes).
    pub tools: serde_json::Value,
    /// Model/provider overrides.
    pub model_policy: ModelPolicy,
    /// Transcript handling configuration.
    pub transcript_policy: TranscriptPolicy,
    /// Context assembly configuration.
    pub context_policy: ContextPolicy,
    /// Retry behavior.
    pub retry_policy: RetryPolicy,
    /// Capability ceiling for this node.
    pub capability_policy: CapabilityPolicy,
    /// Contract on consumed artifacts.
    pub input_contract: StageContract,
    /// Contract on produced artifacts.
    pub output_contract: StageContract,
    /// Outcome → branch-label mapping.
    pub branch_semantics: BranchSemantics,
    /// Fan-out configuration.
    pub map_policy: MapPolicy,
    /// Fan-in configuration.
    pub join_policy: JoinPolicy,
    /// Reduce configuration.
    pub reduce_policy: ReducePolicy,
    /// Escalation configuration.
    pub escalation_policy: EscalationPolicy,
    /// Verification spec — NOTE(review): shape interpreted elsewhere.
    pub verify: Option<serde_json::Value>,
    /// Free-form extra metadata.
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1017
1018pub fn workflow_tool_names(value: &serde_json::Value) -> Vec<String> {
1019 match value {
1020 serde_json::Value::Null => Vec::new(),
1021 serde_json::Value::Array(items) => items
1022 .iter()
1023 .filter_map(|item| match item {
1024 serde_json::Value::Object(map) => map
1025 .get("name")
1026 .and_then(|value| value.as_str())
1027 .filter(|name| !name.is_empty())
1028 .map(|name| name.to_string()),
1029 _ => None,
1030 })
1031 .collect(),
1032 serde_json::Value::Object(map) => {
1033 if map.get("_type").and_then(|value| value.as_str()) == Some("tool_registry") {
1034 return map
1035 .get("tools")
1036 .map(workflow_tool_names)
1037 .unwrap_or_default();
1038 }
1039 map.get("name")
1040 .and_then(|value| value.as_str())
1041 .filter(|name| !name.is_empty())
1042 .map(|name| vec![name.to_string()])
1043 .unwrap_or_default()
1044 }
1045 _ => Vec::new(),
1046 }
1047}
1048
/// Returns the most permissive side-effect level in `levels`, ordered
/// none < read_only < workspace_write < process_exec < network; unrecognized
/// labels rank above everything (treated as most dangerous). `None` for an
/// empty iterator.
fn max_side_effect_level(levels: impl Iterator<Item = String>) -> Option<String> {
    const ORDER: [&str; 5] = [
        "none",
        "read_only",
        "workspace_write",
        "process_exec",
        "network",
    ];
    let rank = |level: &str| ORDER.iter().position(|known| *known == level).unwrap_or(ORDER.len());
    levels.max_by_key(|level| rank(level))
}
1062
1063fn parse_tool_runtime_policy(
1064 map: &serde_json::Map<String, serde_json::Value>,
1065) -> ToolRuntimePolicyMetadata {
1066 let Some(policy) = map.get("policy").and_then(|value| value.as_object()) else {
1067 return ToolRuntimePolicyMetadata::default();
1068 };
1069
1070 let capabilities = policy
1071 .get("capabilities")
1072 .and_then(|value| value.as_object())
1073 .map(|caps| {
1074 caps.iter()
1075 .map(|(capability, ops)| {
1076 let values = ops
1077 .as_array()
1078 .map(|items| {
1079 items
1080 .iter()
1081 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1082 .collect::<Vec<_>>()
1083 })
1084 .unwrap_or_default();
1085 (capability.clone(), values)
1086 })
1087 .collect::<BTreeMap<_, _>>()
1088 })
1089 .unwrap_or_default();
1090
1091 let path_params = policy
1092 .get("path_params")
1093 .and_then(|value| value.as_array())
1094 .map(|items| {
1095 items
1096 .iter()
1097 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1098 .collect::<Vec<_>>()
1099 })
1100 .unwrap_or_default();
1101
1102 ToolRuntimePolicyMetadata {
1103 capabilities,
1104 side_effect_level: policy
1105 .get("side_effect_level")
1106 .and_then(|value| value.as_str())
1107 .map(|s| s.to_string()),
1108 path_params,
1109 mutation_classification: policy
1110 .get("mutation_classification")
1111 .and_then(|value| value.as_str())
1112 .map(|s| s.to_string()),
1113 }
1114}
1115
1116pub fn workflow_tool_metadata(
1117 value: &serde_json::Value,
1118) -> BTreeMap<String, ToolRuntimePolicyMetadata> {
1119 match value {
1120 serde_json::Value::Null => BTreeMap::new(),
1121 serde_json::Value::Array(items) => items
1122 .iter()
1123 .filter_map(|item| match item {
1124 serde_json::Value::Object(map) => map
1125 .get("name")
1126 .and_then(|value| value.as_str())
1127 .filter(|name| !name.is_empty())
1128 .map(|name| (name.to_string(), parse_tool_runtime_policy(map))),
1129 _ => None,
1130 })
1131 .collect(),
1132 serde_json::Value::Object(map) => {
1133 if map.get("_type").and_then(|value| value.as_str()) == Some("tool_registry") {
1134 return map
1135 .get("tools")
1136 .map(workflow_tool_metadata)
1137 .unwrap_or_default();
1138 }
1139 map.get("name")
1140 .and_then(|value| value.as_str())
1141 .filter(|name| !name.is_empty())
1142 .map(|name| {
1143 let mut metadata = BTreeMap::new();
1144 metadata.insert(name.to_string(), parse_tool_runtime_policy(map));
1145 metadata
1146 })
1147 .unwrap_or_default()
1148 }
1149 _ => BTreeMap::new(),
1150 }
1151}
1152
1153pub fn workflow_tool_policy_from_tools(value: &serde_json::Value) -> CapabilityPolicy {
1154 let tools = workflow_tool_names(value);
1155 let tool_metadata = workflow_tool_metadata(value);
1156 let mut capabilities: BTreeMap<String, Vec<String>> = BTreeMap::new();
1157 for metadata in tool_metadata.values() {
1158 for (capability, ops) in &metadata.capabilities {
1159 let entry = capabilities.entry(capability.clone()).or_default();
1160 for op in ops {
1161 if !entry.contains(op) {
1162 entry.push(op.clone());
1163 }
1164 }
1165 entry.sort();
1166 }
1167 }
1168 let side_effect_level = max_side_effect_level(
1169 tool_metadata
1170 .values()
1171 .filter_map(|metadata| metadata.side_effect_level.clone()),
1172 );
1173 CapabilityPolicy {
1174 tools,
1175 capabilities,
1176 workspace_roots: Vec::new(),
1177 side_effect_level,
1178 recursion_limit: None,
1179 tool_arg_constraints: Vec::new(),
1180 tool_metadata,
1181 }
1182}
1183
/// Directed edge between two workflow nodes.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowEdge {
    /// Source node id.
    pub from: String,
    /// Destination node id.
    pub to: String,
    /// Branch selector; condition nodes require "true"/"false" branch
    /// edges (see `validate_workflow`). `None` means unconditional.
    pub branch: Option<String>,
    /// Optional human-readable label; not used for routing.
    pub label: Option<String>,
}
1192
/// A workflow definition: nodes keyed by id plus directed edges.
/// Normalized (ids, entry, per-node defaults) by `normalize_workflow_value`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowGraph {
    /// Serialized type tag; defaulted to "workflow_graph" when empty.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    /// Defaulted to 1 when 0 during normalization.
    pub version: usize,
    /// Id of the node execution starts from; must exist in `nodes`.
    pub entry: String,
    pub nodes: BTreeMap<String, WorkflowNode>,
    pub edges: Vec<WorkflowEdge>,
    /// Capabilities the graph requests; checked against a host ceiling in
    /// `validate_workflow`.
    pub capability_policy: CapabilityPolicy,
    pub metadata: BTreeMap<String, serde_json::Value>,
    pub audit_log: Vec<WorkflowAuditEntry>,
}
1208
/// One entry in a workflow's audit log.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowAuditEntry {
    pub id: String,
    /// Name of the operation being audited.
    pub op: String,
    /// Node the operation applied to, when node-scoped.
    pub node_id: Option<String>,
    pub timestamp: String,
    pub reason: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1219
/// Aggregated LLM usage for a stage or a whole run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct LlmUsageRecord {
    pub input_tokens: i64,
    pub output_tokens: i64,
    // Field name implies milliseconds — confirm against the producer.
    pub total_duration_ms: i64,
    pub call_count: i64,
    // NOTE(review): currency/unit of total_cost is not visible here.
    pub total_cost: f64,
    /// Models involved in the aggregated calls.
    pub models: Vec<String>,
}
1230
/// Execution record of one workflow node within a run. Compared against
/// `ReplayStageAssertion`s in `evaluate_run_against_fixture`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageRecord {
    pub id: String,
    /// Id of the workflow node this stage executed.
    pub node_id: String,
    pub kind: String,
    pub status: String,
    pub outcome: String,
    /// Branch taken when leaving this stage, if any.
    pub branch: Option<String>,
    pub started_at: String,
    pub finished_at: Option<String>,
    /// User-visible output; a (truncated) snippet of it is captured into
    /// replay fixtures.
    pub visible_text: Option<String>,
    pub private_reasoning: Option<String>,
    pub transcript: Option<serde_json::Value>,
    pub verification: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    /// Artifacts produced by this stage; their kinds feed fixture assertions.
    pub artifacts: Vec<ArtifactRecord>,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
    /// Per-attempt history when the stage was retried.
    pub attempts: Vec<RunStageAttemptRecord>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1253
/// One attempt of a stage that may have been retried.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageAttemptRecord {
    /// 1-based attempt counter — presumed; confirm against the producer.
    pub attempt: usize,
    pub status: String,
    pub outcome: String,
    pub branch: Option<String>,
    pub error: Option<String>,
    pub verification: Option<serde_json::Value>,
    pub started_at: String,
    pub finished_at: Option<String>,
}
1266
/// A recorded traversal of one workflow edge during a run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunTransitionRecord {
    pub id: String,
    /// Stage/node the transition left from; `None` for the initial entry.
    pub from_stage_id: Option<String>,
    pub from_node_id: Option<String>,
    pub to_node_id: String,
    /// Branch label of the edge taken, if any.
    pub branch: Option<String>,
    pub timestamp: String,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
}
1279
/// A persisted resumption point within a run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunCheckpointRecord {
    pub id: String,
    /// Nodes eligible to execute when resuming from this checkpoint.
    pub ready_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub last_stage_id: Option<String>,
    pub persisted_at: String,
    /// Why the checkpoint was taken.
    pub reason: String,
}
1290
/// Expected outcomes captured from a run, used to check later replays
/// (see `replay_fixture_from_run` / `evaluate_run_against_fixture`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayFixture {
    /// Serialized type tag ("replay_fixture").
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    /// Run the fixture was captured from.
    pub source_run_id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    pub created_at: String,
    /// Overall run status a replay must reproduce.
    pub expected_status: String,
    /// One assertion per captured stage.
    pub stage_assertions: Vec<ReplayStageAssertion>,
}
1304
/// Per-stage expectations within a [`ReplayFixture`].
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayStageAssertion {
    pub node_id: String,
    pub expected_status: String,
    pub expected_outcome: String,
    pub expected_branch: Option<String>,
    /// Artifact kinds the replayed stage must produce.
    pub required_artifact_kinds: Vec<String>,
    /// Substring the stage's visible text must contain (capped at 80 chars
    /// when captured from a run).
    pub visible_text_contains: Option<String>,
}
1315
/// Result of evaluating one run against one replay fixture.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalReport {
    /// True when `failures` is empty.
    pub pass: bool,
    /// Human-readable mismatch descriptions.
    pub failures: Vec<String>,
    /// Number of stages in the evaluated run.
    pub stage_count: usize,
}
1323
/// One case's result within a [`ReplayEvalSuiteReport`].
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalCaseReport {
    pub run_id: String,
    pub workflow_id: String,
    /// Case label from the suite manifest, when provided.
    pub label: Option<String>,
    pub pass: bool,
    pub failures: Vec<String>,
    pub stage_count: usize,
    /// Path the run record was loaded from, when file-based.
    pub source_path: Option<String>,
    /// Diff against a baseline run, when the case requested one.
    pub comparison: Option<RunDiffReport>,
}
1336
/// Aggregate result of an eval suite; `pass` is true only when every case
/// passed (`failed == 0`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalSuiteReport {
    pub pass: bool,
    pub total: usize,
    pub passed: usize,
    pub failed: usize,
    pub cases: Vec<ReplayEvalCaseReport>,
}
1346
/// A single stage-level difference between two runs.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunStageDiffRecord {
    pub node_id: String,
    /// Kind of change — exact vocabulary set by `diff_run_records`.
    pub change: String,
    pub details: Vec<String>,
}
1354
/// Structured comparison of two run records (see `diff_run_records`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunDiffReport {
    pub left_run_id: String,
    pub right_run_id: String,
    /// True when no differences were found.
    pub identical: bool,
    pub status_changed: bool,
    pub left_status: String,
    pub right_status: String,
    pub stage_diffs: Vec<RunStageDiffRecord>,
    /// right-count minus left-count deltas (signed).
    pub transition_count_delta: isize,
    pub artifact_count_delta: isize,
    pub checkpoint_count_delta: isize,
}
1369
/// Declarative list of replay-eval cases, consumed by
/// `evaluate_run_suite_manifest`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteManifest {
    /// Serialized type tag ("eval_suite_manifest").
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    /// Directory relative case paths are resolved against.
    pub base_dir: Option<String>,
    pub cases: Vec<EvalSuiteCase>,
}
1380
/// One case in an [`EvalSuiteManifest`].
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteCase {
    pub label: Option<String>,
    /// Path to the run record to evaluate (relative to the manifest's
    /// `base_dir` unless absolute).
    pub run_path: String,
    /// Explicit fixture file; when absent, the run's embedded fixture is
    /// used, or one is synthesized from the run.
    pub fixture_path: Option<String>,
    /// Optional baseline run path; any diff fails the case.
    pub compare_to: Option<String>,
}
1389
/// Full record of one workflow execution: stages, transitions,
/// checkpoints, artifacts, policy, usage, and child runs. Persisted as
/// JSON by `save_run_record` / `load_run_record`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunRecord {
    /// Serialized type tag; defaulted to "run_record" on normalize.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    /// Task description the run was started with.
    pub task: String,
    /// Defaulted to "running" on normalize when empty.
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    /// Set for child runs spawned from another run.
    pub parent_run_id: Option<String>,
    /// Top of the run tree; defaulted to this run's own id on normalize.
    pub root_run_id: Option<String>,
    pub stages: Vec<RunStageRecord>,
    pub transitions: Vec<RunTransitionRecord>,
    pub checkpoints: Vec<RunCheckpointRecord>,
    pub pending_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub child_runs: Vec<RunChildRecord>,
    pub artifacts: Vec<ArtifactRecord>,
    pub policy: CapabilityPolicy,
    pub execution: Option<RunExecutionRecord>,
    pub transcript: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    /// Captured expectations; synthesized on normalize when absent.
    pub replay_fixture: Option<ReplayFixture>,
    pub trace_spans: Vec<RunTraceSpanRecord>,
    pub metadata: BTreeMap<String, serde_json::Value>,
    /// Where this record was last saved, if known.
    pub persisted_path: Option<String>,
}
1420
/// A timing span captured during a run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunTraceSpanRecord {
    pub span_id: u64,
    /// Parent span id; `None` for root spans.
    pub parent_id: Option<u64>,
    pub kind: String,
    pub name: String,
    // Field names imply milliseconds — confirm against the producer.
    pub start_ms: u64,
    pub duration_ms: u64,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1432
/// Bookkeeping for a child run spawned by a worker from a parent run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunChildRecord {
    pub worker_id: String,
    pub worker_name: String,
    /// Stage of the parent run that spawned this child, if any.
    pub parent_stage_id: Option<String>,
    // Session fields mirror MutationSessionRecord (scope/approval).
    pub session_id: Option<String>,
    pub parent_session_id: Option<String>,
    pub mutation_scope: Option<String>,
    pub approval_mode: Option<String>,
    pub task: String,
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    /// Id/paths of the child run's own record, once available.
    pub run_id: Option<String>,
    pub run_path: Option<String>,
    pub snapshot_path: Option<String>,
    pub execution: Option<RunExecutionRecord>,
}
1452
/// Environment in which a run executed (working dir, env vars, and
/// version-control context such as worktree/branch).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunExecutionRecord {
    pub cwd: Option<String>,
    pub source_dir: Option<String>,
    pub env: BTreeMap<String, String>,
    /// Execution adapter name — semantics set by the runner, not here.
    pub adapter: Option<String>,
    pub repo_path: Option<String>,
    pub worktree_path: Option<String>,
    pub branch: Option<String>,
    pub base_ref: Option<String>,
    /// Cleanup strategy for the worktree — exact vocabulary not visible here.
    pub cleanup: Option<String>,
}
1466
/// Outcome of `validate_workflow`: `valid` is true iff `errors` is empty;
/// `warnings` never affect validity.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowValidationReport {
    pub valid: bool,
    pub errors: Vec<String>,
    pub warnings: Vec<String>,
    /// Nodes reachable from the entry node via edges.
    pub reachable_nodes: Vec<String>,
}
1475
1476fn parse_json_payload<T: for<'de> Deserialize<'de>>(
1477 json: serde_json::Value,
1478 label: &str,
1479) -> Result<T, VmError> {
1480 let payload = json.to_string();
1481 let mut deserializer = serde_json::Deserializer::from_str(&payload);
1482 let mut tracker = serde_path_to_error::Track::new();
1483 let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
1484 T::deserialize(path_deserializer).map_err(|error| {
1485 let snippet = if payload.len() > 600 {
1486 format!("{}...", &payload[..600])
1487 } else {
1488 payload.clone()
1489 };
1490 VmError::Runtime(format!(
1491 "{label} parse error at {}: {} | payload={}",
1492 tracker.path(),
1493 error,
1494 snippet
1495 ))
1496 })
1497}
1498
1499fn parse_json_value<T: for<'de> Deserialize<'de>>(value: &VmValue) -> Result<T, VmError> {
1500 parse_json_payload(vm_value_to_json(value), "orchestration")
1501}
1502
1503pub fn parse_workflow_node_value(value: &VmValue, label: &str) -> Result<WorkflowNode, VmError> {
1504 parse_json_payload(vm_value_to_json(value), label)
1505}
1506
1507pub fn parse_workflow_node_json(
1508 json: serde_json::Value,
1509 label: &str,
1510) -> Result<WorkflowNode, VmError> {
1511 parse_json_payload(json, label)
1512}
1513
1514pub fn parse_workflow_edge_json(
1515 json: serde_json::Value,
1516 label: &str,
1517) -> Result<WorkflowEdge, VmError> {
1518 parse_json_payload(json, label)
1519}
1520
/// Parses a VM value into a [`WorkflowGraph`] and fills in defaults.
///
/// Two input shapes are supported: a full graph (with `nodes`/`edges`),
/// and a flat "act/verify/repair" shorthand whose top-level keys are
/// expanded into nodes with implicit act->verify->repair->verify edges.
/// Afterwards, missing ids, version, entry node, node kinds, join/reduce
/// strategies, output contracts, and retry attempts are defaulted.
///
/// # Errors
/// Propagates parse errors from the underlying JSON deserialization.
pub fn normalize_workflow_value(value: &VmValue) -> Result<WorkflowGraph, VmError> {
    let mut graph: WorkflowGraph = parse_json_value(value)?;
    let as_dict = value.as_dict().cloned().unwrap_or_default();

    // Shorthand expansion: only when the parsed graph has no nodes at all.
    if graph.nodes.is_empty() {
        for key in ["act", "verify", "repair"] {
            if let Some(node_value) = as_dict.get(key) {
                let mut node: WorkflowNode = parse_json_value(node_value)?;
                let raw_node = node_value.as_dict().cloned().unwrap_or_default();
                node.id = Some(key.to_string());
                if node.kind.is_empty() {
                    node.kind = if key == "verify" {
                        "verify".to_string()
                    } else {
                        "stage".to_string()
                    };
                }
                // Model settings may live at the top level of the shorthand
                // dict; inherit them into each node that doesn't set its own.
                if node.model_policy.provider.is_none() {
                    node.model_policy.provider = as_dict
                        .get("provider")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model.is_none() {
                    node.model_policy.model = as_dict
                        .get("model")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model_tier.is_none() {
                    node.model_policy.model_tier = as_dict
                        .get("model_tier")
                        .or_else(|| as_dict.get("tier"))
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.temperature.is_none() {
                    // Accept either a float or an int for temperature.
                    node.model_policy.temperature = as_dict.get("temperature").and_then(|value| {
                        if let VmValue::Float(number) = value {
                            Some(*number)
                        } else {
                            value.as_int().map(|number| number as f64)
                        }
                    });
                }
                if node.model_policy.max_tokens.is_none() {
                    node.model_policy.max_tokens =
                        as_dict.get("max_tokens").and_then(|value| value.as_int());
                }
                if node.mode.is_none() {
                    node.mode = as_dict
                        .get("mode")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.done_sentinel.is_none() {
                    node.done_sentinel = as_dict
                        .get("done_sentinel")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                // Shorthand verify nodes may spell their check fields inline;
                // lift them into a structured `verify` payload.
                if key == "verify"
                    && node.verify.is_none()
                    && (raw_node.contains_key("assert_text")
                        || raw_node.contains_key("command")
                        || raw_node.contains_key("expect_status")
                        || raw_node.contains_key("expect_text"))
                {
                    node.verify = Some(serde_json::json!({
                        "assert_text": raw_node.get("assert_text").map(vm_value_to_json),
                        "command": raw_node.get("command").map(vm_value_to_json),
                        "expect_status": raw_node.get("expect_status").map(vm_value_to_json),
                        "expect_text": raw_node.get("expect_text").map(vm_value_to_json),
                    }));
                }
                graph.nodes.insert(key.to_string(), node);
            }
        }
        if graph.entry.is_empty() && graph.nodes.contains_key("act") {
            graph.entry = "act".to_string();
        }
        // Implicit edges: act -> verify, verify -(failed)-> repair,
        // repair -(retry)-> verify.
        if graph.edges.is_empty() && graph.nodes.contains_key("act") {
            if graph.nodes.contains_key("verify") {
                graph.edges.push(WorkflowEdge {
                    from: "act".to_string(),
                    to: "verify".to_string(),
                    branch: None,
                    label: None,
                });
            }
            if graph.nodes.contains_key("repair") {
                graph.edges.push(WorkflowEdge {
                    from: "verify".to_string(),
                    to: "repair".to_string(),
                    branch: Some("failed".to_string()),
                    label: None,
                });
                graph.edges.push(WorkflowEdge {
                    from: "repair".to_string(),
                    to: "verify".to_string(),
                    branch: Some("retry".to_string()),
                    label: None,
                });
            }
        }
    }

    // Graph-level defaults.
    if graph.type_name.is_empty() {
        graph.type_name = "workflow_graph".to_string();
    }
    if graph.id.is_empty() {
        graph.id = new_id("workflow");
    }
    if graph.version == 0 {
        graph.version = 1;
    }
    if graph.entry.is_empty() {
        // Fall back to the first node id (BTreeMap order), or "act".
        graph.entry = graph
            .nodes
            .keys()
            .next()
            .cloned()
            .unwrap_or_else(|| "act".to_string());
    }
    // Node-level defaults.
    for (node_id, node) in &mut graph.nodes {
        if node.id.is_none() {
            node.id = Some(node_id.clone());
        }
        if node.kind.is_empty() {
            node.kind = "stage".to_string();
        }
        if node.join_policy.strategy.is_empty() {
            node.join_policy.strategy = "all".to_string();
        }
        if node.reduce_policy.strategy.is_empty() {
            node.reduce_policy.strategy = "concat".to_string();
        }
        if node.output_contract.output_kinds.is_empty() {
            // Default output kind depends on the node kind.
            node.output_contract.output_kinds = vec![match node.kind.as_str() {
                "verify" => "verification_result".to_string(),
                "reduce" => node
                    .reduce_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "summary".to_string()),
                "map" => node
                    .map_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "artifact".to_string()),
                "escalation" => "plan".to_string(),
                _ => "artifact".to_string(),
            }];
        }
        if node.retry_policy.max_attempts == 0 {
            node.retry_policy.max_attempts = 1;
        }
    }
    Ok(graph)
}
1681
/// Validates a workflow graph's structure and (optionally) its capability
/// requests against a host `ceiling` policy.
///
/// Errors (invalid): missing entry node, dangling edges, per-kind
/// structural violations, inconsistent input contracts, and capability
/// requests outside the ceiling. Warnings (still valid): unreachable
/// nodes and joins with fewer than two incoming edges.
pub fn validate_workflow(
    graph: &WorkflowGraph,
    ceiling: Option<&CapabilityPolicy>,
) -> WorkflowValidationReport {
    let mut errors = Vec::new();
    let mut warnings = Vec::new();

    if !graph.nodes.contains_key(&graph.entry) {
        errors.push(format!("entry node does not exist: {}", graph.entry));
    }

    // Every edge endpoint must name an existing node.
    let node_ids: BTreeSet<String> = graph.nodes.keys().cloned().collect();
    for edge in &graph.edges {
        if !node_ids.contains(&edge.from) {
            errors.push(format!("edge.from references unknown node: {}", edge.from));
        }
        if !node_ids.contains(&edge.to) {
            errors.push(format!("edge.to references unknown node: {}", edge.to));
        }
    }

    // Unreachable nodes are a warning, not an error.
    let reachable_nodes = reachable_nodes(graph);
    for node_id in &node_ids {
        if !reachable_nodes.contains(node_id) {
            warnings.push(format!("node is unreachable: {node_id}"));
        }
    }

    // Per-node structural checks, keyed on the node kind.
    for (node_id, node) in &graph.nodes {
        let incoming = graph
            .edges
            .iter()
            .filter(|edge| edge.to == *node_id)
            .count();
        let outgoing: Vec<&WorkflowEdge> = graph
            .edges
            .iter()
            .filter(|edge| edge.from == *node_id)
            .collect();
        if let Some(min_inputs) = node.input_contract.min_inputs {
            if let Some(max_inputs) = node.input_contract.max_inputs {
                if min_inputs > max_inputs {
                    errors.push(format!(
                        "node {node_id}: input contract min_inputs exceeds max_inputs"
                    ));
                }
            }
        }
        match node.kind.as_str() {
            "condition" => {
                // Condition nodes must cover both outcomes explicitly.
                let has_true = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("true"));
                let has_false = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("false"));
                if !has_true || !has_false {
                    errors.push(format!(
                        "node {node_id}: condition nodes require both 'true' and 'false' branch edges"
                    ));
                }
            }
            "fork" => {
                if outgoing.len() < 2 {
                    errors.push(format!(
                        "node {node_id}: fork nodes require at least two outgoing edges"
                    ));
                }
            }
            "join" => {
                // A 1-input join is suspicious but functional: warning only.
                if incoming < 2 {
                    warnings.push(format!(
                        "node {node_id}: join node has fewer than two incoming edges"
                    ));
                }
            }
            "map" => {
                // Map nodes need at least one source of items to iterate.
                if node.map_policy.items.is_empty()
                    && node.map_policy.item_artifact_kind.is_none()
                    && node.input_contract.input_kinds.is_empty()
                {
                    errors.push(format!(
                        "node {node_id}: map nodes require items, item_artifact_kind, or input_contract.input_kinds"
                    ));
                }
            }
            "reduce" => {
                if node.input_contract.input_kinds.is_empty() {
                    warnings.push(format!(
                        "node {node_id}: reduce node has no input_contract.input_kinds; it will consume all available artifacts"
                    ));
                }
            }
            _ => {}
        }
    }

    // Capability checks: the graph policy and every node policy must fit
    // under the host ceiling.
    if let Some(ceiling) = ceiling {
        if let Err(error) = ceiling.intersect(&graph.capability_policy) {
            errors.push(error);
        }
        for (node_id, node) in &graph.nodes {
            if let Err(error) = ceiling.intersect(&node.capability_policy) {
                errors.push(format!("node {node_id}: {error}"));
            }
        }
    }

    WorkflowValidationReport {
        valid: errors.is_empty(),
        errors,
        warnings,
        reachable_nodes: reachable_nodes.into_iter().collect(),
    }
}
1797
1798fn reachable_nodes(graph: &WorkflowGraph) -> BTreeSet<String> {
1799 let mut seen = BTreeSet::new();
1800 let mut stack = vec![graph.entry.clone()];
1801 while let Some(node_id) = stack.pop() {
1802 if !seen.insert(node_id.clone()) {
1803 continue;
1804 }
1805 for edge in graph.edges.iter().filter(|edge| edge.from == node_id) {
1806 stack.push(edge.to.clone());
1807 }
1808 }
1809 seen
1810}
1811
/// Filters, ranks, and budget-selects artifacts for a context window.
///
/// Filtering: keep artifacts matching `include_kinds` (empty = all), not
/// in `exclude_kinds`, and from `include_stages` (empty = all).
/// Ranking (highest first): pinned ids, prioritized kinds, explicit
/// priority, freshness (if `prefer_fresh`), recency (if `prefer_recent`),
/// relevance, and finally smaller estimated token cost as a tie-break.
/// Selection: greedy in rank order, capped by `max_artifacts` and by
/// `max_tokens` minus `reserve_tokens`; over-budget artifacts are skipped
/// (not a hard stop), so smaller later artifacts can still fit.
pub fn select_artifacts(
    mut artifacts: Vec<ArtifactRecord>,
    policy: &ContextPolicy,
) -> Vec<ArtifactRecord> {
    artifacts.retain(|artifact| {
        (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
            && !policy.exclude_kinds.contains(&artifact.kind)
            && (policy.include_stages.is_empty()
                || artifact
                    .stage
                    .as_ref()
                    .is_some_and(|stage| policy.include_stages.contains(stage)))
    });
    // Each comparison is written b-vs-a so "greater" sorts first.
    artifacts.sort_by(|a, b| {
        let b_pinned = policy.pinned_ids.contains(&b.id);
        let a_pinned = policy.pinned_ids.contains(&a.id);
        b_pinned
            .cmp(&a_pinned)
            .then_with(|| {
                let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
                let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
                b_prio_kind.cmp(&a_prio_kind)
            })
            .then_with(|| {
                b.priority
                    .unwrap_or_default()
                    .cmp(&a.priority.unwrap_or_default())
            })
            .then_with(|| {
                if policy.prefer_fresh {
                    freshness_rank(b.freshness.as_deref())
                        .cmp(&freshness_rank(a.freshness.as_deref()))
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                if policy.prefer_recent {
                    // String comparison of timestamps — assumes a
                    // lexicographically ordered format; TODO confirm.
                    b.created_at.cmp(&a.created_at)
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                // Floats: NaN relevance compares Equal (no panic).
                b.relevance
                    .partial_cmp(&a.relevance)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
            .then_with(|| {
                // Cheaper (fewer tokens) wins ties; unknown cost sorts last.
                a.estimated_tokens
                    .unwrap_or(usize::MAX)
                    .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
            })
    });

    let mut selected = Vec::new();
    let mut used_tokens = 0usize;
    let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
    let effective_max_tokens = policy
        .max_tokens
        .map(|max| max.saturating_sub(reserve_tokens));
    for artifact in artifacts {
        if let Some(max_artifacts) = policy.max_artifacts {
            if selected.len() >= max_artifacts {
                break;
            }
        }
        let next_tokens = artifact.estimated_tokens.unwrap_or(0);
        if let Some(max_tokens) = effective_max_tokens {
            if used_tokens + next_tokens > max_tokens {
                // Skip (don't break): a smaller artifact later may fit.
                continue;
            }
        }
        used_tokens += next_tokens;
        selected.push(artifact);
    }
    selected
}
1890
1891pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
1892 let mut parts = Vec::new();
1893 for artifact in artifacts {
1894 let title = artifact
1895 .title
1896 .clone()
1897 .unwrap_or_else(|| format!("{} {}", artifact.kind, artifact.id));
1898 let body = artifact
1899 .text
1900 .clone()
1901 .or_else(|| artifact.data.as_ref().map(|v| v.to_string()))
1902 .unwrap_or_default();
1903 match policy.render.as_deref() {
1904 Some("json") => {
1905 parts.push(
1906 serde_json::json!({
1907 "id": artifact.id,
1908 "kind": artifact.kind,
1909 "title": title,
1910 "source": artifact.source,
1911 "freshness": artifact.freshness,
1912 "priority": artifact.priority,
1913 "text": body,
1914 })
1915 .to_string(),
1916 );
1917 }
1918 _ => parts.push(format!(
1919 "[{title}] kind={} source={} freshness={} priority={}\n{}",
1920 artifact.kind,
1921 artifact
1922 .source
1923 .clone()
1924 .unwrap_or_else(|| "unknown".to_string()),
1925 artifact
1926 .freshness
1927 .clone()
1928 .unwrap_or_else(|| "normal".to_string()),
1929 artifact.priority.unwrap_or_default(),
1930 body
1931 )),
1932 }
1933 }
1934 parts.join("\n\n")
1935}
1936
1937pub fn normalize_artifact(value: &VmValue) -> Result<ArtifactRecord, VmError> {
1938 let artifact: ArtifactRecord = parse_json_value(value)?;
1939 Ok(artifact.normalize())
1940}
1941
1942pub fn normalize_run_record(value: &VmValue) -> Result<RunRecord, VmError> {
1943 let json = vm_value_to_json(value);
1944 let payload = json.to_string();
1945 let mut deserializer = serde_json::Deserializer::from_str(&payload);
1946 let mut tracker = serde_path_to_error::Track::new();
1947 let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
1948 let mut run: RunRecord = RunRecord::deserialize(path_deserializer).map_err(|error| {
1949 let snippet = if payload.len() > 600 {
1950 format!("{}...", &payload[..600])
1951 } else {
1952 payload.clone()
1953 };
1954 VmError::Runtime(format!(
1955 "orchestration parse error at {}: {} | payload={}",
1956 tracker.path(),
1957 error,
1958 snippet
1959 ))
1960 })?;
1961 if run.type_name.is_empty() {
1962 run.type_name = "run_record".to_string();
1963 }
1964 if run.id.is_empty() {
1965 run.id = new_id("run");
1966 }
1967 if run.started_at.is_empty() {
1968 run.started_at = now_rfc3339();
1969 }
1970 if run.status.is_empty() {
1971 run.status = "running".to_string();
1972 }
1973 if run.root_run_id.is_none() {
1974 run.root_run_id = Some(run.id.clone());
1975 }
1976 if run.replay_fixture.is_none() {
1977 run.replay_fixture = Some(replay_fixture_from_run(&run));
1978 }
1979 Ok(run)
1980}
1981
1982pub fn normalize_eval_suite_manifest(value: &VmValue) -> Result<EvalSuiteManifest, VmError> {
1983 let mut manifest: EvalSuiteManifest = parse_json_value(value)?;
1984 if manifest.type_name.is_empty() {
1985 manifest.type_name = "eval_suite_manifest".to_string();
1986 }
1987 if manifest.id.is_empty() {
1988 manifest.id = new_id("eval_suite");
1989 }
1990 Ok(manifest)
1991}
1992
1993fn load_replay_fixture(path: &Path) -> Result<ReplayFixture, VmError> {
1994 let content = std::fs::read_to_string(path)
1995 .map_err(|e| VmError::Runtime(format!("failed to read replay fixture: {e}")))?;
1996 serde_json::from_str(&content)
1997 .map_err(|e| VmError::Runtime(format!("failed to parse replay fixture: {e}")))
1998}
1999
/// Resolves a manifest-relative path: absolute paths pass through; relative
/// paths are joined onto `base_dir` when one is provided.
fn resolve_manifest_path(base_dir: Option<&Path>, path: &str) -> PathBuf {
    let candidate = PathBuf::from(path);
    match base_dir {
        Some(base) if !candidate.is_absolute() => base.join(candidate),
        _ => candidate,
    }
}
2010
/// Runs every case in an eval-suite manifest: loads each run record,
/// evaluates it against its replay fixture, optionally diffs it against a
/// baseline run, and aggregates pass/fail counts into a suite report.
///
/// # Errors
/// Returns an error when any run record, fixture file, or baseline file
/// cannot be read or parsed.
pub fn evaluate_run_suite_manifest(
    manifest: &EvalSuiteManifest,
) -> Result<ReplayEvalSuiteReport, VmError> {
    // Relative case paths resolve against the manifest's base_dir.
    let base_dir = manifest.base_dir.as_deref().map(Path::new);
    let mut reports = Vec::new();
    for case in &manifest.cases {
        let run_path = resolve_manifest_path(base_dir, &case.run_path);
        let run = load_run_record(&run_path)?;
        // Fixture precedence: explicit file > embedded in the run >
        // synthesized from the run itself.
        let fixture = match &case.fixture_path {
            Some(path) => load_replay_fixture(&resolve_manifest_path(base_dir, path))?,
            None => run
                .replay_fixture
                .clone()
                .unwrap_or_else(|| replay_fixture_from_run(&run)),
        };
        let eval = evaluate_run_against_fixture(&run, &fixture);
        let mut pass = eval.pass;
        let mut failures = eval.failures;
        // Optional regression check: any difference from the baseline run
        // fails the case.
        let comparison = match &case.compare_to {
            Some(path) => {
                let baseline_path = resolve_manifest_path(base_dir, path);
                let baseline = load_run_record(&baseline_path)?;
                let diff = diff_run_records(&baseline, &run);
                if !diff.identical {
                    pass = false;
                    failures.push(format!(
                        "run differs from baseline {} with {} stage changes",
                        baseline_path.display(),
                        diff.stage_diffs.len()
                    ));
                }
                Some(diff)
            }
            None => None,
        };
        reports.push(ReplayEvalCaseReport {
            run_id: run.id.clone(),
            workflow_id: run.workflow_id.clone(),
            label: case.label.clone(),
            pass,
            failures,
            stage_count: eval.stage_count,
            source_path: Some(run_path.display().to_string()),
            comparison,
        });
    }
    let total = reports.len();
    let passed = reports.iter().filter(|report| report.pass).count();
    let failed = total.saturating_sub(passed);
    Ok(ReplayEvalSuiteReport {
        pass: failed == 0,
        total,
        passed,
        failed,
        cases: reports,
    })
}
2068
/// Renders a minimal unified-style diff (header plus ` `/`-`/`+` lines,
/// no `@@` hunk markers) between two texts, using an LCS alignment.
/// `path` labels the header; `None` renders as "artifact".
pub fn render_unified_diff(path: Option<&str>, before: &str, after: &str) -> String {
    let old: Vec<&str> = before.lines().collect();
    let new: Vec<&str> = after.lines().collect();

    // lcs[i][j] = length of the longest common subsequence of old[i..]
    // and new[j..], filled back-to-front.
    let mut lcs = vec![vec![0usize; new.len() + 1]; old.len() + 1];
    for oi in (0..old.len()).rev() {
        for ni in (0..new.len()).rev() {
            lcs[oi][ni] = if old[oi] == new[ni] {
                lcs[oi + 1][ni + 1] + 1
            } else {
                lcs[oi + 1][ni].max(lcs[oi][ni + 1])
            };
        }
    }

    let label = path.unwrap_or("artifact");
    let mut out = format!("--- a/{label}\n+++ b/{label}\n");
    let (mut oi, mut ni) = (0usize, 0usize);
    // Walk the alignment: equal lines are context, otherwise emit the
    // deletion or insertion that keeps the remaining LCS longest.
    while oi < old.len() && ni < new.len() {
        if old[oi] == new[ni] {
            out.push_str(&format!(" {}\n", old[oi]));
            oi += 1;
            ni += 1;
        } else if lcs[oi + 1][ni] >= lcs[oi][ni + 1] {
            out.push_str(&format!("-{}\n", old[oi]));
            oi += 1;
        } else {
            out.push_str(&format!("+{}\n", new[ni]));
            ni += 1;
        }
    }
    // Flush whichever side still has unmatched tail lines.
    for line in &old[oi..] {
        out.push_str(&format!("-{line}\n"));
    }
    for line in &new[ni..] {
        out.push_str(&format!("+{line}\n"));
    }
    out
}
2111
2112pub fn save_run_record(run: &RunRecord, path: Option<&str>) -> Result<String, VmError> {
2113 let path = path
2114 .map(PathBuf::from)
2115 .unwrap_or_else(|| default_run_dir().join(format!("{}.json", run.id)));
2116 if let Some(parent) = path.parent() {
2117 std::fs::create_dir_all(parent)
2118 .map_err(|e| VmError::Runtime(format!("failed to create run directory: {e}")))?;
2119 }
2120 let json = serde_json::to_string_pretty(run)
2121 .map_err(|e| VmError::Runtime(format!("failed to encode run record: {e}")))?;
2122 let tmp_path = path.with_extension("json.tmp");
2124 std::fs::write(&tmp_path, &json)
2125 .map_err(|e| VmError::Runtime(format!("failed to persist run record: {e}")))?;
2126 std::fs::rename(&tmp_path, &path).map_err(|e| {
2127 let _ = std::fs::write(&path, &json);
2129 VmError::Runtime(format!("failed to finalize run record: {e}"))
2130 })?;
2131 Ok(path.to_string_lossy().to_string())
2132}
2133
2134pub fn load_run_record(path: &Path) -> Result<RunRecord, VmError> {
2135 let content = std::fs::read_to_string(path)
2136 .map_err(|e| VmError::Runtime(format!("failed to read run record: {e}")))?;
2137 serde_json::from_str(&content)
2138 .map_err(|e| VmError::Runtime(format!("failed to parse run record: {e}")))
2139}
2140
2141pub fn replay_fixture_from_run(run: &RunRecord) -> ReplayFixture {
2142 ReplayFixture {
2143 type_name: "replay_fixture".to_string(),
2144 id: new_id("fixture"),
2145 source_run_id: run.id.clone(),
2146 workflow_id: run.workflow_id.clone(),
2147 workflow_name: run.workflow_name.clone(),
2148 created_at: now_rfc3339(),
2149 expected_status: run.status.clone(),
2150 stage_assertions: run
2151 .stages
2152 .iter()
2153 .map(|stage| ReplayStageAssertion {
2154 node_id: stage.node_id.clone(),
2155 expected_status: stage.status.clone(),
2156 expected_outcome: stage.outcome.clone(),
2157 expected_branch: stage.branch.clone(),
2158 required_artifact_kinds: stage
2159 .artifacts
2160 .iter()
2161 .map(|artifact| artifact.kind.clone())
2162 .collect(),
2163 visible_text_contains: stage
2164 .visible_text
2165 .as_ref()
2166 .filter(|text| !text.is_empty())
2167 .map(|text| text.chars().take(80).collect()),
2168 })
2169 .collect(),
2170 }
2171}
2172
/// Checks a run against a replay fixture, collecting every mismatch
/// (overall status, and per-assertion stage status/outcome/branch,
/// required artifact kinds, and visible-text snippets). Stages matched
/// by node_id; extra stages in the run are not flagged.
pub fn evaluate_run_against_fixture(run: &RunRecord, fixture: &ReplayFixture) -> ReplayEvalReport {
    let mut failures = Vec::new();
    if run.status != fixture.expected_status {
        failures.push(format!(
            "run status mismatch: expected {}, got {}",
            fixture.expected_status, run.status
        ));
    }
    for assertion in &fixture.stage_assertions {
        // First stage with a matching node_id; a missing stage is its own
        // failure and skips the remaining checks for this assertion.
        let Some(stage) = run
            .stages
            .iter()
            .find(|stage| stage.node_id == assertion.node_id)
        else {
            failures.push(format!("missing stage {}", assertion.node_id));
            continue;
        };
        if stage.status != assertion.expected_status {
            failures.push(format!(
                "stage {} status mismatch: expected {}, got {}",
                assertion.node_id, assertion.expected_status, stage.status
            ));
        }
        if stage.outcome != assertion.expected_outcome {
            failures.push(format!(
                "stage {} outcome mismatch: expected {}, got {}",
                assertion.node_id, assertion.expected_outcome, stage.outcome
            ));
        }
        if stage.branch != assertion.expected_branch {
            failures.push(format!(
                "stage {} branch mismatch: expected {:?}, got {:?}",
                assertion.node_id, assertion.expected_branch, stage.branch
            ));
        }
        // Every required artifact kind must appear at least once.
        for required_kind in &assertion.required_artifact_kinds {
            if !stage
                .artifacts
                .iter()
                .any(|artifact| &artifact.kind == required_kind)
            {
                failures.push(format!(
                    "stage {} missing artifact kind {}",
                    assertion.node_id, required_kind
                ));
            }
        }
        // Substring check; an absent visible_text compares as "".
        if let Some(snippet) = &assertion.visible_text_contains {
            let actual = stage.visible_text.clone().unwrap_or_default();
            if !actual.contains(snippet) {
                failures.push(format!(
                    "stage {} visible text does not contain expected snippet {:?}",
                    assertion.node_id, snippet
                ));
            }
        }
    }

    ReplayEvalReport {
        pass: failures.is_empty(),
        failures,
        stage_count: run.stages.len(),
    }
}
2237
2238pub fn evaluate_run_suite(
2239 cases: Vec<(RunRecord, ReplayFixture, Option<String>)>,
2240) -> ReplayEvalSuiteReport {
2241 let mut reports = Vec::new();
2242 for (run, fixture, source_path) in cases {
2243 let report = evaluate_run_against_fixture(&run, &fixture);
2244 reports.push(ReplayEvalCaseReport {
2245 run_id: run.id.clone(),
2246 workflow_id: run.workflow_id.clone(),
2247 label: None,
2248 pass: report.pass,
2249 failures: report.failures,
2250 stage_count: report.stage_count,
2251 source_path,
2252 comparison: None,
2253 });
2254 }
2255 let total = reports.len();
2256 let passed = reports.iter().filter(|report| report.pass).count();
2257 let failed = total.saturating_sub(passed);
2258 ReplayEvalSuiteReport {
2259 pass: failed == 0,
2260 total,
2261 passed,
2262 failed,
2263 cases: reports,
2264 }
2265}
2266
2267pub fn diff_run_records(left: &RunRecord, right: &RunRecord) -> RunDiffReport {
2268 let mut stage_diffs = Vec::new();
2269 let mut all_node_ids = BTreeSet::new();
2270 all_node_ids.extend(left.stages.iter().map(|stage| stage.node_id.clone()));
2271 all_node_ids.extend(right.stages.iter().map(|stage| stage.node_id.clone()));
2272
2273 for node_id in all_node_ids {
2274 let left_stage = left.stages.iter().find(|stage| stage.node_id == node_id);
2275 let right_stage = right.stages.iter().find(|stage| stage.node_id == node_id);
2276 match (left_stage, right_stage) {
2277 (Some(_), None) => stage_diffs.push(RunStageDiffRecord {
2278 node_id,
2279 change: "removed".to_string(),
2280 details: vec!["stage missing from right run".to_string()],
2281 }),
2282 (None, Some(_)) => stage_diffs.push(RunStageDiffRecord {
2283 node_id,
2284 change: "added".to_string(),
2285 details: vec!["stage missing from left run".to_string()],
2286 }),
2287 (Some(left_stage), Some(right_stage)) => {
2288 let mut details = Vec::new();
2289 if left_stage.status != right_stage.status {
2290 details.push(format!(
2291 "status: {} -> {}",
2292 left_stage.status, right_stage.status
2293 ));
2294 }
2295 if left_stage.outcome != right_stage.outcome {
2296 details.push(format!(
2297 "outcome: {} -> {}",
2298 left_stage.outcome, right_stage.outcome
2299 ));
2300 }
2301 if left_stage.branch != right_stage.branch {
2302 details.push(format!(
2303 "branch: {:?} -> {:?}",
2304 left_stage.branch, right_stage.branch
2305 ));
2306 }
2307 if left_stage.produced_artifact_ids.len() != right_stage.produced_artifact_ids.len()
2308 {
2309 details.push(format!(
2310 "produced_artifacts: {} -> {}",
2311 left_stage.produced_artifact_ids.len(),
2312 right_stage.produced_artifact_ids.len()
2313 ));
2314 }
2315 if left_stage.artifacts.len() != right_stage.artifacts.len() {
2316 details.push(format!(
2317 "artifact_records: {} -> {}",
2318 left_stage.artifacts.len(),
2319 right_stage.artifacts.len()
2320 ));
2321 }
2322 if !details.is_empty() {
2323 stage_diffs.push(RunStageDiffRecord {
2324 node_id,
2325 change: "changed".to_string(),
2326 details,
2327 });
2328 }
2329 }
2330 (None, None) => {}
2331 }
2332 }
2333
2334 let status_changed = left.status != right.status;
2335 let identical = !status_changed
2336 && stage_diffs.is_empty()
2337 && left.transitions.len() == right.transitions.len()
2338 && left.artifacts.len() == right.artifacts.len()
2339 && left.checkpoints.len() == right.checkpoints.len();
2340
2341 RunDiffReport {
2342 left_run_id: left.id.clone(),
2343 right_run_id: right.id.clone(),
2344 identical,
2345 status_changed,
2346 left_status: left.status.clone(),
2347 right_status: right.status.clone(),
2348 stage_diffs,
2349 transition_count_delta: right.transitions.len() as isize - left.transitions.len() as isize,
2350 artifact_count_delta: right.artifacts.len() as isize - left.artifacts.len() as isize,
2351 checkpoint_count_delta: right.checkpoints.len() as isize - left.checkpoints.len() as isize,
2352 }
2353}
2354
2355pub fn push_execution_policy(policy: CapabilityPolicy) {
2356 EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
2357}
2358
2359pub fn pop_execution_policy() {
2360 EXECUTION_POLICY_STACK.with(|stack| {
2361 stack.borrow_mut().pop();
2362 });
2363}
2364
2365pub fn current_execution_policy() -> Option<CapabilityPolicy> {
2366 EXECUTION_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
2367}
2368
2369pub fn current_tool_metadata(tool: &str) -> Option<ToolRuntimePolicyMetadata> {
2370 current_execution_policy().and_then(|policy| policy.tool_metadata.get(tool).cloned())
2371}
2372
2373fn policy_allows_tool(policy: &CapabilityPolicy, tool: &str) -> bool {
2374 policy.tools.is_empty() || policy.tools.iter().any(|allowed| allowed == tool)
2375}
2376
2377fn policy_allows_capability(policy: &CapabilityPolicy, capability: &str, op: &str) -> bool {
2378 policy.capabilities.is_empty()
2379 || policy
2380 .capabilities
2381 .get(capability)
2382 .is_some_and(|ops| ops.is_empty() || ops.iter().any(|allowed| allowed == op))
2383}
2384
2385fn policy_allows_side_effect(policy: &CapabilityPolicy, requested: &str) -> bool {
2386 fn rank(v: &str) -> usize {
2387 match v {
2388 "none" => 0,
2389 "read_only" => 1,
2390 "workspace_write" => 2,
2391 "process_exec" => 3,
2392 "network" => 4,
2393 _ => 5,
2394 }
2395 }
2396 policy
2397 .side_effect_level
2398 .as_ref()
2399 .map(|allowed| rank(allowed) >= rank(requested))
2400 .unwrap_or(true)
2401}
2402
2403fn reject_policy(reason: String) -> Result<(), VmError> {
2404 Err(VmError::CategorizedError {
2405 message: reason,
2406 category: crate::value::ErrorCategory::ToolRejected,
2407 })
2408}
2409
/// Heuristically classify a tool's mutation impact from its name, used when
/// no explicit classification is present in tool metadata.
///
/// Checks are ordered most- to least-specific: MCP tools, process execution,
/// destructive file ops, workspace writes, then read-only as the default.
fn fallback_mutation_classification(tool_name: &str) -> String {
    let lower = tool_name.to_ascii_lowercase();
    let class = if lower.starts_with("mcp_") {
        // MCP tools carry host-defined semantics we cannot infer here.
        "host_defined"
    } else if matches!(
        lower.as_str(),
        "exec" | "shell" | "exec_at" | "shell_at" | "run"
    ) || lower.starts_with("run_")
    {
        "ambient_side_effect"
    } else if ["delete", "remove", "move", "rename"]
        .iter()
        .any(|prefix| lower.starts_with(prefix))
    {
        "destructive"
    } else if ["write", "edit", "patch", "create", "scaffold"]
        .iter()
        .any(|needle| lower.contains(needle))
        || lower.starts_with("insert")
        || lower.starts_with("replace")
        || lower == "add_import"
    {
        "apply_workspace"
    } else {
        "read_only"
    };
    class.to_string()
}
2444
2445pub fn current_tool_mutation_classification(tool_name: &str) -> String {
2446 current_tool_metadata(tool_name)
2447 .and_then(|metadata| metadata.mutation_classification)
2448 .unwrap_or_else(|| fallback_mutation_classification(tool_name))
2449}
2450
2451pub fn current_tool_declared_paths(tool_name: &str, args: &serde_json::Value) -> Vec<String> {
2452 let Some(map) = args.as_object() else {
2453 return Vec::new();
2454 };
2455 let path_keys = current_tool_metadata(tool_name)
2456 .map(|metadata| metadata.path_params)
2457 .filter(|keys| !keys.is_empty())
2458 .unwrap_or_else(|| {
2459 vec![
2460 "path".to_string(),
2461 "file".to_string(),
2462 "cwd".to_string(),
2463 "repo".to_string(),
2464 "target".to_string(),
2465 "destination".to_string(),
2466 ]
2467 });
2468 let mut paths = Vec::new();
2469 for key in path_keys {
2470 if let Some(value) = map.get(&key).and_then(|value| value.as_str()) {
2471 if !value.is_empty() {
2472 paths.push(value.to_string());
2473 }
2474 }
2475 }
2476 if let Some(items) = map.get("paths").and_then(|value| value.as_array()) {
2477 for item in items {
2478 if let Some(value) = item.as_str() {
2479 if !value.is_empty() {
2480 paths.push(value.to_string());
2481 }
2482 }
2483 }
2484 }
2485 paths.sort();
2486 paths.dedup();
2487 paths
2488}
2489
/// Gate a builtin call against the active execution policy, if one exists.
///
/// Each policy-relevant builtin is mapped onto the tool / capability /
/// side-effect checks it must satisfy; any exceeded ceiling yields a
/// `ToolRejected` categorized error. Builtins not listed here pass through
/// unchecked. `args` is only inspected for `host_call`.
pub fn enforce_current_policy_for_builtin(name: &str, args: &[VmValue]) -> Result<(), VmError> {
    // No active policy means no restrictions.
    let Some(policy) = current_execution_policy() else {
        return Ok(());
    };
    match name {
        // Read-style workspace access.
        "read" | "read_file" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "read_text")
            {
                return reject_policy(format!(
                    "builtin '{name}' exceeds workspace.read_text ceiling"
                ));
            }
        }
        "search" | "list_dir" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "list")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
            }
        }
        // Existence probes only require the capability, not a tool entry.
        "file_exists" | "stat" => {
            if !policy_allows_capability(&policy, "workspace", "exists") {
                return reject_policy(format!("builtin '{name}' exceeds workspace.exists ceiling"));
            }
        }
        // All write-style builtins are policed under the 'edit' tool name.
        "edit" | "write_file" | "append_file" | "mkdir" | "copy_file" => {
            if !policy_allows_tool(&policy, "edit")
                || !policy_allows_capability(&policy, "workspace", "write_text")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace write ceiling"));
            }
        }
        "delete_file" => {
            if !policy_allows_capability(&policy, "workspace", "delete")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'delete_file' exceeds workspace.delete ceiling".to_string(),
                );
            }
        }
        "apply_edit" => {
            if !policy_allows_capability(&policy, "workspace", "apply_edit")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'apply_edit' exceeds workspace.apply_edit ceiling".to_string(),
                );
            }
        }
        // Process execution is policed under the 'run' tool name.
        "exec" | "exec_at" | "shell" | "shell_at" | "run_command" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        // Network builtins only need the side-effect ceiling.
        "http_get" | "http_post" | "http_put" | "http_patch" | "http_delete" | "http_request" => {
            if !policy_allows_side_effect(&policy, "network") {
                return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
            }
        }
        // MCP builtins are treated like process execution so they cannot be
        // used as an escape hatch around the policy.
        "mcp_connect"
        | "mcp_call"
        | "mcp_list_tools"
        | "mcp_list_resources"
        | "mcp_list_resource_templates"
        | "mcp_read_resource"
        | "mcp_list_prompts"
        | "mcp_get_prompt"
        | "mcp_server_info"
        | "mcp_disconnect" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        "host_call" => {
            // The first argument names the target as '<capability>.<operation>';
            // this shadows the builtin name on purpose.
            let name = args.first().map(|v| v.display()).unwrap_or_default();
            let Some((capability, op)) = name.split_once('.') else {
                return reject_policy(format!(
                    "host_call '{name}' must use capability.operation naming"
                ));
            };
            if !policy_allows_capability(&policy, capability, op) {
                return reject_policy(format!(
                    "host_call {capability}.{op} exceeds capability ceiling"
                ));
            }
            // Derive the side-effect level the host call implies; anything
            // not explicitly write/exec is treated as read-only.
            let requested_side_effect = match (capability, op) {
                ("workspace", "write_text" | "apply_edit" | "delete") => "workspace_write",
                ("process", "exec") => "process_exec",
                _ => "read_only",
            };
            if !policy_allows_side_effect(&policy, requested_side_effect) {
                return reject_policy(format!(
                    "host_call {capability}.{op} exceeds side-effect ceiling"
                ));
            }
        }
        _ => {}
    }
    Ok(())
}
2599
2600pub fn enforce_current_policy_for_bridge_builtin(name: &str) -> Result<(), VmError> {
2601 if current_execution_policy().is_some() {
2602 return reject_policy(format!(
2603 "bridged builtin '{name}' exceeds execution policy; declare an explicit capability/tool surface instead"
2604 ));
2605 }
2606 Ok(())
2607}
2608
2609pub fn enforce_current_policy_for_tool(tool_name: &str) -> Result<(), VmError> {
2610 let Some(policy) = current_execution_policy() else {
2611 return Ok(());
2612 };
2613 if !policy_allows_tool(&policy, tool_name) {
2614 return reject_policy(format!("tool '{tool_name}' exceeds tool ceiling"));
2615 }
2616 if let Some(metadata) = policy.tool_metadata.get(tool_name) {
2617 for (capability, ops) in &metadata.capabilities {
2618 for op in ops {
2619 if !policy_allows_capability(&policy, capability, op) {
2620 return reject_policy(format!(
2621 "tool '{tool_name}' exceeds capability ceiling: {capability}.{op}"
2622 ));
2623 }
2624 }
2625 }
2626 if let Some(side_effect_level) = metadata.side_effect_level.as_deref() {
2627 if !policy_allows_side_effect(&policy, side_effect_level) {
2628 return reject_policy(format!(
2629 "tool '{tool_name}' exceeds side-effect ceiling: {side_effect_level}"
2630 ));
2631 }
2632 }
2633 }
2634 Ok(())
2635}
2636
2637fn compact_transcript(transcript: &VmValue, keep_last: usize) -> Option<VmValue> {
2638 let dict = transcript.as_dict()?;
2639 let messages = match dict.get("messages") {
2640 Some(VmValue::List(list)) => list.iter().cloned().collect::<Vec<_>>(),
2641 _ => Vec::new(),
2642 };
2643 let retained = messages
2644 .into_iter()
2645 .rev()
2646 .take(keep_last)
2647 .collect::<Vec<_>>()
2648 .into_iter()
2649 .rev()
2650 .collect::<Vec<_>>();
2651 let mut compacted = dict.clone();
2652 compacted.insert(
2653 "messages".to_string(),
2654 VmValue::List(Rc::new(retained.clone())),
2655 );
2656 compacted.insert(
2657 "events".to_string(),
2658 VmValue::List(Rc::new(
2659 crate::llm::helpers::transcript_events_from_messages(&retained),
2660 )),
2661 );
2662 Some(VmValue::Dict(Rc::new(compacted)))
2663}
2664
2665fn redact_transcript_visibility(transcript: &VmValue, visibility: Option<&str>) -> Option<VmValue> {
2666 let Some(visibility) = visibility else {
2667 return Some(transcript.clone());
2668 };
2669 if visibility != "public" && visibility != "public_only" {
2670 return Some(transcript.clone());
2671 }
2672 let dict = transcript.as_dict()?;
2673 let public_messages = match dict.get("messages") {
2674 Some(VmValue::List(list)) => list
2675 .iter()
2676 .filter(|message| {
2677 message
2678 .as_dict()
2679 .and_then(|d| d.get("role"))
2680 .map(|v| v.display())
2681 .map(|role| role != "tool_result")
2682 .unwrap_or(true)
2683 })
2684 .cloned()
2685 .collect::<Vec<_>>(),
2686 _ => Vec::new(),
2687 };
2688 let public_events = match dict.get("events") {
2689 Some(VmValue::List(list)) => list
2690 .iter()
2691 .filter(|event| {
2692 event
2693 .as_dict()
2694 .and_then(|d| d.get("visibility"))
2695 .map(|v| v.display())
2696 .map(|value| value == "public")
2697 .unwrap_or(true)
2698 })
2699 .cloned()
2700 .collect::<Vec<_>>(),
2701 _ => Vec::new(),
2702 };
2703 let mut redacted = dict.clone();
2704 redacted.insert(
2705 "messages".to_string(),
2706 VmValue::List(Rc::new(public_messages)),
2707 );
2708 redacted.insert("events".to_string(), VmValue::List(Rc::new(public_events)));
2709 Some(VmValue::Dict(Rc::new(redacted)))
2710}
2711
2712pub(crate) fn apply_input_transcript_policy(
2713 transcript: Option<VmValue>,
2714 policy: &TranscriptPolicy,
2715) -> Option<VmValue> {
2716 let mut transcript = transcript;
2717 match policy.mode.as_deref() {
2718 Some("reset") => return None,
2719 Some("fork") => {
2720 if let Some(VmValue::Dict(dict)) = transcript.as_ref() {
2721 let mut forked = dict.as_ref().clone();
2722 forked.insert(
2723 "id".to_string(),
2724 VmValue::String(Rc::from(new_id("transcript"))),
2725 );
2726 transcript = Some(VmValue::Dict(Rc::new(forked)));
2727 }
2728 }
2729 _ => {}
2730 }
2731 if policy.compact {
2732 let keep_last = policy.keep_last.unwrap_or(6);
2733 transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
2734 }
2735 transcript
2736}
2737
2738fn apply_output_transcript_policy(
2739 transcript: Option<VmValue>,
2740 policy: &TranscriptPolicy,
2741) -> Option<VmValue> {
2742 let mut transcript = transcript;
2743 if policy.compact {
2744 let keep_last = policy.keep_last.unwrap_or(6);
2745 transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
2746 }
2747 transcript.and_then(|value| redact_transcript_visibility(&value, policy.visibility.as_deref()))
2748}
2749
/// Run a single workflow stage node to completion.
///
/// Returns the raw result payload (JSON), the artifacts produced by the
/// stage (currently exactly one), and the transcript after the output-side
/// transcript policy has been applied.
///
/// Execution takes one of three paths:
/// - `verify` nodes with a non-empty `command` run it via the platform shell
///   and report stdout/stderr/exit status;
/// - nodes in `agent` mode (or with tools attached) drive the internal
///   agent loop;
/// - everything else performs a single LLM call.
pub async fn execute_stage_node(
    node_id: &str,
    node: &WorkflowNode,
    task: &str,
    artifacts: &[ArtifactRecord],
    transcript: Option<VmValue>,
) -> Result<(serde_json::Value, Vec<ArtifactRecord>, Option<VmValue>), VmError> {
    // Artifact selection: when the node's context policy names no kinds,
    // fall back to the kinds declared by the input contract.
    let mut selection_policy = node.context_policy.clone();
    if selection_policy.include_kinds.is_empty() && !node.input_contract.input_kinds.is_empty() {
        selection_policy.include_kinds = node.input_contract.input_kinds.clone();
    }
    let selected = select_artifacts_adaptive(artifacts.to_vec(), &selection_policy);
    let rendered_context = render_artifacts_context(&selected, &node.context_policy);
    // Apply the input-side transcript policy (reset/fork/compact) before
    // checking the contract below.
    let transcript = apply_input_transcript_policy(transcript, &node.transcript_policy);
    // Input contract enforcement: transcript presence and artifact counts.
    if node.input_contract.require_transcript && transcript.is_none() {
        return Err(VmError::Runtime(format!(
            "workflow stage {node_id} requires transcript input"
        )));
    }
    if let Some(min_inputs) = node.input_contract.min_inputs {
        if selected.len() < min_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} requires at least {min_inputs} input artifacts"
            )));
        }
    }
    if let Some(max_inputs) = node.input_contract.max_inputs {
        if selected.len() > max_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} accepts at most {max_inputs} input artifacts"
            )));
        }
    }
    // Prompt = rendered artifact context (when any) followed by the task,
    // under a label that defaults to "Task".
    let prompt = if rendered_context.is_empty() {
        task.to_string()
    } else {
        format!(
            "{rendered_context}\n\n{}:\n{task}",
            node.task_label
                .clone()
                .unwrap_or_else(|| "Task".to_string())
        )
    };

    // Tool-calling wire format, overridable via env; defaults to "text".
    let tool_format = std::env::var("HARN_AGENT_TOOL_FORMAT")
        .ok()
        .filter(|value| !value.trim().is_empty())
        .unwrap_or_else(|| "text".to_string());
    let mut llm_result = if node.kind == "verify" {
        // Verify nodes run a shell command instead of calling the model.
        if let Some(command) = node
            .verify
            .as_ref()
            .and_then(|verify| verify.as_object())
            .and_then(|verify| verify.get("command"))
            .and_then(|value| value.as_str())
            .map(str::trim)
            .filter(|value| !value.is_empty())
        {
            // Use the platform shell (cmd /C on Windows, sh -lc elsewhere).
            let mut process = if cfg!(target_os = "windows") {
                let mut cmd = tokio::process::Command::new("cmd");
                cmd.arg("/C").arg(command);
                cmd
            } else {
                let mut cmd = tokio::process::Command::new("/bin/sh");
                cmd.arg("-lc").arg(command);
                cmd
            };
            // No stdin: the command must not block waiting for input.
            process.stdin(std::process::Stdio::null());
            // Inherit cwd/env from the current execution context, if set.
            if let Some(context) = crate::stdlib::process::current_execution_context() {
                if let Some(cwd) = context.cwd.filter(|cwd| !cwd.is_empty()) {
                    process.current_dir(cwd);
                }
                if !context.env.is_empty() {
                    process.envs(context.env);
                }
            }
            let output = process
                .output()
                .await
                .map_err(|e| VmError::Runtime(format!("workflow verify exec failed: {e}")))?;
            let stdout = String::from_utf8_lossy(&output.stdout).to_string();
            let stderr = String::from_utf8_lossy(&output.stderr).to_string();
            // Combined text prefers whichever stream is non-empty; both are
            // joined with a newline when present.
            let combined = if stderr.is_empty() {
                stdout.clone()
            } else if stdout.is_empty() {
                stderr.clone()
            } else {
                format!("{stdout}\n{stderr}")
            };
            serde_json::json!({
                "status": "completed",
                "text": combined,
                "visible_text": combined,
                "command": command,
                "stdout": stdout,
                "stderr": stderr,
                // -1 signals the process terminated without an exit code
                // (e.g. killed by a signal).
                "exit_status": output.status.code().unwrap_or(-1),
                "success": output.status.success(),
            })
        } else {
            // Verify node without a command: trivially completed, empty output.
            serde_json::json!({
                "status": "completed",
                "text": "",
                "visible_text": "",
            })
        }
    } else {
        // Build LLM call options from the node's model policy.
        let mut options = BTreeMap::new();
        if let Some(provider) = &node.model_policy.provider {
            options.insert(
                "provider".to_string(),
                VmValue::String(Rc::from(provider.clone())),
            );
        }
        if let Some(model) = &node.model_policy.model {
            options.insert(
                "model".to_string(),
                VmValue::String(Rc::from(model.clone())),
            );
        }
        if let Some(model_tier) = &node.model_policy.model_tier {
            options.insert(
                "model_tier".to_string(),
                VmValue::String(Rc::from(model_tier.clone())),
            );
        }
        if let Some(temperature) = node.model_policy.temperature {
            options.insert("temperature".to_string(), VmValue::Float(temperature));
        }
        if let Some(max_tokens) = node.model_policy.max_tokens {
            options.insert("max_tokens".to_string(), VmValue::Int(max_tokens));
        }
        // Only pass tools through when the node declares a non-null tool
        // spec that actually names tools.
        let tool_names = workflow_tool_names(&node.tools);
        if !matches!(node.tools, serde_json::Value::Null) && !tool_names.is_empty() {
            options.insert(
                "tools".to_string(),
                crate::stdlib::json_to_vm_value(&node.tools),
            );
        }
        if let Some(transcript) = transcript.clone() {
            options.insert("transcript".to_string(), transcript);
        }

        // Positional args: prompt, optional system prompt, options dict.
        let args = vec![
            VmValue::String(Rc::from(prompt.clone())),
            node.system
                .clone()
                .map(|s| VmValue::String(Rc::from(s)))
                .unwrap_or(VmValue::Nil),
            VmValue::Dict(Rc::new(options)),
        ];
        let mut opts = extract_llm_options(&args)?;

        if node.mode.as_deref() == Some("agent") || !tool_names.is_empty() {
            // Agent path: fixed loop defaults for workflow stages.
            crate::llm::run_agent_loop_internal(
                &mut opts,
                crate::llm::AgentLoopConfig {
                    persistent: true,
                    max_iterations: 12,
                    max_nudges: 3,
                    nudge: None,
                    done_sentinel: node.done_sentinel.clone(),
                    break_unless_phase: None,
                    tool_retries: 0,
                    tool_backoff_ms: 1000,
                    tool_format: tool_format.clone(),
                    auto_compact: None,
                    context_callback: None,
                    policy: None,
                    daemon: false,
                    llm_retries: 2,
                    llm_backoff_ms: 2000,
                },
            )
            .await?
        } else {
            // Single-shot LLM call, normalized into the agent-loop result shape.
            let result = vm_call_llm_full(&opts).await?;
            crate::llm::agent_loop_result_from_llm(&result, opts)
        }
    };
    // Attach prompt/context provenance to the result payload for auditing.
    if let Some(payload) = llm_result.as_object_mut() {
        payload.insert("prompt".to_string(), serde_json::json!(prompt));
        payload.insert(
            "system_prompt".to_string(),
            serde_json::json!(node.system.clone().unwrap_or_default()),
        );
        payload.insert(
            "rendered_context".to_string(),
            serde_json::json!(rendered_context),
        );
        payload.insert(
            "selected_artifact_ids".to_string(),
            serde_json::json!(selected
                .iter()
                .map(|artifact| artifact.id.clone())
                .collect::<Vec<_>>()),
        );
        payload.insert(
            "selected_artifact_titles".to_string(),
            serde_json::json!(selected
                .iter()
                .map(|artifact| artifact.title.clone())
                .collect::<Vec<_>>()),
        );
        payload.insert(
            "tool_calling_mode".to_string(),
            serde_json::json!(tool_format.clone()),
        );
    }

    let visible_text = llm_result["text"].as_str().unwrap_or_default().to_string();
    // Output transcript comes back through the result payload, then the
    // output-side transcript policy (compact + visibility redaction).
    let transcript = llm_result
        .get("transcript")
        .cloned()
        .map(|value| crate::stdlib::json_to_vm_value(&value));
    let transcript = apply_output_transcript_policy(transcript, &node.transcript_policy);
    // First declared output kind wins; verify nodes default to
    // "verification_result", everything else to "artifact".
    let output_kind = node
        .output_contract
        .output_kinds
        .first()
        .cloned()
        .unwrap_or_else(|| {
            if node.kind == "verify" {
                "verification_result".to_string()
            } else {
                "artifact".to_string()
            }
        });
    let mut metadata = BTreeMap::new();
    metadata.insert(
        "input_artifact_ids".to_string(),
        serde_json::json!(selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect::<Vec<_>>()),
    );
    metadata.insert("node_kind".to_string(), serde_json::json!(node.kind));
    // Wrap the stage result as a normalized artifact whose lineage points at
    // the inputs it consumed.
    let artifact = ArtifactRecord {
        type_name: "artifact".to_string(),
        id: new_id("artifact"),
        kind: output_kind,
        title: Some(format!("stage {node_id} output")),
        text: Some(visible_text),
        data: Some(llm_result.clone()),
        source: Some(node_id.to_string()),
        created_at: now_rfc3339(),
        freshness: Some("fresh".to_string()),
        priority: None,
        lineage: selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect(),
        relevance: Some(1.0),
        estimated_tokens: None,
        stage: Some(node_id.to_string()),
        metadata,
    }
    .normalize();

    Ok((llm_result, vec![artifact], transcript))
}
3011
3012pub fn next_nodes_for(
3013 graph: &WorkflowGraph,
3014 current: &str,
3015 branch: Option<&str>,
3016) -> Vec<WorkflowEdge> {
3017 let mut matching: Vec<WorkflowEdge> = graph
3018 .edges
3019 .iter()
3020 .filter(|edge| edge.from == current && edge.branch.as_deref() == branch)
3021 .cloned()
3022 .collect();
3023 if matching.is_empty() {
3024 matching = graph
3025 .edges
3026 .iter()
3027 .filter(|edge| edge.from == current && edge.branch.is_none())
3028 .cloned()
3029 .collect();
3030 }
3031 matching
3032}
3033
3034pub fn next_node_for(graph: &WorkflowGraph, current: &str, branch: &str) -> Option<String> {
3035 next_nodes_for(graph, current, Some(branch))
3036 .into_iter()
3037 .next()
3038 .map(|edge| edge.to)
3039}
3040
3041pub fn append_audit_entry(
3042 graph: &mut WorkflowGraph,
3043 op: &str,
3044 node_id: Option<String>,
3045 reason: Option<String>,
3046 metadata: BTreeMap<String, serde_json::Value>,
3047) {
3048 graph.audit_log.push(WorkflowAuditEntry {
3049 id: new_id("audit"),
3050 op: op.to_string(),
3051 node_id,
3052 timestamp: now_rfc3339(),
3053 reason,
3054 metadata,
3055 });
3056}
3057
3058pub fn builtin_ceiling() -> CapabilityPolicy {
3059 CapabilityPolicy {
3060 tools: Vec::new(),
3063 capabilities: BTreeMap::from([
3064 (
3065 "workspace".to_string(),
3066 vec![
3067 "read_text".to_string(),
3068 "write_text".to_string(),
3069 "apply_edit".to_string(),
3070 "delete".to_string(),
3071 "exists".to_string(),
3072 "list".to_string(),
3073 ],
3074 ),
3075 ("process".to_string(), vec!["exec".to_string()]),
3076 ]),
3077 workspace_roots: Vec::new(),
3078 side_effect_level: Some("network".to_string()),
3079 recursion_limit: Some(8),
3080 tool_arg_constraints: Vec::new(),
3081 tool_metadata: BTreeMap::new(),
3082 }
3083}
3084
3085#[cfg(test)]
3086mod tests {
3087 use super::*;
3088
    #[test]
    fn capability_intersection_rejects_privilege_expansion() {
        // Requesting a tool ('edit') not present in the ceiling must fail
        // intersection with an error that names the host ceiling.
        let ceiling = CapabilityPolicy {
            tools: vec!["read".to_string()],
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(2),
            ..Default::default()
        };
        let requested = CapabilityPolicy {
            tools: vec!["read".to_string(), "edit".to_string()],
            ..Default::default()
        };
        let error = ceiling.intersect(&requested).unwrap_err();
        assert!(error.contains("host ceiling"));
    }
3104
    #[test]
    fn mutation_session_normalize_fills_defaults() {
        // normalize() on an all-empty record must fill the generated session
        // id and the default scope/approval values.
        let normalized = MutationSessionRecord::default().normalize();
        assert!(normalized.session_id.starts_with("session_"));
        assert_eq!(normalized.mutation_scope, "read_only");
        assert_eq!(normalized.approval_mode, "host_enforced");
    }
3112
    #[test]
    fn install_current_mutation_session_round_trips() {
        // Installing a session makes it readable back unchanged, and
        // installing None clears the thread-local slot.
        install_current_mutation_session(Some(MutationSessionRecord {
            session_id: "session_test".to_string(),
            mutation_scope: "apply_workspace".to_string(),
            approval_mode: "explicit".to_string(),
            ..Default::default()
        }));
        let current = current_mutation_session().expect("session installed");
        assert_eq!(current.session_id, "session_test");
        assert_eq!(current.mutation_scope, "apply_workspace");
        assert_eq!(current.approval_mode, "explicit");

        install_current_mutation_session(None);
        assert!(current_mutation_session().is_none());
    }
3129
    #[test]
    fn active_execution_policy_rejects_unknown_bridge_builtin() {
        // Any active policy must reject bridged builtins outright with a
        // ToolRejected categorized error.
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
        // Pop before asserting so the thread-local stack is clean even on failure paths above.
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }
3152
    #[test]
    fn active_execution_policy_rejects_mcp_escape_hatch() {
        // A read-only policy must reject MCP builtins, which are policed as
        // process execution to prevent policy bypass.
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }
3175
    #[test]
    fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
        // Legacy top-level act/verify/repair keys must normalize into a
        // workflow_graph with those three nodes and "act" as the entry.
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "legacy",
            "act": {"mode": "llm"},
            "verify": {"kind": "verify"},
            "repair": {"mode": "agent"},
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        assert_eq!(graph.type_name, "workflow_graph");
        assert!(graph.nodes.contains_key("act"));
        assert!(graph.nodes.contains_key("verify"));
        assert!(graph.nodes.contains_key("repair"));
        assert_eq!(graph.entry, "act");
    }
3191
    #[test]
    fn workflow_normalization_accepts_tool_registry_nodes() {
        // A node's tools may be a tool_registry object; the declared tool
        // names must survive normalization.
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "registry_tools",
            "entry": "implement",
            "nodes": {
                "implement": {
                    "kind": "stage",
                    "mode": "agent",
                    "tools": {
                        "_type": "tool_registry",
                        "tools": [
                            {"name": "read", "description": "Read files"},
                            {"name": "run", "description": "Run commands"}
                        ]
                    }
                }
            },
            "edges": []
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        let node = graph.nodes.get("implement").unwrap();
        assert_eq!(workflow_tool_names(&node.tools), vec!["read", "run"]);
    }
3216
    #[test]
    fn artifact_selection_honors_budget_and_priority() {
        // With a cap of 2 artifacts and a 30-token budget, selection must
        // return exactly two of the three candidates, all of kind "summary".
        let policy = ContextPolicy {
            max_artifacts: Some(2),
            max_tokens: Some(30),
            prefer_recent: true,
            prefer_fresh: true,
            prioritize_kinds: vec!["verification_result".to_string()],
            ..Default::default()
        };
        let artifacts = vec![
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "a".to_string(),
                kind: "summary".to_string(),
                text: Some("short".to_string()),
                relevance: Some(0.9),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "b".to_string(),
                kind: "summary".to_string(),
                // Deliberately large body so the token budget bites.
                text: Some("this is a much larger artifact body".to_string()),
                relevance: Some(1.0),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "c".to_string(),
                kind: "summary".to_string(),
                text: Some("tiny".to_string()),
                relevance: Some(0.5),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
        ];
        let selected = select_artifacts(artifacts, &policy);
        assert_eq!(selected.len(), 2);
        assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
    }
3263
3264 #[test]
3265 fn workflow_validation_rejects_condition_without_true_false_edges() {
3266 let graph = WorkflowGraph {
3267 entry: "gate".to_string(),
3268 nodes: BTreeMap::from([(
3269 "gate".to_string(),
3270 WorkflowNode {
3271 id: Some("gate".to_string()),
3272 kind: "condition".to_string(),
3273 ..Default::default()
3274 },
3275 )]),
3276 edges: vec![WorkflowEdge {
3277 from: "gate".to_string(),
3278 to: "next".to_string(),
3279 branch: Some("true".to_string()),
3280 label: None,
3281 }],
3282 ..Default::default()
3283 };
3284 let report = validate_workflow(&graph, None);
3285 assert!(!report.valid);
3286 assert!(report
3287 .errors
3288 .iter()
3289 .any(|error| error.contains("true") && error.contains("false")));
3290 }
3291
    #[test]
    fn replay_fixture_round_trip_passes() {
        // A fixture derived from a run must evaluate cleanly against that
        // same run: no failures, report.pass == true. The record below is a
        // fully spelled-out single-stage run so the round trip covers every
        // field the fixture extraction might read.
        let run = RunRecord {
            type_name: "run_record".to_string(),
            id: "run_1".to_string(),
            workflow_id: "wf".to_string(),
            workflow_name: Some("demo".to_string()),
            task: "demo".to_string(),
            status: "completed".to_string(),
            started_at: "1".to_string(),
            finished_at: Some("2".to_string()),
            parent_run_id: None,
            // This run is its own root (no parent).
            root_run_id: Some("run_1".to_string()),
            // Single completed "act" stage that produced one artifact.
            stages: vec![RunStageRecord {
                id: "stage_1".to_string(),
                node_id: "act".to_string(),
                kind: "stage".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                branch: Some("success".to_string()),
                started_at: "1".to_string(),
                finished_at: Some("2".to_string()),
                visible_text: Some("done".to_string()),
                private_reasoning: None,
                transcript: None,
                verification: None,
                usage: None,
                // normalize() fills in any defaulted artifact fields the
                // same way production code would.
                artifacts: vec![ArtifactRecord {
                    type_name: "artifact".to_string(),
                    id: "a1".to_string(),
                    kind: "summary".to_string(),
                    text: Some("done".to_string()),
                    created_at: "1".to_string(),
                    ..Default::default()
                }
                .normalize()],
                consumed_artifact_ids: vec![],
                produced_artifact_ids: vec!["a1".to_string()],
                attempts: vec![],
                metadata: BTreeMap::new(),
            }],
            transitions: vec![],
            checkpoints: vec![],
            pending_nodes: vec![],
            completed_nodes: vec!["act".to_string()],
            child_runs: vec![],
            artifacts: vec![],
            policy: CapabilityPolicy::default(),
            execution: None,
            transcript: None,
            usage: None,
            replay_fixture: None,
            trace_spans: vec![],
            metadata: BTreeMap::new(),
            persisted_path: None,
        };
        let fixture = replay_fixture_from_run(&run);
        let report = evaluate_run_against_fixture(&run, &fixture);
        assert!(report.pass);
        assert!(report.failures.is_empty());
    }
3353
3354 #[test]
3355 fn replay_eval_suite_reports_failed_case() {
3356 let good = RunRecord {
3357 id: "run_good".to_string(),
3358 workflow_id: "wf".to_string(),
3359 status: "completed".to_string(),
3360 stages: vec![RunStageRecord {
3361 node_id: "act".to_string(),
3362 status: "completed".to_string(),
3363 outcome: "success".to_string(),
3364 ..Default::default()
3365 }],
3366 ..Default::default()
3367 };
3368 let bad = RunRecord {
3369 id: "run_bad".to_string(),
3370 workflow_id: "wf".to_string(),
3371 status: "failed".to_string(),
3372 stages: vec![RunStageRecord {
3373 node_id: "act".to_string(),
3374 status: "failed".to_string(),
3375 outcome: "error".to_string(),
3376 ..Default::default()
3377 }],
3378 ..Default::default()
3379 };
3380 let suite = evaluate_run_suite(vec![
3381 (
3382 good.clone(),
3383 replay_fixture_from_run(&good),
3384 Some("good.json".to_string()),
3385 ),
3386 (
3387 bad.clone(),
3388 replay_fixture_from_run(&good),
3389 Some("bad.json".to_string()),
3390 ),
3391 ]);
3392 assert!(!suite.pass);
3393 assert_eq!(suite.total, 2);
3394 assert_eq!(suite.failed, 1);
3395 assert!(suite.cases.iter().any(|case| !case.pass));
3396 }
3397
3398 #[test]
3399 fn run_diff_reports_changed_stage() {
3400 let left = RunRecord {
3401 id: "left".to_string(),
3402 workflow_id: "wf".to_string(),
3403 status: "completed".to_string(),
3404 stages: vec![RunStageRecord {
3405 node_id: "act".to_string(),
3406 status: "completed".to_string(),
3407 outcome: "success".to_string(),
3408 ..Default::default()
3409 }],
3410 ..Default::default()
3411 };
3412 let right = RunRecord {
3413 id: "right".to_string(),
3414 workflow_id: "wf".to_string(),
3415 status: "failed".to_string(),
3416 stages: vec![RunStageRecord {
3417 node_id: "act".to_string(),
3418 status: "failed".to_string(),
3419 outcome: "error".to_string(),
3420 ..Default::default()
3421 }],
3422 ..Default::default()
3423 };
3424 let diff = diff_run_records(&left, &right);
3425 assert!(diff.status_changed);
3426 assert!(!diff.identical);
3427 assert_eq!(diff.stage_diffs.len(), 1);
3428 }
3429
3430 #[test]
3431 fn eval_suite_manifest_can_fail_on_baseline_diff() {
3432 let temp_dir =
3433 std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
3434 std::fs::create_dir_all(&temp_dir).unwrap();
3435 let baseline_path = temp_dir.join("baseline.json");
3436 let candidate_path = temp_dir.join("candidate.json");
3437
3438 let baseline = RunRecord {
3439 id: "baseline".to_string(),
3440 workflow_id: "wf".to_string(),
3441 status: "completed".to_string(),
3442 stages: vec![RunStageRecord {
3443 node_id: "act".to_string(),
3444 status: "completed".to_string(),
3445 outcome: "success".to_string(),
3446 ..Default::default()
3447 }],
3448 ..Default::default()
3449 };
3450 let candidate = RunRecord {
3451 id: "candidate".to_string(),
3452 workflow_id: "wf".to_string(),
3453 status: "failed".to_string(),
3454 stages: vec![RunStageRecord {
3455 node_id: "act".to_string(),
3456 status: "failed".to_string(),
3457 outcome: "error".to_string(),
3458 ..Default::default()
3459 }],
3460 ..Default::default()
3461 };
3462
3463 save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
3464 save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();
3465
3466 let manifest = EvalSuiteManifest {
3467 base_dir: Some(temp_dir.display().to_string()),
3468 cases: vec![EvalSuiteCase {
3469 label: Some("candidate".to_string()),
3470 run_path: "candidate.json".to_string(),
3471 fixture_path: None,
3472 compare_to: Some("baseline.json".to_string()),
3473 }],
3474 ..Default::default()
3475 };
3476 let suite = evaluate_run_suite_manifest(&manifest).unwrap();
3477 assert!(!suite.pass);
3478 assert_eq!(suite.failed, 1);
3479 assert!(suite.cases[0].comparison.is_some());
3480 assert!(suite.cases[0]
3481 .failures
3482 .iter()
3483 .any(|failure| failure.contains("baseline")));
3484 }
3485
3486 #[test]
3487 fn render_unified_diff_marks_removed_and_added_lines() {
3488 let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
3489 assert!(diff.contains("--- a/src/main.rs"));
3490 assert!(diff.contains("+++ b/src/main.rs"));
3491 assert!(diff.contains("-old"));
3492 assert!(diff.contains("+new"));
3493 assert!(diff.contains(" same"));
3494 }
3495
3496 #[test]
3497 fn execution_policy_rejects_process_exec_when_read_only() {
3498 push_execution_policy(CapabilityPolicy {
3499 side_effect_level: Some("read_only".to_string()),
3500 capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
3501 ..Default::default()
3502 });
3503 let result = enforce_current_policy_for_builtin("exec", &[]);
3504 pop_execution_policy();
3505 assert!(result.is_err());
3506 }
3507
3508 #[test]
3509 fn execution_policy_rejects_unlisted_tool() {
3510 push_execution_policy(CapabilityPolicy {
3511 tools: vec!["read".to_string()],
3512 ..Default::default()
3513 });
3514 let result = enforce_current_policy_for_tool("edit");
3515 pop_execution_policy();
3516 assert!(result.is_err());
3517 }
3518
3519 #[test]
3520 fn normalize_run_record_preserves_trace_spans() {
3521 let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
3522 "_type": "run_record",
3523 "id": "run_trace",
3524 "workflow_id": "wf",
3525 "status": "completed",
3526 "started_at": "1",
3527 "trace_spans": [
3528 {
3529 "span_id": 1,
3530 "parent_id": null,
3531 "kind": "pipeline",
3532 "name": "workflow",
3533 "start_ms": 0,
3534 "duration_ms": 42,
3535 "metadata": {"model": "demo"}
3536 }
3537 ]
3538 }));
3539
3540 let run = normalize_run_record(&value).unwrap();
3541 assert_eq!(run.trace_spans.len(), 1);
3542 assert_eq!(run.trace_spans[0].kind, "pipeline");
3543 assert_eq!(
3544 run.trace_spans[0].metadata["model"],
3545 serde_json::json!("demo")
3546 );
3547 }
3548
3549 #[test]
3552 fn pre_tool_hook_deny_blocks_execution() {
3553 clear_tool_hooks();
3554 register_tool_hook(ToolHook {
3555 pattern: "dangerous_*".to_string(),
3556 pre: Some(Rc::new(|_name, _args| {
3557 PreToolAction::Deny("blocked by policy".to_string())
3558 })),
3559 post: None,
3560 });
3561 let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
3562 clear_tool_hooks();
3563 assert!(matches!(result, PreToolAction::Deny(_)));
3564 }
3565
3566 #[test]
3567 fn pre_tool_hook_allow_passes_through() {
3568 clear_tool_hooks();
3569 register_tool_hook(ToolHook {
3570 pattern: "safe_*".to_string(),
3571 pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
3572 post: None,
3573 });
3574 let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
3575 clear_tool_hooks();
3576 assert!(matches!(result, PreToolAction::Allow));
3577 }
3578
3579 #[test]
3580 fn pre_tool_hook_modify_rewrites_args() {
3581 clear_tool_hooks();
3582 register_tool_hook(ToolHook {
3583 pattern: "*".to_string(),
3584 pre: Some(Rc::new(|_name, _args| {
3585 PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
3586 })),
3587 post: None,
3588 });
3589 let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
3590 clear_tool_hooks();
3591 match result {
3592 PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
3593 _ => panic!("expected Modify"),
3594 }
3595 }
3596
3597 #[test]
3598 fn post_tool_hook_modifies_result() {
3599 clear_tool_hooks();
3600 register_tool_hook(ToolHook {
3601 pattern: "exec".to_string(),
3602 pre: None,
3603 post: Some(Rc::new(|_name, result| {
3604 if result.contains("SECRET") {
3605 PostToolAction::Modify("[REDACTED]".to_string())
3606 } else {
3607 PostToolAction::Pass
3608 }
3609 })),
3610 });
3611 let result = run_post_tool_hooks("exec", "output with SECRET data");
3612 let clean = run_post_tool_hooks("exec", "clean output");
3613 clear_tool_hooks();
3614 assert_eq!(result, "[REDACTED]");
3615 assert_eq!(clean, "clean output");
3616 }
3617
3618 #[test]
3619 fn unmatched_hook_pattern_does_not_fire() {
3620 clear_tool_hooks();
3621 register_tool_hook(ToolHook {
3622 pattern: "exec".to_string(),
3623 pre: Some(Rc::new(|_name, _args| {
3624 PreToolAction::Deny("should not match".to_string())
3625 })),
3626 post: None,
3627 });
3628 let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
3629 clear_tool_hooks();
3630 assert!(matches!(result, PreToolAction::Allow));
3631 }
3632
3633 #[test]
3634 fn glob_match_patterns() {
3635 assert!(glob_match("*", "anything"));
3636 assert!(glob_match("exec*", "exec_at"));
3637 assert!(glob_match("*_file", "read_file"));
3638 assert!(!glob_match("exec*", "read_file"));
3639 assert!(glob_match("read_file", "read_file"));
3640 assert!(!glob_match("read_file", "write_file"));
3641 }
3642
3643 #[test]
3646 fn microcompact_snips_large_output() {
3647 let large = "x".repeat(50_000);
3648 let result = microcompact_tool_output(&large, 10_000);
3649 assert!(result.len() < 15_000);
3650 assert!(result.contains("snipped"));
3651 }
3652
3653 #[test]
3654 fn microcompact_preserves_small_output() {
3655 let small = "hello world";
3656 let result = microcompact_tool_output(small, 10_000);
3657 assert_eq!(result, small);
3658 }
3659
3660 #[test]
3661 fn auto_compact_messages_reduces_count() {
3662 let mut messages: Vec<serde_json::Value> = (0..20)
3663 .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
3664 .collect();
3665 let runtime = tokio::runtime::Builder::new_current_thread()
3666 .enable_all()
3667 .build()
3668 .unwrap();
3669 let compacted = runtime.block_on(auto_compact_messages(
3670 &mut messages,
3671 &AutoCompactConfig {
3672 compact_strategy: CompactStrategy::Truncate,
3673 keep_last: 6,
3674 ..Default::default()
3675 },
3676 None,
3677 ));
3678 let summary = compacted.unwrap();
3679 assert!(summary.is_some());
3680 assert!(messages.len() <= 7); assert!(messages[0]["content"]
3682 .as_str()
3683 .unwrap()
3684 .contains("auto-compacted"));
3685 }
3686
3687 #[test]
3688 fn auto_compact_noop_when_under_threshold() {
3689 let mut messages: Vec<serde_json::Value> = (0..4)
3690 .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
3691 .collect();
3692 let runtime = tokio::runtime::Builder::new_current_thread()
3693 .enable_all()
3694 .build()
3695 .unwrap();
3696 let compacted = runtime.block_on(auto_compact_messages(
3697 &mut messages,
3698 &AutoCompactConfig {
3699 compact_strategy: CompactStrategy::Truncate,
3700 keep_last: 6,
3701 ..Default::default()
3702 },
3703 None,
3704 ));
3705 assert!(compacted.unwrap().is_none());
3706 assert_eq!(messages.len(), 4);
3707 }
3708
3709 #[test]
3710 fn estimate_message_tokens_basic() {
3711 let messages = vec![
3712 serde_json::json!({"role": "user", "content": "a".repeat(400)}),
3713 serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
3714 ];
3715 let tokens = estimate_message_tokens(&messages);
3716 assert_eq!(tokens, 200); }
3718
3719 #[test]
3722 fn dedup_artifacts_removes_duplicates() {
3723 let mut artifacts = vec![
3724 ArtifactRecord {
3725 id: "a1".to_string(),
3726 kind: "test".to_string(),
3727 text: Some("duplicate content".to_string()),
3728 ..Default::default()
3729 },
3730 ArtifactRecord {
3731 id: "a2".to_string(),
3732 kind: "test".to_string(),
3733 text: Some("duplicate content".to_string()),
3734 ..Default::default()
3735 },
3736 ArtifactRecord {
3737 id: "a3".to_string(),
3738 kind: "test".to_string(),
3739 text: Some("unique content".to_string()),
3740 ..Default::default()
3741 },
3742 ];
3743 dedup_artifacts(&mut artifacts);
3744 assert_eq!(artifacts.len(), 2);
3745 }
3746
3747 #[test]
3748 fn microcompact_artifact_snips_oversized() {
3749 let mut artifact = ArtifactRecord {
3750 id: "a1".to_string(),
3751 kind: "test".to_string(),
3752 text: Some("x".repeat(10_000)),
3753 estimated_tokens: Some(2_500),
3754 ..Default::default()
3755 };
3756 microcompact_artifact(&mut artifact, 500);
3757 assert!(artifact.text.as_ref().unwrap().len() < 5_000);
3758 assert_eq!(artifact.estimated_tokens, Some(500));
3759 }
3760
3761 #[test]
3764 fn arg_constraint_allows_matching_pattern() {
3765 let policy = CapabilityPolicy {
3766 tool_arg_constraints: vec![ToolArgConstraint {
3767 tool: "exec".to_string(),
3768 arg_patterns: vec!["cargo *".to_string()],
3769 }],
3770 ..Default::default()
3771 };
3772 let result = enforce_tool_arg_constraints(
3773 &policy,
3774 "exec",
3775 &serde_json::json!({"command": "cargo test"}),
3776 );
3777 assert!(result.is_ok());
3778 }
3779
3780 #[test]
3781 fn arg_constraint_rejects_non_matching_pattern() {
3782 let policy = CapabilityPolicy {
3783 tool_arg_constraints: vec![ToolArgConstraint {
3784 tool: "exec".to_string(),
3785 arg_patterns: vec!["cargo *".to_string()],
3786 }],
3787 ..Default::default()
3788 };
3789 let result = enforce_tool_arg_constraints(
3790 &policy,
3791 "exec",
3792 &serde_json::json!({"command": "rm -rf /"}),
3793 );
3794 assert!(result.is_err());
3795 }
3796
3797 #[test]
3798 fn arg_constraint_ignores_unmatched_tool() {
3799 let policy = CapabilityPolicy {
3800 tool_arg_constraints: vec![ToolArgConstraint {
3801 tool: "exec".to_string(),
3802 arg_patterns: vec!["cargo *".to_string()],
3803 }],
3804 ..Default::default()
3805 };
3806 let result = enforce_tool_arg_constraints(
3807 &policy,
3808 "read_file",
3809 &serde_json::json!({"path": "/etc/passwd"}),
3810 );
3811 assert!(result.is_ok());
3812 }
3813
3814 #[test]
3815 fn microcompact_handles_multibyte_utf8() {
3816 let emoji_output = "🔥".repeat(500); let result = microcompact_tool_output(&emoji_output, 400);
3819 assert!(result.contains("snipped"));
3821
3822 let mixed = format!("{}{}{}", "a".repeat(300), "é".repeat(500), "b".repeat(300));
3824 let result2 = microcompact_tool_output(&mixed, 400);
3825 assert!(result2.contains("snipped"));
3826
3827 let cjk = "中文".repeat(500);
3829 let result3 = microcompact_tool_output(&cjk, 400);
3830 assert!(result3.contains("snipped"));
3831 }
3832}