1use std::collections::{BTreeMap, BTreeSet};
2use std::path::{Path, PathBuf};
3use std::rc::Rc;
4use std::{cell::RefCell, thread_local};
5
6use serde::{Deserialize, Serialize};
7
8use crate::llm::{extract_llm_options, vm_call_llm_full, vm_value_to_json};
9use crate::value::{VmError, VmValue};
10
/// Returns the current UTC time formatted as an RFC 3339 timestamp
/// (e.g. `2024-05-01T12:34:56Z`).
///
/// The previous implementation returned bare epoch seconds despite the
/// function's name; record `created_at` fields are free-form strings, so the
/// corrected format is backward-compatible for consumers treating them as
/// opaque. A clock before the Unix epoch falls back to the epoch itself.
fn now_rfc3339() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    epoch_secs_to_rfc3339(secs)
}

/// Converts seconds since the Unix epoch to `YYYY-MM-DDThh:mm:ssZ`.
/// Date math follows Howard Hinnant's `civil_from_days` algorithm.
fn epoch_secs_to_rfc3339(secs: u64) -> String {
    let days = (secs / 86_400) as i64;
    let rem = secs % 86_400;
    let (hour, minute, second) = (rem / 3_600, (rem % 3_600) / 60, rem % 60);
    // Shift epoch from 1970-01-01 to 0000-03-01 so leap days land at year end.
    let z = days + 719_468;
    let era = z / 146_097;
    let doe = z - era * 146_097; // day of 400-year era
    let yoe = (doe - doe / 1_460 + doe / 36_524 - doe / 146_096) / 365; // year of era
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day of year (Mar-based)
    let mp = (5 * doy + 2) / 153; // month index, March = 0
    let day = doy - (153 * mp + 2) / 5 + 1;
    let month = if mp < 10 { mp + 3 } else { mp - 9 };
    let mut year = yoe + era * 400;
    if month <= 2 {
        year += 1;
    }
    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z")
}
19
20fn new_id(prefix: &str) -> String {
21 format!("{prefix}_{}", uuid::Uuid::now_v7())
22}
23
/// Resolves the directory used for run state: the `HARN_RUN_DIR` environment
/// variable when set (and valid UTF-8), otherwise the relative `.harn-runs`.
fn default_run_dir() -> PathBuf {
    match std::env::var("HARN_RUN_DIR") {
        Ok(dir) => PathBuf::from(dir),
        Err(_) => PathBuf::from(".harn-runs"),
    }
}
29
thread_local! {
    // Stack of capability ceilings; pushed/popped by execution scopes elsewhere
    // in the crate (not referenced in this chunk).
    static EXECUTION_POLICY_STACK: RefCell<Vec<CapabilityPolicy>> = const { RefCell::new(Vec::new()) };
    // Hooks consulted around every tool call on this thread; see
    // run_pre_tool_hooks / run_post_tool_hooks.
    static TOOL_HOOKS: RefCell<Vec<ToolHook>> = const { RefCell::new(Vec::new()) };
    // The mutation session currently active on this thread, if any; managed by
    // install_current_mutation_session / current_mutation_session.
    static CURRENT_MUTATION_SESSION: RefCell<Option<MutationSessionRecord>> = const { RefCell::new(None) };
}
35
/// Describes the mutation/approval context a unit of work runs under.
///
/// `#[serde(default)]` lets records deserialize with missing fields; call
/// `normalize` afterwards to fill the required string fields.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct MutationSessionRecord {
    /// Unique session id; `normalize` generates a `session_*` id when empty.
    pub session_id: String,
    /// Id of the spawning session, when this session was forked from another.
    pub parent_session_id: Option<String>,
    /// Associated run id, if the session belongs to a recorded run.
    pub run_id: Option<String>,
    /// Worker executing on behalf of this session, if any.
    pub worker_id: Option<String>,
    /// Free-form label for how the work is being executed.
    pub execution_kind: Option<String>,
    /// What mutations are permitted; `normalize` defaults this to "read_only".
    pub mutation_scope: String,
    /// How approvals are obtained; `normalize` defaults to "host_enforced".
    pub approval_mode: String,
}
47
48impl MutationSessionRecord {
49 pub fn normalize(mut self) -> Self {
50 if self.session_id.is_empty() {
51 self.session_id = new_id("session");
52 }
53 if self.mutation_scope.is_empty() {
54 self.mutation_scope = "read_only".to_string();
55 }
56 if self.approval_mode.is_empty() {
57 self.approval_mode = "host_enforced".to_string();
58 }
59 self
60 }
61}
62
63pub fn install_current_mutation_session(session: Option<MutationSessionRecord>) {
64 CURRENT_MUTATION_SESSION.with(|slot| {
65 *slot.borrow_mut() = session.map(MutationSessionRecord::normalize);
66 });
67}
68
69pub fn current_mutation_session() -> Option<MutationSessionRecord> {
70 CURRENT_MUTATION_SESSION.with(|slot| slot.borrow().clone())
71}
72
/// Decision returned by a pre-tool hook before a tool call runs.
#[derive(Clone, Debug)]
pub enum PreToolAction {
    /// Let the call proceed with the current arguments.
    Allow,
    /// Block the call; the payload is the human-readable reason.
    Deny(String),
    /// Let the call proceed, but with these replacement arguments.
    Modify(serde_json::Value),
}

/// Decision returned by a post-tool hook after a tool call completes.
#[derive(Clone, Debug)]
pub enum PostToolAction {
    /// Leave the tool result unchanged.
    Pass,
    /// Replace the textual tool result with this string.
    Modify(String),
}

// Hook callbacks are `Rc`'d so `ToolHook` can derive `Clone` even though
// closures themselves are not `Clone`.
/// Pre-hook callback: `(tool_name, args) -> action`.
pub type PreToolHookFn = Rc<dyn Fn(&str, &serde_json::Value) -> PreToolAction>;
/// Post-hook callback: `(tool_name, result_text) -> action`.
pub type PostToolHookFn = Rc<dyn Fn(&str, &str) -> PostToolAction>;

/// A hook bound to a tool-name glob pattern (see `glob_match`); either or
/// both of the pre/post callbacks may be present.
#[derive(Clone)]
pub struct ToolHook {
    /// Glob pattern matched against the tool name; `"*"` matches everything.
    pub pattern: String,
    /// Runs before the tool call; may allow, deny, or rewrite the arguments.
    pub pre: Option<PreToolHookFn>,
    /// Runs after the tool call; may rewrite the textual result.
    pub post: Option<PostToolHookFn>,
}
109
impl std::fmt::Debug for ToolHook {
    /// Manual `Debug` impl: the hook closures are not `Debug`, so only the
    /// pattern and the presence of each callback are reported.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ToolHook")
            .field("pattern", &self.pattern)
            .field("has_pre", &self.pre.is_some())
            .field("has_post", &self.post.is_some())
            .finish()
    }
}
119
/// Minimal glob matcher supporting `*`, `prefix*`, `*suffix`, `*infix*`, and
/// exact matches. A `*` in the middle of a pattern (e.g. `a*b`) is NOT
/// expanded; such patterns fall through to exact comparison.
///
/// Fix: patterns wrapped in stars on both sides (`*foo*`) previously hit the
/// suffix-strip branch and tested `starts_with("*foo")`, which could never
/// match as intended; they now perform a substring check.
fn glob_match(pattern: &str, name: &str) -> bool {
    if pattern == "*" {
        return true;
    }
    // `*foo*` — substring match on the inner literal.
    if let Some(inner) = pattern
        .strip_prefix('*')
        .and_then(|p| p.strip_suffix('*'))
    {
        return name.contains(inner);
    }
    if let Some(prefix) = pattern.strip_suffix('*') {
        return name.starts_with(prefix);
    }
    if let Some(suffix) = pattern.strip_prefix('*') {
        return name.ends_with(suffix);
    }
    pattern == name
}
132
133pub fn register_tool_hook(hook: ToolHook) {
134 TOOL_HOOKS.with(|hooks| hooks.borrow_mut().push(hook));
135}
136
137pub fn clear_tool_hooks() {
138 TOOL_HOOKS.with(|hooks| hooks.borrow_mut().clear());
139}
140
141pub fn run_pre_tool_hooks(tool_name: &str, args: &serde_json::Value) -> PreToolAction {
143 TOOL_HOOKS.with(|hooks| {
144 let hooks = hooks.borrow();
145 let mut current_args = args.clone();
146 for hook in hooks.iter() {
147 if !glob_match(&hook.pattern, tool_name) {
148 continue;
149 }
150 if let Some(ref pre) = hook.pre {
151 match pre(tool_name, ¤t_args) {
152 PreToolAction::Allow => {}
153 PreToolAction::Deny(reason) => return PreToolAction::Deny(reason),
154 PreToolAction::Modify(new_args) => {
155 current_args = new_args;
156 }
157 }
158 }
159 }
160 if current_args != *args {
161 PreToolAction::Modify(current_args)
162 } else {
163 PreToolAction::Allow
164 }
165 })
166}
167
168pub fn run_post_tool_hooks(tool_name: &str, result: &str) -> String {
170 TOOL_HOOKS.with(|hooks| {
171 let hooks = hooks.borrow();
172 let mut current = result.to_string();
173 for hook in hooks.iter() {
174 if !glob_match(&hook.pattern, tool_name) {
175 continue;
176 }
177 if let Some(ref post) = hook.post {
178 match post(tool_name, ¤t) {
179 PostToolAction::Pass => {}
180 PostToolAction::Modify(new_result) => {
181 current = new_result;
182 }
183 }
184 }
185 }
186 current
187 })
188}
189
/// How archived transcript messages are condensed during auto-compaction.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CompactStrategy {
    /// Ask the LLM to summarize the archived messages.
    Llm,
    /// Keep abbreviated per-message excerpts (no LLM call).
    Truncate,
    /// Invoke a user-supplied VM closure (see `AutoCompactConfig::custom_compactor`).
    Custom,
}
198
199pub fn parse_compact_strategy(value: &str) -> Result<CompactStrategy, VmError> {
200 match value {
201 "llm" => Ok(CompactStrategy::Llm),
202 "truncate" => Ok(CompactStrategy::Truncate),
203 "custom" => Ok(CompactStrategy::Custom),
204 other => Err(VmError::Runtime(format!(
205 "unknown compact_strategy '{other}' (expected 'llm', 'truncate', or 'custom')"
206 ))),
207 }
208}
209
/// Tuning knobs for transcript auto-compaction.
#[derive(Clone, Debug)]
pub struct AutoCompactConfig {
    /// Estimated-token count at which compaction should trigger.
    pub token_threshold: usize,
    /// Per-tool-output character cap used by micro-compaction.
    pub tool_output_max_chars: usize,
    /// Number of most-recent messages kept verbatim when compacting.
    pub keep_last: usize,
    /// Which summarization strategy to apply to archived messages.
    pub compact_strategy: CompactStrategy,
    /// VM closure used when `compact_strategy` is `Custom`; required then.
    pub custom_compactor: Option<VmValue>,
}
224
impl Default for AutoCompactConfig {
    /// Defaults: trigger at ~80k estimated tokens, cap tool outputs at 20k
    /// chars, keep the 8 newest messages verbatim, summarize via the LLM.
    fn default() -> Self {
        Self {
            token_threshold: 80_000,
            tool_output_max_chars: 20_000,
            keep_last: 8,
            compact_strategy: CompactStrategy::Llm,
            custom_compactor: None,
        }
    }
}
236
237pub fn estimate_message_tokens(messages: &[serde_json::Value]) -> usize {
239 messages
240 .iter()
241 .map(|m| {
242 m.get("content")
243 .and_then(|c| c.as_str())
244 .map(|s| s.len())
245 .unwrap_or(0)
246 })
247 .sum::<usize>()
248 / 4
249}
250
/// Shrinks an oversized tool output to roughly `max_chars`, preserving the
/// head and tail of the text and, when possible, lines that look like
/// compiler/test diagnostics from the snipped middle.
///
/// Outputs already within budget, or budgets under 200 chars, are returned
/// unchanged. Head/tail splits use `floor_char_boundary`/`ceil_char_boundary`
/// so multi-byte characters are never cut.
pub fn microcompact_tool_output(output: &str, max_chars: usize) -> String {
    if output.len() <= max_chars || max_chars < 200 {
        return output.to_string();
    }
    // Heuristically collect up to 32 lines that look like diagnostics.
    let diagnostic_lines = output
        .lines()
        .filter(|line| {
            let trimmed = line.trim();
            let lower = trimmed.to_lowercase();
            // "file.ext:123"-style location: a ':' followed by a digit.
            let has_file_line = {
                let bytes = trimmed.as_bytes();
                let mut i = 0;
                let mut found_colon = false;
                while i < bytes.len() {
                    if bytes[i] == b':' {
                        found_colon = true;
                        break;
                    }
                    i += 1;
                }
                found_colon && i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit()
            };
            // Strong keywords count on their own; weak ones only alongside a
            // file:line location to limit false positives.
            let has_strong_keyword =
                trimmed.contains("FAIL") || trimmed.contains("panic") || trimmed.contains("Panic");
            let has_weak_keyword = trimmed.contains("error")
                || trimmed.contains("undefined")
                || trimmed.contains("expected")
                || trimmed.contains("got")
                || lower.contains("cannot find")
                || lower.contains("not found")
                || lower.contains("no such")
                || lower.contains("unresolved")
                || lower.contains("missing")
                || lower.contains("declared but not used")
                || lower.contains("unused")
                || lower.contains("mismatch");
            // Position-sensitive markers (line prefixes / "panic:") also qualify.
            let positional = lower.contains(" error ")
                || lower.starts_with("error:")
                || lower.starts_with("warning:")
                || lower.starts_with("note:")
                || lower.contains("panic:");
            has_strong_keyword || (has_file_line && has_weak_keyword) || positional
        })
        .take(32)
        .collect::<Vec<_>>();
    if !diagnostic_lines.is_empty() {
        let diagnostics = diagnostic_lines.join("\n");
        // Reserve room for the diagnostics plus marker text, split the rest
        // evenly between head and tail; only worthwhile if each side keeps
        // at least 80 chars.
        let budget = max_chars.saturating_sub(diagnostics.len() + 64);
        let keep = budget / 2;
        if keep >= 80 && output.len() > keep * 2 {
            let head_end = output.floor_char_boundary(keep);
            let tail_start = output.ceil_char_boundary(output.len() - keep);
            let head = &output[..head_end];
            let tail = &output[tail_start..];
            return format!(
                "{head}\n\n[diagnostic lines preserved]\n{diagnostics}\n\n[... output compacted ...]\n\n{tail}"
            );
        }
    }
    // Fallback: plain head/tail snip with a count of removed characters.
    let keep = max_chars / 2;
    let head_end = output.floor_char_boundary(keep);
    let tail_start = output.ceil_char_boundary(output.len() - keep);
    let head = &output[..head_end];
    let tail = &output[tail_start..];
    let snipped = output.len() - max_chars;
    format!("{head}\n\n[... {snipped} characters snipped ...]\n\n{tail}")
}
334
335fn format_compaction_messages(messages: &[serde_json::Value]) -> String {
336 messages
337 .iter()
338 .map(|msg| {
339 let role = msg
340 .get("role")
341 .and_then(|v| v.as_str())
342 .unwrap_or("user")
343 .to_uppercase();
344 let content = msg
345 .get("content")
346 .and_then(|v| v.as_str())
347 .unwrap_or_default();
348 format!("{role}: {content}")
349 })
350 .collect::<Vec<_>>()
351 .join("\n")
352}
353
354fn truncate_compaction_summary(
355 old_messages: &[serde_json::Value],
356 archived_count: usize,
357) -> String {
358 truncate_compaction_summary_with_context(old_messages, archived_count, false)
359}
360
/// Builds a truncate-strategy summary: up to 15 per-message excerpts capped at
/// ~500 chars each, under a header explaining why compaction happened.
///
/// `is_llm_fallback` selects the header used when the LLM summarizer returned
/// empty text and this function is serving as its fallback.
///
/// NOTE(review): the `.take(15)` applies after filtering out empty/non-string
/// messages, while the "... and N more" line counts `archived_count - 15`
/// regardless — so the "more" count can overstate when messages were filtered.
fn truncate_compaction_summary_with_context(
    old_messages: &[serde_json::Value],
    archived_count: usize,
    is_llm_fallback: bool,
) -> String {
    let per_msg_limit = 500_usize;
    let summary_parts: Vec<String> = old_messages
        .iter()
        .filter_map(|m| {
            let role = m.get("role")?.as_str()?;
            let content = m.get("content")?.as_str()?;
            if content.is_empty() {
                return None;
            }
            // Cut long messages on a char boundary and record original size.
            let truncated = if content.len() > per_msg_limit {
                format!(
                    "{}... [truncated from {} chars]",
                    &content[..content.floor_char_boundary(per_msg_limit)],
                    content.len()
                )
            } else {
                content.to_string()
            };
            Some(format!("[{role}] {truncated}"))
        })
        .take(15)
        .collect();
    let header = if is_llm_fallback {
        format!(
            "[auto-compact fallback: LLM summarizer returned empty; {archived_count} older messages abbreviated to ~{per_msg_limit} chars each]"
        )
    } else {
        format!("[auto-compacted {archived_count} older messages via truncate strategy]")
    };
    format!(
        "{header}\n{}{}",
        summary_parts.join("\n"),
        if archived_count > 15 {
            format!("\n... and {} more", archived_count - 15)
        } else {
            String::new()
        }
    )
}
405
/// Extracts summary text from a custom compactor's return value.
///
/// Precedence (order matters): a dict's `summary` (then `text`) field wins;
/// otherwise a plain string is returned as-is, `Nil` becomes empty, and any
/// other value is pretty-printed as JSON.
fn compact_summary_text_from_value(value: &VmValue) -> Result<String, VmError> {
    if let Some(map) = value.as_dict() {
        if let Some(summary) = map.get("summary").or_else(|| map.get("text")) {
            return Ok(summary.display());
        }
    }
    match value {
        VmValue::String(text) => Ok(text.to_string()),
        VmValue::Nil => Ok(String::new()),
        _ => serde_json::to_string_pretty(&vm_value_to_json(value))
            .map_err(|e| VmError::Runtime(format!("custom compactor encode error: {e}"))),
    }
}
419
/// Summarizes archived messages via an LLM call derived from the caller's
/// active call options, with a fixed summarization prompt.
///
/// Transcript/tool/schema-related options are cleared so the compaction call
/// is a plain, side-channel-free completion. An empty LLM response falls back
/// to the truncate strategy (flagged as an LLM fallback in the header).
async fn llm_compaction_summary(
    old_messages: &[serde_json::Value],
    archived_count: usize,
    llm_opts: &crate::llm::api::LlmCallOptions,
) -> Result<String, VmError> {
    let mut compact_opts = llm_opts.clone();
    let formatted = format_compaction_messages(old_messages);
    // Strip everything except the base model/provider settings: no system
    // prompt, no transcript linkage, no tools, no structured-output schema.
    compact_opts.system = None;
    compact_opts.transcript_id = None;
    compact_opts.transcript_summary = None;
    compact_opts.transcript_metadata = None;
    compact_opts.native_tools = None;
    compact_opts.tool_choice = None;
    compact_opts.response_format = None;
    compact_opts.json_schema = None;
    compact_opts.messages = vec![serde_json::json!({
        "role": "user",
        "content": format!(
            "Summarize these archived conversation messages for a follow-on coding agent. Preserve goals, constraints, decisions, completed tool work, unresolved issues, and next actions. Output only the summary text.\n\nArchived message count: {archived_count}\n\nConversation:\n{formatted}"
        ),
    })];
    let result = vm_call_llm_full(&compact_opts).await?;
    let summary = result.text.trim();
    if summary.is_empty() {
        // Never replace history with an empty message; degrade to truncation.
        Ok(truncate_compaction_summary_with_context(
            old_messages,
            archived_count,
            true,
        ))
    } else {
        Ok(format!(
            "[auto-compacted {archived_count} older messages]\n{summary}"
        ))
    }
}
455
456async fn custom_compaction_summary(
457 old_messages: &[serde_json::Value],
458 archived_count: usize,
459 callback: &VmValue,
460) -> Result<String, VmError> {
461 let Some(VmValue::Closure(closure)) = Some(callback.clone()) else {
462 return Err(VmError::Runtime(
463 "compact_callback must be a closure when compact_strategy is 'custom'".to_string(),
464 ));
465 };
466 let mut vm = crate::vm::clone_async_builtin_child_vm().ok_or_else(|| {
467 VmError::Runtime(
468 "custom transcript compaction requires an async builtin VM context".to_string(),
469 )
470 })?;
471 let messages_vm = VmValue::List(Rc::new(
472 old_messages
473 .iter()
474 .map(crate::stdlib::json_to_vm_value)
475 .collect(),
476 ));
477 let result = vm.call_closure_pub(&closure, &[messages_vm], &[]).await;
478 let summary = compact_summary_text_from_value(&result?)?;
479 if summary.trim().is_empty() {
480 Ok(truncate_compaction_summary(old_messages, archived_count))
481 } else {
482 Ok(format!(
483 "[auto-compacted {archived_count} older messages]\n{summary}"
484 ))
485 }
486}
487
/// Compacts `messages` in place once it exceeds `config.keep_last` entries.
///
/// The oldest `len - keep_last` messages are drained, summarized with the
/// configured strategy, and replaced by a single role-"user" summary message
/// inserted at index 0. Returns `Ok(Some(summary))` when compaction ran,
/// `Ok(None)` when the transcript was already short enough.
///
/// `llm_opts` is required only for the `Llm` strategy; `custom_compactor`
/// only for `Custom` — each is an error if missing when its strategy is used.
pub(crate) async fn auto_compact_messages(
    messages: &mut Vec<serde_json::Value>,
    config: &AutoCompactConfig,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
) -> Result<Option<String>, VmError> {
    if messages.len() <= config.keep_last {
        return Ok(None);
    }
    // Drain everything except the newest keep_last messages.
    let split_at = messages.len().saturating_sub(config.keep_last);
    let old_messages: Vec<_> = messages.drain(..split_at).collect();
    let archived_count = old_messages.len();
    let summary = match config.compact_strategy {
        CompactStrategy::Truncate => truncate_compaction_summary(&old_messages, archived_count),
        CompactStrategy::Llm => {
            llm_compaction_summary(
                &old_messages,
                archived_count,
                llm_opts.ok_or_else(|| {
                    VmError::Runtime(
                        "LLM transcript compaction requires active LLM call options".to_string(),
                    )
                })?,
            )
            .await?
        }
        CompactStrategy::Custom => {
            custom_compaction_summary(
                &old_messages,
                archived_count,
                config.custom_compactor.as_ref().ok_or_else(|| {
                    VmError::Runtime(
                        "compact_callback is required when compact_strategy is 'custom'"
                            .to_string(),
                    )
                })?,
            )
            .await?
        }
    };
    // The summary stands in for the archived prefix of the conversation.
    messages.insert(
        0,
        serde_json::json!({
            "role": "user",
            "content": summary,
        }),
    );
    Ok(Some(summary))
}
538
539pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
543 let max_chars = max_tokens * 4;
544 if let Some(ref text) = artifact.text {
545 if text.len() > max_chars && max_chars >= 200 {
546 artifact.text = Some(microcompact_tool_output(text, max_chars));
547 artifact.estimated_tokens = Some(max_tokens);
548 }
549 }
550}
551
552pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
555 let mut seen_hashes: BTreeSet<u64> = BTreeSet::new();
556 artifacts.retain(|artifact| {
557 let text = artifact.text.as_deref().unwrap_or("");
558 if text.is_empty() {
559 return true;
560 }
561 let hash = {
563 use std::hash::{Hash, Hasher};
564 let mut hasher = std::collections::hash_map::DefaultHasher::new();
565 text.hash(&mut hasher);
566 hasher.finish()
567 };
568 seen_hashes.insert(hash)
569 });
570}
571
572pub fn select_artifacts_adaptive(
575 mut artifacts: Vec<ArtifactRecord>,
576 policy: &ContextPolicy,
577) -> Vec<ArtifactRecord> {
578 dedup_artifacts(&mut artifacts);
580
581 if let Some(max_tokens) = policy.max_tokens {
585 let count = artifacts.len().max(1);
586 let per_artifact_budget = max_tokens / count;
587 let cap = per_artifact_budget.max(500).min(max_tokens);
589 for artifact in &mut artifacts {
590 let est = artifact.estimated_tokens.unwrap_or(0);
591 if est > cap * 2 {
592 microcompact_artifact(artifact, cap);
593 }
594 }
595 }
596
597 select_artifacts(artifacts, policy)
599}
600
/// Restricts the first argument of a tool (matched by glob) to a set of
/// allowed glob patterns; enforced by `enforce_tool_arg_constraints`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolArgConstraint {
    /// Glob pattern selecting which tools this constraint applies to.
    pub tool: String,
    /// Allowed glob patterns for the tool's first argument; an empty list
    /// disables the constraint.
    pub arg_patterns: Vec<String>,
}
613
614pub fn enforce_tool_arg_constraints(
616 policy: &CapabilityPolicy,
617 tool_name: &str,
618 args: &serde_json::Value,
619) -> Result<(), VmError> {
620 for constraint in &policy.tool_arg_constraints {
621 if !glob_match(&constraint.tool, tool_name) {
622 continue;
623 }
624 if constraint.arg_patterns.is_empty() {
625 continue;
626 }
627 let first_arg = args
629 .as_object()
630 .and_then(|o| o.values().next())
631 .and_then(|v| v.as_str())
632 .or_else(|| args.as_str())
633 .unwrap_or("");
634 let matches = constraint
635 .arg_patterns
636 .iter()
637 .any(|pattern| glob_match(pattern, first_arg));
638 if !matches {
639 return reject_policy(format!(
640 "tool '{tool_name}' argument '{first_arg}' does not match allowed patterns: {:?}",
641 constraint.arg_patterns
642 ));
643 }
644 }
645 Ok(())
646}
647
/// Canonicalizes an artifact kind string.
///
/// Aliases are rewritten ("file" -> "workspace_file", "transcript" ->
/// "transcript_summary", "verification" -> "verification_result", "test" ->
/// "test_result"), blank kinds become "artifact", and everything else passes
/// through unchanged. Canonical kinds in use include: resource,
/// workspace_file, editor_selection, workspace_snapshot, transcript_summary,
/// summary, plan, diff, git_diff, patch, patch_set, patch_proposal,
/// diff_review, review_decision, verification_bundle, apply_intent,
/// verification_result, test_result, command_result, provider_payload,
/// worker_result, worker_notification, artifact.
fn normalize_artifact_kind(kind: &str) -> String {
    let normalized = match kind {
        "file" => "workspace_file",
        "transcript" => "transcript_summary",
        "verification" => "verification_result",
        "test" => "test_result",
        blank if blank.trim().is_empty() => "artifact",
        other => other,
    };
    normalized.to_string()
}
681
/// Default priority for an artifact kind; higher values are preferred when
/// selecting context. Unknown kinds get the baseline 40.
fn default_artifact_priority(kind: &str) -> i64 {
    const DIFF_LIKE: [&str; 8] = [
        "diff",
        "git_diff",
        "patch",
        "patch_set",
        "patch_proposal",
        "diff_review",
        "review_decision",
        "apply_intent",
    ];
    if kind == "verification_result" || kind == "test_result" {
        100
    } else if kind == "verification_bundle" {
        95
    } else if DIFF_LIKE.contains(&kind) {
        90
    } else if kind == "plan" {
        80
    } else if matches!(
        kind,
        "workspace_file" | "workspace_snapshot" | "editor_selection" | "resource"
    ) {
        70
    } else if kind == "summary" || kind == "transcript_summary" {
        60
    } else if kind == "command_result" {
        50
    } else {
        40
    }
}
695
/// Maps an artifact freshness label to a sortable rank: "fresh"/"live" = 3,
/// "recent" = 2, "stale" = 0, anything else (including absent) = 1.
fn freshness_rank(value: Option<&str>) -> i64 {
    let label = value.unwrap_or_default();
    if label == "fresh" || label == "live" {
        3
    } else if label == "recent" {
        2
    } else if label == "stale" {
        0
    } else {
        1
    }
}
704
/// Per-tool policy metadata parsed from a tool definition's `policy` object
/// (see `parse_tool_runtime_policy`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolRuntimePolicyMetadata {
    /// Capability name -> allowed operations declared by the tool.
    pub capabilities: BTreeMap<String, Vec<String>>,
    /// Declared side-effect level (e.g. "read_only", "network"); see
    /// `min_side_effect` for the ordering used.
    pub side_effect_level: Option<String>,
    /// Names of tool parameters that carry filesystem paths.
    pub path_params: Vec<String>,
    /// Declared mutation classification, if any.
    pub mutation_classification: Option<String>,
}
713
/// A capability "ceiling" or request: which tools, capabilities, and roots an
/// execution may use. Empty collections mean "unrestricted" for that
/// dimension (see `intersect`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CapabilityPolicy {
    /// Allowed tool names; empty = no restriction.
    pub tools: Vec<String>,
    /// Capability name -> allowed operations; empty = no restriction.
    pub capabilities: BTreeMap<String, Vec<String>>,
    /// Allowed workspace root paths; empty = no restriction.
    pub workspace_roots: Vec<String>,
    /// Maximum side-effect level (ordered by `min_side_effect`'s ranking).
    pub side_effect_level: Option<String>,
    /// Maximum nesting depth for recursive execution, when bounded.
    pub recursion_limit: Option<usize>,
    #[serde(default)]
    /// Per-tool first-argument constraints (see `enforce_tool_arg_constraints`).
    pub tool_arg_constraints: Vec<ToolArgConstraint>,
    #[serde(default)]
    /// Tool name -> runtime policy metadata parsed from tool definitions.
    pub tool_metadata: BTreeMap<String, ToolRuntimePolicyMetadata>,
}
728
impl CapabilityPolicy {
    /// Intersects a host ceiling (`self`) with a `requested` policy.
    ///
    /// Fails with a descriptive message when the request names a tool,
    /// capability, or capability operation outside a non-empty ceiling.
    /// Otherwise returns the combined policy: empty collections on either
    /// side mean "unrestricted", so the other side's values are used; both
    /// non-empty means set intersection. Side-effect level and recursion
    /// limit reduce to the more restrictive value; arg constraints from both
    /// sides are concatenated.
    pub fn intersect(&self, requested: &CapabilityPolicy) -> Result<CapabilityPolicy, String> {
        // Most restrictive side-effect level wins (ranking in min_side_effect).
        let side_effect_level = match (&self.side_effect_level, &requested.side_effect_level) {
            (Some(a), Some(b)) => Some(min_side_effect(a, b).to_string()),
            (Some(a), None) => Some(a.clone()),
            (None, Some(b)) => Some(b.clone()),
            (None, None) => None,
        };

        // A non-empty ceiling tool list rejects any requested tool outside it.
        if !self.tools.is_empty() {
            let denied: Vec<String> = requested
                .tools
                .iter()
                .filter(|tool| !self.tools.contains(*tool))
                .cloned()
                .collect();
            if !denied.is_empty() {
                return Err(format!(
                    "requested tools exceed host ceiling: {}",
                    denied.join(", ")
                ));
            }
        }

        // Each requested capability must exist in the ceiling (when the
        // ceiling is non-empty) and may only request a subset of its ops.
        for (capability, requested_ops) in &requested.capabilities {
            if let Some(allowed_ops) = self.capabilities.get(capability) {
                let denied: Vec<String> = requested_ops
                    .iter()
                    .filter(|op| !allowed_ops.contains(*op))
                    .cloned()
                    .collect();
                if !denied.is_empty() {
                    return Err(format!(
                        "requested capability operations exceed host ceiling: {}.{}",
                        capability,
                        denied.join(",")
                    ));
                }
            } else if !self.capabilities.is_empty() {
                return Err(format!(
                    "requested capability exceeds host ceiling: {capability}"
                ));
            }
        }

        // Empty side = unrestricted; both non-empty = intersection.
        let tools = if self.tools.is_empty() {
            requested.tools.clone()
        } else if requested.tools.is_empty() {
            self.tools.clone()
        } else {
            requested
                .tools
                .iter()
                .filter(|tool| self.tools.contains(*tool))
                .cloned()
                .collect()
        };

        let capabilities = if self.capabilities.is_empty() {
            requested.capabilities.clone()
        } else if requested.capabilities.is_empty() {
            self.capabilities.clone()
        } else {
            requested
                .capabilities
                .iter()
                .filter_map(|(capability, requested_ops)| {
                    self.capabilities.get(capability).map(|allowed_ops| {
                        (
                            capability.clone(),
                            requested_ops
                                .iter()
                                .filter(|op| allowed_ops.contains(*op))
                                .cloned()
                                .collect::<Vec<_>>(),
                        )
                    })
                })
                .collect()
        };

        let workspace_roots = if self.workspace_roots.is_empty() {
            requested.workspace_roots.clone()
        } else if requested.workspace_roots.is_empty() {
            self.workspace_roots.clone()
        } else {
            requested
                .workspace_roots
                .iter()
                .filter(|root| self.workspace_roots.contains(*root))
                .cloned()
                .collect()
        };

        // Smaller recursion limit wins when both sides bound it.
        let recursion_limit = match (self.recursion_limit, requested.recursion_limit) {
            (Some(a), Some(b)) => Some(a.min(b)),
            (Some(a), None) => Some(a),
            (None, Some(b)) => Some(b),
            (None, None) => None,
        };

        // Constraints accumulate: both the ceiling's and the request's apply.
        let mut tool_arg_constraints = self.tool_arg_constraints.clone();
        tool_arg_constraints.extend(requested.tool_arg_constraints.clone());

        // Metadata for each surviving tool; the request's entry wins over the
        // ceiling's when both have one.
        let tool_metadata = tools
            .iter()
            .filter_map(|tool| {
                requested
                    .tool_metadata
                    .get(tool)
                    .or_else(|| self.tool_metadata.get(tool))
                    .cloned()
                    .map(|metadata| (tool.clone(), metadata))
            })
            .collect();

        Ok(CapabilityPolicy {
            tools,
            capabilities,
            workspace_roots,
            side_effect_level,
            recursion_limit,
            tool_arg_constraints,
            tool_metadata,
        })
    }
}
857
/// Returns the less privileged of two side-effect levels, ordered
/// none < read_only < workspace_write < process_exec < network < unknown.
/// Ties (including two unknown labels) resolve to `a`.
fn min_side_effect<'a>(a: &'a str, b: &'a str) -> &'a str {
    fn rank(v: &str) -> usize {
        ["none", "read_only", "workspace_write", "process_exec", "network"]
            .iter()
            .position(|&known| known == v)
            .unwrap_or(5)
    }
    if rank(b) < rank(a) {
        b
    } else {
        a
    }
}
875
/// Model selection overrides for a workflow node.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ModelPolicy {
    /// Provider override (e.g. backend name), when set.
    pub provider: Option<String>,
    /// Explicit model name override.
    pub model: Option<String>,
    /// Abstract tier (resolved elsewhere) instead of a concrete model.
    pub model_tier: Option<String>,
    /// Sampling temperature override.
    pub temperature: Option<f64>,
    /// Completion token cap override.
    pub max_tokens: Option<i64>,
}

/// Controls how a node's transcript is carried and condensed.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct TranscriptPolicy {
    /// Transcript handling mode label, when set.
    pub mode: Option<String>,
    /// Who may see the transcript, when set.
    pub visibility: Option<String>,
    /// Whether to summarize the transcript.
    pub summarize: bool,
    /// Whether to auto-compact the transcript.
    pub compact: bool,
    /// Messages to keep verbatim when compacting.
    pub keep_last: Option<usize>,
}

/// Controls which artifacts are assembled into a node's context.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ContextPolicy {
    /// Maximum number of artifacts to include.
    pub max_artifacts: Option<usize>,
    /// Total token budget for included artifacts (see select_artifacts_adaptive).
    pub max_tokens: Option<usize>,
    /// Tokens held back from the budget for other content.
    pub reserve_tokens: Option<usize>,
    /// Only include artifacts of these kinds (empty = all).
    pub include_kinds: Vec<String>,
    /// Never include artifacts of these kinds.
    pub exclude_kinds: Vec<String>,
    /// Kinds to rank ahead of others.
    pub prioritize_kinds: Vec<String>,
    /// Artifact ids that must always be included.
    pub pinned_ids: Vec<String>,
    /// Only include artifacts produced by these stages (empty = all).
    pub include_stages: Vec<String>,
    /// Prefer newer artifacts when trimming.
    pub prefer_recent: bool,
    /// Prefer fresher artifacts (see freshness_rank) when trimming.
    pub prefer_fresh: bool,
    /// Rendering style label for the assembled context, when set.
    pub render: Option<String>,
}

/// Retry behavior for a workflow node.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RetryPolicy {
    /// Maximum attempts before giving up.
    pub max_attempts: usize,
    /// Run verification between attempts.
    pub verify: bool,
    /// Attempt automatic repair on failure.
    pub repair: bool,
}
919
/// Input/output contract for a workflow stage.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct StageContract {
    /// Artifact kinds accepted as inputs.
    pub input_kinds: Vec<String>,
    /// Artifact kinds the stage is expected to produce.
    pub output_kinds: Vec<String>,
    /// Minimum number of inputs required, when bounded.
    pub min_inputs: Option<usize>,
    /// Maximum number of inputs allowed, when bounded.
    pub max_inputs: Option<usize>,
    /// Whether a transcript must accompany the stage.
    pub require_transcript: bool,
    /// JSON schema the stage output must satisfy, when set.
    pub schema: Option<serde_json::Value>,
}

/// Maps named outcomes of a node to successor node ids.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BranchSemantics {
    /// Successor on ordinary success.
    pub success: Option<String>,
    /// Successor on ordinary failure.
    pub failure: Option<String>,
    /// Successor when verification passes.
    pub verify_pass: Option<String>,
    /// Successor when verification fails.
    pub verify_fail: Option<String>,
    /// Successor when a condition evaluates true.
    pub condition_true: Option<String>,
    /// Successor when a condition evaluates false.
    pub condition_false: Option<String>,
    /// Successor when a loop continues iterating.
    pub loop_continue: Option<String>,
    /// Successor when a loop exits.
    pub loop_exit: Option<String>,
    /// Successor when the node escalates.
    pub escalation: Option<String>,
}

/// Fan-out configuration for map-style nodes.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct MapPolicy {
    /// Items to map over.
    pub items: Vec<serde_json::Value>,
    /// Artifact kind assigned to each item, when set.
    pub item_artifact_kind: Option<String>,
    /// Artifact kind of the mapped outputs, when set.
    pub output_kind: Option<String>,
    /// Cap on the number of items processed.
    pub max_items: Option<usize>,
}

/// Fan-in configuration for join-style nodes.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct JoinPolicy {
    /// Join strategy label.
    pub strategy: String,
    /// Whether every input branch must complete.
    pub require_all_inputs: bool,
    /// Minimum completed branches required, when bounded.
    pub min_completed: Option<usize>,
}

/// Aggregation configuration for reduce-style nodes.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReducePolicy {
    /// Reduction strategy label.
    pub strategy: String,
    /// Separator used when concatenating, when set.
    pub separator: Option<String>,
    /// Artifact kind of the reduced output, when set.
    pub output_kind: Option<String>,
}

/// Where and why a node escalates to a human or higher authority.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EscalationPolicy {
    /// Escalation severity label, when set.
    pub level: Option<String>,
    /// Target queue for the escalation, when set.
    pub queue: Option<String>,
    /// Human-readable reason, when set.
    pub reason: Option<String>,
}
977
/// A typed piece of context produced or consumed by workflow stages.
///
/// Serialized with a `_type` tag; `normalize` fills ids, timestamps, kind
/// canonicalization, token estimates, and default priorities.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
    /// Serialized type tag; "artifact" after normalization.
    #[serde(rename = "_type")]
    pub type_name: String,
    /// Unique id; `normalize` generates an `artifact_*` id when empty.
    pub id: String,
    /// Canonical kind (see `normalize_artifact_kind`).
    pub kind: String,
    /// Optional human-readable title.
    pub title: Option<String>,
    /// Textual payload, when the artifact carries text.
    pub text: Option<String>,
    /// Structured payload, when the artifact carries JSON.
    pub data: Option<serde_json::Value>,
    /// Origin label (tool, stage, etc.), when known.
    pub source: Option<String>,
    /// Creation timestamp; filled by `normalize` when empty.
    pub created_at: String,
    /// Freshness label ranked by `freshness_rank`, when set.
    pub freshness: Option<String>,
    /// Selection priority; defaults per-kind via `default_artifact_priority`.
    pub priority: Option<i64>,
    /// Ids of artifacts this one was derived from.
    pub lineage: Vec<String>,
    /// Relevance score, when computed.
    pub relevance: Option<f64>,
    /// Rough token count (~4 chars/token), estimated from text if unset.
    pub estimated_tokens: Option<usize>,
    /// Workflow stage that produced this artifact, when known.
    pub stage: Option<String>,
    /// Free-form extra fields.
    pub metadata: BTreeMap<String, serde_json::Value>,
}
998
impl ArtifactRecord {
    /// Fills defaults for a freshly built or deserialized record: type tag,
    /// generated id, timestamp, canonical kind, token estimate (~4 chars per
    /// token) and per-kind default priority.
    ///
    /// Order matters: the kind is canonicalized before the priority default
    /// is looked up, so aliases get their canonical kind's priority.
    pub fn normalize(mut self) -> Self {
        if self.type_name.is_empty() {
            self.type_name = "artifact".to_string();
        }
        if self.id.is_empty() {
            self.id = new_id("artifact");
        }
        if self.created_at.is_empty() {
            self.created_at = now_rfc3339();
        }
        if self.kind.is_empty() {
            self.kind = "artifact".to_string();
        }
        self.kind = normalize_artifact_kind(&self.kind);
        if self.estimated_tokens.is_none() {
            self.estimated_tokens = self
                .text
                .as_ref()
                .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
        }
        if self.priority.is_none() {
            self.priority = Some(default_artifact_priority(&self.kind));
        }
        self
    }
}
1026
/// A single node of a workflow graph plus all of its attached policies.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct WorkflowNode {
    /// Node id within the graph, when assigned.
    pub id: Option<String>,
    /// Node kind label (interpreted by the workflow engine elsewhere).
    pub kind: String,
    /// Execution mode label, when set.
    pub mode: Option<String>,
    /// User prompt for LLM-backed nodes.
    pub prompt: Option<String>,
    /// System prompt for LLM-backed nodes.
    pub system: Option<String>,
    /// Human-readable task label.
    pub task_label: Option<String>,
    /// Sentinel string signalling task completion, when used.
    pub done_sentinel: Option<String>,
    /// Tool definitions in JSON form (see workflow_tool_names /
    /// workflow_tool_metadata for the accepted shapes).
    pub tools: serde_json::Value,
    /// Model selection overrides.
    pub model_policy: ModelPolicy,
    /// Transcript handling configuration.
    pub transcript_policy: TranscriptPolicy,
    /// Context assembly configuration.
    pub context_policy: ContextPolicy,
    /// Retry behavior.
    pub retry_policy: RetryPolicy,
    /// Capability restrictions for this node.
    pub capability_policy: CapabilityPolicy,
    /// Contract the node's inputs must satisfy.
    pub input_contract: StageContract,
    /// Contract the node's outputs must satisfy.
    pub output_contract: StageContract,
    /// Outcome -> successor-node mapping.
    pub branch_semantics: BranchSemantics,
    /// Fan-out configuration for map nodes.
    pub map_policy: MapPolicy,
    /// Fan-in configuration for join nodes.
    pub join_policy: JoinPolicy,
    /// Aggregation configuration for reduce nodes.
    pub reduce_policy: ReducePolicy,
    /// Escalation configuration.
    pub escalation_policy: EscalationPolicy,
    /// Verification spec in JSON form, when set.
    pub verify: Option<serde_json::Value>,
    /// Free-form extra fields.
    pub metadata: BTreeMap<String, serde_json::Value>,
    // Not serialized: the original VM value the tools were built from.
    #[serde(skip)]
    pub raw_tools: Option<VmValue>,
}
1055
impl PartialEq for WorkflowNode {
    /// Structural equality via JSON serialization (the skipped `raw_tools`
    /// field is therefore ignored). Note: if BOTH sides fail to serialize,
    /// `None == None` makes them compare equal.
    fn eq(&self, other: &Self) -> bool {
        serde_json::to_value(self).ok() == serde_json::to_value(other).ok()
    }
}
1061
1062pub fn workflow_tool_names(value: &serde_json::Value) -> Vec<String> {
1063 match value {
1064 serde_json::Value::Null => Vec::new(),
1065 serde_json::Value::Array(items) => items
1066 .iter()
1067 .filter_map(|item| match item {
1068 serde_json::Value::Object(map) => map
1069 .get("name")
1070 .and_then(|value| value.as_str())
1071 .filter(|name| !name.is_empty())
1072 .map(|name| name.to_string()),
1073 _ => None,
1074 })
1075 .collect(),
1076 serde_json::Value::Object(map) => {
1077 if map.get("_type").and_then(|value| value.as_str()) == Some("tool_registry") {
1078 return map
1079 .get("tools")
1080 .map(workflow_tool_names)
1081 .unwrap_or_default();
1082 }
1083 map.get("name")
1084 .and_then(|value| value.as_str())
1085 .filter(|name| !name.is_empty())
1086 .map(|name| vec![name.to_string()])
1087 .unwrap_or_default()
1088 }
1089 _ => Vec::new(),
1090 }
1091}
1092
/// Returns the most privileged of the given side-effect levels, ordered
/// none < read_only < workspace_write < process_exec < network < unknown
/// (unrecognized labels rank highest). `None` when the iterator is empty.
fn max_side_effect_level(levels: impl Iterator<Item = String>) -> Option<String> {
    fn rank(v: &str) -> usize {
        ["none", "read_only", "workspace_write", "process_exec", "network"]
            .iter()
            .position(|&known| known == v)
            .unwrap_or(5)
    }
    levels.max_by_key(|level| rank(level))
}
1106
1107fn parse_tool_runtime_policy(
1108 map: &serde_json::Map<String, serde_json::Value>,
1109) -> ToolRuntimePolicyMetadata {
1110 let Some(policy) = map.get("policy").and_then(|value| value.as_object()) else {
1111 return ToolRuntimePolicyMetadata::default();
1112 };
1113
1114 let capabilities = policy
1115 .get("capabilities")
1116 .and_then(|value| value.as_object())
1117 .map(|caps| {
1118 caps.iter()
1119 .map(|(capability, ops)| {
1120 let values = ops
1121 .as_array()
1122 .map(|items| {
1123 items
1124 .iter()
1125 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1126 .collect::<Vec<_>>()
1127 })
1128 .unwrap_or_default();
1129 (capability.clone(), values)
1130 })
1131 .collect::<BTreeMap<_, _>>()
1132 })
1133 .unwrap_or_default();
1134
1135 let path_params = policy
1136 .get("path_params")
1137 .and_then(|value| value.as_array())
1138 .map(|items| {
1139 items
1140 .iter()
1141 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1142 .collect::<Vec<_>>()
1143 })
1144 .unwrap_or_default();
1145
1146 ToolRuntimePolicyMetadata {
1147 capabilities,
1148 side_effect_level: policy
1149 .get("side_effect_level")
1150 .and_then(|value| value.as_str())
1151 .map(|s| s.to_string()),
1152 path_params,
1153 mutation_classification: policy
1154 .get("mutation_classification")
1155 .and_then(|value| value.as_str())
1156 .map(|s| s.to_string()),
1157 }
1158}
1159
1160pub fn workflow_tool_metadata(
1161 value: &serde_json::Value,
1162) -> BTreeMap<String, ToolRuntimePolicyMetadata> {
1163 match value {
1164 serde_json::Value::Null => BTreeMap::new(),
1165 serde_json::Value::Array(items) => items
1166 .iter()
1167 .filter_map(|item| match item {
1168 serde_json::Value::Object(map) => map
1169 .get("name")
1170 .and_then(|value| value.as_str())
1171 .filter(|name| !name.is_empty())
1172 .map(|name| (name.to_string(), parse_tool_runtime_policy(map))),
1173 _ => None,
1174 })
1175 .collect(),
1176 serde_json::Value::Object(map) => {
1177 if map.get("_type").and_then(|value| value.as_str()) == Some("tool_registry") {
1178 return map
1179 .get("tools")
1180 .map(workflow_tool_metadata)
1181 .unwrap_or_default();
1182 }
1183 map.get("name")
1184 .and_then(|value| value.as_str())
1185 .filter(|name| !name.is_empty())
1186 .map(|name| {
1187 let mut metadata = BTreeMap::new();
1188 metadata.insert(name.to_string(), parse_tool_runtime_policy(map));
1189 metadata
1190 })
1191 .unwrap_or_default()
1192 }
1193 _ => BTreeMap::new(),
1194 }
1195}
1196
1197pub fn workflow_tool_policy_from_tools(value: &serde_json::Value) -> CapabilityPolicy {
1198 let tools = workflow_tool_names(value);
1199 let tool_metadata = workflow_tool_metadata(value);
1200 let mut capabilities: BTreeMap<String, Vec<String>> = BTreeMap::new();
1201 for metadata in tool_metadata.values() {
1202 for (capability, ops) in &metadata.capabilities {
1203 let entry = capabilities.entry(capability.clone()).or_default();
1204 for op in ops {
1205 if !entry.contains(op) {
1206 entry.push(op.clone());
1207 }
1208 }
1209 entry.sort();
1210 }
1211 }
1212 let side_effect_level = max_side_effect_level(
1213 tool_metadata
1214 .values()
1215 .filter_map(|metadata| metadata.side_effect_level.clone()),
1216 );
1217 CapabilityPolicy {
1218 tools,
1219 capabilities,
1220 workspace_roots: Vec::new(),
1221 side_effect_level,
1222 recursion_limit: None,
1223 tool_arg_constraints: Vec::new(),
1224 tool_metadata,
1225 }
1226}
1227
/// Directed edge between two workflow nodes.
///
/// With `#[serde(default)]`, missing fields deserialize to their defaults.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowEdge {
    /// Source node id.
    pub from: String,
    /// Destination node id.
    pub to: String,
    /// Outcome branch of `from` that activates this edge (e.g. "true",
    /// "failed"); `None` means the unconditional edge.
    pub branch: Option<String>,
    /// Optional human-readable label; not used for routing.
    pub label: Option<String>,
}
1236
/// A workflow definition: named nodes wired by branch-labelled edges.
///
/// Normalized by `normalize_workflow_value`, which fills defaults
/// (`type_name`, `id`, `version`, `entry`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowGraph {
    /// Type tag in serialized form; normalized to "workflow_graph".
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    /// Normalized to 1 when 0.
    pub version: usize,
    /// Id of the node where execution starts.
    pub entry: String,
    pub nodes: BTreeMap<String, WorkflowNode>,
    pub edges: Vec<WorkflowEdge>,
    /// Graph-wide capability ceiling, checked by `validate_workflow`.
    pub capability_policy: CapabilityPolicy,
    pub metadata: BTreeMap<String, serde_json::Value>,
    pub audit_log: Vec<WorkflowAuditEntry>,
}
1252
/// One entry in a workflow's audit log describing a mutation or decision.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowAuditEntry {
    pub id: String,
    /// Operation name that was audited.
    pub op: String,
    /// Node the operation targeted, if node-scoped.
    pub node_id: Option<String>,
    pub timestamp: String,
    pub reason: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1263
/// Aggregated LLM usage accounting for a stage or whole run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct LlmUsageRecord {
    pub input_tokens: i64,
    pub output_tokens: i64,
    pub total_duration_ms: i64,
    /// Number of LLM calls folded into this record.
    pub call_count: i64,
    pub total_cost: f64,
    /// Distinct model identifiers observed across the calls.
    pub models: Vec<String>,
}
1274
/// Execution record for a single workflow node within a run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageRecord {
    pub id: String,
    /// Workflow node this stage executed.
    pub node_id: String,
    /// Node kind (e.g. "stage", "verify") — mirrors the workflow node.
    pub kind: String,
    pub status: String,
    pub outcome: String,
    /// Branch taken out of this stage, if any.
    pub branch: Option<String>,
    pub started_at: String,
    pub finished_at: Option<String>,
    /// User-facing output text; used by replay fixture assertions.
    pub visible_text: Option<String>,
    /// Model reasoning not shown to the user.
    pub private_reasoning: Option<String>,
    pub transcript: Option<serde_json::Value>,
    pub verification: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    /// Artifacts produced by this stage (kinds feed replay assertions).
    pub artifacts: Vec<ArtifactRecord>,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
    /// Per-attempt history when the stage was retried.
    pub attempts: Vec<RunStageAttemptRecord>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1297
/// One retry attempt of a stage, recorded inside `RunStageRecord::attempts`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageAttemptRecord {
    /// 1-based (presumably) attempt counter — TODO confirm against writer.
    pub attempt: usize,
    pub status: String,
    pub outcome: String,
    pub branch: Option<String>,
    /// Error text when the attempt failed.
    pub error: Option<String>,
    pub verification: Option<serde_json::Value>,
    pub started_at: String,
    pub finished_at: Option<String>,
}
1310
/// A recorded transition between nodes during run execution.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunTransitionRecord {
    pub id: String,
    /// Stage/node the run transitioned from; `None` for the entry transition.
    pub from_stage_id: Option<String>,
    pub from_node_id: Option<String>,
    pub to_node_id: String,
    /// Branch label that selected this transition, if conditional.
    pub branch: Option<String>,
    pub timestamp: String,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
}
1323
/// Persisted scheduler checkpoint, enough to resume a run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunCheckpointRecord {
    pub id: String,
    /// Nodes eligible to execute next.
    pub ready_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub last_stage_id: Option<String>,
    pub persisted_at: String,
    /// Why the checkpoint was taken.
    pub reason: String,
}
1334
/// Expected-outcome snapshot of a run, used to check later replays.
///
/// Built by `replay_fixture_from_run` and consumed by
/// `evaluate_run_against_fixture`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayFixture {
    /// Type tag; set to "replay_fixture" by the constructor.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    /// Run this fixture was captured from.
    pub source_run_id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    pub created_at: String,
    /// Overall run status the replay must reproduce.
    pub expected_status: String,
    pub stage_assertions: Vec<ReplayStageAssertion>,
}
1348
/// Per-stage expectations inside a [`ReplayFixture`].
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayStageAssertion {
    pub node_id: String,
    pub expected_status: String,
    pub expected_outcome: String,
    pub expected_branch: Option<String>,
    /// Artifact kinds that must all appear in the replayed stage.
    pub required_artifact_kinds: Vec<String>,
    /// Substring (first 80 chars of the original visible text) the replayed
    /// stage's visible text must contain.
    pub visible_text_contains: Option<String>,
}
1359
/// Result of evaluating one run against a replay fixture.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalReport {
    /// True when `failures` is empty.
    pub pass: bool,
    /// Human-readable mismatch descriptions.
    pub failures: Vec<String>,
    pub stage_count: usize,
}
1367
/// Per-case result inside a [`ReplayEvalSuiteReport`].
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalCaseReport {
    pub run_id: String,
    pub workflow_id: String,
    /// Label copied from the suite manifest case, if any.
    pub label: Option<String>,
    pub pass: bool,
    pub failures: Vec<String>,
    pub stage_count: usize,
    /// Path the run record was loaded from.
    pub source_path: Option<String>,
    /// Baseline diff when the case declared `compare_to`.
    pub comparison: Option<RunDiffReport>,
}
1380
/// Aggregate result of running an eval suite manifest.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalSuiteReport {
    /// True only when every case passed (`failed == 0`).
    pub pass: bool,
    pub total: usize,
    pub passed: usize,
    pub failed: usize,
    pub cases: Vec<ReplayEvalCaseReport>,
}
1390
/// One stage-level difference between two runs.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunStageDiffRecord {
    pub node_id: String,
    /// Kind of change (e.g. added/removed/modified) — exact vocabulary is
    /// set by `diff_run_records`, which is outside this view.
    pub change: String,
    pub details: Vec<String>,
}
1398
/// Structural comparison between two run records.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunDiffReport {
    pub left_run_id: String,
    pub right_run_id: String,
    /// True when the runs match; checked by the eval-suite baseline compare.
    pub identical: bool,
    pub status_changed: bool,
    pub left_status: String,
    pub right_status: String,
    pub stage_diffs: Vec<RunStageDiffRecord>,
    /// right-count minus left-count deltas (signed).
    pub transition_count_delta: isize,
    pub artifact_count_delta: isize,
    pub checkpoint_count_delta: isize,
}
1413
/// Manifest describing a suite of replay-eval cases.
///
/// Normalized by `normalize_eval_suite_manifest`; executed by
/// `evaluate_run_suite_manifest`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteManifest {
    /// Type tag; normalized to "eval_suite_manifest".
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    /// Directory that relative case paths are resolved against.
    pub base_dir: Option<String>,
    pub cases: Vec<EvalSuiteCase>,
}
1424
/// One case in an eval suite: a run to evaluate, with optional fixture and
/// baseline paths (relative paths resolve against the manifest `base_dir`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteCase {
    pub label: Option<String>,
    /// Path to the run record under test.
    pub run_path: String,
    /// Replay fixture to assert against; defaults to the run's own fixture.
    pub fixture_path: Option<String>,
    /// Baseline run to diff against; any difference fails the case.
    pub compare_to: Option<String>,
}
1433
/// Complete record of one workflow run: stages, transitions, checkpoints,
/// artifacts, child runs, and bookkeeping.
///
/// Normalized by `normalize_run_record`, which fills `type_name`, `id`,
/// `started_at`, `status`, `root_run_id`, and `replay_fixture`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunRecord {
    /// Type tag; normalized to "run_record".
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    /// Task/prompt the run was started with.
    pub task: String,
    /// Normalized to "running" when empty.
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub parent_run_id: Option<String>,
    /// Top-most ancestor run; defaults to this run's own id.
    pub root_run_id: Option<String>,
    pub stages: Vec<RunStageRecord>,
    pub transitions: Vec<RunTransitionRecord>,
    pub checkpoints: Vec<RunCheckpointRecord>,
    pub pending_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub child_runs: Vec<RunChildRecord>,
    pub artifacts: Vec<ArtifactRecord>,
    /// Effective capability policy the run executed under.
    pub policy: CapabilityPolicy,
    pub execution: Option<RunExecutionRecord>,
    pub transcript: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    /// Auto-captured expectations; defaults to `replay_fixture_from_run`.
    pub replay_fixture: Option<ReplayFixture>,
    pub trace_spans: Vec<RunTraceSpanRecord>,
    pub metadata: BTreeMap<String, serde_json::Value>,
    /// Where the record was last saved, if persisted.
    pub persisted_path: Option<String>,
}
1464
/// Lightweight tracing span captured during a run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunTraceSpanRecord {
    pub span_id: u64,
    /// Parent span id; `None` for root spans.
    pub parent_id: Option<u64>,
    pub kind: String,
    pub name: String,
    /// Start offset in milliseconds — relative origin not shown here;
    /// presumably run start. TODO confirm against the span writer.
    pub start_ms: u64,
    pub duration_ms: u64,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1476
/// Record of a child (worker) run spawned by a parent run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunChildRecord {
    pub worker_id: String,
    pub worker_name: String,
    /// Stage in the parent run that spawned this child.
    pub parent_stage_id: Option<String>,
    /// Mutation-session linkage (see `MutationSessionRecord`).
    pub session_id: Option<String>,
    pub parent_session_id: Option<String>,
    pub mutation_scope: Option<String>,
    pub approval_mode: Option<String>,
    pub task: String,
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub run_id: Option<String>,
    /// Where the child's run record was persisted.
    pub run_path: Option<String>,
    pub snapshot_path: Option<String>,
    pub execution: Option<RunExecutionRecord>,
}
1496
/// Execution-environment details for a run (working directory, env vars,
/// and optional git worktree information).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunExecutionRecord {
    pub cwd: Option<String>,
    pub source_dir: Option<String>,
    pub env: BTreeMap<String, String>,
    pub adapter: Option<String>,
    pub repo_path: Option<String>,
    pub worktree_path: Option<String>,
    pub branch: Option<String>,
    /// Base git ref the worktree/branch started from.
    pub base_ref: Option<String>,
    /// Cleanup strategy for the worktree — values defined by the executor,
    /// which is outside this view.
    pub cleanup: Option<String>,
}
1510
/// Output of `validate_workflow`: hard errors, advisory warnings, and the
/// set of nodes reachable from the graph entry.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowValidationReport {
    /// True when `errors` is empty; warnings do not affect validity.
    pub valid: bool,
    pub errors: Vec<String>,
    pub warnings: Vec<String>,
    pub reachable_nodes: Vec<String>,
}
1519
1520fn parse_json_payload<T: for<'de> Deserialize<'de>>(
1521 json: serde_json::Value,
1522 label: &str,
1523) -> Result<T, VmError> {
1524 let payload = json.to_string();
1525 let mut deserializer = serde_json::Deserializer::from_str(&payload);
1526 let mut tracker = serde_path_to_error::Track::new();
1527 let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
1528 T::deserialize(path_deserializer).map_err(|error| {
1529 let snippet = if payload.len() > 600 {
1530 format!("{}...", &payload[..600])
1531 } else {
1532 payload.clone()
1533 };
1534 VmError::Runtime(format!(
1535 "{label} parse error at {}: {} | payload={}",
1536 tracker.path(),
1537 error,
1538 snippet
1539 ))
1540 })
1541}
1542
1543fn parse_json_value<T: for<'de> Deserialize<'de>>(value: &VmValue) -> Result<T, VmError> {
1544 parse_json_payload(vm_value_to_json(value), "orchestration")
1545}
1546
1547pub fn parse_workflow_node_value(value: &VmValue, label: &str) -> Result<WorkflowNode, VmError> {
1548 let mut node: WorkflowNode = parse_json_payload(vm_value_to_json(value), label)?;
1549 node.raw_tools = value.as_dict().and_then(|dict| dict.get("tools")).cloned();
1550 Ok(node)
1551}
1552
1553pub fn parse_workflow_node_json(
1554 json: serde_json::Value,
1555 label: &str,
1556) -> Result<WorkflowNode, VmError> {
1557 parse_json_payload(json, label)
1558}
1559
1560pub fn parse_workflow_edge_json(
1561 json: serde_json::Value,
1562 label: &str,
1563) -> Result<WorkflowEdge, VmError> {
1564 parse_json_payload(json, label)
1565}
1566
/// Parse a `VmValue` into a normalized [`WorkflowGraph`], filling defaults.
///
/// Accepts either a full graph (explicit `nodes`/`edges`) or the shorthand
/// form where top-level `act`/`verify`/`repair` entries are lifted into
/// nodes and wired with default edges (act → verify, verify —failed→ repair,
/// repair —retry→ verify). Graph-level identity fields and per-node policy
/// defaults are filled afterwards.
///
/// # Errors
/// Returns `VmError::Runtime` when the value cannot be deserialized.
pub fn normalize_workflow_value(value: &VmValue) -> Result<WorkflowGraph, VmError> {
    let mut graph: WorkflowGraph = parse_json_value(value)?;
    let as_dict = value.as_dict().cloned().unwrap_or_default();

    if graph.nodes.is_empty() {
        // Shorthand form: lift top-level act/verify/repair dicts into nodes.
        for key in ["act", "verify", "repair"] {
            if let Some(node_value) = as_dict.get(key) {
                let mut node = parse_workflow_node_value(node_value, "orchestration")?;
                let raw_node = node_value.as_dict().cloned().unwrap_or_default();
                node.id = Some(key.to_string());
                if node.kind.is_empty() {
                    node.kind = if key == "verify" {
                        "verify".to_string()
                    } else {
                        "stage".to_string()
                    };
                }
                // Model policy fields fall back to the top-level dict when
                // the node itself did not set them.
                if node.model_policy.provider.is_none() {
                    node.model_policy.provider = as_dict
                        .get("provider")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model.is_none() {
                    node.model_policy.model = as_dict
                        .get("model")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model_tier.is_none() {
                    // "tier" is accepted as an alias for "model_tier".
                    node.model_policy.model_tier = as_dict
                        .get("model_tier")
                        .or_else(|| as_dict.get("tier"))
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.temperature.is_none() {
                    // Accept an int temperature and widen it to f64.
                    node.model_policy.temperature = as_dict.get("temperature").and_then(|value| {
                        if let VmValue::Float(number) = value {
                            Some(*number)
                        } else {
                            value.as_int().map(|number| number as f64)
                        }
                    });
                }
                if node.model_policy.max_tokens.is_none() {
                    node.model_policy.max_tokens =
                        as_dict.get("max_tokens").and_then(|value| value.as_int());
                }
                if node.mode.is_none() {
                    node.mode = as_dict
                        .get("mode")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.done_sentinel.is_none() {
                    node.done_sentinel = as_dict
                        .get("done_sentinel")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                // The verify node may carry inline assertion fields instead
                // of a structured `verify` object; synthesize one from them.
                if key == "verify"
                    && node.verify.is_none()
                    && (raw_node.contains_key("assert_text")
                        || raw_node.contains_key("command")
                        || raw_node.contains_key("expect_status")
                        || raw_node.contains_key("expect_text"))
                {
                    node.verify = Some(serde_json::json!({
                        "assert_text": raw_node.get("assert_text").map(vm_value_to_json),
                        "command": raw_node.get("command").map(vm_value_to_json),
                        "expect_status": raw_node.get("expect_status").map(vm_value_to_json),
                        "expect_text": raw_node.get("expect_text").map(vm_value_to_json),
                    }));
                }
                graph.nodes.insert(key.to_string(), node);
            }
        }
        if graph.entry.is_empty() && graph.nodes.contains_key("act") {
            graph.entry = "act".to_string();
        }
        // Wire the default act → verify → repair loop when no edges given.
        if graph.edges.is_empty() && graph.nodes.contains_key("act") {
            if graph.nodes.contains_key("verify") {
                graph.edges.push(WorkflowEdge {
                    from: "act".to_string(),
                    to: "verify".to_string(),
                    branch: None,
                    label: None,
                });
            }
            if graph.nodes.contains_key("repair") {
                graph.edges.push(WorkflowEdge {
                    from: "verify".to_string(),
                    to: "repair".to_string(),
                    branch: Some("failed".to_string()),
                    label: None,
                });
                graph.edges.push(WorkflowEdge {
                    from: "repair".to_string(),
                    to: "verify".to_string(),
                    branch: Some("retry".to_string()),
                    label: None,
                });
            }
        }
    }

    // Graph-level identity defaults.
    if graph.type_name.is_empty() {
        graph.type_name = "workflow_graph".to_string();
    }
    if graph.id.is_empty() {
        graph.id = new_id("workflow");
    }
    if graph.version == 0 {
        graph.version = 1;
    }
    if graph.entry.is_empty() {
        // Fall back to the first node in BTreeMap (alphabetical) order.
        graph.entry = graph
            .nodes
            .keys()
            .next()
            .cloned()
            .unwrap_or_else(|| "act".to_string());
    }
    // Per-node defaults, applied to both shorthand and explicit graphs.
    for (node_id, node) in &mut graph.nodes {
        if node.raw_tools.is_none() {
            // Recover the raw tools value from the original (untyped) dict,
            // since it does not survive JSON deserialization.
            node.raw_tools = as_dict
                .get("nodes")
                .and_then(|nodes| nodes.as_dict())
                .and_then(|nodes| nodes.get(node_id))
                .and_then(|node_value| node_value.as_dict())
                .and_then(|raw_node| raw_node.get("tools"))
                .cloned();
        }
        if node.id.is_none() {
            node.id = Some(node_id.clone());
        }
        if node.kind.is_empty() {
            node.kind = "stage".to_string();
        }
        if node.join_policy.strategy.is_empty() {
            node.join_policy.strategy = "all".to_string();
        }
        if node.reduce_policy.strategy.is_empty() {
            node.reduce_policy.strategy = "concat".to_string();
        }
        if node.output_contract.output_kinds.is_empty() {
            // Default output kind depends on the node kind.
            node.output_contract.output_kinds = vec![match node.kind.as_str() {
                "verify" => "verification_result".to_string(),
                "reduce" => node
                    .reduce_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "summary".to_string()),
                "map" => node
                    .map_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "artifact".to_string()),
                "escalation" => "plan".to_string(),
                _ => "artifact".to_string(),
            }];
        }
        if node.retry_policy.max_attempts == 0 {
            node.retry_policy.max_attempts = 1;
        }
    }
    Ok(graph)
}
1736
/// Validate a workflow graph's structure and (optionally) its capability
/// policies against a ceiling.
///
/// Hard errors: missing entry node, dangling edges, contradictory input
/// contracts, malformed condition/fork/map nodes, and policy-ceiling
/// violations. Advisory warnings: unreachable nodes, under-connected join
/// nodes, and reduce nodes without input kinds.
pub fn validate_workflow(
    graph: &WorkflowGraph,
    ceiling: Option<&CapabilityPolicy>,
) -> WorkflowValidationReport {
    let mut errors = Vec::new();
    let mut warnings = Vec::new();

    if !graph.nodes.contains_key(&graph.entry) {
        errors.push(format!("entry node does not exist: {}", graph.entry));
    }

    // Every edge endpoint must name an existing node.
    let node_ids: BTreeSet<String> = graph.nodes.keys().cloned().collect();
    for edge in &graph.edges {
        if !node_ids.contains(&edge.from) {
            errors.push(format!("edge.from references unknown node: {}", edge.from));
        }
        if !node_ids.contains(&edge.to) {
            errors.push(format!("edge.to references unknown node: {}", edge.to));
        }
    }

    // Unreachable nodes are a warning, not an error.
    let reachable_nodes = reachable_nodes(graph);
    for node_id in &node_ids {
        if !reachable_nodes.contains(node_id) {
            warnings.push(format!("node is unreachable: {node_id}"));
        }
    }

    // Per-node structural checks, keyed on node kind.
    for (node_id, node) in &graph.nodes {
        let incoming = graph
            .edges
            .iter()
            .filter(|edge| edge.to == *node_id)
            .count();
        let outgoing: Vec<&WorkflowEdge> = graph
            .edges
            .iter()
            .filter(|edge| edge.from == *node_id)
            .collect();
        if let Some(min_inputs) = node.input_contract.min_inputs {
            if let Some(max_inputs) = node.input_contract.max_inputs {
                if min_inputs > max_inputs {
                    errors.push(format!(
                        "node {node_id}: input contract min_inputs exceeds max_inputs"
                    ));
                }
            }
        }
        match node.kind.as_str() {
            "condition" => {
                // A condition must be able to route both outcomes.
                let has_true = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("true"));
                let has_false = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("false"));
                if !has_true || !has_false {
                    errors.push(format!(
                        "node {node_id}: condition nodes require both 'true' and 'false' branch edges"
                    ));
                }
            }
            "fork" => {
                if outgoing.len() < 2 {
                    errors.push(format!(
                        "node {node_id}: fork nodes require at least two outgoing edges"
                    ));
                }
            }
            "join" => {
                // A single-input join is suspicious but still executable.
                if incoming < 2 {
                    warnings.push(format!(
                        "node {node_id}: join node has fewer than two incoming edges"
                    ));
                }
            }
            "map" => {
                // Map nodes need at least one source of items to iterate.
                if node.map_policy.items.is_empty()
                    && node.map_policy.item_artifact_kind.is_none()
                    && node.input_contract.input_kinds.is_empty()
                {
                    errors.push(format!(
                        "node {node_id}: map nodes require items, item_artifact_kind, or input_contract.input_kinds"
                    ));
                }
            }
            "reduce" => {
                if node.input_contract.input_kinds.is_empty() {
                    warnings.push(format!(
                        "node {node_id}: reduce node has no input_contract.input_kinds; it will consume all available artifacts"
                    ));
                }
            }
            _ => {}
        }
    }

    // Both graph-wide and per-node policies must fit under the ceiling.
    if let Some(ceiling) = ceiling {
        if let Err(error) = ceiling.intersect(&graph.capability_policy) {
            errors.push(error);
        }
        for (node_id, node) in &graph.nodes {
            if let Err(error) = ceiling.intersect(&node.capability_policy) {
                errors.push(format!("node {node_id}: {error}"));
            }
        }
    }

    WorkflowValidationReport {
        valid: errors.is_empty(),
        errors,
        warnings,
        reachable_nodes: reachable_nodes.into_iter().collect(),
    }
}
1852
1853fn reachable_nodes(graph: &WorkflowGraph) -> BTreeSet<String> {
1854 let mut seen = BTreeSet::new();
1855 let mut stack = vec![graph.entry.clone()];
1856 while let Some(node_id) = stack.pop() {
1857 if !seen.insert(node_id.clone()) {
1858 continue;
1859 }
1860 for edge in graph.edges.iter().filter(|edge| edge.from == node_id) {
1861 stack.push(edge.to.clone());
1862 }
1863 }
1864 seen
1865}
1866
/// Filter, rank, and budget artifacts according to a [`ContextPolicy`].
///
/// Three phases: (1) drop artifacts excluded by kind/stage filters,
/// (2) sort best-first by a fixed priority chain (pinned > prioritized kind
/// > priority > freshness > recency > relevance > smaller token estimate),
/// (3) greedily take artifacts under the `max_artifacts` and token budget.
pub fn select_artifacts(
    mut artifacts: Vec<ArtifactRecord>,
    policy: &ContextPolicy,
) -> Vec<ArtifactRecord> {
    // Phase 1: kind/stage filtering. Empty include lists mean "allow all".
    artifacts.retain(|artifact| {
        (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
            && !policy.exclude_kinds.contains(&artifact.kind)
            && (policy.include_stages.is_empty()
                || artifact
                    .stage
                    .as_ref()
                    .is_some_and(|stage| policy.include_stages.contains(stage)))
    });
    // Phase 2: best-first ordering. Comparisons are written b-vs-a so that
    // "better" (pinned, prioritized, higher priority, fresher, newer, more
    // relevant) sorts earlier; the final tiebreak prefers fewer tokens.
    artifacts.sort_by(|a, b| {
        let b_pinned = policy.pinned_ids.contains(&b.id);
        let a_pinned = policy.pinned_ids.contains(&a.id);
        b_pinned
            .cmp(&a_pinned)
            .then_with(|| {
                let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
                let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
                b_prio_kind.cmp(&a_prio_kind)
            })
            .then_with(|| {
                b.priority
                    .unwrap_or_default()
                    .cmp(&a.priority.unwrap_or_default())
            })
            .then_with(|| {
                if policy.prefer_fresh {
                    freshness_rank(b.freshness.as_deref())
                        .cmp(&freshness_rank(a.freshness.as_deref()))
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                if policy.prefer_recent {
                    // String comparison on created_at — assumes a sortable
                    // timestamp format; see now_rfc3339 (epoch seconds).
                    b.created_at.cmp(&a.created_at)
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                b.relevance
                    .partial_cmp(&a.relevance)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
            .then_with(|| {
                // Unknown token estimates sort last among equals.
                a.estimated_tokens
                    .unwrap_or(usize::MAX)
                    .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
            })
    });

    // Phase 3: greedy selection under count and token budgets.
    let mut selected = Vec::new();
    let mut used_tokens = 0usize;
    let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
    let effective_max_tokens = policy
        .max_tokens
        .map(|max| max.saturating_sub(reserve_tokens));
    for artifact in artifacts {
        if let Some(max_artifacts) = policy.max_artifacts {
            if selected.len() >= max_artifacts {
                break;
            }
        }
        let next_tokens = artifact.estimated_tokens.unwrap_or(0);
        if let Some(max_tokens) = effective_max_tokens {
            // `continue` (not `break`): an oversized artifact is skipped so
            // that smaller, lower-ranked ones can still fill the budget.
            if used_tokens + next_tokens > max_tokens {
                continue;
            }
        }
        used_tokens += next_tokens;
        selected.push(artifact);
    }
    selected
}
1945
/// Render artifacts into a single context string for prompt assembly.
///
/// `policy.render == Some("json")` emits one JSON object per artifact;
/// anything else uses a bracketed plain-text header plus the body.
/// Artifacts are joined by blank lines.
pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
    let mut parts = Vec::new();
    for artifact in artifacts {
        // Fallback title: "<kind> <id>".
        let title = artifact
            .title
            .clone()
            .unwrap_or_else(|| format!("{} {}", artifact.kind, artifact.id));
        // Body: prefer text; fall back to the JSON-encoded data payload.
        let body = artifact
            .text
            .clone()
            .or_else(|| artifact.data.as_ref().map(|v| v.to_string()))
            .unwrap_or_default();
        match policy.render.as_deref() {
            Some("json") => {
                parts.push(
                    serde_json::json!({
                        "id": artifact.id,
                        "kind": artifact.kind,
                        "title": title,
                        "source": artifact.source,
                        "freshness": artifact.freshness,
                        "priority": artifact.priority,
                        "text": body,
                    })
                    .to_string(),
                );
            }
            _ => parts.push(format!(
                "[{title}] kind={} source={} freshness={} priority={}\n{}",
                artifact.kind,
                artifact
                    .source
                    .clone()
                    .unwrap_or_else(|| "unknown".to_string()),
                artifact
                    .freshness
                    .clone()
                    .unwrap_or_else(|| "normal".to_string()),
                artifact.priority.unwrap_or_default(),
                body
            )),
        }
    }
    parts.join("\n\n")
}
1991
1992pub fn normalize_artifact(value: &VmValue) -> Result<ArtifactRecord, VmError> {
1993 let artifact: ArtifactRecord = parse_json_value(value)?;
1994 Ok(artifact.normalize())
1995}
1996
1997pub fn normalize_run_record(value: &VmValue) -> Result<RunRecord, VmError> {
1998 let json = vm_value_to_json(value);
1999 let payload = json.to_string();
2000 let mut deserializer = serde_json::Deserializer::from_str(&payload);
2001 let mut tracker = serde_path_to_error::Track::new();
2002 let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
2003 let mut run: RunRecord = RunRecord::deserialize(path_deserializer).map_err(|error| {
2004 let snippet = if payload.len() > 600 {
2005 format!("{}...", &payload[..600])
2006 } else {
2007 payload.clone()
2008 };
2009 VmError::Runtime(format!(
2010 "orchestration parse error at {}: {} | payload={}",
2011 tracker.path(),
2012 error,
2013 snippet
2014 ))
2015 })?;
2016 if run.type_name.is_empty() {
2017 run.type_name = "run_record".to_string();
2018 }
2019 if run.id.is_empty() {
2020 run.id = new_id("run");
2021 }
2022 if run.started_at.is_empty() {
2023 run.started_at = now_rfc3339();
2024 }
2025 if run.status.is_empty() {
2026 run.status = "running".to_string();
2027 }
2028 if run.root_run_id.is_none() {
2029 run.root_run_id = Some(run.id.clone());
2030 }
2031 if run.replay_fixture.is_none() {
2032 run.replay_fixture = Some(replay_fixture_from_run(&run));
2033 }
2034 Ok(run)
2035}
2036
2037pub fn normalize_eval_suite_manifest(value: &VmValue) -> Result<EvalSuiteManifest, VmError> {
2038 let mut manifest: EvalSuiteManifest = parse_json_value(value)?;
2039 if manifest.type_name.is_empty() {
2040 manifest.type_name = "eval_suite_manifest".to_string();
2041 }
2042 if manifest.id.is_empty() {
2043 manifest.id = new_id("eval_suite");
2044 }
2045 Ok(manifest)
2046}
2047
2048fn load_replay_fixture(path: &Path) -> Result<ReplayFixture, VmError> {
2049 let content = std::fs::read_to_string(path)
2050 .map_err(|e| VmError::Runtime(format!("failed to read replay fixture: {e}")))?;
2051 serde_json::from_str(&content)
2052 .map_err(|e| VmError::Runtime(format!("failed to parse replay fixture: {e}")))
2053}
2054
/// Resolve a manifest-relative path: absolute paths pass through
/// unchanged; relative paths are joined onto `base_dir` when one is given.
fn resolve_manifest_path(base_dir: Option<&Path>, path: &str) -> PathBuf {
    let candidate = PathBuf::from(path);
    match base_dir {
        Some(base) if !candidate.is_absolute() => base.join(candidate),
        _ => candidate,
    }
}
2065
/// Execute every case in an eval suite manifest and aggregate the results.
///
/// For each case: load the run, resolve its fixture (explicit path → the
/// run's embedded fixture → one synthesized from the run itself), evaluate,
/// and optionally diff against a baseline run — any baseline difference
/// fails the case.
///
/// # Errors
/// Propagates `VmError::Runtime` from run/fixture file loading; assertion
/// mismatches are reported in the returned cases, not as errors.
pub fn evaluate_run_suite_manifest(
    manifest: &EvalSuiteManifest,
) -> Result<ReplayEvalSuiteReport, VmError> {
    let base_dir = manifest.base_dir.as_deref().map(Path::new);
    let mut reports = Vec::new();
    for case in &manifest.cases {
        let run_path = resolve_manifest_path(base_dir, &case.run_path);
        let run = load_run_record(&run_path)?;
        // Fixture precedence: explicit path > embedded > synthesized.
        let fixture = match &case.fixture_path {
            Some(path) => load_replay_fixture(&resolve_manifest_path(base_dir, path))?,
            None => run
                .replay_fixture
                .clone()
                .unwrap_or_else(|| replay_fixture_from_run(&run)),
        };
        let eval = evaluate_run_against_fixture(&run, &fixture);
        let mut pass = eval.pass;
        let mut failures = eval.failures;
        // Optional baseline comparison; a non-identical diff fails the case.
        let comparison = match &case.compare_to {
            Some(path) => {
                let baseline_path = resolve_manifest_path(base_dir, path);
                let baseline = load_run_record(&baseline_path)?;
                let diff = diff_run_records(&baseline, &run);
                if !diff.identical {
                    pass = false;
                    failures.push(format!(
                        "run differs from baseline {} with {} stage changes",
                        baseline_path.display(),
                        diff.stage_diffs.len()
                    ));
                }
                Some(diff)
            }
            None => None,
        };
        reports.push(ReplayEvalCaseReport {
            run_id: run.id.clone(),
            workflow_id: run.workflow_id.clone(),
            label: case.label.clone(),
            pass,
            failures,
            stage_count: eval.stage_count,
            source_path: Some(run_path.display().to_string()),
            comparison,
        });
    }
    let total = reports.len();
    let passed = reports.iter().filter(|report| report.pass).count();
    let failed = total.saturating_sub(passed);
    Ok(ReplayEvalSuiteReport {
        pass: failed == 0,
        total,
        passed,
        failed,
        cases: reports,
    })
}
2123
/// Render a minimal diff between two texts in unified-diff style.
///
/// Uses an O(n·m) LCS table to decide, line by line, whether to emit a
/// context line (` `), a deletion (`-`), or an addition (`+`). Note: no
/// `@@` hunk headers are emitted — the whole file is one hunk.
pub fn render_unified_diff(path: Option<&str>, before: &str, after: &str) -> String {
    let old: Vec<&str> = before.lines().collect();
    let new: Vec<&str> = after.lines().collect();

    // Suffix LCS table: lcs[i][j] = length of the longest common
    // subsequence of old[i..] and new[j..].
    let mut lcs = vec![vec![0usize; new.len() + 1]; old.len() + 1];
    for i in (0..old.len()).rev() {
        for j in (0..new.len()).rev() {
            lcs[i][j] = if old[i] == new[j] {
                lcs[i + 1][j + 1] + 1
            } else {
                lcs[i + 1][j].max(lcs[i][j + 1])
            };
        }
    }

    let label = path.unwrap_or("artifact");
    let mut out = format!("--- a/{label}\n+++ b/{label}\n");
    let (mut i, mut j) = (0usize, 0usize);
    while i < old.len() && j < new.len() {
        if old[i] == new[j] {
            out.push_str(&format!(" {}\n", old[i]));
            i += 1;
            j += 1;
        } else if lcs[i + 1][j] >= lcs[i][j + 1] {
            // Deleting old[i] keeps at least as much common material.
            out.push_str(&format!("-{}\n", old[i]));
            i += 1;
        } else {
            out.push_str(&format!("+{}\n", new[j]));
            j += 1;
        }
    }
    // Drain whichever side remains.
    for line in &old[i..] {
        out.push_str(&format!("-{line}\n"));
    }
    for line in &new[j..] {
        out.push_str(&format!("+{line}\n"));
    }
    out
}
2166
/// Persist a run record as pretty-printed JSON, returning the final path.
///
/// When `path` is `None`, writes to `<HARN_RUN_DIR or .harn-runs>/<id>.json`.
/// Writes to a `.json.tmp` sibling first, then renames over the target so a
/// crash mid-write cannot leave a truncated record.
///
/// # Errors
/// `VmError::Runtime` on directory creation, encode, write, or rename
/// failure.
pub fn save_run_record(run: &RunRecord, path: Option<&str>) -> Result<String, VmError> {
    let path = path
        .map(PathBuf::from)
        .unwrap_or_else(|| default_run_dir().join(format!("{}.json", run.id)));
    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)
            .map_err(|e| VmError::Runtime(format!("failed to create run directory: {e}")))?;
    }
    let json = serde_json::to_string_pretty(run)
        .map_err(|e| VmError::Runtime(format!("failed to encode run record: {e}")))?;
    // Write-then-rename for atomicity on the same filesystem.
    let tmp_path = path.with_extension("json.tmp");
    std::fs::write(&tmp_path, &json)
        .map_err(|e| VmError::Runtime(format!("failed to persist run record: {e}")))?;
    std::fs::rename(&tmp_path, &path).map_err(|e| {
        // Best-effort direct write when rename fails (presumably for
        // cross-device cases); the error is still returned, and the tmp
        // file is left behind — TODO confirm both behaviors are intended.
        let _ = std::fs::write(&path, &json);
        VmError::Runtime(format!("failed to finalize run record: {e}"))
    })?;
    Ok(path.to_string_lossy().to_string())
}
2188
2189pub fn load_run_record(path: &Path) -> Result<RunRecord, VmError> {
2190 let content = std::fs::read_to_string(path)
2191 .map_err(|e| VmError::Runtime(format!("failed to read run record: {e}")))?;
2192 serde_json::from_str(&content)
2193 .map_err(|e| VmError::Runtime(format!("failed to parse run record: {e}")))
2194}
2195
2196pub fn replay_fixture_from_run(run: &RunRecord) -> ReplayFixture {
2197 ReplayFixture {
2198 type_name: "replay_fixture".to_string(),
2199 id: new_id("fixture"),
2200 source_run_id: run.id.clone(),
2201 workflow_id: run.workflow_id.clone(),
2202 workflow_name: run.workflow_name.clone(),
2203 created_at: now_rfc3339(),
2204 expected_status: run.status.clone(),
2205 stage_assertions: run
2206 .stages
2207 .iter()
2208 .map(|stage| ReplayStageAssertion {
2209 node_id: stage.node_id.clone(),
2210 expected_status: stage.status.clone(),
2211 expected_outcome: stage.outcome.clone(),
2212 expected_branch: stage.branch.clone(),
2213 required_artifact_kinds: stage
2214 .artifacts
2215 .iter()
2216 .map(|artifact| artifact.kind.clone())
2217 .collect(),
2218 visible_text_contains: stage
2219 .visible_text
2220 .as_ref()
2221 .filter(|text| !text.is_empty())
2222 .map(|text| text.chars().take(80).collect()),
2223 })
2224 .collect(),
2225 }
2226}
2227
2228pub fn evaluate_run_against_fixture(run: &RunRecord, fixture: &ReplayFixture) -> ReplayEvalReport {
2229 let mut failures = Vec::new();
2230 if run.status != fixture.expected_status {
2231 failures.push(format!(
2232 "run status mismatch: expected {}, got {}",
2233 fixture.expected_status, run.status
2234 ));
2235 }
2236 for assertion in &fixture.stage_assertions {
2237 let Some(stage) = run
2238 .stages
2239 .iter()
2240 .find(|stage| stage.node_id == assertion.node_id)
2241 else {
2242 failures.push(format!("missing stage {}", assertion.node_id));
2243 continue;
2244 };
2245 if stage.status != assertion.expected_status {
2246 failures.push(format!(
2247 "stage {} status mismatch: expected {}, got {}",
2248 assertion.node_id, assertion.expected_status, stage.status
2249 ));
2250 }
2251 if stage.outcome != assertion.expected_outcome {
2252 failures.push(format!(
2253 "stage {} outcome mismatch: expected {}, got {}",
2254 assertion.node_id, assertion.expected_outcome, stage.outcome
2255 ));
2256 }
2257 if stage.branch != assertion.expected_branch {
2258 failures.push(format!(
2259 "stage {} branch mismatch: expected {:?}, got {:?}",
2260 assertion.node_id, assertion.expected_branch, stage.branch
2261 ));
2262 }
2263 for required_kind in &assertion.required_artifact_kinds {
2264 if !stage
2265 .artifacts
2266 .iter()
2267 .any(|artifact| &artifact.kind == required_kind)
2268 {
2269 failures.push(format!(
2270 "stage {} missing artifact kind {}",
2271 assertion.node_id, required_kind
2272 ));
2273 }
2274 }
2275 if let Some(snippet) = &assertion.visible_text_contains {
2276 let actual = stage.visible_text.clone().unwrap_or_default();
2277 if !actual.contains(snippet) {
2278 failures.push(format!(
2279 "stage {} visible text does not contain expected snippet {:?}",
2280 assertion.node_id, snippet
2281 ));
2282 }
2283 }
2284 }
2285
2286 ReplayEvalReport {
2287 pass: failures.is_empty(),
2288 failures,
2289 stage_count: run.stages.len(),
2290 }
2291}
2292
2293pub fn evaluate_run_suite(
2294 cases: Vec<(RunRecord, ReplayFixture, Option<String>)>,
2295) -> ReplayEvalSuiteReport {
2296 let mut reports = Vec::new();
2297 for (run, fixture, source_path) in cases {
2298 let report = evaluate_run_against_fixture(&run, &fixture);
2299 reports.push(ReplayEvalCaseReport {
2300 run_id: run.id.clone(),
2301 workflow_id: run.workflow_id.clone(),
2302 label: None,
2303 pass: report.pass,
2304 failures: report.failures,
2305 stage_count: report.stage_count,
2306 source_path,
2307 comparison: None,
2308 });
2309 }
2310 let total = reports.len();
2311 let passed = reports.iter().filter(|report| report.pass).count();
2312 let failed = total.saturating_sub(passed);
2313 ReplayEvalSuiteReport {
2314 pass: failed == 0,
2315 total,
2316 passed,
2317 failed,
2318 cases: reports,
2319 }
2320}
2321
/// Compare two run records and summarize how `right` differs from `left`.
///
/// Stages are matched by node id across both runs: a stage present in only
/// one run is reported as "added"/"removed", and a stage present in both is
/// reported as "changed" when status, outcome, branch, or artifact counts
/// differ. Top-level transition/artifact/checkpoint counts are reported as
/// signed deltas (right minus left).
pub fn diff_run_records(left: &RunRecord, right: &RunRecord) -> RunDiffReport {
    let mut stage_diffs = Vec::new();
    // Union of stage node ids from both runs; BTreeSet keeps diff order stable.
    let mut all_node_ids = BTreeSet::new();
    all_node_ids.extend(left.stages.iter().map(|stage| stage.node_id.clone()));
    all_node_ids.extend(right.stages.iter().map(|stage| stage.node_id.clone()));

    for node_id in all_node_ids {
        let left_stage = left.stages.iter().find(|stage| stage.node_id == node_id);
        let right_stage = right.stages.iter().find(|stage| stage.node_id == node_id);
        match (left_stage, right_stage) {
            // Stage only in the left run: reported as removed on the right.
            (Some(_), None) => stage_diffs.push(RunStageDiffRecord {
                node_id,
                change: "removed".to_string(),
                details: vec!["stage missing from right run".to_string()],
            }),
            // Stage only in the right run: reported as added.
            (None, Some(_)) => stage_diffs.push(RunStageDiffRecord {
                node_id,
                change: "added".to_string(),
                details: vec!["stage missing from left run".to_string()],
            }),
            // Stage in both runs: collect field-level differences, if any.
            (Some(left_stage), Some(right_stage)) => {
                let mut details = Vec::new();
                if left_stage.status != right_stage.status {
                    details.push(format!(
                        "status: {} -> {}",
                        left_stage.status, right_stage.status
                    ));
                }
                if left_stage.outcome != right_stage.outcome {
                    details.push(format!(
                        "outcome: {} -> {}",
                        left_stage.outcome, right_stage.outcome
                    ));
                }
                if left_stage.branch != right_stage.branch {
                    details.push(format!(
                        "branch: {:?} -> {:?}",
                        left_stage.branch, right_stage.branch
                    ));
                }
                // Artifact comparisons are by count only, not by content.
                if left_stage.produced_artifact_ids.len() != right_stage.produced_artifact_ids.len()
                {
                    details.push(format!(
                        "produced_artifacts: {} -> {}",
                        left_stage.produced_artifact_ids.len(),
                        right_stage.produced_artifact_ids.len()
                    ));
                }
                if left_stage.artifacts.len() != right_stage.artifacts.len() {
                    details.push(format!(
                        "artifact_records: {} -> {}",
                        left_stage.artifacts.len(),
                        right_stage.artifacts.len()
                    ));
                }
                // Only emit a "changed" record when something actually differs.
                if !details.is_empty() {
                    stage_diffs.push(RunStageDiffRecord {
                        node_id,
                        change: "changed".to_string(),
                        details,
                    });
                }
            }
            // Unreachable: every node_id came from the union of both runs.
            (None, None) => {}
        }
    }

    let status_changed = left.status != right.status;
    // "Identical" means: same status, no stage diffs, and matching top-level
    // transition/artifact/checkpoint counts (count-based, like stage checks).
    let identical = !status_changed
        && stage_diffs.is_empty()
        && left.transitions.len() == right.transitions.len()
        && left.artifacts.len() == right.artifacts.len()
        && left.checkpoints.len() == right.checkpoints.len();

    RunDiffReport {
        left_run_id: left.id.clone(),
        right_run_id: right.id.clone(),
        identical,
        status_changed,
        left_status: left.status.clone(),
        right_status: right.status.clone(),
        stage_diffs,
        // Deltas are right minus left: positive means the right run has more.
        transition_count_delta: right.transitions.len() as isize - left.transitions.len() as isize,
        artifact_count_delta: right.artifacts.len() as isize - left.artifacts.len() as isize,
        checkpoint_count_delta: right.checkpoints.len() as isize - left.checkpoints.len() as isize,
    }
}
2409
2410pub fn push_execution_policy(policy: CapabilityPolicy) {
2411 EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
2412}
2413
2414pub fn pop_execution_policy() {
2415 EXECUTION_POLICY_STACK.with(|stack| {
2416 stack.borrow_mut().pop();
2417 });
2418}
2419
2420pub fn current_execution_policy() -> Option<CapabilityPolicy> {
2421 EXECUTION_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
2422}
2423
2424pub fn current_tool_metadata(tool: &str) -> Option<ToolRuntimePolicyMetadata> {
2425 current_execution_policy().and_then(|policy| policy.tool_metadata.get(tool).cloned())
2426}
2427
2428fn policy_allows_tool(policy: &CapabilityPolicy, tool: &str) -> bool {
2429 policy.tools.is_empty() || policy.tools.iter().any(|allowed| allowed == tool)
2430}
2431
2432fn policy_allows_capability(policy: &CapabilityPolicy, capability: &str, op: &str) -> bool {
2433 policy.capabilities.is_empty()
2434 || policy
2435 .capabilities
2436 .get(capability)
2437 .is_some_and(|ops| ops.is_empty() || ops.iter().any(|allowed| allowed == op))
2438}
2439
2440fn policy_allows_side_effect(policy: &CapabilityPolicy, requested: &str) -> bool {
2441 fn rank(v: &str) -> usize {
2442 match v {
2443 "none" => 0,
2444 "read_only" => 1,
2445 "workspace_write" => 2,
2446 "process_exec" => 3,
2447 "network" => 4,
2448 _ => 5,
2449 }
2450 }
2451 policy
2452 .side_effect_level
2453 .as_ref()
2454 .map(|allowed| rank(allowed) >= rank(requested))
2455 .unwrap_or(true)
2456}
2457
2458fn reject_policy(reason: String) -> Result<(), VmError> {
2459 Err(VmError::CategorizedError {
2460 message: reason,
2461 category: crate::value::ErrorCategory::ToolRejected,
2462 })
2463}
2464
/// Heuristic mutation classification for tools that declare no metadata.
///
/// Matches the tool name (case-insensitively) against known patterns, from
/// most to least specific: MCP-bridged tools, shell/exec-style tools,
/// destructive file operations, then workspace-writing tools. Anything else
/// is treated as read-only.
fn fallback_mutation_classification(tool_name: &str) -> String {
    let lower = tool_name.to_ascii_lowercase();
    let classification = if lower.starts_with("mcp_") {
        "host_defined"
    } else if matches!(
        lower.as_str(),
        "exec" | "shell" | "exec_at" | "shell_at" | "run"
    ) || lower.starts_with("run_")
    {
        "ambient_side_effect"
    } else if ["delete", "remove", "move", "rename"]
        .iter()
        .any(|prefix| lower.starts_with(prefix))
    {
        "destructive"
    } else if ["write", "edit", "patch", "create", "scaffold"]
        .iter()
        .any(|needle| lower.contains(needle))
        || lower.starts_with("insert")
        || lower.starts_with("replace")
        || lower == "add_import"
    {
        "apply_workspace"
    } else {
        "read_only"
    };
    classification.to_string()
}
2499
2500pub fn current_tool_mutation_classification(tool_name: &str) -> String {
2501 current_tool_metadata(tool_name)
2502 .and_then(|metadata| metadata.mutation_classification)
2503 .unwrap_or_else(|| fallback_mutation_classification(tool_name))
2504}
2505
2506pub fn current_tool_declared_paths(tool_name: &str, args: &serde_json::Value) -> Vec<String> {
2507 let Some(map) = args.as_object() else {
2508 return Vec::new();
2509 };
2510 let path_keys = current_tool_metadata(tool_name)
2511 .map(|metadata| metadata.path_params)
2512 .filter(|keys| !keys.is_empty())
2513 .unwrap_or_else(|| {
2514 vec![
2515 "path".to_string(),
2516 "file".to_string(),
2517 "cwd".to_string(),
2518 "repo".to_string(),
2519 "target".to_string(),
2520 "destination".to_string(),
2521 ]
2522 });
2523 let mut paths = Vec::new();
2524 for key in path_keys {
2525 if let Some(value) = map.get(&key).and_then(|value| value.as_str()) {
2526 if !value.is_empty() {
2527 paths.push(value.to_string());
2528 }
2529 }
2530 }
2531 if let Some(items) = map.get("paths").and_then(|value| value.as_array()) {
2532 for item in items {
2533 if let Some(value) = item.as_str() {
2534 if !value.is_empty() {
2535 paths.push(value.to_string());
2536 }
2537 }
2538 }
2539 }
2540 paths.sort();
2541 paths.dedup();
2542 paths
2543}
2544
/// Gate a VM builtin call against the active execution policy, if one is
/// installed on this thread.
///
/// Builtins are grouped into families, each mapped to the tool, capability,
/// and side-effect ceilings it must satisfy; builtins not matched here pass
/// unchecked. Rejections surface as `VmError::CategorizedError` with the
/// `ToolRejected` category (see `reject_policy`).
pub fn enforce_current_policy_for_builtin(name: &str, args: &[VmValue]) -> Result<(), VmError> {
    // No active policy means no restrictions.
    let Some(policy) = current_execution_policy() else {
        return Ok(());
    };
    match name {
        // Text reads: tool ceiling plus workspace.read_text capability.
        "read" | "read_file" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "read_text")
            {
                return reject_policy(format!(
                    "builtin '{name}' exceeds workspace.read_text ceiling"
                ));
            }
        }
        // Listing/search: tool ceiling plus workspace.list capability.
        "search" | "list_dir" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "list")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
            }
        }
        // Existence/metadata probes: capability check only, no tool ceiling.
        "file_exists" | "stat" => {
            if !policy_allows_capability(&policy, "workspace", "exists") {
                return reject_policy(format!("builtin '{name}' exceeds workspace.exists ceiling"));
            }
        }
        // Workspace mutations: all held to the 'edit' tool ceiling, the
        // write_text capability, and the workspace_write side-effect level.
        "edit" | "write_file" | "append_file" | "mkdir" | "copy_file" => {
            if !policy_allows_tool(&policy, "edit")
                || !policy_allows_capability(&policy, "workspace", "write_text")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace write ceiling"));
            }
        }
        // Deletion has its own capability but the same side-effect level.
        "delete_file" => {
            if !policy_allows_capability(&policy, "workspace", "delete")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'delete_file' exceeds workspace.delete ceiling".to_string(),
                );
            }
        }
        "apply_edit" => {
            if !policy_allows_capability(&policy, "workspace", "apply_edit")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'apply_edit' exceeds workspace.apply_edit ceiling".to_string(),
                );
            }
        }
        // Process execution: mapped onto the 'run' tool ceiling.
        "exec" | "exec_at" | "shell" | "shell_at" | "run_command" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        // HTTP builtins: gated solely by the network side-effect ceiling.
        "http_get" | "http_post" | "http_put" | "http_patch" | "http_delete" | "http_request" => {
            if !policy_allows_side_effect(&policy, "network") {
                return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
            }
        }
        // MCP bridging is held to the same ceilings as process execution, so
        // it cannot be used as an escape hatch around exec restrictions.
        "mcp_connect"
        | "mcp_call"
        | "mcp_list_tools"
        | "mcp_list_resources"
        | "mcp_list_resource_templates"
        | "mcp_read_resource"
        | "mcp_list_prompts"
        | "mcp_get_prompt"
        | "mcp_server_info"
        | "mcp_disconnect" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        "host_call" => {
            // NOTE: shadows the outer `name` with the host call's first
            // argument, which must be "capability.operation"-shaped.
            let name = args.first().map(|v| v.display()).unwrap_or_default();
            let Some((capability, op)) = name.split_once('.') else {
                return reject_policy(format!(
                    "host_call '{name}' must use capability.operation naming"
                ));
            };
            if !policy_allows_capability(&policy, capability, op) {
                return reject_policy(format!(
                    "host_call {capability}.{op} exceeds capability ceiling"
                ));
            }
            // Derive the side-effect level implied by the capability/op pair;
            // unlisted pairs are treated as read-only.
            let requested_side_effect = match (capability, op) {
                ("workspace", "write_text" | "apply_edit" | "delete") => "workspace_write",
                ("process", "exec") => "process_exec",
                _ => "read_only",
            };
            if !policy_allows_side_effect(&policy, requested_side_effect) {
                return reject_policy(format!(
                    "host_call {capability}.{op} exceeds side-effect ceiling"
                ));
            }
        }
        // Everything else is allowed under any policy.
        _ => {}
    }
    Ok(())
}
2654
2655pub fn enforce_current_policy_for_bridge_builtin(name: &str) -> Result<(), VmError> {
2656 if current_execution_policy().is_some() {
2657 return reject_policy(format!(
2658 "bridged builtin '{name}' exceeds execution policy; declare an explicit capability/tool surface instead"
2659 ));
2660 }
2661 Ok(())
2662}
2663
2664pub fn enforce_current_policy_for_tool(tool_name: &str) -> Result<(), VmError> {
2665 let Some(policy) = current_execution_policy() else {
2666 return Ok(());
2667 };
2668 if !policy_allows_tool(&policy, tool_name) {
2669 return reject_policy(format!("tool '{tool_name}' exceeds tool ceiling"));
2670 }
2671 if let Some(metadata) = policy.tool_metadata.get(tool_name) {
2672 for (capability, ops) in &metadata.capabilities {
2673 for op in ops {
2674 if !policy_allows_capability(&policy, capability, op) {
2675 return reject_policy(format!(
2676 "tool '{tool_name}' exceeds capability ceiling: {capability}.{op}"
2677 ));
2678 }
2679 }
2680 }
2681 if let Some(side_effect_level) = metadata.side_effect_level.as_deref() {
2682 if !policy_allows_side_effect(&policy, side_effect_level) {
2683 return reject_policy(format!(
2684 "tool '{tool_name}' exceeds side-effect ceiling: {side_effect_level}"
2685 ));
2686 }
2687 }
2688 }
2689 Ok(())
2690}
2691
2692fn compact_transcript(transcript: &VmValue, keep_last: usize) -> Option<VmValue> {
2693 let dict = transcript.as_dict()?;
2694 let messages = match dict.get("messages") {
2695 Some(VmValue::List(list)) => list.iter().cloned().collect::<Vec<_>>(),
2696 _ => Vec::new(),
2697 };
2698 let retained = messages
2699 .into_iter()
2700 .rev()
2701 .take(keep_last)
2702 .collect::<Vec<_>>()
2703 .into_iter()
2704 .rev()
2705 .collect::<Vec<_>>();
2706 let mut compacted = dict.clone();
2707 compacted.insert(
2708 "messages".to_string(),
2709 VmValue::List(Rc::new(retained.clone())),
2710 );
2711 compacted.insert(
2712 "events".to_string(),
2713 VmValue::List(Rc::new(
2714 crate::llm::helpers::transcript_events_from_messages(&retained),
2715 )),
2716 );
2717 Some(VmValue::Dict(Rc::new(compacted)))
2718}
2719
2720fn redact_transcript_visibility(transcript: &VmValue, visibility: Option<&str>) -> Option<VmValue> {
2721 let Some(visibility) = visibility else {
2722 return Some(transcript.clone());
2723 };
2724 if visibility != "public" && visibility != "public_only" {
2725 return Some(transcript.clone());
2726 }
2727 let dict = transcript.as_dict()?;
2728 let public_messages = match dict.get("messages") {
2729 Some(VmValue::List(list)) => list
2730 .iter()
2731 .filter(|message| {
2732 message
2733 .as_dict()
2734 .and_then(|d| d.get("role"))
2735 .map(|v| v.display())
2736 .map(|role| role != "tool_result")
2737 .unwrap_or(true)
2738 })
2739 .cloned()
2740 .collect::<Vec<_>>(),
2741 _ => Vec::new(),
2742 };
2743 let public_events = match dict.get("events") {
2744 Some(VmValue::List(list)) => list
2745 .iter()
2746 .filter(|event| {
2747 event
2748 .as_dict()
2749 .and_then(|d| d.get("visibility"))
2750 .map(|v| v.display())
2751 .map(|value| value == "public")
2752 .unwrap_or(true)
2753 })
2754 .cloned()
2755 .collect::<Vec<_>>(),
2756 _ => Vec::new(),
2757 };
2758 let mut redacted = dict.clone();
2759 redacted.insert(
2760 "messages".to_string(),
2761 VmValue::List(Rc::new(public_messages)),
2762 );
2763 redacted.insert("events".to_string(), VmValue::List(Rc::new(public_events)));
2764 Some(VmValue::Dict(Rc::new(redacted)))
2765}
2766
2767pub(crate) fn apply_input_transcript_policy(
2768 transcript: Option<VmValue>,
2769 policy: &TranscriptPolicy,
2770) -> Option<VmValue> {
2771 let mut transcript = transcript;
2772 match policy.mode.as_deref() {
2773 Some("reset") => return None,
2774 Some("fork") => {
2775 if let Some(VmValue::Dict(dict)) = transcript.as_ref() {
2776 let mut forked = dict.as_ref().clone();
2777 forked.insert(
2778 "id".to_string(),
2779 VmValue::String(Rc::from(new_id("transcript"))),
2780 );
2781 transcript = Some(VmValue::Dict(Rc::new(forked)));
2782 }
2783 }
2784 _ => {}
2785 }
2786 if policy.compact {
2787 let keep_last = policy.keep_last.unwrap_or(6);
2788 transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
2789 }
2790 transcript
2791}
2792
2793fn apply_output_transcript_policy(
2794 transcript: Option<VmValue>,
2795 policy: &TranscriptPolicy,
2796) -> Option<VmValue> {
2797 let mut transcript = transcript;
2798 if policy.compact {
2799 let keep_last = policy.keep_last.unwrap_or(6);
2800 transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
2801 }
2802 transcript.and_then(|value| redact_transcript_visibility(&value, policy.visibility.as_deref()))
2803}
2804
/// Execute one workflow stage node and return `(raw result payload, produced
/// artifacts, output transcript)`.
///
/// Flow: select input artifacts per the node's context policy, render them
/// into the prompt, enforce the node's input contract, then run one of three
/// paths — a shell `verify` command (for `kind == "verify"`), an agent loop
/// (agent mode, or when the node declares tools), or a single LLM call. The
/// stage output is wrapped in one `ArtifactRecord` whose lineage points at
/// the selected inputs.
///
/// Returns `VmError::Runtime` when the input contract is violated or a verify
/// command cannot be spawned; LLM errors propagate from the call itself.
pub async fn execute_stage_node(
    node_id: &str,
    node: &WorkflowNode,
    task: &str,
    artifacts: &[ArtifactRecord],
    transcript: Option<VmValue>,
) -> Result<(serde_json::Value, Vec<ArtifactRecord>, Option<VmValue>), VmError> {
    // Context selection: when the node declares no include_kinds of its own,
    // fall back to the input contract's declared input kinds.
    let mut selection_policy = node.context_policy.clone();
    if selection_policy.include_kinds.is_empty() && !node.input_contract.input_kinds.is_empty() {
        selection_policy.include_kinds = node.input_contract.input_kinds.clone();
    }
    let selected = select_artifacts_adaptive(artifacts.to_vec(), &selection_policy);
    let rendered_context = render_artifacts_context(&selected, &node.context_policy);
    let transcript = apply_input_transcript_policy(transcript, &node.transcript_policy);
    // Enforce the input contract before doing any work.
    if node.input_contract.require_transcript && transcript.is_none() {
        return Err(VmError::Runtime(format!(
            "workflow stage {node_id} requires transcript input"
        )));
    }
    if let Some(min_inputs) = node.input_contract.min_inputs {
        if selected.len() < min_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} requires at least {min_inputs} input artifacts"
            )));
        }
    }
    if let Some(max_inputs) = node.input_contract.max_inputs {
        if selected.len() > max_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} accepts at most {max_inputs} input artifacts"
            )));
        }
    }
    // Prompt = rendered artifact context (when any) + labeled task text.
    let prompt = if rendered_context.is_empty() {
        task.to_string()
    } else {
        format!(
            "{rendered_context}\n\n{}:\n{task}",
            node.task_label
                .clone()
                .unwrap_or_else(|| "Task".to_string())
        )
    };

    // Tool-calling wire format for agent loops; defaults to "text".
    let tool_format = std::env::var("HARN_AGENT_TOOL_FORMAT")
        .ok()
        .filter(|value| !value.trim().is_empty())
        .unwrap_or_else(|| "text".to_string());
    let mut llm_result = if node.kind == "verify" {
        // Verify nodes run a shell command instead of calling the LLM. The
        // command comes from node.verify["command"], trimmed; blank means
        // "nothing to run".
        if let Some(command) = node
            .verify
            .as_ref()
            .and_then(|verify| verify.as_object())
            .and_then(|verify| verify.get("command"))
            .and_then(|value| value.as_str())
            .map(str::trim)
            .filter(|value| !value.is_empty())
        {
            // Platform-appropriate shell wrapper.
            let mut process = if cfg!(target_os = "windows") {
                let mut cmd = tokio::process::Command::new("cmd");
                cmd.arg("/C").arg(command);
                cmd
            } else {
                let mut cmd = tokio::process::Command::new("/bin/sh");
                cmd.arg("-lc").arg(command);
                cmd
            };
            process.stdin(std::process::Stdio::null());
            // Inherit cwd/env from the VM's current execution context, if set.
            if let Some(context) = crate::stdlib::process::current_execution_context() {
                if let Some(cwd) = context.cwd.filter(|cwd| !cwd.is_empty()) {
                    process.current_dir(cwd);
                }
                if !context.env.is_empty() {
                    process.envs(context.env);
                }
            }
            let output = process
                .output()
                .await
                .map_err(|e| VmError::Runtime(format!("workflow verify exec failed: {e}")))?;
            let stdout = String::from_utf8_lossy(&output.stdout).to_string();
            let stderr = String::from_utf8_lossy(&output.stderr).to_string();
            // Merge streams for the visible text, avoiding a stray newline
            // when either side is empty.
            let combined = if stderr.is_empty() {
                stdout.clone()
            } else if stdout.is_empty() {
                stderr.clone()
            } else {
                format!("{stdout}\n{stderr}")
            };
            serde_json::json!({
                "status": "completed",
                "text": combined,
                "visible_text": combined,
                "command": command,
                "stdout": stdout,
                "stderr": stderr,
                "exit_status": output.status.code().unwrap_or(-1),
                "success": output.status.success(),
            })
        } else {
            // No command configured: the verify stage trivially completes.
            serde_json::json!({
                "status": "completed",
                "text": "",
                "visible_text": "",
            })
        }
    } else {
        // LLM-backed stage: assemble call options from the node's model
        // policy; unset fields are simply omitted.
        let mut options = BTreeMap::new();
        if let Some(provider) = &node.model_policy.provider {
            options.insert(
                "provider".to_string(),
                VmValue::String(Rc::from(provider.clone())),
            );
        }
        if let Some(model) = &node.model_policy.model {
            options.insert(
                "model".to_string(),
                VmValue::String(Rc::from(model.clone())),
            );
        }
        if let Some(model_tier) = &node.model_policy.model_tier {
            options.insert(
                "model_tier".to_string(),
                VmValue::String(Rc::from(model_tier.clone())),
            );
        }
        if let Some(temperature) = node.model_policy.temperature {
            options.insert("temperature".to_string(), VmValue::Float(temperature));
        }
        if let Some(max_tokens) = node.model_policy.max_tokens {
            options.insert("max_tokens".to_string(), VmValue::Int(max_tokens));
        }
        let tool_names = workflow_tool_names(&node.tools);
        // Prefer the raw (VM-native) tools value; otherwise convert the JSON
        // tools spec unless it is null.
        let tools_value = node.raw_tools.clone().or_else(|| {
            if matches!(node.tools, serde_json::Value::Null) {
                None
            } else {
                Some(crate::stdlib::json_to_vm_value(&node.tools))
            }
        });
        if tools_value.is_some() && !tool_names.is_empty() {
            options.insert("tools".to_string(), tools_value.unwrap_or(VmValue::Nil));
        }
        if let Some(transcript) = transcript.clone() {
            options.insert("transcript".to_string(), transcript);
        }

        // Positional args: prompt, optional system prompt, options dict.
        let args = vec![
            VmValue::String(Rc::from(prompt.clone())),
            node.system
                .clone()
                .map(|s| VmValue::String(Rc::from(s)))
                .unwrap_or(VmValue::Nil),
            VmValue::Dict(Rc::new(options)),
        ];
        let mut opts = extract_llm_options(&args)?;

        // Agent mode (explicit, or implied by declared tools) drives a full
        // agent loop; otherwise a single LLM call suffices.
        if node.mode.as_deref() == Some("agent") || !tool_names.is_empty() {
            crate::llm::run_agent_loop_internal(
                &mut opts,
                crate::llm::AgentLoopConfig {
                    persistent: true,
                    max_iterations: 12,
                    max_nudges: 3,
                    nudge: None,
                    done_sentinel: node.done_sentinel.clone(),
                    break_unless_phase: None,
                    tool_retries: 0,
                    tool_backoff_ms: 1000,
                    tool_format: tool_format.clone(),
                    auto_compact: None,
                    context_callback: None,
                    policy: None,
                    daemon: false,
                    llm_retries: 2,
                    llm_backoff_ms: 2000,
                },
            )
            .await?
        } else {
            let result = vm_call_llm_full(&opts).await?;
            crate::llm::agent_loop_result_from_llm(&result, opts)
        }
    };
    // Enrich the raw payload with provenance for auditing/replay.
    if let Some(payload) = llm_result.as_object_mut() {
        payload.insert("prompt".to_string(), serde_json::json!(prompt));
        payload.insert(
            "system_prompt".to_string(),
            serde_json::json!(node.system.clone().unwrap_or_default()),
        );
        payload.insert(
            "rendered_context".to_string(),
            serde_json::json!(rendered_context),
        );
        payload.insert(
            "selected_artifact_ids".to_string(),
            serde_json::json!(selected
                .iter()
                .map(|artifact| artifact.id.clone())
                .collect::<Vec<_>>()),
        );
        payload.insert(
            "selected_artifact_titles".to_string(),
            serde_json::json!(selected
                .iter()
                .map(|artifact| artifact.title.clone())
                .collect::<Vec<_>>()),
        );
        payload.insert(
            "tool_calling_mode".to_string(),
            serde_json::json!(tool_format.clone()),
        );
    }

    let visible_text = llm_result["text"].as_str().unwrap_or_default().to_string();
    // Convert the result's transcript (if any) back to a VM value, then apply
    // the node's output-side transcript policy (compaction/redaction).
    let transcript = llm_result
        .get("transcript")
        .cloned()
        .map(|value| crate::stdlib::json_to_vm_value(&value));
    let transcript = apply_output_transcript_policy(transcript, &node.transcript_policy);
    // Output artifact kind: first declared output kind, else one derived from
    // the node type.
    let output_kind = node
        .output_contract
        .output_kinds
        .first()
        .cloned()
        .unwrap_or_else(|| {
            if node.kind == "verify" {
                "verification_result".to_string()
            } else {
                "artifact".to_string()
            }
        });
    let mut metadata = BTreeMap::new();
    metadata.insert(
        "input_artifact_ids".to_string(),
        serde_json::json!(selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect::<Vec<_>>()),
    );
    metadata.insert("node_kind".to_string(), serde_json::json!(node.kind));
    // Wrap the stage output in a single artifact whose lineage points at the
    // selected input artifacts.
    let artifact = ArtifactRecord {
        type_name: "artifact".to_string(),
        id: new_id("artifact"),
        kind: output_kind,
        title: Some(format!("stage {node_id} output")),
        text: Some(visible_text),
        data: Some(llm_result.clone()),
        source: Some(node_id.to_string()),
        created_at: now_rfc3339(),
        freshness: Some("fresh".to_string()),
        priority: None,
        lineage: selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect(),
        relevance: Some(1.0),
        estimated_tokens: None,
        stage: Some(node_id.to_string()),
        metadata,
    }
    .normalize();

    Ok((llm_result, vec![artifact], transcript))
}
3070
3071pub fn next_nodes_for(
3072 graph: &WorkflowGraph,
3073 current: &str,
3074 branch: Option<&str>,
3075) -> Vec<WorkflowEdge> {
3076 let mut matching: Vec<WorkflowEdge> = graph
3077 .edges
3078 .iter()
3079 .filter(|edge| edge.from == current && edge.branch.as_deref() == branch)
3080 .cloned()
3081 .collect();
3082 if matching.is_empty() {
3083 matching = graph
3084 .edges
3085 .iter()
3086 .filter(|edge| edge.from == current && edge.branch.is_none())
3087 .cloned()
3088 .collect();
3089 }
3090 matching
3091}
3092
3093pub fn next_node_for(graph: &WorkflowGraph, current: &str, branch: &str) -> Option<String> {
3094 next_nodes_for(graph, current, Some(branch))
3095 .into_iter()
3096 .next()
3097 .map(|edge| edge.to)
3098}
3099
3100pub fn append_audit_entry(
3101 graph: &mut WorkflowGraph,
3102 op: &str,
3103 node_id: Option<String>,
3104 reason: Option<String>,
3105 metadata: BTreeMap<String, serde_json::Value>,
3106) {
3107 graph.audit_log.push(WorkflowAuditEntry {
3108 id: new_id("audit"),
3109 op: op.to_string(),
3110 node_id,
3111 timestamp: now_rfc3339(),
3112 reason,
3113 metadata,
3114 });
3115}
3116
3117pub fn builtin_ceiling() -> CapabilityPolicy {
3118 CapabilityPolicy {
3119 tools: Vec::new(),
3122 capabilities: BTreeMap::from([
3123 (
3124 "workspace".to_string(),
3125 vec![
3126 "read_text".to_string(),
3127 "write_text".to_string(),
3128 "apply_edit".to_string(),
3129 "delete".to_string(),
3130 "exists".to_string(),
3131 "list".to_string(),
3132 ],
3133 ),
3134 ("process".to_string(), vec!["exec".to_string()]),
3135 ]),
3136 workspace_roots: Vec::new(),
3137 side_effect_level: Some("network".to_string()),
3138 recursion_limit: Some(8),
3139 tool_arg_constraints: Vec::new(),
3140 tool_metadata: BTreeMap::new(),
3141 }
3142}
3143
3144#[cfg(test)]
3145mod tests {
3146 use super::*;
3147
3148 #[test]
3149 fn capability_intersection_rejects_privilege_expansion() {
3150 let ceiling = CapabilityPolicy {
3151 tools: vec!["read".to_string()],
3152 side_effect_level: Some("read_only".to_string()),
3153 recursion_limit: Some(2),
3154 ..Default::default()
3155 };
3156 let requested = CapabilityPolicy {
3157 tools: vec!["read".to_string(), "edit".to_string()],
3158 ..Default::default()
3159 };
3160 let error = ceiling.intersect(&requested).unwrap_err();
3161 assert!(error.contains("host ceiling"));
3162 }
3163
3164 #[test]
3165 fn mutation_session_normalize_fills_defaults() {
3166 let normalized = MutationSessionRecord::default().normalize();
3167 assert!(normalized.session_id.starts_with("session_"));
3168 assert_eq!(normalized.mutation_scope, "read_only");
3169 assert_eq!(normalized.approval_mode, "host_enforced");
3170 }
3171
3172 #[test]
3173 fn install_current_mutation_session_round_trips() {
3174 install_current_mutation_session(Some(MutationSessionRecord {
3175 session_id: "session_test".to_string(),
3176 mutation_scope: "apply_workspace".to_string(),
3177 approval_mode: "explicit".to_string(),
3178 ..Default::default()
3179 }));
3180 let current = current_mutation_session().expect("session installed");
3181 assert_eq!(current.session_id, "session_test");
3182 assert_eq!(current.mutation_scope, "apply_workspace");
3183 assert_eq!(current.approval_mode, "explicit");
3184
3185 install_current_mutation_session(None);
3186 assert!(current_mutation_session().is_none());
3187 }
3188
3189 #[test]
3190 fn active_execution_policy_rejects_unknown_bridge_builtin() {
3191 push_execution_policy(CapabilityPolicy {
3192 tools: vec!["read".to_string()],
3193 capabilities: BTreeMap::from([(
3194 "workspace".to_string(),
3195 vec!["read_text".to_string()],
3196 )]),
3197 side_effect_level: Some("read_only".to_string()),
3198 recursion_limit: Some(1),
3199 ..Default::default()
3200 });
3201 let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
3202 pop_execution_policy();
3203 assert!(matches!(
3204 error,
3205 VmError::CategorizedError {
3206 category: crate::value::ErrorCategory::ToolRejected,
3207 ..
3208 }
3209 ));
3210 }
3211
3212 #[test]
3213 fn active_execution_policy_rejects_mcp_escape_hatch() {
3214 push_execution_policy(CapabilityPolicy {
3215 tools: vec!["read".to_string()],
3216 capabilities: BTreeMap::from([(
3217 "workspace".to_string(),
3218 vec!["read_text".to_string()],
3219 )]),
3220 side_effect_level: Some("read_only".to_string()),
3221 recursion_limit: Some(1),
3222 ..Default::default()
3223 });
3224 let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
3225 pop_execution_policy();
3226 assert!(matches!(
3227 error,
3228 VmError::CategorizedError {
3229 category: crate::value::ErrorCategory::ToolRejected,
3230 ..
3231 }
3232 ));
3233 }
3234
3235 #[test]
3236 fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
3237 let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
3238 "name": "legacy",
3239 "act": {"mode": "llm"},
3240 "verify": {"kind": "verify"},
3241 "repair": {"mode": "agent"},
3242 }));
3243 let graph = normalize_workflow_value(&value).unwrap();
3244 assert_eq!(graph.type_name, "workflow_graph");
3245 assert!(graph.nodes.contains_key("act"));
3246 assert!(graph.nodes.contains_key("verify"));
3247 assert!(graph.nodes.contains_key("repair"));
3248 assert_eq!(graph.entry, "act");
3249 }
3250
3251 #[test]
3252 fn workflow_normalization_accepts_tool_registry_nodes() {
3253 let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
3254 "name": "registry_tools",
3255 "entry": "implement",
3256 "nodes": {
3257 "implement": {
3258 "kind": "stage",
3259 "mode": "agent",
3260 "tools": {
3261 "_type": "tool_registry",
3262 "tools": [
3263 {"name": "read", "description": "Read files"},
3264 {"name": "run", "description": "Run commands"}
3265 ]
3266 }
3267 }
3268 },
3269 "edges": []
3270 }));
3271 let graph = normalize_workflow_value(&value).unwrap();
3272 let node = graph.nodes.get("implement").unwrap();
3273 assert_eq!(workflow_tool_names(&node.tools), vec!["read", "run"]);
3274 }
3275
3276 #[test]
3277 fn artifact_selection_honors_budget_and_priority() {
3278 let policy = ContextPolicy {
3279 max_artifacts: Some(2),
3280 max_tokens: Some(30),
3281 prefer_recent: true,
3282 prefer_fresh: true,
3283 prioritize_kinds: vec!["verification_result".to_string()],
3284 ..Default::default()
3285 };
3286 let artifacts = vec![
3287 ArtifactRecord {
3288 type_name: "artifact".to_string(),
3289 id: "a".to_string(),
3290 kind: "summary".to_string(),
3291 text: Some("short".to_string()),
3292 relevance: Some(0.9),
3293 created_at: now_rfc3339(),
3294 ..Default::default()
3295 }
3296 .normalize(),
3297 ArtifactRecord {
3298 type_name: "artifact".to_string(),
3299 id: "b".to_string(),
3300 kind: "summary".to_string(),
3301 text: Some("this is a much larger artifact body".to_string()),
3302 relevance: Some(1.0),
3303 created_at: now_rfc3339(),
3304 ..Default::default()
3305 }
3306 .normalize(),
3307 ArtifactRecord {
3308 type_name: "artifact".to_string(),
3309 id: "c".to_string(),
3310 kind: "summary".to_string(),
3311 text: Some("tiny".to_string()),
3312 relevance: Some(0.5),
3313 created_at: now_rfc3339(),
3314 ..Default::default()
3315 }
3316 .normalize(),
3317 ];
3318 let selected = select_artifacts(artifacts, &policy);
3319 assert_eq!(selected.len(), 2);
3320 assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
3321 }
3322
3323 #[test]
3324 fn workflow_validation_rejects_condition_without_true_false_edges() {
3325 let graph = WorkflowGraph {
3326 entry: "gate".to_string(),
3327 nodes: BTreeMap::from([(
3328 "gate".to_string(),
3329 WorkflowNode {
3330 id: Some("gate".to_string()),
3331 kind: "condition".to_string(),
3332 ..Default::default()
3333 },
3334 )]),
3335 edges: vec![WorkflowEdge {
3336 from: "gate".to_string(),
3337 to: "next".to_string(),
3338 branch: Some("true".to_string()),
3339 label: None,
3340 }],
3341 ..Default::default()
3342 };
3343 let report = validate_workflow(&graph, None);
3344 assert!(!report.valid);
3345 assert!(report
3346 .errors
3347 .iter()
3348 .any(|error| error.contains("true") && error.contains("false")));
3349 }
3350
    #[test]
    fn replay_fixture_round_trip_passes() {
        // Round-trip property: a fixture derived from a run must evaluate
        // cleanly against that same run — no failures, report passes.
        // The fixture below is a fully-populated single-stage completed run.
        let run = RunRecord {
            type_name: "run_record".to_string(),
            id: "run_1".to_string(),
            workflow_id: "wf".to_string(),
            workflow_name: Some("demo".to_string()),
            task: "demo".to_string(),
            status: "completed".to_string(),
            started_at: "1".to_string(),
            finished_at: Some("2".to_string()),
            parent_run_id: None,
            root_run_id: Some("run_1".to_string()),
            // One completed "act" stage that produced a single artifact.
            stages: vec![RunStageRecord {
                id: "stage_1".to_string(),
                node_id: "act".to_string(),
                kind: "stage".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                branch: Some("success".to_string()),
                started_at: "1".to_string(),
                finished_at: Some("2".to_string()),
                visible_text: Some("done".to_string()),
                private_reasoning: None,
                transcript: None,
                verification: None,
                usage: None,
                artifacts: vec![ArtifactRecord {
                    type_name: "artifact".to_string(),
                    id: "a1".to_string(),
                    kind: "summary".to_string(),
                    text: Some("done".to_string()),
                    created_at: "1".to_string(),
                    ..Default::default()
                }
                .normalize()],
                consumed_artifact_ids: vec![],
                // Matches the artifact id embedded in `artifacts` above.
                produced_artifact_ids: vec!["a1".to_string()],
                attempts: vec![],
                metadata: BTreeMap::new(),
            }],
            transitions: vec![],
            checkpoints: vec![],
            pending_nodes: vec![],
            completed_nodes: vec!["act".to_string()],
            child_runs: vec![],
            artifacts: vec![],
            policy: CapabilityPolicy::default(),
            execution: None,
            transcript: None,
            usage: None,
            replay_fixture: None,
            trace_spans: vec![],
            metadata: BTreeMap::new(),
            persisted_path: None,
        };
        let fixture = replay_fixture_from_run(&run);
        let report = evaluate_run_against_fixture(&run, &fixture);
        assert!(report.pass);
        assert!(report.failures.is_empty());
    }
3412
3413 #[test]
3414 fn replay_eval_suite_reports_failed_case() {
3415 let good = RunRecord {
3416 id: "run_good".to_string(),
3417 workflow_id: "wf".to_string(),
3418 status: "completed".to_string(),
3419 stages: vec![RunStageRecord {
3420 node_id: "act".to_string(),
3421 status: "completed".to_string(),
3422 outcome: "success".to_string(),
3423 ..Default::default()
3424 }],
3425 ..Default::default()
3426 };
3427 let bad = RunRecord {
3428 id: "run_bad".to_string(),
3429 workflow_id: "wf".to_string(),
3430 status: "failed".to_string(),
3431 stages: vec![RunStageRecord {
3432 node_id: "act".to_string(),
3433 status: "failed".to_string(),
3434 outcome: "error".to_string(),
3435 ..Default::default()
3436 }],
3437 ..Default::default()
3438 };
3439 let suite = evaluate_run_suite(vec![
3440 (
3441 good.clone(),
3442 replay_fixture_from_run(&good),
3443 Some("good.json".to_string()),
3444 ),
3445 (
3446 bad.clone(),
3447 replay_fixture_from_run(&good),
3448 Some("bad.json".to_string()),
3449 ),
3450 ]);
3451 assert!(!suite.pass);
3452 assert_eq!(suite.total, 2);
3453 assert_eq!(suite.failed, 1);
3454 assert!(suite.cases.iter().any(|case| !case.pass));
3455 }
3456
3457 #[test]
3458 fn run_diff_reports_changed_stage() {
3459 let left = RunRecord {
3460 id: "left".to_string(),
3461 workflow_id: "wf".to_string(),
3462 status: "completed".to_string(),
3463 stages: vec![RunStageRecord {
3464 node_id: "act".to_string(),
3465 status: "completed".to_string(),
3466 outcome: "success".to_string(),
3467 ..Default::default()
3468 }],
3469 ..Default::default()
3470 };
3471 let right = RunRecord {
3472 id: "right".to_string(),
3473 workflow_id: "wf".to_string(),
3474 status: "failed".to_string(),
3475 stages: vec![RunStageRecord {
3476 node_id: "act".to_string(),
3477 status: "failed".to_string(),
3478 outcome: "error".to_string(),
3479 ..Default::default()
3480 }],
3481 ..Default::default()
3482 };
3483 let diff = diff_run_records(&left, &right);
3484 assert!(diff.status_changed);
3485 assert!(!diff.identical);
3486 assert_eq!(diff.stage_diffs.len(), 1);
3487 }
3488
3489 #[test]
3490 fn eval_suite_manifest_can_fail_on_baseline_diff() {
3491 let temp_dir =
3492 std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
3493 std::fs::create_dir_all(&temp_dir).unwrap();
3494 let baseline_path = temp_dir.join("baseline.json");
3495 let candidate_path = temp_dir.join("candidate.json");
3496
3497 let baseline = RunRecord {
3498 id: "baseline".to_string(),
3499 workflow_id: "wf".to_string(),
3500 status: "completed".to_string(),
3501 stages: vec![RunStageRecord {
3502 node_id: "act".to_string(),
3503 status: "completed".to_string(),
3504 outcome: "success".to_string(),
3505 ..Default::default()
3506 }],
3507 ..Default::default()
3508 };
3509 let candidate = RunRecord {
3510 id: "candidate".to_string(),
3511 workflow_id: "wf".to_string(),
3512 status: "failed".to_string(),
3513 stages: vec![RunStageRecord {
3514 node_id: "act".to_string(),
3515 status: "failed".to_string(),
3516 outcome: "error".to_string(),
3517 ..Default::default()
3518 }],
3519 ..Default::default()
3520 };
3521
3522 save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
3523 save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();
3524
3525 let manifest = EvalSuiteManifest {
3526 base_dir: Some(temp_dir.display().to_string()),
3527 cases: vec![EvalSuiteCase {
3528 label: Some("candidate".to_string()),
3529 run_path: "candidate.json".to_string(),
3530 fixture_path: None,
3531 compare_to: Some("baseline.json".to_string()),
3532 }],
3533 ..Default::default()
3534 };
3535 let suite = evaluate_run_suite_manifest(&manifest).unwrap();
3536 assert!(!suite.pass);
3537 assert_eq!(suite.failed, 1);
3538 assert!(suite.cases[0].comparison.is_some());
3539 assert!(suite.cases[0]
3540 .failures
3541 .iter()
3542 .any(|failure| failure.contains("baseline")));
3543 }
3544
3545 #[test]
3546 fn render_unified_diff_marks_removed_and_added_lines() {
3547 let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
3548 assert!(diff.contains("--- a/src/main.rs"));
3549 assert!(diff.contains("+++ b/src/main.rs"));
3550 assert!(diff.contains("-old"));
3551 assert!(diff.contains("+new"));
3552 assert!(diff.contains(" same"));
3553 }
3554
3555 #[test]
3556 fn execution_policy_rejects_process_exec_when_read_only() {
3557 push_execution_policy(CapabilityPolicy {
3558 side_effect_level: Some("read_only".to_string()),
3559 capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
3560 ..Default::default()
3561 });
3562 let result = enforce_current_policy_for_builtin("exec", &[]);
3563 pop_execution_policy();
3564 assert!(result.is_err());
3565 }
3566
3567 #[test]
3568 fn execution_policy_rejects_unlisted_tool() {
3569 push_execution_policy(CapabilityPolicy {
3570 tools: vec!["read".to_string()],
3571 ..Default::default()
3572 });
3573 let result = enforce_current_policy_for_tool("edit");
3574 pop_execution_policy();
3575 assert!(result.is_err());
3576 }
3577
3578 #[test]
3579 fn normalize_run_record_preserves_trace_spans() {
3580 let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
3581 "_type": "run_record",
3582 "id": "run_trace",
3583 "workflow_id": "wf",
3584 "status": "completed",
3585 "started_at": "1",
3586 "trace_spans": [
3587 {
3588 "span_id": 1,
3589 "parent_id": null,
3590 "kind": "pipeline",
3591 "name": "workflow",
3592 "start_ms": 0,
3593 "duration_ms": 42,
3594 "metadata": {"model": "demo"}
3595 }
3596 ]
3597 }));
3598
3599 let run = normalize_run_record(&value).unwrap();
3600 assert_eq!(run.trace_spans.len(), 1);
3601 assert_eq!(run.trace_spans[0].kind, "pipeline");
3602 assert_eq!(
3603 run.trace_spans[0].metadata["model"],
3604 serde_json::json!("demo")
3605 );
3606 }
3607
3608 #[test]
3611 fn pre_tool_hook_deny_blocks_execution() {
3612 clear_tool_hooks();
3613 register_tool_hook(ToolHook {
3614 pattern: "dangerous_*".to_string(),
3615 pre: Some(Rc::new(|_name, _args| {
3616 PreToolAction::Deny("blocked by policy".to_string())
3617 })),
3618 post: None,
3619 });
3620 let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
3621 clear_tool_hooks();
3622 assert!(matches!(result, PreToolAction::Deny(_)));
3623 }
3624
3625 #[test]
3626 fn pre_tool_hook_allow_passes_through() {
3627 clear_tool_hooks();
3628 register_tool_hook(ToolHook {
3629 pattern: "safe_*".to_string(),
3630 pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
3631 post: None,
3632 });
3633 let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
3634 clear_tool_hooks();
3635 assert!(matches!(result, PreToolAction::Allow));
3636 }
3637
3638 #[test]
3639 fn pre_tool_hook_modify_rewrites_args() {
3640 clear_tool_hooks();
3641 register_tool_hook(ToolHook {
3642 pattern: "*".to_string(),
3643 pre: Some(Rc::new(|_name, _args| {
3644 PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
3645 })),
3646 post: None,
3647 });
3648 let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
3649 clear_tool_hooks();
3650 match result {
3651 PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
3652 _ => panic!("expected Modify"),
3653 }
3654 }
3655
3656 #[test]
3657 fn post_tool_hook_modifies_result() {
3658 clear_tool_hooks();
3659 register_tool_hook(ToolHook {
3660 pattern: "exec".to_string(),
3661 pre: None,
3662 post: Some(Rc::new(|_name, result| {
3663 if result.contains("SECRET") {
3664 PostToolAction::Modify("[REDACTED]".to_string())
3665 } else {
3666 PostToolAction::Pass
3667 }
3668 })),
3669 });
3670 let result = run_post_tool_hooks("exec", "output with SECRET data");
3671 let clean = run_post_tool_hooks("exec", "clean output");
3672 clear_tool_hooks();
3673 assert_eq!(result, "[REDACTED]");
3674 assert_eq!(clean, "clean output");
3675 }
3676
3677 #[test]
3678 fn unmatched_hook_pattern_does_not_fire() {
3679 clear_tool_hooks();
3680 register_tool_hook(ToolHook {
3681 pattern: "exec".to_string(),
3682 pre: Some(Rc::new(|_name, _args| {
3683 PreToolAction::Deny("should not match".to_string())
3684 })),
3685 post: None,
3686 });
3687 let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
3688 clear_tool_hooks();
3689 assert!(matches!(result, PreToolAction::Allow));
3690 }
3691
3692 #[test]
3693 fn glob_match_patterns() {
3694 assert!(glob_match("*", "anything"));
3695 assert!(glob_match("exec*", "exec_at"));
3696 assert!(glob_match("*_file", "read_file"));
3697 assert!(!glob_match("exec*", "read_file"));
3698 assert!(glob_match("read_file", "read_file"));
3699 assert!(!glob_match("read_file", "write_file"));
3700 }
3701
3702 #[test]
3705 fn microcompact_snips_large_output() {
3706 let large = "x".repeat(50_000);
3707 let result = microcompact_tool_output(&large, 10_000);
3708 assert!(result.len() < 15_000);
3709 assert!(result.contains("snipped"));
3710 }
3711
3712 #[test]
3713 fn microcompact_preserves_small_output() {
3714 let small = "hello world";
3715 let result = microcompact_tool_output(small, 10_000);
3716 assert_eq!(result, small);
3717 }
3718
3719 #[test]
3720 fn microcompact_preserves_strong_keyword_lines_without_file_line() {
3721 let mut output = String::new();
3730 for i in 0..100 {
3731 output.push_str(&format!("verbose progress line {i}\n"));
3732 }
3733 output.push_str("--- FAIL: TestEmpty (0.00s)\n");
3734 output.push_str("thread 'tests::test_foo' panicked at src/lib.rs:42:5\n");
3735 output.push_str("FAILED tests/test_parser.py::test_empty\n");
3736 for i in 0..100 {
3737 output.push_str(&format!("more output after failures {i}\n"));
3738 }
3739 let result = microcompact_tool_output(&output, 2_000);
3740 assert!(
3741 result.contains("--- FAIL: TestEmpty"),
3742 "strong 'FAIL' keyword should preserve the line:\n{result}"
3743 );
3744 assert!(
3745 result.contains("panicked at"),
3746 "strong 'panic' keyword should preserve the line:\n{result}"
3747 );
3748 assert!(
3749 result.contains("FAILED tests/test_parser.py"),
3750 "strong 'FAIL' keyword should preserve pytest-style lines too:\n{result}"
3751 );
3752 }
3753
3754 #[test]
3755 fn auto_compact_messages_reduces_count() {
3756 let mut messages: Vec<serde_json::Value> = (0..20)
3757 .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
3758 .collect();
3759 let runtime = tokio::runtime::Builder::new_current_thread()
3760 .enable_all()
3761 .build()
3762 .unwrap();
3763 let compacted = runtime.block_on(auto_compact_messages(
3764 &mut messages,
3765 &AutoCompactConfig {
3766 compact_strategy: CompactStrategy::Truncate,
3767 keep_last: 6,
3768 ..Default::default()
3769 },
3770 None,
3771 ));
3772 let summary = compacted.unwrap();
3773 assert!(summary.is_some());
3774 assert!(messages.len() <= 7); assert!(messages[0]["content"]
3776 .as_str()
3777 .unwrap()
3778 .contains("auto-compacted"));
3779 }
3780
3781 #[test]
3782 fn auto_compact_noop_when_under_threshold() {
3783 let mut messages: Vec<serde_json::Value> = (0..4)
3784 .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
3785 .collect();
3786 let runtime = tokio::runtime::Builder::new_current_thread()
3787 .enable_all()
3788 .build()
3789 .unwrap();
3790 let compacted = runtime.block_on(auto_compact_messages(
3791 &mut messages,
3792 &AutoCompactConfig {
3793 compact_strategy: CompactStrategy::Truncate,
3794 keep_last: 6,
3795 ..Default::default()
3796 },
3797 None,
3798 ));
3799 assert!(compacted.unwrap().is_none());
3800 assert_eq!(messages.len(), 4);
3801 }
3802
3803 #[test]
3804 fn estimate_message_tokens_basic() {
3805 let messages = vec![
3806 serde_json::json!({"role": "user", "content": "a".repeat(400)}),
3807 serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
3808 ];
3809 let tokens = estimate_message_tokens(&messages);
3810 assert_eq!(tokens, 200); }
3812
3813 #[test]
3816 fn dedup_artifacts_removes_duplicates() {
3817 let mut artifacts = vec![
3818 ArtifactRecord {
3819 id: "a1".to_string(),
3820 kind: "test".to_string(),
3821 text: Some("duplicate content".to_string()),
3822 ..Default::default()
3823 },
3824 ArtifactRecord {
3825 id: "a2".to_string(),
3826 kind: "test".to_string(),
3827 text: Some("duplicate content".to_string()),
3828 ..Default::default()
3829 },
3830 ArtifactRecord {
3831 id: "a3".to_string(),
3832 kind: "test".to_string(),
3833 text: Some("unique content".to_string()),
3834 ..Default::default()
3835 },
3836 ];
3837 dedup_artifacts(&mut artifacts);
3838 assert_eq!(artifacts.len(), 2);
3839 }
3840
3841 #[test]
3842 fn microcompact_artifact_snips_oversized() {
3843 let mut artifact = ArtifactRecord {
3844 id: "a1".to_string(),
3845 kind: "test".to_string(),
3846 text: Some("x".repeat(10_000)),
3847 estimated_tokens: Some(2_500),
3848 ..Default::default()
3849 };
3850 microcompact_artifact(&mut artifact, 500);
3851 assert!(artifact.text.as_ref().unwrap().len() < 5_000);
3852 assert_eq!(artifact.estimated_tokens, Some(500));
3853 }
3854
3855 #[test]
3858 fn arg_constraint_allows_matching_pattern() {
3859 let policy = CapabilityPolicy {
3860 tool_arg_constraints: vec![ToolArgConstraint {
3861 tool: "exec".to_string(),
3862 arg_patterns: vec!["cargo *".to_string()],
3863 }],
3864 ..Default::default()
3865 };
3866 let result = enforce_tool_arg_constraints(
3867 &policy,
3868 "exec",
3869 &serde_json::json!({"command": "cargo test"}),
3870 );
3871 assert!(result.is_ok());
3872 }
3873
3874 #[test]
3875 fn arg_constraint_rejects_non_matching_pattern() {
3876 let policy = CapabilityPolicy {
3877 tool_arg_constraints: vec![ToolArgConstraint {
3878 tool: "exec".to_string(),
3879 arg_patterns: vec!["cargo *".to_string()],
3880 }],
3881 ..Default::default()
3882 };
3883 let result = enforce_tool_arg_constraints(
3884 &policy,
3885 "exec",
3886 &serde_json::json!({"command": "rm -rf /"}),
3887 );
3888 assert!(result.is_err());
3889 }
3890
3891 #[test]
3892 fn arg_constraint_ignores_unmatched_tool() {
3893 let policy = CapabilityPolicy {
3894 tool_arg_constraints: vec![ToolArgConstraint {
3895 tool: "exec".to_string(),
3896 arg_patterns: vec!["cargo *".to_string()],
3897 }],
3898 ..Default::default()
3899 };
3900 let result = enforce_tool_arg_constraints(
3901 &policy,
3902 "read_file",
3903 &serde_json::json!({"path": "/etc/passwd"}),
3904 );
3905 assert!(result.is_ok());
3906 }
3907
3908 #[test]
3909 fn microcompact_handles_multibyte_utf8() {
3910 let emoji_output = "🔥".repeat(500); let result = microcompact_tool_output(&emoji_output, 400);
3913 assert!(result.contains("snipped"));
3915
3916 let mixed = format!("{}{}{}", "a".repeat(300), "é".repeat(500), "b".repeat(300));
3918 let result2 = microcompact_tool_output(&mixed, 400);
3919 assert!(result2.contains("snipped"));
3920
3921 let cjk = "中文".repeat(500);
3923 let result3 = microcompact_tool_output(&cjk, 400);
3924 assert!(result3.contains("snipped"));
3925 }
3926}