1use std::collections::{BTreeMap, BTreeSet};
2use std::path::{Path, PathBuf};
3use std::rc::Rc;
4use std::{cell::RefCell, thread_local};
5
6use serde::{Deserialize, Serialize};
7
8use crate::llm::{extract_llm_options, vm_call_llm_full, vm_value_to_json};
9use crate::value::{VmError, VmValue};
10
/// Returns the current wall-clock time as a string.
///
/// NOTE(review): despite the name, this emits raw Unix-epoch *seconds*
/// (e.g. "1712345678"), not an RFC 3339 timestamp — confirm whether any
/// consumer of `created_at`/`started_at` fields expects a date format.
fn now_rfc3339() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    // A clock before the epoch yields 0 rather than an error.
    let seconds_since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|elapsed| elapsed.as_secs())
        .unwrap_or(0);
    seconds_since_epoch.to_string()
}
19
/// Builds a fresh identifier of the form `{prefix}_{uuid}` using a v7 UUID;
/// v7 UUIDs embed a timestamp, so ids sort roughly by creation time.
fn new_id(prefix: &str) -> String {
    format!("{prefix}_{}", uuid::Uuid::now_v7())
}
23
/// Directory where run records are persisted: the `HARN_RUN_DIR` environment
/// variable when set, otherwise the relative path `.harn-runs`.
fn default_run_dir() -> PathBuf {
    match std::env::var("HARN_RUN_DIR") {
        Ok(dir) => PathBuf::from(dir),
        Err(_) => PathBuf::from(".harn-runs"),
    }
}
29
thread_local! {
    // Per-thread stack of capability policies. NOTE(review): not referenced
    // in this chunk — presumably pushed/popped by execution scopes elsewhere;
    // confirm against the rest of the module.
    static EXECUTION_POLICY_STACK: RefCell<Vec<CapabilityPolicy>> = const { RefCell::new(Vec::new()) };
    // Tool hooks registered on this thread; run_pre/post_tool_hooks iterate
    // them in registration order.
    static TOOL_HOOKS: RefCell<Vec<ToolHook>> = const { RefCell::new(Vec::new()) };
}
34
/// Outcome of a pre-tool-call hook.
#[derive(Clone, Debug)]
pub enum PreToolAction {
    /// Let the call proceed with the current arguments.
    Allow,
    /// Block the call; the payload is a human-readable reason.
    Deny(String),
    /// Proceed, but with this replacement argument payload.
    Modify(serde_json::Value),
}
47
/// Outcome of a post-tool-call hook.
#[derive(Clone, Debug)]
pub enum PostToolAction {
    /// Leave the tool result unchanged.
    Pass,
    /// Replace the tool result with this string.
    Modify(String),
}
56
/// Pre-call hook: receives the tool name and its JSON arguments.
pub type PreToolHookFn = Rc<dyn Fn(&str, &serde_json::Value) -> PreToolAction>;
/// Post-call hook: receives the tool name and the raw tool output.
pub type PostToolHookFn = Rc<dyn Fn(&str, &str) -> PostToolAction>;
60
/// A pre/post interceptor attached to tools whose name matches `pattern`.
#[derive(Clone)]
pub struct ToolHook {
    /// Glob-style tool-name pattern (`*`, `prefix*`, `*suffix`, or exact).
    pub pattern: String,
    /// Optional hook run before the tool executes.
    pub pre: Option<PreToolHookFn>,
    /// Optional hook run on the tool's output after it executes.
    pub post: Option<PostToolHookFn>,
}
71
72impl std::fmt::Debug for ToolHook {
73 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74 f.debug_struct("ToolHook")
75 .field("pattern", &self.pattern)
76 .field("has_pre", &self.pre.is_some())
77 .field("has_post", &self.post.is_some())
78 .finish()
79 }
80}
81
/// Matches `name` against a simple glob `pattern`.
///
/// Supported forms: `*` (matches everything), `foo*` (prefix), `*foo`
/// (suffix), `*foo*` (substring), and anything else as an exact match.
fn glob_match(pattern: &str, name: &str) -> bool {
    if pattern == "*" {
        return true;
    }
    // `*needle*` — substring match. This must be checked before the
    // one-sided forms: previously such a pattern fell into the prefix arm
    // and was compared as `starts_with("*needle")`, which treats the leading
    // `*` as a literal and effectively never matches.
    if let Some(inner) = pattern.strip_prefix('*').and_then(|p| p.strip_suffix('*')) {
        return name.contains(inner);
    }
    if let Some(prefix) = pattern.strip_suffix('*') {
        return name.starts_with(prefix);
    }
    if let Some(suffix) = pattern.strip_prefix('*') {
        return name.ends_with(suffix);
    }
    pattern == name
}
94
95pub fn register_tool_hook(hook: ToolHook) {
96 TOOL_HOOKS.with(|hooks| hooks.borrow_mut().push(hook));
97}
98
99pub fn clear_tool_hooks() {
100 TOOL_HOOKS.with(|hooks| hooks.borrow_mut().clear());
101}
102
103pub fn run_pre_tool_hooks(tool_name: &str, args: &serde_json::Value) -> PreToolAction {
105 TOOL_HOOKS.with(|hooks| {
106 let hooks = hooks.borrow();
107 let mut current_args = args.clone();
108 for hook in hooks.iter() {
109 if !glob_match(&hook.pattern, tool_name) {
110 continue;
111 }
112 if let Some(ref pre) = hook.pre {
113 match pre(tool_name, ¤t_args) {
114 PreToolAction::Allow => {}
115 PreToolAction::Deny(reason) => return PreToolAction::Deny(reason),
116 PreToolAction::Modify(new_args) => {
117 current_args = new_args;
118 }
119 }
120 }
121 }
122 if current_args != *args {
123 PreToolAction::Modify(current_args)
124 } else {
125 PreToolAction::Allow
126 }
127 })
128}
129
130pub fn run_post_tool_hooks(tool_name: &str, result: &str) -> String {
132 TOOL_HOOKS.with(|hooks| {
133 let hooks = hooks.borrow();
134 let mut current = result.to_string();
135 for hook in hooks.iter() {
136 if !glob_match(&hook.pattern, tool_name) {
137 continue;
138 }
139 if let Some(ref post) = hook.post {
140 match post(tool_name, ¤t) {
141 PostToolAction::Pass => {}
142 PostToolAction::Modify(new_result) => {
143 current = new_result;
144 }
145 }
146 }
147 }
148 current
149 })
150}
151
/// How archived transcript messages are condensed during auto-compaction.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CompactStrategy {
    /// Ask the LLM to summarize the archived messages.
    Llm,
    /// Deterministic truncation-based summary; no model call.
    Truncate,
    /// Delegate summarization to a user-supplied VM closure.
    Custom,
}
160
161pub fn parse_compact_strategy(value: &str) -> Result<CompactStrategy, VmError> {
162 match value {
163 "llm" => Ok(CompactStrategy::Llm),
164 "truncate" => Ok(CompactStrategy::Truncate),
165 "custom" => Ok(CompactStrategy::Custom),
166 other => Err(VmError::Runtime(format!(
167 "unknown compact_strategy '{other}' (expected 'llm', 'truncate', or 'custom')"
168 ))),
169 }
170}
171
/// Tuning knobs for automatic transcript compaction.
#[derive(Clone, Debug)]
pub struct AutoCompactConfig {
    /// Estimated-token level that should trigger compaction.
    /// NOTE(review): not read in this chunk — presumably checked by callers.
    pub token_threshold: usize,
    /// Maximum characters a single tool output may occupy before it is
    /// head/tail-clipped (see `microcompact_tool_output`).
    pub tool_output_max_chars: usize,
    /// Number of most-recent messages preserved verbatim by
    /// `auto_compact_messages`.
    pub keep_last: usize,
    /// Strategy used to summarize the archived message prefix.
    pub compact_strategy: CompactStrategy,
    /// VM closure invoked when `compact_strategy` is `Custom`.
    pub custom_compactor: Option<VmValue>,
}
186
impl Default for AutoCompactConfig {
    // Defaults: compact near 80k estimated tokens, clip tool outputs at 20k
    // chars, keep the last 8 messages verbatim, summarize via the LLM.
    fn default() -> Self {
        Self {
            token_threshold: 80_000,
            tool_output_max_chars: 20_000,
            keep_last: 8,
            compact_strategy: CompactStrategy::Llm,
            custom_compactor: None,
        }
    }
}
198
199pub fn estimate_message_tokens(messages: &[serde_json::Value]) -> usize {
201 messages
202 .iter()
203 .map(|m| {
204 m.get("content")
205 .and_then(|c| c.as_str())
206 .map(|s| s.len())
207 .unwrap_or(0)
208 })
209 .sum::<usize>()
210 / 4
211}
212
/// Clips an oversized tool output to roughly `max_chars` by keeping the head
/// and tail halves and replacing the middle with a snip marker.
///
/// Outputs already within budget — or budgets under 200 chars, which are too
/// small to clip meaningfully — are returned unchanged.
pub fn microcompact_tool_output(output: &str, max_chars: usize) -> String {
    if output.len() <= max_chars || max_chars < 200 {
        return output.to_string();
    }
    let keep = max_chars / 2;
    // Snap both cut points to UTF-8 char boundaries: a raw byte-index slice
    // panics when it lands inside a multi-byte character.
    let mut head_end = keep;
    while !output.is_char_boundary(head_end) {
        head_end -= 1;
    }
    let mut tail_start = output.len() - keep;
    while !output.is_char_boundary(tail_start) {
        tail_start += 1;
    }
    let head = &output[..head_end];
    let tail = &output[tail_start..];
    // Report the number of bytes actually removed (the old `len - max_chars`
    // was off whenever the kept halves didn't sum to exactly max_chars).
    let snipped = tail_start - head_end;
    format!("{head}\n\n[... {snipped} characters snipped ...]\n\n{tail}")
}
225
226fn format_compaction_messages(messages: &[serde_json::Value]) -> String {
227 messages
228 .iter()
229 .map(|msg| {
230 let role = msg
231 .get("role")
232 .and_then(|v| v.as_str())
233 .unwrap_or("user")
234 .to_uppercase();
235 let content = msg
236 .get("content")
237 .and_then(|v| v.as_str())
238 .unwrap_or_default();
239 format!("{role}: {content}")
240 })
241 .collect::<Vec<_>>()
242 .join("\n")
243}
244
245fn truncate_compaction_summary(
246 old_messages: &[serde_json::Value],
247 archived_count: usize,
248) -> String {
249 let summary_parts: Vec<String> = old_messages
250 .iter()
251 .filter_map(|m| {
252 let role = m.get("role")?.as_str()?;
253 let content = m.get("content")?.as_str()?;
254 if content.is_empty() {
255 return None;
256 }
257 let truncated = if content.len() > 500 {
258 format!("{}...", &content[..500])
259 } else {
260 content.to_string()
261 };
262 Some(format!("[{role}] {truncated}"))
263 })
264 .take(15)
265 .collect();
266 format!(
267 "[auto-compacted {archived_count} older messages]\n{}{}",
268 summary_parts.join("\n"),
269 if archived_count > 15 {
270 format!("\n... and {} more", archived_count - 15)
271 } else {
272 String::new()
273 }
274 )
275}
276
/// Extracts summary text from a custom compactor's return value.
///
/// Accepts, in order of preference: a dict carrying a `summary` (or `text`)
/// entry, a plain string, nil (empty summary), or any other value rendered as
/// pretty-printed JSON.
fn compact_summary_text_from_value(value: &VmValue) -> Result<String, VmError> {
    if let Some(map) = value.as_dict() {
        if let Some(summary) = map.get("summary").or_else(|| map.get("text")) {
            return Ok(summary.display());
        }
    }
    match value {
        VmValue::String(text) => Ok(text.to_string()),
        VmValue::Nil => Ok(String::new()),
        _ => serde_json::to_string_pretty(&vm_value_to_json(value))
            .map_err(|e| VmError::Runtime(format!("custom compactor encode error: {e}"))),
    }
}
290
/// Summarizes archived messages with a stripped-down LLM call.
///
/// Clones the active call options but clears system/transcript/tool/format
/// settings so the summarization request cannot recurse into tool use or
/// transcript bookkeeping, then sends the formatted conversation as a single
/// user message. Falls back to `truncate_compaction_summary` when the model
/// returns only whitespace.
async fn llm_compaction_summary(
    old_messages: &[serde_json::Value],
    archived_count: usize,
    llm_opts: &crate::llm::api::LlmCallOptions,
) -> Result<String, VmError> {
    let mut compact_opts = llm_opts.clone();
    let formatted = format_compaction_messages(old_messages);
    // Reset everything that could alter or recursively re-enter the
    // summarization call; only the provider/model selection is inherited.
    compact_opts.system = None;
    compact_opts.transcript_id = None;
    compact_opts.transcript_summary = None;
    compact_opts.transcript_metadata = None;
    compact_opts.native_tools = None;
    compact_opts.tool_choice = None;
    compact_opts.response_format = None;
    compact_opts.json_schema = None;
    compact_opts.messages = vec![serde_json::json!({
        "role": "user",
        "content": format!(
            "Summarize these archived conversation messages for a follow-on coding agent. Preserve goals, constraints, decisions, completed tool work, unresolved issues, and next actions. Output only the summary text.\n\nArchived message count: {archived_count}\n\nConversation:\n{formatted}"
        ),
    })];
    let result = vm_call_llm_full(&compact_opts).await?;
    let summary = result.text.trim();
    if summary.is_empty() {
        Ok(truncate_compaction_summary(old_messages, archived_count))
    } else {
        Ok(format!(
            "[auto-compacted {archived_count} older messages]\n{summary}"
        ))
    }
}
322
323async fn custom_compaction_summary(
324 old_messages: &[serde_json::Value],
325 archived_count: usize,
326 callback: &VmValue,
327) -> Result<String, VmError> {
328 let Some(VmValue::Closure(closure)) = Some(callback.clone()) else {
329 return Err(VmError::Runtime(
330 "compact_callback must be a closure when compact_strategy is 'custom'".to_string(),
331 ));
332 };
333 let mut vm = crate::vm::take_async_builtin_child_vm().ok_or_else(|| {
334 VmError::Runtime(
335 "custom transcript compaction requires an async builtin VM context".to_string(),
336 )
337 })?;
338 let messages_vm = VmValue::List(Rc::new(
339 old_messages
340 .iter()
341 .map(crate::stdlib::json_to_vm_value)
342 .collect(),
343 ));
344 let result = vm.call_closure_pub(&closure, &[messages_vm], &[]).await;
345 crate::vm::restore_async_builtin_child_vm(vm);
346 let summary = compact_summary_text_from_value(&result?)?;
347 if summary.trim().is_empty() {
348 Ok(truncate_compaction_summary(old_messages, archived_count))
349 } else {
350 Ok(format!(
351 "[auto-compacted {archived_count} older messages]\n{summary}"
352 ))
353 }
354}
355
/// Compacts `messages` in place when it holds more than `config.keep_last`
/// entries.
///
/// The prefix (everything but the last `keep_last` messages) is drained,
/// summarized via the configured strategy, and reinserted as a single leading
/// user message. Returns `Ok(true)` when compaction happened, `Ok(false)`
/// when the list was already short enough.
///
/// NOTE(review): this triggers purely on message count; `token_threshold`
/// is not consulted here — confirm callers gate on it before invoking.
pub(crate) async fn auto_compact_messages(
    messages: &mut Vec<serde_json::Value>,
    config: &AutoCompactConfig,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
) -> Result<bool, VmError> {
    if messages.len() <= config.keep_last {
        return Ok(false);
    }
    let split_at = messages.len().saturating_sub(config.keep_last);
    let old_messages: Vec<_> = messages.drain(..split_at).collect();
    let archived_count = old_messages.len();
    let summary = match config.compact_strategy {
        CompactStrategy::Truncate => truncate_compaction_summary(&old_messages, archived_count),
        CompactStrategy::Llm => {
            llm_compaction_summary(
                &old_messages,
                archived_count,
                // LLM strategy requires the active call options to be threaded in.
                llm_opts.ok_or_else(|| {
                    VmError::Runtime(
                        "LLM transcript compaction requires active LLM call options".to_string(),
                    )
                })?,
            )
            .await?
        }
        CompactStrategy::Custom => {
            custom_compaction_summary(
                &old_messages,
                archived_count,
                config.custom_compactor.as_ref().ok_or_else(|| {
                    VmError::Runtime(
                        "compact_callback is required when compact_strategy is 'custom'"
                            .to_string(),
                    )
                })?,
            )
            .await?
        }
    };
    // The summary takes the archived prefix's place at the front of the list.
    messages.insert(
        0,
        serde_json::json!({
            "role": "user",
            "content": summary,
        }),
    );
    Ok(true)
}
406
407pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
411 let max_chars = max_tokens * 4;
412 if let Some(ref text) = artifact.text {
413 if text.len() > max_chars && max_chars >= 200 {
414 artifact.text = Some(microcompact_tool_output(text, max_chars));
415 artifact.estimated_tokens = Some(max_tokens);
416 }
417 }
418}
419
420pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
423 let mut seen_hashes: BTreeSet<u64> = BTreeSet::new();
424 artifacts.retain(|artifact| {
425 let text = artifact.text.as_deref().unwrap_or("");
426 if text.is_empty() {
427 return true;
428 }
429 let hash = {
431 use std::hash::{Hash, Hasher};
432 let mut hasher = std::collections::hash_map::DefaultHasher::new();
433 text.hash(&mut hasher);
434 hasher.finish()
435 };
436 seen_hashes.insert(hash)
437 });
438}
439
440pub fn select_artifacts_adaptive(
443 mut artifacts: Vec<ArtifactRecord>,
444 policy: &ContextPolicy,
445) -> Vec<ArtifactRecord> {
446 dedup_artifacts(&mut artifacts);
448
449 if let Some(max_tokens) = policy.max_tokens {
453 let count = artifacts.len().max(1);
454 let per_artifact_budget = max_tokens / count;
455 let cap = per_artifact_budget.max(500).min(max_tokens);
457 for artifact in &mut artifacts {
458 let est = artifact.estimated_tokens.unwrap_or(0);
459 if est > cap * 2 {
460 microcompact_artifact(artifact, cap);
461 }
462 }
463 }
464
465 select_artifacts(artifacts, policy)
467}
468
/// Restricts the first argument value of matching tools to a pattern allowlist.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolArgConstraint {
    /// Glob pattern selecting which tools this constraint applies to.
    pub tool: String,
    /// Glob patterns the tool's first argument must match; empty disables the check.
    pub arg_patterns: Vec<String>,
}
481
/// Validates a tool call's first argument against every matching
/// `ToolArgConstraint` in `policy`.
///
/// For each constraint whose `tool` glob matches `tool_name` (and that has a
/// non-empty pattern list), the first string value of the argument object —
/// or the bare string argument — must match at least one `arg_patterns` glob;
/// otherwise the call is rejected via `reject_policy`.
///
/// NOTE(review): "first argument" relies on the JSON object's iteration
/// order — confirm this matches the tool schemas' intended primary argument.
pub fn enforce_tool_arg_constraints(
    policy: &CapabilityPolicy,
    tool_name: &str,
    args: &serde_json::Value,
) -> Result<(), VmError> {
    for constraint in &policy.tool_arg_constraints {
        if !glob_match(&constraint.tool, tool_name) {
            continue;
        }
        if constraint.arg_patterns.is_empty() {
            continue;
        }
        // Non-string or absent arguments degrade to "" and must then match a
        // pattern explicitly allowing the empty string.
        let first_arg = args
            .as_object()
            .and_then(|o| o.values().next())
            .and_then(|v| v.as_str())
            .or_else(|| args.as_str())
            .unwrap_or("");
        let matches = constraint
            .arg_patterns
            .iter()
            .any(|pattern| glob_match(pattern, first_arg));
        if !matches {
            return reject_policy(format!(
                "tool '{tool_name}' argument '{first_arg}' does not match allowed patterns: {:?}",
                constraint.arg_patterns
            ));
        }
    }
    Ok(())
}
515
/// Canonicalizes an artifact kind string.
///
/// Short aliases map to canonical names (`file` → `workspace_file`,
/// `transcript` → `transcript_summary`, `verification` →
/// `verification_result`, `test` → `test_result`), blank/whitespace-only
/// kinds become `artifact`, and every other value — including the canonical
/// kinds themselves (`plan`, `diff`, `workspace_file`,
/// `verification_result`, `command_result`, …) — passes through unchanged.
fn normalize_artifact_kind(kind: &str) -> String {
    let canonical = match kind {
        "file" => "workspace_file",
        "transcript" => "transcript_summary",
        "verification" => "verification_result",
        "test" => "test_result",
        blank if blank.trim().is_empty() => "artifact",
        other => other,
    };
    canonical.to_string()
}
549
/// Default selection priority for an artifact kind (higher = kept first).
///
/// Verification evidence outranks change sets, which outrank plans,
/// workspace context, summaries, and command output; unknown kinds get 40.
fn default_artifact_priority(kind: &str) -> i64 {
    match kind {
        "command_result" => 50,
        "summary" | "transcript_summary" => 60,
        "editor_selection" | "resource" | "workspace_file" | "workspace_snapshot" => 70,
        "plan" => 80,
        "apply_intent" | "diff" | "diff_review" | "git_diff" | "patch" | "patch_proposal"
        | "patch_set" | "review_decision" => 90,
        "verification_bundle" => 95,
        "test_result" | "verification_result" => 100,
        _ => 40,
    }
}
563
/// Orders freshness labels for artifact ranking: `fresh`/`live` (3) >
/// `recent` (2) > unknown or missing (1) > `stale` (0).
fn freshness_rank(value: Option<&str>) -> i64 {
    match value {
        Some("fresh") | Some("live") => 3,
        Some("recent") => 2,
        Some("stale") => 0,
        _ => 1,
    }
}
572
/// Ceiling on what an execution scope may do: allowed tools, capability
/// operations, workspace roots, side-effect level, recursion depth, and
/// per-tool argument constraints. Empty collections mean "unrestricted" in
/// `intersect`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CapabilityPolicy {
    /// Allowed tool names; empty = no tool restriction.
    pub tools: Vec<String>,
    /// Allowed operations per capability name; empty = no restriction.
    pub capabilities: BTreeMap<String, Vec<String>>,
    /// Filesystem roots the scope may touch; empty = no restriction.
    pub workspace_roots: Vec<String>,
    /// Maximum side-effect level (see `min_side_effect` for the ordering).
    pub side_effect_level: Option<String>,
    /// Maximum nesting depth for recursive/child runs.
    pub recursion_limit: Option<usize>,
    #[serde(default)]
    pub tool_arg_constraints: Vec<ToolArgConstraint>,
}
585
impl CapabilityPolicy {
    /// Intersects `self` (the host ceiling) with a `requested` policy.
    ///
    /// Errors if the request names any tool or capability operation the host
    /// ceiling does not allow (when the ceiling restricts them at all).
    /// Otherwise returns the element-wise intersection: the lower side-effect
    /// level, the smaller recursion limit, intersected tool/capability/root
    /// sets (an empty set on either side means "unrestricted" and defers to
    /// the other side), and the concatenation of both constraint lists.
    pub fn intersect(&self, requested: &CapabilityPolicy) -> Result<CapabilityPolicy, String> {
        // Side-effect level: take the more restrictive of the two.
        let side_effect_level = match (&self.side_effect_level, &requested.side_effect_level) {
            (Some(a), Some(b)) => Some(min_side_effect(a, b).to_string()),
            (Some(a), None) => Some(a.clone()),
            (None, Some(b)) => Some(b.clone()),
            (None, None) => None,
        };

        // Reject any requested tool outside a non-empty host allowlist.
        if !self.tools.is_empty() {
            let denied: Vec<String> = requested
                .tools
                .iter()
                .filter(|tool| !self.tools.contains(*tool))
                .cloned()
                .collect();
            if !denied.is_empty() {
                return Err(format!(
                    "requested tools exceed host ceiling: {}",
                    denied.join(", ")
                ));
            }
        }

        // Reject requested capability operations outside the host ceiling.
        for (capability, requested_ops) in &requested.capabilities {
            if let Some(allowed_ops) = self.capabilities.get(capability) {
                let denied: Vec<String> = requested_ops
                    .iter()
                    .filter(|op| !allowed_ops.contains(*op))
                    .cloned()
                    .collect();
                if !denied.is_empty() {
                    return Err(format!(
                        "requested capability operations exceed host ceiling: {}.{}",
                        capability,
                        denied.join(",")
                    ));
                }
            } else if !self.capabilities.is_empty() {
                // A non-empty ceiling that lacks the capability entirely.
                return Err(format!(
                    "requested capability exceeds host ceiling: {capability}"
                ));
            }
        }

        // Tool set intersection; an empty side is treated as unrestricted.
        let tools = if self.tools.is_empty() {
            requested.tools.clone()
        } else if requested.tools.is_empty() {
            self.tools.clone()
        } else {
            requested
                .tools
                .iter()
                .filter(|tool| self.tools.contains(*tool))
                .cloned()
                .collect()
        };

        // Capability map intersection, per-capability op intersection.
        let capabilities = if self.capabilities.is_empty() {
            requested.capabilities.clone()
        } else if requested.capabilities.is_empty() {
            self.capabilities.clone()
        } else {
            requested
                .capabilities
                .iter()
                .filter_map(|(capability, requested_ops)| {
                    self.capabilities.get(capability).map(|allowed_ops| {
                        (
                            capability.clone(),
                            requested_ops
                                .iter()
                                .filter(|op| allowed_ops.contains(*op))
                                .cloned()
                                .collect::<Vec<_>>(),
                        )
                    })
                })
                .collect()
        };

        // Workspace-root intersection (exact string match, not path-aware).
        let workspace_roots = if self.workspace_roots.is_empty() {
            requested.workspace_roots.clone()
        } else if requested.workspace_roots.is_empty() {
            self.workspace_roots.clone()
        } else {
            requested
                .workspace_roots
                .iter()
                .filter(|root| self.workspace_roots.contains(*root))
                .cloned()
                .collect()
        };

        // Recursion limit: smaller of the two when both are set.
        let recursion_limit = match (self.recursion_limit, requested.recursion_limit) {
            (Some(a), Some(b)) => Some(a.min(b)),
            (Some(a), None) => Some(a),
            (None, Some(b)) => Some(b),
            (None, None) => None,
        };

        // Constraints accumulate: both sides' checks are enforced.
        let mut tool_arg_constraints = self.tool_arg_constraints.clone();
        tool_arg_constraints.extend(requested.tool_arg_constraints.clone());

        Ok(CapabilityPolicy {
            tools,
            capabilities,
            workspace_roots,
            side_effect_level,
            recursion_limit,
            tool_arg_constraints,
        })
    }
}
701
/// Returns the more restrictive of two side-effect levels, ordered
/// `none < read_only < workspace_write < process_exec < network`, with any
/// unrecognized label ranked least restrictive. Ties return the first input.
fn min_side_effect<'a>(a: &'a str, b: &'a str) -> &'a str {
    let rank = |level: &str| -> usize {
        match level {
            "none" => 0,
            "read_only" => 1,
            "workspace_write" => 2,
            "process_exec" => 3,
            "network" => 4,
            _ => 5,
        }
    };
    if rank(b) < rank(a) {
        b
    } else {
        a
    }
}
719
/// Per-node model selection and sampling overrides; all fields optional.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ModelPolicy {
    pub provider: Option<String>,
    pub model: Option<String>,
    pub model_tier: Option<String>,
    pub temperature: Option<f64>,
    pub max_tokens: Option<i64>,
}
729
/// Controls how a stage's conversation transcript is recorded and condensed.
/// NOTE(review): field semantics are not exercised in this chunk — confirm
/// against the executor that consumes them.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct TranscriptPolicy {
    pub mode: Option<String>,
    pub visibility: Option<String>,
    pub summarize: bool,
    pub compact: bool,
    pub keep_last: Option<usize>,
}
739
/// Artifact-selection policy for building a stage's context window.
/// `max_tokens` drives the per-artifact budget in `select_artifacts_adaptive`;
/// the remaining fields are consumed by `select_artifacts` (defined elsewhere).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ContextPolicy {
    pub max_artifacts: Option<usize>,
    /// Total token budget across selected artifacts.
    pub max_tokens: Option<usize>,
    pub reserve_tokens: Option<usize>,
    pub include_kinds: Vec<String>,
    pub exclude_kinds: Vec<String>,
    pub prioritize_kinds: Vec<String>,
    pub pinned_ids: Vec<String>,
    pub include_stages: Vec<String>,
    pub prefer_recent: bool,
    pub prefer_fresh: bool,
    pub render: Option<String>,
}
755
/// Retry behavior for a workflow stage: attempt budget plus whether to run
/// verification and/or repair passes between attempts.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RetryPolicy {
    pub max_attempts: usize,
    pub verify: bool,
    pub repair: bool,
}
763
/// Declares the artifact kinds a stage consumes/produces, with optional
/// cardinality bounds and an optional JSON schema for validation.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct StageContract {
    pub input_kinds: Vec<String>,
    pub output_kinds: Vec<String>,
    pub min_inputs: Option<usize>,
    pub max_inputs: Option<usize>,
    pub require_transcript: bool,
    pub schema: Option<serde_json::Value>,
}
774
/// Maps a stage's possible outcomes to the edge/branch labels the scheduler
/// should follow next; every mapping is optional.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BranchSemantics {
    pub success: Option<String>,
    pub failure: Option<String>,
    pub verify_pass: Option<String>,
    pub verify_fail: Option<String>,
    pub condition_true: Option<String>,
    pub condition_false: Option<String>,
    pub loop_continue: Option<String>,
    pub loop_exit: Option<String>,
    pub escalation: Option<String>,
}
788
/// Fan-out configuration for a map-style node: the items to iterate and the
/// artifact kinds used for per-item inputs and outputs.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct MapPolicy {
    pub items: Vec<serde_json::Value>,
    pub item_artifact_kind: Option<String>,
    pub output_kind: Option<String>,
    pub max_items: Option<usize>,
}
797
/// Fan-in configuration for a join node: how, and with how many completed
/// inputs, parallel branches are merged.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct JoinPolicy {
    pub strategy: String,
    pub require_all_inputs: bool,
    pub min_completed: Option<usize>,
}
805
/// How mapped results are folded into a single output artifact.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReducePolicy {
    pub strategy: String,
    /// Separator used when concatenating results (strategy-dependent).
    pub separator: Option<String>,
    pub output_kind: Option<String>,
}
813
/// Where and why a stage escalates to human/higher-level review.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EscalationPolicy {
    pub level: Option<String>,
    pub queue: Option<String>,
    pub reason: Option<String>,
}
821
/// A unit of context produced or consumed by workflow stages.
/// `normalize()` fills the identity fields, canonicalizes `kind`, and derives
/// `estimated_tokens`/`priority` defaults.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
    #[serde(rename = "_type")]
    pub type_name: String,
    /// Unique id; auto-generated (`artifact_<uuid>`) when left empty.
    pub id: String,
    /// Canonical kind (see `normalize_artifact_kind` for the alias table).
    pub kind: String,
    pub title: Option<String>,
    /// Primary textual payload; the basis for dedup and microcompaction.
    pub text: Option<String>,
    pub data: Option<serde_json::Value>,
    pub source: Option<String>,
    /// Epoch-seconds string; auto-filled when empty.
    pub created_at: String,
    /// Freshness label ranked by `freshness_rank` (`fresh`/`live`/`recent`/`stale`).
    pub freshness: Option<String>,
    /// Selection priority; defaults per kind via `default_artifact_priority`.
    pub priority: Option<i64>,
    pub lineage: Vec<String>,
    pub relevance: Option<f64>,
    /// Estimated token footprint; defaults to ceil(text length / 4).
    pub estimated_tokens: Option<usize>,
    pub stage: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
842
impl ArtifactRecord {
    /// Fills defaults for empty identity fields (`type_name`, `id`,
    /// `created_at`, `kind`), maps `kind` through its canonical alias table,
    /// and derives `estimated_tokens` (~4 chars/token, rounded up) and a
    /// kind-based `priority` when unset. Consumes and returns `self`.
    pub fn normalize(mut self) -> Self {
        if self.type_name.is_empty() {
            self.type_name = "artifact".to_string();
        }
        if self.id.is_empty() {
            self.id = new_id("artifact");
        }
        if self.created_at.is_empty() {
            self.created_at = now_rfc3339();
        }
        if self.kind.is_empty() {
            self.kind = "artifact".to_string();
        }
        // Canonicalize before deriving the priority, which is kind-based.
        self.kind = normalize_artifact_kind(&self.kind);
        if self.estimated_tokens.is_none() {
            self.estimated_tokens = self
                .text
                .as_ref()
                .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
        }
        if self.priority.is_none() {
            self.priority = Some(default_artifact_priority(&self.kind));
        }
        self
    }
}
870
/// A single stage in a workflow graph, bundling its prompt and every policy
/// that governs how it executes.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowNode {
    pub id: Option<String>,
    pub kind: String,
    pub mode: Option<String>,
    pub prompt: Option<String>,
    pub system: Option<String>,
    pub task_label: Option<String>,
    /// Tool list/registry in JSON form; see `workflow_tool_names` for the
    /// accepted shapes.
    pub tools: serde_json::Value,
    pub model_policy: ModelPolicy,
    pub transcript_policy: TranscriptPolicy,
    pub context_policy: ContextPolicy,
    pub retry_policy: RetryPolicy,
    pub capability_policy: CapabilityPolicy,
    pub input_contract: StageContract,
    pub output_contract: StageContract,
    pub branch_semantics: BranchSemantics,
    pub map_policy: MapPolicy,
    pub join_policy: JoinPolicy,
    pub reduce_policy: ReducePolicy,
    pub escalation_policy: EscalationPolicy,
    pub verify: Option<serde_json::Value>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
896
897pub fn workflow_tool_names(value: &serde_json::Value) -> Vec<String> {
898 match value {
899 serde_json::Value::Null => Vec::new(),
900 serde_json::Value::Array(items) => items
901 .iter()
902 .filter_map(|item| match item {
903 serde_json::Value::Object(map) => map
904 .get("name")
905 .and_then(|value| value.as_str())
906 .filter(|name| !name.is_empty())
907 .map(|name| name.to_string()),
908 _ => None,
909 })
910 .collect(),
911 serde_json::Value::Object(map) => {
912 if map.get("_type").and_then(|value| value.as_str()) == Some("tool_registry") {
913 return map
914 .get("tools")
915 .map(workflow_tool_names)
916 .unwrap_or_default();
917 }
918 map.get("name")
919 .and_then(|value| value.as_str())
920 .filter(|name| !name.is_empty())
921 .map(|name| vec![name.to_string()])
922 .unwrap_or_default()
923 }
924 _ => Vec::new(),
925 }
926}
927
/// A directed edge between workflow nodes, optionally taken only for a
/// specific branch label.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowEdge {
    pub from: String,
    pub to: String,
    pub branch: Option<String>,
    pub label: Option<String>,
}
936
/// A complete workflow definition: nodes keyed by id, edges, the entry node,
/// a graph-wide capability ceiling, and an edit audit trail.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowGraph {
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    pub version: usize,
    /// Id of the node execution starts from.
    pub entry: String,
    pub nodes: BTreeMap<String, WorkflowNode>,
    pub edges: Vec<WorkflowEdge>,
    pub capability_policy: CapabilityPolicy,
    pub metadata: BTreeMap<String, serde_json::Value>,
    pub audit_log: Vec<WorkflowAuditEntry>,
}
952
/// One recorded mutation of a workflow graph (who/what/when/why).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowAuditEntry {
    pub id: String,
    pub op: String,
    pub node_id: Option<String>,
    pub timestamp: String,
    pub reason: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
963
/// Aggregated LLM usage over one or more calls (tokens, latency, cost, and
/// the models involved).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct LlmUsageRecord {
    pub input_tokens: i64,
    pub output_tokens: i64,
    pub total_duration_ms: i64,
    pub call_count: i64,
    pub total_cost: f64,
    pub models: Vec<String>,
}
974
/// The full record of one executed stage within a run: outcome, transcript,
/// verification, usage, attempts, and the artifacts it consumed/produced.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageRecord {
    pub id: String,
    pub node_id: String,
    pub kind: String,
    pub status: String,
    pub outcome: String,
    pub branch: Option<String>,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub visible_text: Option<String>,
    pub private_reasoning: Option<String>,
    pub transcript: Option<serde_json::Value>,
    pub verification: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    pub artifacts: Vec<ArtifactRecord>,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
    pub attempts: Vec<RunStageAttemptRecord>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
997
/// One retry attempt within a stage, including its error and verification
/// outcome when present.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageAttemptRecord {
    pub attempt: usize,
    pub status: String,
    pub outcome: String,
    pub branch: Option<String>,
    pub error: Option<String>,
    pub verification: Option<serde_json::Value>,
    pub started_at: String,
    pub finished_at: Option<String>,
}
1010
/// A scheduler transition between nodes, with the branch taken and the
/// artifact ids flowing across it.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunTransitionRecord {
    pub id: String,
    pub from_stage_id: Option<String>,
    pub from_node_id: Option<String>,
    pub to_node_id: String,
    pub branch: Option<String>,
    pub timestamp: String,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
}
1023
/// A persisted resume point: which nodes are ready/completed and why the
/// checkpoint was taken.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunCheckpointRecord {
    pub id: String,
    pub ready_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub last_stage_id: Option<String>,
    pub persisted_at: String,
    pub reason: String,
}
1034
/// Expectations captured from a source run, replayable as a regression test
/// against later runs of the same workflow.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayFixture {
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub source_run_id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    pub created_at: String,
    pub expected_status: String,
    pub stage_assertions: Vec<ReplayStageAssertion>,
}
1048
/// Per-stage expectations within a replay fixture.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayStageAssertion {
    pub node_id: String,
    pub expected_status: String,
    pub expected_outcome: String,
    pub expected_branch: Option<String>,
    pub required_artifact_kinds: Vec<String>,
    pub visible_text_contains: Option<String>,
}
1059
/// Result of evaluating one replay fixture against a run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalReport {
    pub pass: bool,
    pub failures: Vec<String>,
    pub stage_count: usize,
}
1067
/// Per-case result within an eval suite, with an optional run-vs-run diff.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalCaseReport {
    pub run_id: String,
    pub workflow_id: String,
    pub label: Option<String>,
    pub pass: bool,
    pub failures: Vec<String>,
    pub stage_count: usize,
    pub source_path: Option<String>,
    pub comparison: Option<RunDiffReport>,
}
1080
/// Aggregate result of an eval suite: pass/fail counts plus per-case reports.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalSuiteReport {
    pub pass: bool,
    pub total: usize,
    pub passed: usize,
    pub failed: usize,
    pub cases: Vec<ReplayEvalCaseReport>,
}
1090
/// One stage-level difference found when comparing two runs.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunStageDiffRecord {
    pub node_id: String,
    pub change: String,
    pub details: Vec<String>,
}
1098
/// Structural comparison of two runs: status change, per-stage diffs, and
/// count deltas for transitions/artifacts/checkpoints (right minus left).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunDiffReport {
    pub left_run_id: String,
    pub right_run_id: String,
    pub identical: bool,
    pub status_changed: bool,
    pub left_status: String,
    pub right_status: String,
    pub stage_diffs: Vec<RunStageDiffRecord>,
    pub transition_count_delta: isize,
    pub artifact_count_delta: isize,
    pub checkpoint_count_delta: isize,
}
1113
/// On-disk manifest describing an eval suite and its cases; `base_dir`
/// presumably anchors the cases' relative paths — confirm with the loader.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteManifest {
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    pub base_dir: Option<String>,
    pub cases: Vec<EvalSuiteCase>,
}
1124
/// One eval-suite entry: the run to evaluate, an optional fixture to assert
/// against, and an optional baseline run to diff with.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteCase {
    pub label: Option<String>,
    pub run_path: String,
    pub fixture_path: Option<String>,
    pub compare_to: Option<String>,
}
1133
/// The complete persisted record of a workflow run: identity and lineage,
/// per-stage records, transitions, checkpoints, child runs, artifacts, the
/// effective policy, and aggregate transcript/usage data.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunRecord {
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    pub task: String,
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    /// Immediate parent for nested runs; `root_run_id` is the top ancestor.
    pub parent_run_id: Option<String>,
    pub root_run_id: Option<String>,
    pub stages: Vec<RunStageRecord>,
    pub transitions: Vec<RunTransitionRecord>,
    pub checkpoints: Vec<RunCheckpointRecord>,
    pub pending_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub child_runs: Vec<RunChildRecord>,
    pub artifacts: Vec<ArtifactRecord>,
    /// Effective capability policy the run executed under.
    pub policy: CapabilityPolicy,
    pub execution: Option<RunExecutionRecord>,
    pub transcript: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    pub replay_fixture: Option<ReplayFixture>,
    pub metadata: BTreeMap<String, serde_json::Value>,
    /// Where this record was written, when persisted.
    pub persisted_path: Option<String>,
}
1163
/// A child worker run spawned from a stage, with pointers to its own
/// persisted run/snapshot files.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunChildRecord {
    pub worker_id: String,
    pub worker_name: String,
    pub parent_stage_id: Option<String>,
    pub task: String,
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub run_id: Option<String>,
    pub run_path: Option<String>,
    pub snapshot_path: Option<String>,
    pub execution: Option<RunExecutionRecord>,
}
1179
/// Execution-environment metadata for a run: working directory, environment
/// variables, and optional version-control worktree details.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunExecutionRecord {
    /// Working directory the run executed in.
    pub cwd: Option<String>,
    /// Directory containing the run's source, if distinct from `cwd`.
    pub source_dir: Option<String>,
    /// Environment variables supplied to the run.
    pub env: BTreeMap<String, String>,
    /// Execution adapter used, if any.
    pub adapter: Option<String>,
    /// Path to the repository the run operated on.
    pub repo_path: Option<String>,
    /// Path to the worktree used for isolated edits, if any.
    pub worktree_path: Option<String>,
    /// Branch the run worked on.
    pub branch: Option<String>,
    /// Ref the worktree/branch was based on.
    pub base_ref: Option<String>,
    /// Cleanup strategy for the worktree, if any.
    pub cleanup: Option<String>,
}
1193
/// Result of `validate_workflow`: hard errors, soft warnings, and the set of
/// node ids reachable from the graph entry.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowValidationReport {
    /// True iff `errors` is empty; warnings do not affect validity.
    pub valid: bool,
    /// Hard validation failures (unknown nodes, malformed node kinds, policy
    /// ceiling violations).
    pub errors: Vec<String>,
    /// Non-fatal findings (unreachable nodes, weakly connected joins, …).
    pub warnings: Vec<String>,
    /// Node ids reachable from the entry node via edges.
    pub reachable_nodes: Vec<String>,
}
1202
1203fn parse_json_payload<T: for<'de> Deserialize<'de>>(
1204 json: serde_json::Value,
1205 label: &str,
1206) -> Result<T, VmError> {
1207 let payload = json.to_string();
1208 let mut deserializer = serde_json::Deserializer::from_str(&payload);
1209 let mut tracker = serde_path_to_error::Track::new();
1210 let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
1211 T::deserialize(path_deserializer).map_err(|error| {
1212 let snippet = if payload.len() > 600 {
1213 format!("{}...", &payload[..600])
1214 } else {
1215 payload.clone()
1216 };
1217 VmError::Runtime(format!(
1218 "{label} parse error at {}: {} | payload={}",
1219 tracker.path(),
1220 error,
1221 snippet
1222 ))
1223 })
1224}
1225
/// Deserialize a VM value into `T` under the generic "orchestration" error label.
fn parse_json_value<T: for<'de> Deserialize<'de>>(value: &VmValue) -> Result<T, VmError> {
    parse_json_payload(vm_value_to_json(value), "orchestration")
}
1229
/// Deserialize a VM value into a `WorkflowNode`, using `label` in error messages.
pub fn parse_workflow_node_value(value: &VmValue, label: &str) -> Result<WorkflowNode, VmError> {
    parse_json_payload(vm_value_to_json(value), label)
}
1233
/// Deserialize a JSON value into a `WorkflowNode`, using `label` in error messages.
pub fn parse_workflow_node_json(
    json: serde_json::Value,
    label: &str,
) -> Result<WorkflowNode, VmError> {
    parse_json_payload(json, label)
}
1240
/// Deserialize a JSON value into a `WorkflowEdge`, using `label` in error messages.
pub fn parse_workflow_edge_json(
    json: serde_json::Value,
    label: &str,
) -> Result<WorkflowEdge, VmError> {
    parse_json_payload(json, label)
}
1247
/// Parse a VM value into a `WorkflowGraph` and normalize it.
///
/// Accepts either a full graph (nodes/edges/entry) or a legacy shorthand dict
/// with top-level `act`/`verify`/`repair` stages, which is expanded into an
/// equivalent graph. After expansion, defaults are backfilled: type name, id,
/// version, entry node, and per-node kind, join/reduce strategies, output
/// contract, and retry attempts.
pub fn normalize_workflow_value(value: &VmValue) -> Result<WorkflowGraph, VmError> {
    let mut graph: WorkflowGraph = parse_json_value(value)?;
    let as_dict = value.as_dict().cloned().unwrap_or_default();

    // Legacy shorthand path: no explicit nodes, stages keyed at the top level.
    if graph.nodes.is_empty() {
        for key in ["act", "verify", "repair"] {
            if let Some(node_value) = as_dict.get(key) {
                let mut node: WorkflowNode = parse_json_value(node_value)?;
                let raw_node = node_value.as_dict().cloned().unwrap_or_default();
                node.id = Some(key.to_string());
                if node.kind.is_empty() {
                    node.kind = if key == "verify" {
                        "verify".to_string()
                    } else {
                        "stage".to_string()
                    };
                }
                // Model-policy fields may appear at the top level of the
                // shorthand dict; inherit them when the node omits its own.
                if node.model_policy.provider.is_none() {
                    node.model_policy.provider = as_dict
                        .get("provider")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model.is_none() {
                    node.model_policy.model = as_dict
                        .get("model")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model_tier.is_none() {
                    // "tier" is accepted as an alias for "model_tier".
                    node.model_policy.model_tier = as_dict
                        .get("model_tier")
                        .or_else(|| as_dict.get("tier"))
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.temperature.is_none() {
                    // Accept either a float or an integer for temperature.
                    node.model_policy.temperature = as_dict.get("temperature").and_then(|value| {
                        if let VmValue::Float(number) = value {
                            Some(*number)
                        } else {
                            value.as_int().map(|number| number as f64)
                        }
                    });
                }
                if node.model_policy.max_tokens.is_none() {
                    node.model_policy.max_tokens =
                        as_dict.get("max_tokens").and_then(|value| value.as_int());
                }
                if node.mode.is_none() {
                    node.mode = as_dict
                        .get("mode")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                // Shorthand verify stages may spell their checks as flat keys;
                // fold them into a structured `verify` spec.
                if key == "verify"
                    && node.verify.is_none()
                    && (raw_node.contains_key("assert_text")
                        || raw_node.contains_key("command")
                        || raw_node.contains_key("expect_status")
                        || raw_node.contains_key("expect_text"))
                {
                    node.verify = Some(serde_json::json!({
                        "assert_text": raw_node.get("assert_text").map(vm_value_to_json),
                        "command": raw_node.get("command").map(vm_value_to_json),
                        "expect_status": raw_node.get("expect_status").map(vm_value_to_json),
                        "expect_text": raw_node.get("expect_text").map(vm_value_to_json),
                    }));
                }
                graph.nodes.insert(key.to_string(), node);
            }
        }
        if graph.entry.is_empty() && graph.nodes.contains_key("act") {
            graph.entry = "act".to_string();
        }
        // Default edges: act -> verify, verify -(failed)-> repair,
        // repair -(retry)-> verify.
        if graph.edges.is_empty() && graph.nodes.contains_key("act") {
            if graph.nodes.contains_key("verify") {
                graph.edges.push(WorkflowEdge {
                    from: "act".to_string(),
                    to: "verify".to_string(),
                    branch: None,
                    label: None,
                });
            }
            if graph.nodes.contains_key("repair") {
                graph.edges.push(WorkflowEdge {
                    from: "verify".to_string(),
                    to: "repair".to_string(),
                    branch: Some("failed".to_string()),
                    label: None,
                });
                graph.edges.push(WorkflowEdge {
                    from: "repair".to_string(),
                    to: "verify".to_string(),
                    branch: Some("retry".to_string()),
                    label: None,
                });
            }
        }
    }

    // Graph-level defaults.
    if graph.type_name.is_empty() {
        graph.type_name = "workflow_graph".to_string();
    }
    if graph.id.is_empty() {
        graph.id = new_id("workflow");
    }
    if graph.version == 0 {
        graph.version = 1;
    }
    if graph.entry.is_empty() {
        // Fall back to the first node id (BTreeMap order), or "act".
        graph.entry = graph
            .nodes
            .keys()
            .next()
            .cloned()
            .unwrap_or_else(|| "act".to_string());
    }
    // Node-level defaults.
    for (node_id, node) in &mut graph.nodes {
        if node.id.is_none() {
            node.id = Some(node_id.clone());
        }
        if node.kind.is_empty() {
            node.kind = "stage".to_string();
        }
        if node.join_policy.strategy.is_empty() {
            node.join_policy.strategy = "all".to_string();
        }
        if node.reduce_policy.strategy.is_empty() {
            node.reduce_policy.strategy = "concat".to_string();
        }
        if node.output_contract.output_kinds.is_empty() {
            // Default output kind depends on the node kind.
            node.output_contract.output_kinds = vec![match node.kind.as_str() {
                "verify" => "verification_result".to_string(),
                "reduce" => node
                    .reduce_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "summary".to_string()),
                "map" => node
                    .map_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "artifact".to_string()),
                "escalation" => "plan".to_string(),
                _ => "artifact".to_string(),
            }];
        }
        if node.retry_policy.max_attempts == 0 {
            node.retry_policy.max_attempts = 1;
        }
    }
    Ok(graph)
}
1402
/// Statically validate a workflow graph, optionally against a capability
/// ceiling.
///
/// Collects hard `errors` (missing entry node, dangling edge endpoints,
/// malformed condition/fork/map nodes, contradictory input contracts,
/// policy-ceiling violations) and soft `warnings` (unreachable nodes, joins
/// with fewer than two inputs, reduce nodes with no input kinds). `valid` is
/// true iff no errors were found.
pub fn validate_workflow(
    graph: &WorkflowGraph,
    ceiling: Option<&CapabilityPolicy>,
) -> WorkflowValidationReport {
    let mut errors = Vec::new();
    let mut warnings = Vec::new();

    if !graph.nodes.contains_key(&graph.entry) {
        errors.push(format!("entry node does not exist: {}", graph.entry));
    }

    // Every edge must reference existing nodes on both ends.
    let node_ids: BTreeSet<String> = graph.nodes.keys().cloned().collect();
    for edge in &graph.edges {
        if !node_ids.contains(&edge.from) {
            errors.push(format!("edge.from references unknown node: {}", edge.from));
        }
        if !node_ids.contains(&edge.to) {
            errors.push(format!("edge.to references unknown node: {}", edge.to));
        }
    }

    // Unreachable nodes are a warning, not an error.
    let reachable_nodes = reachable_nodes(graph);
    for node_id in &node_ids {
        if !reachable_nodes.contains(node_id) {
            warnings.push(format!("node is unreachable: {node_id}"));
        }
    }

    // Per-node structural checks, keyed on node kind.
    for (node_id, node) in &graph.nodes {
        let incoming = graph
            .edges
            .iter()
            .filter(|edge| edge.to == *node_id)
            .count();
        let outgoing: Vec<&WorkflowEdge> = graph
            .edges
            .iter()
            .filter(|edge| edge.from == *node_id)
            .collect();
        if let Some(min_inputs) = node.input_contract.min_inputs {
            if let Some(max_inputs) = node.input_contract.max_inputs {
                if min_inputs > max_inputs {
                    errors.push(format!(
                        "node {node_id}: input contract min_inputs exceeds max_inputs"
                    ));
                }
            }
        }
        match node.kind.as_str() {
            "condition" => {
                // Conditions must branch on both outcomes.
                let has_true = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("true"));
                let has_false = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("false"));
                if !has_true || !has_false {
                    errors.push(format!(
                        "node {node_id}: condition nodes require both 'true' and 'false' branch edges"
                    ));
                }
            }
            "fork" => {
                if outgoing.len() < 2 {
                    errors.push(format!(
                        "node {node_id}: fork nodes require at least two outgoing edges"
                    ));
                }
            }
            "join" => {
                if incoming < 2 {
                    warnings.push(format!(
                        "node {node_id}: join node has fewer than two incoming edges"
                    ));
                }
            }
            "map" => {
                // A map node needs at least one source of items to iterate.
                if node.map_policy.items.is_empty()
                    && node.map_policy.item_artifact_kind.is_none()
                    && node.input_contract.input_kinds.is_empty()
                {
                    errors.push(format!(
                        "node {node_id}: map nodes require items, item_artifact_kind, or input_contract.input_kinds"
                    ));
                }
            }
            "reduce" => {
                if node.input_contract.input_kinds.is_empty() {
                    warnings.push(format!(
                        "node {node_id}: reduce node has no input_contract.input_kinds; it will consume all available artifacts"
                    ));
                }
            }
            _ => {}
        }
    }

    // Graph-level and node-level policies must fit under the ceiling.
    if let Some(ceiling) = ceiling {
        if let Err(error) = ceiling.intersect(&graph.capability_policy) {
            errors.push(error);
        }
        for (node_id, node) in &graph.nodes {
            if let Err(error) = ceiling.intersect(&node.capability_policy) {
                errors.push(format!("node {node_id}: {error}"));
            }
        }
    }

    WorkflowValidationReport {
        valid: errors.is_empty(),
        errors,
        warnings,
        reachable_nodes: reachable_nodes.into_iter().collect(),
    }
}
1518
1519fn reachable_nodes(graph: &WorkflowGraph) -> BTreeSet<String> {
1520 let mut seen = BTreeSet::new();
1521 let mut stack = vec![graph.entry.clone()];
1522 while let Some(node_id) = stack.pop() {
1523 if !seen.insert(node_id.clone()) {
1524 continue;
1525 }
1526 for edge in graph.edges.iter().filter(|edge| edge.from == node_id) {
1527 stack.push(edge.to.clone());
1528 }
1529 }
1530 seen
1531}
1532
/// Filter, rank, and budget artifacts according to a context policy.
///
/// Filtering keeps artifacts whose kind passes the include/exclude lists and
/// whose stage (when stage filtering is active) is included. Ranking sorts,
/// best-first, by: pinned ids, prioritized kinds, explicit priority, freshness
/// (only if `prefer_fresh`), recency (only if `prefer_recent`), relevance,
/// then ascending estimated token cost. Selection walks the ranked list,
/// stopping at `max_artifacts` and skipping (not stopping at) artifacts that
/// would overflow the token budget of `max_tokens - reserve_tokens`.
pub fn select_artifacts(
    mut artifacts: Vec<ArtifactRecord>,
    policy: &ContextPolicy,
) -> Vec<ArtifactRecord> {
    artifacts.retain(|artifact| {
        (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
            && !policy.exclude_kinds.contains(&artifact.kind)
            && (policy.include_stages.is_empty()
                || artifact
                    .stage
                    .as_ref()
                    .is_some_and(|stage| policy.include_stages.contains(stage)))
    });
    // Comparisons are written `b.cmp(&a)` to sort descending (best first).
    artifacts.sort_by(|a, b| {
        let b_pinned = policy.pinned_ids.contains(&b.id);
        let a_pinned = policy.pinned_ids.contains(&a.id);
        b_pinned
            .cmp(&a_pinned)
            .then_with(|| {
                let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
                let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
                b_prio_kind.cmp(&a_prio_kind)
            })
            .then_with(|| {
                b.priority
                    .unwrap_or_default()
                    .cmp(&a.priority.unwrap_or_default())
            })
            .then_with(|| {
                if policy.prefer_fresh {
                    freshness_rank(b.freshness.as_deref())
                        .cmp(&freshness_rank(a.freshness.as_deref()))
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                if policy.prefer_recent {
                    b.created_at.cmp(&a.created_at)
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                b.relevance
                    .partial_cmp(&a.relevance)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
            .then_with(|| {
                // Final tiebreak: cheaper artifacts first; unknown cost last.
                a.estimated_tokens
                    .unwrap_or(usize::MAX)
                    .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
            })
    });

    let mut selected = Vec::new();
    let mut used_tokens = 0usize;
    let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
    let effective_max_tokens = policy
        .max_tokens
        .map(|max| max.saturating_sub(reserve_tokens));
    for artifact in artifacts {
        if let Some(max_artifacts) = policy.max_artifacts {
            if selected.len() >= max_artifacts {
                break;
            }
        }
        let next_tokens = artifact.estimated_tokens.unwrap_or(0);
        if let Some(max_tokens) = effective_max_tokens {
            // `continue` (not `break`): a later, cheaper artifact may still fit.
            if used_tokens + next_tokens > max_tokens {
                continue;
            }
        }
        used_tokens += next_tokens;
        selected.push(artifact);
    }
    selected
}
1611
1612pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
1613 let mut parts = Vec::new();
1614 for artifact in artifacts {
1615 let title = artifact
1616 .title
1617 .clone()
1618 .unwrap_or_else(|| format!("{} {}", artifact.kind, artifact.id));
1619 let body = artifact
1620 .text
1621 .clone()
1622 .or_else(|| artifact.data.as_ref().map(|v| v.to_string()))
1623 .unwrap_or_default();
1624 match policy.render.as_deref() {
1625 Some("json") => {
1626 parts.push(
1627 serde_json::json!({
1628 "id": artifact.id,
1629 "kind": artifact.kind,
1630 "title": title,
1631 "source": artifact.source,
1632 "freshness": artifact.freshness,
1633 "priority": artifact.priority,
1634 "text": body,
1635 })
1636 .to_string(),
1637 );
1638 }
1639 _ => parts.push(format!(
1640 "[{title}] kind={} source={} freshness={} priority={}\n{}",
1641 artifact.kind,
1642 artifact
1643 .source
1644 .clone()
1645 .unwrap_or_else(|| "unknown".to_string()),
1646 artifact
1647 .freshness
1648 .clone()
1649 .unwrap_or_else(|| "normal".to_string()),
1650 artifact.priority.unwrap_or_default(),
1651 body
1652 )),
1653 }
1654 }
1655 parts.join("\n\n")
1656}
1657
1658pub fn normalize_artifact(value: &VmValue) -> Result<ArtifactRecord, VmError> {
1659 let artifact: ArtifactRecord = parse_json_value(value)?;
1660 Ok(artifact.normalize())
1661}
1662
1663pub fn normalize_run_record(value: &VmValue) -> Result<RunRecord, VmError> {
1664 let json = vm_value_to_json(value);
1665 let payload = json.to_string();
1666 let mut deserializer = serde_json::Deserializer::from_str(&payload);
1667 let mut tracker = serde_path_to_error::Track::new();
1668 let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
1669 let mut run: RunRecord = RunRecord::deserialize(path_deserializer).map_err(|error| {
1670 let snippet = if payload.len() > 600 {
1671 format!("{}...", &payload[..600])
1672 } else {
1673 payload.clone()
1674 };
1675 VmError::Runtime(format!(
1676 "orchestration parse error at {}: {} | payload={}",
1677 tracker.path(),
1678 error,
1679 snippet
1680 ))
1681 })?;
1682 if run.type_name.is_empty() {
1683 run.type_name = "run_record".to_string();
1684 }
1685 if run.id.is_empty() {
1686 run.id = new_id("run");
1687 }
1688 if run.started_at.is_empty() {
1689 run.started_at = now_rfc3339();
1690 }
1691 if run.status.is_empty() {
1692 run.status = "running".to_string();
1693 }
1694 if run.root_run_id.is_none() {
1695 run.root_run_id = Some(run.id.clone());
1696 }
1697 if run.replay_fixture.is_none() {
1698 run.replay_fixture = Some(replay_fixture_from_run(&run));
1699 }
1700 Ok(run)
1701}
1702
1703pub fn normalize_eval_suite_manifest(value: &VmValue) -> Result<EvalSuiteManifest, VmError> {
1704 let mut manifest: EvalSuiteManifest = parse_json_value(value)?;
1705 if manifest.type_name.is_empty() {
1706 manifest.type_name = "eval_suite_manifest".to_string();
1707 }
1708 if manifest.id.is_empty() {
1709 manifest.id = new_id("eval_suite");
1710 }
1711 Ok(manifest)
1712}
1713
1714fn load_replay_fixture(path: &Path) -> Result<ReplayFixture, VmError> {
1715 let content = std::fs::read_to_string(path)
1716 .map_err(|e| VmError::Runtime(format!("failed to read replay fixture: {e}")))?;
1717 serde_json::from_str(&content)
1718 .map_err(|e| VmError::Runtime(format!("failed to parse replay fixture: {e}")))
1719}
1720
/// Resolve a manifest-relative path: absolute paths pass through unchanged;
/// relative paths are joined onto `base_dir` when one is provided, and
/// returned as-is otherwise.
fn resolve_manifest_path(base_dir: Option<&Path>, path: &str) -> PathBuf {
    let candidate = PathBuf::from(path);
    match base_dir {
        Some(base) if !candidate.is_absolute() => base.join(candidate),
        _ => candidate,
    }
}
1731
/// Evaluate every case in a suite manifest and aggregate a suite report.
///
/// For each case: loads the run record, obtains a replay fixture (from the
/// explicit `fixture_path`, else the run's embedded fixture, else one derived
/// from the run itself), evaluates the run against it, and — when `compare_to`
/// is set — diffs the run against the baseline run, failing the case if the
/// runs differ. Relative paths resolve against the manifest's `base_dir`.
pub fn evaluate_run_suite_manifest(
    manifest: &EvalSuiteManifest,
) -> Result<ReplayEvalSuiteReport, VmError> {
    let base_dir = manifest.base_dir.as_deref().map(Path::new);
    let mut reports = Vec::new();
    for case in &manifest.cases {
        let run_path = resolve_manifest_path(base_dir, &case.run_path);
        let run = load_run_record(&run_path)?;
        let fixture = match &case.fixture_path {
            Some(path) => load_replay_fixture(&resolve_manifest_path(base_dir, path))?,
            None => run
                .replay_fixture
                .clone()
                .unwrap_or_else(|| replay_fixture_from_run(&run)),
        };
        let eval = evaluate_run_against_fixture(&run, &fixture);
        let mut pass = eval.pass;
        let mut failures = eval.failures;
        // Optional baseline comparison: any structural difference fails the case.
        let comparison = match &case.compare_to {
            Some(path) => {
                let baseline_path = resolve_manifest_path(base_dir, path);
                let baseline = load_run_record(&baseline_path)?;
                let diff = diff_run_records(&baseline, &run);
                if !diff.identical {
                    pass = false;
                    failures.push(format!(
                        "run differs from baseline {} with {} stage changes",
                        baseline_path.display(),
                        diff.stage_diffs.len()
                    ));
                }
                Some(diff)
            }
            None => None,
        };
        reports.push(ReplayEvalCaseReport {
            run_id: run.id.clone(),
            workflow_id: run.workflow_id.clone(),
            label: case.label.clone(),
            pass,
            failures,
            stage_count: eval.stage_count,
            source_path: Some(run_path.display().to_string()),
            comparison,
        });
    }
    let total = reports.len();
    let passed = reports.iter().filter(|report| report.pass).count();
    let failed = total.saturating_sub(passed);
    Ok(ReplayEvalSuiteReport {
        pass: failed == 0,
        total,
        passed,
        failed,
        cases: reports,
    })
}
1789
/// Produce a unified-style diff (file headers only, no hunk markers) between
/// two texts, using `path` for the header labels ("artifact" by default).
///
/// A suffix longest-common-subsequence table drives the line-by-line decision
/// to keep (` `), delete (`-`), or insert (`+`).
pub fn render_unified_diff(path: Option<&str>, before: &str, after: &str) -> String {
    let old: Vec<&str> = before.lines().collect();
    let new: Vec<&str> = after.lines().collect();

    // lcs[i][j] = length of the LCS of old[i..] and new[j..].
    let mut lcs = vec![vec![0usize; new.len() + 1]; old.len() + 1];
    for i in (0..old.len()).rev() {
        for j in (0..new.len()).rev() {
            lcs[i][j] = if old[i] == new[j] {
                lcs[i + 1][j + 1] + 1
            } else {
                lcs[i + 1][j].max(lcs[i][j + 1])
            };
        }
    }

    let label = path.unwrap_or("artifact");
    let mut out = format!("--- a/{label}\n+++ b/{label}\n");
    let (mut i, mut j) = (0usize, 0usize);
    while i < old.len() && j < new.len() {
        if old[i] == new[j] {
            out.push_str(&format!(" {}\n", old[i]));
            i += 1;
            j += 1;
        } else if lcs[i + 1][j] >= lcs[i][j + 1] {
            // Deleting old[i] preserves at least as long a common subsequence.
            out.push_str(&format!("-{}\n", old[i]));
            i += 1;
        } else {
            out.push_str(&format!("+{}\n", new[j]));
            j += 1;
        }
    }
    // Flush whichever side still has lines.
    for line in &old[i..] {
        out.push_str(&format!("-{line}\n"));
    }
    for line in &new[j..] {
        out.push_str(&format!("+{line}\n"));
    }
    out
}
1832
1833pub fn save_run_record(run: &RunRecord, path: Option<&str>) -> Result<String, VmError> {
1834 let path = path
1835 .map(PathBuf::from)
1836 .unwrap_or_else(|| default_run_dir().join(format!("{}.json", run.id)));
1837 if let Some(parent) = path.parent() {
1838 std::fs::create_dir_all(parent)
1839 .map_err(|e| VmError::Runtime(format!("failed to create run directory: {e}")))?;
1840 }
1841 let json = serde_json::to_string_pretty(run)
1842 .map_err(|e| VmError::Runtime(format!("failed to encode run record: {e}")))?;
1843 std::fs::write(&path, json)
1844 .map_err(|e| VmError::Runtime(format!("failed to persist run record: {e}")))?;
1845 Ok(path.to_string_lossy().to_string())
1846}
1847
1848pub fn load_run_record(path: &Path) -> Result<RunRecord, VmError> {
1849 let content = std::fs::read_to_string(path)
1850 .map_err(|e| VmError::Runtime(format!("failed to read run record: {e}")))?;
1851 serde_json::from_str(&content)
1852 .map_err(|e| VmError::Runtime(format!("failed to parse run record: {e}")))
1853}
1854
/// Derive a replay fixture from a completed run.
///
/// Captures the run's final status plus, per stage: the expected
/// status/outcome/branch, the kinds of artifacts the stage produced, and —
/// when the stage has non-empty visible text — its first 80 characters as a
/// containment assertion.
pub fn replay_fixture_from_run(run: &RunRecord) -> ReplayFixture {
    ReplayFixture {
        type_name: "replay_fixture".to_string(),
        id: new_id("fixture"),
        source_run_id: run.id.clone(),
        workflow_id: run.workflow_id.clone(),
        workflow_name: run.workflow_name.clone(),
        created_at: now_rfc3339(),
        expected_status: run.status.clone(),
        stage_assertions: run
            .stages
            .iter()
            .map(|stage| ReplayStageAssertion {
                node_id: stage.node_id.clone(),
                expected_status: stage.status.clone(),
                expected_outcome: stage.outcome.clone(),
                expected_branch: stage.branch.clone(),
                required_artifact_kinds: stage
                    .artifacts
                    .iter()
                    .map(|artifact| artifact.kind.clone())
                    .collect(),
                // Keep only a short prefix so the assertion stays stable
                // against trailing-output variation.
                visible_text_contains: stage
                    .visible_text
                    .as_ref()
                    .filter(|text| !text.is_empty())
                    .map(|text| text.chars().take(80).collect()),
            })
            .collect(),
    }
}
1886
/// Check a run against a replay fixture, collecting human-readable failures.
///
/// Verifies the overall run status and, for each stage assertion: that the
/// stage exists (matched by node id), that its status/outcome/branch match,
/// that every required artifact kind is present, and that the stage's visible
/// text contains the expected snippet. `pass` is true iff nothing failed.
pub fn evaluate_run_against_fixture(run: &RunRecord, fixture: &ReplayFixture) -> ReplayEvalReport {
    let mut failures = Vec::new();
    if run.status != fixture.expected_status {
        failures.push(format!(
            "run status mismatch: expected {}, got {}",
            fixture.expected_status, run.status
        ));
    }
    for assertion in &fixture.stage_assertions {
        // A missing stage is itself a failure; skip its remaining checks.
        let Some(stage) = run
            .stages
            .iter()
            .find(|stage| stage.node_id == assertion.node_id)
        else {
            failures.push(format!("missing stage {}", assertion.node_id));
            continue;
        };
        if stage.status != assertion.expected_status {
            failures.push(format!(
                "stage {} status mismatch: expected {}, got {}",
                assertion.node_id, assertion.expected_status, stage.status
            ));
        }
        if stage.outcome != assertion.expected_outcome {
            failures.push(format!(
                "stage {} outcome mismatch: expected {}, got {}",
                assertion.node_id, assertion.expected_outcome, stage.outcome
            ));
        }
        if stage.branch != assertion.expected_branch {
            failures.push(format!(
                "stage {} branch mismatch: expected {:?}, got {:?}",
                assertion.node_id, assertion.expected_branch, stage.branch
            ));
        }
        for required_kind in &assertion.required_artifact_kinds {
            if !stage
                .artifacts
                .iter()
                .any(|artifact| &artifact.kind == required_kind)
            {
                failures.push(format!(
                    "stage {} missing artifact kind {}",
                    assertion.node_id, required_kind
                ));
            }
        }
        if let Some(snippet) = &assertion.visible_text_contains {
            let actual = stage.visible_text.clone().unwrap_or_default();
            if !actual.contains(snippet) {
                failures.push(format!(
                    "stage {} visible text does not contain expected snippet {:?}",
                    assertion.node_id, snippet
                ));
            }
        }
    }

    ReplayEvalReport {
        pass: failures.is_empty(),
        failures,
        stage_count: run.stages.len(),
    }
}
1951
1952pub fn evaluate_run_suite(
1953 cases: Vec<(RunRecord, ReplayFixture, Option<String>)>,
1954) -> ReplayEvalSuiteReport {
1955 let mut reports = Vec::new();
1956 for (run, fixture, source_path) in cases {
1957 let report = evaluate_run_against_fixture(&run, &fixture);
1958 reports.push(ReplayEvalCaseReport {
1959 run_id: run.id.clone(),
1960 workflow_id: run.workflow_id.clone(),
1961 label: None,
1962 pass: report.pass,
1963 failures: report.failures,
1964 stage_count: report.stage_count,
1965 source_path,
1966 comparison: None,
1967 });
1968 }
1969 let total = reports.len();
1970 let passed = reports.iter().filter(|report| report.pass).count();
1971 let failed = total.saturating_sub(passed);
1972 ReplayEvalSuiteReport {
1973 pass: failed == 0,
1974 total,
1975 passed,
1976 failed,
1977 cases: reports,
1978 }
1979}
1980
/// Structurally compare two run records stage-by-stage.
///
/// Stages are matched by node id across the union of both runs; each diff is
/// reported as "added", "removed", or "changed" (status/outcome/branch and
/// artifact-count differences). The runs are `identical` only when status and
/// stages agree and the transition/artifact/checkpoint counts match.
pub fn diff_run_records(left: &RunRecord, right: &RunRecord) -> RunDiffReport {
    let mut stage_diffs = Vec::new();
    // Union of node ids from both runs, in sorted order.
    let mut all_node_ids = BTreeSet::new();
    all_node_ids.extend(left.stages.iter().map(|stage| stage.node_id.clone()));
    all_node_ids.extend(right.stages.iter().map(|stage| stage.node_id.clone()));

    for node_id in all_node_ids {
        let left_stage = left.stages.iter().find(|stage| stage.node_id == node_id);
        let right_stage = right.stages.iter().find(|stage| stage.node_id == node_id);
        match (left_stage, right_stage) {
            (Some(_), None) => stage_diffs.push(RunStageDiffRecord {
                node_id,
                change: "removed".to_string(),
                details: vec!["stage missing from right run".to_string()],
            }),
            (None, Some(_)) => stage_diffs.push(RunStageDiffRecord {
                node_id,
                change: "added".to_string(),
                details: vec!["stage missing from left run".to_string()],
            }),
            (Some(left_stage), Some(right_stage)) => {
                let mut details = Vec::new();
                if left_stage.status != right_stage.status {
                    details.push(format!(
                        "status: {} -> {}",
                        left_stage.status, right_stage.status
                    ));
                }
                if left_stage.outcome != right_stage.outcome {
                    details.push(format!(
                        "outcome: {} -> {}",
                        left_stage.outcome, right_stage.outcome
                    ));
                }
                if left_stage.branch != right_stage.branch {
                    details.push(format!(
                        "branch: {:?} -> {:?}",
                        left_stage.branch, right_stage.branch
                    ));
                }
                // Artifacts are compared by count only, not by content.
                if left_stage.produced_artifact_ids.len() != right_stage.produced_artifact_ids.len()
                {
                    details.push(format!(
                        "produced_artifacts: {} -> {}",
                        left_stage.produced_artifact_ids.len(),
                        right_stage.produced_artifact_ids.len()
                    ));
                }
                if left_stage.artifacts.len() != right_stage.artifacts.len() {
                    details.push(format!(
                        "artifact_records: {} -> {}",
                        left_stage.artifacts.len(),
                        right_stage.artifacts.len()
                    ));
                }
                if !details.is_empty() {
                    stage_diffs.push(RunStageDiffRecord {
                        node_id,
                        change: "changed".to_string(),
                        details,
                    });
                }
            }
            // Unreachable: every id in the union exists on at least one side.
            (None, None) => {}
        }
    }

    let status_changed = left.status != right.status;
    let identical = !status_changed
        && stage_diffs.is_empty()
        && left.transitions.len() == right.transitions.len()
        && left.artifacts.len() == right.artifacts.len()
        && left.checkpoints.len() == right.checkpoints.len();

    RunDiffReport {
        left_run_id: left.id.clone(),
        right_run_id: right.id.clone(),
        identical,
        status_changed,
        left_status: left.status.clone(),
        right_status: right.status.clone(),
        stage_diffs,
        transition_count_delta: right.transitions.len() as isize - left.transitions.len() as isize,
        artifact_count_delta: right.artifacts.len() as isize - left.artifacts.len() as isize,
        checkpoint_count_delta: right.checkpoints.len() as isize - left.checkpoints.len() as isize,
    }
}
2068
/// Push a capability policy onto the thread-local execution-policy stack.
pub fn push_execution_policy(policy: CapabilityPolicy) {
    EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
}
2072
/// Pop the most recently pushed execution policy (no-op when the stack is empty).
pub fn pop_execution_policy() {
    EXECUTION_POLICY_STACK.with(|stack| {
        stack.borrow_mut().pop();
    });
}
2078
/// The innermost (most recently pushed) execution policy, if any is active.
pub fn current_execution_policy() -> Option<CapabilityPolicy> {
    EXECUTION_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
}
2082
/// True when the policy places no restriction on tools (empty list) or
/// explicitly lists `tool`.
fn policy_allows_tool(policy: &CapabilityPolicy, tool: &str) -> bool {
    policy.tools.is_empty() || policy.tools.iter().any(|allowed| allowed == tool)
}
2086
/// True when the policy declares no capabilities (no restriction), or declares
/// `capability` with either an empty op list (all ops allowed) or one that
/// contains `op`. A non-empty capability map that omits `capability` denies it.
fn policy_allows_capability(policy: &CapabilityPolicy, capability: &str, op: &str) -> bool {
    policy.capabilities.is_empty()
        || policy
            .capabilities
            .get(capability)
            .is_some_and(|ops| ops.is_empty() || ops.iter().any(|allowed| allowed == op))
}
2094
2095fn policy_allows_side_effect(policy: &CapabilityPolicy, requested: &str) -> bool {
2096 fn rank(v: &str) -> usize {
2097 match v {
2098 "none" => 0,
2099 "read_only" => 1,
2100 "workspace_write" => 2,
2101 "process_exec" => 3,
2102 "network" => 4,
2103 _ => 5,
2104 }
2105 }
2106 policy
2107 .side_effect_level
2108 .as_ref()
2109 .map(|allowed| rank(allowed) >= rank(requested))
2110 .unwrap_or(true)
2111}
2112
/// Build the standard `ToolRejected` error used by every policy check below.
fn reject_policy(reason: String) -> Result<(), VmError> {
    Err(VmError::CategorizedError {
        message: reason,
        category: crate::value::ErrorCategory::ToolRejected,
    })
}
2119
/// Enforce the active execution policy against a builtin call.
///
/// No-op when no policy is active. Otherwise each builtin is mapped to the
/// tool, capability operation, and side-effect level it requires, and the
/// call is rejected (`ToolRejected`) when any of them exceeds the policy
/// ceiling. Builtins not listed here pass through unchecked.
pub fn enforce_current_policy_for_builtin(name: &str, args: &[VmValue]) -> Result<(), VmError> {
    let Some(policy) = current_execution_policy() else {
        return Ok(());
    };
    match name {
        // Read-only workspace access.
        "read" | "read_file" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "read_text")
            {
                return reject_policy(format!(
                    "builtin '{name}' exceeds workspace.read_text ceiling"
                ));
            }
        }
        "search" | "list_dir" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "list")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
            }
        }
        "file_exists" | "stat" => {
            if !policy_allows_capability(&policy, "workspace", "exists") {
                return reject_policy(format!("builtin '{name}' exceeds workspace.exists ceiling"));
            }
        }
        // Workspace mutation: all write-like builtins funnel through the
        // "edit" tool plus the workspace_write side-effect level.
        "edit" | "write_file" | "append_file" | "mkdir" | "copy_file" => {
            if !policy_allows_tool(&policy, "edit")
                || !policy_allows_capability(&policy, "workspace", "write_text")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace write ceiling"));
            }
        }
        "delete_file" => {
            if !policy_allows_capability(&policy, "workspace", "delete")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'delete_file' exceeds workspace.delete ceiling".to_string(),
                );
            }
        }
        "apply_edit" => {
            if !policy_allows_capability(&policy, "workspace", "apply_edit")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'apply_edit' exceeds workspace.apply_edit ceiling".to_string(),
                );
            }
        }
        // Process execution.
        "exec" | "exec_at" | "shell" | "shell_at" | "run_command" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        // Network access is gated purely on the side-effect level.
        "http_get" | "http_post" | "http_put" | "http_patch" | "http_delete" | "http_request" => {
            if !policy_allows_side_effect(&policy, "network") {
                return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
            }
        }
        // MCP builtins are gated like process execution.
        "mcp_connect"
        | "mcp_call"
        | "mcp_list_tools"
        | "mcp_list_resources"
        | "mcp_list_resource_templates"
        | "mcp_read_resource"
        | "mcp_list_prompts"
        | "mcp_get_prompt"
        | "mcp_server_info"
        | "mcp_disconnect" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        // host_invoke names its capability/op in its first two arguments; the
        // required side-effect level is derived from that pair.
        "host_invoke" => {
            let capability = args.first().map(|v| v.display()).unwrap_or_default();
            let op = args.get(1).map(|v| v.display()).unwrap_or_default();
            if !policy_allows_capability(&policy, &capability, &op) {
                return reject_policy(format!(
                    "host_invoke {capability}.{op} exceeds capability ceiling"
                ));
            }
            let requested_side_effect = match (capability.as_str(), op.as_str()) {
                ("workspace", "write_text" | "apply_edit" | "delete") => "workspace_write",
                ("process", "exec") => "process_exec",
                _ => "read_only",
            };
            if !policy_allows_side_effect(&policy, requested_side_effect) {
                return reject_policy(format!(
                    "host_invoke {capability}.{op} exceeds side-effect ceiling"
                ));
            }
        }
        _ => {}
    }
    Ok(())
}
2225
2226pub fn enforce_current_policy_for_bridge_builtin(name: &str) -> Result<(), VmError> {
2227 if current_execution_policy().is_some() {
2228 return reject_policy(format!(
2229 "bridged builtin '{name}' exceeds execution policy; declare an explicit capability/tool surface instead"
2230 ));
2231 }
2232 Ok(())
2233}
2234
2235pub fn enforce_current_policy_for_tool(tool_name: &str) -> Result<(), VmError> {
2236 let Some(policy) = current_execution_policy() else {
2237 return Ok(());
2238 };
2239 if !policy_allows_tool(&policy, tool_name) {
2240 return reject_policy(format!("tool '{tool_name}' exceeds tool ceiling"));
2241 }
2242 Ok(())
2243}
2244
2245fn compact_transcript(transcript: &VmValue, keep_last: usize) -> Option<VmValue> {
2246 let dict = transcript.as_dict()?;
2247 let messages = match dict.get("messages") {
2248 Some(VmValue::List(list)) => list.iter().cloned().collect::<Vec<_>>(),
2249 _ => Vec::new(),
2250 };
2251 let retained = messages
2252 .into_iter()
2253 .rev()
2254 .take(keep_last)
2255 .collect::<Vec<_>>()
2256 .into_iter()
2257 .rev()
2258 .collect::<Vec<_>>();
2259 let mut compacted = dict.clone();
2260 compacted.insert(
2261 "messages".to_string(),
2262 VmValue::List(Rc::new(retained.clone())),
2263 );
2264 compacted.insert(
2265 "events".to_string(),
2266 VmValue::List(Rc::new(
2267 crate::llm::helpers::transcript_events_from_messages(&retained),
2268 )),
2269 );
2270 Some(VmValue::Dict(Rc::new(compacted)))
2271}
2272
2273fn redact_transcript_visibility(transcript: &VmValue, visibility: Option<&str>) -> Option<VmValue> {
2274 let Some(visibility) = visibility else {
2275 return Some(transcript.clone());
2276 };
2277 if visibility != "public" && visibility != "public_only" {
2278 return Some(transcript.clone());
2279 }
2280 let dict = transcript.as_dict()?;
2281 let public_messages = match dict.get("messages") {
2282 Some(VmValue::List(list)) => list
2283 .iter()
2284 .filter(|message| {
2285 message
2286 .as_dict()
2287 .and_then(|d| d.get("role"))
2288 .map(|v| v.display())
2289 .map(|role| role != "tool_result")
2290 .unwrap_or(true)
2291 })
2292 .cloned()
2293 .collect::<Vec<_>>(),
2294 _ => Vec::new(),
2295 };
2296 let public_events = match dict.get("events") {
2297 Some(VmValue::List(list)) => list
2298 .iter()
2299 .filter(|event| {
2300 event
2301 .as_dict()
2302 .and_then(|d| d.get("visibility"))
2303 .map(|v| v.display())
2304 .map(|value| value == "public")
2305 .unwrap_or(true)
2306 })
2307 .cloned()
2308 .collect::<Vec<_>>(),
2309 _ => Vec::new(),
2310 };
2311 let mut redacted = dict.clone();
2312 redacted.insert(
2313 "messages".to_string(),
2314 VmValue::List(Rc::new(public_messages)),
2315 );
2316 redacted.insert("events".to_string(), VmValue::List(Rc::new(public_events)));
2317 Some(VmValue::Dict(Rc::new(redacted)))
2318}
2319
2320pub(crate) fn apply_input_transcript_policy(
2321 transcript: Option<VmValue>,
2322 policy: &TranscriptPolicy,
2323) -> Option<VmValue> {
2324 let mut transcript = transcript;
2325 match policy.mode.as_deref() {
2326 Some("reset") => return None,
2327 Some("fork") => {
2328 if let Some(VmValue::Dict(dict)) = transcript.as_ref() {
2329 let mut forked = dict.as_ref().clone();
2330 forked.insert(
2331 "id".to_string(),
2332 VmValue::String(Rc::from(new_id("transcript"))),
2333 );
2334 transcript = Some(VmValue::Dict(Rc::new(forked)));
2335 }
2336 }
2337 _ => {}
2338 }
2339 if policy.compact {
2340 let keep_last = policy.keep_last.unwrap_or(6);
2341 transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
2342 }
2343 transcript
2344}
2345
2346fn apply_output_transcript_policy(
2347 transcript: Option<VmValue>,
2348 policy: &TranscriptPolicy,
2349) -> Option<VmValue> {
2350 let mut transcript = transcript;
2351 if policy.compact {
2352 let keep_last = policy.keep_last.unwrap_or(6);
2353 transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
2354 }
2355 transcript.and_then(|value| redact_transcript_visibility(&value, policy.visibility.as_deref()))
2356}
2357
/// Execute one workflow stage node end-to-end:
/// 1. select input artifacts per the node's context policy,
/// 2. validate the node's input contract (transcript presence, min/max inputs),
/// 3. build the prompt and LLM options from the node's model policy,
/// 4. run either the iterative agent loop or a single LLM call,
/// 5. wrap the model's visible output in a new `ArtifactRecord`.
///
/// Returns `(raw LLM result JSON, produced artifacts, output transcript)`.
///
/// # Errors
/// Returns `VmError::Runtime` when the input contract is violated, and
/// propagates any error from option extraction or the LLM call itself.
pub async fn execute_stage_node(
    node_id: &str,
    node: &WorkflowNode,
    task: &str,
    artifacts: &[ArtifactRecord],
    transcript: Option<VmValue>,
) -> Result<(serde_json::Value, Vec<ArtifactRecord>, Option<VmValue>), VmError> {
    // When the context policy names no kinds, inherit them from the input
    // contract so selection still respects the declared inputs.
    let mut selection_policy = node.context_policy.clone();
    if selection_policy.include_kinds.is_empty() && !node.input_contract.input_kinds.is_empty() {
        selection_policy.include_kinds = node.input_contract.input_kinds.clone();
    }
    let selected = select_artifacts_adaptive(artifacts.to_vec(), &selection_policy);
    let rendered_context = render_artifacts_context(&selected, &node.context_policy);
    let transcript = apply_input_transcript_policy(transcript, &node.transcript_policy);
    // Input-contract validation: transcript requirement, then artifact
    // count bounds (checked after selection, so bounds apply to the
    // artifacts actually chosen, not everything available).
    if node.input_contract.require_transcript && transcript.is_none() {
        return Err(VmError::Runtime(format!(
            "workflow stage {node_id} requires transcript input"
        )));
    }
    if let Some(min_inputs) = node.input_contract.min_inputs {
        if selected.len() < min_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} requires at least {min_inputs} input artifacts"
            )));
        }
    }
    if let Some(max_inputs) = node.input_contract.max_inputs {
        if selected.len() > max_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} accepts at most {max_inputs} input artifacts"
            )));
        }
    }
    // Prefix the task with the rendered artifact context when present;
    // the task label defaults to "Task".
    let prompt = if rendered_context.is_empty() {
        task.to_string()
    } else {
        format!(
            "{rendered_context}\n\n{}:\n{task}",
            node.task_label
                .clone()
                .unwrap_or_else(|| "Task".to_string())
        )
    };

    // Translate the node's model policy into LLM call options; only
    // explicitly-set fields are forwarded.
    let mut options = BTreeMap::new();
    if let Some(provider) = &node.model_policy.provider {
        options.insert(
            "provider".to_string(),
            VmValue::String(Rc::from(provider.clone())),
        );
    }
    if let Some(model) = &node.model_policy.model {
        options.insert(
            "model".to_string(),
            VmValue::String(Rc::from(model.clone())),
        );
    }
    if let Some(model_tier) = &node.model_policy.model_tier {
        options.insert(
            "model_tier".to_string(),
            VmValue::String(Rc::from(model_tier.clone())),
        );
    }
    if let Some(temperature) = node.model_policy.temperature {
        options.insert("temperature".to_string(), VmValue::Float(temperature));
    }
    if let Some(max_tokens) = node.model_policy.max_tokens {
        options.insert("max_tokens".to_string(), VmValue::Int(max_tokens));
    }
    let tool_names = workflow_tool_names(&node.tools);
    if !matches!(node.tools, serde_json::Value::Null) && !tool_names.is_empty() {
        options.insert(
            "tools".to_string(),
            crate::stdlib::json_to_vm_value(&node.tools),
        );
    }
    if let Some(transcript) = transcript.clone() {
        options.insert("transcript".to_string(), transcript);
    }

    // Assemble the (prompt, system, options) argument triple expected by
    // extract_llm_options.
    let args = vec![
        VmValue::String(Rc::from(prompt)),
        node.system
            .clone()
            .map(|s| VmValue::String(Rc::from(s)))
            .unwrap_or(VmValue::Nil),
        VmValue::Dict(Rc::new(options)),
    ];
    let mut opts = extract_llm_options(&args)?;

    // Agent mode — or any node that declares tools — runs the iterative
    // tool loop; otherwise a single LLM call is made and its result is
    // adapted into the agent-loop result shape.
    let llm_result = if node.mode.as_deref() == Some("agent") || !tool_names.is_empty() {
        crate::llm::run_agent_loop_internal(
            &mut opts,
            crate::llm::AgentLoopConfig {
                persistent: true,
                max_iterations: 12,
                max_nudges: 3,
                nudge: None,
                tool_retries: 0,
                tool_backoff_ms: 1000,
                tool_format: "text".to_string(),
                auto_compact: None,
                policy: None,
                daemon: false,
            },
        )
        .await?
    } else {
        let result = vm_call_llm_full(&opts).await?;
        crate::llm::agent_loop_result_from_llm(&result, opts)
    };

    let visible_text = llm_result["text"].as_str().unwrap_or_default().to_string();
    let transcript = llm_result
        .get("transcript")
        .cloned()
        .map(|value| crate::stdlib::json_to_vm_value(&value));
    let transcript = apply_output_transcript_policy(transcript, &node.transcript_policy);
    // Pick the declared output kind; "verify" nodes default to a
    // verification_result artifact, everything else to a plain artifact.
    let output_kind = node
        .output_contract
        .output_kinds
        .first()
        .cloned()
        .unwrap_or_else(|| {
            if node.kind == "verify" {
                "verification_result".to_string()
            } else {
                "artifact".to_string()
            }
        });
    // Record provenance: which artifacts fed this stage and what kind of
    // node produced the output.
    let mut metadata = BTreeMap::new();
    metadata.insert(
        "input_artifact_ids".to_string(),
        serde_json::json!(selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect::<Vec<_>>()),
    );
    metadata.insert("node_kind".to_string(), serde_json::json!(node.kind));
    let artifact = ArtifactRecord {
        type_name: "artifact".to_string(),
        id: new_id("artifact"),
        kind: output_kind,
        title: Some(format!("stage {node_id} output")),
        text: Some(visible_text),
        data: Some(llm_result.clone()),
        source: Some(node_id.to_string()),
        created_at: now_rfc3339(),
        freshness: Some("fresh".to_string()),
        priority: None,
        // Lineage mirrors the consumed artifact ids stored in metadata.
        lineage: selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect(),
        relevance: Some(1.0),
        estimated_tokens: None,
        stage: Some(node_id.to_string()),
        metadata,
    }
    .normalize();

    Ok((llm_result, vec![artifact], transcript))
}
2521
2522pub fn next_nodes_for(
2523 graph: &WorkflowGraph,
2524 current: &str,
2525 branch: Option<&str>,
2526) -> Vec<WorkflowEdge> {
2527 let mut matching: Vec<WorkflowEdge> = graph
2528 .edges
2529 .iter()
2530 .filter(|edge| edge.from == current && edge.branch.as_deref() == branch)
2531 .cloned()
2532 .collect();
2533 if matching.is_empty() {
2534 matching = graph
2535 .edges
2536 .iter()
2537 .filter(|edge| edge.from == current && edge.branch.is_none())
2538 .cloned()
2539 .collect();
2540 }
2541 matching
2542}
2543
2544pub fn next_node_for(graph: &WorkflowGraph, current: &str, branch: &str) -> Option<String> {
2545 next_nodes_for(graph, current, Some(branch))
2546 .into_iter()
2547 .next()
2548 .map(|edge| edge.to)
2549}
2550
2551pub fn append_audit_entry(
2552 graph: &mut WorkflowGraph,
2553 op: &str,
2554 node_id: Option<String>,
2555 reason: Option<String>,
2556 metadata: BTreeMap<String, serde_json::Value>,
2557) {
2558 graph.audit_log.push(WorkflowAuditEntry {
2559 id: new_id("audit"),
2560 op: op.to_string(),
2561 node_id,
2562 timestamp: now_rfc3339(),
2563 reason,
2564 metadata,
2565 });
2566}
2567
2568pub fn builtin_ceiling() -> CapabilityPolicy {
2569 CapabilityPolicy {
2570 tools: vec![
2571 "read".to_string(),
2572 "read_file".to_string(),
2573 "search".to_string(),
2574 "edit".to_string(),
2575 "run".to_string(),
2576 "exec".to_string(),
2577 "outline".to_string(),
2578 "list_directory".to_string(),
2579 "lsp_hover".to_string(),
2580 "lsp_definition".to_string(),
2581 "lsp_references".to_string(),
2582 "web_search".to_string(),
2583 "web_fetch".to_string(),
2584 ],
2585 capabilities: BTreeMap::from([
2586 (
2587 "workspace".to_string(),
2588 vec![
2589 "read_text".to_string(),
2590 "write_text".to_string(),
2591 "apply_edit".to_string(),
2592 "delete".to_string(),
2593 "exists".to_string(),
2594 "list".to_string(),
2595 ],
2596 ),
2597 ("process".to_string(), vec!["exec".to_string()]),
2598 ]),
2599 workspace_roots: Vec::new(),
2600 side_effect_level: Some("network".to_string()),
2601 recursion_limit: Some(8),
2602 tool_arg_constraints: Vec::new(),
2603 }
2604}
2605
// Unit tests: capability-policy enforcement, tool hooks, workflow
// normalization/validation, artifact selection and compaction, run
// replay/eval fixtures, and diff rendering.
#[cfg(test)]
mod tests {
    use super::*;

    // Intersecting a request against a ceiling must fail when the request
    // asks for tools the ceiling does not grant.
    #[test]
    fn capability_intersection_rejects_privilege_expansion() {
        let ceiling = CapabilityPolicy {
            tools: vec!["read".to_string()],
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(2),
            ..Default::default()
        };
        let requested = CapabilityPolicy {
            tools: vec!["read".to_string(), "edit".to_string()],
            ..Default::default()
        };
        let error = ceiling.intersect(&requested).unwrap_err();
        assert!(error.contains("host ceiling"));
    }

    // Any bridged builtin is denied while a policy is active, with a
    // categorized ToolRejected error.
    #[test]
    fn active_execution_policy_rejects_unknown_bridge_builtin() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }

    // MCP builtins count as process-exec; a read-only policy must block them
    // so they cannot be used as a sandbox escape.
    #[test]
    fn active_execution_policy_rejects_mcp_escape_hatch() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }

    // The legacy flat {act, verify, repair} shape normalizes into a graph
    // with those three nodes and "act" as the entry point.
    #[test]
    fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "legacy",
            "act": {"mode": "llm"},
            "verify": {"kind": "verify"},
            "repair": {"mode": "agent"},
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        assert_eq!(graph.type_name, "workflow_graph");
        assert!(graph.nodes.contains_key("act"));
        assert!(graph.nodes.contains_key("verify"));
        assert!(graph.nodes.contains_key("repair"));
        assert_eq!(graph.entry, "act");
    }

    // A node may carry its tools as a tool_registry object; tool names must
    // still be extractable from it.
    #[test]
    fn workflow_normalization_accepts_tool_registry_nodes() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "registry_tools",
            "entry": "implement",
            "nodes": {
                "implement": {
                    "kind": "stage",
                    "mode": "agent",
                    "tools": {
                        "_type": "tool_registry",
                        "tools": [
                            {"name": "read", "description": "Read files"},
                            {"name": "run", "description": "Run commands"}
                        ]
                    }
                }
            },
            "edges": []
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        let node = graph.nodes.get("implement").unwrap();
        assert_eq!(workflow_tool_names(&node.tools), vec!["read", "run"]);
    }

    // Selection must respect max_artifacts / max_tokens budgets while
    // honoring recency, freshness, and kind priorities.
    #[test]
    fn artifact_selection_honors_budget_and_priority() {
        let policy = ContextPolicy {
            max_artifacts: Some(2),
            max_tokens: Some(30),
            prefer_recent: true,
            prefer_fresh: true,
            prioritize_kinds: vec!["verification_result".to_string()],
            ..Default::default()
        };
        let artifacts = vec![
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "a".to_string(),
                kind: "summary".to_string(),
                text: Some("short".to_string()),
                relevance: Some(0.9),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "b".to_string(),
                kind: "summary".to_string(),
                text: Some("this is a much larger artifact body".to_string()),
                relevance: Some(1.0),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "c".to_string(),
                kind: "summary".to_string(),
                text: Some("tiny".to_string()),
                relevance: Some(0.5),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
        ];
        let selected = select_artifacts(artifacts, &policy);
        assert_eq!(selected.len(), 2);
        assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
    }

    // A condition node with only a "true" edge (no "false") must fail
    // validation with an error naming both branches.
    #[test]
    fn workflow_validation_rejects_condition_without_true_false_edges() {
        let graph = WorkflowGraph {
            entry: "gate".to_string(),
            nodes: BTreeMap::from([(
                "gate".to_string(),
                WorkflowNode {
                    id: Some("gate".to_string()),
                    kind: "condition".to_string(),
                    ..Default::default()
                },
            )]),
            edges: vec![WorkflowEdge {
                from: "gate".to_string(),
                to: "next".to_string(),
                branch: Some("true".to_string()),
                label: None,
            }],
            ..Default::default()
        };
        let report = validate_workflow(&graph, None);
        assert!(!report.valid);
        assert!(report
            .errors
            .iter()
            .any(|error| error.contains("true") && error.contains("false")));
    }

    // A fixture derived from a run must evaluate cleanly against that same
    // run (round trip).
    #[test]
    fn replay_fixture_round_trip_passes() {
        let run = RunRecord {
            type_name: "run_record".to_string(),
            id: "run_1".to_string(),
            workflow_id: "wf".to_string(),
            workflow_name: Some("demo".to_string()),
            task: "demo".to_string(),
            status: "completed".to_string(),
            started_at: "1".to_string(),
            finished_at: Some("2".to_string()),
            parent_run_id: None,
            root_run_id: Some("run_1".to_string()),
            stages: vec![RunStageRecord {
                id: "stage_1".to_string(),
                node_id: "act".to_string(),
                kind: "stage".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                branch: Some("success".to_string()),
                started_at: "1".to_string(),
                finished_at: Some("2".to_string()),
                visible_text: Some("done".to_string()),
                private_reasoning: None,
                transcript: None,
                verification: None,
                usage: None,
                artifacts: vec![ArtifactRecord {
                    type_name: "artifact".to_string(),
                    id: "a1".to_string(),
                    kind: "summary".to_string(),
                    text: Some("done".to_string()),
                    created_at: "1".to_string(),
                    ..Default::default()
                }
                .normalize()],
                consumed_artifact_ids: vec![],
                produced_artifact_ids: vec!["a1".to_string()],
                attempts: vec![],
                metadata: BTreeMap::new(),
            }],
            transitions: vec![],
            checkpoints: vec![],
            pending_nodes: vec![],
            completed_nodes: vec!["act".to_string()],
            child_runs: vec![],
            artifacts: vec![],
            policy: CapabilityPolicy::default(),
            execution: None,
            transcript: None,
            usage: None,
            replay_fixture: None,
            metadata: BTreeMap::new(),
            persisted_path: None,
        };
        let fixture = replay_fixture_from_run(&run);
        let report = evaluate_run_against_fixture(&run, &fixture);
        assert!(report.pass);
        assert!(report.failures.is_empty());
    }

    // A suite with one passing and one failing case must report the failure
    // in its aggregate counts.
    #[test]
    fn replay_eval_suite_reports_failed_case() {
        let good = RunRecord {
            id: "run_good".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let bad = RunRecord {
            id: "run_bad".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let suite = evaluate_run_suite(vec![
            (
                good.clone(),
                replay_fixture_from_run(&good),
                Some("good.json".to_string()),
            ),
            (
                bad.clone(),
                // The "bad" run is evaluated against the "good" fixture, so
                // it is expected to fail.
                replay_fixture_from_run(&good),
                Some("bad.json".to_string()),
            ),
        ]);
        assert!(!suite.pass);
        assert_eq!(suite.total, 2);
        assert_eq!(suite.failed, 1);
        assert!(suite.cases.iter().any(|case| !case.pass));
    }

    // Diffing runs with different statuses/outcomes must flag the change.
    #[test]
    fn run_diff_reports_changed_stage() {
        let left = RunRecord {
            id: "left".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let right = RunRecord {
            id: "right".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let diff = diff_run_records(&left, &right);
        assert!(diff.status_changed);
        assert!(!diff.identical);
        assert_eq!(diff.stage_diffs.len(), 1);
    }

    // A manifest case comparing a failing candidate run against a passing
    // baseline must fail and report the baseline comparison.
    #[test]
    fn eval_suite_manifest_can_fail_on_baseline_diff() {
        let temp_dir =
            std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
        std::fs::create_dir_all(&temp_dir).unwrap();
        let baseline_path = temp_dir.join("baseline.json");
        let candidate_path = temp_dir.join("candidate.json");

        let baseline = RunRecord {
            id: "baseline".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let candidate = RunRecord {
            id: "candidate".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };

        save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
        save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();

        let manifest = EvalSuiteManifest {
            base_dir: Some(temp_dir.display().to_string()),
            cases: vec![EvalSuiteCase {
                label: Some("candidate".to_string()),
                run_path: "candidate.json".to_string(),
                fixture_path: None,
                compare_to: Some("baseline.json".to_string()),
            }],
            ..Default::default()
        };
        let suite = evaluate_run_suite_manifest(&manifest).unwrap();
        assert!(!suite.pass);
        assert_eq!(suite.failed, 1);
        assert!(suite.cases[0].comparison.is_some());
        assert!(suite.cases[0]
            .failures
            .iter()
            .any(|failure| failure.contains("baseline")));
    }

    // Unified diff output must contain the standard headers and +/- markers.
    #[test]
    fn render_unified_diff_marks_removed_and_added_lines() {
        let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
        assert!(diff.contains("--- a/src/main.rs"));
        assert!(diff.contains("+++ b/src/main.rs"));
        assert!(diff.contains("-old"));
        assert!(diff.contains("+new"));
        assert!(diff.contains(" same"));
    }

    // Even with the process.exec capability, a read_only side-effect level
    // must block exec.
    #[test]
    fn execution_policy_rejects_process_exec_when_read_only() {
        push_execution_policy(CapabilityPolicy {
            side_effect_level: Some("read_only".to_string()),
            capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
            ..Default::default()
        });
        let result = enforce_current_policy_for_builtin("exec", &[]);
        pop_execution_policy();
        assert!(result.is_err());
    }

    // A tool absent from the policy's tool list must be rejected.
    #[test]
    fn execution_policy_rejects_unlisted_tool() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            ..Default::default()
        });
        let result = enforce_current_policy_for_tool("edit");
        pop_execution_policy();
        assert!(result.is_err());
    }

    // A matching pre-hook returning Deny must surface as Deny.
    #[test]
    fn pre_tool_hook_deny_blocks_execution() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "dangerous_*".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Deny("blocked by policy".to_string())
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Deny(_)));
    }

    // A matching pre-hook returning Allow passes the call through.
    #[test]
    fn pre_tool_hook_allow_passes_through() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "safe_*".to_string(),
            pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
            post: None,
        });
        let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Allow));
    }

    // A pre-hook may rewrite the tool arguments before execution.
    #[test]
    fn pre_tool_hook_modify_rewrites_args() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "*".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
        clear_tool_hooks();
        match result {
            PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
            _ => panic!("expected Modify"),
        }
    }

    // A post-hook may rewrite (e.g. redact) the tool's textual result, and
    // must leave non-matching output untouched.
    #[test]
    fn post_tool_hook_modifies_result() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "exec".to_string(),
            pre: None,
            post: Some(Rc::new(|_name, result| {
                if result.contains("SECRET") {
                    PostToolAction::Modify("[REDACTED]".to_string())
                } else {
                    PostToolAction::Pass
                }
            })),
        });
        let result = run_post_tool_hooks("exec", "output with SECRET data");
        let clean = run_post_tool_hooks("exec", "clean output");
        clear_tool_hooks();
        assert_eq!(result, "[REDACTED]");
        assert_eq!(clean, "clean output");
    }

    // Hooks whose pattern does not match the tool name must not fire.
    #[test]
    fn unmatched_hook_pattern_does_not_fire() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "exec".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Deny("should not match".to_string())
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Allow));
    }

    // Glob matching supports "*", prefix*, *suffix, and exact names.
    #[test]
    fn glob_match_patterns() {
        assert!(glob_match("*", "anything"));
        assert!(glob_match("exec*", "exec_at"));
        assert!(glob_match("*_file", "read_file"));
        assert!(!glob_match("exec*", "read_file"));
        assert!(glob_match("read_file", "read_file"));
        assert!(!glob_match("read_file", "write_file"));
    }

    // Oversized tool output is snipped down with a marker.
    #[test]
    fn microcompact_snips_large_output() {
        let large = "x".repeat(50_000);
        let result = microcompact_tool_output(&large, 10_000);
        assert!(result.len() < 15_000);
        assert!(result.contains("snipped"));
    }

    // Output under the limit is returned unchanged.
    #[test]
    fn microcompact_preserves_small_output() {
        let small = "hello world";
        let result = microcompact_tool_output(small, 10_000);
        assert_eq!(result, small);
    }

    // Truncate-strategy auto-compaction shrinks a long message history and
    // prepends an auto-compacted summary message.
    #[test]
    fn auto_compact_messages_reduces_count() {
        let mut messages: Vec<serde_json::Value> = (0..20)
            .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
            .collect();
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();
        let compacted = runtime.block_on(auto_compact_messages(
            &mut messages,
            &AutoCompactConfig {
                compact_strategy: CompactStrategy::Truncate,
                keep_last: 6,
                ..Default::default()
            },
            None,
        ));
        assert!(compacted.unwrap());
        // keep_last messages plus the injected summary message.
        assert!(messages.len() <= 7);
        assert!(messages[0]["content"]
            .as_str()
            .unwrap()
            .contains("auto-compacted"));
    }

    // Short histories below the keep_last threshold are left untouched.
    #[test]
    fn auto_compact_noop_when_under_threshold() {
        let mut messages: Vec<serde_json::Value> = (0..4)
            .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
            .collect();
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();
        let compacted = runtime.block_on(auto_compact_messages(
            &mut messages,
            &AutoCompactConfig {
                compact_strategy: CompactStrategy::Truncate,
                keep_last: 6,
                ..Default::default()
            },
            None,
        ));
        assert!(!compacted.unwrap());
        assert_eq!(messages.len(), 4);
    }

    // 800 content chars across two messages estimate to 200 tokens
    // (~4 chars per token).
    #[test]
    fn estimate_message_tokens_basic() {
        let messages = vec![
            serde_json::json!({"role": "user", "content": "a".repeat(400)}),
            serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
        ];
        let tokens = estimate_message_tokens(&messages);
        assert_eq!(tokens, 200);
    }

    // Artifacts with identical kind+text collapse to a single entry.
    #[test]
    fn dedup_artifacts_removes_duplicates() {
        let mut artifacts = vec![
            ArtifactRecord {
                id: "a1".to_string(),
                kind: "test".to_string(),
                text: Some("duplicate content".to_string()),
                ..Default::default()
            },
            ArtifactRecord {
                id: "a2".to_string(),
                kind: "test".to_string(),
                text: Some("duplicate content".to_string()),
                ..Default::default()
            },
            ArtifactRecord {
                id: "a3".to_string(),
                kind: "test".to_string(),
                text: Some("unique content".to_string()),
                ..Default::default()
            },
        ];
        dedup_artifacts(&mut artifacts);
        assert_eq!(artifacts.len(), 2);
    }

    // Compacting an oversized artifact snips its text and resets the token
    // estimate to the budget.
    #[test]
    fn microcompact_artifact_snips_oversized() {
        let mut artifact = ArtifactRecord {
            id: "a1".to_string(),
            kind: "test".to_string(),
            text: Some("x".repeat(10_000)),
            estimated_tokens: Some(2_500),
            ..Default::default()
        };
        microcompact_artifact(&mut artifact, 500);
        assert!(artifact.text.as_ref().unwrap().len() < 5_000);
        assert_eq!(artifact.estimated_tokens, Some(500));
    }

    // Arg constraints allow a command matching the declared pattern.
    #[test]
    fn arg_constraint_allows_matching_pattern() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "exec",
            &serde_json::json!({"command": "cargo test"}),
        );
        assert!(result.is_ok());
    }

    // Arg constraints reject a command outside the declared pattern.
    #[test]
    fn arg_constraint_rejects_non_matching_pattern() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "exec",
            &serde_json::json!({"command": "rm -rf /"}),
        );
        assert!(result.is_err());
    }

    // Constraints scoped to one tool must not affect other tools.
    #[test]
    fn arg_constraint_ignores_unmatched_tool() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "read_file",
            &serde_json::json!({"path": "/etc/passwd"}),
        );
        assert!(result.is_ok());
    }
}
3277}