1use std::collections::{BTreeMap, BTreeSet};
2use std::path::{Path, PathBuf};
3use std::rc::Rc;
4use std::{cell::RefCell, thread_local};
5
6use serde::{Deserialize, Serialize};
7
8use crate::llm::{extract_llm_options, vm_call_llm_full, vm_value_to_json};
9use crate::value::{VmError, VmValue};
10
/// Current UTC time as an RFC 3339 timestamp, e.g. `2023-11-14T22:13:20Z`.
///
/// The previous implementation returned bare epoch seconds (`"1700000000"`),
/// contradicting the function name; records stamped by `ArtifactRecord::normalize`
/// were therefore not RFC 3339. Clock failures degrade to the epoch.
fn now_rfc3339() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let ts = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    epoch_secs_to_rfc3339(ts)
}

/// Format seconds-since-epoch as an RFC 3339 UTC timestamp.
///
/// Date conversion uses Howard Hinnant's `civil_from_days` algorithm
/// (proleptic Gregorian, era = 400-year cycle), so it is exact for all
/// representable dates without any table of month lengths.
fn epoch_secs_to_rfc3339(secs: u64) -> String {
    let days = (secs / 86_400) as i64;
    let rem = secs % 86_400;
    let (h, min, s) = (rem / 3600, (rem % 3600) / 60, rem % 60);
    // civil_from_days: shift the epoch to 0000-03-01 so leap days land at
    // the end of the (March-based) year.
    let z = days + 719_468;
    let era = z.div_euclid(146_097);
    let doe = z.rem_euclid(146_097); // day-of-era [0, 146096]
    let yoe = (doe - doe / 1460 + doe / 36_524 - doe / 146_096) / 365; // year-of-era
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day-of-year (Mar 1 = 0)
    let mp = (5 * doy + 2) / 153; // month offset from March
    let d = doy - (153 * mp + 2) / 5 + 1;
    let month = if mp < 10 { mp + 3 } else { mp - 9 };
    let mut y = yoe + era * 400;
    if month <= 2 {
        y += 1; // Jan/Feb belong to the next civil year
    }
    format!("{y:04}-{month:02}-{d:02}T{h:02}:{min:02}:{s:02}Z")
}
19
20fn new_id(prefix: &str) -> String {
21 format!("{prefix}_{}", uuid::Uuid::now_v7())
22}
23
/// Directory where run data is written: `$HARN_RUN_DIR`, or `.harn-runs`
/// when the variable is unset or not valid Unicode.
fn default_run_dir() -> PathBuf {
    match std::env::var("HARN_RUN_DIR") {
        Ok(dir) => PathBuf::from(dir),
        Err(_) => PathBuf::from(".harn-runs"),
    }
}
29
// Per-thread runtime state (thread_local, so each interpreter thread has its
// own copies; none of this is shared across threads).
thread_local! {
    // Stack of capability policies; presumably pushed/popped around nested
    // executions elsewhere in this module — not manipulated in this chunk.
    static EXECUTION_POLICY_STACK: RefCell<Vec<CapabilityPolicy>> = const { RefCell::new(Vec::new()) };
    // Registered tool hooks, run in registration order by run_pre/post_tool_hooks.
    static TOOL_HOOKS: RefCell<Vec<ToolHook>> = const { RefCell::new(Vec::new()) };
    // Mutation session attributed to work on this thread, if any.
    static CURRENT_MUTATION_SESSION: RefCell<Option<MutationSessionRecord>> = const { RefCell::new(None) };
}
35
/// Identity and attribution for a mutation session.
///
/// `#[serde(default)]` lets older payloads omit fields; `normalize` then
/// fills the required string fields with sensible defaults.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct MutationSessionRecord {
    /// Unique id; generated by `normalize` when empty.
    pub session_id: String,
    /// Id of the session that spawned this one, if any.
    pub parent_session_id: Option<String>,
    pub run_id: Option<String>,
    pub worker_id: Option<String>,
    pub execution_kind: Option<String>,
    /// Defaults to "read_only" after `normalize`.
    pub mutation_scope: String,
    /// Defaults to "host_enforced" after `normalize`.
    pub approval_mode: String,
}
47
48impl MutationSessionRecord {
49 pub fn normalize(mut self) -> Self {
50 if self.session_id.is_empty() {
51 self.session_id = new_id("session");
52 }
53 if self.mutation_scope.is_empty() {
54 self.mutation_scope = "read_only".to_string();
55 }
56 if self.approval_mode.is_empty() {
57 self.approval_mode = "host_enforced".to_string();
58 }
59 self
60 }
61}
62
63pub fn install_current_mutation_session(session: Option<MutationSessionRecord>) {
64 CURRENT_MUTATION_SESSION.with(|slot| {
65 *slot.borrow_mut() = session.map(MutationSessionRecord::normalize);
66 });
67}
68
69pub fn current_mutation_session() -> Option<MutationSessionRecord> {
70 CURRENT_MUTATION_SESSION.with(|slot| slot.borrow().clone())
71}
72
/// Verdict of a pre-tool hook, evaluated before a tool call runs.
#[derive(Clone, Debug)]
pub enum PreToolAction {
    /// Proceed with the current (possibly already-modified) arguments.
    Allow,
    /// Block the tool call; the string carries the denial reason.
    Deny(String),
    /// Proceed, but with these replacement arguments.
    Modify(serde_json::Value),
}

/// Verdict of a post-tool hook, evaluated over a tool's string result.
#[derive(Clone, Debug)]
pub enum PostToolAction {
    /// Keep the result unchanged.
    Pass,
    /// Replace the result with this string.
    Modify(String),
}

/// Pre-hook callback: `(tool name, args) -> action`. `Rc` because hooks live
/// in thread-local storage and are single-threaded.
pub type PreToolHookFn = Rc<dyn Fn(&str, &serde_json::Value) -> PreToolAction>;
/// Post-hook callback: `(tool name, result text) -> action`.
pub type PostToolHookFn = Rc<dyn Fn(&str, &str) -> PostToolAction>;

/// A registered hook; applies to tools whose name matches `pattern`
/// (simple `*` glob — see `glob_match`).
#[derive(Clone)]
pub struct ToolHook {
    /// Glob pattern matched against the tool name.
    pub pattern: String,
    /// Optional hook run before the tool executes.
    pub pre: Option<PreToolHookFn>,
    /// Optional hook run over the tool's textual result.
    pub post: Option<PostToolHookFn>,
}
109
110impl std::fmt::Debug for ToolHook {
111 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112 f.debug_struct("ToolHook")
113 .field("pattern", &self.pattern)
114 .field("has_pre", &self.pre.is_some())
115 .field("has_post", &self.post.is_some())
116 .finish()
117 }
118}
119
/// Minimal glob matcher supporting `*` (everything), `prefix*`, `*suffix`,
/// `*infix*` (substring), and exact equality. Not a general glob: interior
/// `*` is not supported.
///
/// Fix: `*sub*` previously fell into the `prefix*` branch and required names
/// to literally start with `*sub`, so it effectively never matched; it now
/// performs a substring match.
fn glob_match(pattern: &str, name: &str) -> bool {
    if pattern == "*" {
        return true;
    }
    if let Some(middle) = pattern
        .strip_prefix('*')
        .and_then(|rest| rest.strip_suffix('*'))
    {
        return name.contains(middle);
    }
    if let Some(prefix) = pattern.strip_suffix('*') {
        return name.starts_with(prefix);
    }
    if let Some(suffix) = pattern.strip_prefix('*') {
        return name.ends_with(suffix);
    }
    pattern == name
}
132
133pub fn register_tool_hook(hook: ToolHook) {
134 TOOL_HOOKS.with(|hooks| hooks.borrow_mut().push(hook));
135}
136
137pub fn clear_tool_hooks() {
138 TOOL_HOOKS.with(|hooks| hooks.borrow_mut().clear());
139}
140
141pub fn run_pre_tool_hooks(tool_name: &str, args: &serde_json::Value) -> PreToolAction {
143 TOOL_HOOKS.with(|hooks| {
144 let hooks = hooks.borrow();
145 let mut current_args = args.clone();
146 for hook in hooks.iter() {
147 if !glob_match(&hook.pattern, tool_name) {
148 continue;
149 }
150 if let Some(ref pre) = hook.pre {
151 match pre(tool_name, ¤t_args) {
152 PreToolAction::Allow => {}
153 PreToolAction::Deny(reason) => return PreToolAction::Deny(reason),
154 PreToolAction::Modify(new_args) => {
155 current_args = new_args;
156 }
157 }
158 }
159 }
160 if current_args != *args {
161 PreToolAction::Modify(current_args)
162 } else {
163 PreToolAction::Allow
164 }
165 })
166}
167
168pub fn run_post_tool_hooks(tool_name: &str, result: &str) -> String {
170 TOOL_HOOKS.with(|hooks| {
171 let hooks = hooks.borrow();
172 let mut current = result.to_string();
173 for hook in hooks.iter() {
174 if !glob_match(&hook.pattern, tool_name) {
175 continue;
176 }
177 if let Some(ref post) = hook.post {
178 match post(tool_name, ¤t) {
179 PostToolAction::Pass => {}
180 PostToolAction::Modify(new_result) => {
181 current = new_result;
182 }
183 }
184 }
185 }
186 current
187 })
188}
189
/// How archived transcript messages are summarized during auto-compaction.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CompactStrategy {
    /// Ask the model for a summary (see `llm_compaction_summary`).
    Llm,
    /// Mechanical truncation (see `truncate_compaction_summary`).
    Truncate,
    /// User-supplied closure (see `custom_compaction_summary`).
    Custom,
}
198
199pub fn parse_compact_strategy(value: &str) -> Result<CompactStrategy, VmError> {
200 match value {
201 "llm" => Ok(CompactStrategy::Llm),
202 "truncate" => Ok(CompactStrategy::Truncate),
203 "custom" => Ok(CompactStrategy::Custom),
204 other => Err(VmError::Runtime(format!(
205 "unknown compact_strategy '{other}' (expected 'llm', 'truncate', or 'custom')"
206 ))),
207 }
208}
209
/// Tuning knobs for transcript auto-compaction.
#[derive(Clone, Debug)]
pub struct AutoCompactConfig {
    /// Estimated-token level that should trigger compaction.
    /// NOTE(review): not read by `auto_compact_messages` itself — presumably
    /// checked by the caller; confirm.
    pub token_threshold: usize,
    /// Per-tool-output character cap (see `microcompact_tool_output`).
    pub tool_output_max_chars: usize,
    /// Number of trailing messages preserved verbatim.
    pub keep_last: usize,
    /// Summarization strategy for archived messages.
    pub compact_strategy: CompactStrategy,
    /// Closure used when `compact_strategy` is `Custom`.
    pub custom_compactor: Option<VmValue>,
}
224
225impl Default for AutoCompactConfig {
226 fn default() -> Self {
227 Self {
228 token_threshold: 80_000,
229 tool_output_max_chars: 20_000,
230 keep_last: 8,
231 compact_strategy: CompactStrategy::Llm,
232 custom_compactor: None,
233 }
234 }
235}
236
237pub fn estimate_message_tokens(messages: &[serde_json::Value]) -> usize {
239 messages
240 .iter()
241 .map(|m| {
242 m.get("content")
243 .and_then(|c| c.as_str())
244 .map(|s| s.len())
245 .unwrap_or(0)
246 })
247 .sum::<usize>()
248 / 4
249}
250
251pub fn microcompact_tool_output(output: &str, max_chars: usize) -> String {
254 if output.len() <= max_chars || max_chars < 200 {
255 return output.to_string();
256 }
257 let diagnostic_lines = output
258 .lines()
259 .filter(|line| {
260 let trimmed = line.trim();
261 let lower = trimmed.to_lowercase();
262 let has_file_line = {
264 let bytes = trimmed.as_bytes();
265 let mut i = 0;
266 let mut found_colon = false;
267 while i < bytes.len() {
268 if bytes[i] == b':' {
269 found_colon = true;
270 break;
271 }
272 i += 1;
273 }
274 found_colon && i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit()
275 };
276 let has_strong_keyword =
290 trimmed.contains("FAIL") || trimmed.contains("panic") || trimmed.contains("Panic");
291 let has_weak_keyword = trimmed.contains("error")
292 || trimmed.contains("undefined")
293 || trimmed.contains("expected")
294 || trimmed.contains("got")
295 || lower.contains("cannot find")
296 || lower.contains("not found")
297 || lower.contains("no such")
298 || lower.contains("unresolved")
299 || lower.contains("missing")
300 || lower.contains("declared but not used")
301 || lower.contains("unused")
302 || lower.contains("mismatch");
303 let positional = lower.contains(" error ")
304 || lower.starts_with("error:")
305 || lower.starts_with("warning:")
306 || lower.starts_with("note:")
307 || lower.contains("panic:");
308 has_strong_keyword || (has_file_line && has_weak_keyword) || positional
309 })
310 .take(32)
311 .collect::<Vec<_>>();
312 if !diagnostic_lines.is_empty() {
313 let diagnostics = diagnostic_lines.join("\n");
314 let budget = max_chars.saturating_sub(diagnostics.len() + 64);
315 let keep = budget / 2;
316 if keep >= 80 && output.len() > keep * 2 {
317 let head_end = output.floor_char_boundary(keep);
318 let tail_start = output.ceil_char_boundary(output.len() - keep);
319 let head = &output[..head_end];
320 let tail = &output[tail_start..];
321 return format!(
322 "{head}\n\n[diagnostic lines preserved]\n{diagnostics}\n\n[... output compacted ...]\n\n{tail}"
323 );
324 }
325 }
326 let keep = max_chars / 2;
327 let head_end = output.floor_char_boundary(keep);
328 let tail_start = output.ceil_char_boundary(output.len() - keep);
329 let head = &output[..head_end];
330 let tail = &output[tail_start..];
331 let snipped = output.len() - max_chars;
332 format!("{head}\n\n[... {snipped} characters snipped ...]\n\n{tail}")
333}
334
335fn format_compaction_messages(messages: &[serde_json::Value]) -> String {
336 messages
337 .iter()
338 .map(|msg| {
339 let role = msg
340 .get("role")
341 .and_then(|v| v.as_str())
342 .unwrap_or("user")
343 .to_uppercase();
344 let content = msg
345 .get("content")
346 .and_then(|v| v.as_str())
347 .unwrap_or_default();
348 format!("{role}: {content}")
349 })
350 .collect::<Vec<_>>()
351 .join("\n")
352}
353
354fn truncate_compaction_summary(
355 old_messages: &[serde_json::Value],
356 archived_count: usize,
357) -> String {
358 truncate_compaction_summary_with_context(old_messages, archived_count, false)
359}
360
/// Build a mechanical (non-LLM) summary of archived messages.
///
/// Each message renders as `[role] content` clipped to ~500 chars, with at
/// most 15 entries. `is_llm_fallback` switches the header so readers can
/// tell a deliberate truncate-strategy summary from an LLM-failure fallback.
fn truncate_compaction_summary_with_context(
    old_messages: &[serde_json::Value],
    archived_count: usize,
    is_llm_fallback: bool,
) -> String {
    let per_msg_limit = 500_usize;
    let summary_parts: Vec<String> = old_messages
        .iter()
        .filter_map(|m| {
            // Skip messages whose role/content are missing, non-string, or empty.
            let role = m.get("role")?.as_str()?;
            let content = m.get("content")?.as_str()?;
            if content.is_empty() {
                return None;
            }
            let truncated = if content.len() > per_msg_limit {
                format!(
                    "{}... [truncated from {} chars]",
                    // floor_char_boundary keeps the cut on a UTF-8 boundary.
                    &content[..content.floor_char_boundary(per_msg_limit)],
                    content.len()
                )
            } else {
                content.to_string()
            };
            Some(format!("[{role}] {truncated}"))
        })
        .take(15)
        .collect();
    let header = if is_llm_fallback {
        format!(
            "[auto-compact fallback: LLM summarizer returned empty; {archived_count} older messages abbreviated to ~{per_msg_limit} chars each]"
        )
    } else {
        format!("[auto-compacted {archived_count} older messages via truncate strategy]")
    };
    // NOTE(review): the "... and N more" count derives from archived_count,
    // not from how many parts survived the filter above — it overstates when
    // messages were skipped; confirm whether that is acceptable.
    format!(
        "{header}\n{}{}",
        summary_parts.join("\n"),
        if archived_count > 15 {
            format!("\n... and {} more", archived_count - 15)
        } else {
            String::new()
        }
    )
}
405
406fn compact_summary_text_from_value(value: &VmValue) -> Result<String, VmError> {
407 if let Some(map) = value.as_dict() {
408 if let Some(summary) = map.get("summary").or_else(|| map.get("text")) {
409 return Ok(summary.display());
410 }
411 }
412 match value {
413 VmValue::String(text) => Ok(text.to_string()),
414 VmValue::Nil => Ok(String::new()),
415 _ => serde_json::to_string_pretty(&vm_value_to_json(value))
416 .map_err(|e| VmError::Runtime(format!("custom compactor encode error: {e}"))),
417 }
418}
419
/// Summarize archived transcript messages with a one-shot LLM call.
///
/// Clones the active call options but strips everything that could recurse
/// or constrain the summarizer (system prompt, transcript linkage, tools,
/// response formats), then issues a single user-role summarization prompt.
/// Falls back to the truncate-based summary when the model returns an empty
/// string, so compaction always produces some replacement text.
async fn llm_compaction_summary(
    old_messages: &[serde_json::Value],
    archived_count: usize,
    llm_opts: &crate::llm::api::LlmCallOptions,
) -> Result<String, VmError> {
    let mut compact_opts = llm_opts.clone();
    let formatted = format_compaction_messages(old_messages);
    // Reset per-call state inherited from the outer call: the summarizer
    // must not re-enter transcript tracking or tool dispatch.
    compact_opts.system = None;
    compact_opts.transcript_id = None;
    compact_opts.transcript_summary = None;
    compact_opts.transcript_metadata = None;
    compact_opts.native_tools = None;
    compact_opts.tool_choice = None;
    compact_opts.response_format = None;
    compact_opts.json_schema = None;
    compact_opts.messages = vec![serde_json::json!({
        "role": "user",
        "content": format!(
            "Summarize these archived conversation messages for a follow-on coding agent. Preserve goals, constraints, decisions, completed tool work, unresolved issues, and next actions. Output only the summary text.\n\nArchived message count: {archived_count}\n\nConversation:\n{formatted}"
        ),
    })];
    let result = vm_call_llm_full(&compact_opts).await?;
    let summary = result.text.trim();
    if summary.is_empty() {
        // Empty model output: fall back to mechanical truncation, flagged as such.
        Ok(truncate_compaction_summary_with_context(
            old_messages,
            archived_count,
            true,
        ))
    } else {
        Ok(format!(
            "[auto-compacted {archived_count} older messages]\n{summary}"
        ))
    }
}
455
456async fn custom_compaction_summary(
457 old_messages: &[serde_json::Value],
458 archived_count: usize,
459 callback: &VmValue,
460) -> Result<String, VmError> {
461 let Some(VmValue::Closure(closure)) = Some(callback.clone()) else {
462 return Err(VmError::Runtime(
463 "compact_callback must be a closure when compact_strategy is 'custom'".to_string(),
464 ));
465 };
466 let mut vm = crate::vm::clone_async_builtin_child_vm().ok_or_else(|| {
467 VmError::Runtime(
468 "custom transcript compaction requires an async builtin VM context".to_string(),
469 )
470 })?;
471 let messages_vm = VmValue::List(Rc::new(
472 old_messages
473 .iter()
474 .map(crate::stdlib::json_to_vm_value)
475 .collect(),
476 ));
477 let result = vm.call_closure_pub(&closure, &[messages_vm], &[]).await;
478 let summary = compact_summary_text_from_value(&result?)?;
479 if summary.trim().is_empty() {
480 Ok(truncate_compaction_summary(old_messages, archived_count))
481 } else {
482 Ok(format!(
483 "[auto-compacted {archived_count} older messages]\n{summary}"
484 ))
485 }
486}
487
/// Compact `messages` in place when they exceed the configured window.
///
/// Keeps the last `config.keep_last` messages, replaces everything older
/// with a single user-role summary message inserted at position 0, and
/// returns that summary. Returns `Ok(None)` when nothing needed archiving.
/// NOTE(review): `config.token_threshold` is not consulted here — presumably
/// the caller gates on it before invoking this; confirm.
pub(crate) async fn auto_compact_messages(
    messages: &mut Vec<serde_json::Value>,
    config: &AutoCompactConfig,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
) -> Result<Option<String>, VmError> {
    if messages.len() <= config.keep_last {
        return Ok(None);
    }
    // Drain everything except the trailing keep_last messages.
    let split_at = messages.len().saturating_sub(config.keep_last);
    let old_messages: Vec<_> = messages.drain(..split_at).collect();
    let archived_count = old_messages.len();
    let summary = match config.compact_strategy {
        CompactStrategy::Truncate => truncate_compaction_summary(&old_messages, archived_count),
        // LLM strategy requires the caller to supply active call options.
        CompactStrategy::Llm => {
            llm_compaction_summary(
                &old_messages,
                archived_count,
                llm_opts.ok_or_else(|| {
                    VmError::Runtime(
                        "LLM transcript compaction requires active LLM call options".to_string(),
                    )
                })?,
            )
            .await?
        }
        // Custom strategy requires a configured compactor closure.
        CompactStrategy::Custom => {
            custom_compaction_summary(
                &old_messages,
                archived_count,
                config.custom_compactor.as_ref().ok_or_else(|| {
                    VmError::Runtime(
                        "compact_callback is required when compact_strategy is 'custom'"
                            .to_string(),
                    )
                })?,
            )
            .await?
        }
    };
    // The summary becomes the new first message so later turns still see
    // the archived context.
    messages.insert(
        0,
        serde_json::json!({
            "role": "user",
            "content": summary,
        }),
    );
    Ok(Some(summary))
}
538
539pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
543 let max_chars = max_tokens * 4;
544 if let Some(ref text) = artifact.text {
545 if text.len() > max_chars && max_chars >= 200 {
546 artifact.text = Some(microcompact_tool_output(text, max_chars));
547 artifact.estimated_tokens = Some(max_tokens);
548 }
549 }
550}
551
552pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
555 let mut seen_hashes: BTreeSet<u64> = BTreeSet::new();
556 artifacts.retain(|artifact| {
557 let text = artifact.text.as_deref().unwrap_or("");
558 if text.is_empty() {
559 return true;
560 }
561 let hash = {
563 use std::hash::{Hash, Hasher};
564 let mut hasher = std::collections::hash_map::DefaultHasher::new();
565 text.hash(&mut hasher);
566 hasher.finish()
567 };
568 seen_hashes.insert(hash)
569 });
570}
571
572pub fn select_artifacts_adaptive(
575 mut artifacts: Vec<ArtifactRecord>,
576 policy: &ContextPolicy,
577) -> Vec<ArtifactRecord> {
578 dedup_artifacts(&mut artifacts);
580
581 if let Some(max_tokens) = policy.max_tokens {
585 let count = artifacts.len().max(1);
586 let per_artifact_budget = max_tokens / count;
587 let cap = per_artifact_budget.max(500).min(max_tokens);
589 for artifact in &mut artifacts {
590 let est = artifact.estimated_tokens.unwrap_or(0);
591 if est > cap * 2 {
592 microcompact_artifact(artifact, cap);
593 }
594 }
595 }
596
597 select_artifacts(artifacts, policy)
599}
600
/// Restricts the first argument of matching tools to an allow-list of glob
/// patterns; enforced by `enforce_tool_arg_constraints`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolArgConstraint {
    /// Tool-name glob this constraint applies to.
    pub tool: String,
    /// Allowed patterns for the tool's first argument (empty = no check).
    pub arg_patterns: Vec<String>,
}
613
614pub fn enforce_tool_arg_constraints(
616 policy: &CapabilityPolicy,
617 tool_name: &str,
618 args: &serde_json::Value,
619) -> Result<(), VmError> {
620 for constraint in &policy.tool_arg_constraints {
621 if !glob_match(&constraint.tool, tool_name) {
622 continue;
623 }
624 if constraint.arg_patterns.is_empty() {
625 continue;
626 }
627 let first_arg = args
629 .as_object()
630 .and_then(|o| o.values().next())
631 .and_then(|v| v.as_str())
632 .or_else(|| args.as_str())
633 .unwrap_or("");
634 let matches = constraint
635 .arg_patterns
636 .iter()
637 .any(|pattern| glob_match(pattern, first_arg));
638 if !matches {
639 return reject_policy(format!(
640 "tool '{tool_name}' argument '{first_arg}' does not match allowed patterns: {:?}",
641 constraint.arg_patterns
642 ));
643 }
644 }
645 Ok(())
646}
647
/// Canonicalize an artifact kind string.
///
/// A few legacy aliases map to canonical names, blank kinds become
/// "artifact", and everything else (canonical names included) passes
/// through unchanged.
fn normalize_artifact_kind(kind: &str) -> String {
    match kind {
        "file" => "workspace_file",
        "transcript" => "transcript_summary",
        "verification" => "verification_result",
        "test" => "test_result",
        blank if blank.trim().is_empty() => "artifact",
        other => other,
    }
    .to_string()
}
681
/// Default selection priority for an artifact kind (higher = kept sooner).
/// Verification/test results outrank diffs, which outrank plans, workspace
/// content, summaries, and command output; unknown kinds get 40.
fn default_artifact_priority(kind: &str) -> i64 {
    if matches!(kind, "verification_result" | "test_result") {
        100
    } else if kind == "verification_bundle" {
        95
    } else if matches!(
        kind,
        "diff" | "git_diff"
            | "patch"
            | "patch_set"
            | "patch_proposal"
            | "diff_review"
            | "review_decision"
            | "apply_intent"
    ) {
        90
    } else if kind == "plan" {
        80
    } else if matches!(
        kind,
        "workspace_file" | "workspace_snapshot" | "editor_selection" | "resource"
    ) {
        70
    } else if matches!(kind, "summary" | "transcript_summary") {
        60
    } else if kind == "command_result" {
        50
    } else {
        40
    }
}
695
/// Rank a freshness label for artifact ordering: fresh/live (3) > recent (2)
/// > unknown/None (1) > stale (0).
fn freshness_rank(value: Option<&str>) -> i64 {
    let label = value.unwrap_or("");
    match label {
        "fresh" | "live" => 3,
        "recent" => 2,
        "stale" => 0,
        _ => 1,
    }
}
704
/// Per-tool policy metadata parsed from a tool's `policy` object
/// (see `parse_tool_runtime_policy`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolRuntimePolicyMetadata {
    /// Capability name -> operations this tool declares.
    pub capabilities: BTreeMap<String, Vec<String>>,
    /// Declared side-effect level (ranked by `min_side_effect`).
    pub side_effect_level: Option<String>,
    /// Parameter names flagged as paths — presumably filesystem paths;
    /// confirm against the tool registry.
    pub path_params: Vec<String>,
    /// Tool-declared mutation classification, if any.
    pub mutation_classification: Option<String>,
}
713
/// A capability ceiling or request; empty collections mean "unrestricted"
/// for that dimension. Two policies combine via `CapabilityPolicy::intersect`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CapabilityPolicy {
    /// Allowed tool names (empty = all).
    pub tools: Vec<String>,
    /// Capability name -> allowed operations (empty map = all).
    pub capabilities: BTreeMap<String, Vec<String>>,
    /// Permitted workspace roots (empty = unrestricted).
    pub workspace_roots: Vec<String>,
    /// Maximum side-effect level (ranked by `min_side_effect`).
    pub side_effect_level: Option<String>,
    /// Cap on nested execution depth.
    pub recursion_limit: Option<usize>,
    /// Per-tool argument allow-patterns; see `enforce_tool_arg_constraints`.
    #[serde(default)]
    pub tool_arg_constraints: Vec<ToolArgConstraint>,
    /// Runtime policy metadata keyed by tool name.
    #[serde(default)]
    pub tool_metadata: BTreeMap<String, ToolRuntimePolicyMetadata>,
}
728
impl CapabilityPolicy {
    /// Narrow a `requested` policy by this policy, treating `self` as the
    /// host ceiling.
    ///
    /// Empty collections on either side mean "no restriction" for that
    /// dimension. Returns `Err` with a human-readable message when the
    /// request names tools or capability operations the ceiling does not
    /// grant.
    pub fn intersect(&self, requested: &CapabilityPolicy) -> Result<CapabilityPolicy, String> {
        // Side effects: keep the more restrictive (lower-ranked) level.
        let side_effect_level = match (&self.side_effect_level, &requested.side_effect_level) {
            (Some(a), Some(b)) => Some(min_side_effect(a, b).to_string()),
            (Some(a), None) => Some(a.clone()),
            (None, Some(b)) => Some(b.clone()),
            (None, None) => None,
        };

        // Hard failure when the request names tools outside a non-empty ceiling.
        if !self.tools.is_empty() {
            let denied: Vec<String> = requested
                .tools
                .iter()
                .filter(|tool| !self.tools.contains(*tool))
                .cloned()
                .collect();
            if !denied.is_empty() {
                return Err(format!(
                    "requested tools exceed host ceiling: {}",
                    denied.join(", ")
                ));
            }
        }

        // Same check per capability: every requested op must be granted, and
        // unknown capabilities are rejected unless the ceiling is unrestricted.
        for (capability, requested_ops) in &requested.capabilities {
            if let Some(allowed_ops) = self.capabilities.get(capability) {
                let denied: Vec<String> = requested_ops
                    .iter()
                    .filter(|op| !allowed_ops.contains(*op))
                    .cloned()
                    .collect();
                if !denied.is_empty() {
                    return Err(format!(
                        "requested capability operations exceed host ceiling: {}.{}",
                        capability,
                        denied.join(",")
                    ));
                }
            } else if !self.capabilities.is_empty() {
                return Err(format!(
                    "requested capability exceeds host ceiling: {capability}"
                ));
            }
        }

        // Intersections below: an empty side adopts the other side wholesale.
        let tools = if self.tools.is_empty() {
            requested.tools.clone()
        } else if requested.tools.is_empty() {
            self.tools.clone()
        } else {
            requested
                .tools
                .iter()
                .filter(|tool| self.tools.contains(*tool))
                .cloned()
                .collect()
        };

        let capabilities = if self.capabilities.is_empty() {
            requested.capabilities.clone()
        } else if requested.capabilities.is_empty() {
            self.capabilities.clone()
        } else {
            requested
                .capabilities
                .iter()
                .filter_map(|(capability, requested_ops)| {
                    self.capabilities.get(capability).map(|allowed_ops| {
                        (
                            capability.clone(),
                            requested_ops
                                .iter()
                                .filter(|op| allowed_ops.contains(*op))
                                .cloned()
                                .collect::<Vec<_>>(),
                        )
                    })
                })
                .collect()
        };

        let workspace_roots = if self.workspace_roots.is_empty() {
            requested.workspace_roots.clone()
        } else if requested.workspace_roots.is_empty() {
            self.workspace_roots.clone()
        } else {
            requested
                .workspace_roots
                .iter()
                .filter(|root| self.workspace_roots.contains(*root))
                .cloned()
                .collect()
        };

        // Recursion: the smaller limit wins when both sides set one.
        let recursion_limit = match (self.recursion_limit, requested.recursion_limit) {
            (Some(a), Some(b)) => Some(a.min(b)),
            (Some(a), None) => Some(a),
            (None, Some(b)) => Some(b),
            (None, None) => None,
        };

        // Arg constraints accumulate (both sides enforced); they do not intersect.
        let mut tool_arg_constraints = self.tool_arg_constraints.clone();
        tool_arg_constraints.extend(requested.tool_arg_constraints.clone());

        // Metadata follows the intersected tool list; the request's entry
        // wins over the ceiling's when both describe the same tool.
        let tool_metadata = tools
            .iter()
            .filter_map(|tool| {
                requested
                    .tool_metadata
                    .get(tool)
                    .or_else(|| self.tool_metadata.get(tool))
                    .cloned()
                    .map(|metadata| (tool.clone(), metadata))
            })
            .collect();

        Ok(CapabilityPolicy {
            tools,
            capabilities,
            workspace_roots,
            side_effect_level,
            recursion_limit,
            tool_arg_constraints,
            tool_metadata,
        })
    }
}
857
/// The more restrictive of two side-effect levels, ranked
/// none < read_only < workspace_write < process_exec < network < unknown.
/// On equal rank, `a` is returned.
fn min_side_effect<'a>(a: &'a str, b: &'a str) -> &'a str {
    fn rank(level: &str) -> usize {
        ["none", "read_only", "workspace_write", "process_exec", "network"]
            .iter()
            .position(|known| *known == level)
            .unwrap_or(5) // unknown levels rank as most severe
    }
    if rank(b) < rank(a) {
        b
    } else {
        a
    }
}
875
/// Model selection overrides for a workflow node.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ModelPolicy {
    pub provider: Option<String>,
    pub model: Option<String>,
    pub model_tier: Option<String>,
    pub temperature: Option<f64>,
    pub max_tokens: Option<i64>,
}

/// Transcript handling for a node: mode/visibility plus summarize/compact
/// toggles.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct TranscriptPolicy {
    pub mode: Option<String>,
    pub visibility: Option<String>,
    pub summarize: bool,
    pub compact: bool,
    /// Trailing messages to keep when compacting.
    pub keep_last: Option<usize>,
}

/// Artifact selection policy consumed by `select_artifacts_adaptive`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ContextPolicy {
    pub max_artifacts: Option<usize>,
    /// Overall token budget; also drives per-artifact micro-compaction.
    pub max_tokens: Option<usize>,
    pub reserve_tokens: Option<usize>,
    pub include_kinds: Vec<String>,
    pub exclude_kinds: Vec<String>,
    pub prioritize_kinds: Vec<String>,
    pub pinned_ids: Vec<String>,
    pub include_stages: Vec<String>,
    pub prefer_recent: bool,
    pub prefer_fresh: bool,
    pub render: Option<String>,
}

/// Retry/verification behavior for a node.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RetryPolicy {
    pub max_attempts: usize,
    pub verify: bool,
    pub repair: bool,
}
919
/// Input/output expectations for a workflow stage.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct StageContract {
    pub input_kinds: Vec<String>,
    pub output_kinds: Vec<String>,
    pub min_inputs: Option<usize>,
    pub max_inputs: Option<usize>,
    pub require_transcript: bool,
    pub schema: Option<serde_json::Value>,
}

/// Maps logical node outcomes (success, failure, verify results, loop and
/// condition branches, escalation) to branch labels.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BranchSemantics {
    pub success: Option<String>,
    pub failure: Option<String>,
    pub verify_pass: Option<String>,
    pub verify_fail: Option<String>,
    pub condition_true: Option<String>,
    pub condition_false: Option<String>,
    pub loop_continue: Option<String>,
    pub loop_exit: Option<String>,
    pub escalation: Option<String>,
}

/// Fan-out configuration for map-style nodes.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct MapPolicy {
    pub items: Vec<serde_json::Value>,
    pub item_artifact_kind: Option<String>,
    pub output_kind: Option<String>,
    pub max_items: Option<usize>,
}

/// Fan-in configuration for join-style nodes.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct JoinPolicy {
    pub strategy: String,
    pub require_all_inputs: bool,
    pub min_completed: Option<usize>,
}

/// How joined results reduce into a single output.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReducePolicy {
    pub strategy: String,
    pub separator: Option<String>,
    pub output_kind: Option<String>,
}

/// Destination and rationale when a node escalates.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EscalationPolicy {
    pub level: Option<String>,
    pub queue: Option<String>,
    pub reason: Option<String>,
}
977
/// A unit of context that can be selected into a model call.
///
/// Serialized with a `_type` tag; `normalize` fills id/kind/timestamp and
/// derives `estimated_tokens` (~4 chars/token) and a default `priority`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    /// Canonicalized by `normalize_artifact_kind`.
    pub kind: String,
    pub title: Option<String>,
    /// Textual payload; the target of micro-compaction and dedup hashing.
    pub text: Option<String>,
    pub data: Option<serde_json::Value>,
    pub source: Option<String>,
    pub created_at: String,
    /// "fresh"/"live"/"recent"/"stale" — ranked by `freshness_rank`.
    pub freshness: Option<String>,
    /// Selection priority; defaulted from `default_artifact_priority`.
    pub priority: Option<i64>,
    pub lineage: Vec<String>,
    pub relevance: Option<f64>,
    /// Rough token estimate (text chars / 4, rounded up).
    pub estimated_tokens: Option<usize>,
    pub stage: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
998
999impl ArtifactRecord {
1000 pub fn normalize(mut self) -> Self {
1001 if self.type_name.is_empty() {
1002 self.type_name = "artifact".to_string();
1003 }
1004 if self.id.is_empty() {
1005 self.id = new_id("artifact");
1006 }
1007 if self.created_at.is_empty() {
1008 self.created_at = now_rfc3339();
1009 }
1010 if self.kind.is_empty() {
1011 self.kind = "artifact".to_string();
1012 }
1013 self.kind = normalize_artifact_kind(&self.kind);
1014 if self.estimated_tokens.is_none() {
1015 self.estimated_tokens = self
1016 .text
1017 .as_ref()
1018 .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
1019 }
1020 if self.priority.is_none() {
1021 self.priority = Some(default_artifact_priority(&self.kind));
1022 }
1023 self
1024 }
1025}
1026
/// A single node in a workflow graph: its prompt/system text plus every
/// per-node policy bundle.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowNode {
    pub id: Option<String>,
    pub kind: String,
    pub mode: Option<String>,
    pub prompt: Option<String>,
    pub system: Option<String>,
    pub task_label: Option<String>,
    pub done_sentinel: Option<String>,
    /// Tool list or registry JSON; parsed by `workflow_tool_names` /
    /// `workflow_tool_metadata`.
    pub tools: serde_json::Value,
    pub model_policy: ModelPolicy,
    pub transcript_policy: TranscriptPolicy,
    pub context_policy: ContextPolicy,
    pub retry_policy: RetryPolicy,
    pub capability_policy: CapabilityPolicy,
    pub input_contract: StageContract,
    pub output_contract: StageContract,
    pub branch_semantics: BranchSemantics,
    pub map_policy: MapPolicy,
    pub join_policy: JoinPolicy,
    pub reduce_policy: ReducePolicy,
    pub escalation_policy: EscalationPolicy,
    pub verify: Option<serde_json::Value>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1053
1054pub fn workflow_tool_names(value: &serde_json::Value) -> Vec<String> {
1055 match value {
1056 serde_json::Value::Null => Vec::new(),
1057 serde_json::Value::Array(items) => items
1058 .iter()
1059 .filter_map(|item| match item {
1060 serde_json::Value::Object(map) => map
1061 .get("name")
1062 .and_then(|value| value.as_str())
1063 .filter(|name| !name.is_empty())
1064 .map(|name| name.to_string()),
1065 _ => None,
1066 })
1067 .collect(),
1068 serde_json::Value::Object(map) => {
1069 if map.get("_type").and_then(|value| value.as_str()) == Some("tool_registry") {
1070 return map
1071 .get("tools")
1072 .map(workflow_tool_names)
1073 .unwrap_or_default();
1074 }
1075 map.get("name")
1076 .and_then(|value| value.as_str())
1077 .filter(|name| !name.is_empty())
1078 .map(|name| vec![name.to_string()])
1079 .unwrap_or_default()
1080 }
1081 _ => Vec::new(),
1082 }
1083}
1084
/// The most severe side-effect level among `levels`, ranked
/// none < read_only < workspace_write < process_exec < network < unknown.
/// Returns `None` for an empty iterator.
fn max_side_effect_level(levels: impl Iterator<Item = String>) -> Option<String> {
    const ORDER: [&str; 5] = [
        "none",
        "read_only",
        "workspace_write",
        "process_exec",
        "network",
    ];
    levels.max_by_key(|level| {
        ORDER
            .iter()
            .position(|known| *known == level.as_str())
            .unwrap_or(5) // unknown labels rank above everything known
    })
}
1098
1099fn parse_tool_runtime_policy(
1100 map: &serde_json::Map<String, serde_json::Value>,
1101) -> ToolRuntimePolicyMetadata {
1102 let Some(policy) = map.get("policy").and_then(|value| value.as_object()) else {
1103 return ToolRuntimePolicyMetadata::default();
1104 };
1105
1106 let capabilities = policy
1107 .get("capabilities")
1108 .and_then(|value| value.as_object())
1109 .map(|caps| {
1110 caps.iter()
1111 .map(|(capability, ops)| {
1112 let values = ops
1113 .as_array()
1114 .map(|items| {
1115 items
1116 .iter()
1117 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1118 .collect::<Vec<_>>()
1119 })
1120 .unwrap_or_default();
1121 (capability.clone(), values)
1122 })
1123 .collect::<BTreeMap<_, _>>()
1124 })
1125 .unwrap_or_default();
1126
1127 let path_params = policy
1128 .get("path_params")
1129 .and_then(|value| value.as_array())
1130 .map(|items| {
1131 items
1132 .iter()
1133 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1134 .collect::<Vec<_>>()
1135 })
1136 .unwrap_or_default();
1137
1138 ToolRuntimePolicyMetadata {
1139 capabilities,
1140 side_effect_level: policy
1141 .get("side_effect_level")
1142 .and_then(|value| value.as_str())
1143 .map(|s| s.to_string()),
1144 path_params,
1145 mutation_classification: policy
1146 .get("mutation_classification")
1147 .and_then(|value| value.as_str())
1148 .map(|s| s.to_string()),
1149 }
1150}
1151
1152pub fn workflow_tool_metadata(
1153 value: &serde_json::Value,
1154) -> BTreeMap<String, ToolRuntimePolicyMetadata> {
1155 match value {
1156 serde_json::Value::Null => BTreeMap::new(),
1157 serde_json::Value::Array(items) => items
1158 .iter()
1159 .filter_map(|item| match item {
1160 serde_json::Value::Object(map) => map
1161 .get("name")
1162 .and_then(|value| value.as_str())
1163 .filter(|name| !name.is_empty())
1164 .map(|name| (name.to_string(), parse_tool_runtime_policy(map))),
1165 _ => None,
1166 })
1167 .collect(),
1168 serde_json::Value::Object(map) => {
1169 if map.get("_type").and_then(|value| value.as_str()) == Some("tool_registry") {
1170 return map
1171 .get("tools")
1172 .map(workflow_tool_metadata)
1173 .unwrap_or_default();
1174 }
1175 map.get("name")
1176 .and_then(|value| value.as_str())
1177 .filter(|name| !name.is_empty())
1178 .map(|name| {
1179 let mut metadata = BTreeMap::new();
1180 metadata.insert(name.to_string(), parse_tool_runtime_policy(map));
1181 metadata
1182 })
1183 .unwrap_or_default()
1184 }
1185 _ => BTreeMap::new(),
1186 }
1187}
1188
1189pub fn workflow_tool_policy_from_tools(value: &serde_json::Value) -> CapabilityPolicy {
1190 let tools = workflow_tool_names(value);
1191 let tool_metadata = workflow_tool_metadata(value);
1192 let mut capabilities: BTreeMap<String, Vec<String>> = BTreeMap::new();
1193 for metadata in tool_metadata.values() {
1194 for (capability, ops) in &metadata.capabilities {
1195 let entry = capabilities.entry(capability.clone()).or_default();
1196 for op in ops {
1197 if !entry.contains(op) {
1198 entry.push(op.clone());
1199 }
1200 }
1201 entry.sort();
1202 }
1203 }
1204 let side_effect_level = max_side_effect_level(
1205 tool_metadata
1206 .values()
1207 .filter_map(|metadata| metadata.side_effect_level.clone()),
1208 );
1209 CapabilityPolicy {
1210 tools,
1211 capabilities,
1212 workspace_roots: Vec::new(),
1213 side_effect_level,
1214 recursion_limit: None,
1215 tool_arg_constraints: Vec::new(),
1216 tool_metadata,
1217 }
1218}
1219
/// A directed edge of a [`WorkflowGraph`]. All fields default when absent
/// from JSON (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowEdge {
    /// Id of the source node; must name an existing node (checked by `validate_workflow`).
    pub from: String,
    /// Id of the destination node; must name an existing node.
    pub to: String,
    /// Branch selector, e.g. `"true"`/`"false"` on condition nodes or
    /// `"failed"`/`"retry"` in the default verify/repair wiring; `None` for an
    /// unconditional edge.
    pub branch: Option<String>,
    /// Optional display label; not consulted by the validation or traversal
    /// code in this module.
    pub label: Option<String>,
}
1228
/// A declarative workflow: named nodes connected by branch-labelled edges,
/// plus the capability ceiling and audit trail attached to the graph.
/// `normalize_workflow_value` fills in defaults (type tag, id, version, entry).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowGraph {
    /// Serialized type tag; normalized to `"workflow_graph"`.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    /// Normalized to 1 when 0/unset.
    pub version: usize,
    /// Id of the node where execution starts; must exist in `nodes`.
    pub entry: String,
    /// Node id -> node definition.
    pub nodes: BTreeMap<String, WorkflowNode>,
    pub edges: Vec<WorkflowEdge>,
    /// Graph-level capability policy, checked against an optional ceiling in
    /// `validate_workflow`.
    pub capability_policy: CapabilityPolicy,
    pub metadata: BTreeMap<String, serde_json::Value>,
    pub audit_log: Vec<WorkflowAuditEntry>,
}
1244
/// One entry of a workflow graph's audit log: what operation was applied,
/// to which node, when, and why.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowAuditEntry {
    pub id: String,
    /// Name of the operation recorded.
    pub op: String,
    /// Node the operation targeted, if any.
    pub node_id: Option<String>,
    pub timestamp: String,
    pub reason: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1255
/// Aggregated LLM usage counters for a stage or a whole run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct LlmUsageRecord {
    pub input_tokens: i64,
    pub output_tokens: i64,
    pub total_duration_ms: i64,
    /// Number of LLM calls folded into this record.
    pub call_count: i64,
    pub total_cost: f64,
    /// Model identifiers that contributed to these totals.
    pub models: Vec<String>,
}
1266
/// Record of one executed workflow stage within a [`RunRecord`], including
/// its outputs, verification data, artifacts, and per-attempt history.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageRecord {
    pub id: String,
    /// Id of the workflow node this stage executed.
    pub node_id: String,
    /// Node kind (e.g. `"stage"`, `"verify"` — see `normalize_workflow_value`).
    pub kind: String,
    pub status: String,
    pub outcome: String,
    /// Branch taken out of this stage, if any.
    pub branch: Option<String>,
    pub started_at: String,
    /// `None` while the stage is still in flight.
    pub finished_at: Option<String>,
    /// User-visible output text (compared against replay assertions).
    pub visible_text: Option<String>,
    /// Reasoning text not shown to the user.
    pub private_reasoning: Option<String>,
    pub transcript: Option<serde_json::Value>,
    pub verification: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    pub artifacts: Vec<ArtifactRecord>,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
    /// Retry history; one entry per attempt.
    pub attempts: Vec<RunStageAttemptRecord>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1289
/// One attempt of a (possibly retried) stage execution.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageAttemptRecord {
    /// 1-based (by convention) attempt counter — TODO confirm against writer.
    pub attempt: usize,
    pub status: String,
    pub outcome: String,
    pub branch: Option<String>,
    /// Error message when the attempt failed.
    pub error: Option<String>,
    pub verification: Option<serde_json::Value>,
    pub started_at: String,
    pub finished_at: Option<String>,
}
1302
/// Record of one edge traversal during a run: which stage/node handed off to
/// which node, over which branch, and which artifacts moved with it.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunTransitionRecord {
    pub id: String,
    /// Stage that produced the transition; `None` for the initial entry.
    pub from_stage_id: Option<String>,
    pub from_node_id: Option<String>,
    pub to_node_id: String,
    pub branch: Option<String>,
    pub timestamp: String,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
}
1315
/// Snapshot of scheduler state persisted during a run, enabling resumption.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunCheckpointRecord {
    pub id: String,
    /// Node ids ready to execute at checkpoint time.
    pub ready_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub last_stage_id: Option<String>,
    pub persisted_at: String,
    /// Why the checkpoint was taken.
    pub reason: String,
}
1326
/// Expected outcomes captured from a source run, replayed against later runs
/// by `evaluate_run_against_fixture`. Built from a run by
/// `replay_fixture_from_run`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayFixture {
    /// Serialized type tag; set to `"replay_fixture"` when derived from a run.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    /// Id of the run the fixture was captured from.
    pub source_run_id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    pub created_at: String,
    /// Run-level status the replayed run must match.
    pub expected_status: String,
    pub stage_assertions: Vec<ReplayStageAssertion>,
}
1340
/// Per-stage expectations inside a [`ReplayFixture`].
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayStageAssertion {
    /// Node the replayed run must contain a stage for.
    pub node_id: String,
    pub expected_status: String,
    pub expected_outcome: String,
    pub expected_branch: Option<String>,
    /// Artifact kinds that must all be present among the stage's artifacts.
    pub required_artifact_kinds: Vec<String>,
    /// Snippet (first 80 chars when captured by `replay_fixture_from_run`)
    /// that must appear in the stage's visible text.
    pub visible_text_contains: Option<String>,
}
1351
/// Result of evaluating one run against one fixture
/// (see `evaluate_run_against_fixture`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalReport {
    /// True iff `failures` is empty.
    pub pass: bool,
    /// Human-readable mismatch descriptions.
    pub failures: Vec<String>,
    /// Number of stages in the evaluated run.
    pub stage_count: usize,
}
1359
/// One case's result inside a [`ReplayEvalSuiteReport`].
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalCaseReport {
    pub run_id: String,
    pub workflow_id: String,
    /// Label copied from the suite manifest case, if any.
    pub label: Option<String>,
    pub pass: bool,
    pub failures: Vec<String>,
    pub stage_count: usize,
    /// Path the run record was loaded from.
    pub source_path: Option<String>,
    /// Diff against a baseline run, when the case requested one (`compare_to`).
    pub comparison: Option<RunDiffReport>,
}
1372
/// Aggregate result over all cases of an eval suite. `pass` is true iff
/// `failed == 0`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalSuiteReport {
    pub pass: bool,
    pub total: usize,
    pub passed: usize,
    pub failed: usize,
    pub cases: Vec<ReplayEvalCaseReport>,
}
1382
/// A per-node difference between two runs, as produced by `diff_run_records`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunStageDiffRecord {
    pub node_id: String,
    /// Kind of change detected for this node.
    pub change: String,
    /// Individual field-level difference descriptions.
    pub details: Vec<String>,
}
1390
/// Summary comparison of two run records (left = baseline, right = candidate).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunDiffReport {
    pub left_run_id: String,
    pub right_run_id: String,
    /// True when no differences were found.
    pub identical: bool,
    pub status_changed: bool,
    pub left_status: String,
    pub right_status: String,
    pub stage_diffs: Vec<RunStageDiffRecord>,
    /// right-minus-left deltas (signed) for the respective collection counts.
    pub transition_count_delta: isize,
    pub artifact_count_delta: isize,
    pub checkpoint_count_delta: isize,
}
1405
/// On-disk description of an eval suite, consumed by
/// `evaluate_run_suite_manifest`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteManifest {
    /// Serialized type tag; normalized to `"eval_suite_manifest"`.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    /// Directory against which each case's relative paths are resolved.
    pub base_dir: Option<String>,
    pub cases: Vec<EvalSuiteCase>,
}
1416
/// One case of an [`EvalSuiteManifest`].
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteCase {
    pub label: Option<String>,
    /// Path of the run record to evaluate (relative paths resolve against
    /// the manifest's `base_dir`).
    pub run_path: String,
    /// Fixture file to evaluate against; when absent, the run's embedded
    /// fixture is used, or one is derived from the run.
    pub fixture_path: Option<String>,
    /// Optional baseline run to diff against; a non-identical diff fails the case.
    pub compare_to: Option<String>,
}
1425
/// Full persisted record of a workflow run: stages, transitions, checkpoints,
/// artifacts, child runs, policy, and optional replay fixture.
/// `normalize_run_record` fills defaults (type tag, id, timestamps, status).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunRecord {
    /// Serialized type tag; normalized to `"run_record"`.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    pub task: String,
    /// Normalized to `"running"` when empty.
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub parent_run_id: Option<String>,
    /// Defaults to this run's own id when unset (top-level run).
    pub root_run_id: Option<String>,
    pub stages: Vec<RunStageRecord>,
    pub transitions: Vec<RunTransitionRecord>,
    pub checkpoints: Vec<RunCheckpointRecord>,
    pub pending_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub child_runs: Vec<RunChildRecord>,
    pub artifacts: Vec<ArtifactRecord>,
    pub policy: CapabilityPolicy,
    pub execution: Option<RunExecutionRecord>,
    pub transcript: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    /// Derived from the run itself when absent (see `normalize_run_record`).
    pub replay_fixture: Option<ReplayFixture>,
    pub trace_spans: Vec<RunTraceSpanRecord>,
    pub metadata: BTreeMap<String, serde_json::Value>,
    /// Where this record was last saved, if it has been persisted.
    pub persisted_path: Option<String>,
}
1456
/// A timing span recorded during a run (tree-shaped via `parent_id`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunTraceSpanRecord {
    pub span_id: u64,
    /// Parent span id; `None` for root spans.
    pub parent_id: Option<u64>,
    pub kind: String,
    pub name: String,
    /// Start offset and duration in milliseconds — origin of the offset is
    /// defined by the writer; presumably relative to run start (confirm there).
    pub start_ms: u64,
    pub duration_ms: u64,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1468
/// Record of a child worker run spawned from a parent run's stage.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunChildRecord {
    pub worker_id: String,
    pub worker_name: String,
    /// Stage in the parent run that spawned this child, if known.
    pub parent_stage_id: Option<String>,
    /// Mutation-session linkage (see `MutationSessionRecord`).
    pub session_id: Option<String>,
    pub parent_session_id: Option<String>,
    pub mutation_scope: Option<String>,
    pub approval_mode: Option<String>,
    pub task: String,
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub run_id: Option<String>,
    /// Where the child's run record / snapshot were persisted, if they were.
    pub run_path: Option<String>,
    pub snapshot_path: Option<String>,
    pub execution: Option<RunExecutionRecord>,
}
1488
/// Execution-environment details attached to a run or child run: working
/// directory, environment variables, and repository/worktree placement.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunExecutionRecord {
    pub cwd: Option<String>,
    pub source_dir: Option<String>,
    /// Environment variables provided to the execution.
    pub env: BTreeMap<String, String>,
    pub adapter: Option<String>,
    pub repo_path: Option<String>,
    pub worktree_path: Option<String>,
    pub branch: Option<String>,
    pub base_ref: Option<String>,
    /// Cleanup strategy identifier — semantics defined by the executor; verify there.
    pub cleanup: Option<String>,
}
1502
/// Result of `validate_workflow`: `valid` is true iff `errors` is empty;
/// `warnings` are non-fatal findings (e.g. unreachable nodes).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowValidationReport {
    pub valid: bool,
    pub errors: Vec<String>,
    pub warnings: Vec<String>,
    /// Node ids reachable from the graph's entry node.
    pub reachable_nodes: Vec<String>,
}
1511
1512fn parse_json_payload<T: for<'de> Deserialize<'de>>(
1513 json: serde_json::Value,
1514 label: &str,
1515) -> Result<T, VmError> {
1516 let payload = json.to_string();
1517 let mut deserializer = serde_json::Deserializer::from_str(&payload);
1518 let mut tracker = serde_path_to_error::Track::new();
1519 let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
1520 T::deserialize(path_deserializer).map_err(|error| {
1521 let snippet = if payload.len() > 600 {
1522 format!("{}...", &payload[..600])
1523 } else {
1524 payload.clone()
1525 };
1526 VmError::Runtime(format!(
1527 "{label} parse error at {}: {} | payload={}",
1528 tracker.path(),
1529 error,
1530 snippet
1531 ))
1532 })
1533}
1534
1535fn parse_json_value<T: for<'de> Deserialize<'de>>(value: &VmValue) -> Result<T, VmError> {
1536 parse_json_payload(vm_value_to_json(value), "orchestration")
1537}
1538
1539pub fn parse_workflow_node_value(value: &VmValue, label: &str) -> Result<WorkflowNode, VmError> {
1540 parse_json_payload(vm_value_to_json(value), label)
1541}
1542
1543pub fn parse_workflow_node_json(
1544 json: serde_json::Value,
1545 label: &str,
1546) -> Result<WorkflowNode, VmError> {
1547 parse_json_payload(json, label)
1548}
1549
1550pub fn parse_workflow_edge_json(
1551 json: serde_json::Value,
1552 label: &str,
1553) -> Result<WorkflowEdge, VmError> {
1554 parse_json_payload(json, label)
1555}
1556
/// Normalize a VM value into a fully-defaulted [`WorkflowGraph`].
///
/// Two input shapes are accepted:
/// - a full graph (nodes/edges already present), and
/// - a legacy flat dict with `act`/`verify`/`repair` keys, which is migrated
///   into a three-node graph wired act -> verify -> repair -> verify.
///
/// After shape handling, graph- and node-level defaults are filled in
/// (type tag, id, version, entry, node kinds, join/reduce strategies,
/// output contracts, retry attempts).
pub fn normalize_workflow_value(value: &VmValue) -> Result<WorkflowGraph, VmError> {
    let mut graph: WorkflowGraph = parse_json_value(value)?;
    let as_dict = value.as_dict().cloned().unwrap_or_default();

    // Legacy flat shape: no explicit nodes, but act/verify/repair keys at the
    // top level. Build the node map from those keys.
    if graph.nodes.is_empty() {
        for key in ["act", "verify", "repair"] {
            if let Some(node_value) = as_dict.get(key) {
                let mut node: WorkflowNode = parse_json_value(node_value)?;
                let raw_node = node_value.as_dict().cloned().unwrap_or_default();
                node.id = Some(key.to_string());
                if node.kind.is_empty() {
                    node.kind = if key == "verify" {
                        "verify".to_string()
                    } else {
                        "stage".to_string()
                    };
                }
                // Model settings may live at the top level of the flat dict;
                // they are inherited by each node that does not set its own.
                if node.model_policy.provider.is_none() {
                    node.model_policy.provider = as_dict
                        .get("provider")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model.is_none() {
                    node.model_policy.model = as_dict
                        .get("model")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model_tier.is_none() {
                    // Either "model_tier" or the shorter "tier" alias is accepted.
                    node.model_policy.model_tier = as_dict
                        .get("model_tier")
                        .or_else(|| as_dict.get("tier"))
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.temperature.is_none() {
                    // Accept either a float or an int for temperature.
                    node.model_policy.temperature = as_dict.get("temperature").and_then(|value| {
                        if let VmValue::Float(number) = value {
                            Some(*number)
                        } else {
                            value.as_int().map(|number| number as f64)
                        }
                    });
                }
                if node.model_policy.max_tokens.is_none() {
                    node.model_policy.max_tokens =
                        as_dict.get("max_tokens").and_then(|value| value.as_int());
                }
                if node.mode.is_none() {
                    node.mode = as_dict
                        .get("mode")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.done_sentinel.is_none() {
                    node.done_sentinel = as_dict
                        .get("done_sentinel")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                // Legacy verify nodes carried their assertion fields inline;
                // fold them into a structured `verify` object.
                if key == "verify"
                    && node.verify.is_none()
                    && (raw_node.contains_key("assert_text")
                        || raw_node.contains_key("command")
                        || raw_node.contains_key("expect_status")
                        || raw_node.contains_key("expect_text"))
                {
                    node.verify = Some(serde_json::json!({
                        "assert_text": raw_node.get("assert_text").map(vm_value_to_json),
                        "command": raw_node.get("command").map(vm_value_to_json),
                        "expect_status": raw_node.get("expect_status").map(vm_value_to_json),
                        "expect_text": raw_node.get("expect_text").map(vm_value_to_json),
                    }));
                }
                graph.nodes.insert(key.to_string(), node);
            }
        }
        if graph.entry.is_empty() && graph.nodes.contains_key("act") {
            graph.entry = "act".to_string();
        }
        // Default wiring: act -> verify; verify --failed--> repair --retry--> verify.
        if graph.edges.is_empty() && graph.nodes.contains_key("act") {
            if graph.nodes.contains_key("verify") {
                graph.edges.push(WorkflowEdge {
                    from: "act".to_string(),
                    to: "verify".to_string(),
                    branch: None,
                    label: None,
                });
            }
            if graph.nodes.contains_key("repair") {
                graph.edges.push(WorkflowEdge {
                    from: "verify".to_string(),
                    to: "repair".to_string(),
                    branch: Some("failed".to_string()),
                    label: None,
                });
                graph.edges.push(WorkflowEdge {
                    from: "repair".to_string(),
                    to: "verify".to_string(),
                    branch: Some("retry".to_string()),
                    label: None,
                });
            }
        }
    }

    // Graph-level defaults.
    if graph.type_name.is_empty() {
        graph.type_name = "workflow_graph".to_string();
    }
    if graph.id.is_empty() {
        graph.id = new_id("workflow");
    }
    if graph.version == 0 {
        graph.version = 1;
    }
    if graph.entry.is_empty() {
        // Fall back to the first node (BTreeMap order) or the conventional "act".
        graph.entry = graph
            .nodes
            .keys()
            .next()
            .cloned()
            .unwrap_or_else(|| "act".to_string());
    }
    // Node-level defaults.
    for (node_id, node) in &mut graph.nodes {
        if node.id.is_none() {
            node.id = Some(node_id.clone());
        }
        if node.kind.is_empty() {
            node.kind = "stage".to_string();
        }
        if node.join_policy.strategy.is_empty() {
            node.join_policy.strategy = "all".to_string();
        }
        if node.reduce_policy.strategy.is_empty() {
            node.reduce_policy.strategy = "concat".to_string();
        }
        if node.output_contract.output_kinds.is_empty() {
            // Default output kind depends on node kind; reduce/map nodes may
            // override it through their own policies.
            node.output_contract.output_kinds = vec![match node.kind.as_str() {
                "verify" => "verification_result".to_string(),
                "reduce" => node
                    .reduce_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "summary".to_string()),
                "map" => node
                    .map_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "artifact".to_string()),
                "escalation" => "plan".to_string(),
                _ => "artifact".to_string(),
            }];
        }
        if node.retry_policy.max_attempts == 0 {
            node.retry_policy.max_attempts = 1;
        }
    }
    Ok(graph)
}
1717
/// Validate a workflow graph structurally and (optionally) against a
/// capability ceiling.
///
/// Errors: missing entry node, dangling edges, per-kind structural rules
/// (condition/fork/map), contradictory input contracts, and capability
/// violations against `ceiling`. Warnings: unreachable nodes and
/// join/reduce shapes that are legal but suspicious.
pub fn validate_workflow(
    graph: &WorkflowGraph,
    ceiling: Option<&CapabilityPolicy>,
) -> WorkflowValidationReport {
    let mut errors = Vec::new();
    let mut warnings = Vec::new();

    if !graph.nodes.contains_key(&graph.entry) {
        errors.push(format!("entry node does not exist: {}", graph.entry));
    }

    // Every edge endpoint must name an existing node.
    let node_ids: BTreeSet<String> = graph.nodes.keys().cloned().collect();
    for edge in &graph.edges {
        if !node_ids.contains(&edge.from) {
            errors.push(format!("edge.from references unknown node: {}", edge.from));
        }
        if !node_ids.contains(&edge.to) {
            errors.push(format!("edge.to references unknown node: {}", edge.to));
        }
    }

    // Unreachable nodes are a warning, not an error.
    let reachable_nodes = reachable_nodes(graph);
    for node_id in &node_ids {
        if !reachable_nodes.contains(node_id) {
            warnings.push(format!("node is unreachable: {node_id}"));
        }
    }

    // Per-node structural rules, keyed on the node's kind.
    for (node_id, node) in &graph.nodes {
        let incoming = graph
            .edges
            .iter()
            .filter(|edge| edge.to == *node_id)
            .count();
        let outgoing: Vec<&WorkflowEdge> = graph
            .edges
            .iter()
            .filter(|edge| edge.from == *node_id)
            .collect();
        if let Some(min_inputs) = node.input_contract.min_inputs {
            if let Some(max_inputs) = node.input_contract.max_inputs {
                if min_inputs > max_inputs {
                    errors.push(format!(
                        "node {node_id}: input contract min_inputs exceeds max_inputs"
                    ));
                }
            }
        }
        match node.kind.as_str() {
            "condition" => {
                // Condition nodes must be able to route both outcomes.
                let has_true = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("true"));
                let has_false = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("false"));
                if !has_true || !has_false {
                    errors.push(format!(
                        "node {node_id}: condition nodes require both 'true' and 'false' branch edges"
                    ));
                }
            }
            "fork" => {
                if outgoing.len() < 2 {
                    errors.push(format!(
                        "node {node_id}: fork nodes require at least two outgoing edges"
                    ));
                }
            }
            "join" => {
                // A join with one input is legal but probably a mistake.
                if incoming < 2 {
                    warnings.push(format!(
                        "node {node_id}: join node has fewer than two incoming edges"
                    ));
                }
            }
            "map" => {
                // Map nodes need some source of items to iterate over.
                if node.map_policy.items.is_empty()
                    && node.map_policy.item_artifact_kind.is_none()
                    && node.input_contract.input_kinds.is_empty()
                {
                    errors.push(format!(
                        "node {node_id}: map nodes require items, item_artifact_kind, or input_contract.input_kinds"
                    ));
                }
            }
            "reduce" => {
                if node.input_contract.input_kinds.is_empty() {
                    warnings.push(format!(
                        "node {node_id}: reduce node has no input_contract.input_kinds; it will consume all available artifacts"
                    ));
                }
            }
            _ => {}
        }
    }

    // Capability ceiling: both the graph policy and each node policy must be
    // intersectable with the ceiling.
    if let Some(ceiling) = ceiling {
        if let Err(error) = ceiling.intersect(&graph.capability_policy) {
            errors.push(error);
        }
        for (node_id, node) in &graph.nodes {
            if let Err(error) = ceiling.intersect(&node.capability_policy) {
                errors.push(format!("node {node_id}: {error}"));
            }
        }
    }

    WorkflowValidationReport {
        valid: errors.is_empty(),
        errors,
        warnings,
        reachable_nodes: reachable_nodes.into_iter().collect(),
    }
}
1833
1834fn reachable_nodes(graph: &WorkflowGraph) -> BTreeSet<String> {
1835 let mut seen = BTreeSet::new();
1836 let mut stack = vec![graph.entry.clone()];
1837 while let Some(node_id) = stack.pop() {
1838 if !seen.insert(node_id.clone()) {
1839 continue;
1840 }
1841 for edge in graph.edges.iter().filter(|edge| edge.from == node_id) {
1842 stack.push(edge.to.clone());
1843 }
1844 }
1845 seen
1846}
1847
/// Filter, rank, and budget-select artifacts for context assembly.
///
/// Filtering: keep artifacts matching the policy's include/exclude kinds and
/// include_stages. Ranking (highest first): pinned ids, prioritized kinds,
/// explicit priority, freshness (if `prefer_fresh`), recency (if
/// `prefer_recent`), relevance, then smaller estimated token count as the
/// tiebreak. Selection walks the ranked list, skipping (not stopping at)
/// artifacts that would exceed the token budget, so smaller later artifacts
/// can still fit.
pub fn select_artifacts(
    mut artifacts: Vec<ArtifactRecord>,
    policy: &ContextPolicy,
) -> Vec<ArtifactRecord> {
    // Filter pass: kind include/exclude plus optional stage allow-list.
    artifacts.retain(|artifact| {
        (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
            && !policy.exclude_kinds.contains(&artifact.kind)
            && (policy.include_stages.is_empty()
                || artifact
                    .stage
                    .as_ref()
                    .is_some_and(|stage| policy.include_stages.contains(stage)))
    });
    // Rank pass: note each comparison is b-vs-a, i.e. descending, except the
    // final token tiebreak, which is ascending (prefer the cheaper artifact).
    artifacts.sort_by(|a, b| {
        let b_pinned = policy.pinned_ids.contains(&b.id);
        let a_pinned = policy.pinned_ids.contains(&a.id);
        b_pinned
            .cmp(&a_pinned)
            .then_with(|| {
                let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
                let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
                b_prio_kind.cmp(&a_prio_kind)
            })
            .then_with(|| {
                b.priority
                    .unwrap_or_default()
                    .cmp(&a.priority.unwrap_or_default())
            })
            .then_with(|| {
                if policy.prefer_fresh {
                    freshness_rank(b.freshness.as_deref())
                        .cmp(&freshness_rank(a.freshness.as_deref()))
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                if policy.prefer_recent {
                    // created_at is compared as a string; assumes a sortable
                    // timestamp format — TODO confirm against the writer.
                    b.created_at.cmp(&a.created_at)
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                // Floats: NaN relevance compares as Equal rather than panicking.
                b.relevance
                    .partial_cmp(&a.relevance)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
            .then_with(|| {
                // Unknown token cost sorts last (treated as infinitely large).
                a.estimated_tokens
                    .unwrap_or(usize::MAX)
                    .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
            })
    });

    // Budget pass: cap by count and by tokens (max_tokens minus reserve).
    let mut selected = Vec::new();
    let mut used_tokens = 0usize;
    let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
    let effective_max_tokens = policy
        .max_tokens
        .map(|max| max.saturating_sub(reserve_tokens));
    for artifact in artifacts {
        if let Some(max_artifacts) = policy.max_artifacts {
            if selected.len() >= max_artifacts {
                break;
            }
        }
        let next_tokens = artifact.estimated_tokens.unwrap_or(0);
        if let Some(max_tokens) = effective_max_tokens {
            // `continue`, not `break`: an oversized artifact is skipped but
            // cheaper ones further down the ranking may still fit.
            if used_tokens + next_tokens > max_tokens {
                continue;
            }
        }
        used_tokens += next_tokens;
        selected.push(artifact);
    }
    selected
}
1926
1927pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
1928 let mut parts = Vec::new();
1929 for artifact in artifacts {
1930 let title = artifact
1931 .title
1932 .clone()
1933 .unwrap_or_else(|| format!("{} {}", artifact.kind, artifact.id));
1934 let body = artifact
1935 .text
1936 .clone()
1937 .or_else(|| artifact.data.as_ref().map(|v| v.to_string()))
1938 .unwrap_or_default();
1939 match policy.render.as_deref() {
1940 Some("json") => {
1941 parts.push(
1942 serde_json::json!({
1943 "id": artifact.id,
1944 "kind": artifact.kind,
1945 "title": title,
1946 "source": artifact.source,
1947 "freshness": artifact.freshness,
1948 "priority": artifact.priority,
1949 "text": body,
1950 })
1951 .to_string(),
1952 );
1953 }
1954 _ => parts.push(format!(
1955 "[{title}] kind={} source={} freshness={} priority={}\n{}",
1956 artifact.kind,
1957 artifact
1958 .source
1959 .clone()
1960 .unwrap_or_else(|| "unknown".to_string()),
1961 artifact
1962 .freshness
1963 .clone()
1964 .unwrap_or_else(|| "normal".to_string()),
1965 artifact.priority.unwrap_or_default(),
1966 body
1967 )),
1968 }
1969 }
1970 parts.join("\n\n")
1971}
1972
1973pub fn normalize_artifact(value: &VmValue) -> Result<ArtifactRecord, VmError> {
1974 let artifact: ArtifactRecord = parse_json_value(value)?;
1975 Ok(artifact.normalize())
1976}
1977
1978pub fn normalize_run_record(value: &VmValue) -> Result<RunRecord, VmError> {
1979 let json = vm_value_to_json(value);
1980 let payload = json.to_string();
1981 let mut deserializer = serde_json::Deserializer::from_str(&payload);
1982 let mut tracker = serde_path_to_error::Track::new();
1983 let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
1984 let mut run: RunRecord = RunRecord::deserialize(path_deserializer).map_err(|error| {
1985 let snippet = if payload.len() > 600 {
1986 format!("{}...", &payload[..600])
1987 } else {
1988 payload.clone()
1989 };
1990 VmError::Runtime(format!(
1991 "orchestration parse error at {}: {} | payload={}",
1992 tracker.path(),
1993 error,
1994 snippet
1995 ))
1996 })?;
1997 if run.type_name.is_empty() {
1998 run.type_name = "run_record".to_string();
1999 }
2000 if run.id.is_empty() {
2001 run.id = new_id("run");
2002 }
2003 if run.started_at.is_empty() {
2004 run.started_at = now_rfc3339();
2005 }
2006 if run.status.is_empty() {
2007 run.status = "running".to_string();
2008 }
2009 if run.root_run_id.is_none() {
2010 run.root_run_id = Some(run.id.clone());
2011 }
2012 if run.replay_fixture.is_none() {
2013 run.replay_fixture = Some(replay_fixture_from_run(&run));
2014 }
2015 Ok(run)
2016}
2017
2018pub fn normalize_eval_suite_manifest(value: &VmValue) -> Result<EvalSuiteManifest, VmError> {
2019 let mut manifest: EvalSuiteManifest = parse_json_value(value)?;
2020 if manifest.type_name.is_empty() {
2021 manifest.type_name = "eval_suite_manifest".to_string();
2022 }
2023 if manifest.id.is_empty() {
2024 manifest.id = new_id("eval_suite");
2025 }
2026 Ok(manifest)
2027}
2028
2029fn load_replay_fixture(path: &Path) -> Result<ReplayFixture, VmError> {
2030 let content = std::fs::read_to_string(path)
2031 .map_err(|e| VmError::Runtime(format!("failed to read replay fixture: {e}")))?;
2032 serde_json::from_str(&content)
2033 .map_err(|e| VmError::Runtime(format!("failed to parse replay fixture: {e}")))
2034}
2035
/// Resolve a manifest-relative path: absolute paths pass through untouched,
/// relative ones are joined onto `base_dir` when one is given.
fn resolve_manifest_path(base_dir: Option<&Path>, path: &str) -> PathBuf {
    let candidate = PathBuf::from(path);
    match base_dir {
        Some(base) if candidate.is_relative() => base.join(candidate),
        _ => candidate,
    }
}
2046
/// Evaluate every case of an eval suite manifest and aggregate the results.
///
/// For each case: load the run record; pick a fixture (explicit file, the
/// run's embedded fixture, or one derived from the run, in that order);
/// evaluate the run against it; and, when `compare_to` is set, diff against
/// the baseline run — any non-identical diff fails the case.
///
/// # Errors
/// Propagates I/O or parse failures from loading run records or fixtures.
pub fn evaluate_run_suite_manifest(
    manifest: &EvalSuiteManifest,
) -> Result<ReplayEvalSuiteReport, VmError> {
    // Relative case paths resolve against the manifest's base_dir.
    let base_dir = manifest.base_dir.as_deref().map(Path::new);
    let mut reports = Vec::new();
    for case in &manifest.cases {
        let run_path = resolve_manifest_path(base_dir, &case.run_path);
        let run = load_run_record(&run_path)?;
        // Fixture precedence: explicit file > embedded in the run > derived.
        let fixture = match &case.fixture_path {
            Some(path) => load_replay_fixture(&resolve_manifest_path(base_dir, path))?,
            None => run
                .replay_fixture
                .clone()
                .unwrap_or_else(|| replay_fixture_from_run(&run)),
        };
        let eval = evaluate_run_against_fixture(&run, &fixture);
        let mut pass = eval.pass;
        let mut failures = eval.failures;
        let comparison = match &case.compare_to {
            Some(path) => {
                let baseline_path = resolve_manifest_path(base_dir, path);
                let baseline = load_run_record(&baseline_path)?;
                let diff = diff_run_records(&baseline, &run);
                if !diff.identical {
                    // A baseline mismatch fails the case even if the fixture passed.
                    pass = false;
                    failures.push(format!(
                        "run differs from baseline {} with {} stage changes",
                        baseline_path.display(),
                        diff.stage_diffs.len()
                    ));
                }
                Some(diff)
            }
            None => None,
        };
        reports.push(ReplayEvalCaseReport {
            run_id: run.id.clone(),
            workflow_id: run.workflow_id.clone(),
            label: case.label.clone(),
            pass,
            failures,
            stage_count: eval.stage_count,
            source_path: Some(run_path.display().to_string()),
            comparison,
        });
    }
    let total = reports.len();
    let passed = reports.iter().filter(|report| report.pass).count();
    let failed = total.saturating_sub(passed);
    Ok(ReplayEvalSuiteReport {
        pass: failed == 0,
        total,
        passed,
        failed,
        cases: reports,
    })
}
2104
/// Render a minimal unified-style diff (file headers plus `-`/`+`/context
/// lines, no hunk headers) between two texts, using an LCS table to align
/// common lines. `path` labels the headers; defaults to `"artifact"`.
pub fn render_unified_diff(path: Option<&str>, before: &str, after: &str) -> String {
    let old_lines: Vec<&str> = before.lines().collect();
    let new_lines: Vec<&str> = after.lines().collect();
    // lcs[i][j] = length of the longest common subsequence of
    // old_lines[i..] and new_lines[j..], filled back-to-front.
    let mut lcs = vec![vec![0usize; new_lines.len() + 1]; old_lines.len() + 1];
    for i in (0..old_lines.len()).rev() {
        for j in (0..new_lines.len()).rev() {
            lcs[i][j] = if old_lines[i] == new_lines[j] {
                lcs[i + 1][j + 1] + 1
            } else {
                lcs[i + 1][j].max(lcs[i][j + 1])
            };
        }
    }

    let label = path.unwrap_or("artifact");
    let mut out = format!("--- a/{label}\n+++ b/{label}\n");
    let mut i = 0;
    let mut j = 0;
    // Walk the table: emit context on matches, otherwise take whichever
    // side preserves the longer remaining common subsequence.
    while i < old_lines.len() && j < new_lines.len() {
        if old_lines[i] == new_lines[j] {
            out.push_str(&format!(" {}\n", old_lines[i]));
            i += 1;
            j += 1;
        } else if lcs[i + 1][j] >= lcs[i][j + 1] {
            out.push_str(&format!("-{}\n", old_lines[i]));
            i += 1;
        } else {
            out.push_str(&format!("+{}\n", new_lines[j]));
            j += 1;
        }
    }
    // Flush whichever side still has unmatched tail lines.
    for line in &old_lines[i..] {
        out.push_str(&format!("-{line}\n"));
    }
    for line in &new_lines[j..] {
        out.push_str(&format!("+{line}\n"));
    }
    out
}
2147
2148pub fn save_run_record(run: &RunRecord, path: Option<&str>) -> Result<String, VmError> {
2149 let path = path
2150 .map(PathBuf::from)
2151 .unwrap_or_else(|| default_run_dir().join(format!("{}.json", run.id)));
2152 if let Some(parent) = path.parent() {
2153 std::fs::create_dir_all(parent)
2154 .map_err(|e| VmError::Runtime(format!("failed to create run directory: {e}")))?;
2155 }
2156 let json = serde_json::to_string_pretty(run)
2157 .map_err(|e| VmError::Runtime(format!("failed to encode run record: {e}")))?;
2158 let tmp_path = path.with_extension("json.tmp");
2160 std::fs::write(&tmp_path, &json)
2161 .map_err(|e| VmError::Runtime(format!("failed to persist run record: {e}")))?;
2162 std::fs::rename(&tmp_path, &path).map_err(|e| {
2163 let _ = std::fs::write(&path, &json);
2165 VmError::Runtime(format!("failed to finalize run record: {e}"))
2166 })?;
2167 Ok(path.to_string_lossy().to_string())
2168}
2169
2170pub fn load_run_record(path: &Path) -> Result<RunRecord, VmError> {
2171 let content = std::fs::read_to_string(path)
2172 .map_err(|e| VmError::Runtime(format!("failed to read run record: {e}")))?;
2173 serde_json::from_str(&content)
2174 .map_err(|e| VmError::Runtime(format!("failed to parse run record: {e}")))
2175}
2176
2177pub fn replay_fixture_from_run(run: &RunRecord) -> ReplayFixture {
2178 ReplayFixture {
2179 type_name: "replay_fixture".to_string(),
2180 id: new_id("fixture"),
2181 source_run_id: run.id.clone(),
2182 workflow_id: run.workflow_id.clone(),
2183 workflow_name: run.workflow_name.clone(),
2184 created_at: now_rfc3339(),
2185 expected_status: run.status.clone(),
2186 stage_assertions: run
2187 .stages
2188 .iter()
2189 .map(|stage| ReplayStageAssertion {
2190 node_id: stage.node_id.clone(),
2191 expected_status: stage.status.clone(),
2192 expected_outcome: stage.outcome.clone(),
2193 expected_branch: stage.branch.clone(),
2194 required_artifact_kinds: stage
2195 .artifacts
2196 .iter()
2197 .map(|artifact| artifact.kind.clone())
2198 .collect(),
2199 visible_text_contains: stage
2200 .visible_text
2201 .as_ref()
2202 .filter(|text| !text.is_empty())
2203 .map(|text| text.chars().take(80).collect()),
2204 })
2205 .collect(),
2206 }
2207}
2208
/// Compare a run against a replay fixture, collecting one failure message per
/// mismatch: run status, then per assertion the stage's presence, status,
/// outcome, branch, required artifact kinds, and visible-text snippet.
/// Stages are matched by `node_id` (first match wins); extra stages in the
/// run that the fixture does not mention are not flagged.
pub fn evaluate_run_against_fixture(run: &RunRecord, fixture: &ReplayFixture) -> ReplayEvalReport {
    let mut failures = Vec::new();
    if run.status != fixture.expected_status {
        failures.push(format!(
            "run status mismatch: expected {}, got {}",
            fixture.expected_status, run.status
        ));
    }
    for assertion in &fixture.stage_assertions {
        // A missing stage records one failure and skips the field checks.
        let Some(stage) = run
            .stages
            .iter()
            .find(|stage| stage.node_id == assertion.node_id)
        else {
            failures.push(format!("missing stage {}", assertion.node_id));
            continue;
        };
        if stage.status != assertion.expected_status {
            failures.push(format!(
                "stage {} status mismatch: expected {}, got {}",
                assertion.node_id, assertion.expected_status, stage.status
            ));
        }
        if stage.outcome != assertion.expected_outcome {
            failures.push(format!(
                "stage {} outcome mismatch: expected {}, got {}",
                assertion.node_id, assertion.expected_outcome, stage.outcome
            ));
        }
        if stage.branch != assertion.expected_branch {
            failures.push(format!(
                "stage {} branch mismatch: expected {:?}, got {:?}",
                assertion.node_id, assertion.expected_branch, stage.branch
            ));
        }
        // Every required artifact kind must appear at least once.
        for required_kind in &assertion.required_artifact_kinds {
            if !stage
                .artifacts
                .iter()
                .any(|artifact| &artifact.kind == required_kind)
            {
                failures.push(format!(
                    "stage {} missing artifact kind {}",
                    assertion.node_id, required_kind
                ));
            }
        }
        if let Some(snippet) = &assertion.visible_text_contains {
            // Absent visible text is treated as the empty string here.
            let actual = stage.visible_text.clone().unwrap_or_default();
            if !actual.contains(snippet) {
                failures.push(format!(
                    "stage {} visible text does not contain expected snippet {:?}",
                    assertion.node_id, snippet
                ));
            }
        }
    }

    ReplayEvalReport {
        pass: failures.is_empty(),
        failures,
        stage_count: run.stages.len(),
    }
}
2273
2274pub fn evaluate_run_suite(
2275 cases: Vec<(RunRecord, ReplayFixture, Option<String>)>,
2276) -> ReplayEvalSuiteReport {
2277 let mut reports = Vec::new();
2278 for (run, fixture, source_path) in cases {
2279 let report = evaluate_run_against_fixture(&run, &fixture);
2280 reports.push(ReplayEvalCaseReport {
2281 run_id: run.id.clone(),
2282 workflow_id: run.workflow_id.clone(),
2283 label: None,
2284 pass: report.pass,
2285 failures: report.failures,
2286 stage_count: report.stage_count,
2287 source_path,
2288 comparison: None,
2289 });
2290 }
2291 let total = reports.len();
2292 let passed = reports.iter().filter(|report| report.pass).count();
2293 let failed = total.saturating_sub(passed);
2294 ReplayEvalSuiteReport {
2295 pass: failed == 0,
2296 total,
2297 passed,
2298 failed,
2299 cases: reports,
2300 }
2301}
2302
/// Produces a structural diff between two run records.
///
/// Stages are matched by node id across the union of both runs (BTreeSet
/// keeps the diff order deterministic). A stage present on only one side is
/// reported as "removed"/"added"; a stage present on both sides is reported
/// as "changed" only when status, outcome, branch, or artifact counts
/// differ. Top-level counters (transitions, artifacts, checkpoints) are
/// compared by length only.
pub fn diff_run_records(left: &RunRecord, right: &RunRecord) -> RunDiffReport {
    let mut stage_diffs = Vec::new();
    let mut all_node_ids = BTreeSet::new();
    all_node_ids.extend(left.stages.iter().map(|stage| stage.node_id.clone()));
    all_node_ids.extend(right.stages.iter().map(|stage| stage.node_id.clone()));

    for node_id in all_node_ids {
        let left_stage = left.stages.iter().find(|stage| stage.node_id == node_id);
        let right_stage = right.stages.iter().find(|stage| stage.node_id == node_id);
        match (left_stage, right_stage) {
            (Some(_), None) => stage_diffs.push(RunStageDiffRecord {
                node_id,
                change: "removed".to_string(),
                details: vec!["stage missing from right run".to_string()],
            }),
            (None, Some(_)) => stage_diffs.push(RunStageDiffRecord {
                node_id,
                change: "added".to_string(),
                details: vec!["stage missing from left run".to_string()],
            }),
            (Some(left_stage), Some(right_stage)) => {
                // Collect field-level deltas; only emit a record when at
                // least one field differs.
                let mut details = Vec::new();
                if left_stage.status != right_stage.status {
                    details.push(format!(
                        "status: {} -> {}",
                        left_stage.status, right_stage.status
                    ));
                }
                if left_stage.outcome != right_stage.outcome {
                    details.push(format!(
                        "outcome: {} -> {}",
                        left_stage.outcome, right_stage.outcome
                    ));
                }
                if left_stage.branch != right_stage.branch {
                    details.push(format!(
                        "branch: {:?} -> {:?}",
                        left_stage.branch, right_stage.branch
                    ));
                }
                // Artifact comparisons are count-based only; contents are
                // not inspected here.
                if left_stage.produced_artifact_ids.len() != right_stage.produced_artifact_ids.len()
                {
                    details.push(format!(
                        "produced_artifacts: {} -> {}",
                        left_stage.produced_artifact_ids.len(),
                        right_stage.produced_artifact_ids.len()
                    ));
                }
                if left_stage.artifacts.len() != right_stage.artifacts.len() {
                    details.push(format!(
                        "artifact_records: {} -> {}",
                        left_stage.artifacts.len(),
                        right_stage.artifacts.len()
                    ));
                }
                if !details.is_empty() {
                    stage_diffs.push(RunStageDiffRecord {
                        node_id,
                        change: "changed".to_string(),
                        details,
                    });
                }
            }
            // Unreachable: node_id came from one of the two runs.
            (None, None) => {}
        }
    }

    let status_changed = left.status != right.status;
    // "Identical" is a count-level notion: equal status, no stage diffs,
    // and matching lengths of the three top-level collections.
    let identical = !status_changed
        && stage_diffs.is_empty()
        && left.transitions.len() == right.transitions.len()
        && left.artifacts.len() == right.artifacts.len()
        && left.checkpoints.len() == right.checkpoints.len();

    RunDiffReport {
        left_run_id: left.id.clone(),
        right_run_id: right.id.clone(),
        identical,
        status_changed,
        left_status: left.status.clone(),
        right_status: right.status.clone(),
        stage_diffs,
        transition_count_delta: right.transitions.len() as isize - left.transitions.len() as isize,
        artifact_count_delta: right.artifacts.len() as isize - left.artifacts.len() as isize,
        checkpoint_count_delta: right.checkpoints.len() as isize - left.checkpoints.len() as isize,
    }
}
2390
2391pub fn push_execution_policy(policy: CapabilityPolicy) {
2392 EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
2393}
2394
2395pub fn pop_execution_policy() {
2396 EXECUTION_POLICY_STACK.with(|stack| {
2397 stack.borrow_mut().pop();
2398 });
2399}
2400
2401pub fn current_execution_policy() -> Option<CapabilityPolicy> {
2402 EXECUTION_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
2403}
2404
2405pub fn current_tool_metadata(tool: &str) -> Option<ToolRuntimePolicyMetadata> {
2406 current_execution_policy().and_then(|policy| policy.tool_metadata.get(tool).cloned())
2407}
2408
2409fn policy_allows_tool(policy: &CapabilityPolicy, tool: &str) -> bool {
2410 policy.tools.is_empty() || policy.tools.iter().any(|allowed| allowed == tool)
2411}
2412
2413fn policy_allows_capability(policy: &CapabilityPolicy, capability: &str, op: &str) -> bool {
2414 policy.capabilities.is_empty()
2415 || policy
2416 .capabilities
2417 .get(capability)
2418 .is_some_and(|ops| ops.is_empty() || ops.iter().any(|allowed| allowed == op))
2419}
2420
/// Returns true when the policy's side-effect ceiling is at least as
/// permissive as `requested`. A policy without a configured ceiling allows
/// everything.
fn policy_allows_side_effect(policy: &CapabilityPolicy, requested: &str) -> bool {
    // Orders side-effect levels from least to most powerful.
    fn rank(v: &str) -> usize {
        match v {
            "none" => 0,
            "read_only" => 1,
            "workspace_write" => 2,
            "process_exec" => 3,
            "network" => 4,
            // NOTE(review): unknown strings rank above "network". For an
            // unknown *requested* level this fails closed (only another
            // unknown ceiling permits it), but an unrecognized *configured*
            // ceiling (e.g. a typo) ranks 5 and allows every request —
            // confirm this fail-open behavior is intended.
            _ => 5,
        }
    }
    policy
        .side_effect_level
        .as_ref()
        .map(|allowed| rank(allowed) >= rank(requested))
        .unwrap_or(true)
}
2438
2439fn reject_policy(reason: String) -> Result<(), VmError> {
2440 Err(VmError::CategorizedError {
2441 message: reason,
2442 category: crate::value::ErrorCategory::ToolRejected,
2443 })
2444}
2445
/// Heuristically classifies a tool's mutation impact from its name when no
/// explicit metadata classification is available.
///
/// Checks run in priority order: MCP tools are host-defined, exec-style
/// tools are ambient side effects, delete/move-style prefixes are
/// destructive, write/edit-style names touch the workspace, and anything
/// else is treated as read-only. Matching is case-insensitive.
fn fallback_mutation_classification(tool_name: &str) -> String {
    let lower = tool_name.to_ascii_lowercase();
    // MCP tools delegate semantics to the host.
    if lower.starts_with("mcp_") {
        return "host_defined".to_string();
    }
    // Arbitrary command execution can do anything.
    let is_exec = matches!(
        lower.as_str(),
        "exec" | "shell" | "exec_at" | "shell_at" | "run"
    ) || lower.starts_with("run_");
    if is_exec {
        return "ambient_side_effect".to_string();
    }
    // Names that remove or relocate data.
    let destructive_prefixes = ["delete", "remove", "move", "rename"];
    if destructive_prefixes
        .iter()
        .any(|prefix| lower.starts_with(prefix))
    {
        return "destructive".to_string();
    }
    // Names that create or modify workspace content.
    let mutating_fragments = ["write", "edit", "patch", "create", "scaffold"];
    let is_mutating = mutating_fragments
        .iter()
        .any(|fragment| lower.contains(fragment))
        || lower.starts_with("insert")
        || lower.starts_with("replace")
        || lower == "add_import";
    if is_mutating {
        return "apply_workspace".to_string();
    }
    "read_only".to_string()
}
2480
2481pub fn current_tool_mutation_classification(tool_name: &str) -> String {
2482 current_tool_metadata(tool_name)
2483 .and_then(|metadata| metadata.mutation_classification)
2484 .unwrap_or_else(|| fallback_mutation_classification(tool_name))
2485}
2486
2487pub fn current_tool_declared_paths(tool_name: &str, args: &serde_json::Value) -> Vec<String> {
2488 let Some(map) = args.as_object() else {
2489 return Vec::new();
2490 };
2491 let path_keys = current_tool_metadata(tool_name)
2492 .map(|metadata| metadata.path_params)
2493 .filter(|keys| !keys.is_empty())
2494 .unwrap_or_else(|| {
2495 vec![
2496 "path".to_string(),
2497 "file".to_string(),
2498 "cwd".to_string(),
2499 "repo".to_string(),
2500 "target".to_string(),
2501 "destination".to_string(),
2502 ]
2503 });
2504 let mut paths = Vec::new();
2505 for key in path_keys {
2506 if let Some(value) = map.get(&key).and_then(|value| value.as_str()) {
2507 if !value.is_empty() {
2508 paths.push(value.to_string());
2509 }
2510 }
2511 }
2512 if let Some(items) = map.get("paths").and_then(|value| value.as_array()) {
2513 for item in items {
2514 if let Some(value) = item.as_str() {
2515 if !value.is_empty() {
2516 paths.push(value.to_string());
2517 }
2518 }
2519 }
2520 }
2521 paths.sort();
2522 paths.dedup();
2523 paths
2524}
2525
/// Enforces the innermost execution policy against a builtin invocation.
///
/// With no active policy everything is allowed. Otherwise each builtin is
/// mapped to the tool/capability/side-effect checks it must clear; builtins
/// not listed here pass unchecked. Returns a `ToolRejected` categorized
/// error on the first failed check.
pub fn enforce_current_policy_for_builtin(name: &str, args: &[VmValue]) -> Result<(), VmError> {
    let Some(policy) = current_execution_policy() else {
        return Ok(());
    };
    match name {
        // Read access: needs the tool itself plus workspace.read_text.
        "read" | "read_file" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "read_text")
            {
                return reject_policy(format!(
                    "builtin '{name}' exceeds workspace.read_text ceiling"
                ));
            }
        }
        "search" | "list_dir" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "list")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
            }
        }
        // Existence checks only need the capability, not a tool grant.
        "file_exists" | "stat" => {
            if !policy_allows_capability(&policy, "workspace", "exists") {
                return reject_policy(format!("builtin '{name}' exceeds workspace.exists ceiling"));
            }
        }
        // All write-style builtins are gated behind the 'edit' tool grant
        // plus workspace.write_text and the workspace_write side effect.
        "edit" | "write_file" | "append_file" | "mkdir" | "copy_file" => {
            if !policy_allows_tool(&policy, "edit")
                || !policy_allows_capability(&policy, "workspace", "write_text")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace write ceiling"));
            }
        }
        "delete_file" => {
            if !policy_allows_capability(&policy, "workspace", "delete")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'delete_file' exceeds workspace.delete ceiling".to_string(),
                );
            }
        }
        "apply_edit" => {
            if !policy_allows_capability(&policy, "workspace", "apply_edit")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'apply_edit' exceeds workspace.apply_edit ceiling".to_string(),
                );
            }
        }
        // Command execution requires the 'run' tool, process.exec, and the
        // process_exec side-effect level.
        "exec" | "exec_at" | "shell" | "shell_at" | "run_command" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        // HTTP builtins only check the network side-effect ceiling.
        "http_get" | "http_post" | "http_put" | "http_patch" | "http_delete" | "http_request" => {
            if !policy_allows_side_effect(&policy, "network") {
                return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
            }
        }
        // MCP builtins are treated like process execution so a read-only
        // policy cannot escape through an MCP server.
        "mcp_connect"
        | "mcp_call"
        | "mcp_list_tools"
        | "mcp_list_resources"
        | "mcp_list_resource_templates"
        | "mcp_read_resource"
        | "mcp_list_prompts"
        | "mcp_get_prompt"
        | "mcp_server_info"
        | "mcp_disconnect" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        // host_call names must be "capability.operation"; the pair is then
        // checked against the capability map and a derived side effect.
        "host_call" => {
            let name = args.first().map(|v| v.display()).unwrap_or_default();
            let Some((capability, op)) = name.split_once('.') else {
                return reject_policy(format!(
                    "host_call '{name}' must use capability.operation naming"
                ));
            };
            if !policy_allows_capability(&policy, capability, op) {
                return reject_policy(format!(
                    "host_call {capability}.{op} exceeds capability ceiling"
                ));
            }
            let requested_side_effect = match (capability, op) {
                ("workspace", "write_text" | "apply_edit" | "delete") => "workspace_write",
                ("process", "exec") => "process_exec",
                _ => "read_only",
            };
            if !policy_allows_side_effect(&policy, requested_side_effect) {
                return reject_policy(format!(
                    "host_call {capability}.{op} exceeds side-effect ceiling"
                ));
            }
        }
        // Builtins without an entry here are not policy-gated.
        _ => {}
    }
    Ok(())
}
2635
2636pub fn enforce_current_policy_for_bridge_builtin(name: &str) -> Result<(), VmError> {
2637 if current_execution_policy().is_some() {
2638 return reject_policy(format!(
2639 "bridged builtin '{name}' exceeds execution policy; declare an explicit capability/tool surface instead"
2640 ));
2641 }
2642 Ok(())
2643}
2644
2645pub fn enforce_current_policy_for_tool(tool_name: &str) -> Result<(), VmError> {
2646 let Some(policy) = current_execution_policy() else {
2647 return Ok(());
2648 };
2649 if !policy_allows_tool(&policy, tool_name) {
2650 return reject_policy(format!("tool '{tool_name}' exceeds tool ceiling"));
2651 }
2652 if let Some(metadata) = policy.tool_metadata.get(tool_name) {
2653 for (capability, ops) in &metadata.capabilities {
2654 for op in ops {
2655 if !policy_allows_capability(&policy, capability, op) {
2656 return reject_policy(format!(
2657 "tool '{tool_name}' exceeds capability ceiling: {capability}.{op}"
2658 ));
2659 }
2660 }
2661 }
2662 if let Some(side_effect_level) = metadata.side_effect_level.as_deref() {
2663 if !policy_allows_side_effect(&policy, side_effect_level) {
2664 return reject_policy(format!(
2665 "tool '{tool_name}' exceeds side-effect ceiling: {side_effect_level}"
2666 ));
2667 }
2668 }
2669 }
2670 Ok(())
2671}
2672
2673fn compact_transcript(transcript: &VmValue, keep_last: usize) -> Option<VmValue> {
2674 let dict = transcript.as_dict()?;
2675 let messages = match dict.get("messages") {
2676 Some(VmValue::List(list)) => list.iter().cloned().collect::<Vec<_>>(),
2677 _ => Vec::new(),
2678 };
2679 let retained = messages
2680 .into_iter()
2681 .rev()
2682 .take(keep_last)
2683 .collect::<Vec<_>>()
2684 .into_iter()
2685 .rev()
2686 .collect::<Vec<_>>();
2687 let mut compacted = dict.clone();
2688 compacted.insert(
2689 "messages".to_string(),
2690 VmValue::List(Rc::new(retained.clone())),
2691 );
2692 compacted.insert(
2693 "events".to_string(),
2694 VmValue::List(Rc::new(
2695 crate::llm::helpers::transcript_events_from_messages(&retained),
2696 )),
2697 );
2698 Some(VmValue::Dict(Rc::new(compacted)))
2699}
2700
/// Redacts a transcript according to a visibility setting.
///
/// Only "public"/"public_only" trigger redaction; any other (or missing)
/// visibility returns the transcript unchanged. Redaction drops messages
/// whose role is "tool_result" and events whose "visibility" field is not
/// "public". Entries that lack the inspected field are kept (the filters
/// default to true). Returns `None` when the value is not a dict.
fn redact_transcript_visibility(transcript: &VmValue, visibility: Option<&str>) -> Option<VmValue> {
    let Some(visibility) = visibility else {
        return Some(transcript.clone());
    };
    if visibility != "public" && visibility != "public_only" {
        return Some(transcript.clone());
    }
    let dict = transcript.as_dict()?;
    // Keep every message except tool results; messages without a readable
    // role are kept.
    let public_messages = match dict.get("messages") {
        Some(VmValue::List(list)) => list
            .iter()
            .filter(|message| {
                message
                    .as_dict()
                    .and_then(|d| d.get("role"))
                    .map(|v| v.display())
                    .map(|role| role != "tool_result")
                    .unwrap_or(true)
            })
            .cloned()
            .collect::<Vec<_>>(),
        _ => Vec::new(),
    };
    // Keep only events marked public; events without a visibility field
    // are kept.
    let public_events = match dict.get("events") {
        Some(VmValue::List(list)) => list
            .iter()
            .filter(|event| {
                event
                    .as_dict()
                    .and_then(|d| d.get("visibility"))
                    .map(|v| v.display())
                    .map(|value| value == "public")
                    .unwrap_or(true)
            })
            .cloned()
            .collect::<Vec<_>>(),
        _ => Vec::new(),
    };
    let mut redacted = dict.clone();
    redacted.insert(
        "messages".to_string(),
        VmValue::List(Rc::new(public_messages)),
    );
    redacted.insert("events".to_string(), VmValue::List(Rc::new(public_events)));
    Some(VmValue::Dict(Rc::new(redacted)))
}
2747
2748pub(crate) fn apply_input_transcript_policy(
2749 transcript: Option<VmValue>,
2750 policy: &TranscriptPolicy,
2751) -> Option<VmValue> {
2752 let mut transcript = transcript;
2753 match policy.mode.as_deref() {
2754 Some("reset") => return None,
2755 Some("fork") => {
2756 if let Some(VmValue::Dict(dict)) = transcript.as_ref() {
2757 let mut forked = dict.as_ref().clone();
2758 forked.insert(
2759 "id".to_string(),
2760 VmValue::String(Rc::from(new_id("transcript"))),
2761 );
2762 transcript = Some(VmValue::Dict(Rc::new(forked)));
2763 }
2764 }
2765 _ => {}
2766 }
2767 if policy.compact {
2768 let keep_last = policy.keep_last.unwrap_or(6);
2769 transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
2770 }
2771 transcript
2772}
2773
2774fn apply_output_transcript_policy(
2775 transcript: Option<VmValue>,
2776 policy: &TranscriptPolicy,
2777) -> Option<VmValue> {
2778 let mut transcript = transcript;
2779 if policy.compact {
2780 let keep_last = policy.keep_last.unwrap_or(6);
2781 transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
2782 }
2783 transcript.and_then(|value| redact_transcript_visibility(&value, policy.visibility.as_deref()))
2784}
2785
/// Executes a single workflow stage node and returns its raw result
/// payload, the artifact it produced, and the (policy-filtered) outgoing
/// transcript.
///
/// Flow: select input artifacts per the node's context policy, enforce the
/// input contract (transcript presence, min/max input counts), build the
/// prompt, then either run the node's verify command (kind == "verify"),
/// run an agent loop (agent mode or tools declared), or make a single LLM
/// call. The result payload is annotated with prompt/context metadata and
/// wrapped into a normalized `ArtifactRecord`.
pub async fn execute_stage_node(
    node_id: &str,
    node: &WorkflowNode,
    task: &str,
    artifacts: &[ArtifactRecord],
    transcript: Option<VmValue>,
) -> Result<(serde_json::Value, Vec<ArtifactRecord>, Option<VmValue>), VmError> {
    // When the context policy does not name include kinds, fall back to the
    // input contract's declared input kinds.
    let mut selection_policy = node.context_policy.clone();
    if selection_policy.include_kinds.is_empty() && !node.input_contract.input_kinds.is_empty() {
        selection_policy.include_kinds = node.input_contract.input_kinds.clone();
    }
    let selected = select_artifacts_adaptive(artifacts.to_vec(), &selection_policy);
    let rendered_context = render_artifacts_context(&selected, &node.context_policy);
    let transcript = apply_input_transcript_policy(transcript, &node.transcript_policy);
    // Input-contract enforcement: transcript requirement and input counts.
    if node.input_contract.require_transcript && transcript.is_none() {
        return Err(VmError::Runtime(format!(
            "workflow stage {node_id} requires transcript input"
        )));
    }
    if let Some(min_inputs) = node.input_contract.min_inputs {
        if selected.len() < min_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} requires at least {min_inputs} input artifacts"
            )));
        }
    }
    if let Some(max_inputs) = node.input_contract.max_inputs {
        if selected.len() > max_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} accepts at most {max_inputs} input artifacts"
            )));
        }
    }
    // Prepend rendered artifact context (when any) under the node's task
    // label, defaulting to "Task".
    let prompt = if rendered_context.is_empty() {
        task.to_string()
    } else {
        format!(
            "{rendered_context}\n\n{}:\n{task}",
            node.task_label
                .clone()
                .unwrap_or_else(|| "Task".to_string())
        )
    };

    // Tool-calling wire format, overridable via env; defaults to "text".
    let tool_format = std::env::var("HARN_AGENT_TOOL_FORMAT")
        .ok()
        .filter(|value| !value.trim().is_empty())
        .unwrap_or_else(|| "text".to_string());
    let mut llm_result = if node.kind == "verify" {
        // Verify nodes run a shell command instead of calling the LLM.
        if let Some(command) = node
            .verify
            .as_ref()
            .and_then(|verify| verify.as_object())
            .and_then(|verify| verify.get("command"))
            .and_then(|value| value.as_str())
            .map(str::trim)
            .filter(|value| !value.is_empty())
        {
            // Platform-appropriate shell invocation.
            let mut process = if cfg!(target_os = "windows") {
                let mut cmd = tokio::process::Command::new("cmd");
                cmd.arg("/C").arg(command);
                cmd
            } else {
                let mut cmd = tokio::process::Command::new("/bin/sh");
                cmd.arg("-lc").arg(command);
                cmd
            };
            process.stdin(std::process::Stdio::null());
            // Inherit cwd/env from the current execution context, if set.
            if let Some(context) = crate::stdlib::process::current_execution_context() {
                if let Some(cwd) = context.cwd.filter(|cwd| !cwd.is_empty()) {
                    process.current_dir(cwd);
                }
                if !context.env.is_empty() {
                    process.envs(context.env);
                }
            }
            let output = process
                .output()
                .await
                .map_err(|e| VmError::Runtime(format!("workflow verify exec failed: {e}")))?;
            let stdout = String::from_utf8_lossy(&output.stdout).to_string();
            let stderr = String::from_utf8_lossy(&output.stderr).to_string();
            // Merge the two streams for the stage's visible text.
            let combined = if stderr.is_empty() {
                stdout.clone()
            } else if stdout.is_empty() {
                stderr.clone()
            } else {
                format!("{stdout}\n{stderr}")
            };
            serde_json::json!({
                "status": "completed",
                "text": combined,
                "visible_text": combined,
                "command": command,
                "stdout": stdout,
                "stderr": stderr,
                "exit_status": output.status.code().unwrap_or(-1),
                "success": output.status.success(),
            })
        } else {
            // Verify node without a command: trivially completed.
            serde_json::json!({
                "status": "completed",
                "text": "",
                "visible_text": "",
            })
        }
    } else {
        // LLM-backed stage: assemble call options from the model policy.
        let mut options = BTreeMap::new();
        if let Some(provider) = &node.model_policy.provider {
            options.insert(
                "provider".to_string(),
                VmValue::String(Rc::from(provider.clone())),
            );
        }
        if let Some(model) = &node.model_policy.model {
            options.insert(
                "model".to_string(),
                VmValue::String(Rc::from(model.clone())),
            );
        }
        if let Some(model_tier) = &node.model_policy.model_tier {
            options.insert(
                "model_tier".to_string(),
                VmValue::String(Rc::from(model_tier.clone())),
            );
        }
        if let Some(temperature) = node.model_policy.temperature {
            options.insert("temperature".to_string(), VmValue::Float(temperature));
        }
        if let Some(max_tokens) = node.model_policy.max_tokens {
            options.insert("max_tokens".to_string(), VmValue::Int(max_tokens));
        }
        // Only pass tools when the node declares a non-empty tool set.
        let tool_names = workflow_tool_names(&node.tools);
        if !matches!(node.tools, serde_json::Value::Null) && !tool_names.is_empty() {
            options.insert(
                "tools".to_string(),
                crate::stdlib::json_to_vm_value(&node.tools),
            );
        }
        if let Some(transcript) = transcript.clone() {
            options.insert("transcript".to_string(), transcript);
        }

        // Args shape: (prompt, optional system prompt, options dict).
        let args = vec![
            VmValue::String(Rc::from(prompt.clone())),
            node.system
                .clone()
                .map(|s| VmValue::String(Rc::from(s)))
                .unwrap_or(VmValue::Nil),
            VmValue::Dict(Rc::new(options)),
        ];
        let mut opts = extract_llm_options(&args)?;

        // Agent mode (explicit, or implied by declared tools) runs the full
        // agent loop; otherwise a single LLM call is made.
        if node.mode.as_deref() == Some("agent") || !tool_names.is_empty() {
            crate::llm::run_agent_loop_internal(
                &mut opts,
                crate::llm::AgentLoopConfig {
                    persistent: true,
                    max_iterations: 12,
                    max_nudges: 3,
                    nudge: None,
                    done_sentinel: node.done_sentinel.clone(),
                    break_unless_phase: None,
                    tool_retries: 0,
                    tool_backoff_ms: 1000,
                    tool_format: tool_format.clone(),
                    auto_compact: None,
                    context_callback: None,
                    policy: None,
                    daemon: false,
                    llm_retries: 2,
                    llm_backoff_ms: 2000,
                },
            )
            .await?
        } else {
            let result = vm_call_llm_full(&opts).await?;
            crate::llm::agent_loop_result_from_llm(&result, opts)
        }
    };
    // Annotate the result payload with the prompt/context used, for replay
    // and debugging.
    if let Some(payload) = llm_result.as_object_mut() {
        payload.insert("prompt".to_string(), serde_json::json!(prompt));
        payload.insert(
            "system_prompt".to_string(),
            serde_json::json!(node.system.clone().unwrap_or_default()),
        );
        payload.insert(
            "rendered_context".to_string(),
            serde_json::json!(rendered_context),
        );
        payload.insert(
            "selected_artifact_ids".to_string(),
            serde_json::json!(selected
                .iter()
                .map(|artifact| artifact.id.clone())
                .collect::<Vec<_>>()),
        );
        payload.insert(
            "selected_artifact_titles".to_string(),
            serde_json::json!(selected
                .iter()
                .map(|artifact| artifact.title.clone())
                .collect::<Vec<_>>()),
        );
        payload.insert(
            "tool_calling_mode".to_string(),
            serde_json::json!(tool_format.clone()),
        );
    }

    let visible_text = llm_result["text"].as_str().unwrap_or_default().to_string();
    // Outgoing transcript comes from the result, then passes through the
    // node's output transcript policy (compaction/redaction).
    let transcript = llm_result
        .get("transcript")
        .cloned()
        .map(|value| crate::stdlib::json_to_vm_value(&value));
    let transcript = apply_output_transcript_policy(transcript, &node.transcript_policy);
    // Output kind: first declared output kind, else a kind-based default.
    let output_kind = node
        .output_contract
        .output_kinds
        .first()
        .cloned()
        .unwrap_or_else(|| {
            if node.kind == "verify" {
                "verification_result".to_string()
            } else {
                "artifact".to_string()
            }
        });
    let mut metadata = BTreeMap::new();
    metadata.insert(
        "input_artifact_ids".to_string(),
        serde_json::json!(selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect::<Vec<_>>()),
    );
    metadata.insert("node_kind".to_string(), serde_json::json!(node.kind));
    // Wrap the stage output as an artifact whose lineage points at the
    // selected inputs.
    let artifact = ArtifactRecord {
        type_name: "artifact".to_string(),
        id: new_id("artifact"),
        kind: output_kind,
        title: Some(format!("stage {node_id} output")),
        text: Some(visible_text),
        data: Some(llm_result.clone()),
        source: Some(node_id.to_string()),
        created_at: now_rfc3339(),
        freshness: Some("fresh".to_string()),
        priority: None,
        lineage: selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect(),
        relevance: Some(1.0),
        estimated_tokens: None,
        stage: Some(node_id.to_string()),
        metadata,
    }
    .normalize();

    Ok((llm_result, vec![artifact], transcript))
}
3047
3048pub fn next_nodes_for(
3049 graph: &WorkflowGraph,
3050 current: &str,
3051 branch: Option<&str>,
3052) -> Vec<WorkflowEdge> {
3053 let mut matching: Vec<WorkflowEdge> = graph
3054 .edges
3055 .iter()
3056 .filter(|edge| edge.from == current && edge.branch.as_deref() == branch)
3057 .cloned()
3058 .collect();
3059 if matching.is_empty() {
3060 matching = graph
3061 .edges
3062 .iter()
3063 .filter(|edge| edge.from == current && edge.branch.is_none())
3064 .cloned()
3065 .collect();
3066 }
3067 matching
3068}
3069
3070pub fn next_node_for(graph: &WorkflowGraph, current: &str, branch: &str) -> Option<String> {
3071 next_nodes_for(graph, current, Some(branch))
3072 .into_iter()
3073 .next()
3074 .map(|edge| edge.to)
3075}
3076
3077pub fn append_audit_entry(
3078 graph: &mut WorkflowGraph,
3079 op: &str,
3080 node_id: Option<String>,
3081 reason: Option<String>,
3082 metadata: BTreeMap<String, serde_json::Value>,
3083) {
3084 graph.audit_log.push(WorkflowAuditEntry {
3085 id: new_id("audit"),
3086 op: op.to_string(),
3087 node_id,
3088 timestamp: now_rfc3339(),
3089 reason,
3090 metadata,
3091 });
3092}
3093
3094pub fn builtin_ceiling() -> CapabilityPolicy {
3095 CapabilityPolicy {
3096 tools: Vec::new(),
3099 capabilities: BTreeMap::from([
3100 (
3101 "workspace".to_string(),
3102 vec![
3103 "read_text".to_string(),
3104 "write_text".to_string(),
3105 "apply_edit".to_string(),
3106 "delete".to_string(),
3107 "exists".to_string(),
3108 "list".to_string(),
3109 ],
3110 ),
3111 ("process".to_string(), vec!["exec".to_string()]),
3112 ]),
3113 workspace_roots: Vec::new(),
3114 side_effect_level: Some("network".to_string()),
3115 recursion_limit: Some(8),
3116 tool_arg_constraints: Vec::new(),
3117 tool_metadata: BTreeMap::new(),
3118 }
3119}
3120
3121#[cfg(test)]
3122mod tests {
3123 use super::*;
3124
    // Requesting a tool ("edit") beyond the ceiling's grant ("read") must
    // make the intersection fail with a host-ceiling error.
    #[test]
    fn capability_intersection_rejects_privilege_expansion() {
        let ceiling = CapabilityPolicy {
            tools: vec!["read".to_string()],
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(2),
            ..Default::default()
        };
        let requested = CapabilityPolicy {
            tools: vec!["read".to_string(), "edit".to_string()],
            ..Default::default()
        };
        let error = ceiling.intersect(&requested).unwrap_err();
        assert!(error.contains("host ceiling"));
    }
3140
    // normalize() on an empty record must fill a generated session id and
    // the "read_only"/"host_enforced" defaults.
    #[test]
    fn mutation_session_normalize_fills_defaults() {
        let normalized = MutationSessionRecord::default().normalize();
        assert!(normalized.session_id.starts_with("session_"));
        assert_eq!(normalized.mutation_scope, "read_only");
        assert_eq!(normalized.approval_mode, "host_enforced");
    }
3148
    // Installing a session must make it readable via the thread-local
    // accessor, and installing None must clear it again.
    #[test]
    fn install_current_mutation_session_round_trips() {
        install_current_mutation_session(Some(MutationSessionRecord {
            session_id: "session_test".to_string(),
            mutation_scope: "apply_workspace".to_string(),
            approval_mode: "explicit".to_string(),
            ..Default::default()
        }));
        let current = current_mutation_session().expect("session installed");
        assert_eq!(current.session_id, "session_test");
        assert_eq!(current.mutation_scope, "apply_workspace");
        assert_eq!(current.approval_mode, "explicit");

        install_current_mutation_session(None);
        assert!(current_mutation_session().is_none());
    }
3165
    // While any execution policy is active, bridged builtins (which have no
    // declared capability surface) must be rejected as ToolRejected.
    #[test]
    fn active_execution_policy_rejects_unknown_bridge_builtin() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
        // Pop before asserting so the thread-local stack is clean even if
        // the assertion panics later in other tests on this thread.
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }
3188
    // A read-only policy must reject MCP builtins, since MCP is gated like
    // process execution and cannot be used to escape the policy.
    #[test]
    fn active_execution_policy_rejects_mcp_escape_hatch() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }
3211
    // The legacy top-level act/verify/repair shape must normalize into a
    // workflow graph with those three nodes and "act" as the entry point.
    #[test]
    fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "legacy",
            "act": {"mode": "llm"},
            "verify": {"kind": "verify"},
            "repair": {"mode": "agent"},
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        assert_eq!(graph.type_name, "workflow_graph");
        assert!(graph.nodes.contains_key("act"));
        assert!(graph.nodes.contains_key("verify"));
        assert!(graph.nodes.contains_key("repair"));
        assert_eq!(graph.entry, "act");
    }
3227
    // A node whose tools are given as a tool_registry object must survive
    // normalization and expose the registry's tool names.
    #[test]
    fn workflow_normalization_accepts_tool_registry_nodes() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "registry_tools",
            "entry": "implement",
            "nodes": {
                "implement": {
                    "kind": "stage",
                    "mode": "agent",
                    "tools": {
                        "_type": "tool_registry",
                        "tools": [
                            {"name": "read", "description": "Read files"},
                            {"name": "run", "description": "Run commands"}
                        ]
                    }
                }
            },
            "edges": []
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        let node = graph.nodes.get("implement").unwrap();
        assert_eq!(workflow_tool_names(&node.tools), vec!["read", "run"]);
    }
3252
3253 #[test]
3254 fn artifact_selection_honors_budget_and_priority() {
3255 let policy = ContextPolicy {
3256 max_artifacts: Some(2),
3257 max_tokens: Some(30),
3258 prefer_recent: true,
3259 prefer_fresh: true,
3260 prioritize_kinds: vec!["verification_result".to_string()],
3261 ..Default::default()
3262 };
3263 let artifacts = vec![
3264 ArtifactRecord {
3265 type_name: "artifact".to_string(),
3266 id: "a".to_string(),
3267 kind: "summary".to_string(),
3268 text: Some("short".to_string()),
3269 relevance: Some(0.9),
3270 created_at: now_rfc3339(),
3271 ..Default::default()
3272 }
3273 .normalize(),
3274 ArtifactRecord {
3275 type_name: "artifact".to_string(),
3276 id: "b".to_string(),
3277 kind: "summary".to_string(),
3278 text: Some("this is a much larger artifact body".to_string()),
3279 relevance: Some(1.0),
3280 created_at: now_rfc3339(),
3281 ..Default::default()
3282 }
3283 .normalize(),
3284 ArtifactRecord {
3285 type_name: "artifact".to_string(),
3286 id: "c".to_string(),
3287 kind: "summary".to_string(),
3288 text: Some("tiny".to_string()),
3289 relevance: Some(0.5),
3290 created_at: now_rfc3339(),
3291 ..Default::default()
3292 }
3293 .normalize(),
3294 ];
3295 let selected = select_artifacts(artifacts, &policy);
3296 assert_eq!(selected.len(), 2);
3297 assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
3298 }
3299
3300 #[test]
3301 fn workflow_validation_rejects_condition_without_true_false_edges() {
3302 let graph = WorkflowGraph {
3303 entry: "gate".to_string(),
3304 nodes: BTreeMap::from([(
3305 "gate".to_string(),
3306 WorkflowNode {
3307 id: Some("gate".to_string()),
3308 kind: "condition".to_string(),
3309 ..Default::default()
3310 },
3311 )]),
3312 edges: vec![WorkflowEdge {
3313 from: "gate".to_string(),
3314 to: "next".to_string(),
3315 branch: Some("true".to_string()),
3316 label: None,
3317 }],
3318 ..Default::default()
3319 };
3320 let report = validate_workflow(&graph, None);
3321 assert!(!report.valid);
3322 assert!(report
3323 .errors
3324 .iter()
3325 .any(|error| error.contains("true") && error.contains("false")));
3326 }
3327
3328 #[test]
3329 fn replay_fixture_round_trip_passes() {
3330 let run = RunRecord {
3331 type_name: "run_record".to_string(),
3332 id: "run_1".to_string(),
3333 workflow_id: "wf".to_string(),
3334 workflow_name: Some("demo".to_string()),
3335 task: "demo".to_string(),
3336 status: "completed".to_string(),
3337 started_at: "1".to_string(),
3338 finished_at: Some("2".to_string()),
3339 parent_run_id: None,
3340 root_run_id: Some("run_1".to_string()),
3341 stages: vec![RunStageRecord {
3342 id: "stage_1".to_string(),
3343 node_id: "act".to_string(),
3344 kind: "stage".to_string(),
3345 status: "completed".to_string(),
3346 outcome: "success".to_string(),
3347 branch: Some("success".to_string()),
3348 started_at: "1".to_string(),
3349 finished_at: Some("2".to_string()),
3350 visible_text: Some("done".to_string()),
3351 private_reasoning: None,
3352 transcript: None,
3353 verification: None,
3354 usage: None,
3355 artifacts: vec![ArtifactRecord {
3356 type_name: "artifact".to_string(),
3357 id: "a1".to_string(),
3358 kind: "summary".to_string(),
3359 text: Some("done".to_string()),
3360 created_at: "1".to_string(),
3361 ..Default::default()
3362 }
3363 .normalize()],
3364 consumed_artifact_ids: vec![],
3365 produced_artifact_ids: vec!["a1".to_string()],
3366 attempts: vec![],
3367 metadata: BTreeMap::new(),
3368 }],
3369 transitions: vec![],
3370 checkpoints: vec![],
3371 pending_nodes: vec![],
3372 completed_nodes: vec!["act".to_string()],
3373 child_runs: vec![],
3374 artifacts: vec![],
3375 policy: CapabilityPolicy::default(),
3376 execution: None,
3377 transcript: None,
3378 usage: None,
3379 replay_fixture: None,
3380 trace_spans: vec![],
3381 metadata: BTreeMap::new(),
3382 persisted_path: None,
3383 };
3384 let fixture = replay_fixture_from_run(&run);
3385 let report = evaluate_run_against_fixture(&run, &fixture);
3386 assert!(report.pass);
3387 assert!(report.failures.is_empty());
3388 }
3389
3390 #[test]
3391 fn replay_eval_suite_reports_failed_case() {
3392 let good = RunRecord {
3393 id: "run_good".to_string(),
3394 workflow_id: "wf".to_string(),
3395 status: "completed".to_string(),
3396 stages: vec![RunStageRecord {
3397 node_id: "act".to_string(),
3398 status: "completed".to_string(),
3399 outcome: "success".to_string(),
3400 ..Default::default()
3401 }],
3402 ..Default::default()
3403 };
3404 let bad = RunRecord {
3405 id: "run_bad".to_string(),
3406 workflow_id: "wf".to_string(),
3407 status: "failed".to_string(),
3408 stages: vec![RunStageRecord {
3409 node_id: "act".to_string(),
3410 status: "failed".to_string(),
3411 outcome: "error".to_string(),
3412 ..Default::default()
3413 }],
3414 ..Default::default()
3415 };
3416 let suite = evaluate_run_suite(vec![
3417 (
3418 good.clone(),
3419 replay_fixture_from_run(&good),
3420 Some("good.json".to_string()),
3421 ),
3422 (
3423 bad.clone(),
3424 replay_fixture_from_run(&good),
3425 Some("bad.json".to_string()),
3426 ),
3427 ]);
3428 assert!(!suite.pass);
3429 assert_eq!(suite.total, 2);
3430 assert_eq!(suite.failed, 1);
3431 assert!(suite.cases.iter().any(|case| !case.pass));
3432 }
3433
3434 #[test]
3435 fn run_diff_reports_changed_stage() {
3436 let left = RunRecord {
3437 id: "left".to_string(),
3438 workflow_id: "wf".to_string(),
3439 status: "completed".to_string(),
3440 stages: vec![RunStageRecord {
3441 node_id: "act".to_string(),
3442 status: "completed".to_string(),
3443 outcome: "success".to_string(),
3444 ..Default::default()
3445 }],
3446 ..Default::default()
3447 };
3448 let right = RunRecord {
3449 id: "right".to_string(),
3450 workflow_id: "wf".to_string(),
3451 status: "failed".to_string(),
3452 stages: vec![RunStageRecord {
3453 node_id: "act".to_string(),
3454 status: "failed".to_string(),
3455 outcome: "error".to_string(),
3456 ..Default::default()
3457 }],
3458 ..Default::default()
3459 };
3460 let diff = diff_run_records(&left, &right);
3461 assert!(diff.status_changed);
3462 assert!(!diff.identical);
3463 assert_eq!(diff.stage_diffs.len(), 1);
3464 }
3465
3466 #[test]
3467 fn eval_suite_manifest_can_fail_on_baseline_diff() {
3468 let temp_dir =
3469 std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
3470 std::fs::create_dir_all(&temp_dir).unwrap();
3471 let baseline_path = temp_dir.join("baseline.json");
3472 let candidate_path = temp_dir.join("candidate.json");
3473
3474 let baseline = RunRecord {
3475 id: "baseline".to_string(),
3476 workflow_id: "wf".to_string(),
3477 status: "completed".to_string(),
3478 stages: vec![RunStageRecord {
3479 node_id: "act".to_string(),
3480 status: "completed".to_string(),
3481 outcome: "success".to_string(),
3482 ..Default::default()
3483 }],
3484 ..Default::default()
3485 };
3486 let candidate = RunRecord {
3487 id: "candidate".to_string(),
3488 workflow_id: "wf".to_string(),
3489 status: "failed".to_string(),
3490 stages: vec![RunStageRecord {
3491 node_id: "act".to_string(),
3492 status: "failed".to_string(),
3493 outcome: "error".to_string(),
3494 ..Default::default()
3495 }],
3496 ..Default::default()
3497 };
3498
3499 save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
3500 save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();
3501
3502 let manifest = EvalSuiteManifest {
3503 base_dir: Some(temp_dir.display().to_string()),
3504 cases: vec![EvalSuiteCase {
3505 label: Some("candidate".to_string()),
3506 run_path: "candidate.json".to_string(),
3507 fixture_path: None,
3508 compare_to: Some("baseline.json".to_string()),
3509 }],
3510 ..Default::default()
3511 };
3512 let suite = evaluate_run_suite_manifest(&manifest).unwrap();
3513 assert!(!suite.pass);
3514 assert_eq!(suite.failed, 1);
3515 assert!(suite.cases[0].comparison.is_some());
3516 assert!(suite.cases[0]
3517 .failures
3518 .iter()
3519 .any(|failure| failure.contains("baseline")));
3520 }
3521
3522 #[test]
3523 fn render_unified_diff_marks_removed_and_added_lines() {
3524 let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
3525 assert!(diff.contains("--- a/src/main.rs"));
3526 assert!(diff.contains("+++ b/src/main.rs"));
3527 assert!(diff.contains("-old"));
3528 assert!(diff.contains("+new"));
3529 assert!(diff.contains(" same"));
3530 }
3531
3532 #[test]
3533 fn execution_policy_rejects_process_exec_when_read_only() {
3534 push_execution_policy(CapabilityPolicy {
3535 side_effect_level: Some("read_only".to_string()),
3536 capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
3537 ..Default::default()
3538 });
3539 let result = enforce_current_policy_for_builtin("exec", &[]);
3540 pop_execution_policy();
3541 assert!(result.is_err());
3542 }
3543
3544 #[test]
3545 fn execution_policy_rejects_unlisted_tool() {
3546 push_execution_policy(CapabilityPolicy {
3547 tools: vec!["read".to_string()],
3548 ..Default::default()
3549 });
3550 let result = enforce_current_policy_for_tool("edit");
3551 pop_execution_policy();
3552 assert!(result.is_err());
3553 }
3554
3555 #[test]
3556 fn normalize_run_record_preserves_trace_spans() {
3557 let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
3558 "_type": "run_record",
3559 "id": "run_trace",
3560 "workflow_id": "wf",
3561 "status": "completed",
3562 "started_at": "1",
3563 "trace_spans": [
3564 {
3565 "span_id": 1,
3566 "parent_id": null,
3567 "kind": "pipeline",
3568 "name": "workflow",
3569 "start_ms": 0,
3570 "duration_ms": 42,
3571 "metadata": {"model": "demo"}
3572 }
3573 ]
3574 }));
3575
3576 let run = normalize_run_record(&value).unwrap();
3577 assert_eq!(run.trace_spans.len(), 1);
3578 assert_eq!(run.trace_spans[0].kind, "pipeline");
3579 assert_eq!(
3580 run.trace_spans[0].metadata["model"],
3581 serde_json::json!("demo")
3582 );
3583 }
3584
3585 #[test]
3588 fn pre_tool_hook_deny_blocks_execution() {
3589 clear_tool_hooks();
3590 register_tool_hook(ToolHook {
3591 pattern: "dangerous_*".to_string(),
3592 pre: Some(Rc::new(|_name, _args| {
3593 PreToolAction::Deny("blocked by policy".to_string())
3594 })),
3595 post: None,
3596 });
3597 let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
3598 clear_tool_hooks();
3599 assert!(matches!(result, PreToolAction::Deny(_)));
3600 }
3601
3602 #[test]
3603 fn pre_tool_hook_allow_passes_through() {
3604 clear_tool_hooks();
3605 register_tool_hook(ToolHook {
3606 pattern: "safe_*".to_string(),
3607 pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
3608 post: None,
3609 });
3610 let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
3611 clear_tool_hooks();
3612 assert!(matches!(result, PreToolAction::Allow));
3613 }
3614
3615 #[test]
3616 fn pre_tool_hook_modify_rewrites_args() {
3617 clear_tool_hooks();
3618 register_tool_hook(ToolHook {
3619 pattern: "*".to_string(),
3620 pre: Some(Rc::new(|_name, _args| {
3621 PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
3622 })),
3623 post: None,
3624 });
3625 let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
3626 clear_tool_hooks();
3627 match result {
3628 PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
3629 _ => panic!("expected Modify"),
3630 }
3631 }
3632
3633 #[test]
3634 fn post_tool_hook_modifies_result() {
3635 clear_tool_hooks();
3636 register_tool_hook(ToolHook {
3637 pattern: "exec".to_string(),
3638 pre: None,
3639 post: Some(Rc::new(|_name, result| {
3640 if result.contains("SECRET") {
3641 PostToolAction::Modify("[REDACTED]".to_string())
3642 } else {
3643 PostToolAction::Pass
3644 }
3645 })),
3646 });
3647 let result = run_post_tool_hooks("exec", "output with SECRET data");
3648 let clean = run_post_tool_hooks("exec", "clean output");
3649 clear_tool_hooks();
3650 assert_eq!(result, "[REDACTED]");
3651 assert_eq!(clean, "clean output");
3652 }
3653
3654 #[test]
3655 fn unmatched_hook_pattern_does_not_fire() {
3656 clear_tool_hooks();
3657 register_tool_hook(ToolHook {
3658 pattern: "exec".to_string(),
3659 pre: Some(Rc::new(|_name, _args| {
3660 PreToolAction::Deny("should not match".to_string())
3661 })),
3662 post: None,
3663 });
3664 let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
3665 clear_tool_hooks();
3666 assert!(matches!(result, PreToolAction::Allow));
3667 }
3668
3669 #[test]
3670 fn glob_match_patterns() {
3671 assert!(glob_match("*", "anything"));
3672 assert!(glob_match("exec*", "exec_at"));
3673 assert!(glob_match("*_file", "read_file"));
3674 assert!(!glob_match("exec*", "read_file"));
3675 assert!(glob_match("read_file", "read_file"));
3676 assert!(!glob_match("read_file", "write_file"));
3677 }
3678
3679 #[test]
3682 fn microcompact_snips_large_output() {
3683 let large = "x".repeat(50_000);
3684 let result = microcompact_tool_output(&large, 10_000);
3685 assert!(result.len() < 15_000);
3686 assert!(result.contains("snipped"));
3687 }
3688
3689 #[test]
3690 fn microcompact_preserves_small_output() {
3691 let small = "hello world";
3692 let result = microcompact_tool_output(small, 10_000);
3693 assert_eq!(result, small);
3694 }
3695
3696 #[test]
3697 fn microcompact_preserves_strong_keyword_lines_without_file_line() {
3698 let mut output = String::new();
3707 for i in 0..100 {
3708 output.push_str(&format!("verbose progress line {i}\n"));
3709 }
3710 output.push_str("--- FAIL: TestEmpty (0.00s)\n");
3711 output.push_str("thread 'tests::test_foo' panicked at src/lib.rs:42:5\n");
3712 output.push_str("FAILED tests/test_parser.py::test_empty\n");
3713 for i in 0..100 {
3714 output.push_str(&format!("more output after failures {i}\n"));
3715 }
3716 let result = microcompact_tool_output(&output, 2_000);
3717 assert!(
3718 result.contains("--- FAIL: TestEmpty"),
3719 "strong 'FAIL' keyword should preserve the line:\n{result}"
3720 );
3721 assert!(
3722 result.contains("panicked at"),
3723 "strong 'panic' keyword should preserve the line:\n{result}"
3724 );
3725 assert!(
3726 result.contains("FAILED tests/test_parser.py"),
3727 "strong 'FAIL' keyword should preserve pytest-style lines too:\n{result}"
3728 );
3729 }
3730
3731 #[test]
3732 fn auto_compact_messages_reduces_count() {
3733 let mut messages: Vec<serde_json::Value> = (0..20)
3734 .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
3735 .collect();
3736 let runtime = tokio::runtime::Builder::new_current_thread()
3737 .enable_all()
3738 .build()
3739 .unwrap();
3740 let compacted = runtime.block_on(auto_compact_messages(
3741 &mut messages,
3742 &AutoCompactConfig {
3743 compact_strategy: CompactStrategy::Truncate,
3744 keep_last: 6,
3745 ..Default::default()
3746 },
3747 None,
3748 ));
3749 let summary = compacted.unwrap();
3750 assert!(summary.is_some());
3751 assert!(messages.len() <= 7); assert!(messages[0]["content"]
3753 .as_str()
3754 .unwrap()
3755 .contains("auto-compacted"));
3756 }
3757
3758 #[test]
3759 fn auto_compact_noop_when_under_threshold() {
3760 let mut messages: Vec<serde_json::Value> = (0..4)
3761 .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
3762 .collect();
3763 let runtime = tokio::runtime::Builder::new_current_thread()
3764 .enable_all()
3765 .build()
3766 .unwrap();
3767 let compacted = runtime.block_on(auto_compact_messages(
3768 &mut messages,
3769 &AutoCompactConfig {
3770 compact_strategy: CompactStrategy::Truncate,
3771 keep_last: 6,
3772 ..Default::default()
3773 },
3774 None,
3775 ));
3776 assert!(compacted.unwrap().is_none());
3777 assert_eq!(messages.len(), 4);
3778 }
3779
3780 #[test]
3781 fn estimate_message_tokens_basic() {
3782 let messages = vec![
3783 serde_json::json!({"role": "user", "content": "a".repeat(400)}),
3784 serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
3785 ];
3786 let tokens = estimate_message_tokens(&messages);
3787 assert_eq!(tokens, 200); }
3789
3790 #[test]
3793 fn dedup_artifacts_removes_duplicates() {
3794 let mut artifacts = vec![
3795 ArtifactRecord {
3796 id: "a1".to_string(),
3797 kind: "test".to_string(),
3798 text: Some("duplicate content".to_string()),
3799 ..Default::default()
3800 },
3801 ArtifactRecord {
3802 id: "a2".to_string(),
3803 kind: "test".to_string(),
3804 text: Some("duplicate content".to_string()),
3805 ..Default::default()
3806 },
3807 ArtifactRecord {
3808 id: "a3".to_string(),
3809 kind: "test".to_string(),
3810 text: Some("unique content".to_string()),
3811 ..Default::default()
3812 },
3813 ];
3814 dedup_artifacts(&mut artifacts);
3815 assert_eq!(artifacts.len(), 2);
3816 }
3817
3818 #[test]
3819 fn microcompact_artifact_snips_oversized() {
3820 let mut artifact = ArtifactRecord {
3821 id: "a1".to_string(),
3822 kind: "test".to_string(),
3823 text: Some("x".repeat(10_000)),
3824 estimated_tokens: Some(2_500),
3825 ..Default::default()
3826 };
3827 microcompact_artifact(&mut artifact, 500);
3828 assert!(artifact.text.as_ref().unwrap().len() < 5_000);
3829 assert_eq!(artifact.estimated_tokens, Some(500));
3830 }
3831
3832 #[test]
3835 fn arg_constraint_allows_matching_pattern() {
3836 let policy = CapabilityPolicy {
3837 tool_arg_constraints: vec![ToolArgConstraint {
3838 tool: "exec".to_string(),
3839 arg_patterns: vec!["cargo *".to_string()],
3840 }],
3841 ..Default::default()
3842 };
3843 let result = enforce_tool_arg_constraints(
3844 &policy,
3845 "exec",
3846 &serde_json::json!({"command": "cargo test"}),
3847 );
3848 assert!(result.is_ok());
3849 }
3850
3851 #[test]
3852 fn arg_constraint_rejects_non_matching_pattern() {
3853 let policy = CapabilityPolicy {
3854 tool_arg_constraints: vec![ToolArgConstraint {
3855 tool: "exec".to_string(),
3856 arg_patterns: vec!["cargo *".to_string()],
3857 }],
3858 ..Default::default()
3859 };
3860 let result = enforce_tool_arg_constraints(
3861 &policy,
3862 "exec",
3863 &serde_json::json!({"command": "rm -rf /"}),
3864 );
3865 assert!(result.is_err());
3866 }
3867
3868 #[test]
3869 fn arg_constraint_ignores_unmatched_tool() {
3870 let policy = CapabilityPolicy {
3871 tool_arg_constraints: vec![ToolArgConstraint {
3872 tool: "exec".to_string(),
3873 arg_patterns: vec!["cargo *".to_string()],
3874 }],
3875 ..Default::default()
3876 };
3877 let result = enforce_tool_arg_constraints(
3878 &policy,
3879 "read_file",
3880 &serde_json::json!({"path": "/etc/passwd"}),
3881 );
3882 assert!(result.is_ok());
3883 }
3884
3885 #[test]
3886 fn microcompact_handles_multibyte_utf8() {
3887 let emoji_output = "🔥".repeat(500); let result = microcompact_tool_output(&emoji_output, 400);
3890 assert!(result.contains("snipped"));
3892
3893 let mixed = format!("{}{}{}", "a".repeat(300), "é".repeat(500), "b".repeat(300));
3895 let result2 = microcompact_tool_output(&mixed, 400);
3896 assert!(result2.contains("snipped"));
3897
3898 let cjk = "中文".repeat(500);
3900 let result3 = microcompact_tool_output(&cjk, 400);
3901 assert!(result3.contains("snipped"));
3902 }
3903}