use std::collections::{BTreeMap, BTreeSet};
use std::path::{Path, PathBuf};
use std::rc::Rc;
use std::{cell::RefCell, thread_local};

use serde::{Deserialize, Serialize};

use crate::llm::{extract_llm_options, vm_call_llm_full, vm_value_to_json};
use crate::value::{VmError, VmValue};

// NOTE: despite the name, this currently emits Unix epoch seconds as a plain
// integer string; swap in a real RFC 3339 formatter if callers need one.
fn now_rfc3339() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let ts = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    format!("{ts}")
}

fn new_id(prefix: &str) -> String {
    format!("{prefix}_{}", uuid::Uuid::now_v7())
}

fn default_run_dir() -> PathBuf {
    std::env::var("HARN_RUN_DIR")
        .map(PathBuf::from)
        .unwrap_or_else(|_| PathBuf::from(".harn-runs"))
}

thread_local! {
    static EXECUTION_POLICY_STACK: RefCell<Vec<CapabilityPolicy>> = const { RefCell::new(Vec::new()) };
    static TOOL_HOOKS: RefCell<Vec<ToolHook>> = const { RefCell::new(Vec::new()) };
}

#[derive(Clone, Debug)]
pub enum PreToolAction {
    Allow,
    Deny(String),
    Modify(serde_json::Value),
}

#[derive(Clone, Debug)]
pub enum PostToolAction {
    Pass,
    Modify(String),
}

pub type PreToolHookFn = Rc<dyn Fn(&str, &serde_json::Value) -> PreToolAction>;
pub type PostToolHookFn = Rc<dyn Fn(&str, &str) -> PostToolAction>;

#[derive(Clone)]
pub struct ToolHook {
    pub pattern: String,
    pub pre: Option<PreToolHookFn>,
    pub post: Option<PostToolHookFn>,
}

impl std::fmt::Debug for ToolHook {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ToolHook")
            .field("pattern", &self.pattern)
            .field("has_pre", &self.pre.is_some())
            .field("has_post", &self.post.is_some())
            .finish()
    }
}

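/// Minimal glob matcher: supports `*` (match everything), `prefix*`, and
/// `*suffix`. Patterns with an interior `*` (e.g. `a*b`) are not expanded;
/// they fall through to the literal equality check.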
fn glob_match(pattern: &str, name: &str) -> bool {
    if pattern == "*" {
        return true;
    }
    if let Some(prefix) = pattern.strip_suffix('*') {
        return name.starts_with(prefix);
    }
    if let Some(suffix) = pattern.strip_prefix('*') {
        return name.ends_with(suffix);
    }
    pattern == name
}

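/// Registers a hook on the current thread's hook list. A minimal sketch
/// (the pattern and deny reason are illustrative, not part of any fixed API):
///
/// ```ignore
/// register_tool_hook(ToolHook {
///     pattern: "shell*".to_string(),
///     pre: Some(Rc::new(|_tool, _args| {
///         PreToolAction::Deny("shell tools are disabled here".to_string())
///     })),
///     post: None,
/// });
/// ```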
pub fn register_tool_hook(hook: ToolHook) {
    TOOL_HOOKS.with(|hooks| hooks.borrow_mut().push(hook));
}

pub fn clear_tool_hooks() {
    TOOL_HOOKS.with(|hooks| hooks.borrow_mut().clear());
}

pub fn run_pre_tool_hooks(tool_name: &str, args: &serde_json::Value) -> PreToolAction {
    TOOL_HOOKS.with(|hooks| {
        let hooks = hooks.borrow();
        let mut current_args = args.clone();
        for hook in hooks.iter() {
            if !glob_match(&hook.pattern, tool_name) {
                continue;
            }
            if let Some(ref pre) = hook.pre {
                match pre(tool_name, &current_args) {
                    PreToolAction::Allow => {}
                    PreToolAction::Deny(reason) => return PreToolAction::Deny(reason),
                    PreToolAction::Modify(new_args) => {
                        current_args = new_args;
                    }
                }
            }
        }
        if current_args != *args {
            PreToolAction::Modify(current_args)
        } else {
            PreToolAction::Allow
        }
    })
}

pub fn run_post_tool_hooks(tool_name: &str, result: &str) -> String {
    TOOL_HOOKS.with(|hooks| {
        let hooks = hooks.borrow();
        let mut current = result.to_string();
        for hook in hooks.iter() {
            if !glob_match(&hook.pattern, tool_name) {
                continue;
            }
            if let Some(ref post) = hook.post {
                match post(tool_name, &current) {
                    PostToolAction::Pass => {}
                    PostToolAction::Modify(new_result) => {
                        current = new_result;
                    }
                }
            }
        }
        current
    })
}

#[derive(Clone, Debug)]
pub struct AutoCompactConfig {
    pub token_threshold: usize,
    pub tool_output_max_chars: usize,
    pub keep_last: usize,
}

impl Default for AutoCompactConfig {
    fn default() -> Self {
        Self {
            token_threshold: 80_000,
            tool_output_max_chars: 20_000,
            keep_last: 8,
        }
    }
}

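/// Rough token estimate using the common ~4-characters-per-token heuristic;
/// good enough for compaction thresholds, not for billing or hard limits.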
pub fn estimate_message_tokens(messages: &[serde_json::Value]) -> usize {
    messages
        .iter()
        .map(|m| {
            m.get("content")
                .and_then(|c| c.as_str())
                .map(|s| s.len())
                .unwrap_or(0)
        })
        .sum::<usize>()
        / 4
}

pub fn microcompact_tool_output(output: &str, max_chars: usize) -> String {
    if output.len() <= max_chars || max_chars < 200 {
        return output.to_string();
    }
    let keep = max_chars / 2;
    // Snap both cut points to char boundaries so slicing cannot panic on
    // multi-byte UTF-8 content.
    let mut head_end = keep;
    while !output.is_char_boundary(head_end) {
        head_end -= 1;
    }
    let mut tail_start = output.len() - keep;
    while !output.is_char_boundary(tail_start) {
        tail_start += 1;
    }
    let head = &output[..head_end];
    let tail = &output[tail_start..];
    let snipped = output.len() - max_chars;
    format!("{head}\n\n[... {snipped} characters snipped ...]\n\n{tail}")
}

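/// Replaces all but the last `keep_last` messages with a single summary
/// message inserted at the front; returns `true` if anything was compacted.
/// A minimal sketch (`transcript_messages` is a hypothetical source of
/// messages):
///
/// ```ignore
/// let mut messages: Vec<serde_json::Value> = transcript_messages();
/// if auto_compact_messages(&mut messages, 8) {
///     // messages[0]["content"] now starts with "[auto-compacted ...]"
/// }
/// ```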
pub fn auto_compact_messages(messages: &mut Vec<serde_json::Value>, keep_last: usize) -> bool {
    if messages.len() <= keep_last {
        return false;
    }
    let split_at = messages.len().saturating_sub(keep_last);
    let old_messages: Vec<_> = messages.drain(..split_at).collect();
    let archived_count = old_messages.len();
    let summary_parts: Vec<String> = old_messages
        .iter()
        .filter_map(|m| {
            let role = m.get("role")?.as_str()?;
            let content = m.get("content")?.as_str()?;
            if content.is_empty() {
                return None;
            }
            let truncated = if content.len() > 500 {
                // Back off to a char boundary so the slice cannot panic.
                let mut cut = 500;
                while !content.is_char_boundary(cut) {
                    cut -= 1;
                }
                format!("{}...", &content[..cut])
            } else {
                content.to_string()
            };
            Some(format!("[{role}] {truncated}"))
        })
        .take(15)
        .collect();
    let summary = format!(
        "[auto-compacted {archived_count} older messages]\n{}{}",
        summary_parts.join("\n"),
        if archived_count > 15 {
            format!("\n... and {} more", archived_count - 15)
        } else {
            String::new()
        }
    );
    messages.insert(
        0,
        serde_json::json!({
            "role": "user",
            "content": summary,
        }),
    );
    true
}

pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
    let max_chars = max_tokens * 4;
    if let Some(ref text) = artifact.text {
        if text.len() > max_chars && max_chars >= 200 {
            artifact.text = Some(microcompact_tool_output(text, max_chars));
            artifact.estimated_tokens = Some(max_tokens);
        }
    }
}

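/// Drops artifacts whose text duplicates an earlier artifact's text, comparing
/// 64-bit hashes (collisions are theoretically possible but acceptable for
/// context pruning). Artifacts with empty text are always kept.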
pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
    let mut seen_hashes: BTreeSet<u64> = BTreeSet::new();
    artifacts.retain(|artifact| {
        let text = artifact.text.as_deref().unwrap_or("");
        if text.is_empty() {
            return true;
        }
        let hash = {
            use std::hash::{Hash, Hasher};
            let mut hasher = std::collections::hash_map::DefaultHasher::new();
            text.hash(&mut hasher);
            hasher.finish()
        };
        seen_hashes.insert(hash)
    });
}

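/// Like `select_artifacts`, but first dedups the pool and microcompacts any
/// artifact estimated at more than twice its per-artifact share of the token
/// budget (the share is clamped to at least 500 tokens).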
pub fn select_artifacts_adaptive(
    mut artifacts: Vec<ArtifactRecord>,
    policy: &ContextPolicy,
) -> Vec<ArtifactRecord> {
    dedup_artifacts(&mut artifacts);

    if let Some(max_tokens) = policy.max_tokens {
        let count = artifacts.len().max(1);
        let per_artifact_budget = max_tokens / count;
        let cap = per_artifact_budget.max(500).min(max_tokens);
        for artifact in &mut artifacts {
            let est = artifact.estimated_tokens.unwrap_or(0);
            if est > cap * 2 {
                microcompact_artifact(artifact, cap);
            }
        }
    }

    select_artifacts(artifacts, policy)
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolArgConstraint {
    pub tool: String,
    pub arg_patterns: Vec<String>,
}

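/// Checks a tool call's first string argument against the patterns of every
/// matching constraint. A minimal sketch (the tool name and path pattern are
/// illustrative):
///
/// ```ignore
/// let policy = CapabilityPolicy {
///     tool_arg_constraints: vec![ToolArgConstraint {
///         tool: "read*".to_string(),
///         arg_patterns: vec!["src/*".to_string()],
///     }],
///     ..Default::default()
/// };
/// let args = serde_json::json!({ "path": "src/main.rs" });
/// enforce_tool_arg_constraints(&policy, "read_file", &args)?;
/// ```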
pub fn enforce_tool_arg_constraints(
    policy: &CapabilityPolicy,
    tool_name: &str,
    args: &serde_json::Value,
) -> Result<(), VmError> {
    for constraint in &policy.tool_arg_constraints {
        if !glob_match(&constraint.tool, tool_name) {
            continue;
        }
        if constraint.arg_patterns.is_empty() {
            continue;
        }
        let first_arg = args
            .as_object()
            .and_then(|o| o.values().next())
            .and_then(|v| v.as_str())
            .or_else(|| args.as_str())
            .unwrap_or("");
        let matches = constraint
            .arg_patterns
            .iter()
            .any(|pattern| glob_match(pattern, first_arg));
        if !matches {
            return reject_policy(format!(
                "tool '{tool_name}' argument '{first_arg}' does not match allowed patterns: {:?}",
                constraint.arg_patterns
            ));
        }
    }
    Ok(())
}

fn normalize_artifact_kind(kind: &str) -> String {
    match kind {
        "resource"
        | "workspace_file"
        | "editor_selection"
        | "workspace_snapshot"
        | "transcript_summary"
        | "summary"
        | "plan"
        | "diff"
        | "git_diff"
        | "patch"
        | "patch_set"
        | "patch_proposal"
        | "diff_review"
        | "review_decision"
        | "verification_bundle"
        | "apply_intent"
        | "verification_result"
        | "test_result"
        | "command_result"
        | "provider_payload"
        | "worker_result"
        | "worker_notification"
        | "artifact" => kind.to_string(),
        "file" => "workspace_file".to_string(),
        "transcript" => "transcript_summary".to_string(),
        "verification" => "verification_result".to_string(),
        "test" => "test_result".to_string(),
        other if other.trim().is_empty() => "artifact".to_string(),
        other => other.to_string(),
    }
}

fn default_artifact_priority(kind: &str) -> i64 {
    match kind {
        "verification_result" | "test_result" => 100,
        "verification_bundle" => 95,
        "diff" | "git_diff" | "patch" | "patch_set" | "patch_proposal" | "diff_review"
        | "review_decision" | "apply_intent" => 90,
        "plan" => 80,
        "workspace_file" | "workspace_snapshot" | "editor_selection" | "resource" => 70,
        "summary" | "transcript_summary" => 60,
        "command_result" => 50,
        _ => 40,
    }
}

fn freshness_rank(value: Option<&str>) -> i64 {
    match value.unwrap_or_default() {
        "fresh" | "live" => 3,
        "recent" => 2,
        "stale" => 0,
        _ => 1,
    }
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CapabilityPolicy {
    pub tools: Vec<String>,
    pub capabilities: BTreeMap<String, Vec<String>>,
    pub workspace_roots: Vec<String>,
    pub side_effect_level: Option<String>,
    pub recursion_limit: Option<usize>,
    #[serde(default)]
    pub tool_arg_constraints: Vec<ToolArgConstraint>,
}

impl CapabilityPolicy {
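    /// Intersects this policy (treated as the host ceiling) with a requested
    /// policy. Empty lists mean "unrestricted" on either side; a request that
    /// names anything outside a non-empty ceiling is rejected rather than
    /// silently narrowed. A minimal sketch:
    ///
    /// ```ignore
    /// let ceiling = CapabilityPolicy {
    ///     tools: vec!["read".to_string(), "edit".to_string()],
    ///     ..Default::default()
    /// };
    /// let requested = CapabilityPolicy {
    ///     tools: vec!["read".to_string()],
    ///     ..Default::default()
    /// };
    /// let effective = ceiling.intersect(&requested)?; // tools == ["read"]
    /// ```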
    pub fn intersect(&self, requested: &CapabilityPolicy) -> Result<CapabilityPolicy, String> {
        let side_effect_level = match (&self.side_effect_level, &requested.side_effect_level) {
            (Some(a), Some(b)) => Some(min_side_effect(a, b).to_string()),
            (Some(a), None) => Some(a.clone()),
            (None, Some(b)) => Some(b.clone()),
            (None, None) => None,
        };

        if !self.tools.is_empty() {
            let denied: Vec<String> = requested
                .tools
                .iter()
                .filter(|tool| !self.tools.contains(*tool))
                .cloned()
                .collect();
            if !denied.is_empty() {
                return Err(format!(
                    "requested tools exceed host ceiling: {}",
                    denied.join(", ")
                ));
            }
        }

        for (capability, requested_ops) in &requested.capabilities {
            if let Some(allowed_ops) = self.capabilities.get(capability) {
                let denied: Vec<String> = requested_ops
                    .iter()
                    .filter(|op| !allowed_ops.contains(*op))
                    .cloned()
                    .collect();
                if !denied.is_empty() {
                    return Err(format!(
                        "requested capability operations exceed host ceiling: {}.{}",
                        capability,
                        denied.join(",")
                    ));
                }
            } else if !self.capabilities.is_empty() {
                return Err(format!(
                    "requested capability exceeds host ceiling: {capability}"
                ));
            }
        }

        let tools = if self.tools.is_empty() {
            requested.tools.clone()
        } else if requested.tools.is_empty() {
            self.tools.clone()
        } else {
            requested
                .tools
                .iter()
                .filter(|tool| self.tools.contains(*tool))
                .cloned()
                .collect()
        };

        let capabilities = if self.capabilities.is_empty() {
            requested.capabilities.clone()
        } else if requested.capabilities.is_empty() {
            self.capabilities.clone()
        } else {
            requested
                .capabilities
                .iter()
                .filter_map(|(capability, requested_ops)| {
                    self.capabilities.get(capability).map(|allowed_ops| {
                        (
                            capability.clone(),
                            requested_ops
                                .iter()
                                .filter(|op| allowed_ops.contains(*op))
                                .cloned()
                                .collect::<Vec<_>>(),
                        )
                    })
                })
                .collect()
        };

        let workspace_roots = if self.workspace_roots.is_empty() {
            requested.workspace_roots.clone()
        } else if requested.workspace_roots.is_empty() {
            self.workspace_roots.clone()
        } else {
            requested
                .workspace_roots
                .iter()
                .filter(|root| self.workspace_roots.contains(*root))
                .cloned()
                .collect()
        };

        let recursion_limit = match (self.recursion_limit, requested.recursion_limit) {
            (Some(a), Some(b)) => Some(a.min(b)),
            (Some(a), None) => Some(a),
            (None, Some(b)) => Some(b),
            (None, None) => None,
        };

        let mut tool_arg_constraints = self.tool_arg_constraints.clone();
        tool_arg_constraints.extend(requested.tool_arg_constraints.clone());

        Ok(CapabilityPolicy {
            tools,
            capabilities,
            workspace_roots,
            side_effect_level,
            recursion_limit,
            tool_arg_constraints,
        })
    }
}

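/// Picks the more restrictive of two side-effect levels using the ladder
/// none < read_only < workspace_write < process_exec < network; unknown
/// levels rank above all known ones.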
fn min_side_effect<'a>(a: &'a str, b: &'a str) -> &'a str {
    fn rank(v: &str) -> usize {
        match v {
            "none" => 0,
            "read_only" => 1,
            "workspace_write" => 2,
            "process_exec" => 3,
            "network" => 4,
            _ => 5,
        }
    }
    if rank(a) <= rank(b) {
        a
    } else {
        b
    }
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ModelPolicy {
    pub provider: Option<String>,
    pub model: Option<String>,
    pub model_tier: Option<String>,
    pub temperature: Option<f64>,
    pub max_tokens: Option<i64>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct TranscriptPolicy {
    pub mode: Option<String>,
    pub visibility: Option<String>,
    pub summarize: bool,
    pub compact: bool,
    pub keep_last: Option<usize>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ContextPolicy {
    pub max_artifacts: Option<usize>,
    pub max_tokens: Option<usize>,
    pub reserve_tokens: Option<usize>,
    pub include_kinds: Vec<String>,
    pub exclude_kinds: Vec<String>,
    pub prioritize_kinds: Vec<String>,
    pub pinned_ids: Vec<String>,
    pub include_stages: Vec<String>,
    pub prefer_recent: bool,
    pub prefer_fresh: bool,
    pub render: Option<String>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RetryPolicy {
    pub max_attempts: usize,
    pub verify: bool,
    pub repair: bool,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct StageContract {
    pub input_kinds: Vec<String>,
    pub output_kinds: Vec<String>,
    pub min_inputs: Option<usize>,
    pub max_inputs: Option<usize>,
    pub require_transcript: bool,
    pub schema: Option<serde_json::Value>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BranchSemantics {
    pub success: Option<String>,
    pub failure: Option<String>,
    pub verify_pass: Option<String>,
    pub verify_fail: Option<String>,
    pub condition_true: Option<String>,
    pub condition_false: Option<String>,
    pub loop_continue: Option<String>,
    pub loop_exit: Option<String>,
    pub escalation: Option<String>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct MapPolicy {
    pub items: Vec<serde_json::Value>,
    pub item_artifact_kind: Option<String>,
    pub output_kind: Option<String>,
    pub max_items: Option<usize>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct JoinPolicy {
    pub strategy: String,
    pub require_all_inputs: bool,
    pub min_completed: Option<usize>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReducePolicy {
    pub strategy: String,
    pub separator: Option<String>,
    pub output_kind: Option<String>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EscalationPolicy {
    pub level: Option<String>,
    pub queue: Option<String>,
    pub reason: Option<String>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub kind: String,
    pub title: Option<String>,
    pub text: Option<String>,
    pub data: Option<serde_json::Value>,
    pub source: Option<String>,
    pub created_at: String,
    pub freshness: Option<String>,
    pub priority: Option<i64>,
    pub lineage: Vec<String>,
    pub relevance: Option<f64>,
    pub estimated_tokens: Option<usize>,
    pub stage: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}

impl ArtifactRecord {
    pub fn normalize(mut self) -> Self {
        if self.type_name.is_empty() {
            self.type_name = "artifact".to_string();
        }
        if self.id.is_empty() {
            self.id = new_id("artifact");
        }
        if self.created_at.is_empty() {
            self.created_at = now_rfc3339();
        }
        if self.kind.is_empty() {
            self.kind = "artifact".to_string();
        }
        self.kind = normalize_artifact_kind(&self.kind);
        if self.estimated_tokens.is_none() {
            self.estimated_tokens = self
                .text
                .as_ref()
                .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
        }
        if self.priority.is_none() {
            self.priority = Some(default_artifact_priority(&self.kind));
        }
        self
    }
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowNode {
    pub id: Option<String>,
    pub kind: String,
    pub mode: Option<String>,
    pub prompt: Option<String>,
    pub system: Option<String>,
    pub task_label: Option<String>,
    pub tools: Vec<String>,
    pub model_policy: ModelPolicy,
    pub transcript_policy: TranscriptPolicy,
    pub context_policy: ContextPolicy,
    pub retry_policy: RetryPolicy,
    pub capability_policy: CapabilityPolicy,
    pub input_contract: StageContract,
    pub output_contract: StageContract,
    pub branch_semantics: BranchSemantics,
    pub map_policy: MapPolicy,
    pub join_policy: JoinPolicy,
    pub reduce_policy: ReducePolicy,
    pub escalation_policy: EscalationPolicy,
    pub verify: Option<serde_json::Value>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowEdge {
    pub from: String,
    pub to: String,
    pub branch: Option<String>,
    pub label: Option<String>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowGraph {
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    pub version: usize,
    pub entry: String,
    pub nodes: BTreeMap<String, WorkflowNode>,
    pub edges: Vec<WorkflowEdge>,
    pub capability_policy: CapabilityPolicy,
    pub metadata: BTreeMap<String, serde_json::Value>,
    pub audit_log: Vec<WorkflowAuditEntry>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowAuditEntry {
    pub id: String,
    pub op: String,
    pub node_id: Option<String>,
    pub timestamp: String,
    pub reason: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageRecord {
    pub id: String,
    pub node_id: String,
    pub kind: String,
    pub status: String,
    pub outcome: String,
    pub branch: Option<String>,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub visible_text: Option<String>,
    pub private_reasoning: Option<String>,
    pub transcript: Option<serde_json::Value>,
    pub verification: Option<serde_json::Value>,
    pub artifacts: Vec<ArtifactRecord>,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
    pub attempts: Vec<RunStageAttemptRecord>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageAttemptRecord {
    pub attempt: usize,
    pub status: String,
    pub outcome: String,
    pub branch: Option<String>,
    pub error: Option<String>,
    pub verification: Option<serde_json::Value>,
    pub started_at: String,
    pub finished_at: Option<String>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunTransitionRecord {
    pub id: String,
    pub from_stage_id: Option<String>,
    pub from_node_id: Option<String>,
    pub to_node_id: String,
    pub branch: Option<String>,
    pub timestamp: String,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunCheckpointRecord {
    pub id: String,
    pub ready_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub last_stage_id: Option<String>,
    pub persisted_at: String,
    pub reason: String,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayFixture {
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub source_run_id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    pub created_at: String,
    pub expected_status: String,
    pub stage_assertions: Vec<ReplayStageAssertion>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayStageAssertion {
    pub node_id: String,
    pub expected_status: String,
    pub expected_outcome: String,
    pub expected_branch: Option<String>,
    pub required_artifact_kinds: Vec<String>,
    pub visible_text_contains: Option<String>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalReport {
    pub pass: bool,
    pub failures: Vec<String>,
    pub stage_count: usize,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalCaseReport {
    pub run_id: String,
    pub workflow_id: String,
    pub label: Option<String>,
    pub pass: bool,
    pub failures: Vec<String>,
    pub stage_count: usize,
    pub source_path: Option<String>,
    pub comparison: Option<RunDiffReport>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalSuiteReport {
    pub pass: bool,
    pub total: usize,
    pub passed: usize,
    pub failed: usize,
    pub cases: Vec<ReplayEvalCaseReport>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunStageDiffRecord {
    pub node_id: String,
    pub change: String,
    pub details: Vec<String>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunDiffReport {
    pub left_run_id: String,
    pub right_run_id: String,
    pub identical: bool,
    pub status_changed: bool,
    pub left_status: String,
    pub right_status: String,
    pub stage_diffs: Vec<RunStageDiffRecord>,
    pub transition_count_delta: isize,
    pub artifact_count_delta: isize,
    pub checkpoint_count_delta: isize,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteManifest {
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    pub base_dir: Option<String>,
    pub cases: Vec<EvalSuiteCase>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteCase {
    pub label: Option<String>,
    pub run_path: String,
    pub fixture_path: Option<String>,
    pub compare_to: Option<String>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunRecord {
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    pub task: String,
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub parent_run_id: Option<String>,
    pub root_run_id: Option<String>,
    pub stages: Vec<RunStageRecord>,
    pub transitions: Vec<RunTransitionRecord>,
    pub checkpoints: Vec<RunCheckpointRecord>,
    pub pending_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub child_runs: Vec<RunChildRecord>,
    pub artifacts: Vec<ArtifactRecord>,
    pub policy: CapabilityPolicy,
    pub execution: Option<RunExecutionRecord>,
    pub transcript: Option<serde_json::Value>,
    pub replay_fixture: Option<ReplayFixture>,
    pub metadata: BTreeMap<String, serde_json::Value>,
    pub persisted_path: Option<String>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunChildRecord {
    pub worker_id: String,
    pub worker_name: String,
    pub parent_stage_id: Option<String>,
    pub task: String,
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub run_id: Option<String>,
    pub run_path: Option<String>,
    pub snapshot_path: Option<String>,
    pub execution: Option<RunExecutionRecord>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunExecutionRecord {
    pub cwd: Option<String>,
    pub source_dir: Option<String>,
    pub env: BTreeMap<String, String>,
    pub adapter: Option<String>,
    pub repo_path: Option<String>,
    pub worktree_path: Option<String>,
    pub branch: Option<String>,
    pub base_ref: Option<String>,
    pub cleanup: Option<String>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowValidationReport {
    pub valid: bool,
    pub errors: Vec<String>,
    pub warnings: Vec<String>,
    pub reachable_nodes: Vec<String>,
}

fn parse_json_value<T: for<'de> Deserialize<'de>>(value: &VmValue) -> Result<T, VmError> {
    serde_json::from_value(vm_value_to_json(value))
        .map_err(|e| VmError::Runtime(format!("orchestration parse error: {e}")))
}

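/// Builds a `WorkflowGraph` from either a full graph value or the shorthand
/// `{act, verify, repair}` dict form, wiring the default edges
/// act -> verify, verify --failed--> repair, repair --retry--> verify.
/// A shorthand sketch (field values illustrative):
///
/// ```ignore
/// // value ~ { "act": { "prompt": "..." }, "verify": { "command": "cargo test" } }
/// let graph = normalize_workflow_value(&value)?;
/// assert_eq!(graph.entry, "act");
/// ```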
pub fn normalize_workflow_value(value: &VmValue) -> Result<WorkflowGraph, VmError> {
    let mut graph: WorkflowGraph = parse_json_value(value)?;
    let as_dict = value.as_dict().cloned().unwrap_or_default();

    if graph.nodes.is_empty() {
        for key in ["act", "verify", "repair"] {
            if let Some(node_value) = as_dict.get(key) {
                let mut node: WorkflowNode = parse_json_value(node_value)?;
                let raw_node = node_value.as_dict().cloned().unwrap_or_default();
                node.id = Some(key.to_string());
                if node.kind.is_empty() {
                    node.kind = if key == "verify" {
                        "verify".to_string()
                    } else {
                        "stage".to_string()
                    };
                }
                if node.model_policy.provider.is_none() {
                    node.model_policy.provider = as_dict
                        .get("provider")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model.is_none() {
                    node.model_policy.model = as_dict
                        .get("model")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model_tier.is_none() {
                    node.model_policy.model_tier = as_dict
                        .get("model_tier")
                        .or_else(|| as_dict.get("tier"))
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.temperature.is_none() {
                    node.model_policy.temperature = as_dict.get("temperature").and_then(|value| {
                        if let VmValue::Float(number) = value {
                            Some(*number)
                        } else {
                            value.as_int().map(|number| number as f64)
                        }
                    });
                }
                if node.model_policy.max_tokens.is_none() {
                    node.model_policy.max_tokens =
                        as_dict.get("max_tokens").and_then(|value| value.as_int());
                }
                if node.mode.is_none() {
                    node.mode = as_dict
                        .get("mode")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if key == "verify"
                    && node.verify.is_none()
                    && (raw_node.contains_key("assert_text")
                        || raw_node.contains_key("command")
                        || raw_node.contains_key("expect_status")
                        || raw_node.contains_key("expect_text"))
                {
                    node.verify = Some(serde_json::json!({
                        "assert_text": raw_node.get("assert_text").map(vm_value_to_json),
                        "command": raw_node.get("command").map(vm_value_to_json),
                        "expect_status": raw_node.get("expect_status").map(vm_value_to_json),
                        "expect_text": raw_node.get("expect_text").map(vm_value_to_json),
                    }));
                }
                graph.nodes.insert(key.to_string(), node);
            }
        }
        if graph.entry.is_empty() && graph.nodes.contains_key("act") {
            graph.entry = "act".to_string();
        }
        if graph.edges.is_empty() && graph.nodes.contains_key("act") {
            if graph.nodes.contains_key("verify") {
                graph.edges.push(WorkflowEdge {
                    from: "act".to_string(),
                    to: "verify".to_string(),
                    branch: None,
                    label: None,
                });
            }
            if graph.nodes.contains_key("repair") {
                graph.edges.push(WorkflowEdge {
                    from: "verify".to_string(),
                    to: "repair".to_string(),
                    branch: Some("failed".to_string()),
                    label: None,
                });
                graph.edges.push(WorkflowEdge {
                    from: "repair".to_string(),
                    to: "verify".to_string(),
                    branch: Some("retry".to_string()),
                    label: None,
                });
            }
        }
    }

    if graph.type_name.is_empty() {
        graph.type_name = "workflow_graph".to_string();
    }
    if graph.id.is_empty() {
        graph.id = new_id("workflow");
    }
    if graph.version == 0 {
        graph.version = 1;
    }
    if graph.entry.is_empty() {
        graph.entry = graph
            .nodes
            .keys()
            .next()
            .cloned()
            .unwrap_or_else(|| "act".to_string());
    }
    for (node_id, node) in &mut graph.nodes {
        if node.id.is_none() {
            node.id = Some(node_id.clone());
        }
        if node.kind.is_empty() {
            node.kind = "stage".to_string();
        }
        if node.join_policy.strategy.is_empty() {
            node.join_policy.strategy = "all".to_string();
        }
        if node.reduce_policy.strategy.is_empty() {
            node.reduce_policy.strategy = "concat".to_string();
        }
        if node.output_contract.output_kinds.is_empty() {
            node.output_contract.output_kinds = vec![match node.kind.as_str() {
                "verify" => "verification_result".to_string(),
                "reduce" => node
                    .reduce_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "summary".to_string()),
                "map" => node
                    .map_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "artifact".to_string()),
                "escalation" => "plan".to_string(),
                _ => "artifact".to_string(),
            }];
        }
        if node.retry_policy.max_attempts == 0 {
            node.retry_policy.max_attempts = 1;
        }
    }
    Ok(graph)
}

pub fn validate_workflow(
    graph: &WorkflowGraph,
    ceiling: Option<&CapabilityPolicy>,
) -> WorkflowValidationReport {
    let mut errors = Vec::new();
    let mut warnings = Vec::new();

    if !graph.nodes.contains_key(&graph.entry) {
        errors.push(format!("entry node does not exist: {}", graph.entry));
    }

    let node_ids: BTreeSet<String> = graph.nodes.keys().cloned().collect();
    for edge in &graph.edges {
        if !node_ids.contains(&edge.from) {
            errors.push(format!("edge.from references unknown node: {}", edge.from));
        }
        if !node_ids.contains(&edge.to) {
            errors.push(format!("edge.to references unknown node: {}", edge.to));
        }
    }

    let reachable_nodes = reachable_nodes(graph);
    for node_id in &node_ids {
        if !reachable_nodes.contains(node_id) {
            warnings.push(format!("node is unreachable: {node_id}"));
        }
    }

    for (node_id, node) in &graph.nodes {
        let incoming = graph
            .edges
            .iter()
            .filter(|edge| edge.to == *node_id)
            .count();
        let outgoing: Vec<&WorkflowEdge> = graph
            .edges
            .iter()
            .filter(|edge| edge.from == *node_id)
            .collect();
        if let Some(min_inputs) = node.input_contract.min_inputs {
            if let Some(max_inputs) = node.input_contract.max_inputs {
                if min_inputs > max_inputs {
                    errors.push(format!(
                        "node {node_id}: input contract min_inputs exceeds max_inputs"
                    ));
                }
            }
        }
        match node.kind.as_str() {
            "condition" => {
                let has_true = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("true"));
                let has_false = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("false"));
                if !has_true || !has_false {
                    errors.push(format!(
                        "node {node_id}: condition nodes require both 'true' and 'false' branch edges"
                    ));
                }
            }
            "fork" => {
                if outgoing.len() < 2 {
                    errors.push(format!(
                        "node {node_id}: fork nodes require at least two outgoing edges"
                    ));
                }
            }
            "join" => {
                if incoming < 2 {
                    warnings.push(format!(
                        "node {node_id}: join node has fewer than two incoming edges"
                    ));
                }
            }
            "map" => {
                if node.map_policy.items.is_empty()
                    && node.map_policy.item_artifact_kind.is_none()
                    && node.input_contract.input_kinds.is_empty()
                {
                    errors.push(format!(
                        "node {node_id}: map nodes require items, item_artifact_kind, or input_contract.input_kinds"
                    ));
                }
            }
            "reduce" => {
                if node.input_contract.input_kinds.is_empty() {
                    warnings.push(format!(
                        "node {node_id}: reduce node has no input_contract.input_kinds; it will consume all available artifacts"
                    ));
                }
            }
            _ => {}
        }
    }

    if let Some(ceiling) = ceiling {
        if let Err(error) = ceiling.intersect(&graph.capability_policy) {
            errors.push(error);
        }
        for (node_id, node) in &graph.nodes {
            if let Err(error) = ceiling.intersect(&node.capability_policy) {
                errors.push(format!("node {node_id}: {error}"));
            }
        }
    }

    WorkflowValidationReport {
        valid: errors.is_empty(),
        errors,
        warnings,
        reachable_nodes: reachable_nodes.into_iter().collect(),
    }
}

fn reachable_nodes(graph: &WorkflowGraph) -> BTreeSet<String> {
    let mut seen = BTreeSet::new();
    let mut stack = vec![graph.entry.clone()];
    while let Some(node_id) = stack.pop() {
        if !seen.insert(node_id.clone()) {
            continue;
        }
        for edge in graph.edges.iter().filter(|edge| edge.from == node_id) {
            stack.push(edge.to.clone());
        }
    }
    seen
}

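/// Filters artifacts by kind/stage, sorts them (pinned first, then
/// prioritized kinds, priority, optional freshness/recency, relevance, and
/// finally smaller estimated size), and greedily packs them under
/// `max_tokens` minus `reserve_tokens`. A minimal sketch:
///
/// ```ignore
/// let policy = ContextPolicy {
///     max_tokens: Some(4_000),
///     prefer_recent: true,
///     ..Default::default()
/// };
/// let chosen = select_artifacts(artifacts, &policy);
/// ```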
pub fn select_artifacts(
    mut artifacts: Vec<ArtifactRecord>,
    policy: &ContextPolicy,
) -> Vec<ArtifactRecord> {
    artifacts.retain(|artifact| {
        (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
            && !policy.exclude_kinds.contains(&artifact.kind)
            && (policy.include_stages.is_empty()
                || artifact
                    .stage
                    .as_ref()
                    .is_some_and(|stage| policy.include_stages.contains(stage)))
    });
    artifacts.sort_by(|a, b| {
        let b_pinned = policy.pinned_ids.contains(&b.id);
        let a_pinned = policy.pinned_ids.contains(&a.id);
        b_pinned
            .cmp(&a_pinned)
            .then_with(|| {
                let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
                let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
                b_prio_kind.cmp(&a_prio_kind)
            })
            .then_with(|| {
                b.priority
                    .unwrap_or_default()
                    .cmp(&a.priority.unwrap_or_default())
            })
            .then_with(|| {
                if policy.prefer_fresh {
                    freshness_rank(b.freshness.as_deref())
                        .cmp(&freshness_rank(a.freshness.as_deref()))
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                if policy.prefer_recent {
                    b.created_at.cmp(&a.created_at)
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                b.relevance
                    .partial_cmp(&a.relevance)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
            .then_with(|| {
                a.estimated_tokens
                    .unwrap_or(usize::MAX)
                    .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
            })
    });

    let mut selected = Vec::new();
    let mut used_tokens = 0usize;
    let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
    let effective_max_tokens = policy
        .max_tokens
        .map(|max| max.saturating_sub(reserve_tokens));
    for artifact in artifacts {
        if let Some(max_artifacts) = policy.max_artifacts {
            if selected.len() >= max_artifacts {
                break;
            }
        }
        let next_tokens = artifact.estimated_tokens.unwrap_or(0);
        if let Some(max_tokens) = effective_max_tokens {
            if used_tokens + next_tokens > max_tokens {
                continue;
            }
        }
        used_tokens += next_tokens;
        selected.push(artifact);
    }
    selected
}

pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
    let mut parts = Vec::new();
    for artifact in artifacts {
        let title = artifact
            .title
            .clone()
            .unwrap_or_else(|| format!("{} {}", artifact.kind, artifact.id));
        let body = artifact
            .text
            .clone()
            .or_else(|| artifact.data.as_ref().map(|v| v.to_string()))
            .unwrap_or_default();
        match policy.render.as_deref() {
            Some("json") => {
                parts.push(
                    serde_json::json!({
                        "id": artifact.id,
                        "kind": artifact.kind,
                        "title": title,
                        "source": artifact.source,
                        "freshness": artifact.freshness,
                        "priority": artifact.priority,
                        "text": body,
                    })
                    .to_string(),
                );
            }
            _ => parts.push(format!(
                "[{title}] kind={} source={} freshness={} priority={}\n{}",
                artifact.kind,
                artifact
                    .source
                    .clone()
                    .unwrap_or_else(|| "unknown".to_string()),
                artifact
                    .freshness
                    .clone()
                    .unwrap_or_else(|| "normal".to_string()),
                artifact.priority.unwrap_or_default(),
                body
            )),
        }
    }
    parts.join("\n\n")
}

pub fn normalize_artifact(value: &VmValue) -> Result<ArtifactRecord, VmError> {
    let artifact: ArtifactRecord = parse_json_value(value)?;
    Ok(artifact.normalize())
}

pub fn normalize_run_record(value: &VmValue) -> Result<RunRecord, VmError> {
    let mut run: RunRecord = parse_json_value(value)?;
    if run.type_name.is_empty() {
        run.type_name = "run_record".to_string();
    }
    if run.id.is_empty() {
        run.id = new_id("run");
    }
    if run.started_at.is_empty() {
        run.started_at = now_rfc3339();
    }
    if run.status.is_empty() {
        run.status = "running".to_string();
    }
    if run.root_run_id.is_none() {
        run.root_run_id = Some(run.id.clone());
    }
    if run.replay_fixture.is_none() {
        run.replay_fixture = Some(replay_fixture_from_run(&run));
    }
    Ok(run)
}

pub fn normalize_eval_suite_manifest(value: &VmValue) -> Result<EvalSuiteManifest, VmError> {
    let mut manifest: EvalSuiteManifest = parse_json_value(value)?;
    if manifest.type_name.is_empty() {
        manifest.type_name = "eval_suite_manifest".to_string();
    }
    if manifest.id.is_empty() {
        manifest.id = new_id("eval_suite");
    }
    Ok(manifest)
}

fn load_replay_fixture(path: &Path) -> Result<ReplayFixture, VmError> {
    let content = std::fs::read_to_string(path)
        .map_err(|e| VmError::Runtime(format!("failed to read replay fixture: {e}")))?;
    serde_json::from_str(&content)
        .map_err(|e| VmError::Runtime(format!("failed to parse replay fixture: {e}")))
}

fn resolve_manifest_path(base_dir: Option<&Path>, path: &str) -> PathBuf {
    let path_buf = PathBuf::from(path);
    if path_buf.is_absolute() {
        path_buf
    } else if let Some(base_dir) = base_dir {
        base_dir.join(path_buf)
    } else {
        path_buf
    }
}

pub fn evaluate_run_suite_manifest(
    manifest: &EvalSuiteManifest,
) -> Result<ReplayEvalSuiteReport, VmError> {
    let base_dir = manifest.base_dir.as_deref().map(Path::new);
    let mut reports = Vec::new();
    for case in &manifest.cases {
        let run_path = resolve_manifest_path(base_dir, &case.run_path);
        let run = load_run_record(&run_path)?;
        let fixture = match &case.fixture_path {
            Some(path) => load_replay_fixture(&resolve_manifest_path(base_dir, path))?,
            None => run
                .replay_fixture
                .clone()
                .unwrap_or_else(|| replay_fixture_from_run(&run)),
        };
        let eval = evaluate_run_against_fixture(&run, &fixture);
        let mut pass = eval.pass;
        let mut failures = eval.failures;
        let comparison = match &case.compare_to {
            Some(path) => {
                let baseline_path = resolve_manifest_path(base_dir, path);
                let baseline = load_run_record(&baseline_path)?;
                let diff = diff_run_records(&baseline, &run);
                if !diff.identical {
                    pass = false;
                    failures.push(format!(
                        "run differs from baseline {} with {} stage changes",
                        baseline_path.display(),
                        diff.stage_diffs.len()
                    ));
                }
                Some(diff)
            }
            None => None,
        };
        reports.push(ReplayEvalCaseReport {
            run_id: run.id.clone(),
            workflow_id: run.workflow_id.clone(),
            label: case.label.clone(),
            pass,
            failures,
            stage_count: eval.stage_count,
            source_path: Some(run_path.display().to_string()),
            comparison,
        });
    }
    let total = reports.len();
    let passed = reports.iter().filter(|report| report.pass).count();
    let failed = total.saturating_sub(passed);
    Ok(ReplayEvalSuiteReport {
        pass: failed == 0,
        total,
        passed,
        failed,
        cases: reports,
    })
}

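/// Renders a unified-style diff from an LCS table. Note the output carries
/// `---`/`+++` headers but no `@@` hunk markers, and the table is O(n*m) in
/// line counts, so this is intended for small artifacts rather than large
/// files.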
pub fn render_unified_diff(path: Option<&str>, before: &str, after: &str) -> String {
    let before_lines: Vec<&str> = before.lines().collect();
    let after_lines: Vec<&str> = after.lines().collect();
    let mut table = vec![vec![0usize; after_lines.len() + 1]; before_lines.len() + 1];
    for i in (0..before_lines.len()).rev() {
        for j in (0..after_lines.len()).rev() {
            table[i][j] = if before_lines[i] == after_lines[j] {
                table[i + 1][j + 1] + 1
            } else {
                table[i + 1][j].max(table[i][j + 1])
            };
        }
    }

    let mut diff = String::new();
    let file = path.unwrap_or("artifact");
    diff.push_str(&format!("--- a/{file}\n+++ b/{file}\n"));
    let mut i = 0;
    let mut j = 0;
    while i < before_lines.len() && j < after_lines.len() {
        if before_lines[i] == after_lines[j] {
            diff.push_str(&format!(" {}\n", before_lines[i]));
            i += 1;
            j += 1;
        } else if table[i + 1][j] >= table[i][j + 1] {
            diff.push_str(&format!("-{}\n", before_lines[i]));
            i += 1;
        } else {
            diff.push_str(&format!("+{}\n", after_lines[j]));
            j += 1;
        }
    }
    while i < before_lines.len() {
        diff.push_str(&format!("-{}\n", before_lines[i]));
        i += 1;
    }
    while j < after_lines.len() {
        diff.push_str(&format!("+{}\n", after_lines[j]));
        j += 1;
    }
    diff
}

pub fn save_run_record(run: &RunRecord, path: Option<&str>) -> Result<String, VmError> {
    let path = path
        .map(PathBuf::from)
        .unwrap_or_else(|| default_run_dir().join(format!("{}.json", run.id)));
    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)
            .map_err(|e| VmError::Runtime(format!("failed to create run directory: {e}")))?;
    }
    let json = serde_json::to_string_pretty(run)
        .map_err(|e| VmError::Runtime(format!("failed to encode run record: {e}")))?;
    std::fs::write(&path, json)
        .map_err(|e| VmError::Runtime(format!("failed to persist run record: {e}")))?;
    Ok(path.to_string_lossy().to_string())
}

pub fn load_run_record(path: &Path) -> Result<RunRecord, VmError> {
    let content = std::fs::read_to_string(path)
        .map_err(|e| VmError::Runtime(format!("failed to read run record: {e}")))?;
    serde_json::from_str(&content)
        .map_err(|e| VmError::Runtime(format!("failed to parse run record: {e}")))
}

pub fn replay_fixture_from_run(run: &RunRecord) -> ReplayFixture {
    ReplayFixture {
        type_name: "replay_fixture".to_string(),
        id: new_id("fixture"),
        source_run_id: run.id.clone(),
        workflow_id: run.workflow_id.clone(),
        workflow_name: run.workflow_name.clone(),
        created_at: now_rfc3339(),
        expected_status: run.status.clone(),
        stage_assertions: run
            .stages
            .iter()
            .map(|stage| ReplayStageAssertion {
                node_id: stage.node_id.clone(),
                expected_status: stage.status.clone(),
                expected_outcome: stage.outcome.clone(),
                expected_branch: stage.branch.clone(),
                required_artifact_kinds: stage
                    .artifacts
                    .iter()
                    .map(|artifact| artifact.kind.clone())
                    .collect(),
                visible_text_contains: stage
                    .visible_text
                    .as_ref()
                    .filter(|text| !text.is_empty())
                    .map(|text| text.chars().take(80).collect()),
            })
            .collect(),
    }
}

pub fn evaluate_run_against_fixture(run: &RunRecord, fixture: &ReplayFixture) -> ReplayEvalReport {
    let mut failures = Vec::new();
    if run.status != fixture.expected_status {
        failures.push(format!(
            "run status mismatch: expected {}, got {}",
            fixture.expected_status, run.status
        ));
    }
    for assertion in &fixture.stage_assertions {
        let Some(stage) = run
            .stages
            .iter()
            .find(|stage| stage.node_id == assertion.node_id)
        else {
            failures.push(format!("missing stage {}", assertion.node_id));
            continue;
        };
        if stage.status != assertion.expected_status {
            failures.push(format!(
                "stage {} status mismatch: expected {}, got {}",
                assertion.node_id, assertion.expected_status, stage.status
            ));
        }
        if stage.outcome != assertion.expected_outcome {
            failures.push(format!(
                "stage {} outcome mismatch: expected {}, got {}",
                assertion.node_id, assertion.expected_outcome, stage.outcome
            ));
        }
        if stage.branch != assertion.expected_branch {
            failures.push(format!(
                "stage {} branch mismatch: expected {:?}, got {:?}",
                assertion.node_id, assertion.expected_branch, stage.branch
            ));
        }
        for required_kind in &assertion.required_artifact_kinds {
            if !stage
                .artifacts
                .iter()
                .any(|artifact| &artifact.kind == required_kind)
            {
                failures.push(format!(
                    "stage {} missing artifact kind {}",
                    assertion.node_id, required_kind
                ));
            }
        }
        if let Some(snippet) = &assertion.visible_text_contains {
            let actual = stage.visible_text.clone().unwrap_or_default();
            if !actual.contains(snippet) {
                failures.push(format!(
                    "stage {} visible text does not contain expected snippet {:?}",
                    assertion.node_id, snippet
                ));
            }
        }
    }

    ReplayEvalReport {
        pass: failures.is_empty(),
        failures,
        stage_count: run.stages.len(),
    }
}

pub fn evaluate_run_suite(
    cases: Vec<(RunRecord, ReplayFixture, Option<String>)>,
) -> ReplayEvalSuiteReport {
    let mut reports = Vec::new();
    for (run, fixture, source_path) in cases {
        let report = evaluate_run_against_fixture(&run, &fixture);
        reports.push(ReplayEvalCaseReport {
            run_id: run.id.clone(),
            workflow_id: run.workflow_id.clone(),
            label: None,
            pass: report.pass,
            failures: report.failures,
            stage_count: report.stage_count,
            source_path,
            comparison: None,
        });
    }
    let total = reports.len();
    let passed = reports.iter().filter(|report| report.pass).count();
    let failed = total.saturating_sub(passed);
    ReplayEvalSuiteReport {
        pass: failed == 0,
        total,
        passed,
        failed,
        cases: reports,
    }
}

pub fn diff_run_records(left: &RunRecord, right: &RunRecord) -> RunDiffReport {
    let mut stage_diffs = Vec::new();
    let mut all_node_ids = BTreeSet::new();
    all_node_ids.extend(left.stages.iter().map(|stage| stage.node_id.clone()));
    all_node_ids.extend(right.stages.iter().map(|stage| stage.node_id.clone()));

    for node_id in all_node_ids {
        let left_stage = left.stages.iter().find(|stage| stage.node_id == node_id);
        let right_stage = right.stages.iter().find(|stage| stage.node_id == node_id);
        match (left_stage, right_stage) {
            (Some(_), None) => stage_diffs.push(RunStageDiffRecord {
                node_id,
                change: "removed".to_string(),
                details: vec!["stage missing from right run".to_string()],
            }),
            (None, Some(_)) => stage_diffs.push(RunStageDiffRecord {
                node_id,
                change: "added".to_string(),
                details: vec!["stage missing from left run".to_string()],
            }),
            (Some(left_stage), Some(right_stage)) => {
                let mut details = Vec::new();
                if left_stage.status != right_stage.status {
                    details.push(format!(
                        "status: {} -> {}",
                        left_stage.status, right_stage.status
                    ));
                }
                if left_stage.outcome != right_stage.outcome {
                    details.push(format!(
                        "outcome: {} -> {}",
                        left_stage.outcome, right_stage.outcome
                    ));
                }
                if left_stage.branch != right_stage.branch {
                    details.push(format!(
                        "branch: {:?} -> {:?}",
                        left_stage.branch, right_stage.branch
                    ));
                }
                if left_stage.produced_artifact_ids.len() != right_stage.produced_artifact_ids.len()
                {
                    details.push(format!(
                        "produced_artifacts: {} -> {}",
                        left_stage.produced_artifact_ids.len(),
                        right_stage.produced_artifact_ids.len()
                    ));
                }
                if left_stage.artifacts.len() != right_stage.artifacts.len() {
                    details.push(format!(
                        "artifact_records: {} -> {}",
                        left_stage.artifacts.len(),
                        right_stage.artifacts.len()
                    ));
                }
                if !details.is_empty() {
                    stage_diffs.push(RunStageDiffRecord {
                        node_id,
                        change: "changed".to_string(),
                        details,
                    });
                }
            }
            (None, None) => {}
        }
    }

    let status_changed = left.status != right.status;
    let identical = !status_changed
        && stage_diffs.is_empty()
        && left.transitions.len() == right.transitions.len()
        && left.artifacts.len() == right.artifacts.len()
        && left.checkpoints.len() == right.checkpoints.len();

    RunDiffReport {
        left_run_id: left.id.clone(),
        right_run_id: right.id.clone(),
        identical,
        status_changed,
        left_status: left.status.clone(),
        right_status: right.status.clone(),
        stage_diffs,
        transition_count_delta: right.transitions.len() as isize - left.transitions.len() as isize,
        artifact_count_delta: right.artifacts.len() as isize - left.artifacts.len() as isize,
        checkpoint_count_delta: right.checkpoints.len() as isize - left.checkpoints.len() as isize,
    }
}

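/// Thread-local execution-policy stack. Callers are responsible for balancing
/// push/pop around policed work. A minimal sketch (`run_policed_stage` is a
/// hypothetical caller, not part of this module):
///
/// ```ignore
/// push_execution_policy(effective_policy);
/// let result = run_policed_stage(&node);
/// pop_execution_policy();
/// ```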
1810pub fn push_execution_policy(policy: CapabilityPolicy) {
1811 EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
1812}
1813
1814pub fn pop_execution_policy() {
1815 EXECUTION_POLICY_STACK.with(|stack| {
1816 stack.borrow_mut().pop();
1817 });
1818}
1819
1820pub fn current_execution_policy() -> Option<CapabilityPolicy> {
1821 EXECUTION_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
1822}
1823
1824fn policy_allows_tool(policy: &CapabilityPolicy, tool: &str) -> bool {
1825 policy.tools.is_empty() || policy.tools.iter().any(|allowed| allowed == tool)
1826}
1827
1828fn policy_allows_capability(policy: &CapabilityPolicy, capability: &str, op: &str) -> bool {
1829 policy.capabilities.is_empty()
1830 || policy
1831 .capabilities
1832 .get(capability)
1833 .is_some_and(|ops| ops.is_empty() || ops.iter().any(|allowed| allowed == op))
1834}
1835
1836fn policy_allows_side_effect(policy: &CapabilityPolicy, requested: &str) -> bool {
1837 fn rank(v: &str) -> usize {
1838 match v {
1839 "none" => 0,
1840 "read_only" => 1,
1841 "workspace_write" => 2,
1842 "process_exec" => 3,
1843 "network" => 4,
1844 _ => 5,
1845 }
1846 }
1847 policy
1848 .side_effect_level
1849 .as_ref()
1850 .map(|allowed| rank(allowed) >= rank(requested))
1851 .unwrap_or(true)
1852}
1853
1854fn reject_policy(reason: String) -> Result<(), VmError> {
1855 Err(VmError::CategorizedError {
1856 message: reason,
1857 category: crate::value::ErrorCategory::ToolRejected,
1858 })
1859}
1860
pub fn enforce_current_policy_for_builtin(name: &str, args: &[VmValue]) -> Result<(), VmError> {
    let Some(policy) = current_execution_policy() else {
        return Ok(());
    };
    match name {
        "read" | "read_file" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "read_text")
            {
                return reject_policy(format!(
                    "builtin '{name}' exceeds workspace.read_text ceiling"
                ));
            }
        }
        "search" | "list_dir" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "list")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
            }
        }
        "file_exists" | "stat" => {
            if !policy_allows_capability(&policy, "workspace", "exists") {
                return reject_policy(format!("builtin '{name}' exceeds workspace.exists ceiling"));
            }
        }
        "edit" | "write_file" | "append_file" | "mkdir" | "copy_file" => {
            if !policy_allows_tool(&policy, "edit")
                || !policy_allows_capability(&policy, "workspace", "write_text")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace write ceiling"));
            }
        }
        "delete_file" => {
            if !policy_allows_capability(&policy, "workspace", "delete")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'delete_file' exceeds workspace.delete ceiling".to_string(),
                );
            }
        }
        "apply_edit" => {
            if !policy_allows_capability(&policy, "workspace", "apply_edit")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'apply_edit' exceeds workspace.apply_edit ceiling".to_string(),
                );
            }
        }
        "exec" | "exec_at" | "shell" | "shell_at" | "run_command" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        "http_get" | "http_post" | "http_put" | "http_patch" | "http_delete" | "http_request" => {
            if !policy_allows_side_effect(&policy, "network") {
                return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
            }
        }
        "mcp_connect"
        | "mcp_call"
        | "mcp_list_tools"
        | "mcp_list_resources"
        | "mcp_list_resource_templates"
        | "mcp_read_resource"
        | "mcp_list_prompts"
        | "mcp_get_prompt"
        | "mcp_server_info"
        | "mcp_disconnect" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        "host_invoke" => {
            let capability = args.first().map(|v| v.display()).unwrap_or_default();
            let op = args.get(1).map(|v| v.display()).unwrap_or_default();
            if !policy_allows_capability(&policy, &capability, &op) {
                return reject_policy(format!(
                    "host_invoke {capability}.{op} exceeds capability ceiling"
                ));
            }
            let requested_side_effect = match (capability.as_str(), op.as_str()) {
                ("workspace", "write_text" | "apply_edit" | "delete") => "workspace_write",
                ("process", "exec") => "process_exec",
                _ => "read_only",
            };
            if !policy_allows_side_effect(&policy, requested_side_effect) {
                return reject_policy(format!(
                    "host_invoke {capability}.{op} exceeds side-effect ceiling"
                ));
            }
        }
        _ => {}
    }
    Ok(())
}

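/// Bridged host builtins bypass the capability mapping above, so any active
/// execution policy rejects them outright instead of guessing at a surface.
///
/// ```ignore
/// // With any policy pushed, bridged builtins are categorically denied:
/// push_execution_policy(CapabilityPolicy::default());
/// assert!(enforce_current_policy_for_bridge_builtin("custom_host_builtin").is_err());
/// pop_execution_policy();
/// ```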
pub fn enforce_current_policy_for_bridge_builtin(name: &str) -> Result<(), VmError> {
    if current_execution_policy().is_some() {
        return reject_policy(format!(
            "bridged builtin '{name}' exceeds execution policy; declare an explicit capability/tool surface instead"
        ));
    }
    Ok(())
}

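/// Gate a named tool against the tool ceiling of the innermost policy; calls
/// are rejected only when a policy is active and the tool is not on its list.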
pub fn enforce_current_policy_for_tool(tool_name: &str) -> Result<(), VmError> {
    let Some(policy) = current_execution_policy() else {
        return Ok(());
    };
    if !policy_allows_tool(&policy, tool_name) {
        return reject_policy(format!("tool '{tool_name}' exceeds tool ceiling"));
    }
    Ok(())
}

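/// Keep only the last `keep_last` messages of a transcript dict, rebuilding
/// the derived `events` list from the retained messages. Returns `None` when
/// the value is not a dict.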
fn compact_transcript(transcript: &VmValue, keep_last: usize) -> Option<VmValue> {
    let dict = transcript.as_dict()?;
    let messages = match dict.get("messages") {
        Some(VmValue::List(list)) => list.iter().cloned().collect::<Vec<_>>(),
        _ => Vec::new(),
    };
    let retained = messages
        .into_iter()
        .rev()
        .take(keep_last)
        .collect::<Vec<_>>()
        .into_iter()
        .rev()
        .collect::<Vec<_>>();
    let mut compacted = dict.clone();
    compacted.insert(
        "messages".to_string(),
        VmValue::List(Rc::new(retained.clone())),
    );
    compacted.insert(
        "events".to_string(),
        VmValue::List(Rc::new(
            crate::llm::helpers::transcript_events_from_messages(&retained),
        )),
    );
    Some(VmValue::Dict(Rc::new(compacted)))
}

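/// Redact a transcript for external consumers. Under a "public" or
/// "public_only" visibility this drops `tool_result` messages and any event
/// not explicitly marked public; other visibility values pass through as-is.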
fn redact_transcript_visibility(transcript: &VmValue, visibility: Option<&str>) -> Option<VmValue> {
    let Some(visibility) = visibility else {
        return Some(transcript.clone());
    };
    if visibility != "public" && visibility != "public_only" {
        return Some(transcript.clone());
    }
    let dict = transcript.as_dict()?;
    let public_messages = match dict.get("messages") {
        Some(VmValue::List(list)) => list
            .iter()
            .filter(|message| {
                message
                    .as_dict()
                    .and_then(|d| d.get("role"))
                    .map(|v| v.display())
                    .map(|role| role != "tool_result")
                    .unwrap_or(true)
            })
            .cloned()
            .collect::<Vec<_>>(),
        _ => Vec::new(),
    };
    let public_events = match dict.get("events") {
        Some(VmValue::List(list)) => list
            .iter()
            .filter(|event| {
                event
                    .as_dict()
                    .and_then(|d| d.get("visibility"))
                    .map(|v| v.display())
                    .map(|value| value == "public")
                    .unwrap_or(true)
            })
            .cloned()
            .collect::<Vec<_>>(),
        _ => Vec::new(),
    };
    let mut redacted = dict.clone();
    redacted.insert(
        "messages".to_string(),
        VmValue::List(Rc::new(public_messages)),
    );
    redacted.insert("events".to_string(), VmValue::List(Rc::new(public_events)));
    Some(VmValue::Dict(Rc::new(redacted)))
}

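/// Apply a stage's transcript policy to its incoming transcript:
/// `mode = "reset"` drops it, `mode = "fork"` re-identifies a copy so the
/// stage cannot mutate its parent's history, and `compact` trims the result
/// to the last `keep_last` messages (defaulting to 6).
///
/// A sketch of the fork case (assuming `TranscriptPolicy` derives `Default`,
/// as the other policy structs here do):
///
/// ```ignore
/// let policy = TranscriptPolicy {
///     mode: Some("fork".to_string()),
///     ..Default::default()
/// };
/// // Same messages, fresh "id" field on the returned dict.
/// let forked = apply_input_transcript_policy(Some(transcript), &policy);
/// ```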
pub(crate) fn apply_input_transcript_policy(
    transcript: Option<VmValue>,
    policy: &TranscriptPolicy,
) -> Option<VmValue> {
    let mut transcript = transcript;
    match policy.mode.as_deref() {
        Some("reset") => return None,
        Some("fork") => {
            if let Some(VmValue::Dict(dict)) = transcript.as_ref() {
                let mut forked = dict.as_ref().clone();
                forked.insert(
                    "id".to_string(),
                    VmValue::String(Rc::from(new_id("transcript"))),
                );
                transcript = Some(VmValue::Dict(Rc::new(forked)));
            }
        }
        _ => {}
    }
    if policy.compact {
        let keep_last = policy.keep_last.unwrap_or(6);
        transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
    }
    transcript
}

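/// Output-side counterpart of `apply_input_transcript_policy`: compact if
/// requested, then redact according to the policy's visibility setting.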
fn apply_output_transcript_policy(
    transcript: Option<VmValue>,
    policy: &TranscriptPolicy,
) -> Option<VmValue> {
    let mut transcript = transcript;
    if policy.compact {
        let keep_last = policy.keep_last.unwrap_or(6);
        transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
    }
    transcript.and_then(|value| redact_transcript_visibility(&value, policy.visibility.as_deref()))
}

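/// Execute a single workflow stage: select input artifacts under the node's
/// context policy, enforce the input contract, build the prompt and LLM
/// options, run either a plain completion or an agent loop (when the node is
/// in "agent" mode or declares tools), and wrap the result in a new
/// `ArtifactRecord` whose lineage points at the consumed artifacts.
///
/// A sketch of how a driver might call it (argument values are illustrative):
///
/// ```ignore
/// let (result, produced, transcript) =
///     execute_stage_node("act", &node, "add a README", &artifacts, None).await?;
/// ```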
pub async fn execute_stage_node(
    node_id: &str,
    node: &WorkflowNode,
    task: &str,
    artifacts: &[ArtifactRecord],
    transcript: Option<VmValue>,
) -> Result<(serde_json::Value, Vec<ArtifactRecord>, Option<VmValue>), VmError> {
    let mut selection_policy = node.context_policy.clone();
    if selection_policy.include_kinds.is_empty() && !node.input_contract.input_kinds.is_empty() {
        selection_policy.include_kinds = node.input_contract.input_kinds.clone();
    }
    let selected = select_artifacts(artifacts.to_vec(), &selection_policy);
    let rendered_context = render_artifacts_context(&selected, &node.context_policy);
    let transcript = apply_input_transcript_policy(transcript, &node.transcript_policy);
    if node.input_contract.require_transcript && transcript.is_none() {
        return Err(VmError::Runtime(format!(
            "workflow stage {node_id} requires transcript input"
        )));
    }
    if let Some(min_inputs) = node.input_contract.min_inputs {
        if selected.len() < min_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} requires at least {min_inputs} input artifacts"
            )));
        }
    }
    if let Some(max_inputs) = node.input_contract.max_inputs {
        if selected.len() > max_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} accepts at most {max_inputs} input artifacts"
            )));
        }
    }
    let prompt = if rendered_context.is_empty() {
        task.to_string()
    } else {
        format!(
            "{rendered_context}\n\n{}:\n{task}",
            node.task_label
                .clone()
                .unwrap_or_else(|| "Task".to_string())
        )
    };

    let mut options = BTreeMap::new();
    if let Some(provider) = &node.model_policy.provider {
        options.insert(
            "provider".to_string(),
            VmValue::String(Rc::from(provider.clone())),
        );
    }
    if let Some(model) = &node.model_policy.model {
        options.insert(
            "model".to_string(),
            VmValue::String(Rc::from(model.clone())),
        );
    }
    if let Some(model_tier) = &node.model_policy.model_tier {
        options.insert(
            "model_tier".to_string(),
            VmValue::String(Rc::from(model_tier.clone())),
        );
    }
    if let Some(temperature) = node.model_policy.temperature {
        options.insert("temperature".to_string(), VmValue::Float(temperature));
    }
    if let Some(max_tokens) = node.model_policy.max_tokens {
        options.insert("max_tokens".to_string(), VmValue::Int(max_tokens));
    }
    if !node.tools.is_empty() {
        options.insert(
            "tools".to_string(),
            VmValue::List(Rc::new(
                node.tools
                    .iter()
                    .map(|tool| VmValue::String(Rc::from(tool.clone())))
                    .collect(),
            )),
        );
    }
    if let Some(transcript) = transcript.clone() {
        options.insert("transcript".to_string(), transcript);
    }

    let args = vec![
        VmValue::String(Rc::from(prompt)),
        node.system
            .clone()
            .map(|s| VmValue::String(Rc::from(s)))
            .unwrap_or(VmValue::Nil),
        VmValue::Dict(Rc::new(options)),
    ];
    let mut opts = extract_llm_options(&args)?;

    let llm_result = if node.mode.as_deref() == Some("agent") || !node.tools.is_empty() {
        crate::llm::run_agent_loop_internal(
            &mut opts,
            crate::llm::AgentLoopConfig {
                persistent: true,
                max_iterations: 12,
                max_nudges: 3,
                nudge: None,
                tool_retries: 0,
                tool_backoff_ms: 1000,
                tool_format: "text".to_string(),
                auto_compact: None,
                policy: None,
                daemon: false,
            },
        )
        .await?
    } else {
        let result = vm_call_llm_full(&opts).await?;
        crate::llm::agent_loop_result_from_llm(&result, opts)
    };

    let visible_text = llm_result["text"].as_str().unwrap_or_default().to_string();
    let transcript = llm_result
        .get("transcript")
        .cloned()
        .map(|value| crate::stdlib::json_to_vm_value(&value));
    let transcript = apply_output_transcript_policy(transcript, &node.transcript_policy);
    let output_kind = node
        .output_contract
        .output_kinds
        .first()
        .cloned()
        .unwrap_or_else(|| {
            if node.kind == "verify" {
                "verification_result".to_string()
            } else {
                "artifact".to_string()
            }
        });
    let mut metadata = BTreeMap::new();
    metadata.insert(
        "input_artifact_ids".to_string(),
        serde_json::json!(selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect::<Vec<_>>()),
    );
    metadata.insert("node_kind".to_string(), serde_json::json!(node.kind));
    let artifact = ArtifactRecord {
        type_name: "artifact".to_string(),
        id: new_id("artifact"),
        kind: output_kind,
        title: Some(format!("stage {node_id} output")),
        text: Some(visible_text),
        data: Some(llm_result.clone()),
        source: Some(node_id.to_string()),
        created_at: now_rfc3339(),
        freshness: Some("fresh".to_string()),
        priority: None,
        lineage: selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect(),
        relevance: Some(1.0),
        estimated_tokens: None,
        stage: Some(node_id.to_string()),
        metadata,
    }
    .normalize();

    Ok((llm_result, vec![artifact], transcript))
}

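/// Edges leaving `current` whose branch label matches `branch`; when none
/// match, fall back to the node's unlabeled edges so a plain "next" edge
/// still fires for branch outcomes the workflow author did not wire up.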
pub fn next_nodes_for(
    graph: &WorkflowGraph,
    current: &str,
    branch: Option<&str>,
) -> Vec<WorkflowEdge> {
    let mut matching: Vec<WorkflowEdge> = graph
        .edges
        .iter()
        .filter(|edge| edge.from == current && edge.branch.as_deref() == branch)
        .cloned()
        .collect();
    if matching.is_empty() {
        matching = graph
            .edges
            .iter()
            .filter(|edge| edge.from == current && edge.branch.is_none())
            .cloned()
            .collect();
    }
    matching
}

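/// Convenience wrapper over `next_nodes_for` returning only the first
/// matching target node id.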
pub fn next_node_for(graph: &WorkflowGraph, current: &str, branch: &str) -> Option<String> {
    next_nodes_for(graph, current, Some(branch))
        .into_iter()
        .next()
        .map(|edge| edge.to)
}

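/// Append an entry to the graph's audit log, stamping it with a fresh id and
/// the current timestamp.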
pub fn append_audit_entry(
    graph: &mut WorkflowGraph,
    op: &str,
    node_id: Option<String>,
    reason: Option<String>,
    metadata: BTreeMap<String, serde_json::Value>,
) {
    graph.audit_log.push(WorkflowAuditEntry {
        id: new_id("audit"),
        op: op.to_string(),
        node_id,
        timestamp: now_rfc3339(),
        reason,
        metadata,
    });
}

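/// The default host ceiling: the widest tool/capability/side-effect surface
/// this runtime grants on its own. Requested policies are intersected against
/// a ceiling, so a request can narrow this surface but never widen it.
///
/// ```ignore
/// // Mirroring the intersection test below: asking for a tool outside the
/// // ceiling is an error, not a silent grant.
/// let requested = CapabilityPolicy {
///     tools: vec!["read".to_string(), "unlisted_tool".to_string()],
///     ..Default::default()
/// };
/// assert!(builtin_ceiling().intersect(&requested).is_err());
/// ```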
pub fn builtin_ceiling() -> CapabilityPolicy {
    CapabilityPolicy {
        tools: vec![
            "read".to_string(),
            "read_file".to_string(),
            "search".to_string(),
            "edit".to_string(),
            "run".to_string(),
            "exec".to_string(),
            "outline".to_string(),
            "list_directory".to_string(),
            "lsp_hover".to_string(),
            "lsp_definition".to_string(),
            "lsp_references".to_string(),
            "web_search".to_string(),
            "web_fetch".to_string(),
        ],
        capabilities: BTreeMap::from([
            (
                "workspace".to_string(),
                vec![
                    "read_text".to_string(),
                    "write_text".to_string(),
                    "apply_edit".to_string(),
                    "delete".to_string(),
                    "exists".to_string(),
                    "list".to_string(),
                ],
            ),
            ("process".to_string(), vec!["exec".to_string()]),
        ]),
        workspace_roots: Vec::new(),
        side_effect_level: Some("network".to_string()),
        recursion_limit: Some(8),
        tool_arg_constraints: Vec::new(),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn capability_intersection_rejects_privilege_expansion() {
        let ceiling = CapabilityPolicy {
            tools: vec!["read".to_string()],
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(2),
            ..Default::default()
        };
        let requested = CapabilityPolicy {
            tools: vec!["read".to_string(), "edit".to_string()],
            ..Default::default()
        };
        let error = ceiling.intersect(&requested).unwrap_err();
        assert!(error.contains("host ceiling"));
    }

    #[test]
    fn active_execution_policy_rejects_unknown_bridge_builtin() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }

    #[test]
    fn active_execution_policy_rejects_mcp_escape_hatch() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }

    #[test]
    fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "legacy",
            "act": {"mode": "llm"},
            "verify": {"kind": "verify"},
            "repair": {"mode": "agent"},
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        assert_eq!(graph.type_name, "workflow_graph");
        assert!(graph.nodes.contains_key("act"));
        assert!(graph.nodes.contains_key("verify"));
        assert!(graph.nodes.contains_key("repair"));
        assert_eq!(graph.entry, "act");
    }

    #[test]
    fn artifact_selection_honors_budget_and_priority() {
        let policy = ContextPolicy {
            max_artifacts: Some(2),
            max_tokens: Some(30),
            prefer_recent: true,
            prefer_fresh: true,
            prioritize_kinds: vec!["verification_result".to_string()],
            ..Default::default()
        };
        let artifacts = vec![
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "a".to_string(),
                kind: "summary".to_string(),
                text: Some("short".to_string()),
                relevance: Some(0.9),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "b".to_string(),
                kind: "summary".to_string(),
                text: Some("this is a much larger artifact body".to_string()),
                relevance: Some(1.0),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "c".to_string(),
                kind: "summary".to_string(),
                text: Some("tiny".to_string()),
                relevance: Some(0.5),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
        ];
        let selected = select_artifacts(artifacts, &policy);
        assert_eq!(selected.len(), 2);
        assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
    }

    #[test]
    fn workflow_validation_rejects_condition_without_true_false_edges() {
        let graph = WorkflowGraph {
            entry: "gate".to_string(),
            nodes: BTreeMap::from([(
                "gate".to_string(),
                WorkflowNode {
                    id: Some("gate".to_string()),
                    kind: "condition".to_string(),
                    ..Default::default()
                },
            )]),
            edges: vec![WorkflowEdge {
                from: "gate".to_string(),
                to: "next".to_string(),
                branch: Some("true".to_string()),
                label: None,
            }],
            ..Default::default()
        };
        let report = validate_workflow(&graph, None);
        assert!(!report.valid);
        assert!(report
            .errors
            .iter()
            .any(|error| error.contains("true") && error.contains("false")));
    }

    #[test]
    fn replay_fixture_round_trip_passes() {
        let run = RunRecord {
            type_name: "run_record".to_string(),
            id: "run_1".to_string(),
            workflow_id: "wf".to_string(),
            workflow_name: Some("demo".to_string()),
            task: "demo".to_string(),
            status: "completed".to_string(),
            started_at: "1".to_string(),
            finished_at: Some("2".to_string()),
            parent_run_id: None,
            root_run_id: Some("run_1".to_string()),
            stages: vec![RunStageRecord {
                id: "stage_1".to_string(),
                node_id: "act".to_string(),
                kind: "stage".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                branch: Some("success".to_string()),
                started_at: "1".to_string(),
                finished_at: Some("2".to_string()),
                visible_text: Some("done".to_string()),
                private_reasoning: None,
                transcript: None,
                verification: None,
                artifacts: vec![ArtifactRecord {
                    type_name: "artifact".to_string(),
                    id: "a1".to_string(),
                    kind: "summary".to_string(),
                    text: Some("done".to_string()),
                    created_at: "1".to_string(),
                    ..Default::default()
                }
                .normalize()],
                consumed_artifact_ids: vec![],
                produced_artifact_ids: vec!["a1".to_string()],
                attempts: vec![],
                metadata: BTreeMap::new(),
            }],
            transitions: vec![],
            checkpoints: vec![],
            pending_nodes: vec![],
            completed_nodes: vec!["act".to_string()],
            child_runs: vec![],
            artifacts: vec![],
            policy: CapabilityPolicy::default(),
            execution: None,
            transcript: None,
            replay_fixture: None,
            metadata: BTreeMap::new(),
            persisted_path: None,
        };
        let fixture = replay_fixture_from_run(&run);
        let report = evaluate_run_against_fixture(&run, &fixture);
        assert!(report.pass);
        assert!(report.failures.is_empty());
    }

    #[test]
    fn replay_eval_suite_reports_failed_case() {
        let good = RunRecord {
            id: "run_good".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let bad = RunRecord {
            id: "run_bad".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let suite = evaluate_run_suite(vec![
            (
                good.clone(),
                replay_fixture_from_run(&good),
                Some("good.json".to_string()),
            ),
            (
                bad.clone(),
                replay_fixture_from_run(&good),
                Some("bad.json".to_string()),
            ),
        ]);
        assert!(!suite.pass);
        assert_eq!(suite.total, 2);
        assert_eq!(suite.failed, 1);
        assert!(suite.cases.iter().any(|case| !case.pass));
    }

    #[test]
    fn run_diff_reports_changed_stage() {
        let left = RunRecord {
            id: "left".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let right = RunRecord {
            id: "right".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let diff = diff_run_records(&left, &right);
        assert!(diff.status_changed);
        assert!(!diff.identical);
        assert_eq!(diff.stage_diffs.len(), 1);
    }

    #[test]
    fn eval_suite_manifest_can_fail_on_baseline_diff() {
        let temp_dir =
            std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
        std::fs::create_dir_all(&temp_dir).unwrap();
        let baseline_path = temp_dir.join("baseline.json");
        let candidate_path = temp_dir.join("candidate.json");

        let baseline = RunRecord {
            id: "baseline".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let candidate = RunRecord {
            id: "candidate".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };

        save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
        save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();

        let manifest = EvalSuiteManifest {
            base_dir: Some(temp_dir.display().to_string()),
            cases: vec![EvalSuiteCase {
                label: Some("candidate".to_string()),
                run_path: "candidate.json".to_string(),
                fixture_path: None,
                compare_to: Some("baseline.json".to_string()),
            }],
            ..Default::default()
        };
        let suite = evaluate_run_suite_manifest(&manifest).unwrap();
        assert!(!suite.pass);
        assert_eq!(suite.failed, 1);
        assert!(suite.cases[0].comparison.is_some());
        assert!(suite.cases[0]
            .failures
            .iter()
            .any(|failure| failure.contains("baseline")));
    }

    #[test]
    fn render_unified_diff_marks_removed_and_added_lines() {
        let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
        assert!(diff.contains("--- a/src/main.rs"));
        assert!(diff.contains("+++ b/src/main.rs"));
        assert!(diff.contains("-old"));
        assert!(diff.contains("+new"));
        assert!(diff.contains(" same"));
    }

    #[test]
    fn execution_policy_rejects_process_exec_when_read_only() {
        push_execution_policy(CapabilityPolicy {
            side_effect_level: Some("read_only".to_string()),
            capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
            ..Default::default()
        });
        let result = enforce_current_policy_for_builtin("exec", &[]);
        pop_execution_policy();
        assert!(result.is_err());
    }

    #[test]
    fn execution_policy_rejects_unlisted_tool() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            ..Default::default()
        });
        let result = enforce_current_policy_for_tool("edit");
        pop_execution_policy();
        assert!(result.is_err());
    }

    #[test]
    fn pre_tool_hook_deny_blocks_execution() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "dangerous_*".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Deny("blocked by policy".to_string())
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Deny(_)));
    }

    #[test]
    fn pre_tool_hook_allow_passes_through() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "safe_*".to_string(),
            pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
            post: None,
        });
        let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Allow));
    }

    #[test]
    fn pre_tool_hook_modify_rewrites_args() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "*".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
        clear_tool_hooks();
        match result {
            PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
            _ => panic!("expected Modify"),
        }
    }

    #[test]
    fn post_tool_hook_modifies_result() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "exec".to_string(),
            pre: None,
            post: Some(Rc::new(|_name, result| {
                if result.contains("SECRET") {
                    PostToolAction::Modify("[REDACTED]".to_string())
                } else {
                    PostToolAction::Pass
                }
            })),
        });
        let result = run_post_tool_hooks("exec", "output with SECRET data");
        let clean = run_post_tool_hooks("exec", "clean output");
        clear_tool_hooks();
        assert_eq!(result, "[REDACTED]");
        assert_eq!(clean, "clean output");
    }

    #[test]
    fn unmatched_hook_pattern_does_not_fire() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "exec".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Deny("should not match".to_string())
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Allow));
    }

    #[test]
    fn glob_match_patterns() {
        assert!(glob_match("*", "anything"));
        assert!(glob_match("exec*", "exec_at"));
        assert!(glob_match("*_file", "read_file"));
        assert!(!glob_match("exec*", "read_file"));
        assert!(glob_match("read_file", "read_file"));
        assert!(!glob_match("read_file", "write_file"));
    }

    #[test]
    fn microcompact_snips_large_output() {
        let large = "x".repeat(50_000);
        let result = microcompact_tool_output(&large, 10_000);
        assert!(result.len() < 15_000);
        assert!(result.contains("snipped"));
    }

    #[test]
    fn microcompact_preserves_small_output() {
        let small = "hello world";
        let result = microcompact_tool_output(small, 10_000);
        assert_eq!(result, small);
    }

    #[test]
    fn auto_compact_messages_reduces_count() {
        let mut messages: Vec<serde_json::Value> = (0..20)
            .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
            .collect();
        let compacted = auto_compact_messages(&mut messages, 6);
        assert!(compacted);
        // One synthetic summary message plus the kept tail of 6.
        assert!(messages.len() <= 7);
        assert!(messages[0]["content"]
            .as_str()
            .unwrap()
            .contains("auto-compacted"));
    }

    #[test]
    fn auto_compact_noop_when_under_threshold() {
        let mut messages: Vec<serde_json::Value> = (0..4)
            .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
            .collect();
        let compacted = auto_compact_messages(&mut messages, 6);
        assert!(!compacted);
        assert_eq!(messages.len(), 4);
    }

    #[test]
    fn estimate_message_tokens_basic() {
        let messages = vec![
            serde_json::json!({"role": "user", "content": "a".repeat(400)}),
            serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
        ];
        let tokens = estimate_message_tokens(&messages);
        // 800 content chars at the ~4-chars-per-token estimate => 200 tokens.
        assert_eq!(tokens, 200);
    }

    #[test]
    fn dedup_artifacts_removes_duplicates() {
        let mut artifacts = vec![
            ArtifactRecord {
                id: "a1".to_string(),
                kind: "test".to_string(),
                text: Some("duplicate content".to_string()),
                ..Default::default()
            },
            ArtifactRecord {
                id: "a2".to_string(),
                kind: "test".to_string(),
                text: Some("duplicate content".to_string()),
                ..Default::default()
            },
            ArtifactRecord {
                id: "a3".to_string(),
                kind: "test".to_string(),
                text: Some("unique content".to_string()),
                ..Default::default()
            },
        ];
        dedup_artifacts(&mut artifacts);
        assert_eq!(artifacts.len(), 2);
    }

    #[test]
    fn microcompact_artifact_snips_oversized() {
        let mut artifact = ArtifactRecord {
            id: "a1".to_string(),
            kind: "test".to_string(),
            text: Some("x".repeat(10_000)),
            estimated_tokens: Some(2_500),
            ..Default::default()
        };
        microcompact_artifact(&mut artifact, 500);
        assert!(artifact.text.as_ref().unwrap().len() < 5_000);
        assert_eq!(artifact.estimated_tokens, Some(500));
    }

    #[test]
    fn arg_constraint_allows_matching_pattern() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "exec",
            &serde_json::json!({"command": "cargo test"}),
        );
        assert!(result.is_ok());
    }

    #[test]
    fn arg_constraint_rejects_non_matching_pattern() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "exec",
            &serde_json::json!({"command": "rm -rf /"}),
        );
        assert!(result.is_err());
    }

    #[test]
    fn arg_constraint_ignores_unmatched_tool() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "read_file",
            &serde_json::json!({"path": "/etc/passwd"}),
        );
        assert!(result.is_ok());
    }
}