1use std::collections::{BTreeMap, BTreeSet};
2use std::path::{Path, PathBuf};
3use std::rc::Rc;
4use std::{cell::RefCell, thread_local};
5
6use serde::{Deserialize, Serialize};
7
8use crate::llm::{extract_llm_options, vm_call_llm_full, vm_value_to_json};
9use crate::value::{VmError, VmValue};
10
/// Returns the current wall-clock time as a string.
///
/// NOTE(review): despite the name, this emits the Unix epoch *seconds*
/// (e.g. "1700000000"), not an RFC 3339 timestamp — producing real
/// RFC 3339 would need a date/time dependency. Callers appear to treat
/// the value as an opaque creation marker.
fn now_rfc3339() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_secs())
        .unwrap_or(0)
        .to_string()
}
19
20fn new_id(prefix: &str) -> String {
21 format!("{prefix}_{}", uuid::Uuid::now_v7())
22}
23
/// Resolves the directory used to persist run state.
///
/// Honors the `HARN_RUN_DIR` environment variable when set; otherwise
/// falls back to the relative path `.harn-runs`.
fn default_run_dir() -> PathBuf {
    match std::env::var("HARN_RUN_DIR") {
        Ok(dir) => PathBuf::from(dir),
        Err(_) => PathBuf::from(".harn-runs"),
    }
}
29
// Thread-local runtime state for the host. All three slots are per-thread
// because the VM and its hooks/closures are single-threaded (`Rc`-based).
thread_local! {
    // Stack of capability policies; presumably pushed/popped around nested
    // executions so the innermost policy applies — TODO confirm with callers.
    static EXECUTION_POLICY_STACK: RefCell<Vec<CapabilityPolicy>> = const { RefCell::new(Vec::new()) };
    // Registered pre/post tool-call hooks, matched by glob pattern against tool names.
    static TOOL_HOOKS: RefCell<Vec<ToolHook>> = const { RefCell::new(Vec::new()) };
    // The mutation session attached to the current thread, if any.
    static CURRENT_MUTATION_SESSION: RefCell<Option<MutationSessionRecord>> = const { RefCell::new(None) };
}
35
/// Serialized record describing a mutation session.
///
/// Every field defaults via `#[serde(default)]`, so partial JSON payloads
/// deserialize cleanly; `normalize` back-fills the required string fields.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct MutationSessionRecord {
    // Unique id; generated by `normalize` when empty.
    pub session_id: String,
    // Id of the spawning session when this one is nested.
    pub parent_session_id: Option<String>,
    pub run_id: Option<String>,
    pub worker_id: Option<String>,
    pub execution_kind: Option<String>,
    // Mutation scope label; `normalize` defaults this to "read_only".
    pub mutation_scope: String,
    // Approval mode label; `normalize` defaults this to "host_enforced".
    pub approval_mode: String,
}
47
48impl MutationSessionRecord {
49 pub fn normalize(mut self) -> Self {
50 if self.session_id.is_empty() {
51 self.session_id = new_id("session");
52 }
53 if self.mutation_scope.is_empty() {
54 self.mutation_scope = "read_only".to_string();
55 }
56 if self.approval_mode.is_empty() {
57 self.approval_mode = "host_enforced".to_string();
58 }
59 self
60 }
61}
62
63pub fn install_current_mutation_session(session: Option<MutationSessionRecord>) {
64 CURRENT_MUTATION_SESSION.with(|slot| {
65 *slot.borrow_mut() = session.map(MutationSessionRecord::normalize);
66 });
67}
68
69pub fn current_mutation_session() -> Option<MutationSessionRecord> {
70 CURRENT_MUTATION_SESSION.with(|slot| slot.borrow().clone())
71}
72
/// Outcome of a pre-tool hook for a single tool invocation.
#[derive(Clone, Debug)]
pub enum PreToolAction {
    /// Let the call proceed with the current arguments.
    Allow,
    /// Block the call; the string is a human-readable reason.
    Deny(String),
    /// Let the call proceed with these replacement arguments.
    Modify(serde_json::Value),
}
85
/// Outcome of a post-tool hook applied to a tool's result string.
#[derive(Clone, Debug)]
pub enum PostToolAction {
    /// Keep the result unchanged.
    Pass,
    /// Replace the result with this string.
    Modify(String),
}
94
/// Pre-hook: inspects `(tool_name, args)` and decides how the call proceeds.
pub type PreToolHookFn = Rc<dyn Fn(&str, &serde_json::Value) -> PreToolAction>;
/// Post-hook: inspects `(tool_name, result)` and may rewrite the result.
pub type PostToolHookFn = Rc<dyn Fn(&str, &str) -> PostToolAction>;
98
/// A pre/post hook pair attached to tools whose names match `pattern`.
#[derive(Clone)]
pub struct ToolHook {
    /// Glob pattern (`*`, `prefix*`, `*suffix`, or exact) matched via `glob_match`.
    pub pattern: String,
    /// Optional hook run before the tool executes.
    pub pre: Option<PreToolHookFn>,
    /// Optional hook run after the tool executes.
    pub post: Option<PostToolHookFn>,
}
109
110impl std::fmt::Debug for ToolHook {
111 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112 f.debug_struct("ToolHook")
113 .field("pattern", &self.pattern)
114 .field("has_pre", &self.pre.is_some())
115 .field("has_post", &self.post.is_some())
116 .finish()
117 }
118}
119
/// Minimal glob matcher supporting `*` (match all), `prefix*`, `*suffix`,
/// and exact names.
///
/// The trailing-`*` branch is checked before the leading-`*` branch, so a
/// pattern with both (e.g. `*a*`) is treated as the prefix pattern `*a`.
fn glob_match(pattern: &str, name: &str) -> bool {
    if pattern == "*" {
        return true;
    }
    match (pattern.strip_suffix('*'), pattern.strip_prefix('*')) {
        (Some(prefix), _) => name.starts_with(prefix),
        (None, Some(suffix)) => name.ends_with(suffix),
        (None, None) => pattern == name,
    }
}
132
133pub fn register_tool_hook(hook: ToolHook) {
134 TOOL_HOOKS.with(|hooks| hooks.borrow_mut().push(hook));
135}
136
137pub fn clear_tool_hooks() {
138 TOOL_HOOKS.with(|hooks| hooks.borrow_mut().clear());
139}
140
141pub fn run_pre_tool_hooks(tool_name: &str, args: &serde_json::Value) -> PreToolAction {
143 TOOL_HOOKS.with(|hooks| {
144 let hooks = hooks.borrow();
145 let mut current_args = args.clone();
146 for hook in hooks.iter() {
147 if !glob_match(&hook.pattern, tool_name) {
148 continue;
149 }
150 if let Some(ref pre) = hook.pre {
151 match pre(tool_name, ¤t_args) {
152 PreToolAction::Allow => {}
153 PreToolAction::Deny(reason) => return PreToolAction::Deny(reason),
154 PreToolAction::Modify(new_args) => {
155 current_args = new_args;
156 }
157 }
158 }
159 }
160 if current_args != *args {
161 PreToolAction::Modify(current_args)
162 } else {
163 PreToolAction::Allow
164 }
165 })
166}
167
168pub fn run_post_tool_hooks(tool_name: &str, result: &str) -> String {
170 TOOL_HOOKS.with(|hooks| {
171 let hooks = hooks.borrow();
172 let mut current = result.to_string();
173 for hook in hooks.iter() {
174 if !glob_match(&hook.pattern, tool_name) {
175 continue;
176 }
177 if let Some(ref post) = hook.post {
178 match post(tool_name, ¤t) {
179 PostToolAction::Pass => {}
180 PostToolAction::Modify(new_result) => {
181 current = new_result;
182 }
183 }
184 }
185 }
186 current
187 })
188}
189
/// Strategy used to compact archived transcript messages.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CompactStrategy {
    /// Summarize archived messages with an LLM call (`llm_compaction_summary`).
    Llm,
    /// Abbreviate each archived message to a fixed character budget.
    Truncate,
    /// Delegate summarization to a user-supplied closure (`custom_compactor`).
    Custom,
    /// Keep assistant/error messages verbatim; mask bulky tool observations.
    ObservationMask,
}
199
200pub fn parse_compact_strategy(value: &str) -> Result<CompactStrategy, VmError> {
201 match value {
202 "llm" => Ok(CompactStrategy::Llm),
203 "truncate" => Ok(CompactStrategy::Truncate),
204 "custom" => Ok(CompactStrategy::Custom),
205 "observation_mask" => Ok(CompactStrategy::ObservationMask),
206 other => Err(VmError::Runtime(format!(
207 "unknown compact_strategy '{other}' (expected 'llm', 'truncate', 'custom', or 'observation_mask')"
208 ))),
209 }
210}
211
/// Tuning knobs for automatic transcript compaction.
#[derive(Clone, Debug)]
pub struct AutoCompactConfig {
    // Estimated-token level at which compaction kicks in.
    // NOTE(review): not read inside this file's `auto_compact_messages` —
    // presumably checked by the caller before invoking it; confirm.
    pub token_threshold: usize,
    // Per-tool-output character cap (see `microcompact_tool_output`).
    pub tool_output_max_chars: usize,
    // Number of most-recent messages kept verbatim when compacting.
    pub keep_last: usize,
    // Tier-1 strategy applied to the archived messages.
    pub compact_strategy: CompactStrategy,
    // Optional tier-2 cap: if the tier-1 result plus remaining messages
    // still estimates above this, the summary itself is re-compacted.
    pub hard_limit_tokens: Option<usize>,
    // Strategy used for the tier-2 re-compaction pass.
    pub hard_limit_strategy: CompactStrategy,
    // Closure used when `compact_strategy` is `Custom`.
    pub custom_compactor: Option<VmValue>,
    // Closure used when `compact_strategy` is `ObservationMask`.
    pub mask_callback: Option<VmValue>,
}
247
impl Default for AutoCompactConfig {
    /// Defaults: compact around 48k estimated tokens, cap tool outputs at
    /// 16k chars, keep the 12 newest messages, mask observations first and
    /// fall back to LLM summarization for the hard-limit pass.
    fn default() -> Self {
        Self {
            token_threshold: 48_000,
            tool_output_max_chars: 16_000,
            keep_last: 12,
            compact_strategy: CompactStrategy::ObservationMask,
            hard_limit_tokens: None,
            hard_limit_strategy: CompactStrategy::Llm,
            custom_compactor: None,
            mask_callback: None,
        }
    }
}
262
263pub fn estimate_message_tokens(messages: &[serde_json::Value]) -> usize {
265 messages
266 .iter()
267 .map(|m| {
268 m.get("content")
269 .and_then(|c| c.as_str())
270 .map(|s| s.len())
271 .unwrap_or(0)
272 })
273 .sum::<usize>()
274 / 4
275}
276
/// Compacts an oversized tool output to roughly `max_chars` while trying to
/// preserve diagnostic lines (compiler errors, test failures, panics).
///
/// Outputs already within budget — or budgets below 200 chars, which are too
/// small to split meaningfully — are returned unchanged. When diagnostics are
/// found, up to 32 of them are kept verbatim between a head and tail slice of
/// the output; otherwise the middle is snipped symmetrically.
pub fn microcompact_tool_output(output: &str, max_chars: usize) -> String {
    if output.len() <= max_chars || max_chars < 200 {
        return output.to_string();
    }
    // Heuristically collect lines that look like diagnostics.
    let diagnostic_lines = output
        .lines()
        .filter(|line| {
            let trimmed = line.trim();
            let lower = trimmed.to_lowercase();
            // "path:123"-style location: first ':' followed by a digit.
            let has_file_line = {
                let bytes = trimmed.as_bytes();
                let mut i = 0;
                let mut found_colon = false;
                while i < bytes.len() {
                    if bytes[i] == b':' {
                        found_colon = true;
                        break;
                    }
                    i += 1;
                }
                found_colon && i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit()
            };
            // Strong keywords qualify a line on their own.
            let has_strong_keyword =
                trimmed.contains("FAIL") || trimmed.contains("panic") || trimmed.contains("Panic");
            // Weak keywords only count when paired with a file:line location.
            let has_weak_keyword = trimmed.contains("error")
                || trimmed.contains("undefined")
                || trimmed.contains("expected")
                || trimmed.contains("got")
                || lower.contains("cannot find")
                || lower.contains("not found")
                || lower.contains("no such")
                || lower.contains("unresolved")
                || lower.contains("missing")
                || lower.contains("declared but not used")
                || lower.contains("unused")
                || lower.contains("mismatch");
            // Positional markers (e.g. "error:" at line start) also qualify.
            let positional = lower.contains(" error ")
                || lower.starts_with("error:")
                || lower.starts_with("warning:")
                || lower.starts_with("note:")
                || lower.contains("panic:");
            has_strong_keyword || (has_file_line && has_weak_keyword) || positional
        })
        .take(32)
        .collect::<Vec<_>>();
    if !diagnostic_lines.is_empty() {
        let diagnostics = diagnostic_lines.join("\n");
        // Reserve room for the diagnostics plus marker text, split the rest
        // between head and tail. Fall through to plain snipping when the
        // leftover budget is too small to be useful.
        let budget = max_chars.saturating_sub(diagnostics.len() + 64);
        let keep = budget / 2;
        if keep >= 80 && output.len() > keep * 2 {
            // floor/ceil_char_boundary keep the slices on UTF-8 boundaries.
            let head_end = output.floor_char_boundary(keep);
            let tail_start = output.ceil_char_boundary(output.len() - keep);
            let head = &output[..head_end];
            let tail = &output[tail_start..];
            return format!(
                "{head}\n\n[diagnostic lines preserved]\n{diagnostics}\n\n[... output compacted ...]\n\n{tail}"
            );
        }
    }
    // No usable diagnostics: keep half the budget at each end, snip the middle.
    let keep = max_chars / 2;
    let head_end = output.floor_char_boundary(keep);
    let tail_start = output.ceil_char_boundary(output.len() - keep);
    let head = &output[..head_end];
    let tail = &output[tail_start..];
    let snipped = output.len() - max_chars;
    format!("{head}\n\n[... {snipped} characters snipped ...]\n\n{tail}")
}
360
361fn format_compaction_messages(messages: &[serde_json::Value]) -> String {
362 messages
363 .iter()
364 .map(|msg| {
365 let role = msg
366 .get("role")
367 .and_then(|v| v.as_str())
368 .unwrap_or("user")
369 .to_uppercase();
370 let content = msg
371 .get("content")
372 .and_then(|v| v.as_str())
373 .unwrap_or_default();
374 format!("{role}: {content}")
375 })
376 .collect::<Vec<_>>()
377 .join("\n")
378}
379
380fn truncate_compaction_summary(
381 old_messages: &[serde_json::Value],
382 archived_count: usize,
383) -> String {
384 truncate_compaction_summary_with_context(old_messages, archived_count, false)
385}
386
/// Builds a truncate-style summary: up to 15 non-empty messages, each
/// abbreviated to ~500 chars, under a header that differs depending on
/// whether this is the normal truncate strategy or the fallback path used
/// when the LLM summarizer returned an empty result.
fn truncate_compaction_summary_with_context(
    old_messages: &[serde_json::Value],
    archived_count: usize,
    is_llm_fallback: bool,
) -> String {
    let per_msg_limit = 500_usize;
    let summary_parts: Vec<String> = old_messages
        .iter()
        .filter_map(|m| {
            // Skip messages without a string role/content, or empty content.
            let role = m.get("role")?.as_str()?;
            let content = m.get("content")?.as_str()?;
            if content.is_empty() {
                return None;
            }
            let truncated = if content.len() > per_msg_limit {
                // floor_char_boundary keeps the cut on a UTF-8 boundary.
                format!(
                    "{}... [truncated from {} chars]",
                    &content[..content.floor_char_boundary(per_msg_limit)],
                    content.len()
                )
            } else {
                content.to_string()
            };
            Some(format!("[{role}] {truncated}"))
        })
        .take(15)
        .collect();
    let header = if is_llm_fallback {
        format!(
            "[auto-compact fallback: LLM summarizer returned empty; {archived_count} older messages abbreviated to ~{per_msg_limit} chars each]"
        )
    } else {
        format!("[auto-compacted {archived_count} older messages via truncate strategy]")
    };
    format!(
        "{header}\n{}{}",
        summary_parts.join("\n"),
        // Indicate how many archived messages fell past the 15-message cap.
        if archived_count > 15 {
            format!("\n... and {} more", archived_count - 15)
        } else {
            String::new()
        }
    )
}
431
/// Extracts the summary text from a custom compactor's return value.
///
/// Accepts, in order of preference: a dict with a `summary` (or `text`)
/// key, a plain string, `nil` (treated as empty), or any other value,
/// which is JSON-encoded as a last resort.
fn compact_summary_text_from_value(value: &VmValue) -> Result<String, VmError> {
    if let Some(map) = value.as_dict() {
        if let Some(summary) = map.get("summary").or_else(|| map.get("text")) {
            return Ok(summary.display());
        }
    }
    match value {
        VmValue::String(text) => Ok(text.to_string()),
        VmValue::Nil => Ok(String::new()),
        // Fallback: serialize whatever the closure returned.
        _ => serde_json::to_string_pretty(&vm_value_to_json(value))
            .map_err(|e| VmError::Runtime(format!("custom compactor encode error: {e}"))),
    }
}
445
/// Summarizes archived messages with a one-shot LLM call derived from the
/// active call options.
///
/// Strips transcript/tool/schema settings from the cloned options so the
/// summarization call is a plain single-turn completion. Falls back to the
/// truncate strategy when the model returns an empty summary.
async fn llm_compaction_summary(
    old_messages: &[serde_json::Value],
    archived_count: usize,
    llm_opts: &crate::llm::api::LlmCallOptions,
) -> Result<String, VmError> {
    let mut compact_opts = llm_opts.clone();
    let formatted = format_compaction_messages(old_messages);
    // Clear everything that would tie this call to the outer conversation:
    // system prompt, transcript linkage, tool wiring, and response schemas.
    compact_opts.system = None;
    compact_opts.transcript_id = None;
    compact_opts.transcript_summary = None;
    compact_opts.transcript_metadata = None;
    compact_opts.native_tools = None;
    compact_opts.tool_choice = None;
    compact_opts.response_format = None;
    compact_opts.json_schema = None;
    compact_opts.messages = vec![serde_json::json!({
        "role": "user",
        "content": format!(
            "Summarize these archived conversation messages for a follow-on coding agent. Preserve goals, constraints, decisions, completed tool work, unresolved issues, and next actions. Output only the summary text.\n\nArchived message count: {archived_count}\n\nConversation:\n{formatted}"
        ),
    })];
    let result = vm_call_llm_full(&compact_opts).await?;
    let summary = result.text.trim();
    if summary.is_empty() {
        // Empty model output: degrade gracefully to the truncate fallback.
        Ok(truncate_compaction_summary_with_context(
            old_messages,
            archived_count,
            true,
        ))
    } else {
        Ok(format!(
            "[auto-compacted {archived_count} older messages]\n{summary}"
        ))
    }
}
481
482async fn custom_compaction_summary(
483 old_messages: &[serde_json::Value],
484 archived_count: usize,
485 callback: &VmValue,
486) -> Result<String, VmError> {
487 let Some(VmValue::Closure(closure)) = Some(callback.clone()) else {
488 return Err(VmError::Runtime(
489 "compact_callback must be a closure when compact_strategy is 'custom'".to_string(),
490 ));
491 };
492 let mut vm = crate::vm::clone_async_builtin_child_vm().ok_or_else(|| {
493 VmError::Runtime(
494 "custom transcript compaction requires an async builtin VM context".to_string(),
495 )
496 })?;
497 let messages_vm = VmValue::List(Rc::new(
498 old_messages
499 .iter()
500 .map(crate::stdlib::json_to_vm_value)
501 .collect(),
502 ));
503 let result = vm.call_closure_pub(&closure, &[messages_vm], &[]).await;
504 let summary = compact_summary_text_from_value(&result?)?;
505 if summary.trim().is_empty() {
506 Ok(truncate_compaction_summary(old_messages, archived_count))
507 } else {
508 Ok(format!(
509 "[auto-compacted {archived_count} older messages]\n{summary}"
510 ))
511 }
512}
513
/// Returns true when the content looks like it reports a failure
/// (case-insensitive substring match against common error markers).
fn content_has_error_signal(content: &str) -> bool {
    const SIGNALS: &[&str] = &[
        "error",
        "fail",
        "panic",
        "non-zero exit",
        "exit code",
        "traceback",
        "exception",
    ];
    let lower = content.to_ascii_lowercase();
    SIGNALS.iter().any(|needle| lower.contains(needle))
}
526
/// Masks a bulky (usually tool-output) message down to a one-line preview.
///
/// Messages of three lines or fewer pass through unmodified (prefixed with
/// the role); longer messages are reduced to the first line, capped at 120
/// characters, followed by a `[N lines, M chars masked]` marker.
fn default_mask_tool_result(role: &str, content: &str) -> String {
    let first_line = content.lines().next().unwrap_or(content);
    let line_count = content.lines().count();
    // Byte length; the marker text calls it "chars" for continuity.
    let char_count = content.len();
    if line_count <= 3 {
        return format!("[{role}] {content}");
    }
    // BUG FIX: slicing `first_line[..len.min(120)]` panicked when byte 120
    // fell inside a multi-byte UTF-8 character. Cap at 120 *characters*
    // instead, which is always a valid boundary (identical for ASCII).
    let preview_end = first_line
        .char_indices()
        .nth(120)
        .map(|(i, _)| i)
        .unwrap_or(first_line.len());
    let preview = &first_line[..preview_end];
    format!("[{role}] {preview}... [{line_count} lines, {char_count} chars masked]")
}
542
/// Test-only convenience wrapper: observation masking without a custom
/// mask callback (every non-error tool message gets the default mask).
#[cfg(test)]
fn observation_mask_compaction(
    old_messages: &[serde_json::Value],
    archived_count: usize,
) -> String {
    observation_mask_compaction_with_callback(old_messages, archived_count, None)
}
557
558fn observation_mask_compaction_with_callback(
559 old_messages: &[serde_json::Value],
560 archived_count: usize,
561 mask_results: Option<&[Option<String>]>,
562) -> String {
563 let mut parts = Vec::new();
564 parts.push(format!(
565 "[auto-compacted {archived_count} older messages via observation masking]"
566 ));
567 for (idx, msg) in old_messages.iter().enumerate() {
568 let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("user");
569 let content = msg
570 .get("content")
571 .and_then(|v| v.as_str())
572 .unwrap_or_default();
573 if content.is_empty() {
574 continue;
575 }
576 if role == "assistant" {
577 parts.push(format!("[assistant] {content}"));
578 continue;
579 }
580 if content_has_error_signal(content) {
582 parts.push(format!("[{role}] {content}"));
583 } else if let Some(Some(custom)) = mask_results.and_then(|r| r.get(idx)) {
584 parts.push(custom.clone());
586 } else {
587 parts.push(default_mask_tool_result(role, content));
588 }
589 }
590 parts.join("\n")
591}
592
/// Invokes the user-supplied mask closure with the archived messages and
/// normalizes its return value into one optional mask string per message.
///
/// A non-list return yields all-`None` (default masking for every message);
/// within a returned list, only string entries become custom masks.
///
/// # Errors
/// Returns `VmError::Runtime` when `callback` is not a closure, no async
/// builtin VM context exists, or the closure itself errors.
async fn invoke_mask_callback(
    callback: &VmValue,
    old_messages: &[serde_json::Value],
) -> Result<Vec<Option<String>>, VmError> {
    let VmValue::Closure(closure) = callback.clone() else {
        return Err(VmError::Runtime(
            "mask_callback must be a closure".to_string(),
        ));
    };
    let mut vm = crate::vm::clone_async_builtin_child_vm().ok_or_else(|| {
        VmError::Runtime("mask_callback requires an async builtin VM context".to_string())
    })?;
    let messages_vm = VmValue::List(Rc::new(
        old_messages
            .iter()
            .map(crate::stdlib::json_to_vm_value)
            .collect(),
    ));
    let result = vm.call_closure_pub(&closure, &[messages_vm], &[]).await?;
    let list = match result {
        VmValue::List(items) => items,
        // Anything else means "no custom masks"; defaults apply everywhere.
        _ => return Ok(vec![None; old_messages.len()]),
    };
    Ok(list
        .iter()
        .map(|v| match v {
            VmValue::String(s) => Some(s.to_string()),
            VmValue::Nil => None,
            // Non-string entries are treated like nil: use the default mask.
            _ => None,
        })
        .collect())
}
629
/// Dispatches to the summary builder for the given strategy.
///
/// `llm_opts` is required for `Llm`, `custom_compactor` for `Custom`, and
/// `mask_callback` is optional for `ObservationMask`; missing required
/// inputs produce a `VmError::Runtime`.
async fn apply_compaction_strategy(
    strategy: &CompactStrategy,
    old_messages: &[serde_json::Value],
    archived_count: usize,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
    custom_compactor: Option<&VmValue>,
    mask_callback: Option<&VmValue>,
) -> Result<String, VmError> {
    match strategy {
        CompactStrategy::Truncate => Ok(truncate_compaction_summary(old_messages, archived_count)),
        CompactStrategy::Llm => {
            llm_compaction_summary(
                old_messages,
                archived_count,
                llm_opts.ok_or_else(|| {
                    VmError::Runtime(
                        "LLM transcript compaction requires active LLM call options".to_string(),
                    )
                })?,
            )
            .await
        }
        CompactStrategy::Custom => {
            custom_compaction_summary(
                old_messages,
                archived_count,
                custom_compactor.ok_or_else(|| {
                    VmError::Runtime(
                        "compact_callback is required when compact_strategy is 'custom'"
                            .to_string(),
                    )
                })?,
            )
            .await
        }
        CompactStrategy::ObservationMask => {
            // Custom masks are optional; without a callback the default
            // per-message masking applies.
            let mask_results = if let Some(cb) = mask_callback {
                Some(invoke_mask_callback(cb, old_messages).await?)
            } else {
                None
            };
            Ok(observation_mask_compaction_with_callback(
                old_messages,
                archived_count,
                mask_results.as_deref(),
            ))
        }
    }
}
680
/// Compacts `messages` in place, replacing everything but the newest
/// `config.keep_last` entries with a single summary message at index 0.
///
/// Two tiers: the archived messages are first summarized with
/// `config.compact_strategy`; if `config.hard_limit_tokens` is set and the
/// summary plus the surviving messages still estimate above it, the summary
/// itself is re-compacted with `config.hard_limit_strategy`.
///
/// Returns `Ok(None)` when no compaction was needed, otherwise the summary
/// text that was inserted.
pub(crate) async fn auto_compact_messages(
    messages: &mut Vec<serde_json::Value>,
    config: &AutoCompactConfig,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
) -> Result<Option<String>, VmError> {
    if messages.len() <= config.keep_last {
        return Ok(None);
    }
    // Archive everything older than the keep_last window.
    let split_at = messages.len().saturating_sub(config.keep_last);
    let old_messages: Vec<_> = messages.drain(..split_at).collect();
    let archived_count = old_messages.len();

    // Tier 1: summarize the archived messages.
    let mut summary = apply_compaction_strategy(
        &config.compact_strategy,
        &old_messages,
        archived_count,
        llm_opts,
        config.custom_compactor.as_ref(),
        config.mask_callback.as_ref(),
    )
    .await?;

    if let Some(hard_limit) = config.hard_limit_tokens {
        // Estimate the would-be context: summary message + surviving messages.
        let summary_msg = serde_json::json!({"role": "user", "content": &summary});
        let mut estimate_msgs = vec![summary_msg];
        estimate_msgs.extend_from_slice(messages.as_slice());
        let estimated = estimate_message_tokens(&estimate_msgs);
        if estimated > hard_limit {
            // Tier 2: re-compact the tier-1 summary itself (no mask callback;
            // masking a summary message would not make sense here).
            let tier1_as_messages = vec![serde_json::json!({
                "role": "user",
                "content": summary,
            })];
            summary = apply_compaction_strategy(
                &config.hard_limit_strategy,
                &tier1_as_messages,
                archived_count,
                llm_opts,
                config.custom_compactor.as_ref(),
                None,
            )
            .await?;
        }
    }

    // Prepend the summary so the model sees it before the surviving messages.
    messages.insert(
        0,
        serde_json::json!({
            "role": "user",
            "content": summary,
        }),
    );
    Ok(Some(summary))
}
748
749pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
753 let max_chars = max_tokens * 4;
754 if let Some(ref text) = artifact.text {
755 if text.len() > max_chars && max_chars >= 200 {
756 artifact.text = Some(microcompact_tool_output(text, max_chars));
757 artifact.estimated_tokens = Some(max_tokens);
758 }
759 }
760}
761
762pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
765 let mut seen_hashes: BTreeSet<u64> = BTreeSet::new();
766 artifacts.retain(|artifact| {
767 let text = artifact.text.as_deref().unwrap_or("");
768 if text.is_empty() {
769 return true;
770 }
771 let hash = {
773 use std::hash::{Hash, Hasher};
774 let mut hasher = std::collections::hash_map::DefaultHasher::new();
775 text.hash(&mut hasher);
776 hasher.finish()
777 };
778 seen_hashes.insert(hash)
779 });
780}
781
782pub fn select_artifacts_adaptive(
785 mut artifacts: Vec<ArtifactRecord>,
786 policy: &ContextPolicy,
787) -> Vec<ArtifactRecord> {
788 dedup_artifacts(&mut artifacts);
790
791 if let Some(max_tokens) = policy.max_tokens {
795 let count = artifacts.len().max(1);
796 let per_artifact_budget = max_tokens / count;
797 let cap = per_artifact_budget.max(500).min(max_tokens);
799 for artifact in &mut artifacts {
800 let est = artifact.estimated_tokens.unwrap_or(0);
801 if est > cap * 2 {
802 microcompact_artifact(artifact, cap);
803 }
804 }
805 }
806
807 select_artifacts(artifacts, policy)
809}
810
/// Restricts a tool's first argument to a set of glob patterns.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolArgConstraint {
    /// Tool name glob this constraint applies to (see `glob_match`).
    pub tool: String,
    /// Allowed globs for the first argument; empty means unconstrained.
    pub arg_patterns: Vec<String>,
}
823
/// Checks the tool call's first argument against every matching constraint
/// in the policy.
///
/// Only the *first* value of an object argument (or the argument itself when
/// it is a bare string) is checked; non-string arguments are compared as "".
/// Constraints with empty pattern lists are skipped.
///
/// # Errors
/// Delegates rejection to `reject_policy` — presumably returns a
/// `VmError` describing the violation; confirm against its definition.
pub fn enforce_tool_arg_constraints(
    policy: &CapabilityPolicy,
    tool_name: &str,
    args: &serde_json::Value,
) -> Result<(), VmError> {
    for constraint in &policy.tool_arg_constraints {
        if !glob_match(&constraint.tool, tool_name) {
            continue;
        }
        if constraint.arg_patterns.is_empty() {
            continue;
        }
        // NOTE(review): object key order determines which value is "first";
        // serde_json object iteration order depends on feature flags — confirm.
        let first_arg = args
            .as_object()
            .and_then(|o| o.values().next())
            .and_then(|v| v.as_str())
            .or_else(|| args.as_str())
            .unwrap_or("");
        let matches = constraint
            .arg_patterns
            .iter()
            .any(|pattern| glob_match(pattern, first_arg));
        if !matches {
            return reject_policy(format!(
                "tool '{tool_name}' argument '{first_arg}' does not match allowed patterns: {:?}",
                constraint.arg_patterns
            ));
        }
    }
    Ok(())
}
857
/// Canonicalizes an artifact kind: known kinds pass through, a handful of
/// shorthand aliases are expanded, blank input becomes "artifact", and any
/// other label is preserved as-is.
fn normalize_artifact_kind(kind: &str) -> String {
    const CANONICAL: &[&str] = &[
        "resource",
        "workspace_file",
        "editor_selection",
        "workspace_snapshot",
        "transcript_summary",
        "summary",
        "plan",
        "diff",
        "git_diff",
        "patch",
        "patch_set",
        "patch_proposal",
        "diff_review",
        "review_decision",
        "verification_bundle",
        "apply_intent",
        "verification_result",
        "test_result",
        "command_result",
        "provider_payload",
        "worker_result",
        "worker_notification",
        "artifact",
    ];
    if CANONICAL.contains(&kind) {
        return kind.to_string();
    }
    // Shorthand aliases accepted from callers.
    let expanded = match kind {
        "file" => "workspace_file",
        "transcript" => "transcript_summary",
        "verification" => "verification_result",
        "test" => "test_result",
        other if other.trim().is_empty() => "artifact",
        other => other,
    };
    expanded.to_string()
}
891
/// Default context priority per artifact kind (higher = surfaced first):
/// verification/test results outrank diffs and plans, which outrank
/// workspace context, summaries, and command output; unknown kinds get 40.
fn default_artifact_priority(kind: &str) -> i64 {
    const DIFF_LIKE: &[&str] = &[
        "diff",
        "git_diff",
        "patch",
        "patch_set",
        "patch_proposal",
        "diff_review",
        "review_decision",
        "apply_intent",
    ];
    const CONTEXT_LIKE: &[&str] = &[
        "workspace_file",
        "workspace_snapshot",
        "editor_selection",
        "resource",
    ];
    match kind {
        "verification_result" | "test_result" => 100,
        "verification_bundle" => 95,
        k if DIFF_LIKE.contains(&k) => 90,
        "plan" => 80,
        k if CONTEXT_LIKE.contains(&k) => 70,
        "summary" | "transcript_summary" => 60,
        "command_result" => 50,
        _ => 40,
    }
}
905
/// Ranks a freshness label for ordering: "fresh"/"live" = 3, "recent" = 2,
/// "stale" = 0, and anything else (including `None`) = 1.
fn freshness_rank(value: Option<&str>) -> i64 {
    let label = value.unwrap_or("");
    if label == "fresh" || label == "live" {
        3
    } else if label == "recent" {
        2
    } else if label == "stale" {
        0
    } else {
        1
    }
}
914
/// Per-tool policy metadata carried alongside a `CapabilityPolicy`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolRuntimePolicyMetadata {
    /// Capability name -> operations this tool exercises — TODO confirm
    /// against the enforcement site; not read within this file.
    pub capabilities: BTreeMap<String, Vec<String>>,
    pub side_effect_level: Option<String>,
    /// Names of parameters that carry filesystem paths (presumably for
    /// workspace-root checks elsewhere).
    pub path_params: Vec<String>,
    pub mutation_classification: Option<String>,
}
923
/// Declarative capability ceiling/request for tool execution.
///
/// For `tools`, `capabilities`, and `workspace_roots`, an empty collection
/// means "unrestricted" in that dimension (see `intersect`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CapabilityPolicy {
    pub tools: Vec<String>,
    /// Capability name -> allowed operations.
    pub capabilities: BTreeMap<String, Vec<String>>,
    pub workspace_roots: Vec<String>,
    /// Weakest-wins side-effect ceiling (see `min_side_effect`).
    pub side_effect_level: Option<String>,
    pub recursion_limit: Option<usize>,
    #[serde(default)]
    pub tool_arg_constraints: Vec<ToolArgConstraint>,
    #[serde(default)]
    pub tool_metadata: BTreeMap<String, ToolRuntimePolicyMetadata>,
}
938
impl CapabilityPolicy {
    /// Intersects a host ceiling (`self`) with a `requested` policy.
    ///
    /// Empty `tools`/`capabilities`/`workspace_roots` on either side mean
    /// "no restriction" for that dimension. Requests that name tools or
    /// capability operations outside a *non-empty* ceiling are rejected
    /// outright (fail-fast) rather than silently narrowed.
    ///
    /// # Errors
    /// Returns a human-readable message naming the tools/capabilities that
    /// exceed the host ceiling.
    pub fn intersect(&self, requested: &CapabilityPolicy) -> Result<CapabilityPolicy, String> {
        // The weaker (lower-ranked) side-effect level wins; a single side
        // providing a level is taken as-is.
        let side_effect_level = match (&self.side_effect_level, &requested.side_effect_level) {
            (Some(a), Some(b)) => Some(min_side_effect(a, b).to_string()),
            (Some(a), None) => Some(a.clone()),
            (None, Some(b)) => Some(b.clone()),
            (None, None) => None,
        };

        // Reject phase: requested tools must fit inside a non-empty ceiling.
        if !self.tools.is_empty() {
            let denied: Vec<String> = requested
                .tools
                .iter()
                .filter(|tool| !self.tools.contains(*tool))
                .cloned()
                .collect();
            if !denied.is_empty() {
                return Err(format!(
                    "requested tools exceed host ceiling: {}",
                    denied.join(", ")
                ));
            }
        }

        // Reject phase: every requested capability/operation must be allowed.
        for (capability, requested_ops) in &requested.capabilities {
            if let Some(allowed_ops) = self.capabilities.get(capability) {
                let denied: Vec<String> = requested_ops
                    .iter()
                    .filter(|op| !allowed_ops.contains(*op))
                    .cloned()
                    .collect();
                if !denied.is_empty() {
                    return Err(format!(
                        "requested capability operations exceed host ceiling: {}.{}",
                        capability,
                        denied.join(",")
                    ));
                }
            } else if !self.capabilities.is_empty() {
                // Unknown capability against a non-empty ceiling: rejected.
                return Err(format!(
                    "requested capability exceeds host ceiling: {capability}"
                ));
            }
        }

        // Merge phase: empty side defers to the other; otherwise intersect.
        let tools = if self.tools.is_empty() {
            requested.tools.clone()
        } else if requested.tools.is_empty() {
            self.tools.clone()
        } else {
            requested
                .tools
                .iter()
                .filter(|tool| self.tools.contains(*tool))
                .cloned()
                .collect()
        };

        let capabilities = if self.capabilities.is_empty() {
            requested.capabilities.clone()
        } else if requested.capabilities.is_empty() {
            self.capabilities.clone()
        } else {
            // Keep only capabilities present in both, with the operation
            // lists intersected.
            requested
                .capabilities
                .iter()
                .filter_map(|(capability, requested_ops)| {
                    self.capabilities.get(capability).map(|allowed_ops| {
                        (
                            capability.clone(),
                            requested_ops
                                .iter()
                                .filter(|op| allowed_ops.contains(*op))
                                .cloned()
                                .collect::<Vec<_>>(),
                        )
                    })
                })
                .collect()
        };

        let workspace_roots = if self.workspace_roots.is_empty() {
            requested.workspace_roots.clone()
        } else if requested.workspace_roots.is_empty() {
            self.workspace_roots.clone()
        } else {
            // NOTE(review): exact string intersection — no path
            // normalization or prefix containment; confirm that's intended.
            requested
                .workspace_roots
                .iter()
                .filter(|root| self.workspace_roots.contains(*root))
                .cloned()
                .collect()
        };

        // Tightest (smallest) recursion limit wins.
        let recursion_limit = match (self.recursion_limit, requested.recursion_limit) {
            (Some(a), Some(b)) => Some(a.min(b)),
            (Some(a), None) => Some(a),
            (None, Some(b)) => Some(b),
            (None, None) => None,
        };

        // Constraints accumulate from both sides (all must be satisfied).
        let mut tool_arg_constraints = self.tool_arg_constraints.clone();
        tool_arg_constraints.extend(requested.tool_arg_constraints.clone());

        // Metadata keyed by the merged tool list, preferring the requested
        // side's entry when both provide one.
        let tool_metadata = tools
            .iter()
            .filter_map(|tool| {
                requested
                    .tool_metadata
                    .get(tool)
                    .or_else(|| self.tool_metadata.get(tool))
                    .cloned()
                    .map(|metadata| (tool.clone(), metadata))
            })
            .collect();

        Ok(CapabilityPolicy {
            tools,
            capabilities,
            workspace_roots,
            side_effect_level,
            recursion_limit,
            tool_arg_constraints,
            tool_metadata,
        })
    }
}
1067
/// Returns whichever side-effect level is weaker (lower-ranked).
///
/// Ordering: none < read_only < workspace_write < process_exec < network.
/// Unknown labels rank above every known level, so an unrecognized label
/// never wins over a known one. Ties return the first argument.
fn min_side_effect<'a>(a: &'a str, b: &'a str) -> &'a str {
    const ORDER: [&str; 5] = [
        "none",
        "read_only",
        "workspace_write",
        "process_exec",
        "network",
    ];
    let rank = |v: &str| ORDER.iter().position(|&known| known == v).unwrap_or(ORDER.len());
    if rank(b) < rank(a) {
        b
    } else {
        a
    }
}
1085
/// Per-stage model selection and sampling parameters.
/// `None` fields defer to host/provider defaults.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ModelPolicy {
    pub provider: Option<String>,
    pub model: Option<String>,
    // Abstract tier name — presumably resolved by the host to a concrete
    // model; confirm against the runner.
    pub model_tier: Option<String>,
    pub temperature: Option<f64>,
    pub max_tokens: Option<i64>,
    // Cap on agent-loop iterations for the stage — TODO confirm at call site.
    pub max_iterations: Option<usize>,
    // Cap on stall "nudges" injected by the loop — TODO confirm at call site.
    pub max_nudges: Option<usize>,
    // Custom nudge prompt text.
    pub nudge: Option<String>,
}
1102
/// Controls how a stage's transcript is recorded, shared, and compacted.
/// Field semantics are enforced elsewhere — not read within this file.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct TranscriptPolicy {
    pub mode: Option<String>,
    pub visibility: Option<String>,
    pub summarize: bool,
    pub compact: bool,
    // Messages kept verbatim when compacting (cf. `AutoCompactConfig::keep_last`).
    pub keep_last: Option<usize>,
}
1112
/// Governs which artifacts are selected into a stage's context and how
/// they are budgeted (see `select_artifacts_adaptive`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ContextPolicy {
    pub max_artifacts: Option<usize>,
    /// Overall token budget; also drives per-artifact micro-compaction.
    pub max_tokens: Option<usize>,
    pub reserve_tokens: Option<usize>,
    pub include_kinds: Vec<String>,
    pub exclude_kinds: Vec<String>,
    pub prioritize_kinds: Vec<String>,
    /// Artifact ids that must always be included — TODO confirm in selector.
    pub pinned_ids: Vec<String>,
    pub include_stages: Vec<String>,
    pub prefer_recent: bool,
    /// Presumably weights by `freshness_rank` — confirm in `select_artifacts`.
    pub prefer_fresh: bool,
    pub render: Option<String>,
}
1128
/// Retry behavior for a stage; enforced by the workflow runner, not here.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RetryPolicy {
    pub max_attempts: usize,
    pub verify: bool,
    pub repair: bool,
}
1136
/// Declares the artifact kinds a stage consumes/produces and optional
/// cardinality/schema requirements; validated by the runner.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct StageContract {
    pub input_kinds: Vec<String>,
    pub output_kinds: Vec<String>,
    pub min_inputs: Option<usize>,
    pub max_inputs: Option<usize>,
    pub require_transcript: bool,
    /// JSON schema applied to stage output — TODO confirm enforcement site.
    pub schema: Option<serde_json::Value>,
}
1147
/// Names the successor node for each branch outcome of a workflow node
/// (success/failure, verify, condition, loop, escalation).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BranchSemantics {
    pub success: Option<String>,
    pub failure: Option<String>,
    pub verify_pass: Option<String>,
    pub verify_fail: Option<String>,
    pub condition_true: Option<String>,
    pub condition_false: Option<String>,
    pub loop_continue: Option<String>,
    pub loop_exit: Option<String>,
    pub escalation: Option<String>,
}
1161
/// Fan-out settings for a map node: items to iterate and the artifact
/// kinds used per item and for the combined output.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct MapPolicy {
    pub items: Vec<serde_json::Value>,
    pub item_artifact_kind: Option<String>,
    pub output_kind: Option<String>,
    pub max_items: Option<usize>,
}
1170
/// Fan-in settings for a join node; `strategy` values are interpreted by
/// the runner — not enumerated here.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct JoinPolicy {
    pub strategy: String,
    pub require_all_inputs: bool,
    pub min_completed: Option<usize>,
}
1178
/// Settings for a reduce node combining mapped results into one artifact.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReducePolicy {
    pub strategy: String,
    /// Separator used when concatenating — TODO confirm in the reducer.
    pub separator: Option<String>,
    pub output_kind: Option<String>,
}
1186
/// Where and why a node escalates to a human/host queue.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EscalationPolicy {
    pub level: Option<String>,
    pub queue: Option<String>,
    pub reason: Option<String>,
}
1194
/// A unit of context produced or consumed by workflow stages.
///
/// `normalize` back-fills `type_name`, `id`, `created_at`, `kind`,
/// `estimated_tokens`, and `priority` when missing.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
    // Serialized type tag; defaults to "artifact".
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    // Canonical kind (see `normalize_artifact_kind`).
    pub kind: String,
    pub title: Option<String>,
    // Primary textual payload; drives token estimation and dedup.
    pub text: Option<String>,
    pub data: Option<serde_json::Value>,
    pub source: Option<String>,
    // Creation marker (epoch seconds string from `now_rfc3339`).
    pub created_at: String,
    // Freshness label ("fresh"/"live"/"recent"/"stale"); see `freshness_rank`.
    pub freshness: Option<String>,
    // Selection priority; defaulted per kind (see `default_artifact_priority`).
    pub priority: Option<i64>,
    pub lineage: Vec<String>,
    pub relevance: Option<f64>,
    // ~text.len()/4, rounded up, when not supplied.
    pub estimated_tokens: Option<usize>,
    pub stage: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1215
1216impl ArtifactRecord {
1217 pub fn normalize(mut self) -> Self {
1218 if self.type_name.is_empty() {
1219 self.type_name = "artifact".to_string();
1220 }
1221 if self.id.is_empty() {
1222 self.id = new_id("artifact");
1223 }
1224 if self.created_at.is_empty() {
1225 self.created_at = now_rfc3339();
1226 }
1227 if self.kind.is_empty() {
1228 self.kind = "artifact".to_string();
1229 }
1230 self.kind = normalize_artifact_kind(&self.kind);
1231 if self.estimated_tokens.is_none() {
1232 self.estimated_tokens = self
1233 .text
1234 .as_ref()
1235 .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
1236 }
1237 if self.priority.is_none() {
1238 self.priority = Some(default_artifact_priority(&self.kind));
1239 }
1240 self
1241 }
1242}
1243
/// A single node in a workflow graph. Deserialized with per-field defaults;
/// `normalize_workflow_value` backfills id, kind, and policy defaults
/// afterwards.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct WorkflowNode {
    pub id: Option<String>,
    // Node kind, e.g. "stage", "verify", "condition", "fork", "join",
    // "map", "reduce", "escalation" (see validate_workflow).
    pub kind: String,
    pub mode: Option<String>,
    pub prompt: Option<String>,
    pub system: Option<String>,
    pub task_label: Option<String>,
    pub done_sentinel: Option<String>,
    pub tools: serde_json::Value,
    pub model_policy: ModelPolicy,
    pub transcript_policy: TranscriptPolicy,
    pub context_policy: ContextPolicy,
    pub retry_policy: RetryPolicy,
    pub capability_policy: CapabilityPolicy,
    pub input_contract: StageContract,
    pub output_contract: StageContract,
    pub branch_semantics: BranchSemantics,
    pub map_policy: MapPolicy,
    pub join_policy: JoinPolicy,
    pub reduce_policy: ReducePolicy,
    pub escalation_policy: EscalationPolicy,
    pub verify: Option<serde_json::Value>,
    pub metadata: BTreeMap<String, serde_json::Value>,
    // Original VM tools value; skipped by serde and re-attached by hand
    // (see parse_workflow_node_value / normalize_workflow_value).
    #[serde(skip)]
    pub raw_tools: Option<VmValue>,
}
1272
1273impl PartialEq for WorkflowNode {
1274 fn eq(&self, other: &Self) -> bool {
1275 serde_json::to_value(self).ok() == serde_json::to_value(other).ok()
1276 }
1277}
1278
1279pub fn workflow_tool_names(value: &serde_json::Value) -> Vec<String> {
1280 match value {
1281 serde_json::Value::Null => Vec::new(),
1282 serde_json::Value::Array(items) => items
1283 .iter()
1284 .filter_map(|item| match item {
1285 serde_json::Value::Object(map) => map
1286 .get("name")
1287 .and_then(|value| value.as_str())
1288 .filter(|name| !name.is_empty())
1289 .map(|name| name.to_string()),
1290 _ => None,
1291 })
1292 .collect(),
1293 serde_json::Value::Object(map) => {
1294 if map.get("_type").and_then(|value| value.as_str()) == Some("tool_registry") {
1295 return map
1296 .get("tools")
1297 .map(workflow_tool_names)
1298 .unwrap_or_default();
1299 }
1300 map.get("name")
1301 .and_then(|value| value.as_str())
1302 .filter(|name| !name.is_empty())
1303 .map(|name| vec![name.to_string()])
1304 .unwrap_or_default()
1305 }
1306 _ => Vec::new(),
1307 }
1308}
1309
/// Pick the most impactful side-effect level from `levels`.
///
/// Known levels are ordered from least to most impactful; any unrecognized
/// string ranks above every known level (treated as worst case). Returns
/// `None` when the iterator is empty.
fn max_side_effect_level(levels: impl Iterator<Item = String>) -> Option<String> {
    // Least to most impactful; index == rank.
    const ORDER: [&str; 5] = ["none", "read_only", "workspace_write", "process_exec", "network"];
    levels.max_by_key(|level| {
        ORDER
            .iter()
            .position(|known| *known == level.as_str())
            .unwrap_or(ORDER.len())
    })
}
1323
1324fn parse_tool_runtime_policy(
1325 map: &serde_json::Map<String, serde_json::Value>,
1326) -> ToolRuntimePolicyMetadata {
1327 let Some(policy) = map.get("policy").and_then(|value| value.as_object()) else {
1328 return ToolRuntimePolicyMetadata::default();
1329 };
1330
1331 let capabilities = policy
1332 .get("capabilities")
1333 .and_then(|value| value.as_object())
1334 .map(|caps| {
1335 caps.iter()
1336 .map(|(capability, ops)| {
1337 let values = ops
1338 .as_array()
1339 .map(|items| {
1340 items
1341 .iter()
1342 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1343 .collect::<Vec<_>>()
1344 })
1345 .unwrap_or_default();
1346 (capability.clone(), values)
1347 })
1348 .collect::<BTreeMap<_, _>>()
1349 })
1350 .unwrap_or_default();
1351
1352 let path_params = policy
1353 .get("path_params")
1354 .and_then(|value| value.as_array())
1355 .map(|items| {
1356 items
1357 .iter()
1358 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1359 .collect::<Vec<_>>()
1360 })
1361 .unwrap_or_default();
1362
1363 ToolRuntimePolicyMetadata {
1364 capabilities,
1365 side_effect_level: policy
1366 .get("side_effect_level")
1367 .and_then(|value| value.as_str())
1368 .map(|s| s.to_string()),
1369 path_params,
1370 mutation_classification: policy
1371 .get("mutation_classification")
1372 .and_then(|value| value.as_str())
1373 .map(|s| s.to_string()),
1374 }
1375}
1376
1377pub fn workflow_tool_metadata(
1378 value: &serde_json::Value,
1379) -> BTreeMap<String, ToolRuntimePolicyMetadata> {
1380 match value {
1381 serde_json::Value::Null => BTreeMap::new(),
1382 serde_json::Value::Array(items) => items
1383 .iter()
1384 .filter_map(|item| match item {
1385 serde_json::Value::Object(map) => map
1386 .get("name")
1387 .and_then(|value| value.as_str())
1388 .filter(|name| !name.is_empty())
1389 .map(|name| (name.to_string(), parse_tool_runtime_policy(map))),
1390 _ => None,
1391 })
1392 .collect(),
1393 serde_json::Value::Object(map) => {
1394 if map.get("_type").and_then(|value| value.as_str()) == Some("tool_registry") {
1395 return map
1396 .get("tools")
1397 .map(workflow_tool_metadata)
1398 .unwrap_or_default();
1399 }
1400 map.get("name")
1401 .and_then(|value| value.as_str())
1402 .filter(|name| !name.is_empty())
1403 .map(|name| {
1404 let mut metadata = BTreeMap::new();
1405 metadata.insert(name.to_string(), parse_tool_runtime_policy(map));
1406 metadata
1407 })
1408 .unwrap_or_default()
1409 }
1410 _ => BTreeMap::new(),
1411 }
1412}
1413
1414pub fn workflow_tool_policy_from_tools(value: &serde_json::Value) -> CapabilityPolicy {
1415 let tools = workflow_tool_names(value);
1416 let tool_metadata = workflow_tool_metadata(value);
1417 let mut capabilities: BTreeMap<String, Vec<String>> = BTreeMap::new();
1418 for metadata in tool_metadata.values() {
1419 for (capability, ops) in &metadata.capabilities {
1420 let entry = capabilities.entry(capability.clone()).or_default();
1421 for op in ops {
1422 if !entry.contains(op) {
1423 entry.push(op.clone());
1424 }
1425 }
1426 entry.sort();
1427 }
1428 }
1429 let side_effect_level = max_side_effect_level(
1430 tool_metadata
1431 .values()
1432 .filter_map(|metadata| metadata.side_effect_level.clone()),
1433 );
1434 CapabilityPolicy {
1435 tools,
1436 capabilities,
1437 workspace_roots: Vec::new(),
1438 side_effect_level,
1439 recursion_limit: None,
1440 tool_arg_constraints: Vec::new(),
1441 tool_metadata,
1442 }
1443}
1444
/// Directed edge between workflow nodes. `branch` restricts the edge to a
/// specific branch outcome of the source node (e.g. "true"/"false" for
/// condition nodes, "failed"/"retry" for verify/repair wiring).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowEdge {
    pub from: String,
    pub to: String,
    pub branch: Option<String>,
    pub label: Option<String>,
}
1453
/// A complete workflow: nodes keyed by id, directed edges, an entry node,
/// and a graph-wide capability policy. Normalized/validated by
/// `normalize_workflow_value` and `validate_workflow`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowGraph {
    // Serialized as "_type"; defaults to "workflow_graph".
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    // Defaults to 1 during normalization.
    pub version: usize,
    // Id of the node execution starts from.
    pub entry: String,
    pub nodes: BTreeMap<String, WorkflowNode>,
    pub edges: Vec<WorkflowEdge>,
    pub capability_policy: CapabilityPolicy,
    pub metadata: BTreeMap<String, serde_json::Value>,
    pub audit_log: Vec<WorkflowAuditEntry>,
}
1469
/// One entry in a workflow's audit log: what operation happened, to which
/// node, when, and why.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowAuditEntry {
    pub id: String,
    pub op: String,
    pub node_id: Option<String>,
    pub timestamp: String,
    pub reason: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1480
/// Aggregated LLM usage for a stage or run: token counts, wall time, call
/// count, cost, and the models involved.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct LlmUsageRecord {
    pub input_tokens: i64,
    pub output_tokens: i64,
    pub total_duration_ms: i64,
    pub call_count: i64,
    pub total_cost: f64,
    pub models: Vec<String>,
}
1491
/// Record of one executed stage within a run: status/outcome, timing,
/// transcript and verification payloads, artifacts produced/consumed, and
/// per-attempt history.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageRecord {
    pub id: String,
    // Id of the workflow node this stage executed.
    pub node_id: String,
    pub kind: String,
    pub status: String,
    pub outcome: String,
    // Branch outcome selected by this stage, if any.
    pub branch: Option<String>,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub visible_text: Option<String>,
    pub private_reasoning: Option<String>,
    pub transcript: Option<serde_json::Value>,
    pub verification: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    pub artifacts: Vec<ArtifactRecord>,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
    pub attempts: Vec<RunStageAttemptRecord>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1514
/// One attempt of a stage (stages may retry per their retry policy):
/// per-attempt status, outcome, error, and timing.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageAttemptRecord {
    // 1-based attempt counter — TODO confirm against the executor.
    pub attempt: usize,
    pub status: String,
    pub outcome: String,
    pub branch: Option<String>,
    pub error: Option<String>,
    pub verification: Option<serde_json::Value>,
    pub started_at: String,
    pub finished_at: Option<String>,
}
1527
/// Record of control moving from one stage/node to the next, including which
/// artifacts crossed the boundary.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunTransitionRecord {
    pub id: String,
    // None for the initial transition into the entry node — TODO confirm.
    pub from_stage_id: Option<String>,
    pub from_node_id: Option<String>,
    pub to_node_id: String,
    pub branch: Option<String>,
    pub timestamp: String,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
}
1540
/// A persisted snapshot of run progress (ready/completed node sets) that a
/// run can presumably be resumed from — resume semantics live elsewhere.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunCheckpointRecord {
    pub id: String,
    pub ready_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub last_stage_id: Option<String>,
    pub persisted_at: String,
    pub reason: String,
}
1551
/// Expected-outcome fixture derived from (or supplied for) a run, used to
/// re-check later runs of the same workflow (see
/// `evaluate_run_suite_manifest`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayFixture {
    // Serialized as "_type".
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub source_run_id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    pub created_at: String,
    pub expected_status: String,
    pub stage_assertions: Vec<ReplayStageAssertion>,
}
1565
/// Per-stage expectations within a replay fixture: status, outcome, branch,
/// required artifact kinds, and an optional visible-text substring check.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayStageAssertion {
    pub node_id: String,
    pub expected_status: String,
    pub expected_outcome: String,
    pub expected_branch: Option<String>,
    pub required_artifact_kinds: Vec<String>,
    pub visible_text_contains: Option<String>,
}
1576
/// Result of checking one run against a replay fixture: overall pass flag,
/// failure messages, and the number of stages inspected.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalReport {
    pub pass: bool,
    pub failures: Vec<String>,
    pub stage_count: usize,
}
1584
/// Per-case result within an eval suite: fixture evaluation outcome plus an
/// optional diff against a baseline run (see `evaluate_run_suite_manifest`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalCaseReport {
    pub run_id: String,
    pub workflow_id: String,
    pub label: Option<String>,
    pub pass: bool,
    pub failures: Vec<String>,
    pub stage_count: usize,
    // Path the run record was loaded from.
    pub source_path: Option<String>,
    // Present only when the case specified a baseline to compare against.
    pub comparison: Option<RunDiffReport>,
}
1597
/// Suite-level rollup of eval cases; `pass` is true only when every case
/// passed (failed == 0).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalSuiteReport {
    pub pass: bool,
    pub total: usize,
    pub passed: usize,
    pub failed: usize,
    pub cases: Vec<ReplayEvalCaseReport>,
}
1607
/// One stage-level difference between two runs: which node changed, how,
/// and human-readable details.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunStageDiffRecord {
    pub node_id: String,
    pub change: String,
    pub details: Vec<String>,
}
1615
/// Comparison of two run records: overall status change, per-stage diffs,
/// and count deltas (right minus left, presumably — confirm against
/// `diff_run_records`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunDiffReport {
    pub left_run_id: String,
    pub right_run_id: String,
    pub identical: bool,
    pub status_changed: bool,
    pub left_status: String,
    pub right_status: String,
    pub stage_diffs: Vec<RunStageDiffRecord>,
    pub transition_count_delta: isize,
    pub artifact_count_delta: isize,
    pub checkpoint_count_delta: isize,
}
1630
/// Manifest describing a batch of eval cases; relative case paths resolve
/// against `base_dir` (see `resolve_manifest_path`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteManifest {
    // Serialized as "_type"; defaults to "eval_suite_manifest".
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    pub base_dir: Option<String>,
    pub cases: Vec<EvalSuiteCase>,
}
1641
/// One eval case: the run to check, an optional explicit fixture (otherwise
/// the run's embedded/derived fixture is used), and an optional baseline run
/// to diff against.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteCase {
    pub label: Option<String>,
    pub run_path: String,
    pub fixture_path: Option<String>,
    pub compare_to: Option<String>,
}
1650
/// Full record of a workflow run: identity, status, stage history,
/// transitions, checkpoints, child runs, artifacts, policy, usage, and an
/// optional replay fixture. `normalize_run_record` backfills defaults.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunRecord {
    // Serialized as "_type"; defaults to "run_record".
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    pub task: String,
    // Defaults to "running" during normalization.
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub parent_run_id: Option<String>,
    // Defaults to this run's own id when unset.
    pub root_run_id: Option<String>,
    pub stages: Vec<RunStageRecord>,
    pub transitions: Vec<RunTransitionRecord>,
    pub checkpoints: Vec<RunCheckpointRecord>,
    pub pending_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub child_runs: Vec<RunChildRecord>,
    pub artifacts: Vec<ArtifactRecord>,
    pub policy: CapabilityPolicy,
    pub execution: Option<RunExecutionRecord>,
    pub transcript: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    pub replay_fixture: Option<ReplayFixture>,
    pub trace_spans: Vec<RunTraceSpanRecord>,
    pub metadata: BTreeMap<String, serde_json::Value>,
    pub persisted_path: Option<String>,
}
1681
/// A timing span recorded during a run; spans form a tree via `parent_id`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunTraceSpanRecord {
    pub span_id: u64,
    pub parent_id: Option<u64>,
    pub kind: String,
    pub name: String,
    // Milliseconds; epoch of start_ms not established here — TODO confirm
    // whether it is relative to run start or wall clock.
    pub start_ms: u64,
    pub duration_ms: u64,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1693
/// A child (worker) run spawned from a parent stage, including its mutation
/// session fields (cf. `MutationSessionRecord`) and where its own run record
/// was persisted.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunChildRecord {
    pub worker_id: String,
    pub worker_name: String,
    pub parent_stage_id: Option<String>,
    pub session_id: Option<String>,
    pub parent_session_id: Option<String>,
    pub mutation_scope: Option<String>,
    pub approval_mode: Option<String>,
    pub task: String,
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub run_id: Option<String>,
    pub run_path: Option<String>,
    pub snapshot_path: Option<String>,
    pub execution: Option<RunExecutionRecord>,
}
1713
/// Where and how a run executed: working directory, environment, and
/// version-control context (repo/worktree/branch/base ref).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunExecutionRecord {
    pub cwd: Option<String>,
    pub source_dir: Option<String>,
    pub env: BTreeMap<String, String>,
    pub adapter: Option<String>,
    pub repo_path: Option<String>,
    pub worktree_path: Option<String>,
    pub branch: Option<String>,
    pub base_ref: Option<String>,
    // Cleanup strategy name — semantics defined by the executor.
    pub cleanup: Option<String>,
}
1727
/// Output of `validate_workflow`: `valid` is true iff `errors` is empty;
/// warnings (e.g. unreachable nodes) do not affect validity.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowValidationReport {
    pub valid: bool,
    pub errors: Vec<String>,
    pub warnings: Vec<String>,
    pub reachable_nodes: Vec<String>,
}
1736
1737fn parse_json_payload<T: for<'de> Deserialize<'de>>(
1738 json: serde_json::Value,
1739 label: &str,
1740) -> Result<T, VmError> {
1741 let payload = json.to_string();
1742 let mut deserializer = serde_json::Deserializer::from_str(&payload);
1743 let mut tracker = serde_path_to_error::Track::new();
1744 let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
1745 T::deserialize(path_deserializer).map_err(|error| {
1746 let snippet = if payload.len() > 600 {
1747 format!("{}...", &payload[..600])
1748 } else {
1749 payload.clone()
1750 };
1751 VmError::Runtime(format!(
1752 "{label} parse error at {}: {} | payload={}",
1753 tracker.path(),
1754 error,
1755 snippet
1756 ))
1757 })
1758}
1759
/// Deserialize a VM value into `T`, labeling any parse error "orchestration".
fn parse_json_value<T: for<'de> Deserialize<'de>>(value: &VmValue) -> Result<T, VmError> {
    parse_json_payload(vm_value_to_json(value), "orchestration")
}
1763
1764pub fn parse_workflow_node_value(value: &VmValue, label: &str) -> Result<WorkflowNode, VmError> {
1765 let mut node: WorkflowNode = parse_json_payload(vm_value_to_json(value), label)?;
1766 node.raw_tools = value.as_dict().and_then(|dict| dict.get("tools")).cloned();
1767 Ok(node)
1768}
1769
/// Parse a JSON payload into a `WorkflowNode`; `label` prefixes parse
/// errors. Note: `raw_tools` is `#[serde(skip)]` and is not populated here.
pub fn parse_workflow_node_json(
    json: serde_json::Value,
    label: &str,
) -> Result<WorkflowNode, VmError> {
    parse_json_payload(json, label)
}
1776
/// Parse a JSON payload into a `WorkflowEdge`; `label` prefixes parse errors.
pub fn parse_workflow_edge_json(
    json: serde_json::Value,
    label: &str,
) -> Result<WorkflowEdge, VmError> {
    parse_json_payload(json, label)
}
1783
/// Parse a VM value into a `WorkflowGraph`, expanding the legacy shorthand
/// form (top-level `act`/`verify`/`repair` keys) into real nodes and edges,
/// and backfilling defaults: graph identity, entry node, and per-node
/// ids/kinds/policies/contracts.
pub fn normalize_workflow_value(value: &VmValue) -> Result<WorkflowGraph, VmError> {
    let mut graph: WorkflowGraph = parse_json_value(value)?;
    let as_dict = value.as_dict().cloned().unwrap_or_default();

    // Shorthand expansion: when no explicit node map was provided, build
    // nodes from top-level `act`/`verify`/`repair` entries.
    if graph.nodes.is_empty() {
        for key in ["act", "verify", "repair"] {
            if let Some(node_value) = as_dict.get(key) {
                let mut node = parse_workflow_node_value(node_value, "orchestration")?;
                let raw_node = node_value.as_dict().cloned().unwrap_or_default();
                node.id = Some(key.to_string());
                if node.kind.is_empty() {
                    node.kind = if key == "verify" {
                        "verify".to_string()
                    } else {
                        "stage".to_string()
                    };
                }
                // Node-level model settings inherit from the top-level dict
                // when the node itself did not specify them.
                if node.model_policy.provider.is_none() {
                    node.model_policy.provider = as_dict
                        .get("provider")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model.is_none() {
                    node.model_policy.model = as_dict
                        .get("model")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model_tier.is_none() {
                    node.model_policy.model_tier = as_dict
                        .get("model_tier")
                        .or_else(|| as_dict.get("tier"))
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.temperature.is_none() {
                    // Accept either a float or an int temperature.
                    node.model_policy.temperature = as_dict.get("temperature").and_then(|value| {
                        if let VmValue::Float(number) = value {
                            Some(*number)
                        } else {
                            value.as_int().map(|number| number as f64)
                        }
                    });
                }
                if node.model_policy.max_tokens.is_none() {
                    node.model_policy.max_tokens =
                        as_dict.get("max_tokens").and_then(|value| value.as_int());
                }
                if node.mode.is_none() {
                    node.mode = as_dict
                        .get("mode")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.done_sentinel.is_none() {
                    node.done_sentinel = as_dict
                        .get("done_sentinel")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                // Fold inline verification keys on the `verify` node into a
                // structured `verify` payload.
                if key == "verify"
                    && node.verify.is_none()
                    && (raw_node.contains_key("assert_text")
                        || raw_node.contains_key("command")
                        || raw_node.contains_key("expect_status")
                        || raw_node.contains_key("expect_text"))
                {
                    node.verify = Some(serde_json::json!({
                        "assert_text": raw_node.get("assert_text").map(vm_value_to_json),
                        "command": raw_node.get("command").map(vm_value_to_json),
                        "expect_status": raw_node.get("expect_status").map(vm_value_to_json),
                        "expect_text": raw_node.get("expect_text").map(vm_value_to_json),
                    }));
                }
                graph.nodes.insert(key.to_string(), node);
            }
        }
        if graph.entry.is_empty() && graph.nodes.contains_key("act") {
            graph.entry = "act".to_string();
        }
        // Default wiring for the shorthand graph:
        // act -> verify, verify -(failed)-> repair, repair -(retry)-> verify.
        if graph.edges.is_empty() && graph.nodes.contains_key("act") {
            if graph.nodes.contains_key("verify") {
                graph.edges.push(WorkflowEdge {
                    from: "act".to_string(),
                    to: "verify".to_string(),
                    branch: None,
                    label: None,
                });
            }
            if graph.nodes.contains_key("repair") {
                graph.edges.push(WorkflowEdge {
                    from: "verify".to_string(),
                    to: "repair".to_string(),
                    branch: Some("failed".to_string()),
                    label: None,
                });
                graph.edges.push(WorkflowEdge {
                    from: "repair".to_string(),
                    to: "verify".to_string(),
                    branch: Some("retry".to_string()),
                    label: None,
                });
            }
        }
    }

    // Graph-level defaults.
    if graph.type_name.is_empty() {
        graph.type_name = "workflow_graph".to_string();
    }
    if graph.id.is_empty() {
        graph.id = new_id("workflow");
    }
    if graph.version == 0 {
        graph.version = 1;
    }
    if graph.entry.is_empty() {
        // Fall back to the first node id (BTreeMap: lexicographically first).
        graph.entry = graph
            .nodes
            .keys()
            .next()
            .cloned()
            .unwrap_or_else(|| "act".to_string());
    }
    // Per-node defaults.
    for (node_id, node) in &mut graph.nodes {
        if node.raw_tools.is_none() {
            // `raw_tools` is #[serde(skip)]; recover the original tools value
            // from the VM dict's `nodes` map when present.
            node.raw_tools = as_dict
                .get("nodes")
                .and_then(|nodes| nodes.as_dict())
                .and_then(|nodes| nodes.get(node_id))
                .and_then(|node_value| node_value.as_dict())
                .and_then(|raw_node| raw_node.get("tools"))
                .cloned();
        }
        if node.id.is_none() {
            node.id = Some(node_id.clone());
        }
        if node.kind.is_empty() {
            node.kind = "stage".to_string();
        }
        if node.join_policy.strategy.is_empty() {
            node.join_policy.strategy = "all".to_string();
        }
        if node.reduce_policy.strategy.is_empty() {
            node.reduce_policy.strategy = "concat".to_string();
        }
        // Default the output contract's artifact kind by node kind.
        if node.output_contract.output_kinds.is_empty() {
            node.output_contract.output_kinds = vec![match node.kind.as_str() {
                "verify" => "verification_result".to_string(),
                "reduce" => node
                    .reduce_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "summary".to_string()),
                "map" => node
                    .map_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "artifact".to_string()),
                "escalation" => "plan".to_string(),
                _ => "artifact".to_string(),
            }];
        }
        if node.retry_policy.max_attempts == 0 {
            node.retry_policy.max_attempts = 1;
        }
    }
    Ok(graph)
}
1953
/// Statically check a workflow graph: entry/edge references, per-kind node
/// shape rules, input-contract sanity, and (optionally) that graph- and
/// node-level capability policies fit under a host `ceiling` policy.
/// Unreachable nodes and under-connected joins are warnings, not errors.
pub fn validate_workflow(
    graph: &WorkflowGraph,
    ceiling: Option<&CapabilityPolicy>,
) -> WorkflowValidationReport {
    let mut errors = Vec::new();
    let mut warnings = Vec::new();

    if !graph.nodes.contains_key(&graph.entry) {
        errors.push(format!("entry node does not exist: {}", graph.entry));
    }

    // Every edge endpoint must name an existing node.
    let node_ids: BTreeSet<String> = graph.nodes.keys().cloned().collect();
    for edge in &graph.edges {
        if !node_ids.contains(&edge.from) {
            errors.push(format!("edge.from references unknown node: {}", edge.from));
        }
        if !node_ids.contains(&edge.to) {
            errors.push(format!("edge.to references unknown node: {}", edge.to));
        }
    }

    let reachable_nodes = reachable_nodes(graph);
    for node_id in &node_ids {
        if !reachable_nodes.contains(node_id) {
            warnings.push(format!("node is unreachable: {node_id}"));
        }
    }

    // Structural rules keyed by node kind.
    for (node_id, node) in &graph.nodes {
        let incoming = graph
            .edges
            .iter()
            .filter(|edge| edge.to == *node_id)
            .count();
        let outgoing: Vec<&WorkflowEdge> = graph
            .edges
            .iter()
            .filter(|edge| edge.from == *node_id)
            .collect();
        // An input contract with min > max can never be satisfied.
        if let Some(min_inputs) = node.input_contract.min_inputs {
            if let Some(max_inputs) = node.input_contract.max_inputs {
                if min_inputs > max_inputs {
                    errors.push(format!(
                        "node {node_id}: input contract min_inputs exceeds max_inputs"
                    ));
                }
            }
        }
        match node.kind.as_str() {
            "condition" => {
                // Conditions must cover both branch outcomes.
                let has_true = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("true"));
                let has_false = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("false"));
                if !has_true || !has_false {
                    errors.push(format!(
                        "node {node_id}: condition nodes require both 'true' and 'false' branch edges"
                    ));
                }
            }
            "fork" => {
                if outgoing.len() < 2 {
                    errors.push(format!(
                        "node {node_id}: fork nodes require at least two outgoing edges"
                    ));
                }
            }
            "join" => {
                if incoming < 2 {
                    warnings.push(format!(
                        "node {node_id}: join node has fewer than two incoming edges"
                    ));
                }
            }
            "map" => {
                // Map nodes need at least one item source.
                if node.map_policy.items.is_empty()
                    && node.map_policy.item_artifact_kind.is_none()
                    && node.input_contract.input_kinds.is_empty()
                {
                    errors.push(format!(
                        "node {node_id}: map nodes require items, item_artifact_kind, or input_contract.input_kinds"
                    ));
                }
            }
            "reduce" => {
                if node.input_contract.input_kinds.is_empty() {
                    warnings.push(format!(
                        "node {node_id}: reduce node has no input_contract.input_kinds; it will consume all available artifacts"
                    ));
                }
            }
            _ => {}
        }
    }

    // Capability ceiling: graph- and node-level policies must intersect
    // cleanly with the host-provided ceiling.
    if let Some(ceiling) = ceiling {
        if let Err(error) = ceiling.intersect(&graph.capability_policy) {
            errors.push(error);
        }
        for (node_id, node) in &graph.nodes {
            if let Err(error) = ceiling.intersect(&node.capability_policy) {
                errors.push(format!("node {node_id}: {error}"));
            }
        }
    }

    WorkflowValidationReport {
        valid: errors.is_empty(),
        errors,
        warnings,
        reachable_nodes: reachable_nodes.into_iter().collect(),
    }
}
2069
2070fn reachable_nodes(graph: &WorkflowGraph) -> BTreeSet<String> {
2071 let mut seen = BTreeSet::new();
2072 let mut stack = vec![graph.entry.clone()];
2073 while let Some(node_id) = stack.pop() {
2074 if !seen.insert(node_id.clone()) {
2075 continue;
2076 }
2077 for edge in graph.edges.iter().filter(|edge| edge.from == node_id) {
2078 stack.push(edge.to.clone());
2079 }
2080 }
2081 seen
2082}
2083
/// Filter and rank artifacts per the context policy, then greedily pack them
/// into the token budget.
///
/// Ranking (best first): pinned ids, prioritized kinds, explicit priority,
/// freshness (if `prefer_fresh`), recency (if `prefer_recent`), relevance,
/// and finally smaller estimated size as a tie-breaker.
pub fn select_artifacts(
    mut artifacts: Vec<ArtifactRecord>,
    policy: &ContextPolicy,
) -> Vec<ArtifactRecord> {
    // Drop artifacts excluded by the kind/stage filters. Empty include
    // lists mean "include everything".
    artifacts.retain(|artifact| {
        (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
            && !policy.exclude_kinds.contains(&artifact.kind)
            && (policy.include_stages.is_empty()
                || artifact
                    .stage
                    .as_ref()
                    .is_some_and(|stage| policy.include_stages.contains(stage)))
    });
    // Comparisons are written b-vs-a so "better" artifacts sort first.
    artifacts.sort_by(|a, b| {
        let b_pinned = policy.pinned_ids.contains(&b.id);
        let a_pinned = policy.pinned_ids.contains(&a.id);
        b_pinned
            .cmp(&a_pinned)
            .then_with(|| {
                let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
                let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
                b_prio_kind.cmp(&a_prio_kind)
            })
            .then_with(|| {
                b.priority
                    .unwrap_or_default()
                    .cmp(&a.priority.unwrap_or_default())
            })
            .then_with(|| {
                if policy.prefer_fresh {
                    freshness_rank(b.freshness.as_deref())
                        .cmp(&freshness_rank(a.freshness.as_deref()))
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                if policy.prefer_recent {
                    // Timestamps compare lexicographically; later == fresher.
                    b.created_at.cmp(&a.created_at)
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                b.relevance
                    .partial_cmp(&a.relevance)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
            .then_with(|| {
                // Smaller artifacts first; unknown sizes sort last.
                a.estimated_tokens
                    .unwrap_or(usize::MAX)
                    .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
            })
    });

    // Greedy packing: take artifacts in rank order while they fit within
    // the token budget (minus any reserve) and the max-artifact count.
    let mut selected = Vec::new();
    let mut used_tokens = 0usize;
    let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
    let effective_max_tokens = policy
        .max_tokens
        .map(|max| max.saturating_sub(reserve_tokens));
    for artifact in artifacts {
        if let Some(max_artifacts) = policy.max_artifacts {
            if selected.len() >= max_artifacts {
                break;
            }
        }
        let next_tokens = artifact.estimated_tokens.unwrap_or(0);
        if let Some(max_tokens) = effective_max_tokens {
            if used_tokens + next_tokens > max_tokens {
                // Skip (not stop): a smaller later artifact may still fit.
                continue;
            }
        }
        used_tokens += next_tokens;
        selected.push(artifact);
    }
    selected
}
2162
2163pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
2164 let mut parts = Vec::new();
2165 for artifact in artifacts {
2166 let title = artifact
2167 .title
2168 .clone()
2169 .unwrap_or_else(|| format!("{} {}", artifact.kind, artifact.id));
2170 let body = artifact
2171 .text
2172 .clone()
2173 .or_else(|| artifact.data.as_ref().map(|v| v.to_string()))
2174 .unwrap_or_default();
2175 match policy.render.as_deref() {
2176 Some("json") => {
2177 parts.push(
2178 serde_json::json!({
2179 "id": artifact.id,
2180 "kind": artifact.kind,
2181 "title": title,
2182 "source": artifact.source,
2183 "freshness": artifact.freshness,
2184 "priority": artifact.priority,
2185 "text": body,
2186 })
2187 .to_string(),
2188 );
2189 }
2190 _ => parts.push(format!(
2191 "[{title}] kind={} source={} freshness={} priority={}\n{}",
2192 artifact.kind,
2193 artifact
2194 .source
2195 .clone()
2196 .unwrap_or_else(|| "unknown".to_string()),
2197 artifact
2198 .freshness
2199 .clone()
2200 .unwrap_or_else(|| "normal".to_string()),
2201 artifact.priority.unwrap_or_default(),
2202 body
2203 )),
2204 }
2205 }
2206 parts.join("\n\n")
2207}
2208
/// Parse a VM value into an `ArtifactRecord` and backfill defaulted fields
/// (id, timestamp, canonical kind, token estimate, priority).
pub fn normalize_artifact(value: &VmValue) -> Result<ArtifactRecord, VmError> {
    let artifact: ArtifactRecord = parse_json_value(value)?;
    Ok(artifact.normalize())
}
2213
2214pub fn normalize_run_record(value: &VmValue) -> Result<RunRecord, VmError> {
2215 let mut run: RunRecord = parse_json_payload(vm_value_to_json(value), "run_record")?;
2216 if run.type_name.is_empty() {
2217 run.type_name = "run_record".to_string();
2218 }
2219 if run.id.is_empty() {
2220 run.id = new_id("run");
2221 }
2222 if run.started_at.is_empty() {
2223 run.started_at = now_rfc3339();
2224 }
2225 if run.status.is_empty() {
2226 run.status = "running".to_string();
2227 }
2228 if run.root_run_id.is_none() {
2229 run.root_run_id = Some(run.id.clone());
2230 }
2231 if run.replay_fixture.is_none() {
2232 run.replay_fixture = Some(replay_fixture_from_run(&run));
2233 }
2234 Ok(run)
2235}
2236
2237pub fn normalize_eval_suite_manifest(value: &VmValue) -> Result<EvalSuiteManifest, VmError> {
2238 let mut manifest: EvalSuiteManifest = parse_json_value(value)?;
2239 if manifest.type_name.is_empty() {
2240 manifest.type_name = "eval_suite_manifest".to_string();
2241 }
2242 if manifest.id.is_empty() {
2243 manifest.id = new_id("eval_suite");
2244 }
2245 Ok(manifest)
2246}
2247
2248fn load_replay_fixture(path: &Path) -> Result<ReplayFixture, VmError> {
2249 let content = std::fs::read_to_string(path)
2250 .map_err(|e| VmError::Runtime(format!("failed to read replay fixture: {e}")))?;
2251 serde_json::from_str(&content)
2252 .map_err(|e| VmError::Runtime(format!("failed to parse replay fixture: {e}")))
2253}
2254
/// Resolve a manifest-relative path: absolute paths pass through unchanged;
/// relative paths are joined onto `base_dir` when one is provided.
fn resolve_manifest_path(base_dir: Option<&Path>, path: &str) -> PathBuf {
    let candidate = PathBuf::from(path);
    if candidate.is_absolute() {
        return candidate;
    }
    match base_dir {
        Some(base) => base.join(candidate),
        None => candidate,
    }
}
2265
/// Run every case in an eval-suite manifest: load the run record, evaluate
/// it against its replay fixture (explicit, embedded, or freshly derived),
/// and optionally diff it against a baseline run. Returns a suite-level
/// report that passes only when every case passes. Errors out early if any
/// referenced file fails to load.
pub fn evaluate_run_suite_manifest(
    manifest: &EvalSuiteManifest,
) -> Result<ReplayEvalSuiteReport, VmError> {
    let base_dir = manifest.base_dir.as_deref().map(Path::new);
    let mut reports = Vec::new();
    for case in &manifest.cases {
        let run_path = resolve_manifest_path(base_dir, &case.run_path);
        let run = load_run_record(&run_path)?;
        // Prefer an explicit fixture file; otherwise use the run's embedded
        // fixture, deriving a fresh one as a last resort.
        let fixture = match &case.fixture_path {
            Some(path) => load_replay_fixture(&resolve_manifest_path(base_dir, path))?,
            None => run
                .replay_fixture
                .clone()
                .unwrap_or_else(|| replay_fixture_from_run(&run)),
        };
        let eval = evaluate_run_against_fixture(&run, &fixture);
        let mut pass = eval.pass;
        let mut failures = eval.failures;
        // Optional regression check: any difference from the baseline run
        // fails the case.
        let comparison = match &case.compare_to {
            Some(path) => {
                let baseline_path = resolve_manifest_path(base_dir, path);
                let baseline = load_run_record(&baseline_path)?;
                let diff = diff_run_records(&baseline, &run);
                if !diff.identical {
                    pass = false;
                    failures.push(format!(
                        "run differs from baseline {} with {} stage changes",
                        baseline_path.display(),
                        diff.stage_diffs.len()
                    ));
                }
                Some(diff)
            }
            None => None,
        };
        reports.push(ReplayEvalCaseReport {
            run_id: run.id.clone(),
            workflow_id: run.workflow_id.clone(),
            label: case.label.clone(),
            pass,
            failures,
            stage_count: eval.stage_count,
            source_path: Some(run_path.display().to_string()),
            comparison,
        });
    }
    let total = reports.len();
    let passed = reports.iter().filter(|report| report.pass).count();
    let failed = total.saturating_sub(passed);
    Ok(ReplayEvalSuiteReport {
        pass: failed == 0,
        total,
        passed,
        failed,
        cases: reports,
    })
}
2323
/// Edit operation produced by [`myers_diff`].
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum DiffOp {
    /// Line present in both sequences; the index refers to `a`.
    Equal,
    /// Line removed from `a`; the index refers to `a`.
    Delete,
    /// Line added from `b`; the index refers to `b`.
    Insert,
}

/// Myers O((N+M)·D) shortest-edit-script diff over two line slices.
///
/// Returns the edit script in forward order. `Equal` and `Delete` carry
/// indices into `a`; `Insert` carries indices into `b`.
fn myers_diff(a: &[&str], b: &[&str]) -> Vec<(DiffOp, usize)> {
    let n = a.len() as isize;
    let m = b.len() as isize;
    // Trivial cases: one side empty yields a pure insert/delete script.
    if n == 0 && m == 0 {
        return Vec::new();
    }
    if n == 0 {
        return (0..m as usize).map(|j| (DiffOp::Insert, j)).collect();
    }
    if m == 0 {
        return (0..n as usize).map(|i| (DiffOp::Delete, i)).collect();
    }

    // `v` holds the furthest-reaching x per diagonal k, stored at k + offset.
    let max_d = (n + m) as usize;
    let offset = max_d as isize;
    let v_size = 2 * max_d + 1;
    let mut v = vec![0isize; v_size];
    // trace[d] snapshots `v` *before* processing step d (i.e. the state after
    // step d-1), which is exactly what backtracking consults for step d.
    let mut trace: Vec<Vec<isize>> = Vec::new();

    'outer: for d in 0..=max_d as isize {
        trace.push(v.clone());
        let mut next = v.clone();
        for k in (-d..=d).step_by(2) {
            let ki = (k + offset) as usize;
            // Extend from diagonal k+1 (move down) or k-1 (move right),
            // whichever reaches further. The k == ±d guards also keep the
            // neighbor indices in bounds via short-circuit evaluation.
            let mut x = if k == -d || (k != d && v[ki - 1] < v[ki + 1]) {
                v[ki + 1]
            } else {
                v[ki - 1] + 1
            };
            let mut y = x - k;
            // Follow the snake: greedily consume matching lines.
            while x < n && y < m && a[x as usize] == b[y as usize] {
                x += 1;
                y += 1;
            }
            next[ki] = x;
            if x >= n && y >= m {
                break 'outer;
            }
        }
        v = next;
    }

    // Backtrack from (n, m) through the snapshots to recover the script.
    let mut ops: Vec<(DiffOp, usize)> = Vec::new();
    let mut x = n;
    let mut y = m;
    for d in (1..trace.len() as isize).rev() {
        let k = x - y;
        let v_prev = &trace[d as usize];
        let prev_k = if k == -d
            || (k != d && v_prev[(k - 1 + offset) as usize] < v_prev[(k + 1 + offset) as usize])
        {
            k + 1
        } else {
            k - 1
        };
        let prev_x = v_prev[(prev_k + offset) as usize];
        let prev_y = prev_x - prev_k;

        // Rewind the matched run (snake) that followed the edit at step d.
        while x > prev_x && y > prev_y {
            x -= 1;
            y -= 1;
            ops.push((DiffOp::Equal, x as usize));
        }
        // One edit per step: prev_k < k means a rightward move (deletion
        // from `a`), otherwise a downward move (insertion from `b`).
        if prev_k < k {
            x -= 1;
            ops.push((DiffOp::Delete, x as usize));
        } else {
            y -= 1;
            ops.push((DiffOp::Insert, y as usize));
        }
    }
    // Leading snake on the k = 0 diagonal (step d = 0 carries no edit).
    while x > 0 && y > 0 {
        x -= 1;
        y -= 1;
        ops.push((DiffOp::Equal, x as usize));
    }
    ops.reverse();
    ops
}
2420
2421pub fn render_unified_diff(path: Option<&str>, before: &str, after: &str) -> String {
2422 let before_lines: Vec<&str> = before.lines().collect();
2423 let after_lines: Vec<&str> = after.lines().collect();
2424 let ops = myers_diff(&before_lines, &after_lines);
2425
2426 let mut diff = String::new();
2427 let file = path.unwrap_or("artifact");
2428 diff.push_str(&format!("--- a/{file}\n+++ b/{file}\n"));
2429 for &(op, idx) in &ops {
2430 match op {
2431 DiffOp::Equal => diff.push_str(&format!(" {}\n", before_lines[idx])),
2432 DiffOp::Delete => diff.push_str(&format!("-{}\n", before_lines[idx])),
2433 DiffOp::Insert => diff.push_str(&format!("+{}\n", after_lines[idx])),
2434 }
2435 }
2436 diff
2437}
2438
2439pub fn save_run_record(run: &RunRecord, path: Option<&str>) -> Result<String, VmError> {
2440 let path = path
2441 .map(PathBuf::from)
2442 .unwrap_or_else(|| default_run_dir().join(format!("{}.json", run.id)));
2443 if let Some(parent) = path.parent() {
2444 std::fs::create_dir_all(parent)
2445 .map_err(|e| VmError::Runtime(format!("failed to create run directory: {e}")))?;
2446 }
2447 let json = serde_json::to_string_pretty(run)
2448 .map_err(|e| VmError::Runtime(format!("failed to encode run record: {e}")))?;
2449 let tmp_path = path.with_extension("json.tmp");
2451 std::fs::write(&tmp_path, &json)
2452 .map_err(|e| VmError::Runtime(format!("failed to persist run record: {e}")))?;
2453 std::fs::rename(&tmp_path, &path).map_err(|e| {
2454 let _ = std::fs::write(&path, &json);
2456 VmError::Runtime(format!("failed to finalize run record: {e}"))
2457 })?;
2458 Ok(path.to_string_lossy().to_string())
2459}
2460
2461pub fn load_run_record(path: &Path) -> Result<RunRecord, VmError> {
2462 let content = std::fs::read_to_string(path)
2463 .map_err(|e| VmError::Runtime(format!("failed to read run record: {e}")))?;
2464 serde_json::from_str(&content)
2465 .map_err(|e| VmError::Runtime(format!("failed to parse run record: {e}")))
2466}
2467
2468pub fn replay_fixture_from_run(run: &RunRecord) -> ReplayFixture {
2469 ReplayFixture {
2470 type_name: "replay_fixture".to_string(),
2471 id: new_id("fixture"),
2472 source_run_id: run.id.clone(),
2473 workflow_id: run.workflow_id.clone(),
2474 workflow_name: run.workflow_name.clone(),
2475 created_at: now_rfc3339(),
2476 expected_status: run.status.clone(),
2477 stage_assertions: run
2478 .stages
2479 .iter()
2480 .map(|stage| ReplayStageAssertion {
2481 node_id: stage.node_id.clone(),
2482 expected_status: stage.status.clone(),
2483 expected_outcome: stage.outcome.clone(),
2484 expected_branch: stage.branch.clone(),
2485 required_artifact_kinds: stage
2486 .artifacts
2487 .iter()
2488 .map(|artifact| artifact.kind.clone())
2489 .collect(),
2490 visible_text_contains: stage
2491 .visible_text
2492 .as_ref()
2493 .filter(|text| !text.is_empty())
2494 .map(|text| text.chars().take(80).collect()),
2495 })
2496 .collect(),
2497 }
2498}
2499
2500pub fn evaluate_run_against_fixture(run: &RunRecord, fixture: &ReplayFixture) -> ReplayEvalReport {
2501 let mut failures = Vec::new();
2502 if run.status != fixture.expected_status {
2503 failures.push(format!(
2504 "run status mismatch: expected {}, got {}",
2505 fixture.expected_status, run.status
2506 ));
2507 }
2508 let stages_by_id: BTreeMap<&str, &RunStageRecord> =
2509 run.stages.iter().map(|s| (s.node_id.as_str(), s)).collect();
2510 for assertion in &fixture.stage_assertions {
2511 let Some(stage) = stages_by_id.get(assertion.node_id.as_str()) else {
2512 failures.push(format!("missing stage {}", assertion.node_id));
2513 continue;
2514 };
2515 if stage.status != assertion.expected_status {
2516 failures.push(format!(
2517 "stage {} status mismatch: expected {}, got {}",
2518 assertion.node_id, assertion.expected_status, stage.status
2519 ));
2520 }
2521 if stage.outcome != assertion.expected_outcome {
2522 failures.push(format!(
2523 "stage {} outcome mismatch: expected {}, got {}",
2524 assertion.node_id, assertion.expected_outcome, stage.outcome
2525 ));
2526 }
2527 if stage.branch != assertion.expected_branch {
2528 failures.push(format!(
2529 "stage {} branch mismatch: expected {:?}, got {:?}",
2530 assertion.node_id, assertion.expected_branch, stage.branch
2531 ));
2532 }
2533 for required_kind in &assertion.required_artifact_kinds {
2534 if !stage
2535 .artifacts
2536 .iter()
2537 .any(|artifact| &artifact.kind == required_kind)
2538 {
2539 failures.push(format!(
2540 "stage {} missing artifact kind {}",
2541 assertion.node_id, required_kind
2542 ));
2543 }
2544 }
2545 if let Some(snippet) = &assertion.visible_text_contains {
2546 let actual = stage.visible_text.clone().unwrap_or_default();
2547 if !actual.contains(snippet) {
2548 failures.push(format!(
2549 "stage {} visible text does not contain expected snippet {:?}",
2550 assertion.node_id, snippet
2551 ));
2552 }
2553 }
2554 }
2555
2556 ReplayEvalReport {
2557 pass: failures.is_empty(),
2558 failures,
2559 stage_count: run.stages.len(),
2560 }
2561}
2562
2563pub fn evaluate_run_suite(
2564 cases: Vec<(RunRecord, ReplayFixture, Option<String>)>,
2565) -> ReplayEvalSuiteReport {
2566 let mut reports = Vec::new();
2567 for (run, fixture, source_path) in cases {
2568 let report = evaluate_run_against_fixture(&run, &fixture);
2569 reports.push(ReplayEvalCaseReport {
2570 run_id: run.id.clone(),
2571 workflow_id: run.workflow_id.clone(),
2572 label: None,
2573 pass: report.pass,
2574 failures: report.failures,
2575 stage_count: report.stage_count,
2576 source_path,
2577 comparison: None,
2578 });
2579 }
2580 let total = reports.len();
2581 let passed = reports.iter().filter(|report| report.pass).count();
2582 let failed = total.saturating_sub(passed);
2583 ReplayEvalSuiteReport {
2584 pass: failed == 0,
2585 total,
2586 passed,
2587 failed,
2588 cases: reports,
2589 }
2590}
2591
/// Structurally compare two run records and summarize what changed.
///
/// Stages are matched by `node_id` across the union of both runs. For stages
/// present on both sides, only coarse fields are compared: status, outcome,
/// branch, and the *counts* of produced artifact ids / artifact records (not
/// their contents). `identical` additionally requires matching run status and
/// equal counts of transitions, artifacts, and checkpoints on the run itself.
pub fn diff_run_records(left: &RunRecord, right: &RunRecord) -> RunDiffReport {
    let mut stage_diffs = Vec::new();
    let mut all_node_ids = BTreeSet::new();
    // Index stages by node id on both sides for direct lookup.
    let left_by_id: BTreeMap<&str, &RunStageRecord> = left
        .stages
        .iter()
        .map(|s| (s.node_id.as_str(), s))
        .collect();
    let right_by_id: BTreeMap<&str, &RunStageRecord> = right
        .stages
        .iter()
        .map(|s| (s.node_id.as_str(), s))
        .collect();
    all_node_ids.extend(left_by_id.keys().copied());
    all_node_ids.extend(right_by_id.keys().copied());

    // Walk the union of node ids so both removals and additions are seen.
    for node_id in all_node_ids {
        let left_stage = left_by_id.get(node_id).copied();
        let right_stage = right_by_id.get(node_id).copied();
        match (left_stage, right_stage) {
            (Some(_), None) => stage_diffs.push(RunStageDiffRecord {
                node_id: node_id.to_string(),
                change: "removed".to_string(),
                details: vec!["stage missing from right run".to_string()],
            }),
            (None, Some(_)) => stage_diffs.push(RunStageDiffRecord {
                node_id: node_id.to_string(),
                change: "added".to_string(),
                details: vec!["stage missing from left run".to_string()],
            }),
            // Present in both runs: diff the coarse per-stage fields.
            (Some(left_stage), Some(right_stage)) => {
                let mut details = Vec::new();
                if left_stage.status != right_stage.status {
                    details.push(format!(
                        "status: {} -> {}",
                        left_stage.status, right_stage.status
                    ));
                }
                if left_stage.outcome != right_stage.outcome {
                    details.push(format!(
                        "outcome: {} -> {}",
                        left_stage.outcome, right_stage.outcome
                    ));
                }
                if left_stage.branch != right_stage.branch {
                    details.push(format!(
                        "branch: {:?} -> {:?}",
                        left_stage.branch, right_stage.branch
                    ));
                }
                // Artifact comparisons are count-based only, not content-based.
                if left_stage.produced_artifact_ids.len() != right_stage.produced_artifact_ids.len()
                {
                    details.push(format!(
                        "produced_artifacts: {} -> {}",
                        left_stage.produced_artifact_ids.len(),
                        right_stage.produced_artifact_ids.len()
                    ));
                }
                if left_stage.artifacts.len() != right_stage.artifacts.len() {
                    details.push(format!(
                        "artifact_records: {} -> {}",
                        left_stage.artifacts.len(),
                        right_stage.artifacts.len()
                    ));
                }
                if !details.is_empty() {
                    stage_diffs.push(RunStageDiffRecord {
                        node_id: node_id.to_string(),
                        change: "changed".to_string(),
                        details,
                    });
                }
            }
            (None, None) => {}
        }
    }

    // "identical" is a coarse signal: counts only, no deep content equality.
    let status_changed = left.status != right.status;
    let identical = !status_changed
        && stage_diffs.is_empty()
        && left.transitions.len() == right.transitions.len()
        && left.artifacts.len() == right.artifacts.len()
        && left.checkpoints.len() == right.checkpoints.len();

    RunDiffReport {
        left_run_id: left.id.clone(),
        right_run_id: right.id.clone(),
        identical,
        status_changed,
        left_status: left.status.clone(),
        right_status: right.status.clone(),
        stage_diffs,
        transition_count_delta: right.transitions.len() as isize - left.transitions.len() as isize,
        artifact_count_delta: right.artifacts.len() as isize - left.artifacts.len() as isize,
        checkpoint_count_delta: right.checkpoints.len() as isize - left.checkpoints.len() as isize,
    }
}
2689
/// Push `policy` onto the thread-local execution-policy stack; the innermost
/// (most recently pushed) policy is the one enforcement consults.
pub fn push_execution_policy(policy: CapabilityPolicy) {
    EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
}
2693
/// Pop the innermost execution policy; a no-op when the stack is empty.
pub fn pop_execution_policy() {
    EXECUTION_POLICY_STACK.with(|stack| {
        stack.borrow_mut().pop();
    });
}
2699
/// Clone of the innermost active execution policy on this thread, or `None`
/// when no policy has been pushed.
pub fn current_execution_policy() -> Option<CapabilityPolicy> {
    EXECUTION_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
}
2703
/// Runtime metadata the active policy declares for `tool`, if any.
pub fn current_tool_metadata(tool: &str) -> Option<ToolRuntimePolicyMetadata> {
    current_execution_policy().and_then(|policy| policy.tool_metadata.get(tool).cloned())
}
2707
2708fn policy_allows_tool(policy: &CapabilityPolicy, tool: &str) -> bool {
2709 policy.tools.is_empty() || policy.tools.iter().any(|allowed| allowed == tool)
2710}
2711
2712fn policy_allows_capability(policy: &CapabilityPolicy, capability: &str, op: &str) -> bool {
2713 policy.capabilities.is_empty()
2714 || policy
2715 .capabilities
2716 .get(capability)
2717 .is_some_and(|ops| ops.is_empty() || ops.iter().any(|allowed| allowed == op))
2718}
2719
2720fn policy_allows_side_effect(policy: &CapabilityPolicy, requested: &str) -> bool {
2721 fn rank(v: &str) -> usize {
2722 match v {
2723 "none" => 0,
2724 "read_only" => 1,
2725 "workspace_write" => 2,
2726 "process_exec" => 3,
2727 "network" => 4,
2728 _ => 5,
2729 }
2730 }
2731 policy
2732 .side_effect_level
2733 .as_ref()
2734 .map(|allowed| rank(allowed) >= rank(requested))
2735 .unwrap_or(true)
2736}
2737
/// Build the standard policy-rejection error, categorized as `ToolRejected`
/// so callers can distinguish policy denials from other runtime failures.
fn reject_policy(reason: String) -> Result<(), VmError> {
    Err(VmError::CategorizedError {
        message: reason,
        category: crate::value::ErrorCategory::ToolRejected,
    })
}
2744
/// Name-based heuristic classification of a tool's mutation impact, used when
/// the policy declares no explicit classification. Checks are ordered from
/// most to least specific; anything unmatched defaults to "read_only".
fn fallback_mutation_classification(tool_name: &str) -> String {
    let lower = tool_name.to_ascii_lowercase();
    if lower.starts_with("mcp_") {
        return "host_defined".to_string();
    }
    const EXEC_NAMES: [&str; 5] = ["exec", "shell", "exec_at", "shell_at", "run"];
    if EXEC_NAMES.contains(&lower.as_str()) || lower.starts_with("run_") {
        return "ambient_side_effect".to_string();
    }
    const DESTRUCTIVE_PREFIXES: [&str; 4] = ["delete", "remove", "move", "rename"];
    if DESTRUCTIVE_PREFIXES
        .iter()
        .any(|prefix| lower.starts_with(prefix))
    {
        return "destructive".to_string();
    }
    const WRITE_MARKERS: [&str; 5] = ["write", "edit", "patch", "create", "scaffold"];
    if WRITE_MARKERS.iter().any(|marker| lower.contains(marker))
        || lower.starts_with("insert")
        || lower.starts_with("replace")
        || lower == "add_import"
    {
        return "apply_workspace".to_string();
    }
    "read_only".to_string()
}
2779
/// Mutation classification for `tool_name`: prefer the value declared in the
/// active policy's tool metadata, falling back to the name-based heuristic.
pub fn current_tool_mutation_classification(tool_name: &str) -> String {
    current_tool_metadata(tool_name)
        .and_then(|metadata| metadata.mutation_classification)
        .unwrap_or_else(|| fallback_mutation_classification(tool_name))
}
2785
2786pub fn current_tool_declared_paths(tool_name: &str, args: &serde_json::Value) -> Vec<String> {
2787 let Some(map) = args.as_object() else {
2788 return Vec::new();
2789 };
2790 let path_keys = current_tool_metadata(tool_name)
2791 .map(|metadata| metadata.path_params)
2792 .filter(|keys| !keys.is_empty())
2793 .unwrap_or_else(|| {
2794 vec![
2795 "path".to_string(),
2796 "file".to_string(),
2797 "cwd".to_string(),
2798 "repo".to_string(),
2799 "target".to_string(),
2800 "destination".to_string(),
2801 ]
2802 });
2803 let mut paths = Vec::new();
2804 for key in path_keys {
2805 if let Some(value) = map.get(&key).and_then(|value| value.as_str()) {
2806 if !value.is_empty() {
2807 paths.push(value.to_string());
2808 }
2809 }
2810 }
2811 if let Some(items) = map.get("paths").and_then(|value| value.as_array()) {
2812 for item in items {
2813 if let Some(value) = item.as_str() {
2814 if !value.is_empty() {
2815 paths.push(value.to_string());
2816 }
2817 }
2818 }
2819 }
2820 paths.sort();
2821 paths.dedup();
2822 paths
2823}
2824
/// Gate a builtin call against the innermost execution policy, if any.
///
/// With no active policy, everything is allowed. Each builtin family is
/// checked against a (tool, capability.operation, side-effect level) ceiling;
/// a violation is rejected via `reject_policy` (`ToolRejected` category).
/// Builtins not listed in the match are allowed unconditionally.
pub fn enforce_current_policy_for_builtin(name: &str, args: &[VmValue]) -> Result<(), VmError> {
    let Some(policy) = current_execution_policy() else {
        return Ok(());
    };
    match name {
        // Read-only file access: needs the tool plus workspace.read_text.
        "read" | "read_file" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "read_text")
            {
                return reject_policy(format!(
                    "builtin '{name}' exceeds workspace.read_text ceiling"
                ));
            }
        }
        // Directory enumeration / search.
        "search" | "list_dir" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "list")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
            }
        }
        // Existence/metadata probes check only the capability, not the tool.
        "file_exists" | "stat" => {
            if !policy_allows_capability(&policy, "workspace", "exists") {
                return reject_policy(format!("builtin '{name}' exceeds workspace.exists ceiling"));
            }
        }
        // Workspace mutation: all of these are gated behind the 'edit' tool,
        // the write capability, and the workspace_write side-effect level.
        "edit" | "write_file" | "append_file" | "mkdir" | "copy_file" => {
            if !policy_allows_tool(&policy, "edit")
                || !policy_allows_capability(&policy, "workspace", "write_text")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace write ceiling"));
            }
        }
        "delete_file" => {
            if !policy_allows_capability(&policy, "workspace", "delete")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'delete_file' exceeds workspace.delete ceiling".to_string(),
                );
            }
        }
        "apply_edit" => {
            if !policy_allows_capability(&policy, "workspace", "apply_edit")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'apply_edit' exceeds workspace.apply_edit ceiling".to_string(),
                );
            }
        }
        // Process execution: gated behind the 'run' tool alias.
        "exec" | "exec_at" | "shell" | "shell_at" | "run_command" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        // Network access only checks the side-effect ceiling.
        "http_get" | "http_post" | "http_put" | "http_patch" | "http_delete" | "http_request" => {
            if !policy_allows_side_effect(&policy, "network") {
                return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
            }
        }
        // MCP builtins spawn a server process, so they are gated like exec.
        "mcp_connect"
        | "mcp_call"
        | "mcp_list_tools"
        | "mcp_list_resources"
        | "mcp_list_resource_templates"
        | "mcp_read_resource"
        | "mcp_list_prompts"
        | "mcp_get_prompt"
        | "mcp_server_info"
        | "mcp_disconnect" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        // host_call is checked against the capability named by its first
        // argument ("capability.operation").
        "host_call" => {
            // NOTE: shadows the builtin name with the host_call target.
            let name = args.first().map(|v| v.display()).unwrap_or_default();
            let Some((capability, op)) = name.split_once('.') else {
                return reject_policy(format!(
                    "host_call '{name}' must use capability.operation naming"
                ));
            };
            if !policy_allows_capability(&policy, capability, op) {
                return reject_policy(format!(
                    "host_call {capability}.{op} exceeds capability ceiling"
                ));
            }
            // Map known mutating operations onto side-effect levels; anything
            // unrecognized is treated as read_only.
            let requested_side_effect = match (capability, op) {
                ("workspace", "write_text" | "apply_edit" | "delete") => "workspace_write",
                ("process", "exec") => "process_exec",
                _ => "read_only",
            };
            if !policy_allows_side_effect(&policy, requested_side_effect) {
                return reject_policy(format!(
                    "host_call {capability}.{op} exceeds side-effect ceiling"
                ));
            }
        }
        _ => {}
    }
    Ok(())
}
2934
2935pub fn enforce_current_policy_for_bridge_builtin(name: &str) -> Result<(), VmError> {
2936 if current_execution_policy().is_some() {
2937 return reject_policy(format!(
2938 "bridged builtin '{name}' exceeds execution policy; declare an explicit capability/tool surface instead"
2939 ));
2940 }
2941 Ok(())
2942}
2943
2944pub fn enforce_current_policy_for_tool(tool_name: &str) -> Result<(), VmError> {
2945 let Some(policy) = current_execution_policy() else {
2946 return Ok(());
2947 };
2948 if !policy_allows_tool(&policy, tool_name) {
2949 return reject_policy(format!("tool '{tool_name}' exceeds tool ceiling"));
2950 }
2951 if let Some(metadata) = policy.tool_metadata.get(tool_name) {
2952 for (capability, ops) in &metadata.capabilities {
2953 for op in ops {
2954 if !policy_allows_capability(&policy, capability, op) {
2955 return reject_policy(format!(
2956 "tool '{tool_name}' exceeds capability ceiling: {capability}.{op}"
2957 ));
2958 }
2959 }
2960 }
2961 if let Some(side_effect_level) = metadata.side_effect_level.as_deref() {
2962 if !policy_allows_side_effect(&policy, side_effect_level) {
2963 return reject_policy(format!(
2964 "tool '{tool_name}' exceeds side-effect ceiling: {side_effect_level}"
2965 ));
2966 }
2967 }
2968 }
2969 Ok(())
2970}
2971
2972fn compact_transcript(transcript: &VmValue, keep_last: usize) -> Option<VmValue> {
2973 let dict = transcript.as_dict()?;
2974 let messages = match dict.get("messages") {
2975 Some(VmValue::List(list)) => list.iter().cloned().collect::<Vec<_>>(),
2976 _ => Vec::new(),
2977 };
2978 let retained = messages
2979 .into_iter()
2980 .rev()
2981 .take(keep_last)
2982 .collect::<Vec<_>>()
2983 .into_iter()
2984 .rev()
2985 .collect::<Vec<_>>();
2986 let mut compacted = dict.clone();
2987 compacted.insert(
2988 "messages".to_string(),
2989 VmValue::List(Rc::new(retained.clone())),
2990 );
2991 compacted.insert(
2992 "events".to_string(),
2993 VmValue::List(Rc::new(
2994 crate::llm::helpers::transcript_events_from_messages(&retained),
2995 )),
2996 );
2997 Some(VmValue::Dict(Rc::new(compacted)))
2998}
2999
3000fn redact_transcript_visibility(transcript: &VmValue, visibility: Option<&str>) -> Option<VmValue> {
3001 let Some(visibility) = visibility else {
3002 return Some(transcript.clone());
3003 };
3004 if visibility != "public" && visibility != "public_only" {
3005 return Some(transcript.clone());
3006 }
3007 let dict = transcript.as_dict()?;
3008 let public_messages = match dict.get("messages") {
3009 Some(VmValue::List(list)) => list
3010 .iter()
3011 .filter(|message| {
3012 message
3013 .as_dict()
3014 .and_then(|d| d.get("role"))
3015 .map(|v| v.display())
3016 .map(|role| role != "tool_result")
3017 .unwrap_or(true)
3018 })
3019 .cloned()
3020 .collect::<Vec<_>>(),
3021 _ => Vec::new(),
3022 };
3023 let public_events = match dict.get("events") {
3024 Some(VmValue::List(list)) => list
3025 .iter()
3026 .filter(|event| {
3027 event
3028 .as_dict()
3029 .and_then(|d| d.get("visibility"))
3030 .map(|v| v.display())
3031 .map(|value| value == "public")
3032 .unwrap_or(true)
3033 })
3034 .cloned()
3035 .collect::<Vec<_>>(),
3036 _ => Vec::new(),
3037 };
3038 let mut redacted = dict.clone();
3039 redacted.insert(
3040 "messages".to_string(),
3041 VmValue::List(Rc::new(public_messages)),
3042 );
3043 redacted.insert("events".to_string(), VmValue::List(Rc::new(public_events)));
3044 Some(VmValue::Dict(Rc::new(redacted)))
3045}
3046
3047pub(crate) fn apply_input_transcript_policy(
3048 transcript: Option<VmValue>,
3049 policy: &TranscriptPolicy,
3050) -> Option<VmValue> {
3051 let mut transcript = transcript;
3052 match policy.mode.as_deref() {
3053 Some("reset") => return None,
3054 Some("fork") => {
3055 if let Some(VmValue::Dict(dict)) = transcript.as_ref() {
3056 let mut forked = dict.as_ref().clone();
3057 forked.insert(
3058 "id".to_string(),
3059 VmValue::String(Rc::from(new_id("transcript"))),
3060 );
3061 transcript = Some(VmValue::Dict(Rc::new(forked)));
3062 }
3063 }
3064 _ => {}
3065 }
3066 if policy.compact {
3067 let keep_last = policy.keep_last.unwrap_or(6);
3068 transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
3069 }
3070 transcript
3071}
3072
3073fn apply_output_transcript_policy(
3074 transcript: Option<VmValue>,
3075 policy: &TranscriptPolicy,
3076) -> Option<VmValue> {
3077 let mut transcript = transcript;
3078 if policy.compact {
3079 let keep_last = policy.keep_last.unwrap_or(6);
3080 transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
3081 }
3082 transcript.and_then(|value| redact_transcript_visibility(&value, policy.visibility.as_deref()))
3083}
3084
/// Execute one workflow stage node end to end.
///
/// Returns `(stage_result_json, produced_artifacts, output_transcript)`.
/// Flow: select input artifacts per the node's context policy, enforce the
/// input contract, build the prompt, then either run the node's verify
/// command (`kind == "verify"`) or call the LLM — optionally inside an agent
/// loop — and finally wrap the result into a single normalized artifact.
pub async fn execute_stage_node(
    node_id: &str,
    node: &WorkflowNode,
    task: &str,
    artifacts: &[ArtifactRecord],
    transcript: Option<VmValue>,
) -> Result<(serde_json::Value, Vec<ArtifactRecord>, Option<VmValue>), VmError> {
    // Default the artifact include filter to the node's declared input kinds
    // when the context policy itself does not specify one.
    let mut selection_policy = node.context_policy.clone();
    if selection_policy.include_kinds.is_empty() && !node.input_contract.input_kinds.is_empty() {
        selection_policy.include_kinds = node.input_contract.input_kinds.clone();
    }
    let selected = select_artifacts_adaptive(artifacts.to_vec(), &selection_policy);
    let rendered_context = render_artifacts_context(&selected, &node.context_policy);
    let transcript = apply_input_transcript_policy(transcript, &node.transcript_policy);
    // Enforce the node's input contract before any expensive work.
    if node.input_contract.require_transcript && transcript.is_none() {
        return Err(VmError::Runtime(format!(
            "workflow stage {node_id} requires transcript input"
        )));
    }
    if let Some(min_inputs) = node.input_contract.min_inputs {
        if selected.len() < min_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} requires at least {min_inputs} input artifacts"
            )));
        }
    }
    if let Some(max_inputs) = node.input_contract.max_inputs {
        if selected.len() > max_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} accepts at most {max_inputs} input artifacts"
            )));
        }
    }
    // Prepend the rendered artifact context to the task prompt, if any.
    let prompt = if rendered_context.is_empty() {
        task.to_string()
    } else {
        format!(
            "{rendered_context}\n\n{}:\n{task}",
            node.task_label
                .clone()
                .unwrap_or_else(|| "Task".to_string())
        )
    };

    // Tool-calling wire format is host-configurable; defaults to "text".
    let tool_format = std::env::var("HARN_AGENT_TOOL_FORMAT")
        .ok()
        .filter(|value| !value.trim().is_empty())
        .unwrap_or_else(|| "text".to_string());
    let mut llm_result = if node.kind == "verify" {
        // Verify nodes run a shell command instead of calling the LLM.
        if let Some(command) = node
            .verify
            .as_ref()
            .and_then(|verify| verify.as_object())
            .and_then(|verify| verify.get("command"))
            .and_then(|value| value.as_str())
            .map(str::trim)
            .filter(|value| !value.is_empty())
        {
            let mut process = if cfg!(target_os = "windows") {
                let mut cmd = tokio::process::Command::new("cmd");
                cmd.arg("/C").arg(command);
                cmd
            } else {
                let mut cmd = tokio::process::Command::new("/bin/sh");
                cmd.arg("-lc").arg(command);
                cmd
            };
            process.stdin(std::process::Stdio::null());
            // Inherit cwd/env from the ambient execution context, when set.
            if let Some(context) = crate::stdlib::process::current_execution_context() {
                if let Some(cwd) = context.cwd.filter(|cwd| !cwd.is_empty()) {
                    process.current_dir(cwd);
                }
                if !context.env.is_empty() {
                    process.envs(context.env);
                }
            }
            let output = process
                .output()
                .await
                .map_err(|e| VmError::Runtime(format!("workflow verify exec failed: {e}")))?;
            let stdout = String::from_utf8_lossy(&output.stdout).to_string();
            let stderr = String::from_utf8_lossy(&output.stderr).to_string();
            // Merge stdout/stderr into a single visible-text payload.
            let combined = if stderr.is_empty() {
                stdout.clone()
            } else if stdout.is_empty() {
                stderr.clone()
            } else {
                format!("{stdout}\n{stderr}")
            };
            serde_json::json!({
                "status": "completed",
                "text": combined,
                "visible_text": combined,
                "command": command,
                "stdout": stdout,
                "stderr": stderr,
                "exit_status": output.status.code().unwrap_or(-1),
                "success": output.status.success(),
            })
        } else {
            // Verify node without a command: trivially completed.
            serde_json::json!({
                "status": "completed",
                "text": "",
                "visible_text": "",
            })
        }
    } else {
        // LLM-backed node: assemble call options from the node's model policy.
        let mut options = BTreeMap::new();
        if let Some(provider) = &node.model_policy.provider {
            options.insert(
                "provider".to_string(),
                VmValue::String(Rc::from(provider.clone())),
            );
        }
        if let Some(model) = &node.model_policy.model {
            options.insert(
                "model".to_string(),
                VmValue::String(Rc::from(model.clone())),
            );
        }
        if let Some(model_tier) = &node.model_policy.model_tier {
            options.insert(
                "model_tier".to_string(),
                VmValue::String(Rc::from(model_tier.clone())),
            );
        }
        if let Some(temperature) = node.model_policy.temperature {
            options.insert("temperature".to_string(), VmValue::Float(temperature));
        }
        if let Some(max_tokens) = node.model_policy.max_tokens {
            options.insert("max_tokens".to_string(), VmValue::Int(max_tokens));
        }
        // Prefer the raw tool spec when available; otherwise convert the
        // parsed JSON form back into a VM value.
        let tool_names = workflow_tool_names(&node.tools);
        let tools_value = node.raw_tools.clone().or_else(|| {
            if matches!(node.tools, serde_json::Value::Null) {
                None
            } else {
                Some(crate::stdlib::json_to_vm_value(&node.tools))
            }
        });
        if tools_value.is_some() && !tool_names.is_empty() {
            // tools_value is Some here; unwrap_or is a defensive no-op.
            options.insert("tools".to_string(), tools_value.unwrap_or(VmValue::Nil));
        }
        if let Some(transcript) = transcript.clone() {
            options.insert("transcript".to_string(), transcript);
        }

        // Args mirror the builtin llm-call signature: (prompt, system, opts).
        let args = vec![
            VmValue::String(Rc::from(prompt.clone())),
            node.system
                .clone()
                .map(|s| VmValue::String(Rc::from(s)))
                .unwrap_or(VmValue::Nil),
            VmValue::Dict(Rc::new(options)),
        ];
        let mut opts = extract_llm_options(&args)?;

        // Agent mode (explicit, or implied by having tools) runs the full
        // tool-calling loop; otherwise it is a single LLM call.
        if node.mode.as_deref() == Some("agent") || !tool_names.is_empty() {
            crate::llm::run_agent_loop_internal(
                &mut opts,
                crate::llm::AgentLoopConfig {
                    persistent: true,
                    max_iterations: node.model_policy.max_iterations.unwrap_or(16),
                    max_nudges: node.model_policy.max_nudges.unwrap_or(3),
                    nudge: node.model_policy.nudge.clone(),
                    done_sentinel: node.done_sentinel.clone(),
                    break_unless_phase: None,
                    tool_retries: 0,
                    tool_backoff_ms: 1000,
                    tool_format: tool_format.clone(),
                    auto_compact: None,
                    context_callback: None,
                    policy: None,
                    daemon: false,
                    llm_retries: 2,
                    llm_backoff_ms: 2000,
                    exit_when_verified: false,
                    loop_detect_warn: 2,
                    loop_detect_block: 3,
                    loop_detect_skip: 4,
                },
            )
            .await?
        } else {
            let result = vm_call_llm_full(&opts).await?;
            crate::llm::agent_loop_result_from_llm(&result, opts)
        }
    };
    // Enrich the result payload with prompt/context provenance for the run
    // record and downstream debugging.
    if let Some(payload) = llm_result.as_object_mut() {
        payload.insert("prompt".to_string(), serde_json::json!(prompt));
        payload.insert(
            "system_prompt".to_string(),
            serde_json::json!(node.system.clone().unwrap_or_default()),
        );
        payload.insert(
            "rendered_context".to_string(),
            serde_json::json!(rendered_context),
        );
        payload.insert(
            "selected_artifact_ids".to_string(),
            serde_json::json!(selected
                .iter()
                .map(|artifact| artifact.id.clone())
                .collect::<Vec<_>>()),
        );
        payload.insert(
            "selected_artifact_titles".to_string(),
            serde_json::json!(selected
                .iter()
                .map(|artifact| artifact.title.clone())
                .collect::<Vec<_>>()),
        );
        payload.insert(
            "tool_calling_mode".to_string(),
            serde_json::json!(tool_format.clone()),
        );
    }

    let visible_text = llm_result["text"].as_str().unwrap_or_default().to_string();
    // Convert the JSON transcript back to a VM value and apply output policy.
    let transcript = llm_result
        .get("transcript")
        .cloned()
        .map(|value| crate::stdlib::json_to_vm_value(&value));
    let transcript = apply_output_transcript_policy(transcript, &node.transcript_policy);
    // Output kind: first declared output kind, else a kind-based default.
    let output_kind = node
        .output_contract
        .output_kinds
        .first()
        .cloned()
        .unwrap_or_else(|| {
            if node.kind == "verify" {
                "verification_result".to_string()
            } else {
                "artifact".to_string()
            }
        });
    let mut metadata = BTreeMap::new();
    metadata.insert(
        "input_artifact_ids".to_string(),
        serde_json::json!(selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect::<Vec<_>>()),
    );
    metadata.insert("node_kind".to_string(), serde_json::json!(node.kind));
    // Wrap the stage output as a single artifact, with lineage pointing at
    // the selected input artifacts.
    let artifact = ArtifactRecord {
        type_name: "artifact".to_string(),
        id: new_id("artifact"),
        kind: output_kind,
        title: Some(format!("stage {node_id} output")),
        text: Some(visible_text),
        data: Some(llm_result.clone()),
        source: Some(node_id.to_string()),
        created_at: now_rfc3339(),
        freshness: Some("fresh".to_string()),
        priority: None,
        lineage: selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect(),
        relevance: Some(1.0),
        estimated_tokens: None,
        stage: Some(node_id.to_string()),
        metadata,
    }
    .normalize();

    Ok((llm_result, vec![artifact], transcript))
}
3354
3355pub fn next_nodes_for(
3356 graph: &WorkflowGraph,
3357 current: &str,
3358 branch: Option<&str>,
3359) -> Vec<WorkflowEdge> {
3360 let mut matching: Vec<WorkflowEdge> = graph
3361 .edges
3362 .iter()
3363 .filter(|edge| edge.from == current && edge.branch.as_deref() == branch)
3364 .cloned()
3365 .collect();
3366 if matching.is_empty() {
3367 matching = graph
3368 .edges
3369 .iter()
3370 .filter(|edge| edge.from == current && edge.branch.is_none())
3371 .cloned()
3372 .collect();
3373 }
3374 matching
3375}
3376
3377pub fn next_node_for(graph: &WorkflowGraph, current: &str, branch: &str) -> Option<String> {
3378 next_nodes_for(graph, current, Some(branch))
3379 .into_iter()
3380 .next()
3381 .map(|edge| edge.to)
3382}
3383
3384pub fn append_audit_entry(
3385 graph: &mut WorkflowGraph,
3386 op: &str,
3387 node_id: Option<String>,
3388 reason: Option<String>,
3389 metadata: BTreeMap<String, serde_json::Value>,
3390) {
3391 graph.audit_log.push(WorkflowAuditEntry {
3392 id: new_id("audit"),
3393 op: op.to_string(),
3394 node_id,
3395 timestamp: now_rfc3339(),
3396 reason,
3397 metadata,
3398 });
3399}
3400
3401pub fn builtin_ceiling() -> CapabilityPolicy {
3402 CapabilityPolicy {
3403 tools: Vec::new(),
3407 capabilities: BTreeMap::new(),
3408 workspace_roots: Vec::new(),
3409 side_effect_level: Some("network".to_string()),
3410 recursion_limit: Some(8),
3411 tool_arg_constraints: Vec::new(),
3412 tool_metadata: BTreeMap::new(),
3413 }
3414}
3415
3416#[cfg(test)]
3417mod tests {
3418 use super::*;
3419
    // Requesting a tool ("edit") that the host ceiling does not grant must
    // make the intersection fail with a "host ceiling" error.
    #[test]
    fn capability_intersection_rejects_privilege_expansion() {
        let ceiling = CapabilityPolicy {
            tools: vec!["read".to_string()],
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(2),
            ..Default::default()
        };
        let requested = CapabilityPolicy {
            tools: vec!["read".to_string(), "edit".to_string()],
            ..Default::default()
        };
        let error = ceiling.intersect(&requested).unwrap_err();
        assert!(error.contains("host ceiling"));
    }
3435
    // normalize() on an empty record must backfill the session id, scope, and
    // approval mode defaults (see MutationSessionRecord::normalize).
    #[test]
    fn mutation_session_normalize_fills_defaults() {
        let normalized = MutationSessionRecord::default().normalize();
        assert!(normalized.session_id.starts_with("session_"));
        assert_eq!(normalized.mutation_scope, "read_only");
        assert_eq!(normalized.approval_mode, "host_enforced");
    }
3443
    // Installing a session makes it readable via current_mutation_session();
    // installing None clears the thread-local slot again.
    #[test]
    fn install_current_mutation_session_round_trips() {
        install_current_mutation_session(Some(MutationSessionRecord {
            session_id: "session_test".to_string(),
            mutation_scope: "apply_workspace".to_string(),
            approval_mode: "explicit".to_string(),
            ..Default::default()
        }));
        let current = current_mutation_session().expect("session installed");
        assert_eq!(current.session_id, "session_test");
        assert_eq!(current.mutation_scope, "apply_workspace");
        assert_eq!(current.approval_mode, "explicit");

        install_current_mutation_session(None);
        assert!(current_mutation_session().is_none());
    }
3460
    // A bridge builtin not covered by the active policy's capabilities must be
    // rejected with a ToolRejected categorized error.
    #[test]
    fn active_execution_policy_rejects_unknown_bridge_builtin() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
        // Pop before asserting so the policy stack is clean even on failure paths.
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }
3483
    // mcp_connect must not bypass the policy: a read-only policy without MCP
    // capability rejects it as ToolRejected.
    #[test]
    fn active_execution_policy_rejects_mcp_escape_hatch() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }
3506
    // A legacy top-level {act, verify, repair} map must normalize into a full
    // workflow_graph with those three nodes and "act" as entry.
    #[test]
    fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "legacy",
            "act": {"mode": "llm"},
            "verify": {"kind": "verify"},
            "repair": {"mode": "agent"},
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        assert_eq!(graph.type_name, "workflow_graph");
        assert!(graph.nodes.contains_key("act"));
        assert!(graph.nodes.contains_key("verify"));
        assert!(graph.nodes.contains_key("repair"));
        assert_eq!(graph.entry, "act");
    }
3522
    // A node's tools may be given as a tool_registry object; normalization
    // must preserve the contained tool names in order.
    #[test]
    fn workflow_normalization_accepts_tool_registry_nodes() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "registry_tools",
            "entry": "implement",
            "nodes": {
                "implement": {
                    "kind": "stage",
                    "mode": "agent",
                    "tools": {
                        "_type": "tool_registry",
                        "tools": [
                            {"name": "read", "description": "Read files"},
                            {"name": "run", "description": "Run commands"}
                        ]
                    }
                }
            },
            "edges": []
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        let node = graph.nodes.get("implement").unwrap();
        assert_eq!(workflow_tool_names(&node.tools), vec!["read", "run"]);
    }
3547
    // With max_artifacts=2 and a 30-token budget, selection must trim the
    // three candidates down to two. NOTE(review): prioritize_kinds names
    // "verification_result" but no candidate has that kind, so the priority
    // path itself is not exercised here — only the budget caps are.
    #[test]
    fn artifact_selection_honors_budget_and_priority() {
        let policy = ContextPolicy {
            max_artifacts: Some(2),
            max_tokens: Some(30),
            prefer_recent: true,
            prefer_fresh: true,
            prioritize_kinds: vec!["verification_result".to_string()],
            ..Default::default()
        };
        let artifacts = vec![
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "a".to_string(),
                kind: "summary".to_string(),
                text: Some("short".to_string()),
                relevance: Some(0.9),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "b".to_string(),
                kind: "summary".to_string(),
                text: Some("this is a much larger artifact body".to_string()),
                relevance: Some(1.0),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "c".to_string(),
                kind: "summary".to_string(),
                text: Some("tiny".to_string()),
                relevance: Some(0.5),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
        ];
        let selected = select_artifacts(artifacts, &policy);
        assert_eq!(selected.len(), 2);
        assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
    }
3594
    // A condition node with only a "true" branch edge (no "false") must fail
    // validation with an error mentioning both required branches.
    #[test]
    fn workflow_validation_rejects_condition_without_true_false_edges() {
        let graph = WorkflowGraph {
            entry: "gate".to_string(),
            nodes: BTreeMap::from([(
                "gate".to_string(),
                WorkflowNode {
                    id: Some("gate".to_string()),
                    kind: "condition".to_string(),
                    ..Default::default()
                },
            )]),
            edges: vec![WorkflowEdge {
                from: "gate".to_string(),
                to: "next".to_string(),
                branch: Some("true".to_string()),
                label: None,
            }],
            ..Default::default()
        };
        let report = validate_workflow(&graph, None);
        assert!(!report.valid);
        assert!(report
            .errors
            .iter()
            .any(|error| error.contains("true") && error.contains("false")));
    }
3622
    // A fixture derived from a run must evaluate cleanly against that same
    // run (self round-trip: pass with no failures).
    #[test]
    fn replay_fixture_round_trip_passes() {
        let run = RunRecord {
            type_name: "run_record".to_string(),
            id: "run_1".to_string(),
            workflow_id: "wf".to_string(),
            workflow_name: Some("demo".to_string()),
            task: "demo".to_string(),
            status: "completed".to_string(),
            started_at: "1".to_string(),
            finished_at: Some("2".to_string()),
            parent_run_id: None,
            root_run_id: Some("run_1".to_string()),
            stages: vec![RunStageRecord {
                id: "stage_1".to_string(),
                node_id: "act".to_string(),
                kind: "stage".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                branch: Some("success".to_string()),
                started_at: "1".to_string(),
                finished_at: Some("2".to_string()),
                visible_text: Some("done".to_string()),
                private_reasoning: None,
                transcript: None,
                verification: None,
                usage: None,
                artifacts: vec![ArtifactRecord {
                    type_name: "artifact".to_string(),
                    id: "a1".to_string(),
                    kind: "summary".to_string(),
                    text: Some("done".to_string()),
                    created_at: "1".to_string(),
                    ..Default::default()
                }
                .normalize()],
                consumed_artifact_ids: vec![],
                produced_artifact_ids: vec!["a1".to_string()],
                attempts: vec![],
                metadata: BTreeMap::new(),
            }],
            transitions: vec![],
            checkpoints: vec![],
            pending_nodes: vec![],
            completed_nodes: vec!["act".to_string()],
            child_runs: vec![],
            artifacts: vec![],
            policy: CapabilityPolicy::default(),
            execution: None,
            transcript: None,
            usage: None,
            replay_fixture: None,
            trace_spans: vec![],
            metadata: BTreeMap::new(),
            persisted_path: None,
        };
        let fixture = replay_fixture_from_run(&run);
        let report = evaluate_run_against_fixture(&run, &fixture);
        assert!(report.pass);
        assert!(report.failures.is_empty());
    }
3684
    // Evaluating a failed run against a fixture built from a good run must
    // mark that case (and the suite) as failed, with correct counts.
    #[test]
    fn replay_eval_suite_reports_failed_case() {
        let good = RunRecord {
            id: "run_good".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let bad = RunRecord {
            id: "run_bad".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        // Both cases use the fixture derived from the good run on purpose:
        // "good" matches it, "bad" diverges.
        let suite = evaluate_run_suite(vec![
            (
                good.clone(),
                replay_fixture_from_run(&good),
                Some("good.json".to_string()),
            ),
            (
                bad.clone(),
                replay_fixture_from_run(&good),
                Some("bad.json".to_string()),
            ),
        ]);
        assert!(!suite.pass);
        assert_eq!(suite.total, 2);
        assert_eq!(suite.failed, 1);
        assert!(suite.cases.iter().any(|case| !case.pass));
    }
3728
    // Diffing a completed run against a failed one must flag the status
    // change, mark the runs non-identical, and report the one changed stage.
    #[test]
    fn run_diff_reports_changed_stage() {
        let left = RunRecord {
            id: "left".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let right = RunRecord {
            id: "right".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let diff = diff_run_records(&left, &right);
        assert!(diff.status_changed);
        assert!(!diff.identical);
        assert_eq!(diff.stage_diffs.len(), 1);
    }
3760
    // A manifest case whose run diverges from its compare_to baseline must
    // fail the suite with a baseline-mentioning failure message.
    // NOTE(review): the temp directory is never removed; acceptable for a
    // test, but worth confirming CI cleans std::env::temp_dir().
    #[test]
    fn eval_suite_manifest_can_fail_on_baseline_diff() {
        let temp_dir =
            std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
        std::fs::create_dir_all(&temp_dir).unwrap();
        let baseline_path = temp_dir.join("baseline.json");
        let candidate_path = temp_dir.join("candidate.json");

        let baseline = RunRecord {
            id: "baseline".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let candidate = RunRecord {
            id: "candidate".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };

        save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
        save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();

        let manifest = EvalSuiteManifest {
            base_dir: Some(temp_dir.display().to_string()),
            cases: vec![EvalSuiteCase {
                label: Some("candidate".to_string()),
                run_path: "candidate.json".to_string(),
                fixture_path: None,
                compare_to: Some("baseline.json".to_string()),
            }],
            ..Default::default()
        };
        let suite = evaluate_run_suite_manifest(&manifest).unwrap();
        assert!(!suite.pass);
        assert_eq!(suite.failed, 1);
        assert!(suite.cases[0].comparison.is_some());
        assert!(suite.cases[0]
            .failures
            .iter()
            .any(|failure| failure.contains("baseline")));
    }
3816
    // A basic one-line change renders git-style headers, -/+ markers for the
    // changed line, and a space-prefixed context line.
    #[test]
    fn render_unified_diff_marks_removed_and_added_lines() {
        let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
        assert!(diff.contains("--- a/src/main.rs"));
        assert!(diff.contains("+++ b/src/main.rs"));
        assert!(diff.contains("-old"));
        assert!(diff.contains("+new"));
        assert!(diff.contains(" same"));
    }
3826
    // Identical inputs produce only context lines (no - or +) after the two
    // header lines, and the default "artifact" path when none is given.
    #[test]
    fn render_unified_diff_identical_inputs() {
        let text = "line1\nline2\nline3";
        let diff = render_unified_diff(None, text, text);
        assert!(diff.contains("--- a/artifact"));
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('-')));
        assert!(!body.iter().any(|l| l.starts_with('+')));
        assert_eq!(body.len(), 3);
    }
3837
    // Empty "before" text yields only additions — no removal lines at all.
    #[test]
    fn render_unified_diff_empty_before() {
        let diff = render_unified_diff(None, "", "new1\nnew2");
        assert!(diff.contains("+new1"));
        assert!(diff.contains("+new2"));
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('-')));
    }
3846
    // Empty "after" text yields only removals — no addition lines at all.
    #[test]
    fn render_unified_diff_empty_after() {
        let diff = render_unified_diff(None, "old1\nold2", "");
        assert!(diff.contains("-old1"));
        assert!(diff.contains("-old2"));
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('+')));
    }
3855
    // Both inputs empty: headers are still emitted but the diff body is empty.
    #[test]
    fn render_unified_diff_both_empty() {
        let diff = render_unified_diff(None, "", "");
        assert!(diff.contains("--- a/artifact"));
        assert!(diff.contains("+++ b/artifact"));
        // Concatenating all post-header lines into a String checks emptiness.
        let body: String = diff.lines().skip(2).collect();
        assert!(body.is_empty());
    }
3865
    // Fully disjoint inputs: every old line removed, every new line added.
    #[test]
    fn render_unified_diff_all_changed() {
        let diff = render_unified_diff(None, "a\nb", "x\ny");
        assert!(diff.contains("-a"));
        assert!(diff.contains("-b"));
        assert!(diff.contains("+x"));
        assert!(diff.contains("+y"));
    }
3874
    // Pure mid-sequence insertion: surrounding lines stay context, the new
    // line is a +, and no removals appear.
    #[test]
    fn render_unified_diff_insertion_in_middle() {
        let diff = render_unified_diff(None, "a\nc", "a\nb\nc");
        assert!(diff.contains(" a"));
        assert!(diff.contains("+b"));
        assert!(diff.contains(" c"));
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('-')));
    }
3884
    // Pure mid-sequence deletion: surrounding lines stay context, the removed
    // line is a -, and no additions appear.
    #[test]
    fn render_unified_diff_deletion_from_middle() {
        let diff = render_unified_diff(None, "a\nb\nc", "a\nc");
        assert!(diff.contains(" a"));
        assert!(diff.contains("-b"));
        assert!(diff.contains(" c"));
        let body: Vec<&str> = diff.lines().skip(2).collect();
        assert!(!body.iter().any(|l| l.starts_with('+')));
    }
3894
    // With no path argument the headers fall back to a/artifact and b/artifact.
    #[test]
    fn render_unified_diff_default_path() {
        let diff = render_unified_diff(None, "a", "b");
        assert!(diff.contains("--- a/artifact"));
        assert!(diff.contains("+++ b/artifact"));
    }
3901
    // 1000 mostly-identical lines with one change: the diff must isolate the
    // single -/+ pair and keep the neighbors as context (also a smoke test
    // that large inputs complete quickly).
    #[test]
    fn render_unified_diff_large_similar() {
        let mut before = Vec::new();
        let mut after = Vec::new();
        for i in 0..1000 {
            before.push(format!("line {i}"));
            after.push(format!("line {i}"));
        }
        before[500] = "OLD LINE 500".to_string();
        after[500] = "NEW LINE 500".to_string();
        let before_str = before.join("\n");
        let after_str = after.join("\n");
        let diff = render_unified_diff(None, &before_str, &after_str);
        assert!(diff.contains("-OLD LINE 500"));
        assert!(diff.contains("+NEW LINE 500"));
        assert!(diff.contains(" line 499"));
        assert!(diff.contains(" line 501"));
    }
3922
    // Diffing two empty sequences yields no edit operations.
    #[test]
    fn myers_diff_empty_sequences() {
        let ops = myers_diff(&[], &[]);
        assert!(ops.is_empty());
    }
3928
    // Empty -> ["a","b"] is exactly two Insert ops.
    #[test]
    fn myers_diff_insert_only() {
        let ops = myers_diff(&[], &["a", "b"]);
        assert_eq!(ops.len(), 2);
        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Insert));
    }
3935
    // ["a","b"] -> empty is exactly two Delete ops.
    #[test]
    fn myers_diff_delete_only() {
        let ops = myers_diff(&["a", "b"], &[]);
        assert_eq!(ops.len(), 2);
        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Delete));
    }
3942
    // Identical sequences produce one Equal op per element.
    #[test]
    fn myers_diff_equal() {
        let ops = myers_diff(&["a", "b", "c"], &["a", "b", "c"]);
        assert_eq!(ops.len(), 3);
        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Equal));
    }
3949
    // Even with process/exec listed as a capability, a read_only side-effect
    // level must still reject the exec builtin.
    #[test]
    fn execution_policy_rejects_process_exec_when_read_only() {
        push_execution_policy(CapabilityPolicy {
            side_effect_level: Some("read_only".to_string()),
            capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
            ..Default::default()
        });
        let result = enforce_current_policy_for_builtin("exec", &[]);
        pop_execution_policy();
        assert!(result.is_err());
    }
3961
    // A tool absent from the policy's tools list must be rejected.
    #[test]
    fn execution_policy_rejects_unlisted_tool() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            ..Default::default()
        });
        let result = enforce_current_policy_for_tool("edit");
        pop_execution_policy();
        assert!(result.is_err());
    }
3972
    // normalize_run_record must carry trace spans (including kind and nested
    // metadata) through the VM-value round trip untouched.
    #[test]
    fn normalize_run_record_preserves_trace_spans() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "_type": "run_record",
            "id": "run_trace",
            "workflow_id": "wf",
            "status": "completed",
            "started_at": "1",
            "trace_spans": [
                {
                    "span_id": 1,
                    "parent_id": null,
                    "kind": "pipeline",
                    "name": "workflow",
                    "start_ms": 0,
                    "duration_ms": 42,
                    "metadata": {"model": "demo"}
                }
            ]
        }));

        let run = normalize_run_record(&value).unwrap();
        assert_eq!(run.trace_spans.len(), 1);
        assert_eq!(run.trace_spans[0].kind, "pipeline");
        assert_eq!(
            run.trace_spans[0].metadata["model"],
            serde_json::json!("demo")
        );
    }
4002
    // A pre-hook returning Deny on a glob-matched tool name must surface as
    // PreToolAction::Deny from run_pre_tool_hooks.
    #[test]
    fn pre_tool_hook_deny_blocks_execution() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "dangerous_*".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Deny("blocked by policy".to_string())
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Deny(_)));
    }
4019
    // A pre-hook that explicitly allows leaves the call untouched.
    #[test]
    fn pre_tool_hook_allow_passes_through() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "safe_*".to_string(),
            pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
            post: None,
        });
        let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Allow));
    }
4032
    // A pre-hook may rewrite the tool's arguments; the modified args must be
    // returned in PreToolAction::Modify.
    #[test]
    fn pre_tool_hook_modify_rewrites_args() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "*".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
        clear_tool_hooks();
        match result {
            PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
            _ => panic!("expected Modify"),
        }
    }
4050
    // A post-hook can rewrite a tool's output (here: redaction) while passing
    // clean output through unchanged.
    #[test]
    fn post_tool_hook_modifies_result() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "exec".to_string(),
            pre: None,
            post: Some(Rc::new(|_name, result| {
                if result.contains("SECRET") {
                    PostToolAction::Modify("[REDACTED]".to_string())
                } else {
                    PostToolAction::Pass
                }
            })),
        });
        let result = run_post_tool_hooks("exec", "output with SECRET data");
        let clean = run_post_tool_hooks("exec", "clean output");
        clear_tool_hooks();
        assert_eq!(result, "[REDACTED]");
        assert_eq!(clean, "clean output");
    }
4071
    // A hook whose pattern does not match the tool name must not fire; the
    // default outcome is Allow.
    #[test]
    fn unmatched_hook_pattern_does_not_fire() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "exec".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Deny("should not match".to_string())
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Allow));
    }
4086
    // glob_match: bare "*", prefix/suffix wildcards, and exact-name matching.
    #[test]
    fn glob_match_patterns() {
        assert!(glob_match("*", "anything"));
        assert!(glob_match("exec*", "exec_at"));
        assert!(glob_match("*_file", "read_file"));
        assert!(!glob_match("exec*", "read_file"));
        assert!(glob_match("read_file", "read_file"));
        assert!(!glob_match("read_file", "write_file"));
    }
4096
    // 50k chars against a 10k budget must be shrunk (with some slack allowed)
    // and carry a "snipped" marker.
    #[test]
    fn microcompact_snips_large_output() {
        let large = "x".repeat(50_000);
        let result = microcompact_tool_output(&large, 10_000);
        assert!(result.len() < 15_000);
        assert!(result.contains("snipped"));
    }
4106
    // Output already under budget passes through byte-for-byte.
    #[test]
    fn microcompact_preserves_small_output() {
        let small = "hello world";
        let result = microcompact_tool_output(small, 10_000);
        assert_eq!(result, small);
    }
4113
    // Lines carrying strong failure keywords (FAIL / panic, across Go, Rust,
    // and pytest formats) must survive compaction even when buried in 200
    // lines of filler that exceeds the budget.
    #[test]
    fn microcompact_preserves_strong_keyword_lines_without_file_line() {
        let mut output = String::new();
        for i in 0..100 {
            output.push_str(&format!("verbose progress line {i}\n"));
        }
        output.push_str("--- FAIL: TestEmpty (0.00s)\n");
        output.push_str("thread 'tests::test_foo' panicked at src/lib.rs:42:5\n");
        output.push_str("FAILED tests/test_parser.py::test_empty\n");
        for i in 0..100 {
            output.push_str(&format!("more output after failures {i}\n"));
        }
        let result = microcompact_tool_output(&output, 2_000);
        assert!(
            result.contains("--- FAIL: TestEmpty"),
            "strong 'FAIL' keyword should preserve the line:\n{result}"
        );
        assert!(
            result.contains("panicked at"),
            "strong 'panic' keyword should preserve the line:\n{result}"
        );
        assert!(
            result.contains("FAILED tests/test_parser.py"),
            "strong 'FAIL' keyword should preserve pytest-style lines too:\n{result}"
        );
    }
4148
4149 #[test]
4150 fn auto_compact_messages_reduces_count() {
4151 let mut messages: Vec<serde_json::Value> = (0..20)
4152 .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
4153 .collect();
4154 let runtime = tokio::runtime::Builder::new_current_thread()
4155 .enable_all()
4156 .build()
4157 .unwrap();
4158 let compacted = runtime.block_on(auto_compact_messages(
4159 &mut messages,
4160 &AutoCompactConfig {
4161 compact_strategy: CompactStrategy::Truncate,
4162 keep_last: 6,
4163 ..Default::default()
4164 },
4165 None,
4166 ));
4167 let summary = compacted.unwrap();
4168 assert!(summary.is_some());
4169 assert!(messages.len() <= 7); assert!(messages[0]["content"]
4171 .as_str()
4172 .unwrap()
4173 .contains("auto-compacted"));
4174 }
4175
    // With only 4 messages and keep_last=6, compaction is a no-op: no summary
    // and the message list untouched.
    #[test]
    fn auto_compact_noop_when_under_threshold() {
        let mut messages: Vec<serde_json::Value> = (0..4)
            .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
            .collect();
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();
        let compacted = runtime.block_on(auto_compact_messages(
            &mut messages,
            &AutoCompactConfig {
                compact_strategy: CompactStrategy::Truncate,
                keep_last: 6,
                ..Default::default()
            },
            None,
        ));
        assert!(compacted.unwrap().is_none());
        assert_eq!(messages.len(), 4);
    }
4197
    // ObservationMask compaction: assistant turns and error-bearing tool
    // output stay verbatim in the summary, verbose tool output is masked, and
    // the keep_last=2 tail (plus summary) is what remains in `messages`.
    #[test]
    fn observation_mask_preserves_errors_masks_verbose_output() {
        let mut messages = vec![
            serde_json::json!({"role": "assistant", "content": "I'll create the file now."}),
            serde_json::json!({"role": "user", "content": "File created: a.go\npackage main\nimport \"fmt\"\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n// more lines\n// more lines\n// more lines\n// more lines"}),
            serde_json::json!({"role": "assistant", "content": "Now let me run the tests."}),
            serde_json::json!({"role": "user", "content": "error: cannot find module\nexit code 1\nfailed to compile"}),
            serde_json::json!({"role": "assistant", "content": "I see the issue. Let me fix it."}),
            serde_json::json!({"role": "user", "content": "File patched successfully."}),
            serde_json::json!({"role": "assistant", "content": "Running tests again."}),
            serde_json::json!({"role": "user", "content": "All tests passed."}),
        ];
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();
        let compacted = runtime.block_on(auto_compact_messages(
            &mut messages,
            &AutoCompactConfig {
                compact_strategy: CompactStrategy::ObservationMask,
                keep_last: 2,
                ..Default::default()
            },
            None,
        ));
        let summary = compacted.unwrap().unwrap();
        assert!(summary.contains("I'll create the file now."));
        assert!(summary.contains("Now let me run the tests."));
        assert!(summary.contains("I see the issue. Let me fix it."));
        assert!(summary.contains("error: cannot find module"));
        assert!(summary.contains("exit code 1"));
        assert!(summary.contains("masked]"));
        assert!(summary.contains("File created: a.go"));
        assert!(summary.contains("File patched successfully."));
        // The kept tail is excluded from the summary itself.
        assert!(!summary.contains("Running tests again."));
        assert!(!summary.contains("All tests passed."));
        assert_eq!(messages.len(), 3);
    }
4243
    // Short tool outputs are kept verbatim by observation masking — nothing
    // gets a "masked" placeholder.
    #[test]
    fn observation_mask_keeps_short_tool_output() {
        let messages = vec![
            serde_json::json!({"role": "user", "content": "OK"}),
            serde_json::json!({"role": "user", "content": "Done."}),
        ];
        let summary = observation_mask_compaction(&messages, 2);
        assert!(summary.contains("[user] OK"));
        assert!(summary.contains("[user] Done."));
        assert!(!summary.contains("masked"));
    }
4255
4256 #[test]
4257 fn estimate_message_tokens_basic() {
4258 let messages = vec![
4259 serde_json::json!({"role": "user", "content": "a".repeat(400)}),
4260 serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
4261 ];
4262 let tokens = estimate_message_tokens(&messages);
4263 assert_eq!(tokens, 200); }
4265
    // Two artifacts with identical text collapse to one; the unique third
    // survives, leaving two artifacts total.
    #[test]
    fn dedup_artifacts_removes_duplicates() {
        let mut artifacts = vec![
            ArtifactRecord {
                id: "a1".to_string(),
                kind: "test".to_string(),
                text: Some("duplicate content".to_string()),
                ..Default::default()
            },
            ArtifactRecord {
                id: "a2".to_string(),
                kind: "test".to_string(),
                text: Some("duplicate content".to_string()),
                ..Default::default()
            },
            ArtifactRecord {
                id: "a3".to_string(),
                kind: "test".to_string(),
                text: Some("unique content".to_string()),
                ..Default::default()
            },
        ];
        dedup_artifacts(&mut artifacts);
        assert_eq!(artifacts.len(), 2);
    }
4293
    // An artifact over the token budget gets its text snipped and its
    // estimated_tokens clamped to the budget.
    #[test]
    fn microcompact_artifact_snips_oversized() {
        let mut artifact = ArtifactRecord {
            id: "a1".to_string(),
            kind: "test".to_string(),
            text: Some("x".repeat(10_000)),
            estimated_tokens: Some(2_500),
            ..Default::default()
        };
        microcompact_artifact(&mut artifact, 500);
        assert!(artifact.text.as_ref().unwrap().len() < 5_000);
        assert_eq!(artifact.estimated_tokens, Some(500));
    }
4307
    // An exec command matching the constraint's "cargo *" pattern is allowed.
    #[test]
    fn arg_constraint_allows_matching_pattern() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "exec",
            &serde_json::json!({"command": "cargo test"}),
        );
        assert!(result.is_ok());
    }
4326
    // An exec command outside the allowed "cargo *" pattern is rejected.
    #[test]
    fn arg_constraint_rejects_non_matching_pattern() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "exec",
            &serde_json::json!({"command": "rm -rf /"}),
        );
        assert!(result.is_err());
    }
4343
    // Constraints scoped to "exec" do not apply to other tools; read_file
    // passes regardless of its arguments.
    #[test]
    fn arg_constraint_ignores_unmatched_tool() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "read_file",
            &serde_json::json!({"path": "/etc/passwd"}),
        );
        assert!(result.is_ok());
    }
4360
4361 #[test]
4362 fn microcompact_handles_multibyte_utf8() {
4363 let emoji_output = "🔥".repeat(500); let result = microcompact_tool_output(&emoji_output, 400);
4366 assert!(result.contains("snipped"));
4368
4369 let mixed = format!("{}{}{}", "a".repeat(300), "é".repeat(500), "b".repeat(300));
4371 let result2 = microcompact_tool_output(&mixed, 400);
4372 assert!(result2.contains("snipped"));
4373
4374 let cjk = "中文".repeat(500);
4376 let result3 = microcompact_tool_output(&cjk, 400);
4377 assert!(result3.contains("snipped"));
4378 }
4379}