// harn_vm/orchestration.rs

1use std::collections::{BTreeMap, BTreeSet};
2use std::path::{Path, PathBuf};
3use std::rc::Rc;
4use std::{cell::RefCell, thread_local};
5
6use serde::{Deserialize, Serialize};
7
8use crate::llm::{extract_llm_options, vm_call_llm_full, vm_value_to_json};
9use crate::value::{VmError, VmValue};
10
/// Current wall-clock time as an RFC 3339 UTC timestamp (second precision).
///
/// Falls back to the Unix epoch ("1970-01-01T00:00:00Z") if the system
/// clock reports a time before the epoch. Previously this returned the raw
/// epoch-second count as a decimal string, contradicting the function name.
fn now_rfc3339() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let ts = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    format_epoch_rfc3339(ts)
}

/// Format seconds since the Unix epoch as `YYYY-MM-DDTHH:MM:SSZ`.
///
/// Uses the standard civil-from-days calendar conversion (proleptic
/// Gregorian, Howard Hinnant's algorithm), so no external time crate is
/// needed.
fn format_epoch_rfc3339(secs: u64) -> String {
    let days = (secs / 86_400) as i64;
    let rem = secs % 86_400;
    let (hh, mm, ss) = (rem / 3_600, (rem % 3_600) / 60, rem % 60);

    // civil_from_days: shift epoch to 0000-03-01 so leap days fall at the
    // end of the 400-year era.
    let z = days + 719_468;
    let era = if z >= 0 { z } else { z - 146_096 } / 146_097;
    let doe = z - era * 146_097; // day of era [0, 146096]
    let yoe = (doe - doe / 1_460 + doe / 36_524 - doe / 146_096) / 365;
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day of year [0, 365]
    let mp = (5 * doy + 2) / 153;
    let day = doy - (153 * mp + 2) / 5 + 1;
    let month = if mp < 10 { mp + 3 } else { mp - 9 };
    let year = yoe + era * 400 + i64::from(month <= 2);

    format!("{year:04}-{month:02}-{day:02}T{hh:02}:{mm:02}:{ss:02}Z")
}
19
20fn new_id(prefix: &str) -> String {
21    format!("{prefix}_{}", uuid::Uuid::now_v7())
22}
23
/// Directory where run data is stored: `$HARN_RUN_DIR` if set, otherwise
/// the relative `.harn-runs` directory.
fn default_run_dir() -> PathBuf {
    match std::env::var("HARN_RUN_DIR") {
        Ok(dir) => PathBuf::from(dir),
        Err(_) => PathBuf::from(".harn-runs"),
    }
}
29
thread_local! {
    // Per-thread stack of capability policies.
    // NOTE(review): no pushes/pops are visible in this chunk — presumably
    // managed around nested executions elsewhere; confirm before relying on it.
    static EXECUTION_POLICY_STACK: RefCell<Vec<CapabilityPolicy>> = const { RefCell::new(Vec::new()) };
    // Per-thread registry of tool lifecycle hooks; `run_pre_tool_hooks` /
    // `run_post_tool_hooks` iterate it in registration order.
    static TOOL_HOOKS: RefCell<Vec<ToolHook>> = const { RefCell::new(Vec::new()) };
}
34
35// ── Tool lifecycle hooks ──────────────────────────────────────────────
36
/// Action returned by a PreToolUse hook.
///
/// When several hooks match, they run in registration order: `Deny`
/// short-circuits the chain, while `Modify` feeds the rewritten arguments
/// to the remaining hooks (see `run_pre_tool_hooks`).
#[derive(Clone, Debug)]
pub enum PreToolAction {
    /// Allow the tool call to proceed unchanged.
    Allow,
    /// Deny the tool call with an explanation.
    Deny(String),
    /// Allow but replace the arguments.
    Modify(serde_json::Value),
}
47
/// Action returned by a PostToolUse hook.
///
/// When several hooks match, each `Modify` replaces the result text seen
/// by subsequent hooks (see `run_post_tool_hooks`).
#[derive(Clone, Debug)]
pub enum PostToolAction {
    /// Pass the result through unchanged.
    Pass,
    /// Replace the result text.
    Modify(String),
}
56
/// Callback types for tool lifecycle hooks.
///
/// A `PreToolHookFn` receives `(tool_name, args)` and decides whether the
/// call proceeds; a `PostToolHookFn` receives `(tool_name, result_text)`
/// and may rewrite the result. `Rc` (not `Arc`) because hooks live in a
/// thread-local registry.
pub type PreToolHookFn = Rc<dyn Fn(&str, &serde_json::Value) -> PreToolAction>;
pub type PostToolHookFn = Rc<dyn Fn(&str, &str) -> PostToolAction>;
60
/// A registered tool hook with a name pattern and callbacks.
///
/// Pattern matching follows `glob_match`: `"*"` matches everything,
/// `"exec*"` matches by prefix, `"*file"` by suffix, anything else exactly.
#[derive(Clone)]
pub struct ToolHook {
    /// Glob-style pattern matched against tool names (e.g. `"*"`, `"exec*"`, `"read_file"`).
    pub pattern: String,
    /// Called before tool execution. Return `Deny` to reject, `Modify` to rewrite args.
    pub pre: Option<PreToolHookFn>,
    /// Called after tool execution with the result text. Return `Modify` to rewrite.
    pub post: Option<PostToolHookFn>,
}
71
72impl std::fmt::Debug for ToolHook {
73    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74        f.debug_struct("ToolHook")
75            .field("pattern", &self.pattern)
76            .field("has_pre", &self.pre.is_some())
77            .field("has_post", &self.post.is_some())
78            .finish()
79    }
80}
81
/// Minimal glob matcher for tool-name patterns.
///
/// Supported forms: `"*"` (everything), `"prefix*"`, `"*suffix"`,
/// `"*infix*"` (contains), and exact match. Previously a `"*x*"` pattern
/// fell into the prefix branch as `starts_with("*x")` and could never
/// match; it is now handled as a contains-check before the prefix/suffix
/// branches.
fn glob_match(pattern: &str, name: &str) -> bool {
    if pattern == "*" {
        return true;
    }
    // `*infix*` must be peeled before the one-sided cases, or the leading
    // `*` would be treated as part of a prefix.
    if let Some(inner) = pattern
        .strip_prefix('*')
        .and_then(|p| p.strip_suffix('*'))
    {
        return name.contains(inner);
    }
    if let Some(prefix) = pattern.strip_suffix('*') {
        return name.starts_with(prefix);
    }
    if let Some(suffix) = pattern.strip_prefix('*') {
        return name.ends_with(suffix);
    }
    pattern == name
}
94
95pub fn register_tool_hook(hook: ToolHook) {
96    TOOL_HOOKS.with(|hooks| hooks.borrow_mut().push(hook));
97}
98
99pub fn clear_tool_hooks() {
100    TOOL_HOOKS.with(|hooks| hooks.borrow_mut().clear());
101}
102
103/// Run all matching PreToolUse hooks. Returns the final action.
104pub fn run_pre_tool_hooks(tool_name: &str, args: &serde_json::Value) -> PreToolAction {
105    TOOL_HOOKS.with(|hooks| {
106        let hooks = hooks.borrow();
107        let mut current_args = args.clone();
108        for hook in hooks.iter() {
109            if !glob_match(&hook.pattern, tool_name) {
110                continue;
111            }
112            if let Some(ref pre) = hook.pre {
113                match pre(tool_name, &current_args) {
114                    PreToolAction::Allow => {}
115                    PreToolAction::Deny(reason) => return PreToolAction::Deny(reason),
116                    PreToolAction::Modify(new_args) => {
117                        current_args = new_args;
118                    }
119                }
120            }
121        }
122        if current_args != *args {
123            PreToolAction::Modify(current_args)
124        } else {
125            PreToolAction::Allow
126        }
127    })
128}
129
130/// Run all matching PostToolUse hooks. Returns the (possibly modified) result.
131pub fn run_post_tool_hooks(tool_name: &str, result: &str) -> String {
132    TOOL_HOOKS.with(|hooks| {
133        let hooks = hooks.borrow();
134        let mut current = result.to_string();
135        for hook in hooks.iter() {
136            if !glob_match(&hook.pattern, tool_name) {
137                continue;
138            }
139            if let Some(ref post) = hook.post {
140                match post(tool_name, &current) {
141                    PostToolAction::Pass => {}
142                    PostToolAction::Modify(new_result) => {
143                        current = new_result;
144                    }
145                }
146            }
147        }
148        current
149    })
150}
151
152// ── Auto-compaction ───────────────────────────────────────────────────
153
/// Strategy used to summarize archived messages during auto-compaction.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CompactStrategy {
    /// Ask the model to summarize the archived messages (requires active
    /// LLM call options).
    Llm,
    /// Keep truncated snippets of the archived messages; no LLM call.
    Truncate,
    /// Delegate summarization to a user-supplied Harn closure
    /// (`AutoCompactConfig::custom_compactor`).
    Custom,
}
160
161pub fn parse_compact_strategy(value: &str) -> Result<CompactStrategy, VmError> {
162    match value {
163        "llm" => Ok(CompactStrategy::Llm),
164        "truncate" => Ok(CompactStrategy::Truncate),
165        "custom" => Ok(CompactStrategy::Custom),
166        other => Err(VmError::Runtime(format!(
167            "unknown compact_strategy '{other}' (expected 'llm', 'truncate', or 'custom')"
168        ))),
169    }
170}
171
/// Configuration for automatic transcript compaction in agent loops.
///
/// Defaults (see the `Default` impl): 80_000 token threshold, 20_000 char
/// tool-output cap, keep the last 8 messages, `Llm` strategy.
#[derive(Clone, Debug)]
pub struct AutoCompactConfig {
    /// Maximum estimated tokens before triggering auto-compaction.
    pub token_threshold: usize,
    /// Maximum character length for a single tool result before microcompaction.
    pub tool_output_max_chars: usize,
    /// Number of recent messages to keep during auto-compaction.
    pub keep_last: usize,
    /// Strategy used to summarize archived messages.
    pub compact_strategy: CompactStrategy,
    /// Optional Harn callback used when `compact_strategy` is `custom`.
    pub custom_compactor: Option<VmValue>,
}
186
187impl Default for AutoCompactConfig {
188    fn default() -> Self {
189        Self {
190            token_threshold: 80_000,
191            tool_output_max_chars: 20_000,
192            keep_last: 8,
193            compact_strategy: CompactStrategy::Llm,
194            custom_compactor: None,
195        }
196    }
197}
198
199/// Estimate token count from a list of JSON messages (chars / 4 heuristic).
200pub fn estimate_message_tokens(messages: &[serde_json::Value]) -> usize {
201    messages
202        .iter()
203        .map(|m| {
204            m.get("content")
205                .and_then(|c| c.as_str())
206                .map(|s| s.len())
207                .unwrap_or(0)
208        })
209        .sum::<usize>()
210        / 4
211}
212
/// Microcompact a tool result: if it exceeds `max_chars` (in bytes), keep
/// the first and last portions with a snip marker in between.
///
/// Budgets below 200 are treated as "no limit" so tiny budgets cannot
/// reduce output to nothing. Cut points are snapped to UTF-8 char
/// boundaries: the previous version sliced at raw byte offsets and
/// panicked when a multi-byte character straddled the cut.
pub fn microcompact_tool_output(output: &str, max_chars: usize) -> String {
    if output.len() <= max_chars || max_chars < 200 {
        return output.to_string();
    }
    let keep = max_chars / 2;
    // `&str[..i]` panics if `i` lands inside a multi-byte UTF-8 sequence,
    // so move the head cut left and the tail cut right to valid boundaries.
    let mut head_end = keep;
    while !output.is_char_boundary(head_end) {
        head_end -= 1;
    }
    let mut tail_start = output.len() - keep;
    while !output.is_char_boundary(tail_start) {
        tail_start += 1;
    }
    // Reported count is byte-based, matching the original heuristic.
    let snipped = output.len() - max_chars;
    format!(
        "{}\n\n[... {snipped} characters snipped ...]\n\n{}",
        &output[..head_end],
        &output[tail_start..]
    )
}
225
226fn format_compaction_messages(messages: &[serde_json::Value]) -> String {
227    messages
228        .iter()
229        .map(|msg| {
230            let role = msg
231                .get("role")
232                .and_then(|v| v.as_str())
233                .unwrap_or("user")
234                .to_uppercase();
235            let content = msg
236                .get("content")
237                .and_then(|v| v.as_str())
238                .unwrap_or_default();
239            format!("{role}: {content}")
240        })
241        .collect::<Vec<_>>()
242        .join("\n")
243}
244
245fn truncate_compaction_summary(
246    old_messages: &[serde_json::Value],
247    archived_count: usize,
248) -> String {
249    let summary_parts: Vec<String> = old_messages
250        .iter()
251        .filter_map(|m| {
252            let role = m.get("role")?.as_str()?;
253            let content = m.get("content")?.as_str()?;
254            if content.is_empty() {
255                return None;
256            }
257            let truncated = if content.len() > 500 {
258                format!("{}...", &content[..500])
259            } else {
260                content.to_string()
261            };
262            Some(format!("[{role}] {truncated}"))
263        })
264        .take(15)
265        .collect();
266    format!(
267        "[auto-compacted {archived_count} older messages]\n{}{}",
268        summary_parts.join("\n"),
269        if archived_count > 15 {
270            format!("\n... and {} more", archived_count - 15)
271        } else {
272            String::new()
273        }
274    )
275}
276
277fn compact_summary_text_from_value(value: &VmValue) -> Result<String, VmError> {
278    if let Some(map) = value.as_dict() {
279        if let Some(summary) = map.get("summary").or_else(|| map.get("text")) {
280            return Ok(summary.display());
281        }
282    }
283    match value {
284        VmValue::String(text) => Ok(text.to_string()),
285        VmValue::Nil => Ok(String::new()),
286        _ => serde_json::to_string_pretty(&vm_value_to_json(value))
287            .map_err(|e| VmError::Runtime(format!("custom compactor encode error: {e}"))),
288    }
289}
290
/// Summarize archived messages with an LLM call derived from the active
/// call options.
///
/// Clones `llm_opts`, strips all conversation/tool state, and sends a
/// single one-shot summarization prompt. Falls back to
/// `truncate_compaction_summary` when the model returns empty text.
async fn llm_compaction_summary(
    old_messages: &[serde_json::Value],
    archived_count: usize,
    llm_opts: &crate::llm::api::LlmCallOptions,
) -> Result<String, VmError> {
    let mut compact_opts = llm_opts.clone();
    let formatted = format_compaction_messages(old_messages);
    // Reset everything that ties the call to the ongoing conversation or
    // tool loop; the summarizer should see only the prompt built below.
    compact_opts.system = None;
    compact_opts.transcript_id = None;
    compact_opts.transcript_summary = None;
    compact_opts.transcript_metadata = None;
    compact_opts.native_tools = None;
    compact_opts.tool_choice = None;
    compact_opts.response_format = None;
    compact_opts.json_schema = None;
    compact_opts.messages = vec![serde_json::json!({
        "role": "user",
        "content": format!(
            "Summarize these archived conversation messages for a follow-on coding agent. Preserve goals, constraints, decisions, completed tool work, unresolved issues, and next actions. Output only the summary text.\n\nArchived message count: {archived_count}\n\nConversation:\n{formatted}"
        ),
    })];
    let result = vm_call_llm_full(&compact_opts).await?;
    let summary = result.text.trim();
    if summary.is_empty() {
        // Empty model output: fall back to the non-LLM truncation summary.
        Ok(truncate_compaction_summary(old_messages, archived_count))
    } else {
        Ok(format!(
            "[auto-compacted {archived_count} older messages]\n{summary}"
        ))
    }
}
322
323async fn custom_compaction_summary(
324    old_messages: &[serde_json::Value],
325    archived_count: usize,
326    callback: &VmValue,
327) -> Result<String, VmError> {
328    let Some(VmValue::Closure(closure)) = Some(callback.clone()) else {
329        return Err(VmError::Runtime(
330            "compact_callback must be a closure when compact_strategy is 'custom'".to_string(),
331        ));
332    };
333    let mut vm = crate::vm::take_async_builtin_child_vm().ok_or_else(|| {
334        VmError::Runtime(
335            "custom transcript compaction requires an async builtin VM context".to_string(),
336        )
337    })?;
338    let messages_vm = VmValue::List(Rc::new(
339        old_messages
340            .iter()
341            .map(crate::stdlib::json_to_vm_value)
342            .collect(),
343    ));
344    let result = vm.call_closure_pub(&closure, &[messages_vm], &[]).await;
345    crate::vm::restore_async_builtin_child_vm(vm);
346    let summary = compact_summary_text_from_value(&result?)?;
347    if summary.trim().is_empty() {
348        Ok(truncate_compaction_summary(old_messages, archived_count))
349    } else {
350        Ok(format!(
351            "[auto-compacted {archived_count} older messages]\n{summary}"
352        ))
353    }
354}
355
356/// Auto-compact a message list in place: summarize older messages into a
357/// note and keep the most recent `keep_last` messages.
358pub(crate) async fn auto_compact_messages(
359    messages: &mut Vec<serde_json::Value>,
360    config: &AutoCompactConfig,
361    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
362) -> Result<bool, VmError> {
363    if messages.len() <= config.keep_last {
364        return Ok(false);
365    }
366    let split_at = messages.len().saturating_sub(config.keep_last);
367    let old_messages: Vec<_> = messages.drain(..split_at).collect();
368    let archived_count = old_messages.len();
369    let summary = match config.compact_strategy {
370        CompactStrategy::Truncate => truncate_compaction_summary(&old_messages, archived_count),
371        CompactStrategy::Llm => {
372            llm_compaction_summary(
373                &old_messages,
374                archived_count,
375                llm_opts.ok_or_else(|| {
376                    VmError::Runtime(
377                        "LLM transcript compaction requires active LLM call options".to_string(),
378                    )
379                })?,
380            )
381            .await?
382        }
383        CompactStrategy::Custom => {
384            custom_compaction_summary(
385                &old_messages,
386                archived_count,
387                config.custom_compactor.as_ref().ok_or_else(|| {
388                    VmError::Runtime(
389                        "compact_callback is required when compact_strategy is 'custom'"
390                            .to_string(),
391                    )
392                })?,
393            )
394            .await?
395        }
396    };
397    messages.insert(
398        0,
399        serde_json::json!({
400            "role": "user",
401            "content": summary,
402        }),
403    );
404    Ok(true)
405}
406
407// ── Adaptive context assembly ─────────────────────────────────────────
408
409/// Snip an artifact's text to fit within a token budget.
410pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
411    let max_chars = max_tokens * 4;
412    if let Some(ref text) = artifact.text {
413        if text.len() > max_chars && max_chars >= 200 {
414            artifact.text = Some(microcompact_tool_output(text, max_chars));
415            artifact.estimated_tokens = Some(max_tokens);
416        }
417    }
418}
419
/// Deduplicate artifacts by removing those with identical text content.
///
/// The first occurrence in input order wins; later artifacts with the same
/// text are dropped. (The old doc claimed the higher-priority artifact was
/// kept — `retain` keeps strictly by position, priority is not consulted.)
/// Artifacts with empty or absent text are always kept.
pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
    let mut seen_hashes: BTreeSet<u64> = BTreeSet::new();
    artifacts.retain(|artifact| {
        let text = artifact.text.as_deref().unwrap_or("");
        if text.is_empty() {
            return true;
        }
        // Content hash for dedup; `insert` returns false on a repeat,
        // which drops the artifact. A hash collision would (rarely) drop a
        // non-duplicate.
        let hash = {
            use std::hash::{Hash, Hasher};
            let mut hasher = std::collections::hash_map::DefaultHasher::new();
            text.hash(&mut hasher);
            hasher.finish()
        };
        seen_hashes.insert(hash)
    });
}
439
440/// Enhanced artifact selection: dedup, microcompact oversized artifacts,
441/// then delegate to the standard `select_artifacts`.
442pub fn select_artifacts_adaptive(
443    mut artifacts: Vec<ArtifactRecord>,
444    policy: &ContextPolicy,
445) -> Vec<ArtifactRecord> {
446    // Phase 1: deduplicate
447    dedup_artifacts(&mut artifacts);
448
449    // Phase 2: microcompact oversized artifacts relative to budget.
450    // Cap individual artifacts to a fraction of the total budget, but don't
451    // let the per-artifact cap exceed the total budget (avoid overrun).
452    if let Some(max_tokens) = policy.max_tokens {
453        let count = artifacts.len().max(1);
454        let per_artifact_budget = max_tokens / count;
455        // Floor of 500 tokens, but never more than total budget
456        let cap = per_artifact_budget.max(500).min(max_tokens);
457        for artifact in &mut artifacts {
458            let est = artifact.estimated_tokens.unwrap_or(0);
459            if est > cap * 2 {
460                microcompact_artifact(artifact, cap);
461            }
462        }
463    }
464
465    // Phase 3: standard selection with budget
466    select_artifacts(artifacts, policy)
467}
468
469// ── Per-agent policy with argument patterns ───────────────────────────
470
/// Extended policy that supports argument-level constraints.
///
/// Enforced by `enforce_tool_arg_constraints`: for tools whose name
/// matches `tool` (glob), the call's first string-like argument must match
/// at least one pattern in `arg_patterns`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolArgConstraint {
    /// Tool name to constrain (glob pattern, see `glob_match`).
    pub tool: String,
    /// Glob patterns that the first string argument must match.
    /// If empty, no argument constraint is applied.
    pub arg_patterns: Vec<String>,
}
481
482/// Check if a tool call satisfies argument constraints in the policy.
483pub fn enforce_tool_arg_constraints(
484    policy: &CapabilityPolicy,
485    tool_name: &str,
486    args: &serde_json::Value,
487) -> Result<(), VmError> {
488    for constraint in &policy.tool_arg_constraints {
489        if !glob_match(&constraint.tool, tool_name) {
490            continue;
491        }
492        if constraint.arg_patterns.is_empty() {
493            continue;
494        }
495        // Extract the first string-like argument for pattern matching
496        let first_arg = args
497            .as_object()
498            .and_then(|o| o.values().next())
499            .and_then(|v| v.as_str())
500            .or_else(|| args.as_str())
501            .unwrap_or("");
502        let matches = constraint
503            .arg_patterns
504            .iter()
505            .any(|pattern| glob_match(pattern, first_arg));
506        if !matches {
507            return reject_policy(format!(
508                "tool '{tool_name}' argument '{first_arg}' does not match allowed patterns: {:?}",
509                constraint.arg_patterns
510            ));
511        }
512    }
513    Ok(())
514}
515
/// Map loose artifact-kind names onto the canonical kind vocabulary.
///
/// Known canonical kinds pass through unchanged; a handful of shorthand
/// aliases are expanded; blank/whitespace input becomes "artifact"; any
/// other string is kept verbatim.
fn normalize_artifact_kind(kind: &str) -> String {
    const CANONICAL_KINDS: &[&str] = &[
        "resource",
        "workspace_file",
        "editor_selection",
        "workspace_snapshot",
        "transcript_summary",
        "summary",
        "plan",
        "diff",
        "git_diff",
        "patch",
        "patch_set",
        "patch_proposal",
        "diff_review",
        "review_decision",
        "verification_bundle",
        "apply_intent",
        "verification_result",
        "test_result",
        "command_result",
        "provider_payload",
        "worker_result",
        "worker_notification",
        "artifact",
    ];
    if CANONICAL_KINDS.contains(&kind) {
        return kind.to_string();
    }
    match kind {
        "file" => "workspace_file".to_string(),
        "transcript" => "transcript_summary".to_string(),
        "verification" => "verification_result".to_string(),
        "test" => "test_result".to_string(),
        other if other.trim().is_empty() => "artifact".to_string(),
        other => other.to_string(),
    }
}
549
/// Default selection priority per artifact kind (higher sorts first in
/// context selection). Unknown kinds get the floor value of 40.
fn default_artifact_priority(kind: &str) -> i64 {
    if matches!(kind, "verification_result" | "test_result") {
        100
    } else if kind == "verification_bundle" {
        95
    } else if matches!(
        kind,
        "diff" | "git_diff" | "patch" | "patch_set" | "patch_proposal" | "diff_review"
            | "review_decision" | "apply_intent"
    ) {
        90
    } else if kind == "plan" {
        80
    } else if matches!(
        kind,
        "workspace_file" | "workspace_snapshot" | "editor_selection" | "resource"
    ) {
        70
    } else if matches!(kind, "summary" | "transcript_summary") {
        60
    } else if kind == "command_result" {
        50
    } else {
        40
    }
}
563
/// Rank a freshness tag for artifact ordering: "fresh"/"live" > "recent" >
/// unknown/absent > "stale".
fn freshness_rank(value: Option<&str>) -> i64 {
    match value {
        Some("fresh") | Some("live") => 3,
        Some("recent") => 2,
        Some("stale") => 0,
        // None and unrecognized tags share the neutral middle rank.
        _ => 1,
    }
}
572
/// Capability ceiling for an execution scope.
///
/// Empty collections mean "unrestricted" for that dimension — `intersect`
/// only enforces non-empty ceilings and adopts the other side's values
/// when one side is empty.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CapabilityPolicy {
    /// Allowed tool names; empty = no restriction.
    pub tools: Vec<String>,
    /// Allowed operations per capability name; empty = no restriction.
    pub capabilities: BTreeMap<String, Vec<String>>,
    /// Permitted workspace roots; empty = no restriction.
    pub workspace_roots: Vec<String>,
    /// Side-effect ceiling; `intersect` keeps the more restrictive level
    /// (ordering defined by `min_side_effect`).
    pub side_effect_level: Option<String>,
    /// Recursion ceiling; `intersect` keeps the minimum of both sides.
    pub recursion_limit: Option<usize>,
    /// Argument-level constraints for specific tools.
    #[serde(default)]
    pub tool_arg_constraints: Vec<ToolArgConstraint>,
}
585
impl CapabilityPolicy {
    /// Intersect this policy (treated as the host ceiling) with a
    /// `requested` policy.
    ///
    /// Errors when the request names tools or capability operations
    /// outside a non-empty ceiling; otherwise returns the element-wise
    /// intersection. An empty list/map on either side means "unrestricted"
    /// and the other side's values are adopted wholesale.
    pub fn intersect(&self, requested: &CapabilityPolicy) -> Result<CapabilityPolicy, String> {
        // Side-effect level: the more restrictive of the two wins.
        let side_effect_level = match (&self.side_effect_level, &requested.side_effect_level) {
            (Some(a), Some(b)) => Some(min_side_effect(a, b).to_string()),
            (Some(a), None) => Some(a.clone()),
            (None, Some(b)) => Some(b.clone()),
            (None, None) => None,
        };

        // Reject (rather than silently drop) requested tools outside the
        // ceiling, so callers get an actionable denial message.
        if !self.tools.is_empty() {
            let denied: Vec<String> = requested
                .tools
                .iter()
                .filter(|tool| !self.tools.contains(*tool))
                .cloned()
                .collect();
            if !denied.is_empty() {
                return Err(format!(
                    "requested tools exceed host ceiling: {}",
                    denied.join(", ")
                ));
            }
        }

        // Same strictness for capability operations: every requested op
        // must fall inside the ceiling for its capability, and unknown
        // capabilities are rejected outright (unless the ceiling is empty).
        for (capability, requested_ops) in &requested.capabilities {
            if let Some(allowed_ops) = self.capabilities.get(capability) {
                let denied: Vec<String> = requested_ops
                    .iter()
                    .filter(|op| !allowed_ops.contains(*op))
                    .cloned()
                    .collect();
                if !denied.is_empty() {
                    return Err(format!(
                        "requested capability operations exceed host ceiling: {}.{}",
                        capability,
                        denied.join(",")
                    ));
                }
            } else if !self.capabilities.is_empty() {
                return Err(format!(
                    "requested capability exceeds host ceiling: {capability}"
                ));
            }
        }

        // Tools: empty side = unrestricted; otherwise keep the overlap.
        let tools = if self.tools.is_empty() {
            requested.tools.clone()
        } else if requested.tools.is_empty() {
            self.tools.clone()
        } else {
            requested
                .tools
                .iter()
                .filter(|tool| self.tools.contains(*tool))
                .cloned()
                .collect()
        };

        // Capabilities: intersect per-capability operation lists.
        let capabilities = if self.capabilities.is_empty() {
            requested.capabilities.clone()
        } else if requested.capabilities.is_empty() {
            self.capabilities.clone()
        } else {
            requested
                .capabilities
                .iter()
                .filter_map(|(capability, requested_ops)| {
                    self.capabilities.get(capability).map(|allowed_ops| {
                        (
                            capability.clone(),
                            requested_ops
                                .iter()
                                .filter(|op| allowed_ops.contains(*op))
                                .cloned()
                                .collect::<Vec<_>>(),
                        )
                    })
                })
                .collect()
        };

        // Workspace roots: same empty-means-unrestricted overlap rule.
        let workspace_roots = if self.workspace_roots.is_empty() {
            requested.workspace_roots.clone()
        } else if requested.workspace_roots.is_empty() {
            self.workspace_roots.clone()
        } else {
            requested
                .workspace_roots
                .iter()
                .filter(|root| self.workspace_roots.contains(*root))
                .cloned()
                .collect()
        };

        // Recursion limit: take the smaller when both sides set one.
        let recursion_limit = match (self.recursion_limit, requested.recursion_limit) {
            (Some(a), Some(b)) => Some(a.min(b)),
            (Some(a), None) => Some(a),
            (None, Some(b)) => Some(b),
            (None, None) => None,
        };

        // Merge arg constraints from both sides; constraints are additive
        // (every matching constraint is enforced, see
        // `enforce_tool_arg_constraints`).
        let mut tool_arg_constraints = self.tool_arg_constraints.clone();
        tool_arg_constraints.extend(requested.tool_arg_constraints.clone());

        Ok(CapabilityPolicy {
            tools,
            capabilities,
            workspace_roots,
            side_effect_level,
            recursion_limit,
            tool_arg_constraints,
        })
    }
}
701
/// Return whichever side-effect level is more restrictive.
///
/// Ordering (least to most permissive): none < read_only <
/// workspace_write < process_exec < network < anything unrecognized.
/// Ties resolve to the first argument.
fn min_side_effect<'a>(a: &'a str, b: &'a str) -> &'a str {
    let rank = |level: &str| match level {
        "none" => 0,
        "read_only" => 1,
        "workspace_write" => 2,
        "process_exec" => 3,
        "network" => 4,
        _ => 5,
    };
    if rank(b) < rank(a) {
        b
    } else {
        a
    }
}
719
/// Model-selection overrides for a workflow stage.
///
/// All fields are optional; unset fields presumably fall back to caller
/// defaults — resolution happens outside this chunk, confirm before
/// relying on specifics.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ModelPolicy {
    pub provider: Option<String>,
    pub model: Option<String>,
    pub model_tier: Option<String>,
    pub temperature: Option<f64>,
    pub max_tokens: Option<i64>,
}
729
/// Transcript handling for a workflow stage.
///
/// NOTE(review): `mode`/`visibility` value sets are not visible in this
/// chunk; semantics are enforced elsewhere — confirm against the executor.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct TranscriptPolicy {
    pub mode: Option<String>,
    pub visibility: Option<String>,
    /// Whether to summarize the transcript.
    pub summarize: bool,
    /// Whether to compact the transcript.
    pub compact: bool,
    /// Number of recent messages to retain when compacting.
    pub keep_last: Option<usize>,
}
739
/// Controls which artifacts are assembled into a stage's context.
///
/// Most fields are consumed by `select_artifacts` (outside this chunk);
/// `max_tokens` additionally drives per-artifact microcompaction in
/// `select_artifacts_adaptive`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ContextPolicy {
    /// Cap on the number of artifacts selected.
    pub max_artifacts: Option<usize>,
    /// Total estimated-token budget for selected artifacts.
    pub max_tokens: Option<usize>,
    /// Tokens held back from the budget — consumed elsewhere, TODO confirm.
    pub reserve_tokens: Option<usize>,
    /// Artifact kinds to include / exclude / rank first during selection.
    pub include_kinds: Vec<String>,
    pub exclude_kinds: Vec<String>,
    pub prioritize_kinds: Vec<String>,
    /// Artifact ids pinned into the selection.
    pub pinned_ids: Vec<String>,
    /// Restrict selection to artifacts from these stages.
    pub include_stages: Vec<String>,
    pub prefer_recent: bool,
    pub prefer_fresh: bool,
    /// Rendering mode for the assembled context — consumed elsewhere.
    pub render: Option<String>,
}
755
/// Retry behavior for a workflow stage (enforced by the executor, outside
/// this chunk).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RetryPolicy {
    /// Maximum number of attempts (0 = default behavior — TODO confirm).
    pub max_attempts: usize,
    /// Run verification between attempts.
    pub verify: bool,
    /// Attempt automatic repair on failure.
    pub repair: bool,
}
763
/// Declares the artifact kinds a stage consumes or produces, used on both
/// `WorkflowNode::input_contract` and `output_contract` (validation
/// happens outside this chunk).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct StageContract {
    /// Artifact kinds accepted as input.
    pub input_kinds: Vec<String>,
    /// Artifact kinds expected as output.
    pub output_kinds: Vec<String>,
    /// Minimum / maximum number of input artifacts.
    pub min_inputs: Option<usize>,
    pub max_inputs: Option<usize>,
    /// Whether a transcript must be present.
    pub require_transcript: bool,
    /// Optional JSON schema constraining the stage payload.
    pub schema: Option<serde_json::Value>,
}
774
/// Maps stage outcomes to edge branch labels (cf. `WorkflowEdge::branch`).
/// Each field, when set, names the branch taken for that outcome.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BranchSemantics {
    pub success: Option<String>,
    pub failure: Option<String>,
    pub verify_pass: Option<String>,
    pub verify_fail: Option<String>,
    pub condition_true: Option<String>,
    pub condition_false: Option<String>,
    pub loop_continue: Option<String>,
    pub loop_exit: Option<String>,
    pub escalation: Option<String>,
}
788
/// Fan-out configuration for map-style stages (executed outside this
/// chunk).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct MapPolicy {
    /// Items to map over.
    pub items: Vec<serde_json::Value>,
    /// Artifact kind assigned to each per-item input — TODO confirm.
    pub item_artifact_kind: Option<String>,
    /// Artifact kind assigned to per-item outputs — TODO confirm.
    pub output_kind: Option<String>,
    /// Cap on the number of items processed.
    pub max_items: Option<usize>,
}
797
/// Fan-in configuration for join-style stages (strategy values are
/// interpreted outside this chunk).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct JoinPolicy {
    /// Join strategy name — value set not visible here, TODO confirm.
    pub strategy: String,
    /// Require every input branch to complete before joining.
    pub require_all_inputs: bool,
    /// Minimum number of completed branches to proceed.
    pub min_completed: Option<usize>,
}
805
/// Reduction configuration for combining mapped results (strategy values
/// are interpreted outside this chunk).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReducePolicy {
    /// Reduction strategy name — value set not visible here, TODO confirm.
    pub strategy: String,
    /// Separator used when concatenating results.
    pub separator: Option<String>,
    /// Artifact kind assigned to the reduced output.
    pub output_kind: Option<String>,
}
813
/// Escalation routing for a stage (consumed outside this chunk).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EscalationPolicy {
    /// Severity level of the escalation.
    pub level: Option<String>,
    /// Queue the escalation is routed to.
    pub queue: Option<String>,
    /// Human-readable reason for escalating.
    pub reason: Option<String>,
}
821
/// A typed piece of context passed between workflow stages.
///
/// `normalize()` fills defaults for `type_name`, `id`, `created_at`,
/// `kind`, `estimated_tokens`, and `priority`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
    /// Record discriminator; defaults to "artifact".
    #[serde(rename = "_type")]
    pub type_name: String,
    /// Unique id; generated via `new_id("artifact")` when empty.
    pub id: String,
    /// Canonical kind (see `normalize_artifact_kind`).
    pub kind: String,
    /// Optional human-readable title.
    pub title: Option<String>,
    /// Textual payload; drives token estimation and deduplication.
    pub text: Option<String>,
    /// Structured payload, if any.
    pub data: Option<serde_json::Value>,
    /// Free-form origin label — semantics not visible here, TODO confirm.
    pub source: Option<String>,
    /// Creation timestamp; filled by `now_rfc3339()` when empty.
    pub created_at: String,
    /// Freshness tag ("fresh"/"live"/"recent"/"stale"; see `freshness_rank`).
    pub freshness: Option<String>,
    /// Selection priority; defaulted per kind via `default_artifact_priority`.
    pub priority: Option<i64>,
    /// Lineage references — presumably ancestor artifact ids, TODO confirm.
    pub lineage: Vec<String>,
    /// Optional relevance score — scale not visible here.
    pub relevance: Option<f64>,
    /// Estimated token count; derived from `text` length (len/4, ceil) when unset.
    pub estimated_tokens: Option<usize>,
    /// Workflow stage that produced this artifact, if any.
    pub stage: Option<String>,
    /// Arbitrary extra metadata.
    pub metadata: BTreeMap<String, serde_json::Value>,
}
842
843impl ArtifactRecord {
844    pub fn normalize(mut self) -> Self {
845        if self.type_name.is_empty() {
846            self.type_name = "artifact".to_string();
847        }
848        if self.id.is_empty() {
849            self.id = new_id("artifact");
850        }
851        if self.created_at.is_empty() {
852            self.created_at = now_rfc3339();
853        }
854        if self.kind.is_empty() {
855            self.kind = "artifact".to_string();
856        }
857        self.kind = normalize_artifact_kind(&self.kind);
858        if self.estimated_tokens.is_none() {
859            self.estimated_tokens = self
860                .text
861                .as_ref()
862                .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
863        }
864        if self.priority.is_none() {
865            self.priority = Some(default_artifact_priority(&self.kind));
866        }
867        self
868    }
869}
870
/// A single stage in a workflow graph, bundling its prompt and every
/// per-stage policy. Execution semantics live outside this chunk.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowNode {
    /// Node id; optional here, keyed by id in `WorkflowGraph::nodes`.
    pub id: Option<String>,
    /// Node kind — value set not visible in this chunk.
    pub kind: String,
    /// Execution mode — value set not visible in this chunk.
    pub mode: Option<String>,
    /// User prompt and system prompt for the stage's LLM call.
    pub prompt: Option<String>,
    pub system: Option<String>,
    /// Human-readable task label.
    pub task_label: Option<String>,
    /// Tool specification; parsed by `workflow_tool_names` (array of named
    /// objects, single object, or a `tool_registry` wrapper).
    pub tools: serde_json::Value,
    pub model_policy: ModelPolicy,
    pub transcript_policy: TranscriptPolicy,
    pub context_policy: ContextPolicy,
    pub retry_policy: RetryPolicy,
    pub capability_policy: CapabilityPolicy,
    /// Contracts for artifacts consumed and produced by this stage.
    pub input_contract: StageContract,
    pub output_contract: StageContract,
    /// Outcome-to-branch-label mapping for outgoing edges.
    pub branch_semantics: BranchSemantics,
    pub map_policy: MapPolicy,
    pub join_policy: JoinPolicy,
    pub reduce_policy: ReducePolicy,
    pub escalation_policy: EscalationPolicy,
    /// Verification spec — consumed outside this chunk.
    pub verify: Option<serde_json::Value>,
    /// Arbitrary extra metadata.
    pub metadata: BTreeMap<String, serde_json::Value>,
}
896
897pub fn workflow_tool_names(value: &serde_json::Value) -> Vec<String> {
898    match value {
899        serde_json::Value::Null => Vec::new(),
900        serde_json::Value::Array(items) => items
901            .iter()
902            .filter_map(|item| match item {
903                serde_json::Value::Object(map) => map
904                    .get("name")
905                    .and_then(|value| value.as_str())
906                    .filter(|name| !name.is_empty())
907                    .map(|name| name.to_string()),
908                _ => None,
909            })
910            .collect(),
911        serde_json::Value::Object(map) => {
912            if map.get("_type").and_then(|value| value.as_str()) == Some("tool_registry") {
913                return map
914                    .get("tools")
915                    .map(workflow_tool_names)
916                    .unwrap_or_default();
917            }
918            map.get("name")
919                .and_then(|value| value.as_str())
920                .filter(|name| !name.is_empty())
921                .map(|name| vec![name.to_string()])
922                .unwrap_or_default()
923        }
924        _ => Vec::new(),
925    }
926}
927
/// A directed edge between two workflow nodes.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowEdge {
    /// Source node id.
    pub from: String,
    /// Destination node id.
    pub to: String,
    /// Branch selector this edge follows (e.g. "true"/"false" on condition
    /// nodes, "failed"/"retry" in the act/verify/repair shorthand);
    /// `None` means unconditional.
    pub branch: Option<String>,
    /// Free-form label; presumably display-only — it is never read by the
    /// routing/validation code visible in this file.
    pub label: Option<String>,
}
936
/// A complete workflow definition: entry point, node map, edges, and a
/// graph-level capability policy, plus an append-only audit log.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowGraph {
    /// Type discriminator; normalized to "workflow_graph".
    #[serde(rename = "_type")]
    pub type_name: String,
    /// Graph id; backfilled with a fresh "workflow_*" id when empty.
    pub id: String,
    pub name: Option<String>,
    /// Schema/graph version; normalized from 0 to 1.
    pub version: usize,
    /// Id of the node execution starts from.
    pub entry: String,
    /// Nodes keyed by id (BTreeMap keeps iteration deterministic).
    pub nodes: BTreeMap<String, WorkflowNode>,
    pub edges: Vec<WorkflowEdge>,
    pub capability_policy: CapabilityPolicy,
    pub metadata: BTreeMap<String, serde_json::Value>,
    pub audit_log: Vec<WorkflowAuditEntry>,
}
952
/// One entry in a workflow's audit log: an operation, the node it touched
/// (if any), when, and why.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowAuditEntry {
    pub id: String,
    /// Operation name performed on the workflow.
    pub op: String,
    pub node_id: Option<String>,
    pub timestamp: String,
    pub reason: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
963
/// Aggregated LLM usage counters for a stage or a whole run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct LlmUsageRecord {
    pub input_tokens: i64,
    pub output_tokens: i64,
    pub total_duration_ms: i64,
    pub call_count: i64,
    /// Accumulated cost; f64, so this struct is PartialEq but not Eq.
    pub total_cost: f64,
    /// Model identifiers that contributed to these totals.
    pub models: Vec<String>,
}
974
/// Record of one executed workflow stage within a run: status/outcome, the
/// branch it took, timing, transcript and verification payloads, usage, and
/// the artifacts it consumed/produced.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageRecord {
    pub id: String,
    /// Id of the workflow node this stage executed.
    pub node_id: String,
    pub kind: String,
    pub status: String,
    pub outcome: String,
    /// Branch label selected by this stage, if any.
    pub branch: Option<String>,
    pub started_at: String,
    pub finished_at: Option<String>,
    /// User-visible output text, kept separate from private reasoning.
    pub visible_text: Option<String>,
    pub private_reasoning: Option<String>,
    pub transcript: Option<serde_json::Value>,
    pub verification: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    pub artifacts: Vec<ArtifactRecord>,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
    /// Per-attempt history when the stage was retried.
    pub attempts: Vec<RunStageAttemptRecord>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
997
/// One attempt of a (possibly retried) stage execution.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageAttemptRecord {
    /// 1-based (presumably) attempt counter — TODO confirm against the writer.
    pub attempt: usize,
    pub status: String,
    pub outcome: String,
    pub branch: Option<String>,
    /// Error message when this attempt failed.
    pub error: Option<String>,
    pub verification: Option<serde_json::Value>,
    pub started_at: String,
    pub finished_at: Option<String>,
}
1010
/// A recorded transition between workflow nodes during a run, including the
/// artifacts that crossed the edge.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunTransitionRecord {
    pub id: String,
    /// Stage/node the run transitioned from; `None` for the initial entry.
    pub from_stage_id: Option<String>,
    pub from_node_id: Option<String>,
    pub to_node_id: String,
    /// Branch label that selected this transition, if any.
    pub branch: Option<String>,
    pub timestamp: String,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
}
1023
/// A persisted snapshot of scheduler progress (which nodes are ready vs.
/// completed), used to resume or replay a run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunCheckpointRecord {
    pub id: String,
    pub ready_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub last_stage_id: Option<String>,
    pub persisted_at: String,
    /// Why this checkpoint was taken.
    pub reason: String,
}
1034
/// Expectations captured from a completed run, used to re-check later runs
/// (see `evaluate_run_against_fixture` / `evaluate_run_suite_manifest`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayFixture {
    /// Type discriminator for persisted JSON.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    /// Id of the run this fixture was derived from.
    pub source_run_id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    pub created_at: String,
    /// Overall run status the replay is expected to reach.
    pub expected_status: String,
    pub stage_assertions: Vec<ReplayStageAssertion>,
}
1048
/// Per-node expectations inside a [`ReplayFixture`].
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayStageAssertion {
    pub node_id: String,
    pub expected_status: String,
    pub expected_outcome: String,
    pub expected_branch: Option<String>,
    /// Artifact kinds the stage must have produced.
    pub required_artifact_kinds: Vec<String>,
    /// Substring that must appear in the stage's visible text, if set.
    pub visible_text_contains: Option<String>,
}
1059
/// Result of evaluating a single run against a replay fixture.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalReport {
    /// True when no failures were recorded.
    pub pass: bool,
    /// Human-readable descriptions of each failed expectation.
    pub failures: Vec<String>,
    pub stage_count: usize,
}
1067
/// Per-case result within an eval-suite report: fixture evaluation outcome
/// plus an optional diff against a baseline run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalCaseReport {
    pub run_id: String,
    pub workflow_id: String,
    /// Case label copied from the manifest, if any.
    pub label: Option<String>,
    pub pass: bool,
    pub failures: Vec<String>,
    pub stage_count: usize,
    /// Resolved path the run was loaded from.
    pub source_path: Option<String>,
    /// Diff against the baseline run when the case declared `compare_to`.
    pub comparison: Option<RunDiffReport>,
}
1080
/// Aggregate result for a whole eval suite; `pass` holds only when every
/// case passed (`failed == 0`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalSuiteReport {
    pub pass: bool,
    pub total: usize,
    pub passed: usize,
    pub failed: usize,
    pub cases: Vec<ReplayEvalCaseReport>,
}
1090
/// One stage-level difference found when comparing two runs.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunStageDiffRecord {
    pub node_id: String,
    /// Kind of change detected (category string produced by the differ).
    pub change: String,
    pub details: Vec<String>,
}
1098
/// Structured comparison of two run records (see `diff_run_records`);
/// `identical` summarizes whether any difference was found.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunDiffReport {
    pub left_run_id: String,
    pub right_run_id: String,
    pub identical: bool,
    pub status_changed: bool,
    pub left_status: String,
    pub right_status: String,
    pub stage_diffs: Vec<RunStageDiffRecord>,
    /// right-minus-left count deltas (signed).
    pub transition_count_delta: isize,
    pub artifact_count_delta: isize,
    pub checkpoint_count_delta: isize,
}
1113
/// Manifest describing an eval suite: a list of cases, optionally resolved
/// relative to `base_dir` (see `resolve_manifest_path`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteManifest {
    /// Type discriminator; normalized to "eval_suite_manifest".
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    /// Base directory for the cases' relative paths.
    pub base_dir: Option<String>,
    pub cases: Vec<EvalSuiteCase>,
}
1124
/// One eval-suite case: the run to load, an optional explicit fixture, and
/// an optional baseline run to diff against.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteCase {
    pub label: Option<String>,
    /// Path to the run record; relative paths resolve against the manifest's
    /// base_dir.
    pub run_path: String,
    /// Fixture to evaluate against; falls back to the run's embedded fixture.
    pub fixture_path: Option<String>,
    /// Baseline run path; any diff from it fails the case.
    pub compare_to: Option<String>,
}
1133
/// The full persisted record of one workflow run: identity, status and
/// timing, per-stage records, transitions, checkpoints, artifacts, child
/// runs, policy, usage, and an optional embedded replay fixture.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunRecord {
    /// Type discriminator; normalized to "run_record".
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    /// The task text this run was started with.
    pub task: String,
    /// Run status; normalized to "running" when empty.
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    /// Parent run for nested runs; `root_run_id` defaults to this run's own
    /// id during normalization.
    pub parent_run_id: Option<String>,
    pub root_run_id: Option<String>,
    pub stages: Vec<RunStageRecord>,
    pub transitions: Vec<RunTransitionRecord>,
    pub checkpoints: Vec<RunCheckpointRecord>,
    pub pending_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    pub child_runs: Vec<RunChildRecord>,
    pub artifacts: Vec<ArtifactRecord>,
    pub policy: CapabilityPolicy,
    pub execution: Option<RunExecutionRecord>,
    pub transcript: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    /// Fixture derived from this run; backfilled by `normalize_run_record`.
    pub replay_fixture: Option<ReplayFixture>,
    pub metadata: BTreeMap<String, serde_json::Value>,
    /// Where this record was last written, if persisted.
    pub persisted_path: Option<String>,
}
1163
/// Summary of a child (worker) run spawned from a parent run's stage.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunChildRecord {
    pub worker_id: String,
    pub worker_name: String,
    /// Stage in the parent run that spawned this child, if known.
    pub parent_stage_id: Option<String>,
    pub task: String,
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    pub run_id: Option<String>,
    pub run_path: Option<String>,
    pub snapshot_path: Option<String>,
    pub execution: Option<RunExecutionRecord>,
}
1179
/// Execution-environment details for a run: working directory, environment
/// variables, and (apparently) git worktree/branch information — field
/// semantics beyond the names are not established by this file.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunExecutionRecord {
    pub cwd: Option<String>,
    pub source_dir: Option<String>,
    pub env: BTreeMap<String, String>,
    pub adapter: Option<String>,
    pub repo_path: Option<String>,
    pub worktree_path: Option<String>,
    pub branch: Option<String>,
    pub base_ref: Option<String>,
    pub cleanup: Option<String>,
}
1193
/// Result of [`validate_workflow`]: `valid` iff `errors` is empty; warnings
/// are advisory and do not invalidate the graph.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowValidationReport {
    pub valid: bool,
    pub errors: Vec<String>,
    pub warnings: Vec<String>,
    /// Node ids reachable from the entry node.
    pub reachable_nodes: Vec<String>,
}
1202
1203fn parse_json_payload<T: for<'de> Deserialize<'de>>(
1204    json: serde_json::Value,
1205    label: &str,
1206) -> Result<T, VmError> {
1207    let payload = json.to_string();
1208    let mut deserializer = serde_json::Deserializer::from_str(&payload);
1209    let mut tracker = serde_path_to_error::Track::new();
1210    let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
1211    T::deserialize(path_deserializer).map_err(|error| {
1212        let snippet = if payload.len() > 600 {
1213            format!("{}...", &payload[..600])
1214        } else {
1215            payload.clone()
1216        };
1217        VmError::Runtime(format!(
1218            "{label} parse error at {}: {} | payload={}",
1219            tracker.path(),
1220            error,
1221            snippet
1222        ))
1223    })
1224}
1225
/// Deserialize a VM value into `T` via its JSON form, labeling any error
/// with the generic "orchestration" prefix.
fn parse_json_value<T: for<'de> Deserialize<'de>>(value: &VmValue) -> Result<T, VmError> {
    parse_json_payload(vm_value_to_json(value), "orchestration")
}
1229
/// Deserialize a VM value into a [`WorkflowNode`], using `label` as the
/// error-message prefix.
pub fn parse_workflow_node_value(value: &VmValue, label: &str) -> Result<WorkflowNode, VmError> {
    parse_json_payload(vm_value_to_json(value), label)
}
1233
/// Deserialize a JSON value into a [`WorkflowNode`], using `label` as the
/// error-message prefix.
pub fn parse_workflow_node_json(
    json: serde_json::Value,
    label: &str,
) -> Result<WorkflowNode, VmError> {
    parse_json_payload(json, label)
}
1240
/// Deserialize a JSON value into a [`WorkflowEdge`], using `label` as the
/// error-message prefix.
pub fn parse_workflow_edge_json(
    json: serde_json::Value,
    label: &str,
) -> Result<WorkflowEdge, VmError> {
    parse_json_payload(json, label)
}
1247
/// Parse a VM value into a fully normalized [`WorkflowGraph`].
///
/// Two input shapes are accepted:
/// - a full graph (explicit `nodes`/`edges`), which only gets defaults
///   backfilled; or
/// - a flat "act"/"verify"/"repair" shorthand dict, from which nodes are
///   synthesized (inheriting top-level provider/model/tier/temperature/
///   max_tokens/mode) along with the standard act→verify, verify→repair
///   ("failed") and repair→verify ("retry") edges.
///
/// # Errors
/// Returns `VmError::Runtime` when the value or an embedded node fails to
/// deserialize.
pub fn normalize_workflow_value(value: &VmValue) -> Result<WorkflowGraph, VmError> {
    let mut graph: WorkflowGraph = parse_json_value(value)?;
    let as_dict = value.as_dict().cloned().unwrap_or_default();

    // Shorthand form: no explicit nodes, so build them from the well-known
    // top-level keys.
    if graph.nodes.is_empty() {
        for key in ["act", "verify", "repair"] {
            if let Some(node_value) = as_dict.get(key) {
                let mut node: WorkflowNode = parse_json_value(node_value)?;
                let raw_node = node_value.as_dict().cloned().unwrap_or_default();
                node.id = Some(key.to_string());
                if node.kind.is_empty() {
                    node.kind = if key == "verify" {
                        "verify".to_string()
                    } else {
                        "stage".to_string()
                    };
                }
                // Model settings fall back to the top-level dict, treating
                // empty display strings as absent.
                if node.model_policy.provider.is_none() {
                    node.model_policy.provider = as_dict
                        .get("provider")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model.is_none() {
                    node.model_policy.model = as_dict
                        .get("model")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model_tier.is_none() {
                    // "model_tier" preferred, legacy "tier" accepted.
                    node.model_policy.model_tier = as_dict
                        .get("model_tier")
                        .or_else(|| as_dict.get("tier"))
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.temperature.is_none() {
                    // Accept either a float or an int temperature.
                    node.model_policy.temperature = as_dict.get("temperature").and_then(|value| {
                        if let VmValue::Float(number) = value {
                            Some(*number)
                        } else {
                            value.as_int().map(|number| number as f64)
                        }
                    });
                }
                if node.model_policy.max_tokens.is_none() {
                    node.model_policy.max_tokens =
                        as_dict.get("max_tokens").and_then(|value| value.as_int());
                }
                if node.mode.is_none() {
                    node.mode = as_dict
                        .get("mode")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                // Shorthand verify nodes: lift inline assertion keys into a
                // structured `verify` payload.
                if key == "verify"
                    && node.verify.is_none()
                    && (raw_node.contains_key("assert_text")
                        || raw_node.contains_key("command")
                        || raw_node.contains_key("expect_status")
                        || raw_node.contains_key("expect_text"))
                {
                    node.verify = Some(serde_json::json!({
                        "assert_text": raw_node.get("assert_text").map(vm_value_to_json),
                        "command": raw_node.get("command").map(vm_value_to_json),
                        "expect_status": raw_node.get("expect_status").map(vm_value_to_json),
                        "expect_text": raw_node.get("expect_text").map(vm_value_to_json),
                    }));
                }
                graph.nodes.insert(key.to_string(), node);
            }
        }
        if graph.entry.is_empty() && graph.nodes.contains_key("act") {
            graph.entry = "act".to_string();
        }
        // Synthesize the canonical act -> verify -> repair -> verify edges.
        if graph.edges.is_empty() && graph.nodes.contains_key("act") {
            if graph.nodes.contains_key("verify") {
                graph.edges.push(WorkflowEdge {
                    from: "act".to_string(),
                    to: "verify".to_string(),
                    branch: None,
                    label: None,
                });
            }
            if graph.nodes.contains_key("repair") {
                graph.edges.push(WorkflowEdge {
                    from: "verify".to_string(),
                    to: "repair".to_string(),
                    branch: Some("failed".to_string()),
                    label: None,
                });
                graph.edges.push(WorkflowEdge {
                    from: "repair".to_string(),
                    to: "verify".to_string(),
                    branch: Some("retry".to_string()),
                    label: None,
                });
            }
        }
    }

    // Graph-level defaults.
    if graph.type_name.is_empty() {
        graph.type_name = "workflow_graph".to_string();
    }
    if graph.id.is_empty() {
        graph.id = new_id("workflow");
    }
    if graph.version == 0 {
        graph.version = 1;
    }
    if graph.entry.is_empty() {
        // Fall back to the first node key (BTreeMap order), or "act".
        graph.entry = graph
            .nodes
            .keys()
            .next()
            .cloned()
            .unwrap_or_else(|| "act".to_string());
    }
    // Node-level defaults.
    for (node_id, node) in &mut graph.nodes {
        if node.id.is_none() {
            node.id = Some(node_id.clone());
        }
        if node.kind.is_empty() {
            node.kind = "stage".to_string();
        }
        if node.join_policy.strategy.is_empty() {
            node.join_policy.strategy = "all".to_string();
        }
        if node.reduce_policy.strategy.is_empty() {
            node.reduce_policy.strategy = "concat".to_string();
        }
        // Default output artifact kind depends on the node kind.
        if node.output_contract.output_kinds.is_empty() {
            node.output_contract.output_kinds = vec![match node.kind.as_str() {
                "verify" => "verification_result".to_string(),
                "reduce" => node
                    .reduce_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "summary".to_string()),
                "map" => node
                    .map_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "artifact".to_string()),
                "escalation" => "plan".to_string(),
                _ => "artifact".to_string(),
            }];
        }
        if node.retry_policy.max_attempts == 0 {
            node.retry_policy.max_attempts = 1;
        }
    }
    Ok(graph)
}
1402
/// Statically validate a workflow graph, optionally against a capability
/// ceiling.
///
/// Errors (graph invalid): missing entry node, edges referencing unknown
/// nodes, input contracts with min_inputs > max_inputs, condition nodes
/// lacking both "true" and "false" branch edges, fork nodes with fewer than
/// two outgoing edges, map nodes with no item source, and capability
/// policies that fail to intersect with `ceiling`.
/// Warnings (advisory only): unreachable nodes, join nodes with fewer than
/// two incoming edges, reduce nodes without declared input kinds.
pub fn validate_workflow(
    graph: &WorkflowGraph,
    ceiling: Option<&CapabilityPolicy>,
) -> WorkflowValidationReport {
    let mut errors = Vec::new();
    let mut warnings = Vec::new();

    if !graph.nodes.contains_key(&graph.entry) {
        errors.push(format!("entry node does not exist: {}", graph.entry));
    }

    // Every edge endpoint must name an existing node.
    let node_ids: BTreeSet<String> = graph.nodes.keys().cloned().collect();
    for edge in &graph.edges {
        if !node_ids.contains(&edge.from) {
            errors.push(format!("edge.from references unknown node: {}", edge.from));
        }
        if !node_ids.contains(&edge.to) {
            errors.push(format!("edge.to references unknown node: {}", edge.to));
        }
    }

    // Unreachable nodes are allowed but flagged.
    let reachable_nodes = reachable_nodes(graph);
    for node_id in &node_ids {
        if !reachable_nodes.contains(node_id) {
            warnings.push(format!("node is unreachable: {node_id}"));
        }
    }

    // Per-node structural checks keyed on node kind.
    for (node_id, node) in &graph.nodes {
        let incoming = graph
            .edges
            .iter()
            .filter(|edge| edge.to == *node_id)
            .count();
        let outgoing: Vec<&WorkflowEdge> = graph
            .edges
            .iter()
            .filter(|edge| edge.from == *node_id)
            .collect();
        if let Some(min_inputs) = node.input_contract.min_inputs {
            if let Some(max_inputs) = node.input_contract.max_inputs {
                if min_inputs > max_inputs {
                    errors.push(format!(
                        "node {node_id}: input contract min_inputs exceeds max_inputs"
                    ));
                }
            }
        }
        match node.kind.as_str() {
            "condition" => {
                // Conditions must be able to route both outcomes.
                let has_true = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("true"));
                let has_false = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("false"));
                if !has_true || !has_false {
                    errors.push(format!(
                        "node {node_id}: condition nodes require both 'true' and 'false' branch edges"
                    ));
                }
            }
            "fork" => {
                if outgoing.len() < 2 {
                    errors.push(format!(
                        "node {node_id}: fork nodes require at least two outgoing edges"
                    ));
                }
            }
            "join" => {
                // A one-input join is suspicious but not fatal.
                if incoming < 2 {
                    warnings.push(format!(
                        "node {node_id}: join node has fewer than two incoming edges"
                    ));
                }
            }
            "map" => {
                // Maps need something to iterate over.
                if node.map_policy.items.is_empty()
                    && node.map_policy.item_artifact_kind.is_none()
                    && node.input_contract.input_kinds.is_empty()
                {
                    errors.push(format!(
                        "node {node_id}: map nodes require items, item_artifact_kind, or input_contract.input_kinds"
                    ));
                }
            }
            "reduce" => {
                if node.input_contract.input_kinds.is_empty() {
                    warnings.push(format!(
                        "node {node_id}: reduce node has no input_contract.input_kinds; it will consume all available artifacts"
                    ));
                }
            }
            _ => {}
        }
    }

    // Graph- and node-level policies must fit under the ceiling, if given.
    if let Some(ceiling) = ceiling {
        if let Err(error) = ceiling.intersect(&graph.capability_policy) {
            errors.push(error);
        }
        for (node_id, node) in &graph.nodes {
            if let Err(error) = ceiling.intersect(&node.capability_policy) {
                errors.push(format!("node {node_id}: {error}"));
            }
        }
    }

    WorkflowValidationReport {
        valid: errors.is_empty(),
        errors,
        warnings,
        reachable_nodes: reachable_nodes.into_iter().collect(),
    }
}
1518
1519fn reachable_nodes(graph: &WorkflowGraph) -> BTreeSet<String> {
1520    let mut seen = BTreeSet::new();
1521    let mut stack = vec![graph.entry.clone()];
1522    while let Some(node_id) = stack.pop() {
1523        if !seen.insert(node_id.clone()) {
1524            continue;
1525        }
1526        for edge in graph.edges.iter().filter(|edge| edge.from == node_id) {
1527            stack.push(edge.to.clone());
1528        }
1529    }
1530    seen
1531}
1532
/// Filter and rank artifacts per `policy`, then greedily pack them into the
/// token/count budget.
///
/// Ranking (highest first): pinned ids, prioritized kinds, explicit
/// priority, freshness (when `prefer_fresh`), recency (when
/// `prefer_recent`), relevance, and finally smaller estimated token cost as
/// the tiebreak. Packing deliberately `continue`s (not `break`s) past an
/// artifact that would overflow the token budget, so a smaller lower-ranked
/// artifact can still fit.
pub fn select_artifacts(
    mut artifacts: Vec<ArtifactRecord>,
    policy: &ContextPolicy,
) -> Vec<ArtifactRecord> {
    // Hard filters: kind allowlist/denylist and optional stage allowlist.
    artifacts.retain(|artifact| {
        (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
            && !policy.exclude_kinds.contains(&artifact.kind)
            && (policy.include_stages.is_empty()
                || artifact
                    .stage
                    .as_ref()
                    .is_some_and(|stage| policy.include_stages.contains(stage)))
    });
    // Comparators are written b-vs-a so "better" sorts first (descending).
    artifacts.sort_by(|a, b| {
        let b_pinned = policy.pinned_ids.contains(&b.id);
        let a_pinned = policy.pinned_ids.contains(&a.id);
        b_pinned
            .cmp(&a_pinned)
            .then_with(|| {
                let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
                let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
                b_prio_kind.cmp(&a_prio_kind)
            })
            .then_with(|| {
                b.priority
                    .unwrap_or_default()
                    .cmp(&a.priority.unwrap_or_default())
            })
            .then_with(|| {
                if policy.prefer_fresh {
                    freshness_rank(b.freshness.as_deref())
                        .cmp(&freshness_rank(a.freshness.as_deref()))
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                if policy.prefer_recent {
                    // created_at strings compare lexicographically here.
                    b.created_at.cmp(&a.created_at)
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                // Relevance is a float; incomparable (NaN) pairs tie.
                b.relevance
                    .partial_cmp(&a.relevance)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
            .then_with(|| {
                // Final tiebreak: cheaper artifacts first; unknown cost last.
                a.estimated_tokens
                    .unwrap_or(usize::MAX)
                    .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
            })
    });

    // Greedy packing under max_artifacts and (max_tokens - reserve_tokens).
    let mut selected = Vec::new();
    let mut used_tokens = 0usize;
    let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
    let effective_max_tokens = policy
        .max_tokens
        .map(|max| max.saturating_sub(reserve_tokens));
    for artifact in artifacts {
        if let Some(max_artifacts) = policy.max_artifacts {
            if selected.len() >= max_artifacts {
                break;
            }
        }
        let next_tokens = artifact.estimated_tokens.unwrap_or(0);
        if let Some(max_tokens) = effective_max_tokens {
            if used_tokens + next_tokens > max_tokens {
                // Skip, but keep scanning for smaller artifacts that fit.
                continue;
            }
        }
        used_tokens += next_tokens;
        selected.push(artifact);
    }
    selected
}
1611
1612pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
1613    let mut parts = Vec::new();
1614    for artifact in artifacts {
1615        let title = artifact
1616            .title
1617            .clone()
1618            .unwrap_or_else(|| format!("{} {}", artifact.kind, artifact.id));
1619        let body = artifact
1620            .text
1621            .clone()
1622            .or_else(|| artifact.data.as_ref().map(|v| v.to_string()))
1623            .unwrap_or_default();
1624        match policy.render.as_deref() {
1625            Some("json") => {
1626                parts.push(
1627                    serde_json::json!({
1628                        "id": artifact.id,
1629                        "kind": artifact.kind,
1630                        "title": title,
1631                        "source": artifact.source,
1632                        "freshness": artifact.freshness,
1633                        "priority": artifact.priority,
1634                        "text": body,
1635                    })
1636                    .to_string(),
1637                );
1638            }
1639            _ => parts.push(format!(
1640                "[{title}] kind={} source={} freshness={} priority={}\n{}",
1641                artifact.kind,
1642                artifact
1643                    .source
1644                    .clone()
1645                    .unwrap_or_else(|| "unknown".to_string()),
1646                artifact
1647                    .freshness
1648                    .clone()
1649                    .unwrap_or_else(|| "normal".to_string()),
1650                artifact.priority.unwrap_or_default(),
1651                body
1652            )),
1653        }
1654    }
1655    parts.join("\n\n")
1656}
1657
1658pub fn normalize_artifact(value: &VmValue) -> Result<ArtifactRecord, VmError> {
1659    let artifact: ArtifactRecord = parse_json_value(value)?;
1660    Ok(artifact.normalize())
1661}
1662
1663pub fn normalize_run_record(value: &VmValue) -> Result<RunRecord, VmError> {
1664    let json = vm_value_to_json(value);
1665    let payload = json.to_string();
1666    let mut deserializer = serde_json::Deserializer::from_str(&payload);
1667    let mut tracker = serde_path_to_error::Track::new();
1668    let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
1669    let mut run: RunRecord = RunRecord::deserialize(path_deserializer).map_err(|error| {
1670        let snippet = if payload.len() > 600 {
1671            format!("{}...", &payload[..600])
1672        } else {
1673            payload.clone()
1674        };
1675        VmError::Runtime(format!(
1676            "orchestration parse error at {}: {} | payload={}",
1677            tracker.path(),
1678            error,
1679            snippet
1680        ))
1681    })?;
1682    if run.type_name.is_empty() {
1683        run.type_name = "run_record".to_string();
1684    }
1685    if run.id.is_empty() {
1686        run.id = new_id("run");
1687    }
1688    if run.started_at.is_empty() {
1689        run.started_at = now_rfc3339();
1690    }
1691    if run.status.is_empty() {
1692        run.status = "running".to_string();
1693    }
1694    if run.root_run_id.is_none() {
1695        run.root_run_id = Some(run.id.clone());
1696    }
1697    if run.replay_fixture.is_none() {
1698        run.replay_fixture = Some(replay_fixture_from_run(&run));
1699    }
1700    Ok(run)
1701}
1702
1703pub fn normalize_eval_suite_manifest(value: &VmValue) -> Result<EvalSuiteManifest, VmError> {
1704    let mut manifest: EvalSuiteManifest = parse_json_value(value)?;
1705    if manifest.type_name.is_empty() {
1706        manifest.type_name = "eval_suite_manifest".to_string();
1707    }
1708    if manifest.id.is_empty() {
1709        manifest.id = new_id("eval_suite");
1710    }
1711    Ok(manifest)
1712}
1713
1714fn load_replay_fixture(path: &Path) -> Result<ReplayFixture, VmError> {
1715    let content = std::fs::read_to_string(path)
1716        .map_err(|e| VmError::Runtime(format!("failed to read replay fixture: {e}")))?;
1717    serde_json::from_str(&content)
1718        .map_err(|e| VmError::Runtime(format!("failed to parse replay fixture: {e}")))
1719}
1720
/// Resolve a manifest-relative path: absolute paths pass through unchanged;
/// relative paths are joined onto `base_dir` when one is provided, and are
/// returned as-is otherwise.
fn resolve_manifest_path(base_dir: Option<&Path>, path: &str) -> PathBuf {
    let candidate = PathBuf::from(path);
    match base_dir {
        Some(base) if !candidate.is_absolute() => base.join(candidate),
        _ => candidate,
    }
}
1731
/// Evaluate every case in an eval-suite manifest and aggregate the results.
///
/// For each case: load the run, evaluate it against its replay fixture
/// (explicit `fixture_path` if given, else the run's embedded fixture, else
/// one derived from the run), and — when `compare_to` is set — diff against
/// the baseline run; any baseline difference fails the case. The suite
/// passes only when every case passes.
///
/// # Errors
/// Propagates the first failure to load/parse a run, fixture, or baseline.
pub fn evaluate_run_suite_manifest(
    manifest: &EvalSuiteManifest,
) -> Result<ReplayEvalSuiteReport, VmError> {
    let base_dir = manifest.base_dir.as_deref().map(Path::new);
    let mut reports = Vec::new();
    for case in &manifest.cases {
        let run_path = resolve_manifest_path(base_dir, &case.run_path);
        let run = load_run_record(&run_path)?;
        // Fixture precedence: explicit path > embedded > derived.
        let fixture = match &case.fixture_path {
            Some(path) => load_replay_fixture(&resolve_manifest_path(base_dir, path))?,
            None => run
                .replay_fixture
                .clone()
                .unwrap_or_else(|| replay_fixture_from_run(&run)),
        };
        let eval = evaluate_run_against_fixture(&run, &fixture);
        let mut pass = eval.pass;
        let mut failures = eval.failures;
        // Optional regression check against a baseline run.
        let comparison = match &case.compare_to {
            Some(path) => {
                let baseline_path = resolve_manifest_path(base_dir, path);
                let baseline = load_run_record(&baseline_path)?;
                let diff = diff_run_records(&baseline, &run);
                if !diff.identical {
                    pass = false;
                    failures.push(format!(
                        "run differs from baseline {} with {} stage changes",
                        baseline_path.display(),
                        diff.stage_diffs.len()
                    ));
                }
                Some(diff)
            }
            None => None,
        };
        reports.push(ReplayEvalCaseReport {
            run_id: run.id.clone(),
            workflow_id: run.workflow_id.clone(),
            label: case.label.clone(),
            pass,
            failures,
            stage_count: eval.stage_count,
            source_path: Some(run_path.display().to_string()),
            comparison,
        });
    }
    let total = reports.len();
    let passed = reports.iter().filter(|report| report.pass).count();
    let failed = total.saturating_sub(passed);
    Ok(ReplayEvalSuiteReport {
        pass: failed == 0,
        total,
        passed,
        failed,
        cases: reports,
    })
}
1789
/// Render a line-based diff between two texts in a unified-diff-like format.
///
/// Emits `--- a/<file>` / `+++ b/<file>` headers (using `"artifact"` when no
/// path is given), then one line per source line prefixed with `' '`
/// (unchanged), `'-'` (removed), or `'+'` (added). Alignment follows the
/// longest common subsequence of the two line lists; no hunk headers are
/// produced.
pub fn render_unified_diff(path: Option<&str>, before: &str, after: &str) -> String {
    let old: Vec<&str> = before.lines().collect();
    let new: Vec<&str> = after.lines().collect();

    // lcs[r][c] = length of the longest common subsequence of old[r..] and
    // new[c..], filled bottom-up from the suffixes.
    let mut lcs = vec![vec![0usize; new.len() + 1]; old.len() + 1];
    for r in (0..old.len()).rev() {
        for c in (0..new.len()).rev() {
            lcs[r][c] = if old[r] == new[c] {
                lcs[r + 1][c + 1] + 1
            } else {
                lcs[r + 1][c].max(lcs[r][c + 1])
            };
        }
    }

    use std::fmt::Write as _;
    let label = path.unwrap_or("artifact");
    let mut out = String::new();
    let _ = write!(out, "--- a/{label}\n+++ b/{label}\n");
    let (mut r, mut c) = (0usize, 0usize);
    // Walk both sequences, preferring deletions on ties (matches table walk).
    while r < old.len() && c < new.len() {
        if old[r] == new[c] {
            let _ = writeln!(out, " {}", old[r]);
            r += 1;
            c += 1;
        } else if lcs[r + 1][c] >= lcs[r][c + 1] {
            let _ = writeln!(out, "-{}", old[r]);
            r += 1;
        } else {
            let _ = writeln!(out, "+{}", new[c]);
            c += 1;
        }
    }
    // Flush whatever remains on either side.
    for line in &old[r..] {
        let _ = writeln!(out, "-{line}");
    }
    for line in &new[c..] {
        let _ = writeln!(out, "+{line}");
    }
    out
}
1832
1833pub fn save_run_record(run: &RunRecord, path: Option<&str>) -> Result<String, VmError> {
1834    let path = path
1835        .map(PathBuf::from)
1836        .unwrap_or_else(|| default_run_dir().join(format!("{}.json", run.id)));
1837    if let Some(parent) = path.parent() {
1838        std::fs::create_dir_all(parent)
1839            .map_err(|e| VmError::Runtime(format!("failed to create run directory: {e}")))?;
1840    }
1841    let json = serde_json::to_string_pretty(run)
1842        .map_err(|e| VmError::Runtime(format!("failed to encode run record: {e}")))?;
1843    std::fs::write(&path, json)
1844        .map_err(|e| VmError::Runtime(format!("failed to persist run record: {e}")))?;
1845    Ok(path.to_string_lossy().to_string())
1846}
1847
1848pub fn load_run_record(path: &Path) -> Result<RunRecord, VmError> {
1849    let content = std::fs::read_to_string(path)
1850        .map_err(|e| VmError::Runtime(format!("failed to read run record: {e}")))?;
1851    serde_json::from_str(&content)
1852        .map_err(|e| VmError::Runtime(format!("failed to parse run record: {e}")))
1853}
1854
1855pub fn replay_fixture_from_run(run: &RunRecord) -> ReplayFixture {
1856    ReplayFixture {
1857        type_name: "replay_fixture".to_string(),
1858        id: new_id("fixture"),
1859        source_run_id: run.id.clone(),
1860        workflow_id: run.workflow_id.clone(),
1861        workflow_name: run.workflow_name.clone(),
1862        created_at: now_rfc3339(),
1863        expected_status: run.status.clone(),
1864        stage_assertions: run
1865            .stages
1866            .iter()
1867            .map(|stage| ReplayStageAssertion {
1868                node_id: stage.node_id.clone(),
1869                expected_status: stage.status.clone(),
1870                expected_outcome: stage.outcome.clone(),
1871                expected_branch: stage.branch.clone(),
1872                required_artifact_kinds: stage
1873                    .artifacts
1874                    .iter()
1875                    .map(|artifact| artifact.kind.clone())
1876                    .collect(),
1877                visible_text_contains: stage
1878                    .visible_text
1879                    .as_ref()
1880                    .filter(|text| !text.is_empty())
1881                    .map(|text| text.chars().take(80).collect()),
1882            })
1883            .collect(),
1884    }
1885}
1886
1887pub fn evaluate_run_against_fixture(run: &RunRecord, fixture: &ReplayFixture) -> ReplayEvalReport {
1888    let mut failures = Vec::new();
1889    if run.status != fixture.expected_status {
1890        failures.push(format!(
1891            "run status mismatch: expected {}, got {}",
1892            fixture.expected_status, run.status
1893        ));
1894    }
1895    for assertion in &fixture.stage_assertions {
1896        let Some(stage) = run
1897            .stages
1898            .iter()
1899            .find(|stage| stage.node_id == assertion.node_id)
1900        else {
1901            failures.push(format!("missing stage {}", assertion.node_id));
1902            continue;
1903        };
1904        if stage.status != assertion.expected_status {
1905            failures.push(format!(
1906                "stage {} status mismatch: expected {}, got {}",
1907                assertion.node_id, assertion.expected_status, stage.status
1908            ));
1909        }
1910        if stage.outcome != assertion.expected_outcome {
1911            failures.push(format!(
1912                "stage {} outcome mismatch: expected {}, got {}",
1913                assertion.node_id, assertion.expected_outcome, stage.outcome
1914            ));
1915        }
1916        if stage.branch != assertion.expected_branch {
1917            failures.push(format!(
1918                "stage {} branch mismatch: expected {:?}, got {:?}",
1919                assertion.node_id, assertion.expected_branch, stage.branch
1920            ));
1921        }
1922        for required_kind in &assertion.required_artifact_kinds {
1923            if !stage
1924                .artifacts
1925                .iter()
1926                .any(|artifact| &artifact.kind == required_kind)
1927            {
1928                failures.push(format!(
1929                    "stage {} missing artifact kind {}",
1930                    assertion.node_id, required_kind
1931                ));
1932            }
1933        }
1934        if let Some(snippet) = &assertion.visible_text_contains {
1935            let actual = stage.visible_text.clone().unwrap_or_default();
1936            if !actual.contains(snippet) {
1937                failures.push(format!(
1938                    "stage {} visible text does not contain expected snippet {:?}",
1939                    assertion.node_id, snippet
1940                ));
1941            }
1942        }
1943    }
1944
1945    ReplayEvalReport {
1946        pass: failures.is_empty(),
1947        failures,
1948        stage_count: run.stages.len(),
1949    }
1950}
1951
1952pub fn evaluate_run_suite(
1953    cases: Vec<(RunRecord, ReplayFixture, Option<String>)>,
1954) -> ReplayEvalSuiteReport {
1955    let mut reports = Vec::new();
1956    for (run, fixture, source_path) in cases {
1957        let report = evaluate_run_against_fixture(&run, &fixture);
1958        reports.push(ReplayEvalCaseReport {
1959            run_id: run.id.clone(),
1960            workflow_id: run.workflow_id.clone(),
1961            label: None,
1962            pass: report.pass,
1963            failures: report.failures,
1964            stage_count: report.stage_count,
1965            source_path,
1966            comparison: None,
1967        });
1968    }
1969    let total = reports.len();
1970    let passed = reports.iter().filter(|report| report.pass).count();
1971    let failed = total.saturating_sub(passed);
1972    ReplayEvalSuiteReport {
1973        pass: failed == 0,
1974        total,
1975        passed,
1976        failed,
1977        cases: reports,
1978    }
1979}
1980
1981pub fn diff_run_records(left: &RunRecord, right: &RunRecord) -> RunDiffReport {
1982    let mut stage_diffs = Vec::new();
1983    let mut all_node_ids = BTreeSet::new();
1984    all_node_ids.extend(left.stages.iter().map(|stage| stage.node_id.clone()));
1985    all_node_ids.extend(right.stages.iter().map(|stage| stage.node_id.clone()));
1986
1987    for node_id in all_node_ids {
1988        let left_stage = left.stages.iter().find(|stage| stage.node_id == node_id);
1989        let right_stage = right.stages.iter().find(|stage| stage.node_id == node_id);
1990        match (left_stage, right_stage) {
1991            (Some(_), None) => stage_diffs.push(RunStageDiffRecord {
1992                node_id,
1993                change: "removed".to_string(),
1994                details: vec!["stage missing from right run".to_string()],
1995            }),
1996            (None, Some(_)) => stage_diffs.push(RunStageDiffRecord {
1997                node_id,
1998                change: "added".to_string(),
1999                details: vec!["stage missing from left run".to_string()],
2000            }),
2001            (Some(left_stage), Some(right_stage)) => {
2002                let mut details = Vec::new();
2003                if left_stage.status != right_stage.status {
2004                    details.push(format!(
2005                        "status: {} -> {}",
2006                        left_stage.status, right_stage.status
2007                    ));
2008                }
2009                if left_stage.outcome != right_stage.outcome {
2010                    details.push(format!(
2011                        "outcome: {} -> {}",
2012                        left_stage.outcome, right_stage.outcome
2013                    ));
2014                }
2015                if left_stage.branch != right_stage.branch {
2016                    details.push(format!(
2017                        "branch: {:?} -> {:?}",
2018                        left_stage.branch, right_stage.branch
2019                    ));
2020                }
2021                if left_stage.produced_artifact_ids.len() != right_stage.produced_artifact_ids.len()
2022                {
2023                    details.push(format!(
2024                        "produced_artifacts: {} -> {}",
2025                        left_stage.produced_artifact_ids.len(),
2026                        right_stage.produced_artifact_ids.len()
2027                    ));
2028                }
2029                if left_stage.artifacts.len() != right_stage.artifacts.len() {
2030                    details.push(format!(
2031                        "artifact_records: {} -> {}",
2032                        left_stage.artifacts.len(),
2033                        right_stage.artifacts.len()
2034                    ));
2035                }
2036                if !details.is_empty() {
2037                    stage_diffs.push(RunStageDiffRecord {
2038                        node_id,
2039                        change: "changed".to_string(),
2040                        details,
2041                    });
2042                }
2043            }
2044            (None, None) => {}
2045        }
2046    }
2047
2048    let status_changed = left.status != right.status;
2049    let identical = !status_changed
2050        && stage_diffs.is_empty()
2051        && left.transitions.len() == right.transitions.len()
2052        && left.artifacts.len() == right.artifacts.len()
2053        && left.checkpoints.len() == right.checkpoints.len();
2054
2055    RunDiffReport {
2056        left_run_id: left.id.clone(),
2057        right_run_id: right.id.clone(),
2058        identical,
2059        status_changed,
2060        left_status: left.status.clone(),
2061        right_status: right.status.clone(),
2062        stage_diffs,
2063        transition_count_delta: right.transitions.len() as isize - left.transitions.len() as isize,
2064        artifact_count_delta: right.artifacts.len() as isize - left.artifacts.len() as isize,
2065        checkpoint_count_delta: right.checkpoints.len() as isize - left.checkpoints.len() as isize,
2066    }
2067}
2068
2069pub fn push_execution_policy(policy: CapabilityPolicy) {
2070    EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
2071}
2072
2073pub fn pop_execution_policy() {
2074    EXECUTION_POLICY_STACK.with(|stack| {
2075        stack.borrow_mut().pop();
2076    });
2077}
2078
2079pub fn current_execution_policy() -> Option<CapabilityPolicy> {
2080    EXECUTION_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
2081}
2082
2083fn policy_allows_tool(policy: &CapabilityPolicy, tool: &str) -> bool {
2084    policy.tools.is_empty() || policy.tools.iter().any(|allowed| allowed == tool)
2085}
2086
2087fn policy_allows_capability(policy: &CapabilityPolicy, capability: &str, op: &str) -> bool {
2088    policy.capabilities.is_empty()
2089        || policy
2090            .capabilities
2091            .get(capability)
2092            .is_some_and(|ops| ops.is_empty() || ops.iter().any(|allowed| allowed == op))
2093}
2094
2095fn policy_allows_side_effect(policy: &CapabilityPolicy, requested: &str) -> bool {
2096    fn rank(v: &str) -> usize {
2097        match v {
2098            "none" => 0,
2099            "read_only" => 1,
2100            "workspace_write" => 2,
2101            "process_exec" => 3,
2102            "network" => 4,
2103            _ => 5,
2104        }
2105    }
2106    policy
2107        .side_effect_level
2108        .as_ref()
2109        .map(|allowed| rank(allowed) >= rank(requested))
2110        .unwrap_or(true)
2111}
2112
2113fn reject_policy(reason: String) -> Result<(), VmError> {
2114    Err(VmError::CategorizedError {
2115        message: reason,
2116        category: crate::value::ErrorCategory::ToolRejected,
2117    })
2118}
2119
/// Check a builtin call against the innermost execution policy, if any.
///
/// Each recognized builtin name is mapped to the tool, capability, and
/// side-effect surface it needs; the call is rejected (as a categorized
/// `ToolRejected` error via `reject_policy`) when the active policy does
/// not grant that surface. Builtin names not listed in the match are
/// allowed unconditionally. With no active policy, everything is allowed.
pub fn enforce_current_policy_for_builtin(name: &str, args: &[VmValue]) -> Result<(), VmError> {
    // No policy on the stack means no restrictions.
    let Some(policy) = current_execution_policy() else {
        return Ok(());
    };
    match name {
        // Workspace text reads: need the tool itself plus workspace.read_text.
        "read" | "read_file" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "read_text")
            {
                return reject_policy(format!(
                    "builtin '{name}' exceeds workspace.read_text ceiling"
                ));
            }
        }
        // Directory listing / search: tool plus workspace.list.
        "search" | "list_dir" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "list")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
            }
        }
        // Existence/metadata probes: capability check only, no tool gate.
        "file_exists" | "stat" => {
            if !policy_allows_capability(&policy, "workspace", "exists") {
                return reject_policy(format!("builtin '{name}' exceeds workspace.exists ceiling"));
            }
        }
        // Mutating workspace builtins all ride on the 'edit' tool plus the
        // write capability and the workspace_write side-effect level.
        "edit" | "write_file" | "append_file" | "mkdir" | "copy_file" => {
            if !policy_allows_tool(&policy, "edit")
                || !policy_allows_capability(&policy, "workspace", "write_text")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace write ceiling"));
            }
        }
        // Deletion has its own capability, still under workspace_write.
        "delete_file" => {
            if !policy_allows_capability(&policy, "workspace", "delete")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'delete_file' exceeds workspace.delete ceiling".to_string(),
                );
            }
        }
        // Structured edits likewise have a dedicated capability.
        "apply_edit" => {
            if !policy_allows_capability(&policy, "workspace", "apply_edit")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'apply_edit' exceeds workspace.apply_edit ceiling".to_string(),
                );
            }
        }
        // Process execution: 'run' tool, process.exec capability, and the
        // process_exec side-effect level.
        "exec" | "exec_at" | "shell" | "shell_at" | "run_command" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        // HTTP builtins only require the network side-effect level.
        "http_get" | "http_post" | "http_put" | "http_patch" | "http_delete" | "http_request" => {
            if !policy_allows_side_effect(&policy, "network") {
                return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
            }
        }
        // MCP builtins are gated like process execution (they spawn/talk to
        // an external server process).
        "mcp_connect"
        | "mcp_call"
        | "mcp_list_tools"
        | "mcp_list_resources"
        | "mcp_list_resource_templates"
        | "mcp_read_resource"
        | "mcp_list_prompts"
        | "mcp_get_prompt"
        | "mcp_server_info"
        | "mcp_disconnect" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        // Dynamic host calls: the capability and op come from the first two
        // arguments, and the side-effect level is derived from that pair.
        "host_invoke" => {
            let capability = args.first().map(|v| v.display()).unwrap_or_default();
            let op = args.get(1).map(|v| v.display()).unwrap_or_default();
            if !policy_allows_capability(&policy, &capability, &op) {
                return reject_policy(format!(
                    "host_invoke {capability}.{op} exceeds capability ceiling"
                ));
            }
            let requested_side_effect = match (capability.as_str(), op.as_str()) {
                ("workspace", "write_text" | "apply_edit" | "delete") => "workspace_write",
                ("process", "exec") => "process_exec",
                _ => "read_only",
            };
            if !policy_allows_side_effect(&policy, requested_side_effect) {
                return reject_policy(format!(
                    "host_invoke {capability}.{op} exceeds side-effect ceiling"
                ));
            }
        }
        // Anything not listed is not policy-gated.
        _ => {}
    }
    Ok(())
}
2225
2226pub fn enforce_current_policy_for_bridge_builtin(name: &str) -> Result<(), VmError> {
2227    if current_execution_policy().is_some() {
2228        return reject_policy(format!(
2229            "bridged builtin '{name}' exceeds execution policy; declare an explicit capability/tool surface instead"
2230        ));
2231    }
2232    Ok(())
2233}
2234
2235pub fn enforce_current_policy_for_tool(tool_name: &str) -> Result<(), VmError> {
2236    let Some(policy) = current_execution_policy() else {
2237        return Ok(());
2238    };
2239    if !policy_allows_tool(&policy, tool_name) {
2240        return reject_policy(format!("tool '{tool_name}' exceeds tool ceiling"));
2241    }
2242    Ok(())
2243}
2244
2245fn compact_transcript(transcript: &VmValue, keep_last: usize) -> Option<VmValue> {
2246    let dict = transcript.as_dict()?;
2247    let messages = match dict.get("messages") {
2248        Some(VmValue::List(list)) => list.iter().cloned().collect::<Vec<_>>(),
2249        _ => Vec::new(),
2250    };
2251    let retained = messages
2252        .into_iter()
2253        .rev()
2254        .take(keep_last)
2255        .collect::<Vec<_>>()
2256        .into_iter()
2257        .rev()
2258        .collect::<Vec<_>>();
2259    let mut compacted = dict.clone();
2260    compacted.insert(
2261        "messages".to_string(),
2262        VmValue::List(Rc::new(retained.clone())),
2263    );
2264    compacted.insert(
2265        "events".to_string(),
2266        VmValue::List(Rc::new(
2267            crate::llm::helpers::transcript_events_from_messages(&retained),
2268        )),
2269    );
2270    Some(VmValue::Dict(Rc::new(compacted)))
2271}
2272
2273fn redact_transcript_visibility(transcript: &VmValue, visibility: Option<&str>) -> Option<VmValue> {
2274    let Some(visibility) = visibility else {
2275        return Some(transcript.clone());
2276    };
2277    if visibility != "public" && visibility != "public_only" {
2278        return Some(transcript.clone());
2279    }
2280    let dict = transcript.as_dict()?;
2281    let public_messages = match dict.get("messages") {
2282        Some(VmValue::List(list)) => list
2283            .iter()
2284            .filter(|message| {
2285                message
2286                    .as_dict()
2287                    .and_then(|d| d.get("role"))
2288                    .map(|v| v.display())
2289                    .map(|role| role != "tool_result")
2290                    .unwrap_or(true)
2291            })
2292            .cloned()
2293            .collect::<Vec<_>>(),
2294        _ => Vec::new(),
2295    };
2296    let public_events = match dict.get("events") {
2297        Some(VmValue::List(list)) => list
2298            .iter()
2299            .filter(|event| {
2300                event
2301                    .as_dict()
2302                    .and_then(|d| d.get("visibility"))
2303                    .map(|v| v.display())
2304                    .map(|value| value == "public")
2305                    .unwrap_or(true)
2306            })
2307            .cloned()
2308            .collect::<Vec<_>>(),
2309        _ => Vec::new(),
2310    };
2311    let mut redacted = dict.clone();
2312    redacted.insert(
2313        "messages".to_string(),
2314        VmValue::List(Rc::new(public_messages)),
2315    );
2316    redacted.insert("events".to_string(), VmValue::List(Rc::new(public_events)));
2317    Some(VmValue::Dict(Rc::new(redacted)))
2318}
2319
2320pub(crate) fn apply_input_transcript_policy(
2321    transcript: Option<VmValue>,
2322    policy: &TranscriptPolicy,
2323) -> Option<VmValue> {
2324    let mut transcript = transcript;
2325    match policy.mode.as_deref() {
2326        Some("reset") => return None,
2327        Some("fork") => {
2328            if let Some(VmValue::Dict(dict)) = transcript.as_ref() {
2329                let mut forked = dict.as_ref().clone();
2330                forked.insert(
2331                    "id".to_string(),
2332                    VmValue::String(Rc::from(new_id("transcript"))),
2333                );
2334                transcript = Some(VmValue::Dict(Rc::new(forked)));
2335            }
2336        }
2337        _ => {}
2338    }
2339    if policy.compact {
2340        let keep_last = policy.keep_last.unwrap_or(6);
2341        transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
2342    }
2343    transcript
2344}
2345
2346fn apply_output_transcript_policy(
2347    transcript: Option<VmValue>,
2348    policy: &TranscriptPolicy,
2349) -> Option<VmValue> {
2350    let mut transcript = transcript;
2351    if policy.compact {
2352        let keep_last = policy.keep_last.unwrap_or(6);
2353        transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
2354    }
2355    transcript.and_then(|value| redact_transcript_visibility(&value, policy.visibility.as_deref()))
2356}
2357
/// Execute a single workflow stage node as one LLM call or agent loop.
///
/// Pipeline: select input artifacts per the node's context policy, render
/// them into the prompt, apply the transcript input policy, enforce the
/// node's input contract, build LLM options from the model policy, then
/// dispatch either a persistent agent loop (agent mode or tools present)
/// or a one-shot LLM call. Returns the raw LLM result JSON, a single
/// normalized output artifact, and the post-output-policy transcript.
///
/// Errors: input-contract violations surface as `VmError::Runtime`; LLM
/// and option-extraction errors propagate via `?`.
pub async fn execute_stage_node(
    node_id: &str,
    node: &WorkflowNode,
    task: &str,
    artifacts: &[ArtifactRecord],
    transcript: Option<VmValue>,
) -> Result<(serde_json::Value, Vec<ArtifactRecord>, Option<VmValue>), VmError> {
    // Fall back to the input contract's kinds when the context policy does
    // not name any include kinds of its own.
    let mut selection_policy = node.context_policy.clone();
    if selection_policy.include_kinds.is_empty() && !node.input_contract.input_kinds.is_empty() {
        selection_policy.include_kinds = node.input_contract.input_kinds.clone();
    }
    let selected = select_artifacts_adaptive(artifacts.to_vec(), &selection_policy);
    let rendered_context = render_artifacts_context(&selected, &node.context_policy);
    let transcript = apply_input_transcript_policy(transcript, &node.transcript_policy);
    // Input contract: transcript requirement and artifact-count bounds are
    // checked after the transcript policy (a "reset" mode can drop it).
    if node.input_contract.require_transcript && transcript.is_none() {
        return Err(VmError::Runtime(format!(
            "workflow stage {node_id} requires transcript input"
        )));
    }
    if let Some(min_inputs) = node.input_contract.min_inputs {
        if selected.len() < min_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} requires at least {min_inputs} input artifacts"
            )));
        }
    }
    if let Some(max_inputs) = node.input_contract.max_inputs {
        if selected.len() > max_inputs {
            return Err(VmError::Runtime(format!(
                "workflow stage {node_id} accepts at most {max_inputs} input artifacts"
            )));
        }
    }
    // Prompt = rendered artifact context (if any) + labeled task text.
    let prompt = if rendered_context.is_empty() {
        task.to_string()
    } else {
        format!(
            "{rendered_context}\n\n{}:\n{task}",
            node.task_label
                .clone()
                .unwrap_or_else(|| "Task".to_string())
        )
    };

    // Translate the node's model policy into LLM call options; only set
    // keys are forwarded.
    let mut options = BTreeMap::new();
    if let Some(provider) = &node.model_policy.provider {
        options.insert(
            "provider".to_string(),
            VmValue::String(Rc::from(provider.clone())),
        );
    }
    if let Some(model) = &node.model_policy.model {
        options.insert(
            "model".to_string(),
            VmValue::String(Rc::from(model.clone())),
        );
    }
    if let Some(model_tier) = &node.model_policy.model_tier {
        options.insert(
            "model_tier".to_string(),
            VmValue::String(Rc::from(model_tier.clone())),
        );
    }
    if let Some(temperature) = node.model_policy.temperature {
        options.insert("temperature".to_string(), VmValue::Float(temperature));
    }
    if let Some(max_tokens) = node.model_policy.max_tokens {
        options.insert("max_tokens".to_string(), VmValue::Int(max_tokens));
    }
    // Only forward tools when the node declares a non-null spec that
    // resolves to at least one tool name.
    let tool_names = workflow_tool_names(&node.tools);
    if !matches!(node.tools, serde_json::Value::Null) && !tool_names.is_empty() {
        options.insert(
            "tools".to_string(),
            crate::stdlib::json_to_vm_value(&node.tools),
        );
    }
    if let Some(transcript) = transcript.clone() {
        options.insert("transcript".to_string(), transcript);
    }

    // Args shape expected by extract_llm_options: [prompt, system, options].
    let args = vec![
        VmValue::String(Rc::from(prompt)),
        node.system
            .clone()
            .map(|s| VmValue::String(Rc::from(s)))
            .unwrap_or(VmValue::Nil),
        VmValue::Dict(Rc::new(options)),
    ];
    let mut opts = extract_llm_options(&args)?;

    // Agent mode (explicit, or implied by the presence of tools) runs a
    // bounded agent loop; otherwise a single LLM call is made and adapted
    // into the same result shape.
    let llm_result = if node.mode.as_deref() == Some("agent") || !tool_names.is_empty() {
        crate::llm::run_agent_loop_internal(
            &mut opts,
            crate::llm::AgentLoopConfig {
                persistent: true,
                max_iterations: 12,
                max_nudges: 3,
                nudge: None,
                tool_retries: 0,
                tool_backoff_ms: 1000,
                tool_format: "text".to_string(),
                auto_compact: None,
                policy: None,
                daemon: false,
            },
        )
        .await?
    } else {
        let result = vm_call_llm_full(&opts).await?;
        crate::llm::agent_loop_result_from_llm(&result, opts)
    };

    let visible_text = llm_result["text"].as_str().unwrap_or_default().to_string();
    let transcript = llm_result
        .get("transcript")
        .cloned()
        .map(|value| crate::stdlib::json_to_vm_value(&value));
    let transcript = apply_output_transcript_policy(transcript, &node.transcript_policy);
    // Output kind: first declared output kind, else a default by node kind.
    let output_kind = node
        .output_contract
        .output_kinds
        .first()
        .cloned()
        .unwrap_or_else(|| {
            if node.kind == "verify" {
                "verification_result".to_string()
            } else {
                "artifact".to_string()
            }
        });
    // Record provenance: which artifacts fed this stage and the node kind.
    let mut metadata = BTreeMap::new();
    metadata.insert(
        "input_artifact_ids".to_string(),
        serde_json::json!(selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect::<Vec<_>>()),
    );
    metadata.insert("node_kind".to_string(), serde_json::json!(node.kind));
    let artifact = ArtifactRecord {
        type_name: "artifact".to_string(),
        id: new_id("artifact"),
        kind: output_kind,
        title: Some(format!("stage {node_id} output")),
        text: Some(visible_text),
        data: Some(llm_result.clone()),
        source: Some(node_id.to_string()),
        created_at: now_rfc3339(),
        freshness: Some("fresh".to_string()),
        priority: None,
        // Lineage mirrors the metadata input ids for downstream selection.
        lineage: selected
            .iter()
            .map(|artifact| artifact.id.clone())
            .collect(),
        relevance: Some(1.0),
        estimated_tokens: None,
        stage: Some(node_id.to_string()),
        metadata,
    }
    .normalize();

    Ok((llm_result, vec![artifact], transcript))
}
2521
2522pub fn next_nodes_for(
2523    graph: &WorkflowGraph,
2524    current: &str,
2525    branch: Option<&str>,
2526) -> Vec<WorkflowEdge> {
2527    let mut matching: Vec<WorkflowEdge> = graph
2528        .edges
2529        .iter()
2530        .filter(|edge| edge.from == current && edge.branch.as_deref() == branch)
2531        .cloned()
2532        .collect();
2533    if matching.is_empty() {
2534        matching = graph
2535            .edges
2536            .iter()
2537            .filter(|edge| edge.from == current && edge.branch.is_none())
2538            .cloned()
2539            .collect();
2540    }
2541    matching
2542}
2543
2544pub fn next_node_for(graph: &WorkflowGraph, current: &str, branch: &str) -> Option<String> {
2545    next_nodes_for(graph, current, Some(branch))
2546        .into_iter()
2547        .next()
2548        .map(|edge| edge.to)
2549}
2550
2551pub fn append_audit_entry(
2552    graph: &mut WorkflowGraph,
2553    op: &str,
2554    node_id: Option<String>,
2555    reason: Option<String>,
2556    metadata: BTreeMap<String, serde_json::Value>,
2557) {
2558    graph.audit_log.push(WorkflowAuditEntry {
2559        id: new_id("audit"),
2560        op: op.to_string(),
2561        node_id,
2562        timestamp: now_rfc3339(),
2563        reason,
2564        metadata,
2565    });
2566}
2567
2568pub fn builtin_ceiling() -> CapabilityPolicy {
2569    CapabilityPolicy {
2570        tools: vec![
2571            "read".to_string(),
2572            "read_file".to_string(),
2573            "search".to_string(),
2574            "edit".to_string(),
2575            "run".to_string(),
2576            "exec".to_string(),
2577            "outline".to_string(),
2578            "list_directory".to_string(),
2579            "lsp_hover".to_string(),
2580            "lsp_definition".to_string(),
2581            "lsp_references".to_string(),
2582            "web_search".to_string(),
2583            "web_fetch".to_string(),
2584        ],
2585        capabilities: BTreeMap::from([
2586            (
2587                "workspace".to_string(),
2588                vec![
2589                    "read_text".to_string(),
2590                    "write_text".to_string(),
2591                    "apply_edit".to_string(),
2592                    "delete".to_string(),
2593                    "exists".to_string(),
2594                    "list".to_string(),
2595                ],
2596            ),
2597            ("process".to_string(), vec!["exec".to_string()]),
2598        ]),
2599        workspace_roots: Vec::new(),
2600        side_effect_level: Some("network".to_string()),
2601        recursion_limit: Some(8),
2602        tool_arg_constraints: Vec::new(),
2603    }
2604}
2605
// Unit tests covering capability policies, workflow normalization, artifact
// selection, replay/eval fixtures, run diffing, tool lifecycle hooks,
// auto-compaction, and tool argument constraints.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn capability_intersection_rejects_privilege_expansion() {
        let ceiling = CapabilityPolicy {
            tools: vec!["read".to_string()],
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(2),
            ..Default::default()
        };
        let requested = CapabilityPolicy {
            tools: vec!["read".to_string(), "edit".to_string()],
            ..Default::default()
        };
        // "edit" is not in the ceiling's tool list, so intersection must fail.
        let error = ceiling.intersect(&requested).unwrap_err();
        assert!(error.contains("host ceiling"));
    }

    #[test]
    fn active_execution_policy_rejects_unknown_bridge_builtin() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
        // Pop before asserting so a failed assert doesn't leak policy state.
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }

    #[test]
    fn active_execution_policy_rejects_mcp_escape_hatch() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }

    #[test]
    fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
        // Legacy shape: top-level act/verify/repair keys instead of nodes/edges.
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "legacy",
            "act": {"mode": "llm"},
            "verify": {"kind": "verify"},
            "repair": {"mode": "agent"},
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        assert_eq!(graph.type_name, "workflow_graph");
        assert!(graph.nodes.contains_key("act"));
        assert!(graph.nodes.contains_key("verify"));
        assert!(graph.nodes.contains_key("repair"));
        assert_eq!(graph.entry, "act");
    }

    #[test]
    fn workflow_normalization_accepts_tool_registry_nodes() {
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "name": "registry_tools",
            "entry": "implement",
            "nodes": {
                "implement": {
                    "kind": "stage",
                    "mode": "agent",
                    "tools": {
                        "_type": "tool_registry",
                        "tools": [
                            {"name": "read", "description": "Read files"},
                            {"name": "run", "description": "Run commands"}
                        ]
                    }
                }
            },
            "edges": []
        }));
        let graph = normalize_workflow_value(&value).unwrap();
        let node = graph.nodes.get("implement").unwrap();
        assert_eq!(workflow_tool_names(&node.tools), vec!["read", "run"]);
    }

    #[test]
    fn artifact_selection_honors_budget_and_priority() {
        let policy = ContextPolicy {
            max_artifacts: Some(2),
            max_tokens: Some(30),
            prefer_recent: true,
            prefer_fresh: true,
            prioritize_kinds: vec!["verification_result".to_string()],
            ..Default::default()
        };
        let artifacts = vec![
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "a".to_string(),
                kind: "summary".to_string(),
                text: Some("short".to_string()),
                relevance: Some(0.9),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "b".to_string(),
                kind: "summary".to_string(),
                text: Some("this is a much larger artifact body".to_string()),
                relevance: Some(1.0),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
            ArtifactRecord {
                type_name: "artifact".to_string(),
                id: "c".to_string(),
                kind: "summary".to_string(),
                text: Some("tiny".to_string()),
                relevance: Some(0.5),
                created_at: now_rfc3339(),
                ..Default::default()
            }
            .normalize(),
        ];
        // max_artifacts = 2 forces one of the three candidates to be dropped.
        let selected = select_artifacts(artifacts, &policy);
        assert_eq!(selected.len(), 2);
        assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
    }

    #[test]
    fn workflow_validation_rejects_condition_without_true_false_edges() {
        // Condition node with only a "true" edge; the "false" edge is missing.
        let graph = WorkflowGraph {
            entry: "gate".to_string(),
            nodes: BTreeMap::from([(
                "gate".to_string(),
                WorkflowNode {
                    id: Some("gate".to_string()),
                    kind: "condition".to_string(),
                    ..Default::default()
                },
            )]),
            edges: vec![WorkflowEdge {
                from: "gate".to_string(),
                to: "next".to_string(),
                branch: Some("true".to_string()),
                label: None,
            }],
            ..Default::default()
        };
        let report = validate_workflow(&graph, None);
        assert!(!report.valid);
        assert!(report
            .errors
            .iter()
            .any(|error| error.contains("true") && error.contains("false")));
    }

    #[test]
    fn replay_fixture_round_trip_passes() {
        let run = RunRecord {
            type_name: "run_record".to_string(),
            id: "run_1".to_string(),
            workflow_id: "wf".to_string(),
            workflow_name: Some("demo".to_string()),
            task: "demo".to_string(),
            status: "completed".to_string(),
            started_at: "1".to_string(),
            finished_at: Some("2".to_string()),
            parent_run_id: None,
            root_run_id: Some("run_1".to_string()),
            stages: vec![RunStageRecord {
                id: "stage_1".to_string(),
                node_id: "act".to_string(),
                kind: "stage".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                branch: Some("success".to_string()),
                started_at: "1".to_string(),
                finished_at: Some("2".to_string()),
                visible_text: Some("done".to_string()),
                private_reasoning: None,
                transcript: None,
                verification: None,
                usage: None,
                artifacts: vec![ArtifactRecord {
                    type_name: "artifact".to_string(),
                    id: "a1".to_string(),
                    kind: "summary".to_string(),
                    text: Some("done".to_string()),
                    created_at: "1".to_string(),
                    ..Default::default()
                }
                .normalize()],
                consumed_artifact_ids: vec![],
                produced_artifact_ids: vec!["a1".to_string()],
                attempts: vec![],
                metadata: BTreeMap::new(),
            }],
            transitions: vec![],
            checkpoints: vec![],
            pending_nodes: vec![],
            completed_nodes: vec!["act".to_string()],
            child_runs: vec![],
            artifacts: vec![],
            policy: CapabilityPolicy::default(),
            execution: None,
            transcript: None,
            usage: None,
            replay_fixture: None,
            metadata: BTreeMap::new(),
            persisted_path: None,
        };
        // A fixture derived from a run must evaluate cleanly against that same run.
        let fixture = replay_fixture_from_run(&run);
        let report = evaluate_run_against_fixture(&run, &fixture);
        assert!(report.pass);
        assert!(report.failures.is_empty());
    }

    #[test]
    fn replay_eval_suite_reports_failed_case() {
        let good = RunRecord {
            id: "run_good".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let bad = RunRecord {
            id: "run_bad".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let suite = evaluate_run_suite(vec![
            (
                good.clone(),
                replay_fixture_from_run(&good),
                Some("good.json".to_string()),
            ),
            // Deliberately pair the bad run with the *good* run's fixture to
            // force exactly one failing case.
            (
                bad.clone(),
                replay_fixture_from_run(&good),
                Some("bad.json".to_string()),
            ),
        ]);
        assert!(!suite.pass);
        assert_eq!(suite.total, 2);
        assert_eq!(suite.failed, 1);
        assert!(suite.cases.iter().any(|case| !case.pass));
    }

    #[test]
    fn run_diff_reports_changed_stage() {
        let left = RunRecord {
            id: "left".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let right = RunRecord {
            id: "right".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        let diff = diff_run_records(&left, &right);
        assert!(diff.status_changed);
        assert!(!diff.identical);
        assert_eq!(diff.stage_diffs.len(), 1);
    }

    #[test]
    fn eval_suite_manifest_can_fail_on_baseline_diff() {
        let temp_dir =
            std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
        std::fs::create_dir_all(&temp_dir).unwrap();
        let baseline_path = temp_dir.join("baseline.json");
        let candidate_path = temp_dir.join("candidate.json");

        let baseline = RunRecord {
            id: "baseline".to_string(),
            workflow_id: "wf".to_string(),
            status: "completed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };
        // The candidate's status diverges from the baseline, so the
        // baseline comparison should report a failure.
        let candidate = RunRecord {
            id: "candidate".to_string(),
            workflow_id: "wf".to_string(),
            status: "failed".to_string(),
            stages: vec![RunStageRecord {
                node_id: "act".to_string(),
                status: "failed".to_string(),
                outcome: "error".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        };

        save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
        save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();

        let manifest = EvalSuiteManifest {
            base_dir: Some(temp_dir.display().to_string()),
            cases: vec![EvalSuiteCase {
                label: Some("candidate".to_string()),
                run_path: "candidate.json".to_string(),
                fixture_path: None,
                compare_to: Some("baseline.json".to_string()),
            }],
            ..Default::default()
        };
        let suite = evaluate_run_suite_manifest(&manifest).unwrap();
        assert!(!suite.pass);
        assert_eq!(suite.failed, 1);
        assert!(suite.cases[0].comparison.is_some());
        assert!(suite.cases[0]
            .failures
            .iter()
            .any(|failure| failure.contains("baseline")));
    }

    #[test]
    fn render_unified_diff_marks_removed_and_added_lines() {
        let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
        assert!(diff.contains("--- a/src/main.rs"));
        assert!(diff.contains("+++ b/src/main.rs"));
        assert!(diff.contains("-old"));
        assert!(diff.contains("+new"));
        assert!(diff.contains(" same"));
    }

    #[test]
    fn execution_policy_rejects_process_exec_when_read_only() {
        // "exec" is granted as a capability but the read_only side-effect
        // level must still block it.
        push_execution_policy(CapabilityPolicy {
            side_effect_level: Some("read_only".to_string()),
            capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
            ..Default::default()
        });
        let result = enforce_current_policy_for_builtin("exec", &[]);
        pop_execution_policy();
        assert!(result.is_err());
    }

    #[test]
    fn execution_policy_rejects_unlisted_tool() {
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            ..Default::default()
        });
        let result = enforce_current_policy_for_tool("edit");
        pop_execution_policy();
        assert!(result.is_err());
    }

    // ── Tool hook tests ──────────────────────────────────────────────

    #[test]
    fn pre_tool_hook_deny_blocks_execution() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "dangerous_*".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Deny("blocked by policy".to_string())
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Deny(_)));
    }

    #[test]
    fn pre_tool_hook_allow_passes_through() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "safe_*".to_string(),
            pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
            post: None,
        });
        let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Allow));
    }

    #[test]
    fn pre_tool_hook_modify_rewrites_args() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "*".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
        clear_tool_hooks();
        match result {
            PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
            _ => panic!("expected Modify"),
        }
    }

    #[test]
    fn post_tool_hook_modifies_result() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "exec".to_string(),
            pre: None,
            post: Some(Rc::new(|_name, result| {
                if result.contains("SECRET") {
                    PostToolAction::Modify("[REDACTED]".to_string())
                } else {
                    PostToolAction::Pass
                }
            })),
        });
        let result = run_post_tool_hooks("exec", "output with SECRET data");
        let clean = run_post_tool_hooks("exec", "clean output");
        clear_tool_hooks();
        assert_eq!(result, "[REDACTED]");
        assert_eq!(clean, "clean output");
    }

    #[test]
    fn unmatched_hook_pattern_does_not_fire() {
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "exec".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Deny("should not match".to_string())
            })),
            post: None,
        });
        // Hook targets "exec" only; "read_file" must fall through to Allow.
        let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Allow));
    }

    #[test]
    fn glob_match_patterns() {
        assert!(glob_match("*", "anything"));
        assert!(glob_match("exec*", "exec_at"));
        assert!(glob_match("*_file", "read_file"));
        assert!(!glob_match("exec*", "read_file"));
        assert!(glob_match("read_file", "read_file"));
        assert!(!glob_match("read_file", "write_file"));
    }

    // ── Auto-compaction tests ────────────────────────────────────────

    #[test]
    fn microcompact_snips_large_output() {
        let large = "x".repeat(50_000);
        let result = microcompact_tool_output(&large, 10_000);
        assert!(result.len() < 15_000);
        assert!(result.contains("snipped"));
    }

    #[test]
    fn microcompact_preserves_small_output() {
        let small = "hello world";
        let result = microcompact_tool_output(small, 10_000);
        assert_eq!(result, small);
    }

    #[test]
    fn auto_compact_messages_reduces_count() {
        let mut messages: Vec<serde_json::Value> = (0..20)
            .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
            .collect();
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();
        let compacted = runtime.block_on(auto_compact_messages(
            &mut messages,
            &AutoCompactConfig {
                compact_strategy: CompactStrategy::Truncate,
                keep_last: 6,
                ..Default::default()
            },
            None,
        ));
        assert!(compacted.unwrap());
        assert!(messages.len() <= 7); // 6 kept + 1 summary
        assert!(messages[0]["content"]
            .as_str()
            .unwrap()
            .contains("auto-compacted"));
    }

    #[test]
    fn auto_compact_noop_when_under_threshold() {
        // Only 4 messages with keep_last = 6: nothing should be compacted.
        let mut messages: Vec<serde_json::Value> = (0..4)
            .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
            .collect();
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();
        let compacted = runtime.block_on(auto_compact_messages(
            &mut messages,
            &AutoCompactConfig {
                compact_strategy: CompactStrategy::Truncate,
                keep_last: 6,
                ..Default::default()
            },
            None,
        ));
        assert!(!compacted.unwrap());
        assert_eq!(messages.len(), 4);
    }

    #[test]
    fn estimate_message_tokens_basic() {
        let messages = vec![
            serde_json::json!({"role": "user", "content": "a".repeat(400)}),
            serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
        ];
        let tokens = estimate_message_tokens(&messages);
        assert_eq!(tokens, 200); // 800 chars / 4
    }

    // ── Artifact dedup and microcompaction tests ─────────────────────

    #[test]
    fn dedup_artifacts_removes_duplicates() {
        let mut artifacts = vec![
            ArtifactRecord {
                id: "a1".to_string(),
                kind: "test".to_string(),
                text: Some("duplicate content".to_string()),
                ..Default::default()
            },
            ArtifactRecord {
                id: "a2".to_string(),
                kind: "test".to_string(),
                text: Some("duplicate content".to_string()),
                ..Default::default()
            },
            ArtifactRecord {
                id: "a3".to_string(),
                kind: "test".to_string(),
                text: Some("unique content".to_string()),
                ..Default::default()
            },
        ];
        // a1/a2 share identical text despite distinct ids; one must be removed.
        dedup_artifacts(&mut artifacts);
        assert_eq!(artifacts.len(), 2);
    }

    #[test]
    fn microcompact_artifact_snips_oversized() {
        let mut artifact = ArtifactRecord {
            id: "a1".to_string(),
            kind: "test".to_string(),
            text: Some("x".repeat(10_000)),
            estimated_tokens: Some(2_500),
            ..Default::default()
        };
        microcompact_artifact(&mut artifact, 500);
        assert!(artifact.text.as_ref().unwrap().len() < 5_000);
        assert_eq!(artifact.estimated_tokens, Some(500));
    }

    // ── Tool argument constraint tests ───────────────────────────────

    #[test]
    fn arg_constraint_allows_matching_pattern() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "exec",
            &serde_json::json!({"command": "cargo test"}),
        );
        assert!(result.is_ok());
    }

    #[test]
    fn arg_constraint_rejects_non_matching_pattern() {
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "exec",
            &serde_json::json!({"command": "rm -rf /"}),
        );
        assert!(result.is_err());
    }

    #[test]
    fn arg_constraint_ignores_unmatched_tool() {
        // Constraints apply per tool; "read_file" has none, so anything passes.
        let policy = CapabilityPolicy {
            tool_arg_constraints: vec![ToolArgConstraint {
                tool: "exec".to_string(),
                arg_patterns: vec!["cargo *".to_string()],
            }],
            ..Default::default()
        };
        let result = enforce_tool_arg_constraints(
            &policy,
            "read_file",
            &serde_json::json!({"path": "/etc/passwd"}),
        );
        assert!(result.is_ok());
    }
}