// harn_vm/orchestration.rs — sessions, tool lifecycle hooks, transcript compaction.

1use std::collections::{BTreeMap, BTreeSet};
2use std::path::{Path, PathBuf};
3use std::rc::Rc;
4use std::{cell::RefCell, thread_local};
5
6use serde::{Deserialize, Serialize};
7
8use crate::llm::{extract_llm_options, vm_call_llm_full, vm_value_to_json};
9use crate::value::{VmError, VmValue};
10
/// Current time as an RFC 3339 UTC timestamp (e.g. `2024-05-01T12:34:56Z`).
///
/// Previously this returned the bare Unix-epoch seconds as a string, which
/// contradicted the function name; it now performs proper calendar
/// conversion with no external crates.
fn now_rfc3339() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let ts = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    format_epoch_rfc3339(ts)
}

/// Convert seconds since the Unix epoch into an RFC 3339 UTC string.
///
/// Uses the standard `civil_from_days` algorithm (Howard Hinnant) to turn a
/// day count into (year, month, day) without a date library.
fn format_epoch_rfc3339(ts: u64) -> String {
    let secs_of_day = ts % 86_400;
    let (hh, mm, ss) = (secs_of_day / 3600, (secs_of_day / 60) % 60, secs_of_day % 60);
    // Shift epoch from 1970-01-01 to 0000-03-01 so leap days fall at year end.
    let z = (ts / 86_400) as i64 + 719_468;
    let era = z.div_euclid(146_097);
    let doe = z.rem_euclid(146_097); // day of 400-year era [0, 146096]
    let yoe = (doe - doe / 1_460 + doe / 36_524 - doe / 146_096) / 365; // year of era
    let y = yoe + era * 400;
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day of March-based year
    let mp = (5 * doy + 2) / 153; // month index, March = 0
    let d = doy - (153 * mp + 2) / 5 + 1;
    let m = if mp < 10 { mp + 3 } else { mp - 9 };
    let y = if m <= 2 { y + 1 } else { y };
    format!("{y:04}-{m:02}-{d:02}T{hh:02}:{mm:02}:{ss:02}Z")
}
19
20fn new_id(prefix: &str) -> String {
21    format!("{prefix}_{}", uuid::Uuid::now_v7())
22}
23
/// Directory where run artifacts are written: `$HARN_RUN_DIR` when set,
/// otherwise the local `.harn-runs` directory.
fn default_run_dir() -> PathBuf {
    match std::env::var("HARN_RUN_DIR") {
        Ok(dir) => PathBuf::from(dir),
        Err(_) => PathBuf::from(".harn-runs"),
    }
}
29
thread_local! {
    // Per-thread stack of capability policies; not referenced in this chunk —
    // presumably pushed/popped around nested executions elsewhere (verify).
    static EXECUTION_POLICY_STACK: RefCell<Vec<CapabilityPolicy>> = const { RefCell::new(Vec::new()) };
    // Per-thread registry of tool lifecycle hooks; see `register_tool_hook`,
    // `run_pre_tool_hooks`, `run_post_tool_hooks`.
    static TOOL_HOOKS: RefCell<Vec<ToolHook>> = const { RefCell::new(Vec::new()) };
    // Per-thread record of the active mutation session, if any; managed by
    // `install_current_mutation_session` / `current_mutation_session`.
    static CURRENT_MUTATION_SESSION: RefCell<Option<MutationSessionRecord>> = const { RefCell::new(None) };
}
35
/// Record describing the mutation session the VM is executing under.
///
/// All fields default via `#[serde(default)]`; empty required string fields
/// are filled in by [`MutationSessionRecord::normalize`].
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct MutationSessionRecord {
    /// Unique session identifier; generated by `normalize` when empty.
    pub session_id: String,
    /// Optional identifier of a parent session.
    pub parent_session_id: Option<String>,
    /// Associated run identifier, if any.
    pub run_id: Option<String>,
    /// Identifier of the executing worker, if any.
    pub worker_id: Option<String>,
    /// Free-form execution kind tag, if known.
    pub execution_kind: Option<String>,
    /// Mutation scope; `normalize` defaults an empty value to `"read_only"`.
    pub mutation_scope: String,
    /// Approval mode; `normalize` defaults an empty value to `"host_enforced"`.
    pub approval_mode: String,
}
47
48impl MutationSessionRecord {
49    pub fn normalize(mut self) -> Self {
50        if self.session_id.is_empty() {
51            self.session_id = new_id("session");
52        }
53        if self.mutation_scope.is_empty() {
54            self.mutation_scope = "read_only".to_string();
55        }
56        if self.approval_mode.is_empty() {
57            self.approval_mode = "host_enforced".to_string();
58        }
59        self
60    }
61}
62
63pub fn install_current_mutation_session(session: Option<MutationSessionRecord>) {
64    CURRENT_MUTATION_SESSION.with(|slot| {
65        *slot.borrow_mut() = session.map(MutationSessionRecord::normalize);
66    });
67}
68
69pub fn current_mutation_session() -> Option<MutationSessionRecord> {
70    CURRENT_MUTATION_SESSION.with(|slot| slot.borrow().clone())
71}
72
73// ── Tool lifecycle hooks ──────────────────────────────────────────────
74
/// Action returned by a PreToolUse hook.
///
/// Also the aggregate result of `run_pre_tool_hooks`, which folds all
/// matching hooks: the first `Deny` short-circuits, and accumulated
/// modifications surface as a single final `Modify`.
#[derive(Clone, Debug)]
pub enum PreToolAction {
    /// Allow the tool call to proceed unchanged.
    Allow,
    /// Deny the tool call with an explanation.
    Deny(String),
    /// Allow but replace the arguments.
    Modify(serde_json::Value),
}
85
/// Action returned by a PostToolUse hook.
///
/// Applied in registration order by `run_post_tool_hooks`; each `Modify`
/// replaces the result text fed to subsequent hooks.
#[derive(Clone, Debug)]
pub enum PostToolAction {
    /// Pass the result through unchanged.
    Pass,
    /// Replace the result text.
    Modify(String),
}
94
/// Callback types for tool lifecycle hooks.
///
/// A pre-hook receives `(tool_name, args)`; a post-hook receives
/// `(tool_name, result_text)`. Both are `Rc` so a `ToolHook` can be cloned
/// cheaply (hooks live in thread-local storage, so no `Arc` is needed).
pub type PreToolHookFn = Rc<dyn Fn(&str, &serde_json::Value) -> PreToolAction>;
pub type PostToolHookFn = Rc<dyn Fn(&str, &str) -> PostToolAction>;
98
/// A registered tool hook with a name pattern and callbacks.
///
/// Hooks are stored in the thread-local `TOOL_HOOKS` registry and run in
/// registration order by `run_pre_tool_hooks` / `run_post_tool_hooks`.
#[derive(Clone)]
pub struct ToolHook {
    /// Glob-style pattern matched against tool names (e.g. `"*"`, `"exec*"`, `"read_file"`).
    pub pattern: String,
    /// Called before tool execution. Return `Deny` to reject, `Modify` to rewrite args.
    pub pre: Option<PreToolHookFn>,
    /// Called after tool execution with the result text. Return `Modify` to rewrite.
    pub post: Option<PostToolHookFn>,
}
109
110impl std::fmt::Debug for ToolHook {
111    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112        f.debug_struct("ToolHook")
113            .field("pattern", &self.pattern)
114            .field("has_pre", &self.pre.is_some())
115            .field("has_post", &self.post.is_some())
116            .finish()
117    }
118}
119
/// Minimal glob matcher for tool-name patterns.
///
/// Supported forms:
/// - `"*"`       — matches everything
/// - `"*sub*"`   — substring match (previously unusable: the leading `*`
///                 was treated as a literal prefix, so such patterns never
///                 matched real tool names)
/// - `"prefix*"` — prefix match
/// - `"*suffix"` — suffix match
/// - `"name"`    — exact match
fn glob_match(pattern: &str, name: &str) -> bool {
    if pattern == "*" {
        return true;
    }
    // Both-ended star: substring containment. Must be checked before the
    // single-star cases below, which would otherwise swallow one of the stars.
    if let Some(middle) = pattern.strip_prefix('*').and_then(|p| p.strip_suffix('*')) {
        return name.contains(middle);
    }
    if let Some(prefix) = pattern.strip_suffix('*') {
        return name.starts_with(prefix);
    }
    if let Some(suffix) = pattern.strip_prefix('*') {
        return name.ends_with(suffix);
    }
    pattern == name
}
132
133pub fn register_tool_hook(hook: ToolHook) {
134    TOOL_HOOKS.with(|hooks| hooks.borrow_mut().push(hook));
135}
136
137pub fn clear_tool_hooks() {
138    TOOL_HOOKS.with(|hooks| hooks.borrow_mut().clear());
139}
140
141/// Run all matching PreToolUse hooks. Returns the final action.
142pub fn run_pre_tool_hooks(tool_name: &str, args: &serde_json::Value) -> PreToolAction {
143    TOOL_HOOKS.with(|hooks| {
144        let hooks = hooks.borrow();
145        let mut current_args = args.clone();
146        for hook in hooks.iter() {
147            if !glob_match(&hook.pattern, tool_name) {
148                continue;
149            }
150            if let Some(ref pre) = hook.pre {
151                match pre(tool_name, &current_args) {
152                    PreToolAction::Allow => {}
153                    PreToolAction::Deny(reason) => return PreToolAction::Deny(reason),
154                    PreToolAction::Modify(new_args) => {
155                        current_args = new_args;
156                    }
157                }
158            }
159        }
160        if current_args != *args {
161            PreToolAction::Modify(current_args)
162        } else {
163            PreToolAction::Allow
164        }
165    })
166}
167
168/// Run all matching PostToolUse hooks. Returns the (possibly modified) result.
169pub fn run_post_tool_hooks(tool_name: &str, result: &str) -> String {
170    TOOL_HOOKS.with(|hooks| {
171        let hooks = hooks.borrow();
172        let mut current = result.to_string();
173        for hook in hooks.iter() {
174            if !glob_match(&hook.pattern, tool_name) {
175                continue;
176            }
177            if let Some(ref post) = hook.post {
178                match post(tool_name, &current) {
179                    PostToolAction::Pass => {}
180                    PostToolAction::Modify(new_result) => {
181                        current = new_result;
182                    }
183                }
184            }
185        }
186        current
187    })
188}
189
190// ── Auto-compaction ───────────────────────────────────────────────────
191
/// Strategy used to compress archived transcript messages.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CompactStrategy {
    /// Summarize the archived messages with a dedicated LLM call.
    Llm,
    /// Deterministic per-message abbreviation (no LLM involved).
    Truncate,
    /// Delegate to a host-provided Harn closure (`custom_compactor`).
    Custom,
    /// Mask verbose tool results while keeping assistant prose and errors.
    ObservationMask,
}
199
200pub fn parse_compact_strategy(value: &str) -> Result<CompactStrategy, VmError> {
201    match value {
202        "llm" => Ok(CompactStrategy::Llm),
203        "truncate" => Ok(CompactStrategy::Truncate),
204        "custom" => Ok(CompactStrategy::Custom),
205        "observation_mask" => Ok(CompactStrategy::ObservationMask),
206        other => Err(VmError::Runtime(format!(
207            "unknown compact_strategy '{other}' (expected 'llm', 'truncate', 'custom', or 'observation_mask')"
208        ))),
209    }
210}
211
/// Configuration for automatic transcript compaction in agent loops.
///
/// Two-tier compaction:
///   Tier 1 (`token_threshold` / `compact_strategy`): lightweight, deterministic
///     observation masking that fires early. Masks verbose tool results while
///     preserving assistant prose and error output.
///   Tier 2 (`hard_limit_tokens` / `hard_limit_strategy`): aggressive LLM-powered
///     summarization that fires when tier-1 alone isn't enough, typically as the
///     transcript approaches the model's actual context window.
///
/// Token counts throughout use the chars/4 heuristic from
/// `estimate_message_tokens`; see the `Default` impl for default values.
#[derive(Clone, Debug)]
pub struct AutoCompactConfig {
    /// Tier-1 threshold: estimated tokens before lightweight compaction.
    pub token_threshold: usize,
    /// Maximum character length for a single tool result before microcompaction.
    pub tool_output_max_chars: usize,
    /// Number of recent messages to keep during compaction.
    pub keep_last: usize,
    /// Tier-1 strategy (default: ObservationMask).
    pub compact_strategy: CompactStrategy,
    /// Tier-2 threshold: fires when tier-1 result still exceeds this.
    /// Typically set to ~75% of the model's actual context window.
    /// When `None`, tier-2 is disabled.
    pub hard_limit_tokens: Option<usize>,
    /// Tier-2 strategy (default: Llm).
    pub hard_limit_strategy: CompactStrategy,
    /// Optional Harn callback used when a strategy is `custom`.
    pub custom_compactor: Option<VmValue>,
    /// Optional callback for domain-specific per-message masking during
    /// observation mask compaction. Called with a list of archived messages,
    /// returns a list of `Option<String>` — `Some(masked)` to override the
    /// default mask for that message, `None` to use the default.
    /// This lets the host (e.g. burin-code) inject AST outlines, file
    /// summaries, etc. without putting language-specific logic in Harn.
    pub mask_callback: Option<VmValue>,
}
247
impl Default for AutoCompactConfig {
    /// Defaults tuned for long agent loops: tier-1 observation masking at
    /// ~48k estimated tokens, single tool outputs snipped past 16k chars,
    /// the 12 most recent messages kept verbatim, and tier-2 disabled.
    fn default() -> Self {
        Self {
            token_threshold: 48_000,
            tool_output_max_chars: 16_000,
            keep_last: 12,
            compact_strategy: CompactStrategy::ObservationMask,
            // Tier-2 off unless the host opts in with a real context limit.
            hard_limit_tokens: None,
            hard_limit_strategy: CompactStrategy::Llm,
            custom_compactor: None,
            mask_callback: None,
        }
    }
}
262
263/// Estimate token count from a list of JSON messages (chars / 4 heuristic).
264pub fn estimate_message_tokens(messages: &[serde_json::Value]) -> usize {
265    messages
266        .iter()
267        .map(|m| {
268            m.get("content")
269                .and_then(|c| c.as_str())
270                .map(|s| s.len())
271                .unwrap_or(0)
272        })
273        .sum::<usize>()
274        / 4
275}
276
/// Microcompact a tool result: if it exceeds `max_chars`, keep the first and
/// last portions with a snip marker in between.
///
/// When the output contains recognizable diagnostic lines (compiler errors,
/// panics, failures), up to 32 of them are preserved verbatim between the
/// head and tail so the agent can still act on them. Outputs at or under
/// `max_chars`, and budgets under 200 chars, are returned unchanged.
pub fn microcompact_tool_output(output: &str, max_chars: usize) -> String {
    if output.len() <= max_chars || max_chars < 200 {
        return output.to_string();
    }
    let diagnostic_lines = output
        .lines()
        .filter(|line| {
            let trimmed = line.trim();
            let lower = trimmed.to_lowercase();
            // file:line pattern (e.g. "src/main.rs:42:" or "foo.go:10:5:")
            let has_file_line = {
                let bytes = trimmed.as_bytes();
                let mut i = 0;
                let mut found_colon = false;
                while i < bytes.len() {
                    if bytes[i] == b':' {
                        found_colon = true;
                        break;
                    }
                    i += 1;
                }
                // The first ':' must be immediately followed by a digit.
                found_colon && i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit()
            };
            // Generic keyword classification. Split into "strong" keywords
            // whose presence alone signals a diagnostic line (regardless of
            // whether the line also has a file:line reference), and "weak"
            // keywords which only count as diagnostic when paired with a
            // file:line (to avoid false positives on narrative prose that
            // happens to contain the word "error" or "expected").
            //
            // These are deliberately generic — not tied to any specific
            // language's test runner output format. Language-specific
            // patterns (Go's "--- FAIL:", pytest's "FAILED tests/", Rust's
            // "thread 'X' panicked at") should be supplied by the pipeline
            // via the `extra_diagnostic_patterns` auto_compact option,
            // which is where language/runner awareness belongs.
            let has_strong_keyword =
                trimmed.contains("FAIL") || trimmed.contains("panic") || trimmed.contains("Panic");
            let has_weak_keyword = trimmed.contains("error")
                || trimmed.contains("undefined")
                || trimmed.contains("expected")
                || trimmed.contains("got")
                || lower.contains("cannot find")
                || lower.contains("not found")
                || lower.contains("no such")
                || lower.contains("unresolved")
                || lower.contains("missing")
                || lower.contains("declared but not used")
                || lower.contains("unused")
                || lower.contains("mismatch");
            let positional = lower.contains(" error ")
                || lower.starts_with("error:")
                || lower.starts_with("warning:")
                || lower.starts_with("note:")
                || lower.contains("panic:");
            has_strong_keyword || (has_file_line && has_weak_keyword) || positional
        })
        .take(32)
        .collect::<Vec<_>>();
    if !diagnostic_lines.is_empty() {
        let diagnostics = diagnostic_lines.join("\n");
        // Reserve room for the diagnostics plus marker text, then split the
        // remaining budget evenly between head and tail slices.
        let budget = max_chars.saturating_sub(diagnostics.len() + 64);
        let keep = budget / 2;
        // Only take this path when the kept slices are non-trivial and
        // actually disjoint; otherwise fall through to the plain snip below.
        if keep >= 80 && output.len() > keep * 2 {
            // char-boundary-safe slicing (head/tail may land mid-UTF-8 char).
            let head_end = output.floor_char_boundary(keep);
            let tail_start = output.ceil_char_boundary(output.len() - keep);
            let head = &output[..head_end];
            let tail = &output[tail_start..];
            return format!(
                "{head}\n\n[diagnostic lines preserved]\n{diagnostics}\n\n[... output compacted ...]\n\n{tail}"
            );
        }
    }
    // Plain head/tail snip with no diagnostics preserved.
    let keep = max_chars / 2;
    let head_end = output.floor_char_boundary(keep);
    let tail_start = output.ceil_char_boundary(output.len() - keep);
    let head = &output[..head_end];
    let tail = &output[tail_start..];
    // Approximate count: boundary rounding may shift this by a few chars.
    let snipped = output.len() - max_chars;
    format!("{head}\n\n[... {snipped} characters snipped ...]\n\n{tail}")
}
360
361fn format_compaction_messages(messages: &[serde_json::Value]) -> String {
362    messages
363        .iter()
364        .map(|msg| {
365            let role = msg
366                .get("role")
367                .and_then(|v| v.as_str())
368                .unwrap_or("user")
369                .to_uppercase();
370            let content = msg
371                .get("content")
372                .and_then(|v| v.as_str())
373                .unwrap_or_default();
374            format!("{role}: {content}")
375        })
376        .collect::<Vec<_>>()
377        .join("\n")
378}
379
/// Truncate-strategy summary with the normal (non-LLM-fallback) header; thin
/// wrapper over [`truncate_compaction_summary_with_context`].
fn truncate_compaction_summary(
    old_messages: &[serde_json::Value],
    archived_count: usize,
) -> String {
    truncate_compaction_summary_with_context(old_messages, archived_count, false)
}
386
387fn truncate_compaction_summary_with_context(
388    old_messages: &[serde_json::Value],
389    archived_count: usize,
390    is_llm_fallback: bool,
391) -> String {
392    let per_msg_limit = 500_usize;
393    let summary_parts: Vec<String> = old_messages
394        .iter()
395        .filter_map(|m| {
396            let role = m.get("role")?.as_str()?;
397            let content = m.get("content")?.as_str()?;
398            if content.is_empty() {
399                return None;
400            }
401            let truncated = if content.len() > per_msg_limit {
402                format!(
403                    "{}... [truncated from {} chars]",
404                    &content[..content.floor_char_boundary(per_msg_limit)],
405                    content.len()
406                )
407            } else {
408                content.to_string()
409            };
410            Some(format!("[{role}] {truncated}"))
411        })
412        .take(15)
413        .collect();
414    let header = if is_llm_fallback {
415        format!(
416            "[auto-compact fallback: LLM summarizer returned empty; {archived_count} older messages abbreviated to ~{per_msg_limit} chars each]"
417        )
418    } else {
419        format!("[auto-compacted {archived_count} older messages via truncate strategy]")
420    };
421    format!(
422        "{header}\n{}{}",
423        summary_parts.join("\n"),
424        if archived_count > 15 {
425            format!("\n... and {} more", archived_count - 15)
426        } else {
427            String::new()
428        }
429    )
430}
431
432fn compact_summary_text_from_value(value: &VmValue) -> Result<String, VmError> {
433    if let Some(map) = value.as_dict() {
434        if let Some(summary) = map.get("summary").or_else(|| map.get("text")) {
435            return Ok(summary.display());
436        }
437    }
438    match value {
439        VmValue::String(text) => Ok(text.to_string()),
440        VmValue::Nil => Ok(String::new()),
441        _ => serde_json::to_string_pretty(&vm_value_to_json(value))
442            .map_err(|e| VmError::Runtime(format!("custom compactor encode error: {e}"))),
443    }
444}
445
/// Summarize archived messages with a dedicated LLM call.
///
/// Derives a stripped-down copy of the caller's LLM options so the
/// summarization call cannot inherit agent-loop state (system prompt,
/// transcript identity, native tools, response-format constraints), sends a
/// single-user-message summarization prompt, and prefixes the result with an
/// auto-compact header. Falls back to the deterministic truncate summary
/// when the model returns empty text.
async fn llm_compaction_summary(
    old_messages: &[serde_json::Value],
    archived_count: usize,
    llm_opts: &crate::llm::api::LlmCallOptions,
) -> Result<String, VmError> {
    let mut compact_opts = llm_opts.clone();
    let formatted = format_compaction_messages(old_messages);
    // Clear everything that could leak agent-loop behavior into this call.
    compact_opts.system = None;
    compact_opts.transcript_id = None;
    compact_opts.transcript_summary = None;
    compact_opts.transcript_metadata = None;
    compact_opts.native_tools = None;
    compact_opts.tool_choice = None;
    compact_opts.response_format = None;
    compact_opts.json_schema = None;
    compact_opts.messages = vec![serde_json::json!({
        "role": "user",
        "content": format!(
            "Summarize these archived conversation messages for a follow-on coding agent. Preserve goals, constraints, decisions, completed tool work, unresolved issues, and next actions. Output only the summary text.\n\nArchived message count: {archived_count}\n\nConversation:\n{formatted}"
        ),
    })];
    let result = vm_call_llm_full(&compact_opts).await?;
    let summary = result.text.trim();
    if summary.is_empty() {
        // Never inject an empty summary: degrade to the truncate strategy,
        // flagged as an LLM fallback in its header.
        Ok(truncate_compaction_summary_with_context(
            old_messages,
            archived_count,
            true,
        ))
    } else {
        Ok(format!(
            "[auto-compacted {archived_count} older messages]\n{summary}"
        ))
    }
}
481
482async fn custom_compaction_summary(
483    old_messages: &[serde_json::Value],
484    archived_count: usize,
485    callback: &VmValue,
486) -> Result<String, VmError> {
487    let Some(VmValue::Closure(closure)) = Some(callback.clone()) else {
488        return Err(VmError::Runtime(
489            "compact_callback must be a closure when compact_strategy is 'custom'".to_string(),
490        ));
491    };
492    let mut vm = crate::vm::clone_async_builtin_child_vm().ok_or_else(|| {
493        VmError::Runtime(
494            "custom transcript compaction requires an async builtin VM context".to_string(),
495        )
496    })?;
497    let messages_vm = VmValue::List(Rc::new(
498        old_messages
499            .iter()
500            .map(crate::stdlib::json_to_vm_value)
501            .collect(),
502    ));
503    let result = vm.call_closure_pub(&closure, &[messages_vm], &[]).await;
504    let summary = compact_summary_text_from_value(&result?)?;
505    if summary.trim().is_empty() {
506        Ok(truncate_compaction_summary(old_messages, archived_count))
507    } else {
508        Ok(format!(
509            "[auto-compacted {archived_count} older messages]\n{summary}"
510        ))
511    }
512}
513
/// Check whether a tool-result string contains error signals that should
/// be preserved verbatim during observation masking.
///
/// Case-insensitive substring search over a fixed set of error indicators.
fn content_has_error_signal(content: &str) -> bool {
    const SIGNALS: [&str; 7] = [
        "error",
        "fail",
        "panic",
        "non-zero exit",
        "exit code",
        "traceback",
        "exception",
    ];
    let lower = content.to_ascii_lowercase();
    SIGNALS.iter().any(|signal| lower.contains(signal))
}
526
/// Default per-message masking for tool results: keep the first line as a
/// preview, report total size. Domain-specific masking (e.g. adding file
/// outlines for edit results) is handled by the optional `mask_callback`
/// on `AutoCompactConfig`, which the host (burin-code) can provide.
///
/// The preview is cut at a UTF-8 character boundary; the previous byte-index
/// slice (`&first_line[..120]`) panicked whenever byte 120 fell inside a
/// multi-byte character.
fn default_mask_tool_result(role: &str, content: &str) -> String {
    let first_line = content.lines().next().unwrap_or(content);
    let line_count = content.lines().count();
    // NOTE: `len()` is bytes, not chars — kept for output compatibility.
    let char_count = content.len();
    if line_count <= 3 {
        // Short results pass through unmasked.
        format!("[{role}] {content}")
    } else {
        // Walk back from the 120-byte cap to the nearest char boundary so the
        // slice below can never split a multi-byte character.
        let mut cut = first_line.len().min(120);
        while !first_line.is_char_boundary(cut) {
            cut -= 1;
        }
        let preview = &first_line[..cut];
        format!("[{role}] {preview}... [{line_count} lines, {char_count} chars masked]")
    }
}
542
/// Deterministic observation-mask compaction: keep model prose intact, replace
/// old tool results with one-line summaries unless they contain error signals.
///
/// When a mask callback is configured, it is invoked once with the full list
/// of archived messages and returns per-message `Option<String>` overrides
/// (see `invoke_mask_callback`); `None` entries fall back to the default
/// masking. (An earlier version of this comment described a per-message
/// `(role, content)` callback — that is not how the callback is invoked.)
///
/// Test-only convenience wrapper with no callback; production code calls
/// `observation_mask_compaction_with_callback` directly.
#[cfg(test)]
fn observation_mask_compaction(
    old_messages: &[serde_json::Value],
    archived_count: usize,
) -> String {
    observation_mask_compaction_with_callback(old_messages, archived_count, None)
}
557
558fn observation_mask_compaction_with_callback(
559    old_messages: &[serde_json::Value],
560    archived_count: usize,
561    mask_results: Option<&[Option<String>]>,
562) -> String {
563    let mut parts = Vec::new();
564    parts.push(format!(
565        "[auto-compacted {archived_count} older messages via observation masking]"
566    ));
567    for (idx, msg) in old_messages.iter().enumerate() {
568        let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("user");
569        let content = msg
570            .get("content")
571            .and_then(|v| v.as_str())
572            .unwrap_or_default();
573        if content.is_empty() {
574            continue;
575        }
576        if role == "assistant" {
577            parts.push(format!("[assistant] {content}"));
578            continue;
579        }
580        // User messages in agent loops are tool results.
581        if content_has_error_signal(content) {
582            parts.push(format!("[{role}] {content}"));
583        } else if let Some(Some(custom)) = mask_results.and_then(|r| r.get(idx)) {
584            // Host-provided custom mask for this message.
585            parts.push(custom.clone());
586        } else {
587            parts.push(default_mask_tool_result(role, content));
588        }
589    }
590    parts.join("\n")
591}
592
593/// Invoke the mask_callback to get per-message custom masks. The callback
594/// receives the list of archived messages (as a VmValue list of dicts) and
595/// returns a list of `Option<String>`.
596async fn invoke_mask_callback(
597    callback: &VmValue,
598    old_messages: &[serde_json::Value],
599) -> Result<Vec<Option<String>>, VmError> {
600    let VmValue::Closure(closure) = callback.clone() else {
601        return Err(VmError::Runtime(
602            "mask_callback must be a closure".to_string(),
603        ));
604    };
605    let mut vm = crate::vm::clone_async_builtin_child_vm().ok_or_else(|| {
606        VmError::Runtime("mask_callback requires an async builtin VM context".to_string())
607    })?;
608    let messages_vm = VmValue::List(Rc::new(
609        old_messages
610            .iter()
611            .map(crate::stdlib::json_to_vm_value)
612            .collect(),
613    ));
614    let result = vm.call_closure_pub(&closure, &[messages_vm], &[]).await?;
615    // Expect a list of strings or nils.
616    let list = match result {
617        VmValue::List(items) => items,
618        _ => return Ok(vec![None; old_messages.len()]),
619    };
620    Ok(list
621        .iter()
622        .map(|v| match v {
623            VmValue::String(s) => Some(s.to_string()),
624            VmValue::Nil => None,
625            _ => None,
626        })
627        .collect())
628}
629
630/// Apply a single compaction strategy to a list of archived messages.
631async fn apply_compaction_strategy(
632    strategy: &CompactStrategy,
633    old_messages: &[serde_json::Value],
634    archived_count: usize,
635    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
636    custom_compactor: Option<&VmValue>,
637    mask_callback: Option<&VmValue>,
638) -> Result<String, VmError> {
639    match strategy {
640        CompactStrategy::Truncate => Ok(truncate_compaction_summary(old_messages, archived_count)),
641        CompactStrategy::Llm => {
642            llm_compaction_summary(
643                old_messages,
644                archived_count,
645                llm_opts.ok_or_else(|| {
646                    VmError::Runtime(
647                        "LLM transcript compaction requires active LLM call options".to_string(),
648                    )
649                })?,
650            )
651            .await
652        }
653        CompactStrategy::Custom => {
654            custom_compaction_summary(
655                old_messages,
656                archived_count,
657                custom_compactor.ok_or_else(|| {
658                    VmError::Runtime(
659                        "compact_callback is required when compact_strategy is 'custom'"
660                            .to_string(),
661                    )
662                })?,
663            )
664            .await
665        }
666        CompactStrategy::ObservationMask => {
667            let mask_results = if let Some(cb) = mask_callback {
668                Some(invoke_mask_callback(cb, old_messages).await?)
669            } else {
670                None
671            };
672            Ok(observation_mask_compaction_with_callback(
673                old_messages,
674                archived_count,
675                mask_results.as_deref(),
676            ))
677        }
678    }
679}
680
/// Auto-compact a message list in place using two-tier compaction:
///
///   Tier 1: Applies `compact_strategy` (typically ObservationMask) to the
///     archived messages (everything except the last `keep_last`).
///
///   Tier 2: If `hard_limit_tokens` is set and the result (summary + kept
///     messages) still exceeds it, applies `hard_limit_strategy` (typically
///     Llm) to aggressively compress the tier-1 summary.
///
/// Returns `Ok(Some(summary))` when compaction ran (the summary is also
/// inserted as the first message), or `Ok(None)` when the list already had
/// at most `keep_last` messages.
///
/// NOTE(review): only `keep_last` gates compaction here — `token_threshold`
/// is presumably checked by the caller before invoking this; confirm at
/// call sites.
pub(crate) async fn auto_compact_messages(
    messages: &mut Vec<serde_json::Value>,
    config: &AutoCompactConfig,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
) -> Result<Option<String>, VmError> {
    if messages.len() <= config.keep_last {
        return Ok(None);
    }
    // Everything before `split_at` is archived; the rest stays verbatim.
    let split_at = messages.len().saturating_sub(config.keep_last);
    let old_messages: Vec<_> = messages.drain(..split_at).collect();
    let archived_count = old_messages.len();

    // Tier 1: lightweight compaction (with optional mask callback).
    let mut summary = apply_compaction_strategy(
        &config.compact_strategy,
        &old_messages,
        archived_count,
        llm_opts,
        config.custom_compactor.as_ref(),
        config.mask_callback.as_ref(),
    )
    .await?;

    // Tier 2: if hard limit set and still too large, apply aggressive strategy.
    if let Some(hard_limit) = config.hard_limit_tokens {
        // Estimate tokens for: summary message + kept messages.
        let summary_msg = serde_json::json!({"role": "user", "content": &summary});
        let mut estimate_msgs = vec![summary_msg];
        estimate_msgs.extend_from_slice(messages.as_slice());
        let estimated = estimate_message_tokens(&estimate_msgs);
        if estimated > hard_limit {
            // Wrap the tier-1 summary as a single archived message and
            // re-compact it with the aggressive strategy. No mask callback
            // for tier-2 — it's already been masked.
            let tier1_as_messages = vec![serde_json::json!({
                "role": "user",
                "content": summary,
            })];
            summary = apply_compaction_strategy(
                &config.hard_limit_strategy,
                &tier1_as_messages,
                archived_count,
                llm_opts,
                config.custom_compactor.as_ref(),
                None,
            )
            .await?;
        }
    }

    // The summary replaces the archived prefix as a single leading message.
    messages.insert(
        0,
        serde_json::json!({
            "role": "user",
            "content": summary,
        }),
    );
    Ok(Some(summary))
}
748
749// ── Adaptive context assembly ─────────────────────────────────────────
750
751/// Snip an artifact's text to fit within a token budget.
752pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
753    let max_chars = max_tokens * 4;
754    if let Some(ref text) = artifact.text {
755        if text.len() > max_chars && max_chars >= 200 {
756            artifact.text = Some(microcompact_tool_output(text, max_chars));
757            artifact.estimated_tokens = Some(max_tokens);
758        }
759    }
760}
761
/// Deduplicate artifacts by removing those with identical text content.
///
/// NOTE(review): this doc previously claimed "keeping the one with higher
/// priority", but the implementation keeps the *first* occurrence in list
/// order — callers must sort by priority beforehand if that matters.
/// Artifacts with empty or missing text are always retained.
pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
    let mut seen_hashes: BTreeSet<u64> = BTreeSet::new();
    artifacts.retain(|artifact| {
        let text = artifact.text.as_deref().unwrap_or("");
        if text.is_empty() {
            return true;
        }
        // Content-hash dedup: `insert` returns false for an already-seen
        // hash, so `retain` drops later duplicates.
        let hash = {
            use std::hash::{Hash, Hasher};
            let mut hasher = std::collections::hash_map::DefaultHasher::new();
            text.hash(&mut hasher);
            hasher.finish()
        };
        seen_hashes.insert(hash)
    });
}
781
782/// Enhanced artifact selection: dedup, microcompact oversized artifacts,
783/// then delegate to the standard `select_artifacts`.
784pub fn select_artifacts_adaptive(
785    mut artifacts: Vec<ArtifactRecord>,
786    policy: &ContextPolicy,
787) -> Vec<ArtifactRecord> {
788    // Phase 1: deduplicate
789    dedup_artifacts(&mut artifacts);
790
791    // Phase 2: microcompact oversized artifacts relative to budget.
792    // Cap individual artifacts to a fraction of the total budget, but don't
793    // let the per-artifact cap exceed the total budget (avoid overrun).
794    if let Some(max_tokens) = policy.max_tokens {
795        let count = artifacts.len().max(1);
796        let per_artifact_budget = max_tokens / count;
797        // Floor of 500 tokens, but never more than total budget
798        let cap = per_artifact_budget.max(500).min(max_tokens);
799        for artifact in &mut artifacts {
800            let est = artifact.estimated_tokens.unwrap_or(0);
801            if est > cap * 2 {
802                microcompact_artifact(artifact, cap);
803            }
804        }
805    }
806
807    // Phase 3: standard selection with budget
808    select_artifacts(artifacts, policy)
809}
810
// ── Per-agent policy with argument patterns ───────────────────────────

/// Extended policy that supports argument-level constraints.
/// Enforced by `enforce_tool_arg_constraints`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolArgConstraint {
    /// Tool name to constrain.
    /// Matched against the called tool via `glob_match`, so globs are allowed.
    pub tool: String,
    /// Glob patterns that the first string argument must match.
    /// If empty, no argument constraint is applied.
    pub arg_patterns: Vec<String>,
}
823
824/// Check if a tool call satisfies argument constraints in the policy.
825pub fn enforce_tool_arg_constraints(
826    policy: &CapabilityPolicy,
827    tool_name: &str,
828    args: &serde_json::Value,
829) -> Result<(), VmError> {
830    for constraint in &policy.tool_arg_constraints {
831        if !glob_match(&constraint.tool, tool_name) {
832            continue;
833        }
834        if constraint.arg_patterns.is_empty() {
835            continue;
836        }
837        // Extract the first string-like argument for pattern matching
838        let first_arg = args
839            .as_object()
840            .and_then(|o| o.values().next())
841            .and_then(|v| v.as_str())
842            .or_else(|| args.as_str())
843            .unwrap_or("");
844        let matches = constraint
845            .arg_patterns
846            .iter()
847            .any(|pattern| glob_match(pattern, first_arg));
848        if !matches {
849            return reject_policy(format!(
850                "tool '{tool_name}' argument '{first_arg}' does not match allowed patterns: {:?}",
851                constraint.arg_patterns
852            ));
853        }
854    }
855    Ok(())
856}
857
/// Map an artifact kind to its canonical name.
///
/// Legacy aliases ("file", "transcript", "verification", "test") are
/// rewritten to their canonical kinds, a blank/whitespace-only kind becomes
/// "artifact", and every other kind — including the canonical kinds
/// themselves ("resource", "workspace_file", "diff", "plan", ...) — passes
/// through unchanged.
fn normalize_artifact_kind(kind: &str) -> String {
    if kind.trim().is_empty() {
        return "artifact".to_string();
    }
    let canonical = match kind {
        "file" => "workspace_file",
        "transcript" => "transcript_summary",
        "verification" => "verification_result",
        "test" => "test_result",
        other => other,
    };
    canonical.to_string()
}
891
/// Default selection priority used when an artifact carries no explicit
/// priority; higher values win. Unrecognized kinds fall to the baseline 40.
fn default_artifact_priority(kind: &str) -> i64 {
    const TIERS: &[(&[&str], i64)] = &[
        (&["verification_result", "test_result"], 100),
        (&["verification_bundle"], 95),
        (
            &[
                "diff",
                "git_diff",
                "patch",
                "patch_set",
                "patch_proposal",
                "diff_review",
                "review_decision",
                "apply_intent",
            ],
            90,
        ),
        (&["plan"], 80),
        (
            &[
                "workspace_file",
                "workspace_snapshot",
                "editor_selection",
                "resource",
            ],
            70,
        ),
        (&["summary", "transcript_summary"], 60),
        (&["command_result"], 50),
    ];
    for (kinds, priority) in TIERS {
        if kinds.contains(&kind) {
            return *priority;
        }
    }
    40
}
905
/// Rank a freshness label for ordering: "fresh"/"live" (3) > "recent" (2)
/// > unknown or missing (1) > "stale" (0).
fn freshness_rank(value: Option<&str>) -> i64 {
    let label = value.unwrap_or("");
    if label == "fresh" || label == "live" {
        3
    } else if label == "recent" {
        2
    } else if label == "stale" {
        0
    } else {
        1
    }
}
914
/// Runtime policy metadata attached to a single tool, parsed out of the
/// tool definition's `policy` object by `parse_tool_runtime_policy`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolRuntimePolicyMetadata {
    /// Capability name -> operations this tool declares it needs.
    pub capabilities: BTreeMap<String, Vec<String>>,
    /// Declared side-effect level (see `min_side_effect` for the ranking).
    pub side_effect_level: Option<String>,
    /// Names of the tool's parameters that carry filesystem paths.
    pub path_params: Vec<String>,
    /// Declared mutation classification, if any — NOTE(review): values are
    /// interpreted by the host; confirm the accepted set there.
    pub mutation_classification: Option<String>,
}

/// A capability ceiling/grant for a workflow, node, or run.
///
/// Empty `tools`/`capabilities`/`workspace_roots` mean "unconstrained" for
/// that dimension (see `intersect`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CapabilityPolicy {
    /// Allowed tool names; empty = no tool restriction.
    pub tools: Vec<String>,
    /// Allowed operations per capability; empty = no capability restriction.
    pub capabilities: BTreeMap<String, Vec<String>>,
    /// Permitted workspace roots; empty = unrestricted.
    pub workspace_roots: Vec<String>,
    /// Maximum permitted side-effect level (see `min_side_effect`).
    pub side_effect_level: Option<String>,
    /// Recursion ceiling; `intersect` takes the minimum of both sides.
    pub recursion_limit: Option<usize>,
    /// Argument-level constraints for specific tools.
    #[serde(default)]
    pub tool_arg_constraints: Vec<ToolArgConstraint>,
    /// Per-tool runtime policy metadata, keyed by tool name.
    #[serde(default)]
    pub tool_metadata: BTreeMap<String, ToolRuntimePolicyMetadata>,
}
938
impl CapabilityPolicy {
    /// Intersect this policy (treated as the host ceiling) with a
    /// `requested` policy, producing the narrowed effective policy.
    ///
    /// For each dimension an empty collection means "unconstrained", so the
    /// other side's value is taken as-is. Returns `Err` when the request
    /// names tools or capability operations that a non-empty ceiling does
    /// not grant.
    pub fn intersect(&self, requested: &CapabilityPolicy) -> Result<CapabilityPolicy, String> {
        // Side-effect level: keep the more restrictive (lower-ranked) of the two.
        let side_effect_level = match (&self.side_effect_level, &requested.side_effect_level) {
            (Some(a), Some(b)) => Some(min_side_effect(a, b).to_string()),
            (Some(a), None) => Some(a.clone()),
            (None, Some(b)) => Some(b.clone()),
            (None, None) => None,
        };

        // Reject outright if the request names tools outside a non-empty
        // ceiling (an empty ceiling list allows any tool).
        if !self.tools.is_empty() {
            let denied: Vec<String> = requested
                .tools
                .iter()
                .filter(|tool| !self.tools.contains(*tool))
                .cloned()
                .collect();
            if !denied.is_empty() {
                return Err(format!(
                    "requested tools exceed host ceiling: {}",
                    denied.join(", ")
                ));
            }
        }

        // Reject capability operations the ceiling does not grant. A fully
        // empty ceiling map grants everything; a non-empty map with a
        // missing capability denies that capability entirely.
        for (capability, requested_ops) in &requested.capabilities {
            if let Some(allowed_ops) = self.capabilities.get(capability) {
                let denied: Vec<String> = requested_ops
                    .iter()
                    .filter(|op| !allowed_ops.contains(*op))
                    .cloned()
                    .collect();
                if !denied.is_empty() {
                    return Err(format!(
                        "requested capability operations exceed host ceiling: {}.{}",
                        capability,
                        denied.join(",")
                    ));
                }
            } else if !self.capabilities.is_empty() {
                return Err(format!(
                    "requested capability exceeds host ceiling: {capability}"
                ));
            }
        }

        // Tools: an empty side defers to the other; otherwise set intersection.
        let tools = if self.tools.is_empty() {
            requested.tools.clone()
        } else if requested.tools.is_empty() {
            self.tools.clone()
        } else {
            requested
                .tools
                .iter()
                .filter(|tool| self.tools.contains(*tool))
                .cloned()
                .collect()
        };

        // Capabilities: same empty-defers rule; otherwise keep only the
        // requested ops the ceiling also grants, per capability.
        let capabilities = if self.capabilities.is_empty() {
            requested.capabilities.clone()
        } else if requested.capabilities.is_empty() {
            self.capabilities.clone()
        } else {
            requested
                .capabilities
                .iter()
                .filter_map(|(capability, requested_ops)| {
                    self.capabilities.get(capability).map(|allowed_ops| {
                        (
                            capability.clone(),
                            requested_ops
                                .iter()
                                .filter(|op| allowed_ops.contains(*op))
                                .cloned()
                                .collect::<Vec<_>>(),
                        )
                    })
                })
                .collect()
        };

        // Workspace roots: empty-defers rule, then exact-string intersection.
        let workspace_roots = if self.workspace_roots.is_empty() {
            requested.workspace_roots.clone()
        } else if requested.workspace_roots.is_empty() {
            self.workspace_roots.clone()
        } else {
            requested
                .workspace_roots
                .iter()
                .filter(|root| self.workspace_roots.contains(*root))
                .cloned()
                .collect()
        };

        // Recursion limit: the tighter (smaller) of the two bounds.
        let recursion_limit = match (self.recursion_limit, requested.recursion_limit) {
            (Some(a), Some(b)) => Some(a.min(b)),
            (Some(a), None) => Some(a),
            (None, Some(b)) => Some(b),
            (None, None) => None,
        };

        // Merge arg constraints from both sides (concatenation — every
        // constraint from either side remains enforceable).
        let mut tool_arg_constraints = self.tool_arg_constraints.clone();
        tool_arg_constraints.extend(requested.tool_arg_constraints.clone());

        // Metadata for the surviving tools, preferring the requested side's
        // entry over the ceiling's when both exist.
        let tool_metadata = tools
            .iter()
            .filter_map(|tool| {
                requested
                    .tool_metadata
                    .get(tool)
                    .or_else(|| self.tool_metadata.get(tool))
                    .cloned()
                    .map(|metadata| (tool.clone(), metadata))
            })
            .collect();

        Ok(CapabilityPolicy {
            tools,
            capabilities,
            workspace_roots,
            side_effect_level,
            recursion_limit,
            tool_arg_constraints,
            tool_metadata,
        })
    }
}
1067
/// Return whichever of `a`/`b` denotes the weaker (more restrictive)
/// side-effect level; ties favor `a`. Unknown labels rank above "network",
/// i.e. they are treated as the most permissive level.
fn min_side_effect<'a>(a: &'a str, b: &'a str) -> &'a str {
    let rank = |level: &str| -> usize {
        match level {
            "none" => 0,
            "read_only" => 1,
            "workspace_write" => 2,
            "process_exec" => 3,
            "network" => 4,
            _ => 5,
        }
    };
    if rank(b) < rank(a) {
        b
    } else {
        a
    }
}
1085
/// Model/provider selection and generation limits for a stage.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ModelPolicy {
    /// Provider identifier; None defers to the host default.
    pub provider: Option<String>,
    /// Concrete model name; None defers to the host default.
    pub model: Option<String>,
    /// Abstract model tier — NOTE(review): accepted values are resolved by
    /// the LLM dispatch layer; confirm there.
    pub model_tier: Option<String>,
    /// Sampling temperature override.
    pub temperature: Option<f64>,
    /// Completion token limit override.
    pub max_tokens: Option<i64>,
    /// Maximum agent_loop iterations for this stage. Overrides the default 16.
    pub max_iterations: Option<usize>,
    /// Maximum consecutive text-only (no tool call) responses before declaring stuck.
    pub max_nudges: Option<usize>,
    /// Custom nudge message injected when the model produces text without tool calls.
    /// If omitted, the VM uses a generic "Continue — use a tool call" message.
    pub nudge: Option<String>,
}

/// Controls how a stage's transcript is retained and exposed.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct TranscriptPolicy {
    pub mode: Option<String>,
    pub visibility: Option<String>,
    /// Summarize the transcript — NOTE(review): applied by the runner.
    pub summarize: bool,
    /// Compact the transcript — NOTE(review): applied by the runner.
    pub compact: bool,
    /// Keep only the last N transcript entries.
    pub keep_last: Option<usize>,
}

/// Governs which artifacts are packed into a stage's context window.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ContextPolicy {
    /// Hard cap on the number of artifacts selected.
    pub max_artifacts: Option<usize>,
    /// Total token budget; also drives per-artifact microcompaction in
    /// `select_artifacts_adaptive`.
    pub max_tokens: Option<usize>,
    /// Tokens held back from the budget for other content.
    pub reserve_tokens: Option<usize>,
    /// Artifact kinds to include (empty presumably means all — confirm in
    /// `select_artifacts`).
    pub include_kinds: Vec<String>,
    /// Artifact kinds to exclude.
    pub exclude_kinds: Vec<String>,
    /// Kinds to rank ahead of others.
    pub prioritize_kinds: Vec<String>,
    /// Artifact ids pinned by the caller.
    pub pinned_ids: Vec<String>,
    /// Restrict to artifacts produced by these stages.
    pub include_stages: Vec<String>,
    pub prefer_recent: bool,
    /// Prefer artifacts with a higher freshness rank (see `freshness_rank`).
    pub prefer_fresh: bool,
    /// Rendering mode for the packed context.
    pub render: Option<String>,
}
1128
/// Retry/verify behavior for a stage's attempts.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RetryPolicy {
    /// Maximum attempts before the stage fails.
    pub max_attempts: usize,
    /// Run verification after each attempt.
    pub verify: bool,
    /// Attempt repair after a failed verification.
    pub repair: bool,
}

/// Contract on the artifacts a stage consumes or produces.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct StageContract {
    /// Accepted artifact kinds on input.
    pub input_kinds: Vec<String>,
    /// Expected artifact kinds on output.
    pub output_kinds: Vec<String>,
    pub min_inputs: Option<usize>,
    pub max_inputs: Option<usize>,
    /// Require a transcript to be present.
    pub require_transcript: bool,
    /// JSON schema the payload must satisfy, if any.
    pub schema: Option<serde_json::Value>,
}

/// Maps stage outcomes to successor branch labels.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BranchSemantics {
    pub success: Option<String>,
    pub failure: Option<String>,
    pub verify_pass: Option<String>,
    pub verify_fail: Option<String>,
    pub condition_true: Option<String>,
    pub condition_false: Option<String>,
    pub loop_continue: Option<String>,
    pub loop_exit: Option<String>,
    pub escalation: Option<String>,
}

/// Fan-out configuration: the items a map-style stage iterates over.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct MapPolicy {
    /// Items to process, one child execution per item.
    pub items: Vec<serde_json::Value>,
    /// Artifact kind each item is wrapped in.
    pub item_artifact_kind: Option<String>,
    /// Artifact kind of the mapped output.
    pub output_kind: Option<String>,
    /// Cap on the number of items processed.
    pub max_items: Option<usize>,
}

/// Fan-in configuration for join-style stages.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct JoinPolicy {
    /// Join strategy name — NOTE(review): interpreted by the runner.
    pub strategy: String,
    /// Require every input branch to have completed.
    pub require_all_inputs: bool,
    /// Minimum number of completed branches to proceed.
    pub min_completed: Option<usize>,
}

/// How joined results are folded into a single output.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReducePolicy {
    /// Reduction strategy name — NOTE(review): interpreted by the runner.
    pub strategy: String,
    /// Separator used when concatenating results.
    pub separator: Option<String>,
    /// Artifact kind of the reduced output.
    pub output_kind: Option<String>,
}

/// Escalation routing when a stage gives up.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EscalationPolicy {
    pub level: Option<String>,
    /// Queue the escalation is routed to.
    pub queue: Option<String>,
    /// Human-readable reason attached to the escalation.
    pub reason: Option<String>,
}
1194
/// A typed piece of context produced or consumed by workflow stages.
/// See `normalize` for the defaulting rules applied on ingestion.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
    /// Serialized type tag; defaults to "artifact".
    #[serde(rename = "_type")]
    pub type_name: String,
    /// Unique id; generated with the "artifact" prefix when empty.
    pub id: String,
    /// Canonical kind (see `normalize_artifact_kind`).
    pub kind: String,
    pub title: Option<String>,
    /// Text payload; drives the default `estimated_tokens` and dedup.
    pub text: Option<String>,
    /// Structured payload, if any.
    pub data: Option<serde_json::Value>,
    pub source: Option<String>,
    /// Creation timestamp; filled by `normalize` when empty.
    pub created_at: String,
    /// Freshness label ("fresh"/"live"/"recent"/"stale"; see `freshness_rank`).
    pub freshness: Option<String>,
    /// Selection priority; defaulted from the kind (see `default_artifact_priority`).
    pub priority: Option<i64>,
    /// Ids of the artifacts this one was derived from.
    pub lineage: Vec<String>,
    pub relevance: Option<f64>,
    /// Rough token estimate (~4 chars/token when derived from `text`).
    pub estimated_tokens: Option<usize>,
    /// Id of the stage that produced this artifact, if known.
    pub stage: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1215
1216impl ArtifactRecord {
1217    pub fn normalize(mut self) -> Self {
1218        if self.type_name.is_empty() {
1219            self.type_name = "artifact".to_string();
1220        }
1221        if self.id.is_empty() {
1222            self.id = new_id("artifact");
1223        }
1224        if self.created_at.is_empty() {
1225            self.created_at = now_rfc3339();
1226        }
1227        if self.kind.is_empty() {
1228            self.kind = "artifact".to_string();
1229        }
1230        self.kind = normalize_artifact_kind(&self.kind);
1231        if self.estimated_tokens.is_none() {
1232            self.estimated_tokens = self
1233                .text
1234                .as_ref()
1235                .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
1236        }
1237        if self.priority.is_none() {
1238            self.priority = Some(default_artifact_priority(&self.kind));
1239        }
1240        self
1241    }
1242}
1243
/// A single stage definition within a `WorkflowGraph`, bundling the prompt
/// plus every policy that governs its execution.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct WorkflowNode {
    pub id: Option<String>,
    /// Node kind — NOTE(review): execution semantics live in the runner.
    pub kind: String,
    pub mode: Option<String>,
    /// User prompt for the stage.
    pub prompt: Option<String>,
    /// System prompt override.
    pub system: Option<String>,
    pub task_label: Option<String>,
    /// Sentinel string marking completion — NOTE(review): checked by the
    /// runner; confirm semantics there.
    pub done_sentinel: Option<String>,
    /// Tool definitions as raw JSON (array, single object, or
    /// `tool_registry` — see `workflow_tool_names`).
    pub tools: serde_json::Value,
    pub model_policy: ModelPolicy,
    pub transcript_policy: TranscriptPolicy,
    pub context_policy: ContextPolicy,
    pub retry_policy: RetryPolicy,
    pub capability_policy: CapabilityPolicy,
    pub input_contract: StageContract,
    pub output_contract: StageContract,
    pub branch_semantics: BranchSemantics,
    pub map_policy: MapPolicy,
    pub join_policy: JoinPolicy,
    pub reduce_policy: ReducePolicy,
    pub escalation_policy: EscalationPolicy,
    /// Verification spec, if any.
    pub verify: Option<serde_json::Value>,
    pub metadata: BTreeMap<String, serde_json::Value>,
    /// Original tools value as a `VmValue`; never serialized, and therefore
    /// excluded from the JSON-based `PartialEq`.
    #[serde(skip)]
    pub raw_tools: Option<VmValue>,
}
1272
/// Equality via JSON round-trip: two nodes compare equal when they
/// serialize to the same `serde_json::Value`. `#[serde(skip)]` fields
/// (`raw_tools`) are excluded, and two nodes that both fail to serialize
/// compare equal (`None == None`).
impl PartialEq for WorkflowNode {
    fn eq(&self, other: &Self) -> bool {
        serde_json::to_value(self).ok() == serde_json::to_value(other).ok()
    }
}
1278
1279pub fn workflow_tool_names(value: &serde_json::Value) -> Vec<String> {
1280    match value {
1281        serde_json::Value::Null => Vec::new(),
1282        serde_json::Value::Array(items) => items
1283            .iter()
1284            .filter_map(|item| match item {
1285                serde_json::Value::Object(map) => map
1286                    .get("name")
1287                    .and_then(|value| value.as_str())
1288                    .filter(|name| !name.is_empty())
1289                    .map(|name| name.to_string()),
1290                _ => None,
1291            })
1292            .collect(),
1293        serde_json::Value::Object(map) => {
1294            if map.get("_type").and_then(|value| value.as_str()) == Some("tool_registry") {
1295                return map
1296                    .get("tools")
1297                    .map(workflow_tool_names)
1298                    .unwrap_or_default();
1299            }
1300            map.get("name")
1301                .and_then(|value| value.as_str())
1302                .filter(|name| !name.is_empty())
1303                .map(|name| vec![name.to_string()])
1304                .unwrap_or_default()
1305        }
1306        _ => Vec::new(),
1307    }
1308}
1309
/// Pick the most permissive side-effect level from `levels`, or `None` when
/// the iterator is empty. Unrecognized labels outrank every known level
/// (they sort above "network"); among equally-ranked labels the last wins.
fn max_side_effect_level(levels: impl Iterator<Item = String>) -> Option<String> {
    const ORDER: [&str; 5] = [
        "none",
        "read_only",
        "workspace_write",
        "process_exec",
        "network",
    ];
    levels.max_by_key(|level| {
        ORDER
            .iter()
            .position(|known| *known == level.as_str())
            .unwrap_or(ORDER.len())
    })
}
1323
1324fn parse_tool_runtime_policy(
1325    map: &serde_json::Map<String, serde_json::Value>,
1326) -> ToolRuntimePolicyMetadata {
1327    let Some(policy) = map.get("policy").and_then(|value| value.as_object()) else {
1328        return ToolRuntimePolicyMetadata::default();
1329    };
1330
1331    let capabilities = policy
1332        .get("capabilities")
1333        .and_then(|value| value.as_object())
1334        .map(|caps| {
1335            caps.iter()
1336                .map(|(capability, ops)| {
1337                    let values = ops
1338                        .as_array()
1339                        .map(|items| {
1340                            items
1341                                .iter()
1342                                .filter_map(|item| item.as_str().map(|s| s.to_string()))
1343                                .collect::<Vec<_>>()
1344                        })
1345                        .unwrap_or_default();
1346                    (capability.clone(), values)
1347                })
1348                .collect::<BTreeMap<_, _>>()
1349        })
1350        .unwrap_or_default();
1351
1352    let path_params = policy
1353        .get("path_params")
1354        .and_then(|value| value.as_array())
1355        .map(|items| {
1356            items
1357                .iter()
1358                .filter_map(|item| item.as_str().map(|s| s.to_string()))
1359                .collect::<Vec<_>>()
1360        })
1361        .unwrap_or_default();
1362
1363    ToolRuntimePolicyMetadata {
1364        capabilities,
1365        side_effect_level: policy
1366            .get("side_effect_level")
1367            .and_then(|value| value.as_str())
1368            .map(|s| s.to_string()),
1369        path_params,
1370        mutation_classification: policy
1371            .get("mutation_classification")
1372            .and_then(|value| value.as_str())
1373            .map(|s| s.to_string()),
1374    }
1375}
1376
1377pub fn workflow_tool_metadata(
1378    value: &serde_json::Value,
1379) -> BTreeMap<String, ToolRuntimePolicyMetadata> {
1380    match value {
1381        serde_json::Value::Null => BTreeMap::new(),
1382        serde_json::Value::Array(items) => items
1383            .iter()
1384            .filter_map(|item| match item {
1385                serde_json::Value::Object(map) => map
1386                    .get("name")
1387                    .and_then(|value| value.as_str())
1388                    .filter(|name| !name.is_empty())
1389                    .map(|name| (name.to_string(), parse_tool_runtime_policy(map))),
1390                _ => None,
1391            })
1392            .collect(),
1393        serde_json::Value::Object(map) => {
1394            if map.get("_type").and_then(|value| value.as_str()) == Some("tool_registry") {
1395                return map
1396                    .get("tools")
1397                    .map(workflow_tool_metadata)
1398                    .unwrap_or_default();
1399            }
1400            map.get("name")
1401                .and_then(|value| value.as_str())
1402                .filter(|name| !name.is_empty())
1403                .map(|name| {
1404                    let mut metadata = BTreeMap::new();
1405                    metadata.insert(name.to_string(), parse_tool_runtime_policy(map));
1406                    metadata
1407                })
1408                .unwrap_or_default()
1409        }
1410        _ => BTreeMap::new(),
1411    }
1412}
1413
1414pub fn workflow_tool_policy_from_tools(value: &serde_json::Value) -> CapabilityPolicy {
1415    let tools = workflow_tool_names(value);
1416    let tool_metadata = workflow_tool_metadata(value);
1417    let mut capabilities: BTreeMap<String, Vec<String>> = BTreeMap::new();
1418    for metadata in tool_metadata.values() {
1419        for (capability, ops) in &metadata.capabilities {
1420            let entry = capabilities.entry(capability.clone()).or_default();
1421            for op in ops {
1422                if !entry.contains(op) {
1423                    entry.push(op.clone());
1424                }
1425            }
1426            entry.sort();
1427        }
1428    }
1429    let side_effect_level = max_side_effect_level(
1430        tool_metadata
1431            .values()
1432            .filter_map(|metadata| metadata.side_effect_level.clone()),
1433    );
1434    CapabilityPolicy {
1435        tools,
1436        capabilities,
1437        workspace_roots: Vec::new(),
1438        side_effect_level,
1439        recursion_limit: None,
1440        tool_arg_constraints: Vec::new(),
1441        tool_metadata,
1442    }
1443}
1444
/// Directed edge between two workflow nodes, optionally tied to a branch
/// label.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowEdge {
    /// Source node id.
    pub from: String,
    /// Destination node id.
    pub to: String,
    /// Branch label this edge serves — NOTE(review): None presumably means
    /// unconditional; confirm against the scheduler.
    pub branch: Option<String>,
    pub label: Option<String>,
}

/// A workflow definition: nodes, edges, entry point, and the capability
/// ceiling the whole graph runs under.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowGraph {
    /// Serialized type tag.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    pub version: usize,
    /// Id of the node execution starts from.
    pub entry: String,
    /// Node id -> node definition.
    pub nodes: BTreeMap<String, WorkflowNode>,
    pub edges: Vec<WorkflowEdge>,
    /// Graph-wide capability ceiling.
    pub capability_policy: CapabilityPolicy,
    pub metadata: BTreeMap<String, serde_json::Value>,
    /// Audit entries recorded for this graph.
    pub audit_log: Vec<WorkflowAuditEntry>,
}

/// One entry in a workflow's audit log.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowAuditEntry {
    pub id: String,
    /// Name of the operation performed.
    pub op: String,
    /// Node the operation touched, if any.
    pub node_id: Option<String>,
    pub timestamp: String,
    pub reason: Option<String>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}

/// Aggregated LLM usage across one or more calls.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct LlmUsageRecord {
    pub input_tokens: i64,
    pub output_tokens: i64,
    pub total_duration_ms: i64,
    /// Number of LLM calls aggregated into this record.
    pub call_count: i64,
    pub total_cost: f64,
    /// Distinct model names used.
    pub models: Vec<String>,
}
1491
/// Record of one executed stage within a run, including its attempts,
/// artifacts, and LLM usage.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageRecord {
    /// Unique id of this stage execution.
    pub id: String,
    /// Id of the workflow node that was executed.
    pub node_id: String,
    /// Node kind at execution time.
    pub kind: String,
    pub status: String,
    pub outcome: String,
    /// Branch label taken out of this stage, if any.
    pub branch: Option<String>,
    pub started_at: String,
    pub finished_at: Option<String>,
    /// Text surfaced to the user.
    pub visible_text: Option<String>,
    /// Reasoning kept out of the visible output.
    pub private_reasoning: Option<String>,
    pub transcript: Option<serde_json::Value>,
    /// Verification result payload, if verification ran.
    pub verification: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    /// Artifacts produced by this stage.
    pub artifacts: Vec<ArtifactRecord>,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
    /// Per-attempt records, in order.
    pub attempts: Vec<RunStageAttemptRecord>,
    pub metadata: BTreeMap<String, serde_json::Value>,
}

/// One attempt of a stage (a stage may retry per its `RetryPolicy`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunStageAttemptRecord {
    /// 1-based (presumably) attempt number — confirm against the runner.
    pub attempt: usize,
    pub status: String,
    pub outcome: String,
    pub branch: Option<String>,
    /// Error message when the attempt failed.
    pub error: Option<String>,
    pub verification: Option<serde_json::Value>,
    pub started_at: String,
    pub finished_at: Option<String>,
}
1527
/// Record of one transition between workflow nodes during a run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunTransitionRecord {
    pub id: String,
    /// Stage execution the run transitioned from, if any.
    pub from_stage_id: Option<String>,
    pub from_node_id: Option<String>,
    pub to_node_id: String,
    /// Branch label that selected this transition.
    pub branch: Option<String>,
    pub timestamp: String,
    pub consumed_artifact_ids: Vec<String>,
    pub produced_artifact_ids: Vec<String>,
}

/// A persisted scheduling snapshot allowing a run to be resumed.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunCheckpointRecord {
    pub id: String,
    /// Nodes ready to execute at checkpoint time.
    pub ready_nodes: Vec<String>,
    /// Nodes already completed at checkpoint time.
    pub completed_nodes: Vec<String>,
    pub last_stage_id: Option<String>,
    pub persisted_at: String,
    /// Why the checkpoint was taken.
    pub reason: String,
}
1551
/// Expectations captured from a source run, used to assert that a replayed
/// run behaves the same way.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayFixture {
    /// Serialized type tag.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    /// Run this fixture was derived from.
    pub source_run_id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    pub created_at: String,
    /// Overall run status the replay must reproduce.
    pub expected_status: String,
    /// Per-stage expectations.
    pub stage_assertions: Vec<ReplayStageAssertion>,
}

/// Per-stage expectation within a `ReplayFixture`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayStageAssertion {
    pub node_id: String,
    pub expected_status: String,
    pub expected_outcome: String,
    pub expected_branch: Option<String>,
    /// Artifact kinds the stage must produce.
    pub required_artifact_kinds: Vec<String>,
    /// Substring that must appear in the stage's visible text.
    pub visible_text_contains: Option<String>,
}

/// Result of evaluating one replay against its fixture.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalReport {
    pub pass: bool,
    /// Human-readable descriptions of each failed assertion.
    pub failures: Vec<String>,
    pub stage_count: usize,
}
1584
/// Per-case result within a replay evaluation suite.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalCaseReport {
    pub run_id: String,
    pub workflow_id: String,
    /// Case label from the suite manifest.
    pub label: Option<String>,
    pub pass: bool,
    pub failures: Vec<String>,
    pub stage_count: usize,
    /// Path the run was loaded from.
    pub source_path: Option<String>,
    /// Run-to-run comparison, when the case requested one.
    pub comparison: Option<RunDiffReport>,
}

/// Aggregate result of a whole replay evaluation suite.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReplayEvalSuiteReport {
    /// True only when every case passed.
    pub pass: bool,
    pub total: usize,
    pub passed: usize,
    pub failed: usize,
    pub cases: Vec<ReplayEvalCaseReport>,
}

/// One per-node difference found when diffing two runs.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunStageDiffRecord {
    pub node_id: String,
    /// Kind of change detected — NOTE(review): value set defined by the
    /// diffing code; confirm there.
    pub change: String,
    pub details: Vec<String>,
}

/// Structured comparison of two runs.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunDiffReport {
    pub left_run_id: String,
    pub right_run_id: String,
    /// True when no differences were found.
    pub identical: bool,
    pub status_changed: bool,
    pub left_status: String,
    pub right_status: String,
    pub stage_diffs: Vec<RunStageDiffRecord>,
    /// right minus left (presumably) — confirm sign convention in the
    /// diffing code.
    pub transition_count_delta: isize,
    pub artifact_count_delta: isize,
    pub checkpoint_count_delta: isize,
}
1630
/// Manifest describing a suite of replay-evaluation cases.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteManifest {
    /// Serialized type tag.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub name: Option<String>,
    /// Directory that relative case paths are resolved against.
    pub base_dir: Option<String>,
    pub cases: Vec<EvalSuiteCase>,
}

/// One case within an `EvalSuiteManifest`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EvalSuiteCase {
    pub label: Option<String>,
    /// Path to the recorded run under evaluation.
    pub run_path: String,
    /// Path to the replay fixture to assert against, if any.
    pub fixture_path: Option<String>,
    /// Path of a second run to diff against, if any.
    pub compare_to: Option<String>,
}
1650
/// Full record of a single workflow run: stages, transitions, checkpoints,
/// artifacts, child runs, and the effective policy.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RunRecord {
    /// Serialized type tag.
    #[serde(rename = "_type")]
    pub type_name: String,
    pub id: String,
    pub workflow_id: String,
    pub workflow_name: Option<String>,
    /// Task description the run was started with.
    pub task: String,
    pub status: String,
    pub started_at: String,
    pub finished_at: Option<String>,
    /// Immediate parent run, when spawned as a child.
    pub parent_run_id: Option<String>,
    /// Top-most ancestor run in the spawn tree.
    pub root_run_id: Option<String>,
    pub stages: Vec<RunStageRecord>,
    pub transitions: Vec<RunTransitionRecord>,
    pub checkpoints: Vec<RunCheckpointRecord>,
    /// Nodes not yet executed.
    pub pending_nodes: Vec<String>,
    pub completed_nodes: Vec<String>,
    /// Child/worker runs spawned by this run.
    pub child_runs: Vec<RunChildRecord>,
    /// Accumulated artifacts across the run.
    pub artifacts: Vec<ArtifactRecord>,
    /// Effective capability policy the run executed under.
    pub policy: CapabilityPolicy,
    pub execution: Option<RunExecutionRecord>,
    pub transcript: Option<serde_json::Value>,
    pub usage: Option<LlmUsageRecord>,
    /// Fixture captured for later replay, if recorded.
    pub replay_fixture: Option<ReplayFixture>,
    pub trace_spans: Vec<RunTraceSpanRecord>,
    pub metadata: BTreeMap<String, serde_json::Value>,
    /// Filesystem path this record was persisted to, if any.
    pub persisted_path: Option<String>,
}

/// A timing span recorded during run execution, forming a tree via
/// `parent_id`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunTraceSpanRecord {
    pub span_id: u64,
    pub parent_id: Option<u64>,
    pub kind: String,
    pub name: String,
    /// Start offset in milliseconds — NOTE(review): epoch/run-relative
    /// origin is set by the tracer; confirm there.
    pub start_ms: u64,
    pub duration_ms: u64,
    pub metadata: BTreeMap<String, serde_json::Value>,
}
1693
/// Record of a child (worker) run spawned from a parent run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunChildRecord {
    // Identifier of the worker that executed the child run.
    pub worker_id: String,
    // Display name of the worker.
    pub worker_name: String,
    // Stage in the parent run that spawned this child, if any.
    pub parent_stage_id: Option<String>,
    // Mutation session id for the child, if one was created.
    pub session_id: Option<String>,
    // The spawning session's id, linking child to parent session.
    pub parent_session_id: Option<String>,
    // Mutation scope granted to the child (cf. MutationSessionRecord).
    pub mutation_scope: Option<String>,
    // Approval mode governing the child's mutations (cf. MutationSessionRecord).
    pub approval_mode: Option<String>,
    // Task text the child was asked to perform.
    pub task: String,
    // Lifecycle status string.
    pub status: String,
    // Start timestamp.
    pub started_at: String,
    // Finish timestamp, set once the child completes.
    pub finished_at: Option<String>,
    // Id of the child's own run record, if one was produced.
    pub run_id: Option<String>,
    // Path where the child's run record was persisted, if any.
    pub run_path: Option<String>,
    // Path to a captured snapshot, if any.
    pub snapshot_path: Option<String>,
    // Execution environment details for the child.
    pub execution: Option<RunExecutionRecord>,
}
1713
/// Where and how a run executed: working directories, environment
/// variables, and optional version-control workspace details.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RunExecutionRecord {
    // Working directory used for execution.
    pub cwd: Option<String>,
    // Directory the sources were taken from.
    pub source_dir: Option<String>,
    // Environment variables set for the run.
    pub env: BTreeMap<String, String>,
    // Execution adapter name, if any.
    pub adapter: Option<String>,
    // Repository path; presumably a git repo given branch/base_ref below — TODO confirm.
    pub repo_path: Option<String>,
    // Worktree checkout path, when a separate worktree was used.
    pub worktree_path: Option<String>,
    // Branch checked out for the run.
    pub branch: Option<String>,
    // Ref the branch was based on.
    pub base_ref: Option<String>,
    // Cleanup policy/state for the workspace (free-form string).
    pub cleanup: Option<String>,
}
1727
/// Result of `validate_workflow`: hard errors, advisory warnings, and the
/// set of nodes reachable from the graph entry.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowValidationReport {
    // True iff `errors` is empty; warnings do not affect validity.
    pub valid: bool,
    // Hard failures that make the workflow unrunnable as specified.
    pub errors: Vec<String>,
    // Suspicious-but-allowed findings (e.g. unreachable nodes).
    pub warnings: Vec<String>,
    // Node ids reachable from the entry node by following edges.
    pub reachable_nodes: Vec<String>,
}
1736
1737fn parse_json_payload<T: for<'de> Deserialize<'de>>(
1738    json: serde_json::Value,
1739    label: &str,
1740) -> Result<T, VmError> {
1741    let payload = json.to_string();
1742    let mut deserializer = serde_json::Deserializer::from_str(&payload);
1743    let mut tracker = serde_path_to_error::Track::new();
1744    let path_deserializer = serde_path_to_error::Deserializer::new(&mut deserializer, &mut tracker);
1745    T::deserialize(path_deserializer).map_err(|error| {
1746        let snippet = if payload.len() > 600 {
1747            format!("{}...", &payload[..600])
1748        } else {
1749            payload.clone()
1750        };
1751        VmError::Runtime(format!(
1752            "{label} parse error at {}: {} | payload={}",
1753            tracker.path(),
1754            error,
1755            snippet
1756        ))
1757    })
1758}
1759
1760fn parse_json_value<T: for<'de> Deserialize<'de>>(value: &VmValue) -> Result<T, VmError> {
1761    parse_json_payload(vm_value_to_json(value), "orchestration")
1762}
1763
1764pub fn parse_workflow_node_value(value: &VmValue, label: &str) -> Result<WorkflowNode, VmError> {
1765    let mut node: WorkflowNode = parse_json_payload(vm_value_to_json(value), label)?;
1766    node.raw_tools = value.as_dict().and_then(|dict| dict.get("tools")).cloned();
1767    Ok(node)
1768}
1769
1770pub fn parse_workflow_node_json(
1771    json: serde_json::Value,
1772    label: &str,
1773) -> Result<WorkflowNode, VmError> {
1774    parse_json_payload(json, label)
1775}
1776
1777pub fn parse_workflow_edge_json(
1778    json: serde_json::Value,
1779    label: &str,
1780) -> Result<WorkflowEdge, VmError> {
1781    parse_json_payload(json, label)
1782}
1783
/// Build a fully-populated `WorkflowGraph` from a VM value.
///
/// Accepts either an explicit graph (with `nodes`/`edges`) or a shorthand
/// dict with top-level `act` / `verify` / `repair` entries, which is
/// expanded into an act -> verify -> (failed) repair -> (retry) verify loop.
/// Afterwards, missing fields (type name, id, version, entry node, per-node
/// defaults) are filled in.
pub fn normalize_workflow_value(value: &VmValue) -> Result<WorkflowGraph, VmError> {
    let mut graph: WorkflowGraph = parse_json_value(value)?;
    let as_dict = value.as_dict().cloned().unwrap_or_default();

    // Shorthand form: no explicit nodes, so synthesize them from the
    // act/verify/repair keys of the top-level dict.
    if graph.nodes.is_empty() {
        for key in ["act", "verify", "repair"] {
            if let Some(node_value) = as_dict.get(key) {
                let mut node = parse_workflow_node_value(node_value, "orchestration")?;
                let raw_node = node_value.as_dict().cloned().unwrap_or_default();
                node.id = Some(key.to_string());
                if node.kind.is_empty() {
                    node.kind = if key == "verify" {
                        "verify".to_string()
                    } else {
                        "stage".to_string()
                    };
                }
                // Model-policy fields not set on the node fall back to the
                // corresponding top-level dict entries; empty strings count
                // as unset.
                if node.model_policy.provider.is_none() {
                    node.model_policy.provider = as_dict
                        .get("provider")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model.is_none() {
                    node.model_policy.model = as_dict
                        .get("model")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.model_tier.is_none() {
                    node.model_policy.model_tier = as_dict
                        .get("model_tier")
                        .or_else(|| as_dict.get("tier"))
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.model_policy.temperature.is_none() {
                    // Accept either a float or an int for temperature.
                    node.model_policy.temperature = as_dict.get("temperature").and_then(|value| {
                        if let VmValue::Float(number) = value {
                            Some(*number)
                        } else {
                            value.as_int().map(|number| number as f64)
                        }
                    });
                }
                if node.model_policy.max_tokens.is_none() {
                    node.model_policy.max_tokens =
                        as_dict.get("max_tokens").and_then(|value| value.as_int());
                }
                if node.mode.is_none() {
                    node.mode = as_dict
                        .get("mode")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                if node.done_sentinel.is_none() {
                    node.done_sentinel = as_dict
                        .get("done_sentinel")
                        .map(|value| value.display())
                        .filter(|value| !value.is_empty());
                }
                // Shorthand verify nodes may specify assertions inline;
                // collect them into a structured `verify` object.
                if key == "verify"
                    && node.verify.is_none()
                    && (raw_node.contains_key("assert_text")
                        || raw_node.contains_key("command")
                        || raw_node.contains_key("expect_status")
                        || raw_node.contains_key("expect_text"))
                {
                    node.verify = Some(serde_json::json!({
                        "assert_text": raw_node.get("assert_text").map(vm_value_to_json),
                        "command": raw_node.get("command").map(vm_value_to_json),
                        "expect_status": raw_node.get("expect_status").map(vm_value_to_json),
                        "expect_text": raw_node.get("expect_text").map(vm_value_to_json),
                    }));
                }
                graph.nodes.insert(key.to_string(), node);
            }
        }
        // Wire up the default act -> verify -> repair loop for the
        // shorthand form (only when no explicit entry/edges were given).
        if graph.entry.is_empty() && graph.nodes.contains_key("act") {
            graph.entry = "act".to_string();
        }
        if graph.edges.is_empty() && graph.nodes.contains_key("act") {
            if graph.nodes.contains_key("verify") {
                graph.edges.push(WorkflowEdge {
                    from: "act".to_string(),
                    to: "verify".to_string(),
                    branch: None,
                    label: None,
                });
            }
            if graph.nodes.contains_key("repair") {
                graph.edges.push(WorkflowEdge {
                    from: "verify".to_string(),
                    to: "repair".to_string(),
                    branch: Some("failed".to_string()),
                    label: None,
                });
                graph.edges.push(WorkflowEdge {
                    from: "repair".to_string(),
                    to: "verify".to_string(),
                    branch: Some("retry".to_string()),
                    label: None,
                });
            }
        }
    }

    // Graph-level defaults.
    if graph.type_name.is_empty() {
        graph.type_name = "workflow_graph".to_string();
    }
    if graph.id.is_empty() {
        graph.id = new_id("workflow");
    }
    if graph.version == 0 {
        graph.version = 1;
    }
    if graph.entry.is_empty() {
        // Fall back to the first node id (BTreeMap order), or "act".
        graph.entry = graph
            .nodes
            .keys()
            .next()
            .cloned()
            .unwrap_or_else(|| "act".to_string());
    }
    // Per-node defaults applied to every node, explicit or synthesized.
    for (node_id, node) in &mut graph.nodes {
        if node.raw_tools.is_none() {
            // Recover the raw `tools` value from the original dict form,
            // since serde parsing does not populate it.
            node.raw_tools = as_dict
                .get("nodes")
                .and_then(|nodes| nodes.as_dict())
                .and_then(|nodes| nodes.get(node_id))
                .and_then(|node_value| node_value.as_dict())
                .and_then(|raw_node| raw_node.get("tools"))
                .cloned();
        }
        if node.id.is_none() {
            node.id = Some(node_id.clone());
        }
        if node.kind.is_empty() {
            node.kind = "stage".to_string();
        }
        if node.join_policy.strategy.is_empty() {
            node.join_policy.strategy = "all".to_string();
        }
        if node.reduce_policy.strategy.is_empty() {
            node.reduce_policy.strategy = "concat".to_string();
        }
        // Default output kinds depend on the node kind; reduce/map nodes
        // may override via their policy's output_kind.
        if node.output_contract.output_kinds.is_empty() {
            node.output_contract.output_kinds = vec![match node.kind.as_str() {
                "verify" => "verification_result".to_string(),
                "reduce" => node
                    .reduce_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "summary".to_string()),
                "map" => node
                    .map_policy
                    .output_kind
                    .clone()
                    .unwrap_or_else(|| "artifact".to_string()),
                "escalation" => "plan".to_string(),
                _ => "artifact".to_string(),
            }];
        }
        if node.retry_policy.max_attempts == 0 {
            node.retry_policy.max_attempts = 1;
        }
    }
    Ok(graph)
}
1953
/// Statically check a workflow graph, returning hard errors and advisory
/// warnings.
///
/// Checks: the entry node exists, edges reference known nodes, unreachable
/// nodes (warning only), input-contract sanity, per-kind structural rules
/// (condition/fork/join/map/reduce), and — when a capability `ceiling` is
/// given — that the graph-level and node-level policies intersect with it.
pub fn validate_workflow(
    graph: &WorkflowGraph,
    ceiling: Option<&CapabilityPolicy>,
) -> WorkflowValidationReport {
    let mut errors = Vec::new();
    let mut warnings = Vec::new();

    if !graph.nodes.contains_key(&graph.entry) {
        errors.push(format!("entry node does not exist: {}", graph.entry));
    }

    // Every edge endpoint must name an existing node.
    let node_ids: BTreeSet<String> = graph.nodes.keys().cloned().collect();
    for edge in &graph.edges {
        if !node_ids.contains(&edge.from) {
            errors.push(format!("edge.from references unknown node: {}", edge.from));
        }
        if !node_ids.contains(&edge.to) {
            errors.push(format!("edge.to references unknown node: {}", edge.to));
        }
    }

    // Unreachable nodes are a warning, not an error.
    let reachable_nodes = reachable_nodes(graph);
    for node_id in &node_ids {
        if !reachable_nodes.contains(node_id) {
            warnings.push(format!("node is unreachable: {node_id}"));
        }
    }

    // Per-node checks, keyed on node.kind.
    for (node_id, node) in &graph.nodes {
        let incoming = graph
            .edges
            .iter()
            .filter(|edge| edge.to == *node_id)
            .count();
        let outgoing: Vec<&WorkflowEdge> = graph
            .edges
            .iter()
            .filter(|edge| edge.from == *node_id)
            .collect();
        if let Some(min_inputs) = node.input_contract.min_inputs {
            if let Some(max_inputs) = node.input_contract.max_inputs {
                if min_inputs > max_inputs {
                    errors.push(format!(
                        "node {node_id}: input contract min_inputs exceeds max_inputs"
                    ));
                }
            }
        }
        match node.kind.as_str() {
            "condition" => {
                // A condition must be able to route both outcomes.
                let has_true = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("true"));
                let has_false = outgoing
                    .iter()
                    .any(|edge| edge.branch.as_deref() == Some("false"));
                if !has_true || !has_false {
                    errors.push(format!(
                        "node {node_id}: condition nodes require both 'true' and 'false' branch edges"
                    ));
                }
            }
            "fork" => {
                if outgoing.len() < 2 {
                    errors.push(format!(
                        "node {node_id}: fork nodes require at least two outgoing edges"
                    ));
                }
            }
            "join" => {
                // A single-input join is legal but probably a mistake.
                if incoming < 2 {
                    warnings.push(format!(
                        "node {node_id}: join node has fewer than two incoming edges"
                    ));
                }
            }
            "map" => {
                // A map node needs some source of items to fan out over.
                if node.map_policy.items.is_empty()
                    && node.map_policy.item_artifact_kind.is_none()
                    && node.input_contract.input_kinds.is_empty()
                {
                    errors.push(format!(
                        "node {node_id}: map nodes require items, item_artifact_kind, or input_contract.input_kinds"
                    ));
                }
            }
            "reduce" => {
                if node.input_contract.input_kinds.is_empty() {
                    warnings.push(format!(
                        "node {node_id}: reduce node has no input_contract.input_kinds; it will consume all available artifacts"
                    ));
                }
            }
            _ => {}
        }
    }

    // Capability ceiling: graph and node policies must intersect cleanly
    // with the host-imposed ceiling.
    if let Some(ceiling) = ceiling {
        if let Err(error) = ceiling.intersect(&graph.capability_policy) {
            errors.push(error);
        }
        for (node_id, node) in &graph.nodes {
            if let Err(error) = ceiling.intersect(&node.capability_policy) {
                errors.push(format!("node {node_id}: {error}"));
            }
        }
    }

    WorkflowValidationReport {
        valid: errors.is_empty(),
        errors,
        warnings,
        reachable_nodes: reachable_nodes.into_iter().collect(),
    }
}
2069
2070fn reachable_nodes(graph: &WorkflowGraph) -> BTreeSet<String> {
2071    let mut seen = BTreeSet::new();
2072    let mut stack = vec![graph.entry.clone()];
2073    while let Some(node_id) = stack.pop() {
2074        if !seen.insert(node_id.clone()) {
2075            continue;
2076        }
2077        for edge in graph.edges.iter().filter(|edge| edge.from == node_id) {
2078            stack.push(edge.to.clone());
2079        }
2080    }
2081    seen
2082}
2083
/// Filter, rank, and budget artifacts for inclusion in a context window.
///
/// Pipeline: (1) drop artifacts excluded by kind/stage filters, (2) sort by
/// pinned-first, then prioritized kind, priority, freshness (if
/// `prefer_fresh`), recency (if `prefer_recent`), relevance, and finally
/// smaller token estimate, (3) greedily take artifacts that fit under the
/// count and token budgets.
pub fn select_artifacts(
    mut artifacts: Vec<ArtifactRecord>,
    policy: &ContextPolicy,
) -> Vec<ArtifactRecord> {
    // Empty include lists mean "no restriction"; exclude always applies.
    artifacts.retain(|artifact| {
        (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
            && !policy.exclude_kinds.contains(&artifact.kind)
            && (policy.include_stages.is_empty()
                || artifact
                    .stage
                    .as_ref()
                    .is_some_and(|stage| policy.include_stages.contains(stage)))
    });
    // Comparators are written b-vs-a so that "better" artifacts sort first.
    artifacts.sort_by(|a, b| {
        let b_pinned = policy.pinned_ids.contains(&b.id);
        let a_pinned = policy.pinned_ids.contains(&a.id);
        b_pinned
            .cmp(&a_pinned)
            .then_with(|| {
                let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
                let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
                b_prio_kind.cmp(&a_prio_kind)
            })
            .then_with(|| {
                b.priority
                    .unwrap_or_default()
                    .cmp(&a.priority.unwrap_or_default())
            })
            .then_with(|| {
                if policy.prefer_fresh {
                    freshness_rank(b.freshness.as_deref())
                        .cmp(&freshness_rank(a.freshness.as_deref()))
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                if policy.prefer_recent {
                    // Timestamps compare lexicographically; newer first.
                    b.created_at.cmp(&a.created_at)
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                b.relevance
                    .partial_cmp(&a.relevance)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
            .then_with(|| {
                // Final tiebreak: cheaper artifacts first; unknown cost last.
                a.estimated_tokens
                    .unwrap_or(usize::MAX)
                    .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
            })
    });

    let mut selected = Vec::new();
    let mut used_tokens = 0usize;
    // Reserve room for other prompt content out of the token budget.
    let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
    let effective_max_tokens = policy
        .max_tokens
        .map(|max| max.saturating_sub(reserve_tokens));
    for artifact in artifacts {
        if let Some(max_artifacts) = policy.max_artifacts {
            if selected.len() >= max_artifacts {
                break;
            }
        }
        let next_tokens = artifact.estimated_tokens.unwrap_or(0);
        if let Some(max_tokens) = effective_max_tokens {
            if used_tokens + next_tokens > max_tokens {
                // Skip (not break): a later, smaller artifact may still fit.
                continue;
            }
        }
        used_tokens += next_tokens;
        selected.push(artifact);
    }
    selected
}
2162
/// Render selected artifacts into a single context string.
///
/// With `policy.render == Some("json")`, each artifact becomes a compact
/// JSON object; any other value uses a human-readable header line plus the
/// body. Parts are joined with blank lines. The body falls back to the
/// artifact's structured `data` (as JSON) when `text` is absent.
pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
    let mut parts = Vec::new();
    for artifact in artifacts {
        // Title defaults to "<kind> <id>" when none was set.
        let title = artifact
            .title
            .clone()
            .unwrap_or_else(|| format!("{} {}", artifact.kind, artifact.id));
        let body = artifact
            .text
            .clone()
            .or_else(|| artifact.data.as_ref().map(|v| v.to_string()))
            .unwrap_or_default();
        match policy.render.as_deref() {
            Some("json") => {
                parts.push(
                    serde_json::json!({
                        "id": artifact.id,
                        "kind": artifact.kind,
                        "title": title,
                        "source": artifact.source,
                        "freshness": artifact.freshness,
                        "priority": artifact.priority,
                        "text": body,
                    })
                    .to_string(),
                );
            }
            _ => parts.push(format!(
                "[{title}] kind={} source={} freshness={} priority={}\n{}",
                artifact.kind,
                artifact
                    .source
                    .clone()
                    .unwrap_or_else(|| "unknown".to_string()),
                artifact
                    .freshness
                    .clone()
                    .unwrap_or_else(|| "normal".to_string()),
                artifact.priority.unwrap_or_default(),
                body
            )),
        }
    }
    parts.join("\n\n")
}
2208
2209pub fn normalize_artifact(value: &VmValue) -> Result<ArtifactRecord, VmError> {
2210    let artifact: ArtifactRecord = parse_json_value(value)?;
2211    Ok(artifact.normalize())
2212}
2213
2214pub fn normalize_run_record(value: &VmValue) -> Result<RunRecord, VmError> {
2215    let mut run: RunRecord = parse_json_payload(vm_value_to_json(value), "run_record")?;
2216    if run.type_name.is_empty() {
2217        run.type_name = "run_record".to_string();
2218    }
2219    if run.id.is_empty() {
2220        run.id = new_id("run");
2221    }
2222    if run.started_at.is_empty() {
2223        run.started_at = now_rfc3339();
2224    }
2225    if run.status.is_empty() {
2226        run.status = "running".to_string();
2227    }
2228    if run.root_run_id.is_none() {
2229        run.root_run_id = Some(run.id.clone());
2230    }
2231    if run.replay_fixture.is_none() {
2232        run.replay_fixture = Some(replay_fixture_from_run(&run));
2233    }
2234    Ok(run)
2235}
2236
2237pub fn normalize_eval_suite_manifest(value: &VmValue) -> Result<EvalSuiteManifest, VmError> {
2238    let mut manifest: EvalSuiteManifest = parse_json_value(value)?;
2239    if manifest.type_name.is_empty() {
2240        manifest.type_name = "eval_suite_manifest".to_string();
2241    }
2242    if manifest.id.is_empty() {
2243        manifest.id = new_id("eval_suite");
2244    }
2245    Ok(manifest)
2246}
2247
2248fn load_replay_fixture(path: &Path) -> Result<ReplayFixture, VmError> {
2249    let content = std::fs::read_to_string(path)
2250        .map_err(|e| VmError::Runtime(format!("failed to read replay fixture: {e}")))?;
2251    serde_json::from_str(&content)
2252        .map_err(|e| VmError::Runtime(format!("failed to parse replay fixture: {e}")))
2253}
2254
/// Resolve a manifest-relative path: absolute paths pass through unchanged,
/// relative ones are joined onto `base_dir` when present.
fn resolve_manifest_path(base_dir: Option<&Path>, path: &str) -> PathBuf {
    let candidate = PathBuf::from(path);
    if candidate.is_absolute() {
        return candidate;
    }
    match base_dir {
        Some(dir) => dir.join(candidate),
        None => candidate,
    }
}
2265
/// Evaluate every case in an eval-suite manifest and aggregate the results.
///
/// Per case: load the run record (paths resolved against the manifest's
/// `base_dir`), pick a replay fixture — an explicit fixture file, else the
/// run's embedded fixture, else one regenerated from the run itself — and
/// evaluate the run against it. When `compare_to` names a baseline run,
/// any structural difference also fails the case.
pub fn evaluate_run_suite_manifest(
    manifest: &EvalSuiteManifest,
) -> Result<ReplayEvalSuiteReport, VmError> {
    let base_dir = manifest.base_dir.as_deref().map(Path::new);
    let mut reports = Vec::new();
    for case in &manifest.cases {
        let run_path = resolve_manifest_path(base_dir, &case.run_path);
        let run = load_run_record(&run_path)?;
        // Fixture precedence: explicit file > embedded > regenerated.
        let fixture = match &case.fixture_path {
            Some(path) => load_replay_fixture(&resolve_manifest_path(base_dir, path))?,
            None => run
                .replay_fixture
                .clone()
                .unwrap_or_else(|| replay_fixture_from_run(&run)),
        };
        let eval = evaluate_run_against_fixture(&run, &fixture);
        let mut pass = eval.pass;
        let mut failures = eval.failures;
        // Optional baseline comparison; a non-identical diff fails the case.
        let comparison = match &case.compare_to {
            Some(path) => {
                let baseline_path = resolve_manifest_path(base_dir, path);
                let baseline = load_run_record(&baseline_path)?;
                let diff = diff_run_records(&baseline, &run);
                if !diff.identical {
                    pass = false;
                    failures.push(format!(
                        "run differs from baseline {} with {} stage changes",
                        baseline_path.display(),
                        diff.stage_diffs.len()
                    ));
                }
                Some(diff)
            }
            None => None,
        };
        reports.push(ReplayEvalCaseReport {
            run_id: run.id.clone(),
            workflow_id: run.workflow_id.clone(),
            label: case.label.clone(),
            pass,
            failures,
            stage_count: eval.stage_count,
            source_path: Some(run_path.display().to_string()),
            comparison,
        });
    }
    let total = reports.len();
    let passed = reports.iter().filter(|report| report.pass).count();
    let failed = total.saturating_sub(passed);
    Ok(ReplayEvalSuiteReport {
        pass: failed == 0,
        total,
        passed,
        failed,
        cases: reports,
    })
}
2323
/// Edit operation in a diff sequence.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum DiffOp {
    /// Line present in both inputs (paired index is into `before`).
    Equal,
    /// Line removed from `before` (index is into `before`).
    Delete,
    /// Line added in `after` (index is into `after`).
    Insert,
}
2331
/// Compute the shortest edit script using Myers' O(nd) algorithm.
/// Returns a sequence of (DiffOp, line_index_in_before_or_after).
/// Time: O(nd) where d = edit distance. Space: O(d * n).
fn myers_diff(a: &[&str], b: &[&str]) -> Vec<(DiffOp, usize)> {
    let n = a.len() as isize;
    let m = b.len() as isize;
    // Trivial cases: an empty side means pure inserts or pure deletes.
    if n == 0 && m == 0 {
        return Vec::new();
    }
    if n == 0 {
        return (0..m as usize).map(|j| (DiffOp::Insert, j)).collect();
    }
    if m == 0 {
        return (0..n as usize).map(|i| (DiffOp::Delete, i)).collect();
    }

    // v[k + offset] holds the furthest x reached on diagonal k (k = x - y);
    // k ranges over [-(n+m), n+m], hence the offset shift into the array.
    let max_d = (n + m) as usize;
    let offset = max_d as isize;
    let v_size = 2 * max_d + 1;
    let mut v = vec![0isize; v_size];
    // trace[d] stores v snapshot BEFORE step d was applied.
    let mut trace: Vec<Vec<isize>> = Vec::new();

    'outer: for d in 0..=max_d as isize {
        trace.push(v.clone());
        let mut new_v = v.clone();
        for k in (-d..=d).step_by(2) {
            let ki = (k + offset) as usize;
            // Pick the predecessor diagonal that reached further. The k==-d
            // guard also keeps v[ki - 1] in bounds.
            let mut x = if k == -d || (k != d && v[ki - 1] < v[ki + 1]) {
                v[ki + 1] // insert (move down)
            } else {
                v[ki - 1] + 1 // delete (move right)
            };
            let mut y = x - k;
            // Follow the free diagonal (matching lines) as far as possible.
            while x < n && y < m && a[x as usize] == b[y as usize] {
                x += 1;
                y += 1;
            }
            new_v[ki] = x;
            if x >= n && y >= m {
                // Reached (n, m); the snapshot for this step is already in
                // `trace`, so the updated row is not needed for backtracking.
                let _ = new_v;
                break 'outer;
            }
        }
        v = new_v;
    }

    // Backtrack from (n, m) to (0, 0).
    let mut ops: Vec<(DiffOp, usize)> = Vec::new();
    let mut x = n;
    let mut y = m;
    for d in (1..trace.len() as isize).rev() {
        let k = x - y;
        // trace[d] is the state after step d-1, i.e. what step d read from.
        let v_prev = &trace[d as usize];
        let prev_k = if k == -d
            || (k != d && v_prev[(k - 1 + offset) as usize] < v_prev[(k + 1 + offset) as usize])
        {
            k + 1 // came from insert
        } else {
            k - 1 // came from delete
        };
        let prev_x = v_prev[(prev_k + offset) as usize];
        let prev_y = prev_x - prev_k;

        // Diagonal (equal) moves
        while x > prev_x && y > prev_y {
            x -= 1;
            y -= 1;
            ops.push((DiffOp::Equal, x as usize));
        }
        // Edit move
        if prev_k < k {
            x -= 1;
            ops.push((DiffOp::Delete, x as usize));
        } else {
            y -= 1;
            ops.push((DiffOp::Insert, y as usize));
        }
    }
    // Initial diagonal at d=0
    while x > 0 && y > 0 {
        x -= 1;
        y -= 1;
        ops.push((DiffOp::Equal, x as usize));
    }
    // Ops were collected back-to-front; restore forward order.
    ops.reverse();
    ops
}
2420
2421pub fn render_unified_diff(path: Option<&str>, before: &str, after: &str) -> String {
2422    let before_lines: Vec<&str> = before.lines().collect();
2423    let after_lines: Vec<&str> = after.lines().collect();
2424    let ops = myers_diff(&before_lines, &after_lines);
2425
2426    let mut diff = String::new();
2427    let file = path.unwrap_or("artifact");
2428    diff.push_str(&format!("--- a/{file}\n+++ b/{file}\n"));
2429    for &(op, idx) in &ops {
2430        match op {
2431            DiffOp::Equal => diff.push_str(&format!(" {}\n", before_lines[idx])),
2432            DiffOp::Delete => diff.push_str(&format!("-{}\n", before_lines[idx])),
2433            DiffOp::Insert => diff.push_str(&format!("+{}\n", after_lines[idx])),
2434        }
2435    }
2436    diff
2437}
2438
2439pub fn save_run_record(run: &RunRecord, path: Option<&str>) -> Result<String, VmError> {
2440    let path = path
2441        .map(PathBuf::from)
2442        .unwrap_or_else(|| default_run_dir().join(format!("{}.json", run.id)));
2443    if let Some(parent) = path.parent() {
2444        std::fs::create_dir_all(parent)
2445            .map_err(|e| VmError::Runtime(format!("failed to create run directory: {e}")))?;
2446    }
2447    let json = serde_json::to_string_pretty(run)
2448        .map_err(|e| VmError::Runtime(format!("failed to encode run record: {e}")))?;
2449    // Atomic write: write to .tmp then rename to prevent corruption on kill.
2450    let tmp_path = path.with_extension("json.tmp");
2451    std::fs::write(&tmp_path, &json)
2452        .map_err(|e| VmError::Runtime(format!("failed to persist run record: {e}")))?;
2453    std::fs::rename(&tmp_path, &path).map_err(|e| {
2454        // Fallback: if rename fails (cross-device), write directly.
2455        let _ = std::fs::write(&path, &json);
2456        VmError::Runtime(format!("failed to finalize run record: {e}"))
2457    })?;
2458    Ok(path.to_string_lossy().to_string())
2459}
2460
2461pub fn load_run_record(path: &Path) -> Result<RunRecord, VmError> {
2462    let content = std::fs::read_to_string(path)
2463        .map_err(|e| VmError::Runtime(format!("failed to read run record: {e}")))?;
2464    serde_json::from_str(&content)
2465        .map_err(|e| VmError::Runtime(format!("failed to parse run record: {e}")))
2466}
2467
/// Derive a replay fixture from a run record.
///
/// The fixture pins the run's final status plus, per stage: status, outcome,
/// branch, the kinds of all produced artifacts, and — when visible text is
/// present and non-empty — its first 80 chars as a contains-assertion.
pub fn replay_fixture_from_run(run: &RunRecord) -> ReplayFixture {
    ReplayFixture {
        type_name: "replay_fixture".to_string(),
        id: new_id("fixture"),
        source_run_id: run.id.clone(),
        workflow_id: run.workflow_id.clone(),
        workflow_name: run.workflow_name.clone(),
        created_at: now_rfc3339(),
        expected_status: run.status.clone(),
        stage_assertions: run
            .stages
            .iter()
            .map(|stage| ReplayStageAssertion {
                node_id: stage.node_id.clone(),
                expected_status: stage.status.clone(),
                expected_outcome: stage.outcome.clone(),
                expected_branch: stage.branch.clone(),
                required_artifact_kinds: stage
                    .artifacts
                    .iter()
                    .map(|artifact| artifact.kind.clone())
                    .collect(),
                // Truncate by chars (not bytes) so multi-byte text is safe.
                visible_text_contains: stage
                    .visible_text
                    .as_ref()
                    .filter(|text| !text.is_empty())
                    .map(|text| text.chars().take(80).collect()),
            })
            .collect(),
    }
}
2499
2500pub fn evaluate_run_against_fixture(run: &RunRecord, fixture: &ReplayFixture) -> ReplayEvalReport {
2501    let mut failures = Vec::new();
2502    if run.status != fixture.expected_status {
2503        failures.push(format!(
2504            "run status mismatch: expected {}, got {}",
2505            fixture.expected_status, run.status
2506        ));
2507    }
2508    let stages_by_id: BTreeMap<&str, &RunStageRecord> =
2509        run.stages.iter().map(|s| (s.node_id.as_str(), s)).collect();
2510    for assertion in &fixture.stage_assertions {
2511        let Some(stage) = stages_by_id.get(assertion.node_id.as_str()) else {
2512            failures.push(format!("missing stage {}", assertion.node_id));
2513            continue;
2514        };
2515        if stage.status != assertion.expected_status {
2516            failures.push(format!(
2517                "stage {} status mismatch: expected {}, got {}",
2518                assertion.node_id, assertion.expected_status, stage.status
2519            ));
2520        }
2521        if stage.outcome != assertion.expected_outcome {
2522            failures.push(format!(
2523                "stage {} outcome mismatch: expected {}, got {}",
2524                assertion.node_id, assertion.expected_outcome, stage.outcome
2525            ));
2526        }
2527        if stage.branch != assertion.expected_branch {
2528            failures.push(format!(
2529                "stage {} branch mismatch: expected {:?}, got {:?}",
2530                assertion.node_id, assertion.expected_branch, stage.branch
2531            ));
2532        }
2533        for required_kind in &assertion.required_artifact_kinds {
2534            if !stage
2535                .artifacts
2536                .iter()
2537                .any(|artifact| &artifact.kind == required_kind)
2538            {
2539                failures.push(format!(
2540                    "stage {} missing artifact kind {}",
2541                    assertion.node_id, required_kind
2542                ));
2543            }
2544        }
2545        if let Some(snippet) = &assertion.visible_text_contains {
2546            let actual = stage.visible_text.clone().unwrap_or_default();
2547            if !actual.contains(snippet) {
2548                failures.push(format!(
2549                    "stage {} visible text does not contain expected snippet {:?}",
2550                    assertion.node_id, snippet
2551                ));
2552            }
2553        }
2554    }
2555
2556    ReplayEvalReport {
2557        pass: failures.is_empty(),
2558        failures,
2559        stage_count: run.stages.len(),
2560    }
2561}
2562
2563pub fn evaluate_run_suite(
2564    cases: Vec<(RunRecord, ReplayFixture, Option<String>)>,
2565) -> ReplayEvalSuiteReport {
2566    let mut reports = Vec::new();
2567    for (run, fixture, source_path) in cases {
2568        let report = evaluate_run_against_fixture(&run, &fixture);
2569        reports.push(ReplayEvalCaseReport {
2570            run_id: run.id.clone(),
2571            workflow_id: run.workflow_id.clone(),
2572            label: None,
2573            pass: report.pass,
2574            failures: report.failures,
2575            stage_count: report.stage_count,
2576            source_path,
2577            comparison: None,
2578        });
2579    }
2580    let total = reports.len();
2581    let passed = reports.iter().filter(|report| report.pass).count();
2582    let failed = total.saturating_sub(passed);
2583    ReplayEvalSuiteReport {
2584        pass: failed == 0,
2585        total,
2586        passed,
2587        failed,
2588        cases: reports,
2589    }
2590}
2591
2592pub fn diff_run_records(left: &RunRecord, right: &RunRecord) -> RunDiffReport {
2593    let mut stage_diffs = Vec::new();
2594    let mut all_node_ids = BTreeSet::new();
2595    let left_by_id: BTreeMap<&str, &RunStageRecord> = left
2596        .stages
2597        .iter()
2598        .map(|s| (s.node_id.as_str(), s))
2599        .collect();
2600    let right_by_id: BTreeMap<&str, &RunStageRecord> = right
2601        .stages
2602        .iter()
2603        .map(|s| (s.node_id.as_str(), s))
2604        .collect();
2605    all_node_ids.extend(left_by_id.keys().copied());
2606    all_node_ids.extend(right_by_id.keys().copied());
2607
2608    for node_id in all_node_ids {
2609        let left_stage = left_by_id.get(node_id).copied();
2610        let right_stage = right_by_id.get(node_id).copied();
2611        match (left_stage, right_stage) {
2612            (Some(_), None) => stage_diffs.push(RunStageDiffRecord {
2613                node_id: node_id.to_string(),
2614                change: "removed".to_string(),
2615                details: vec!["stage missing from right run".to_string()],
2616            }),
2617            (None, Some(_)) => stage_diffs.push(RunStageDiffRecord {
2618                node_id: node_id.to_string(),
2619                change: "added".to_string(),
2620                details: vec!["stage missing from left run".to_string()],
2621            }),
2622            (Some(left_stage), Some(right_stage)) => {
2623                let mut details = Vec::new();
2624                if left_stage.status != right_stage.status {
2625                    details.push(format!(
2626                        "status: {} -> {}",
2627                        left_stage.status, right_stage.status
2628                    ));
2629                }
2630                if left_stage.outcome != right_stage.outcome {
2631                    details.push(format!(
2632                        "outcome: {} -> {}",
2633                        left_stage.outcome, right_stage.outcome
2634                    ));
2635                }
2636                if left_stage.branch != right_stage.branch {
2637                    details.push(format!(
2638                        "branch: {:?} -> {:?}",
2639                        left_stage.branch, right_stage.branch
2640                    ));
2641                }
2642                if left_stage.produced_artifact_ids.len() != right_stage.produced_artifact_ids.len()
2643                {
2644                    details.push(format!(
2645                        "produced_artifacts: {} -> {}",
2646                        left_stage.produced_artifact_ids.len(),
2647                        right_stage.produced_artifact_ids.len()
2648                    ));
2649                }
2650                if left_stage.artifacts.len() != right_stage.artifacts.len() {
2651                    details.push(format!(
2652                        "artifact_records: {} -> {}",
2653                        left_stage.artifacts.len(),
2654                        right_stage.artifacts.len()
2655                    ));
2656                }
2657                if !details.is_empty() {
2658                    stage_diffs.push(RunStageDiffRecord {
2659                        node_id: node_id.to_string(),
2660                        change: "changed".to_string(),
2661                        details,
2662                    });
2663                }
2664            }
2665            (None, None) => {}
2666        }
2667    }
2668
2669    let status_changed = left.status != right.status;
2670    let identical = !status_changed
2671        && stage_diffs.is_empty()
2672        && left.transitions.len() == right.transitions.len()
2673        && left.artifacts.len() == right.artifacts.len()
2674        && left.checkpoints.len() == right.checkpoints.len();
2675
2676    RunDiffReport {
2677        left_run_id: left.id.clone(),
2678        right_run_id: right.id.clone(),
2679        identical,
2680        status_changed,
2681        left_status: left.status.clone(),
2682        right_status: right.status.clone(),
2683        stage_diffs,
2684        transition_count_delta: right.transitions.len() as isize - left.transitions.len() as isize,
2685        artifact_count_delta: right.artifacts.len() as isize - left.artifacts.len() as isize,
2686        checkpoint_count_delta: right.checkpoints.len() as isize - left.checkpoints.len() as isize,
2687    }
2688}
2689
2690pub fn push_execution_policy(policy: CapabilityPolicy) {
2691    EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
2692}
2693
2694pub fn pop_execution_policy() {
2695    EXECUTION_POLICY_STACK.with(|stack| {
2696        stack.borrow_mut().pop();
2697    });
2698}
2699
2700pub fn current_execution_policy() -> Option<CapabilityPolicy> {
2701    EXECUTION_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
2702}
2703
2704pub fn current_tool_metadata(tool: &str) -> Option<ToolRuntimePolicyMetadata> {
2705    current_execution_policy().and_then(|policy| policy.tool_metadata.get(tool).cloned())
2706}
2707
2708fn policy_allows_tool(policy: &CapabilityPolicy, tool: &str) -> bool {
2709    policy.tools.is_empty() || policy.tools.iter().any(|allowed| allowed == tool)
2710}
2711
2712fn policy_allows_capability(policy: &CapabilityPolicy, capability: &str, op: &str) -> bool {
2713    policy.capabilities.is_empty()
2714        || policy
2715            .capabilities
2716            .get(capability)
2717            .is_some_and(|ops| ops.is_empty() || ops.iter().any(|allowed| allowed == op))
2718}
2719
2720fn policy_allows_side_effect(policy: &CapabilityPolicy, requested: &str) -> bool {
2721    fn rank(v: &str) -> usize {
2722        match v {
2723            "none" => 0,
2724            "read_only" => 1,
2725            "workspace_write" => 2,
2726            "process_exec" => 3,
2727            "network" => 4,
2728            _ => 5,
2729        }
2730    }
2731    policy
2732        .side_effect_level
2733        .as_ref()
2734        .map(|allowed| rank(allowed) >= rank(requested))
2735        .unwrap_or(true)
2736}
2737
/// Build the standard policy-rejection error: a categorized VM error using
/// the `ToolRejected` category so callers can tell policy denials apart from
/// ordinary runtime failures. Always returns `Err`; the `Result<(), _>`
/// shape lets enforcement code write `return reject_policy(...)` directly.
fn reject_policy(reason: String) -> Result<(), VmError> {
    Err(VmError::CategorizedError {
        message: reason,
        category: crate::value::ErrorCategory::ToolRejected,
    })
}
2744
/// Heuristic mutation classification for a tool name when the active policy
/// declares none. Matching is case-insensitive and ordered: MCP tools first,
/// then process execution, then destructive operations, then workspace
/// writes, defaulting to "read_only".
fn fallback_mutation_classification(tool_name: &str) -> String {
    let lower = tool_name.to_ascii_lowercase();
    // MCP-bridged tools carry a host-defined classification.
    if lower.starts_with("mcp_") {
        return "host_defined".to_string();
    }
    // Arbitrary command execution: exact shell/exec names or any run_* tool.
    const EXEC_NAMES: [&str; 5] = ["exec", "shell", "exec_at", "shell_at", "run"];
    if EXEC_NAMES.contains(&lower.as_str()) || lower.starts_with("run_") {
        return "ambient_side_effect".to_string();
    }
    // Names that begin with a destructive verb.
    const DESTRUCTIVE_PREFIXES: [&str; 4] = ["delete", "remove", "move", "rename"];
    if DESTRUCTIVE_PREFIXES
        .iter()
        .any(|prefix| lower.starts_with(prefix))
    {
        return "destructive".to_string();
    }
    // Workspace-writing tools: write-ish substrings anywhere in the name,
    // insert/replace prefixes, or the literal add_import tool.
    const WRITE_MARKERS: [&str; 5] = ["write", "edit", "patch", "create", "scaffold"];
    let writes_workspace = WRITE_MARKERS.iter().any(|marker| lower.contains(marker))
        || lower.starts_with("insert")
        || lower.starts_with("replace")
        || lower == "add_import";
    if writes_workspace {
        return "apply_workspace".to_string();
    }
    "read_only".to_string()
}
2779
2780pub fn current_tool_mutation_classification(tool_name: &str) -> String {
2781    current_tool_metadata(tool_name)
2782        .and_then(|metadata| metadata.mutation_classification)
2783        .unwrap_or_else(|| fallback_mutation_classification(tool_name))
2784}
2785
2786pub fn current_tool_declared_paths(tool_name: &str, args: &serde_json::Value) -> Vec<String> {
2787    let Some(map) = args.as_object() else {
2788        return Vec::new();
2789    };
2790    let path_keys = current_tool_metadata(tool_name)
2791        .map(|metadata| metadata.path_params)
2792        .filter(|keys| !keys.is_empty())
2793        .unwrap_or_else(|| {
2794            vec![
2795                "path".to_string(),
2796                "file".to_string(),
2797                "cwd".to_string(),
2798                "repo".to_string(),
2799                "target".to_string(),
2800                "destination".to_string(),
2801            ]
2802        });
2803    let mut paths = Vec::new();
2804    for key in path_keys {
2805        if let Some(value) = map.get(&key).and_then(|value| value.as_str()) {
2806            if !value.is_empty() {
2807                paths.push(value.to_string());
2808            }
2809        }
2810    }
2811    if let Some(items) = map.get("paths").and_then(|value| value.as_array()) {
2812        for item in items {
2813            if let Some(value) = item.as_str() {
2814                if !value.is_empty() {
2815                    paths.push(value.to_string());
2816                }
2817            }
2818        }
2819    }
2820    paths.sort();
2821    paths.dedup();
2822    paths
2823}
2824
/// Enforce the active thread-local execution policy for a named builtin.
///
/// Returns `Ok(())` when no policy is active, when the builtin is not
/// policy-gated, or when it fits under the policy's tool, capability, and
/// side-effect ceilings; otherwise returns a `ToolRejected` error via
/// `reject_policy`. `args` is only inspected for `host_call`, whose first
/// argument names the capability/operation being invoked.
pub fn enforce_current_policy_for_builtin(name: &str, args: &[VmValue]) -> Result<(), VmError> {
    let Some(policy) = current_execution_policy() else {
        // No active policy: nothing to enforce.
        return Ok(());
    };
    match name {
        // Text reads need the builtin's own tool grant plus workspace.read_text.
        "read" | "read_file" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "read_text")
            {
                return reject_policy(format!(
                    "builtin '{name}' exceeds workspace.read_text ceiling"
                ));
            }
        }
        // Enumeration builtins map onto the workspace.list capability.
        "search" | "list_dir" => {
            if !policy_allows_tool(&policy, name)
                || !policy_allows_capability(&policy, "workspace", "list")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
            }
        }
        // Existence/metadata probes only check the capability, not a tool grant.
        "file_exists" | "stat" => {
            if !policy_allows_capability(&policy, "workspace", "exists") {
                return reject_policy(format!("builtin '{name}' exceeds workspace.exists ceiling"));
            }
        }
        // All text-writing builtins are gated behind the "edit" tool name,
        // workspace.write_text, and the workspace_write side-effect level.
        "edit" | "write_file" | "append_file" | "mkdir" | "copy_file" => {
            if !policy_allows_tool(&policy, "edit")
                || !policy_allows_capability(&policy, "workspace", "write_text")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(format!("builtin '{name}' exceeds workspace write ceiling"));
            }
        }
        "delete_file" => {
            if !policy_allows_capability(&policy, "workspace", "delete")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'delete_file' exceeds workspace.delete ceiling".to_string(),
                );
            }
        }
        "apply_edit" => {
            if !policy_allows_capability(&policy, "workspace", "apply_edit")
                || !policy_allows_side_effect(&policy, "workspace_write")
            {
                return reject_policy(
                    "builtin 'apply_edit' exceeds workspace.apply_edit ceiling".to_string(),
                );
            }
        }
        // Process execution is gated behind the "run" tool name, process.exec,
        // and the process_exec side-effect level.
        "exec" | "exec_at" | "shell" | "shell_at" | "run_command" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        // Network builtins only check the side-effect ceiling.
        "http_get" | "http_post" | "http_put" | "http_patch" | "http_delete" | "http_request" => {
            if !policy_allows_side_effect(&policy, "network") {
                return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
            }
        }
        // MCP builtins are gated like process execution (same three checks
        // as exec/shell above).
        "mcp_connect"
        | "mcp_call"
        | "mcp_list_tools"
        | "mcp_list_resources"
        | "mcp_list_resource_templates"
        | "mcp_read_resource"
        | "mcp_list_prompts"
        | "mcp_get_prompt"
        | "mcp_server_info"
        | "mcp_disconnect" => {
            if !policy_allows_tool(&policy, "run")
                || !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec")
            {
                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
            }
        }
        "host_call" => {
            // Shadows the builtin name: this is the host_call target (first
            // argument), expected in "capability.operation" form.
            let name = args.first().map(|v| v.display()).unwrap_or_default();
            let Some((capability, op)) = name.split_once('.') else {
                return reject_policy(format!(
                    "host_call '{name}' must use capability.operation naming"
                ));
            };
            if !policy_allows_capability(&policy, capability, op) {
                return reject_policy(format!(
                    "host_call {capability}.{op} exceeds capability ceiling"
                ));
            }
            // Map known mutating operations onto the side-effect ladder;
            // everything else is treated as read_only.
            let requested_side_effect = match (capability, op) {
                ("workspace", "write_text" | "apply_edit" | "delete") => "workspace_write",
                ("process", "exec") => "process_exec",
                _ => "read_only",
            };
            if !policy_allows_side_effect(&policy, requested_side_effect) {
                return reject_policy(format!(
                    "host_call {capability}.{op} exceeds side-effect ceiling"
                ));
            }
        }
        // Builtins not listed above are not policy-gated.
        _ => {}
    }
    Ok(())
}
2934
2935pub fn enforce_current_policy_for_bridge_builtin(name: &str) -> Result<(), VmError> {
2936    if current_execution_policy().is_some() {
2937        return reject_policy(format!(
2938            "bridged builtin '{name}' exceeds execution policy; declare an explicit capability/tool surface instead"
2939        ));
2940    }
2941    Ok(())
2942}
2943
2944pub fn enforce_current_policy_for_tool(tool_name: &str) -> Result<(), VmError> {
2945    let Some(policy) = current_execution_policy() else {
2946        return Ok(());
2947    };
2948    if !policy_allows_tool(&policy, tool_name) {
2949        return reject_policy(format!("tool '{tool_name}' exceeds tool ceiling"));
2950    }
2951    if let Some(metadata) = policy.tool_metadata.get(tool_name) {
2952        for (capability, ops) in &metadata.capabilities {
2953            for op in ops {
2954                if !policy_allows_capability(&policy, capability, op) {
2955                    return reject_policy(format!(
2956                        "tool '{tool_name}' exceeds capability ceiling: {capability}.{op}"
2957                    ));
2958                }
2959            }
2960        }
2961        if let Some(side_effect_level) = metadata.side_effect_level.as_deref() {
2962            if !policy_allows_side_effect(&policy, side_effect_level) {
2963                return reject_policy(format!(
2964                    "tool '{tool_name}' exceeds side-effect ceiling: {side_effect_level}"
2965                ));
2966            }
2967        }
2968    }
2969    Ok(())
2970}
2971
2972fn compact_transcript(transcript: &VmValue, keep_last: usize) -> Option<VmValue> {
2973    let dict = transcript.as_dict()?;
2974    let messages = match dict.get("messages") {
2975        Some(VmValue::List(list)) => list.iter().cloned().collect::<Vec<_>>(),
2976        _ => Vec::new(),
2977    };
2978    let retained = messages
2979        .into_iter()
2980        .rev()
2981        .take(keep_last)
2982        .collect::<Vec<_>>()
2983        .into_iter()
2984        .rev()
2985        .collect::<Vec<_>>();
2986    let mut compacted = dict.clone();
2987    compacted.insert(
2988        "messages".to_string(),
2989        VmValue::List(Rc::new(retained.clone())),
2990    );
2991    compacted.insert(
2992        "events".to_string(),
2993        VmValue::List(Rc::new(
2994            crate::llm::helpers::transcript_events_from_messages(&retained),
2995        )),
2996    );
2997    Some(VmValue::Dict(Rc::new(compacted)))
2998}
2999
3000fn redact_transcript_visibility(transcript: &VmValue, visibility: Option<&str>) -> Option<VmValue> {
3001    let Some(visibility) = visibility else {
3002        return Some(transcript.clone());
3003    };
3004    if visibility != "public" && visibility != "public_only" {
3005        return Some(transcript.clone());
3006    }
3007    let dict = transcript.as_dict()?;
3008    let public_messages = match dict.get("messages") {
3009        Some(VmValue::List(list)) => list
3010            .iter()
3011            .filter(|message| {
3012                message
3013                    .as_dict()
3014                    .and_then(|d| d.get("role"))
3015                    .map(|v| v.display())
3016                    .map(|role| role != "tool_result")
3017                    .unwrap_or(true)
3018            })
3019            .cloned()
3020            .collect::<Vec<_>>(),
3021        _ => Vec::new(),
3022    };
3023    let public_events = match dict.get("events") {
3024        Some(VmValue::List(list)) => list
3025            .iter()
3026            .filter(|event| {
3027                event
3028                    .as_dict()
3029                    .and_then(|d| d.get("visibility"))
3030                    .map(|v| v.display())
3031                    .map(|value| value == "public")
3032                    .unwrap_or(true)
3033            })
3034            .cloned()
3035            .collect::<Vec<_>>(),
3036        _ => Vec::new(),
3037    };
3038    let mut redacted = dict.clone();
3039    redacted.insert(
3040        "messages".to_string(),
3041        VmValue::List(Rc::new(public_messages)),
3042    );
3043    redacted.insert("events".to_string(), VmValue::List(Rc::new(public_events)));
3044    Some(VmValue::Dict(Rc::new(redacted)))
3045}
3046
3047pub(crate) fn apply_input_transcript_policy(
3048    transcript: Option<VmValue>,
3049    policy: &TranscriptPolicy,
3050) -> Option<VmValue> {
3051    let mut transcript = transcript;
3052    match policy.mode.as_deref() {
3053        Some("reset") => return None,
3054        Some("fork") => {
3055            if let Some(VmValue::Dict(dict)) = transcript.as_ref() {
3056                let mut forked = dict.as_ref().clone();
3057                forked.insert(
3058                    "id".to_string(),
3059                    VmValue::String(Rc::from(new_id("transcript"))),
3060                );
3061                transcript = Some(VmValue::Dict(Rc::new(forked)));
3062            }
3063        }
3064        _ => {}
3065    }
3066    if policy.compact {
3067        let keep_last = policy.keep_last.unwrap_or(6);
3068        transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
3069    }
3070    transcript
3071}
3072
3073fn apply_output_transcript_policy(
3074    transcript: Option<VmValue>,
3075    policy: &TranscriptPolicy,
3076) -> Option<VmValue> {
3077    let mut transcript = transcript;
3078    if policy.compact {
3079        let keep_last = policy.keep_last.unwrap_or(6);
3080        transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
3081    }
3082    transcript.and_then(|value| redact_transcript_visibility(&value, policy.visibility.as_deref()))
3083}
3084
3085pub async fn execute_stage_node(
3086    node_id: &str,
3087    node: &WorkflowNode,
3088    task: &str,
3089    artifacts: &[ArtifactRecord],
3090    transcript: Option<VmValue>,
3091) -> Result<(serde_json::Value, Vec<ArtifactRecord>, Option<VmValue>), VmError> {
3092    let mut selection_policy = node.context_policy.clone();
3093    if selection_policy.include_kinds.is_empty() && !node.input_contract.input_kinds.is_empty() {
3094        selection_policy.include_kinds = node.input_contract.input_kinds.clone();
3095    }
3096    let selected = select_artifacts_adaptive(artifacts.to_vec(), &selection_policy);
3097    let rendered_context = render_artifacts_context(&selected, &node.context_policy);
3098    let transcript = apply_input_transcript_policy(transcript, &node.transcript_policy);
3099    if node.input_contract.require_transcript && transcript.is_none() {
3100        return Err(VmError::Runtime(format!(
3101            "workflow stage {node_id} requires transcript input"
3102        )));
3103    }
3104    if let Some(min_inputs) = node.input_contract.min_inputs {
3105        if selected.len() < min_inputs {
3106            return Err(VmError::Runtime(format!(
3107                "workflow stage {node_id} requires at least {min_inputs} input artifacts"
3108            )));
3109        }
3110    }
3111    if let Some(max_inputs) = node.input_contract.max_inputs {
3112        if selected.len() > max_inputs {
3113            return Err(VmError::Runtime(format!(
3114                "workflow stage {node_id} accepts at most {max_inputs} input artifacts"
3115            )));
3116        }
3117    }
3118    let prompt = if rendered_context.is_empty() {
3119        task.to_string()
3120    } else {
3121        format!(
3122            "{rendered_context}\n\n{}:\n{task}",
3123            node.task_label
3124                .clone()
3125                .unwrap_or_else(|| "Task".to_string())
3126        )
3127    };
3128
3129    let tool_format = std::env::var("HARN_AGENT_TOOL_FORMAT")
3130        .ok()
3131        .filter(|value| !value.trim().is_empty())
3132        .unwrap_or_else(|| "text".to_string());
3133    let mut llm_result = if node.kind == "verify" {
3134        if let Some(command) = node
3135            .verify
3136            .as_ref()
3137            .and_then(|verify| verify.as_object())
3138            .and_then(|verify| verify.get("command"))
3139            .and_then(|value| value.as_str())
3140            .map(str::trim)
3141            .filter(|value| !value.is_empty())
3142        {
3143            let mut process = if cfg!(target_os = "windows") {
3144                let mut cmd = tokio::process::Command::new("cmd");
3145                cmd.arg("/C").arg(command);
3146                cmd
3147            } else {
3148                let mut cmd = tokio::process::Command::new("/bin/sh");
3149                cmd.arg("-lc").arg(command);
3150                cmd
3151            };
3152            process.stdin(std::process::Stdio::null());
3153            if let Some(context) = crate::stdlib::process::current_execution_context() {
3154                if let Some(cwd) = context.cwd.filter(|cwd| !cwd.is_empty()) {
3155                    process.current_dir(cwd);
3156                }
3157                if !context.env.is_empty() {
3158                    process.envs(context.env);
3159                }
3160            }
3161            let output = process
3162                .output()
3163                .await
3164                .map_err(|e| VmError::Runtime(format!("workflow verify exec failed: {e}")))?;
3165            let stdout = String::from_utf8_lossy(&output.stdout).to_string();
3166            let stderr = String::from_utf8_lossy(&output.stderr).to_string();
3167            let combined = if stderr.is_empty() {
3168                stdout.clone()
3169            } else if stdout.is_empty() {
3170                stderr.clone()
3171            } else {
3172                format!("{stdout}\n{stderr}")
3173            };
3174            serde_json::json!({
3175                "status": "completed",
3176                "text": combined,
3177                "visible_text": combined,
3178                "command": command,
3179                "stdout": stdout,
3180                "stderr": stderr,
3181                "exit_status": output.status.code().unwrap_or(-1),
3182                "success": output.status.success(),
3183            })
3184        } else {
3185            serde_json::json!({
3186                "status": "completed",
3187                "text": "",
3188                "visible_text": "",
3189            })
3190        }
3191    } else {
3192        let mut options = BTreeMap::new();
3193        if let Some(provider) = &node.model_policy.provider {
3194            options.insert(
3195                "provider".to_string(),
3196                VmValue::String(Rc::from(provider.clone())),
3197            );
3198        }
3199        if let Some(model) = &node.model_policy.model {
3200            options.insert(
3201                "model".to_string(),
3202                VmValue::String(Rc::from(model.clone())),
3203            );
3204        }
3205        if let Some(model_tier) = &node.model_policy.model_tier {
3206            options.insert(
3207                "model_tier".to_string(),
3208                VmValue::String(Rc::from(model_tier.clone())),
3209            );
3210        }
3211        if let Some(temperature) = node.model_policy.temperature {
3212            options.insert("temperature".to_string(), VmValue::Float(temperature));
3213        }
3214        if let Some(max_tokens) = node.model_policy.max_tokens {
3215            options.insert("max_tokens".to_string(), VmValue::Int(max_tokens));
3216        }
3217        let tool_names = workflow_tool_names(&node.tools);
3218        let tools_value = node.raw_tools.clone().or_else(|| {
3219            if matches!(node.tools, serde_json::Value::Null) {
3220                None
3221            } else {
3222                Some(crate::stdlib::json_to_vm_value(&node.tools))
3223            }
3224        });
3225        if tools_value.is_some() && !tool_names.is_empty() {
3226            options.insert("tools".to_string(), tools_value.unwrap_or(VmValue::Nil));
3227        }
3228        if let Some(transcript) = transcript.clone() {
3229            options.insert("transcript".to_string(), transcript);
3230        }
3231
3232        let args = vec![
3233            VmValue::String(Rc::from(prompt.clone())),
3234            node.system
3235                .clone()
3236                .map(|s| VmValue::String(Rc::from(s)))
3237                .unwrap_or(VmValue::Nil),
3238            VmValue::Dict(Rc::new(options)),
3239        ];
3240        let mut opts = extract_llm_options(&args)?;
3241
3242        if node.mode.as_deref() == Some("agent") || !tool_names.is_empty() {
3243            crate::llm::run_agent_loop_internal(
3244                &mut opts,
3245                crate::llm::AgentLoopConfig {
3246                    persistent: true,
3247                    max_iterations: node.model_policy.max_iterations.unwrap_or(16),
3248                    max_nudges: node.model_policy.max_nudges.unwrap_or(3),
3249                    nudge: node.model_policy.nudge.clone(),
3250                    done_sentinel: node.done_sentinel.clone(),
3251                    break_unless_phase: None,
3252                    tool_retries: 0,
3253                    tool_backoff_ms: 1000,
3254                    tool_format: tool_format.clone(),
3255                    auto_compact: None,
3256                    context_callback: None,
3257                    policy: None,
3258                    daemon: false,
3259                    llm_retries: 2,
3260                    llm_backoff_ms: 2000,
3261                    exit_when_verified: false,
3262                    loop_detect_warn: 2,
3263                    loop_detect_block: 3,
3264                    loop_detect_skip: 4,
3265                },
3266            )
3267            .await?
3268        } else {
3269            let result = vm_call_llm_full(&opts).await?;
3270            crate::llm::agent_loop_result_from_llm(&result, opts)
3271        }
3272    };
3273    if let Some(payload) = llm_result.as_object_mut() {
3274        payload.insert("prompt".to_string(), serde_json::json!(prompt));
3275        payload.insert(
3276            "system_prompt".to_string(),
3277            serde_json::json!(node.system.clone().unwrap_or_default()),
3278        );
3279        payload.insert(
3280            "rendered_context".to_string(),
3281            serde_json::json!(rendered_context),
3282        );
3283        payload.insert(
3284            "selected_artifact_ids".to_string(),
3285            serde_json::json!(selected
3286                .iter()
3287                .map(|artifact| artifact.id.clone())
3288                .collect::<Vec<_>>()),
3289        );
3290        payload.insert(
3291            "selected_artifact_titles".to_string(),
3292            serde_json::json!(selected
3293                .iter()
3294                .map(|artifact| artifact.title.clone())
3295                .collect::<Vec<_>>()),
3296        );
3297        payload.insert(
3298            "tool_calling_mode".to_string(),
3299            serde_json::json!(tool_format.clone()),
3300        );
3301    }
3302
3303    let visible_text = llm_result["text"].as_str().unwrap_or_default().to_string();
3304    let transcript = llm_result
3305        .get("transcript")
3306        .cloned()
3307        .map(|value| crate::stdlib::json_to_vm_value(&value));
3308    let transcript = apply_output_transcript_policy(transcript, &node.transcript_policy);
3309    let output_kind = node
3310        .output_contract
3311        .output_kinds
3312        .first()
3313        .cloned()
3314        .unwrap_or_else(|| {
3315            if node.kind == "verify" {
3316                "verification_result".to_string()
3317            } else {
3318                "artifact".to_string()
3319            }
3320        });
3321    let mut metadata = BTreeMap::new();
3322    metadata.insert(
3323        "input_artifact_ids".to_string(),
3324        serde_json::json!(selected
3325            .iter()
3326            .map(|artifact| artifact.id.clone())
3327            .collect::<Vec<_>>()),
3328    );
3329    metadata.insert("node_kind".to_string(), serde_json::json!(node.kind));
3330    let artifact = ArtifactRecord {
3331        type_name: "artifact".to_string(),
3332        id: new_id("artifact"),
3333        kind: output_kind,
3334        title: Some(format!("stage {node_id} output")),
3335        text: Some(visible_text),
3336        data: Some(llm_result.clone()),
3337        source: Some(node_id.to_string()),
3338        created_at: now_rfc3339(),
3339        freshness: Some("fresh".to_string()),
3340        priority: None,
3341        lineage: selected
3342            .iter()
3343            .map(|artifact| artifact.id.clone())
3344            .collect(),
3345        relevance: Some(1.0),
3346        estimated_tokens: None,
3347        stage: Some(node_id.to_string()),
3348        metadata,
3349    }
3350    .normalize();
3351
3352    Ok((llm_result, vec![artifact], transcript))
3353}
3354
3355pub fn next_nodes_for(
3356    graph: &WorkflowGraph,
3357    current: &str,
3358    branch: Option<&str>,
3359) -> Vec<WorkflowEdge> {
3360    let mut matching: Vec<WorkflowEdge> = graph
3361        .edges
3362        .iter()
3363        .filter(|edge| edge.from == current && edge.branch.as_deref() == branch)
3364        .cloned()
3365        .collect();
3366    if matching.is_empty() {
3367        matching = graph
3368            .edges
3369            .iter()
3370            .filter(|edge| edge.from == current && edge.branch.is_none())
3371            .cloned()
3372            .collect();
3373    }
3374    matching
3375}
3376
3377pub fn next_node_for(graph: &WorkflowGraph, current: &str, branch: &str) -> Option<String> {
3378    next_nodes_for(graph, current, Some(branch))
3379        .into_iter()
3380        .next()
3381        .map(|edge| edge.to)
3382}
3383
3384pub fn append_audit_entry(
3385    graph: &mut WorkflowGraph,
3386    op: &str,
3387    node_id: Option<String>,
3388    reason: Option<String>,
3389    metadata: BTreeMap<String, serde_json::Value>,
3390) {
3391    graph.audit_log.push(WorkflowAuditEntry {
3392        id: new_id("audit"),
3393        op: op.to_string(),
3394        node_id,
3395        timestamp: now_rfc3339(),
3396        reason,
3397        metadata,
3398    });
3399}
3400
3401pub fn builtin_ceiling() -> CapabilityPolicy {
3402    CapabilityPolicy {
3403        // Capabilities left empty — the host capability manifest is the sole
3404        // authority on which operations are available.  An explicit allowlist
3405        // here would silently block any capability the host adds later.
3406        tools: Vec::new(),
3407        capabilities: BTreeMap::new(),
3408        workspace_roots: Vec::new(),
3409        side_effect_level: Some("network".to_string()),
3410        recursion_limit: Some(8),
3411        tool_arg_constraints: Vec::new(),
3412        tool_metadata: BTreeMap::new(),
3413    }
3414}
3415
3416#[cfg(test)]
3417mod tests {
3418    use super::*;
3419
3420    #[test]
3421    fn capability_intersection_rejects_privilege_expansion() {
3422        let ceiling = CapabilityPolicy {
3423            tools: vec!["read".to_string()],
3424            side_effect_level: Some("read_only".to_string()),
3425            recursion_limit: Some(2),
3426            ..Default::default()
3427        };
3428        let requested = CapabilityPolicy {
3429            tools: vec!["read".to_string(), "edit".to_string()],
3430            ..Default::default()
3431        };
3432        let error = ceiling.intersect(&requested).unwrap_err();
3433        assert!(error.contains("host ceiling"));
3434    }
3435
3436    #[test]
3437    fn mutation_session_normalize_fills_defaults() {
3438        let normalized = MutationSessionRecord::default().normalize();
3439        assert!(normalized.session_id.starts_with("session_"));
3440        assert_eq!(normalized.mutation_scope, "read_only");
3441        assert_eq!(normalized.approval_mode, "host_enforced");
3442    }
3443
    #[test]
    fn install_current_mutation_session_round_trips() {
        // Installing a session stores it in thread-local state; reading it
        // back must return the same field values.
        install_current_mutation_session(Some(MutationSessionRecord {
            session_id: "session_test".to_string(),
            mutation_scope: "apply_workspace".to_string(),
            approval_mode: "explicit".to_string(),
            ..Default::default()
        }));
        let current = current_mutation_session().expect("session installed");
        assert_eq!(current.session_id, "session_test");
        assert_eq!(current.mutation_scope, "apply_workspace");
        assert_eq!(current.approval_mode, "explicit");

        // Installing `None` clears the slot again so later tests on this
        // thread start from a clean state.
        install_current_mutation_session(None);
        assert!(current_mutation_session().is_none());
    }
3460
    #[test]
    fn active_execution_policy_rejects_unknown_bridge_builtin() {
        // Push a narrow read-only policy onto the thread-local policy stack,
        // then ask it to authorize a bridge builtin it never listed. The
        // push/pop pair must bracket the call exactly — the stack is shared
        // thread-local state.
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_bridge_builtin("custom_host_builtin").unwrap_err();
        pop_execution_policy();
        // Policy denials surface as categorized ToolRejected errors so
        // callers can distinguish them from other VM failures.
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }
3483
    #[test]
    fn active_execution_policy_rejects_mcp_escape_hatch() {
        // The `mcp_connect` builtin is treated as an escape hatch around the
        // regular tool allowlist; under a restrictive policy it must be
        // rejected with a categorized ToolRejected error rather than allowed
        // through.
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            capabilities: BTreeMap::from([(
                "workspace".to_string(),
                vec!["read_text".to_string()],
            )]),
            side_effect_level: Some("read_only".to_string()),
            recursion_limit: Some(1),
            ..Default::default()
        });
        let error = enforce_current_policy_for_builtin("mcp_connect", &[]).unwrap_err();
        pop_execution_policy();
        assert!(matches!(
            error,
            VmError::CategorizedError {
                category: crate::value::ErrorCategory::ToolRejected,
                ..
            }
        ));
    }
3506
3507    #[test]
3508    fn workflow_normalization_upgrades_legacy_act_verify_repair_shape() {
3509        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
3510            "name": "legacy",
3511            "act": {"mode": "llm"},
3512            "verify": {"kind": "verify"},
3513            "repair": {"mode": "agent"},
3514        }));
3515        let graph = normalize_workflow_value(&value).unwrap();
3516        assert_eq!(graph.type_name, "workflow_graph");
3517        assert!(graph.nodes.contains_key("act"));
3518        assert!(graph.nodes.contains_key("verify"));
3519        assert!(graph.nodes.contains_key("repair"));
3520        assert_eq!(graph.entry, "act");
3521    }
3522
3523    #[test]
3524    fn workflow_normalization_accepts_tool_registry_nodes() {
3525        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
3526            "name": "registry_tools",
3527            "entry": "implement",
3528            "nodes": {
3529                "implement": {
3530                    "kind": "stage",
3531                    "mode": "agent",
3532                    "tools": {
3533                        "_type": "tool_registry",
3534                        "tools": [
3535                            {"name": "read", "description": "Read files"},
3536                            {"name": "run", "description": "Run commands"}
3537                        ]
3538                    }
3539                }
3540            },
3541            "edges": []
3542        }));
3543        let graph = normalize_workflow_value(&value).unwrap();
3544        let node = graph.nodes.get("implement").unwrap();
3545        assert_eq!(workflow_tool_names(&node.tools), vec!["read", "run"]);
3546    }
3547
3548    #[test]
3549    fn artifact_selection_honors_budget_and_priority() {
3550        let policy = ContextPolicy {
3551            max_artifacts: Some(2),
3552            max_tokens: Some(30),
3553            prefer_recent: true,
3554            prefer_fresh: true,
3555            prioritize_kinds: vec!["verification_result".to_string()],
3556            ..Default::default()
3557        };
3558        let artifacts = vec![
3559            ArtifactRecord {
3560                type_name: "artifact".to_string(),
3561                id: "a".to_string(),
3562                kind: "summary".to_string(),
3563                text: Some("short".to_string()),
3564                relevance: Some(0.9),
3565                created_at: now_rfc3339(),
3566                ..Default::default()
3567            }
3568            .normalize(),
3569            ArtifactRecord {
3570                type_name: "artifact".to_string(),
3571                id: "b".to_string(),
3572                kind: "summary".to_string(),
3573                text: Some("this is a much larger artifact body".to_string()),
3574                relevance: Some(1.0),
3575                created_at: now_rfc3339(),
3576                ..Default::default()
3577            }
3578            .normalize(),
3579            ArtifactRecord {
3580                type_name: "artifact".to_string(),
3581                id: "c".to_string(),
3582                kind: "summary".to_string(),
3583                text: Some("tiny".to_string()),
3584                relevance: Some(0.5),
3585                created_at: now_rfc3339(),
3586                ..Default::default()
3587            }
3588            .normalize(),
3589        ];
3590        let selected = select_artifacts(artifacts, &policy);
3591        assert_eq!(selected.len(), 2);
3592        assert!(selected.iter().all(|artifact| artifact.kind == "summary"));
3593    }
3594
3595    #[test]
3596    fn workflow_validation_rejects_condition_without_true_false_edges() {
3597        let graph = WorkflowGraph {
3598            entry: "gate".to_string(),
3599            nodes: BTreeMap::from([(
3600                "gate".to_string(),
3601                WorkflowNode {
3602                    id: Some("gate".to_string()),
3603                    kind: "condition".to_string(),
3604                    ..Default::default()
3605                },
3606            )]),
3607            edges: vec![WorkflowEdge {
3608                from: "gate".to_string(),
3609                to: "next".to_string(),
3610                branch: Some("true".to_string()),
3611                label: None,
3612            }],
3613            ..Default::default()
3614        };
3615        let report = validate_workflow(&graph, None);
3616        assert!(!report.valid);
3617        assert!(report
3618            .errors
3619            .iter()
3620            .any(|error| error.contains("true") && error.contains("false")));
3621    }
3622
    #[test]
    fn replay_fixture_round_trip_passes() {
        // Build a fully-populated completed run by hand, derive a replay
        // fixture from it, and confirm the run evaluates cleanly against its
        // own fixture — the round trip must produce zero failures.
        let run = RunRecord {
            type_name: "run_record".to_string(),
            id: "run_1".to_string(),
            workflow_id: "wf".to_string(),
            workflow_name: Some("demo".to_string()),
            task: "demo".to_string(),
            status: "completed".to_string(),
            started_at: "1".to_string(),
            finished_at: Some("2".to_string()),
            parent_run_id: None,
            root_run_id: Some("run_1".to_string()),
            // One completed stage that produced a single artifact ("a1").
            stages: vec![RunStageRecord {
                id: "stage_1".to_string(),
                node_id: "act".to_string(),
                kind: "stage".to_string(),
                status: "completed".to_string(),
                outcome: "success".to_string(),
                branch: Some("success".to_string()),
                started_at: "1".to_string(),
                finished_at: Some("2".to_string()),
                visible_text: Some("done".to_string()),
                private_reasoning: None,
                transcript: None,
                verification: None,
                usage: None,
                artifacts: vec![ArtifactRecord {
                    type_name: "artifact".to_string(),
                    id: "a1".to_string(),
                    kind: "summary".to_string(),
                    text: Some("done".to_string()),
                    created_at: "1".to_string(),
                    ..Default::default()
                }
                .normalize()],
                consumed_artifact_ids: vec![],
                produced_artifact_ids: vec!["a1".to_string()],
                attempts: vec![],
                metadata: BTreeMap::new(),
            }],
            transitions: vec![],
            checkpoints: vec![],
            pending_nodes: vec![],
            completed_nodes: vec!["act".to_string()],
            child_runs: vec![],
            artifacts: vec![],
            policy: CapabilityPolicy::default(),
            execution: None,
            transcript: None,
            usage: None,
            replay_fixture: None,
            trace_spans: vec![],
            metadata: BTreeMap::new(),
            persisted_path: None,
        };
        let fixture = replay_fixture_from_run(&run);
        let report = evaluate_run_against_fixture(&run, &fixture);
        assert!(report.pass);
        assert!(report.failures.is_empty());
    }
3684
3685    #[test]
3686    fn replay_eval_suite_reports_failed_case() {
3687        let good = RunRecord {
3688            id: "run_good".to_string(),
3689            workflow_id: "wf".to_string(),
3690            status: "completed".to_string(),
3691            stages: vec![RunStageRecord {
3692                node_id: "act".to_string(),
3693                status: "completed".to_string(),
3694                outcome: "success".to_string(),
3695                ..Default::default()
3696            }],
3697            ..Default::default()
3698        };
3699        let bad = RunRecord {
3700            id: "run_bad".to_string(),
3701            workflow_id: "wf".to_string(),
3702            status: "failed".to_string(),
3703            stages: vec![RunStageRecord {
3704                node_id: "act".to_string(),
3705                status: "failed".to_string(),
3706                outcome: "error".to_string(),
3707                ..Default::default()
3708            }],
3709            ..Default::default()
3710        };
3711        let suite = evaluate_run_suite(vec![
3712            (
3713                good.clone(),
3714                replay_fixture_from_run(&good),
3715                Some("good.json".to_string()),
3716            ),
3717            (
3718                bad.clone(),
3719                replay_fixture_from_run(&good),
3720                Some("bad.json".to_string()),
3721            ),
3722        ]);
3723        assert!(!suite.pass);
3724        assert_eq!(suite.total, 2);
3725        assert_eq!(suite.failed, 1);
3726        assert!(suite.cases.iter().any(|case| !case.pass));
3727    }
3728
3729    #[test]
3730    fn run_diff_reports_changed_stage() {
3731        let left = RunRecord {
3732            id: "left".to_string(),
3733            workflow_id: "wf".to_string(),
3734            status: "completed".to_string(),
3735            stages: vec![RunStageRecord {
3736                node_id: "act".to_string(),
3737                status: "completed".to_string(),
3738                outcome: "success".to_string(),
3739                ..Default::default()
3740            }],
3741            ..Default::default()
3742        };
3743        let right = RunRecord {
3744            id: "right".to_string(),
3745            workflow_id: "wf".to_string(),
3746            status: "failed".to_string(),
3747            stages: vec![RunStageRecord {
3748                node_id: "act".to_string(),
3749                status: "failed".to_string(),
3750                outcome: "error".to_string(),
3751                ..Default::default()
3752            }],
3753            ..Default::default()
3754        };
3755        let diff = diff_run_records(&left, &right);
3756        assert!(diff.status_changed);
3757        assert!(!diff.identical);
3758        assert_eq!(diff.stage_diffs.len(), 1);
3759    }
3760
3761    #[test]
3762    fn eval_suite_manifest_can_fail_on_baseline_diff() {
3763        let temp_dir =
3764            std::env::temp_dir().join(format!("harn-eval-suite-{}", uuid::Uuid::now_v7()));
3765        std::fs::create_dir_all(&temp_dir).unwrap();
3766        let baseline_path = temp_dir.join("baseline.json");
3767        let candidate_path = temp_dir.join("candidate.json");
3768
3769        let baseline = RunRecord {
3770            id: "baseline".to_string(),
3771            workflow_id: "wf".to_string(),
3772            status: "completed".to_string(),
3773            stages: vec![RunStageRecord {
3774                node_id: "act".to_string(),
3775                status: "completed".to_string(),
3776                outcome: "success".to_string(),
3777                ..Default::default()
3778            }],
3779            ..Default::default()
3780        };
3781        let candidate = RunRecord {
3782            id: "candidate".to_string(),
3783            workflow_id: "wf".to_string(),
3784            status: "failed".to_string(),
3785            stages: vec![RunStageRecord {
3786                node_id: "act".to_string(),
3787                status: "failed".to_string(),
3788                outcome: "error".to_string(),
3789                ..Default::default()
3790            }],
3791            ..Default::default()
3792        };
3793
3794        save_run_record(&baseline, Some(baseline_path.to_str().unwrap())).unwrap();
3795        save_run_record(&candidate, Some(candidate_path.to_str().unwrap())).unwrap();
3796
3797        let manifest = EvalSuiteManifest {
3798            base_dir: Some(temp_dir.display().to_string()),
3799            cases: vec![EvalSuiteCase {
3800                label: Some("candidate".to_string()),
3801                run_path: "candidate.json".to_string(),
3802                fixture_path: None,
3803                compare_to: Some("baseline.json".to_string()),
3804            }],
3805            ..Default::default()
3806        };
3807        let suite = evaluate_run_suite_manifest(&manifest).unwrap();
3808        assert!(!suite.pass);
3809        assert_eq!(suite.failed, 1);
3810        assert!(suite.cases[0].comparison.is_some());
3811        assert!(suite.cases[0]
3812            .failures
3813            .iter()
3814            .any(|failure| failure.contains("baseline")));
3815    }
3816
3817    #[test]
3818    fn render_unified_diff_marks_removed_and_added_lines() {
3819        let diff = render_unified_diff(Some("src/main.rs"), "old\nsame", "new\nsame");
3820        assert!(diff.contains("--- a/src/main.rs"));
3821        assert!(diff.contains("+++ b/src/main.rs"));
3822        assert!(diff.contains("-old"));
3823        assert!(diff.contains("+new"));
3824        assert!(diff.contains(" same"));
3825    }
3826
3827    #[test]
3828    fn render_unified_diff_identical_inputs() {
3829        let text = "line1\nline2\nline3";
3830        let diff = render_unified_diff(None, text, text);
3831        assert!(diff.contains("--- a/artifact"));
3832        let body: Vec<&str> = diff.lines().skip(2).collect();
3833        assert!(!body.iter().any(|l| l.starts_with('-')));
3834        assert!(!body.iter().any(|l| l.starts_with('+')));
3835        assert_eq!(body.len(), 3);
3836    }
3837
3838    #[test]
3839    fn render_unified_diff_empty_before() {
3840        let diff = render_unified_diff(None, "", "new1\nnew2");
3841        assert!(diff.contains("+new1"));
3842        assert!(diff.contains("+new2"));
3843        let body: Vec<&str> = diff.lines().skip(2).collect();
3844        assert!(!body.iter().any(|l| l.starts_with('-')));
3845    }
3846
3847    #[test]
3848    fn render_unified_diff_empty_after() {
3849        let diff = render_unified_diff(None, "old1\nold2", "");
3850        assert!(diff.contains("-old1"));
3851        assert!(diff.contains("-old2"));
3852        let body: Vec<&str> = diff.lines().skip(2).collect();
3853        assert!(!body.iter().any(|l| l.starts_with('+')));
3854    }
3855
3856    #[test]
3857    fn render_unified_diff_both_empty() {
3858        let diff = render_unified_diff(None, "", "");
3859        assert!(diff.contains("--- a/artifact"));
3860        assert!(diff.contains("+++ b/artifact"));
3861        // No content lines
3862        let body: String = diff.lines().skip(2).collect();
3863        assert!(body.is_empty());
3864    }
3865
3866    #[test]
3867    fn render_unified_diff_all_changed() {
3868        let diff = render_unified_diff(None, "a\nb", "x\ny");
3869        assert!(diff.contains("-a"));
3870        assert!(diff.contains("-b"));
3871        assert!(diff.contains("+x"));
3872        assert!(diff.contains("+y"));
3873    }
3874
3875    #[test]
3876    fn render_unified_diff_insertion_in_middle() {
3877        let diff = render_unified_diff(None, "a\nc", "a\nb\nc");
3878        assert!(diff.contains(" a"));
3879        assert!(diff.contains("+b"));
3880        assert!(diff.contains(" c"));
3881        let body: Vec<&str> = diff.lines().skip(2).collect();
3882        assert!(!body.iter().any(|l| l.starts_with('-')));
3883    }
3884
3885    #[test]
3886    fn render_unified_diff_deletion_from_middle() {
3887        let diff = render_unified_diff(None, "a\nb\nc", "a\nc");
3888        assert!(diff.contains(" a"));
3889        assert!(diff.contains("-b"));
3890        assert!(diff.contains(" c"));
3891        let body: Vec<&str> = diff.lines().skip(2).collect();
3892        assert!(!body.iter().any(|l| l.starts_with('+')));
3893    }
3894
3895    #[test]
3896    fn render_unified_diff_default_path() {
3897        let diff = render_unified_diff(None, "a", "b");
3898        assert!(diff.contains("--- a/artifact"));
3899        assert!(diff.contains("+++ b/artifact"));
3900    }
3901
3902    #[test]
3903    fn render_unified_diff_large_similar() {
3904        // Test performance: 1000 lines with one change in the middle
3905        let mut before = Vec::new();
3906        let mut after = Vec::new();
3907        for i in 0..1000 {
3908            before.push(format!("line {i}"));
3909            after.push(format!("line {i}"));
3910        }
3911        before[500] = "OLD LINE 500".to_string();
3912        after[500] = "NEW LINE 500".to_string();
3913        let before_str = before.join("\n");
3914        let after_str = after.join("\n");
3915        let diff = render_unified_diff(None, &before_str, &after_str);
3916        assert!(diff.contains("-OLD LINE 500"));
3917        assert!(diff.contains("+NEW LINE 500"));
3918        // Context lines should be present
3919        assert!(diff.contains(" line 499"));
3920        assert!(diff.contains(" line 501"));
3921    }
3922
3923    #[test]
3924    fn myers_diff_empty_sequences() {
3925        let ops = myers_diff(&[], &[]);
3926        assert!(ops.is_empty());
3927    }
3928
3929    #[test]
3930    fn myers_diff_insert_only() {
3931        let ops = myers_diff(&[], &["a", "b"]);
3932        assert_eq!(ops.len(), 2);
3933        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Insert));
3934    }
3935
3936    #[test]
3937    fn myers_diff_delete_only() {
3938        let ops = myers_diff(&["a", "b"], &[]);
3939        assert_eq!(ops.len(), 2);
3940        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Delete));
3941    }
3942
3943    #[test]
3944    fn myers_diff_equal() {
3945        let ops = myers_diff(&["a", "b", "c"], &["a", "b", "c"]);
3946        assert_eq!(ops.len(), 3);
3947        assert!(ops.iter().all(|(op, _)| *op == DiffOp::Equal));
3948    }
3949
    #[test]
    fn execution_policy_rejects_process_exec_when_read_only() {
        // Even though the capability map explicitly lists process/exec, a
        // "read_only" side-effect level must still veto the exec builtin.
        // The push/pop pair brackets the call because the policy stack is
        // shared thread-local state.
        push_execution_policy(CapabilityPolicy {
            side_effect_level: Some("read_only".to_string()),
            capabilities: BTreeMap::from([("process".to_string(), vec!["exec".to_string()])]),
            ..Default::default()
        });
        let result = enforce_current_policy_for_builtin("exec", &[]);
        pop_execution_policy();
        assert!(result.is_err());
    }
3961
    #[test]
    fn execution_policy_rejects_unlisted_tool() {
        // A non-empty tool allowlist is exhaustive: "edit" is not on it, so
        // enforcement must fail while the policy is on the stack.
        push_execution_policy(CapabilityPolicy {
            tools: vec!["read".to_string()],
            ..Default::default()
        });
        let result = enforce_current_policy_for_tool("edit");
        pop_execution_policy();
        assert!(result.is_err());
    }
3972
    #[test]
    fn normalize_run_record_preserves_trace_spans() {
        // Run records arriving as VM values must keep their trace spans —
        // including per-span metadata — through normalization.
        let value = crate::stdlib::json_to_vm_value(&serde_json::json!({
            "_type": "run_record",
            "id": "run_trace",
            "workflow_id": "wf",
            "status": "completed",
            "started_at": "1",
            "trace_spans": [
                {
                    "span_id": 1,
                    "parent_id": null,
                    "kind": "pipeline",
                    "name": "workflow",
                    "start_ms": 0,
                    "duration_ms": 42,
                    "metadata": {"model": "demo"}
                }
            ]
        }));

        let run = normalize_run_record(&value).unwrap();
        assert_eq!(run.trace_spans.len(), 1);
        assert_eq!(run.trace_spans[0].kind, "pipeline");
        assert_eq!(
            run.trace_spans[0].metadata["model"],
            serde_json::json!("demo")
        );
    }
4002
4003    // ── Tool hook tests ──────────────────────────────────────────────
4004
    #[test]
    fn pre_tool_hook_deny_blocks_execution() {
        // Hooks live in a thread-local registry, so clear before and after
        // to keep this test independent of its neighbors.
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "dangerous_*".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Deny("blocked by policy".to_string())
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("dangerous_delete", &serde_json::json!({}));
        clear_tool_hooks();
        // "dangerous_delete" matches the "dangerous_*" glob, so the hook
        // fires and its Deny decision is returned.
        assert!(matches!(result, PreToolAction::Deny(_)));
    }
4019
    #[test]
    fn pre_tool_hook_allow_passes_through() {
        // A matching pre-hook that answers Allow lets the call proceed
        // unchanged. The thread-local registry is cleared before and after
        // so the test stays isolated.
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "safe_*".to_string(),
            pre: Some(Rc::new(|_name, _args| PreToolAction::Allow)),
            post: None,
        });
        let result = run_pre_tool_hooks("safe_read", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Allow));
    }
4032
    #[test]
    fn pre_tool_hook_modify_rewrites_args() {
        // A wildcard pre-hook may rewrite the tool's arguments before it
        // runs; the Modify decision must carry the sanitized args back to
        // the caller.
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "*".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Modify(serde_json::json!({"path": "/sanitized"}))
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("read_file", &serde_json::json!({"path": "/etc/passwd"}));
        clear_tool_hooks();
        match result {
            PreToolAction::Modify(args) => assert_eq!(args["path"], "/sanitized"),
            _ => panic!("expected Modify"),
        }
    }
4050
    #[test]
    fn post_tool_hook_modifies_result() {
        // Post-hooks can rewrite a tool's textual result after it runs: here
        // the hook redacts any output containing "SECRET" and passes clean
        // output through untouched. The thread-local registry is cleared
        // around the test so hooks don't leak between tests.
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "exec".to_string(),
            pre: None,
            post: Some(Rc::new(|_name, result| {
                if result.contains("SECRET") {
                    PostToolAction::Modify("[REDACTED]".to_string())
                } else {
                    PostToolAction::Pass
                }
            })),
        });
        let result = run_post_tool_hooks("exec", "output with SECRET data");
        let clean = run_post_tool_hooks("exec", "clean output");
        clear_tool_hooks();
        assert_eq!(result, "[REDACTED]");
        assert_eq!(clean, "clean output");
    }
4071
    #[test]
    fn unmatched_hook_pattern_does_not_fire() {
        // A hook registered for "exec" must not intercept "read_file"; with
        // no matching hook the default decision is Allow.
        clear_tool_hooks();
        register_tool_hook(ToolHook {
            pattern: "exec".to_string(),
            pre: Some(Rc::new(|_name, _args| {
                PreToolAction::Deny("should not match".to_string())
            })),
            post: None,
        });
        let result = run_pre_tool_hooks("read_file", &serde_json::json!({}));
        clear_tool_hooks();
        assert!(matches!(result, PreToolAction::Allow));
    }
4086
4087    #[test]
4088    fn glob_match_patterns() {
4089        assert!(glob_match("*", "anything"));
4090        assert!(glob_match("exec*", "exec_at"));
4091        assert!(glob_match("*_file", "read_file"));
4092        assert!(!glob_match("exec*", "read_file"));
4093        assert!(glob_match("read_file", "read_file"));
4094        assert!(!glob_match("read_file", "write_file"));
4095    }
4096
4097    // ── Auto-compaction tests ────────────────────────────────────────
4098
4099    #[test]
4100    fn microcompact_snips_large_output() {
4101        let large = "x".repeat(50_000);
4102        let result = microcompact_tool_output(&large, 10_000);
4103        assert!(result.len() < 15_000);
4104        assert!(result.contains("snipped"));
4105    }
4106
4107    #[test]
4108    fn microcompact_preserves_small_output() {
4109        let small = "hello world";
4110        let result = microcompact_tool_output(small, 10_000);
4111        assert_eq!(result, small);
4112    }
4113
4114    #[test]
4115    fn microcompact_preserves_strong_keyword_lines_without_file_line() {
4116        // Regression: diagnostic extraction used to require both a
4117        // file:line reference AND a keyword. Strong keywords like "FAIL"
4118        // and "panic" should preserve the line on their own, because they
4119        // carry signal even when they appear on narrative lines (Go's
4120        // "--- FAIL: TestName", Rust's "thread '...' panicked at ...",
4121        // pytest's "FAILED tests/..."). The exact patterns are language-
4122        // specific and don't belong in the VM — but the generic rule
4123        // "strong keywords count even without file:line" does.
4124        let mut output = String::new();
4125        for i in 0..100 {
4126            output.push_str(&format!("verbose progress line {i}\n"));
4127        }
4128        output.push_str("--- FAIL: TestEmpty (0.00s)\n");
4129        output.push_str("thread 'tests::test_foo' panicked at src/lib.rs:42:5\n");
4130        output.push_str("FAILED tests/test_parser.py::test_empty\n");
4131        for i in 0..100 {
4132            output.push_str(&format!("more output after failures {i}\n"));
4133        }
4134        let result = microcompact_tool_output(&output, 2_000);
4135        assert!(
4136            result.contains("--- FAIL: TestEmpty"),
4137            "strong 'FAIL' keyword should preserve the line:\n{result}"
4138        );
4139        assert!(
4140            result.contains("panicked at"),
4141            "strong 'panic' keyword should preserve the line:\n{result}"
4142        );
4143        assert!(
4144            result.contains("FAILED tests/test_parser.py"),
4145            "strong 'FAIL' keyword should preserve pytest-style lines too:\n{result}"
4146        );
4147    }
4148
4149    #[test]
4150    fn auto_compact_messages_reduces_count() {
4151        let mut messages: Vec<serde_json::Value> = (0..20)
4152            .map(|i| serde_json::json!({"role": "user", "content": format!("message {i}")}))
4153            .collect();
4154        let runtime = tokio::runtime::Builder::new_current_thread()
4155            .enable_all()
4156            .build()
4157            .unwrap();
4158        let compacted = runtime.block_on(auto_compact_messages(
4159            &mut messages,
4160            &AutoCompactConfig {
4161                compact_strategy: CompactStrategy::Truncate,
4162                keep_last: 6,
4163                ..Default::default()
4164            },
4165            None,
4166        ));
4167        let summary = compacted.unwrap();
4168        assert!(summary.is_some());
4169        assert!(messages.len() <= 7); // 6 kept + 1 summary
4170        assert!(messages[0]["content"]
4171            .as_str()
4172            .unwrap()
4173            .contains("auto-compacted"));
4174    }
4175
4176    #[test]
4177    fn auto_compact_noop_when_under_threshold() {
4178        let mut messages: Vec<serde_json::Value> = (0..4)
4179            .map(|i| serde_json::json!({"role": "user", "content": format!("msg {i}")}))
4180            .collect();
4181        let runtime = tokio::runtime::Builder::new_current_thread()
4182            .enable_all()
4183            .build()
4184            .unwrap();
4185        let compacted = runtime.block_on(auto_compact_messages(
4186            &mut messages,
4187            &AutoCompactConfig {
4188                compact_strategy: CompactStrategy::Truncate,
4189                keep_last: 6,
4190                ..Default::default()
4191            },
4192            None,
4193        ));
4194        assert!(compacted.unwrap().is_none());
4195        assert_eq!(messages.len(), 4);
4196    }
4197
4198    #[test]
4199    fn observation_mask_preserves_errors_masks_verbose_output() {
4200        let mut messages = vec![
4201            serde_json::json!({"role": "assistant", "content": "I'll create the file now."}),
4202            serde_json::json!({"role": "user", "content": "File created: a.go\npackage main\nimport \"fmt\"\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n// more lines\n// more lines\n// more lines\n// more lines"}),
4203            serde_json::json!({"role": "assistant", "content": "Now let me run the tests."}),
4204            serde_json::json!({"role": "user", "content": "error: cannot find module\nexit code 1\nfailed to compile"}),
4205            serde_json::json!({"role": "assistant", "content": "I see the issue. Let me fix it."}),
4206            serde_json::json!({"role": "user", "content": "File patched successfully."}),
4207            // These last 2 will be kept verbatim (keep_last)
4208            serde_json::json!({"role": "assistant", "content": "Running tests again."}),
4209            serde_json::json!({"role": "user", "content": "All tests passed."}),
4210        ];
4211        let runtime = tokio::runtime::Builder::new_current_thread()
4212            .enable_all()
4213            .build()
4214            .unwrap();
4215        let compacted = runtime.block_on(auto_compact_messages(
4216            &mut messages,
4217            &AutoCompactConfig {
4218                compact_strategy: CompactStrategy::ObservationMask,
4219                keep_last: 2,
4220                ..Default::default()
4221            },
4222            None,
4223        ));
4224        let summary = compacted.unwrap().unwrap();
4225        // Assistant messages preserved verbatim
4226        assert!(summary.contains("I'll create the file now."));
4227        assert!(summary.contains("Now let me run the tests."));
4228        assert!(summary.contains("I see the issue. Let me fix it."));
4229        // Error output preserved verbatim
4230        assert!(summary.contains("error: cannot find module"));
4231        assert!(summary.contains("exit code 1"));
4232        // Verbose tool output masked
4233        assert!(summary.contains("masked]"));
4234        assert!(summary.contains("File created: a.go"));
4235        // Short tool output kept
4236        assert!(summary.contains("File patched successfully."));
4237        // Kept messages not in summary
4238        assert!(!summary.contains("Running tests again."));
4239        assert!(!summary.contains("All tests passed."));
4240        // 2 kept + 1 summary = 3
4241        assert_eq!(messages.len(), 3);
4242    }
4243
4244    #[test]
4245    fn observation_mask_keeps_short_tool_output() {
4246        let messages = vec![
4247            serde_json::json!({"role": "user", "content": "OK"}),
4248            serde_json::json!({"role": "user", "content": "Done."}),
4249        ];
4250        let summary = observation_mask_compaction(&messages, 2);
4251        assert!(summary.contains("[user] OK"));
4252        assert!(summary.contains("[user] Done."));
4253        assert!(!summary.contains("masked"));
4254    }
4255
4256    #[test]
4257    fn estimate_message_tokens_basic() {
4258        let messages = vec![
4259            serde_json::json!({"role": "user", "content": "a".repeat(400)}),
4260            serde_json::json!({"role": "assistant", "content": "b".repeat(400)}),
4261        ];
4262        let tokens = estimate_message_tokens(&messages);
4263        assert_eq!(tokens, 200); // 800 chars / 4
4264    }
4265
4266    // ── Artifact dedup and microcompaction tests ─────────────────────
4267
4268    #[test]
4269    fn dedup_artifacts_removes_duplicates() {
4270        let mut artifacts = vec![
4271            ArtifactRecord {
4272                id: "a1".to_string(),
4273                kind: "test".to_string(),
4274                text: Some("duplicate content".to_string()),
4275                ..Default::default()
4276            },
4277            ArtifactRecord {
4278                id: "a2".to_string(),
4279                kind: "test".to_string(),
4280                text: Some("duplicate content".to_string()),
4281                ..Default::default()
4282            },
4283            ArtifactRecord {
4284                id: "a3".to_string(),
4285                kind: "test".to_string(),
4286                text: Some("unique content".to_string()),
4287                ..Default::default()
4288            },
4289        ];
4290        dedup_artifacts(&mut artifacts);
4291        assert_eq!(artifacts.len(), 2);
4292    }
4293
4294    #[test]
4295    fn microcompact_artifact_snips_oversized() {
4296        let mut artifact = ArtifactRecord {
4297            id: "a1".to_string(),
4298            kind: "test".to_string(),
4299            text: Some("x".repeat(10_000)),
4300            estimated_tokens: Some(2_500),
4301            ..Default::default()
4302        };
4303        microcompact_artifact(&mut artifact, 500);
4304        assert!(artifact.text.as_ref().unwrap().len() < 5_000);
4305        assert_eq!(artifact.estimated_tokens, Some(500));
4306    }
4307
4308    // ── Tool argument constraint tests ───────────────────────────────
4309
4310    #[test]
4311    fn arg_constraint_allows_matching_pattern() {
4312        let policy = CapabilityPolicy {
4313            tool_arg_constraints: vec![ToolArgConstraint {
4314                tool: "exec".to_string(),
4315                arg_patterns: vec!["cargo *".to_string()],
4316            }],
4317            ..Default::default()
4318        };
4319        let result = enforce_tool_arg_constraints(
4320            &policy,
4321            "exec",
4322            &serde_json::json!({"command": "cargo test"}),
4323        );
4324        assert!(result.is_ok());
4325    }
4326
4327    #[test]
4328    fn arg_constraint_rejects_non_matching_pattern() {
4329        let policy = CapabilityPolicy {
4330            tool_arg_constraints: vec![ToolArgConstraint {
4331                tool: "exec".to_string(),
4332                arg_patterns: vec!["cargo *".to_string()],
4333            }],
4334            ..Default::default()
4335        };
4336        let result = enforce_tool_arg_constraints(
4337            &policy,
4338            "exec",
4339            &serde_json::json!({"command": "rm -rf /"}),
4340        );
4341        assert!(result.is_err());
4342    }
4343
4344    #[test]
4345    fn arg_constraint_ignores_unmatched_tool() {
4346        let policy = CapabilityPolicy {
4347            tool_arg_constraints: vec![ToolArgConstraint {
4348                tool: "exec".to_string(),
4349                arg_patterns: vec!["cargo *".to_string()],
4350            }],
4351            ..Default::default()
4352        };
4353        let result = enforce_tool_arg_constraints(
4354            &policy,
4355            "read_file",
4356            &serde_json::json!({"path": "/etc/passwd"}),
4357        );
4358        assert!(result.is_ok());
4359    }
4360
4361    #[test]
4362    fn microcompact_handles_multibyte_utf8() {
4363        // Emoji are 4 bytes each — slicing at arbitrary byte offsets would panic
4364        let emoji_output = "🔥".repeat(500); // 2000 bytes, 500 chars
4365        let result = microcompact_tool_output(&emoji_output, 400);
4366        // Should not panic and should contain the snip marker
4367        assert!(result.contains("snipped"));
4368
4369        // Mixed ASCII + multi-byte
4370        let mixed = format!("{}{}{}", "a".repeat(300), "é".repeat(500), "b".repeat(300));
4371        let result2 = microcompact_tool_output(&mixed, 400);
4372        assert!(result2.contains("snipped"));
4373
4374        // CJK characters (3 bytes each)
4375        let cjk = "中文".repeat(500);
4376        let result3 = microcompact_tool_output(&cjk, 400);
4377        assert!(result3.contains("snipped"));
4378    }
4379}