Skip to main content

harn_vm/orchestration/
policy.rs

1//! Policy types and capability-ceiling enforcement.
2
3use std::cell::RefCell;
4use std::collections::BTreeMap;
5use std::rc::Rc;
6use std::thread_local;
7
8use serde::{Deserialize, Serialize};
9
10use super::{glob_match, new_id};
11use crate::value::{VmError, VmValue};
12
thread_local! {
    // Per-thread stack of capability policies. `push_execution_policy` /
    // `pop_execution_policy` mutate it and `current_execution_policy` reads
    // the top entry.
    static EXECUTION_POLICY_STACK: RefCell<Vec<CapabilityPolicy>> = const { RefCell::new(Vec::new()) };
    // Per-thread stack of tool-approval policies. `push_approval_policy` /
    // `pop_approval_policy` mutate it and `current_approval_policy` reads
    // the top entry.
    static EXECUTION_APPROVAL_POLICY_STACK: RefCell<Vec<ToolApprovalPolicy>> = const { RefCell::new(Vec::new()) };
}
17
18// ── Per-agent policy with argument patterns ───────────────────────────
19
/// Extended policy that supports argument-level constraints.
///
/// Fields missing from serialized input fall back to their `Default` values
/// because of the container-level `#[serde(default)]`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolArgConstraint {
    /// Tool name to constrain. `enforce_tool_arg_constraints` passes this as
    /// the pattern to `glob_match`, so it may be a glob rather than an exact
    /// name.
    pub tool: String,
    /// Glob patterns that the selected string argument must match (at least
    /// one pattern has to match).
    /// If empty, no argument constraint is applied.
    pub arg_patterns: Vec<String>,
}
30
31/// Check if a tool call satisfies argument constraints in the policy.
32pub fn enforce_tool_arg_constraints(
33    policy: &CapabilityPolicy,
34    tool_name: &str,
35    args: &serde_json::Value,
36) -> Result<(), VmError> {
37    for constraint in &policy.tool_arg_constraints {
38        if !glob_match(&constraint.tool, tool_name) {
39            continue;
40        }
41        if constraint.arg_patterns.is_empty() {
42            continue;
43        }
44        let first_arg = args
45            .as_object()
46            .and_then(|o| {
47                policy
48                    .tool_metadata
49                    .get(tool_name)
50                    .into_iter()
51                    .flat_map(|metadata| metadata.path_params.iter())
52                    .find_map(|param| o.get(param).and_then(|v| v.as_str()))
53                    .or_else(|| o.values().find_map(|v| v.as_str()))
54            })
55            .or_else(|| args.as_str())
56            .unwrap_or("");
57        let matches = constraint
58            .arg_patterns
59            .iter()
60            .any(|pattern| glob_match(pattern, first_arg));
61        if !matches {
62            return reject_policy(format!(
63                "tool '{tool_name}' argument '{first_arg}' does not match allowed patterns: {:?}",
64                constraint.arg_patterns
65            ));
66        }
67    }
68    Ok(())
69}
70
/// Per-tool metadata consulted when enforcing a `CapabilityPolicy`.
///
/// Fields missing from serialized input fall back to their `Default` values.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ToolRuntimePolicyMetadata {
    /// Capability name → operations the tool uses. Each pair is checked
    /// against the policy ceiling by `enforce_current_policy_for_tool`.
    pub capabilities: BTreeMap<String, Vec<String>>,
    /// Side-effect level the tool needs; checked against the policy's
    /// `side_effect_level` ceiling when present.
    pub side_effect_level: Option<String>,
    /// Argument keys whose string values are treated as paths by
    /// `current_tool_declared_paths` and `enforce_tool_arg_constraints`.
    pub path_params: Vec<String>,
    /// Explicit mutation classification; when `None`,
    /// `current_tool_mutation_classification` falls back to a name-based
    /// heuristic.
    pub mutation_classification: Option<String>,
}
79
/// Capability ceiling applied to tool and builtin execution.
///
/// Fields missing from serialized input fall back to their `Default` values.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CapabilityPolicy {
    /// Allowed tool names (exact match). An empty list allows every tool.
    pub tools: Vec<String>,
    /// Capability name → allowed operations. An empty map allows every
    /// capability; an empty operation list allows every operation of that
    /// capability (see `policy_allows_capability`).
    pub capabilities: BTreeMap<String, Vec<String>>,
    /// Workspace roots. `intersect` treats an empty list as "no restriction
    /// from this side".
    /// NOTE(review): enforcement of roots is not visible in this file.
    pub workspace_roots: Vec<String>,
    /// Highest permitted side-effect level (`"none"`, `"read_only"`,
    /// `"workspace_write"`, `"process_exec"`, `"network"`). `None` means no
    /// side-effect ceiling.
    pub side_effect_level: Option<String>,
    /// Recursion limit; `intersect` keeps the smaller of two limits.
    /// NOTE(review): what is being counted is not visible in this file.
    pub recursion_limit: Option<usize>,
    /// Argument-level constraints for specific tools.
    #[serde(default)]
    pub tool_arg_constraints: Vec<ToolArgConstraint>,
    /// Per-tool runtime metadata, keyed by tool name.
    #[serde(default)]
    pub tool_metadata: BTreeMap<String, ToolRuntimePolicyMetadata>,
}
94
95impl CapabilityPolicy {
96    pub fn intersect(&self, requested: &CapabilityPolicy) -> Result<CapabilityPolicy, String> {
97        let side_effect_level = match (&self.side_effect_level, &requested.side_effect_level) {
98            (Some(a), Some(b)) => Some(min_side_effect(a, b).to_string()),
99            (Some(a), None) => Some(a.clone()),
100            (None, Some(b)) => Some(b.clone()),
101            (None, None) => None,
102        };
103
104        if !self.tools.is_empty() {
105            let denied: Vec<String> = requested
106                .tools
107                .iter()
108                .filter(|tool| !self.tools.contains(*tool))
109                .cloned()
110                .collect();
111            if !denied.is_empty() {
112                return Err(format!(
113                    "requested tools exceed host ceiling: {}",
114                    denied.join(", ")
115                ));
116            }
117        }
118
119        for (capability, requested_ops) in &requested.capabilities {
120            if let Some(allowed_ops) = self.capabilities.get(capability) {
121                let denied: Vec<String> = requested_ops
122                    .iter()
123                    .filter(|op| !allowed_ops.contains(*op))
124                    .cloned()
125                    .collect();
126                if !denied.is_empty() {
127                    return Err(format!(
128                        "requested capability operations exceed host ceiling: {}.{}",
129                        capability,
130                        denied.join(",")
131                    ));
132                }
133            } else if !self.capabilities.is_empty() {
134                return Err(format!(
135                    "requested capability exceeds host ceiling: {capability}"
136                ));
137            }
138        }
139
140        let tools = if self.tools.is_empty() {
141            requested.tools.clone()
142        } else if requested.tools.is_empty() {
143            self.tools.clone()
144        } else {
145            requested
146                .tools
147                .iter()
148                .filter(|tool| self.tools.contains(*tool))
149                .cloned()
150                .collect()
151        };
152
153        let capabilities = if self.capabilities.is_empty() {
154            requested.capabilities.clone()
155        } else if requested.capabilities.is_empty() {
156            self.capabilities.clone()
157        } else {
158            requested
159                .capabilities
160                .iter()
161                .filter_map(|(capability, requested_ops)| {
162                    self.capabilities.get(capability).map(|allowed_ops| {
163                        (
164                            capability.clone(),
165                            requested_ops
166                                .iter()
167                                .filter(|op| allowed_ops.contains(*op))
168                                .cloned()
169                                .collect::<Vec<_>>(),
170                        )
171                    })
172                })
173                .collect()
174        };
175
176        let workspace_roots = if self.workspace_roots.is_empty() {
177            requested.workspace_roots.clone()
178        } else if requested.workspace_roots.is_empty() {
179            self.workspace_roots.clone()
180        } else {
181            requested
182                .workspace_roots
183                .iter()
184                .filter(|root| self.workspace_roots.contains(*root))
185                .cloned()
186                .collect()
187        };
188
189        let recursion_limit = match (self.recursion_limit, requested.recursion_limit) {
190            (Some(a), Some(b)) => Some(a.min(b)),
191            (Some(a), None) => Some(a),
192            (None, Some(b)) => Some(b),
193            (None, None) => None,
194        };
195
196        // Merge arg constraints from both sides
197        let mut tool_arg_constraints = self.tool_arg_constraints.clone();
198        tool_arg_constraints.extend(requested.tool_arg_constraints.clone());
199
200        let tool_metadata = tools
201            .iter()
202            .filter_map(|tool| {
203                requested
204                    .tool_metadata
205                    .get(tool)
206                    .or_else(|| self.tool_metadata.get(tool))
207                    .cloned()
208                    .map(|metadata| (tool.clone(), metadata))
209            })
210            .collect();
211
212        Ok(CapabilityPolicy {
213            tools,
214            capabilities,
215            workspace_roots,
216            side_effect_level,
217            recursion_limit,
218            tool_arg_constraints,
219            tool_metadata,
220        })
221    }
222}
223
/// Return whichever of the two side-effect levels is less permissive.
///
/// Known levels are ordered `none < read_only < workspace_write <
/// process_exec < network`; any other string ranks above all of them. On a
/// tie, `a` is returned.
fn min_side_effect<'a>(a: &'a str, b: &'a str) -> &'a str {
    const ORDER: [&str; 5] = [
        "none",
        "read_only",
        "workspace_write",
        "process_exec",
        "network",
    ];
    let severity = |level: &str| {
        ORDER
            .iter()
            .position(|known| *known == level)
            .unwrap_or(ORDER.len())
    };
    if severity(b) < severity(a) {
        b
    } else {
        a
    }
}
241
/// Turn-shape constraints for a stage's assistant turns.
///
/// Fields missing from serialized input fall back to the values of the
/// hand-written `Default` impl (container-level `#[serde(default)]`).
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct TurnPolicy {
    /// When true, text-only responses in a tool-capable stage are treated as
    /// invalid unless they switch phase / finish the stage. This keeps action
    /// stages moving instead of drifting into narration.
    pub require_action_or_yield: bool,
    /// When false, workflow-owned action stages should hand control back via
    /// successful tool calls instead of advertising an additional done
    /// sentinel pathway in corrective nudges.
    ///
    /// Defaults to `true` when absent from serialized input.
    #[serde(default = "default_true")]
    pub allow_done_sentinel: bool,
    /// Optional visible prose budget for a single assistant turn. When the
    /// assistant exceeds it, the recorded transcript keeps only a shortened
    /// version and the next corrective nudge reminds the model to stay brief.
    pub max_prose_chars: Option<usize>,
}
259
260impl Default for TurnPolicy {
261    fn default() -> Self {
262        Self {
263            require_action_or_yield: false,
264            allow_done_sentinel: true,
265            max_prose_chars: None,
266        }
267    }
268}
269
/// Serde default provider for boolean fields that should default to `true`
/// (referenced by `#[serde(default = "default_true")]`).
fn default_true() -> bool {
    true
}
273
/// Model selection and agent-loop tuning for a stage.
///
/// Fields missing from serialized input fall back to their `Default` values.
/// NOTE(review): the consumers of these fields are not visible in this file;
/// the per-field comments describe the intended contract.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ModelPolicy {
    /// Provider identifier, if pinned.
    pub provider: Option<String>,
    /// Model identifier, if pinned.
    pub model: Option<String>,
    /// Model tier name, if specified.
    pub model_tier: Option<String>,
    /// Sampling temperature, if specified.
    pub temperature: Option<f64>,
    /// Token limit, if specified.
    pub max_tokens: Option<i64>,
    /// Maximum agent_loop iterations for this stage. Overrides the default 16.
    pub max_iterations: Option<usize>,
    /// Maximum consecutive text-only (no tool call) responses before declaring stuck.
    pub max_nudges: Option<usize>,
    /// Custom nudge message injected when the model produces text without tool calls.
    /// If omitted, the VM uses a generic "Continue — use a tool call" message.
    pub nudge: Option<String>,
    /// Few-shot tool-call examples injected into the tool contract prompt,
    /// shown before the tool schema listing. Pipelines provide these —
    /// the VM has no hardcoded tool names.
    pub tool_examples: Option<String>,
    /// Optional Harn closure called after each tool-calling turn.
    /// Receives turn metadata; returns either a string user message to inject,
    /// a bool stop flag, or a dict like {message, stop}.
    /// Wrapped in EqIgnored so it doesn't affect PartialEq derivation.
    /// Never serialized or deserialized (`#[serde(skip)]`).
    #[serde(skip)]
    pub post_turn_callback: Option<EqIgnored<VmValue>>,
    /// When set, the stage stops after any tool-calling turn whose successful
    /// results include one of these tool names. This is useful for
    /// workflow-owned verify loops where a productive write turn should hand
    /// control back to verification immediately.
    pub stop_after_successful_tools: Option<Vec<String>>,
    /// When set, the stage is reported as failed unless at least one of these
    /// tool names succeeds during the interaction. Pipelines use this to
    /// assert a stage cannot quietly finish without running a specific tool.
    pub require_successful_tools: Option<Vec<String>>,
    /// Turn-shape constraints for action stages.
    pub turn_policy: Option<TurnPolicy>,
}
311
/// Transparent wrapper whose `PartialEq` impl reports every pair of values as
/// equal, so a field of a type without `PartialEq` can live inside a struct
/// that derives `PartialEq` without taking part in the comparison.
#[derive(Clone, Debug, Default)]
pub struct EqIgnored<T>(pub T);

impl<T> PartialEq for EqIgnored<T> {
    /// Always `true`; the wrapped values are never inspected.
    fn eq(&self, _other: &Self) -> bool {
        true
    }
}

impl<T> std::ops::Deref for EqIgnored<T> {
    type Target = T;

    /// Borrow the wrapped value.
    fn deref(&self) -> &Self::Target {
        let Self(inner) = self;
        inner
    }
}
328
/// Controls how a stage's transcript is carried in, compacted, and exposed.
///
/// Fields missing from serialized input fall back to their `Default` values.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct TranscriptPolicy {
    /// Input handling mode. `apply_input_transcript_policy` recognizes
    /// `"reset"` (drop the incoming transcript) and `"fork"` (keep it under a
    /// fresh id); any other value leaves the transcript untouched.
    pub mode: Option<String>,
    /// Output visibility. `"public"` / `"public_only"` make
    /// `apply_output_transcript_policy` strip tool-result messages and
    /// non-public events.
    pub visibility: Option<String>,
    /// NOTE(review): not read in this file — presumably enables
    /// summarization; confirm against the consumer.
    pub summarize: bool,
    /// When true, transcripts are trimmed to the last `keep_last` messages on
    /// both input and output.
    pub compact: bool,
    /// Number of trailing messages kept when `compact` is set (6 if `None`).
    pub keep_last: Option<usize>,
    /// Enable per-turn auto-compaction within agent loops.
    pub auto_compact: bool,
    /// Token threshold for tier-1 compaction.
    pub compact_threshold: Option<usize>,
    /// Max chars per tool result before compression.
    pub tool_output_max_chars: Option<usize>,
    /// Tier-1 compaction strategy name (e.g., "observation_mask", "llm").
    pub compact_strategy: Option<String>,
    /// Token threshold for tier-2 aggressive compaction.
    pub hard_limit_tokens: Option<usize>,
    /// Tier-2 compaction strategy name.
    pub hard_limit_strategy: Option<String>,
}
350
/// Context-selection settings for a stage.
///
/// Fields missing from serialized input fall back to their `Default` values.
/// NOTE(review): none of these fields are read in this file; the names
/// suggest artifact limits, kind/stage filters, and ordering preferences —
/// confirm exact semantics against the consumer.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ContextPolicy {
    pub max_artifacts: Option<usize>,
    pub max_tokens: Option<usize>,
    pub reserve_tokens: Option<usize>,
    pub include_kinds: Vec<String>,
    pub exclude_kinds: Vec<String>,
    pub prioritize_kinds: Vec<String>,
    pub pinned_ids: Vec<String>,
    pub include_stages: Vec<String>,
    pub prefer_recent: bool,
    pub prefer_fresh: bool,
    pub render: Option<String>,
}
366
/// Retry settings for a stage.
///
/// Fields missing from serialized input fall back to their `Default` values
/// (so `max_attempts` is 0 and both flags are `false` when omitted).
/// NOTE(review): how `max_attempts`, `verify`, and `repair` are interpreted
/// is not visible in this file.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct RetryPolicy {
    pub max_attempts: usize,
    pub verify: bool,
    pub repair: bool,
    /// Initial backoff duration in milliseconds between retry attempts.
    /// When `None`, retries proceed without delay.
    #[serde(default)]
    pub backoff_ms: Option<u64>,
    /// Multiplier applied to `backoff_ms` after each retry attempt.
    /// Defaults to 2.0 when `backoff_ms` is set and this field is `None`.
    #[serde(default)]
    pub backoff_multiplier: Option<f64>,
}
382
/// Declared input/output contract of a stage.
///
/// Fields missing from serialized input fall back to their `Default` values.
/// NOTE(review): validation against this contract happens outside this file;
/// field meanings below are inferred from names only.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct StageContract {
    pub input_kinds: Vec<String>,
    pub output_kinds: Vec<String>,
    pub min_inputs: Option<usize>,
    pub max_inputs: Option<usize>,
    pub require_transcript: bool,
    // Arbitrary JSON value; presumably a schema for stage output — TODO confirm.
    pub schema: Option<serde_json::Value>,
}
393
/// Optional string labels, one per named branch outcome.
///
/// Fields missing from serialized input fall back to `None`.
/// NOTE(review): how the labels are resolved (e.g. to edge or stage names) is
/// not visible in this file.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BranchSemantics {
    pub success: Option<String>,
    pub failure: Option<String>,
    pub verify_pass: Option<String>,
    pub verify_fail: Option<String>,
    pub condition_true: Option<String>,
    pub condition_false: Option<String>,
    pub loop_continue: Option<String>,
    pub loop_exit: Option<String>,
    pub escalation: Option<String>,
}
407
/// Settings for a map-style (fan-out) stage.
///
/// Fields missing from serialized input fall back to their `Default` values.
/// NOTE(review): consumers are outside this file; limits such as `max_items`
/// and `max_concurrent` are presumably caps on fan-out — confirm.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct MapPolicy {
    pub items: Vec<serde_json::Value>,
    pub item_artifact_kind: Option<String>,
    pub output_kind: Option<String>,
    pub max_items: Option<usize>,
    pub max_concurrent: Option<usize>,
}
417
/// Settings for a join stage.
///
/// Fields missing from serialized input fall back to their `Default` values
/// (an empty `strategy` string when omitted).
/// NOTE(review): the set of valid `strategy` values is not visible here.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct JoinPolicy {
    pub strategy: String,
    pub require_all_inputs: bool,
    pub min_completed: Option<usize>,
}
425
/// Settings for a reduce stage.
///
/// Fields missing from serialized input fall back to their `Default` values
/// (an empty `strategy` string when omitted).
/// NOTE(review): the set of valid `strategy` values is not visible here.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ReducePolicy {
    pub strategy: String,
    pub separator: Option<String>,
    pub output_kind: Option<String>,
}
433
/// Optional escalation descriptors (level, queue, reason).
///
/// Fields missing from serialized input fall back to `None`.
/// NOTE(review): how escalations are routed is not visible in this file.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct EscalationPolicy {
    pub level: Option<String>,
    pub queue: Option<String>,
    pub reason: Option<String>,
}
441
442// ── Execution policy stack ──────────────────────────────────────────
443
444pub fn push_execution_policy(policy: CapabilityPolicy) {
445    EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
446}
447
448pub fn pop_execution_policy() {
449    EXECUTION_POLICY_STACK.with(|stack| {
450        stack.borrow_mut().pop();
451    });
452}
453
454pub fn current_execution_policy() -> Option<CapabilityPolicy> {
455    EXECUTION_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
456}
457
458// ── Approval policy stack ───────────────────────────────────────────
459
460pub fn push_approval_policy(policy: ToolApprovalPolicy) {
461    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
462}
463
464pub fn pop_approval_policy() {
465    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| {
466        stack.borrow_mut().pop();
467    });
468}
469
470pub fn current_approval_policy() -> Option<ToolApprovalPolicy> {
471    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
472}
473
474pub fn current_tool_metadata(tool: &str) -> Option<ToolRuntimePolicyMetadata> {
475    current_execution_policy().and_then(|policy| policy.tool_metadata.get(tool).cloned())
476}
477
478fn policy_allows_tool(policy: &CapabilityPolicy, tool: &str) -> bool {
479    policy.tools.is_empty() || policy.tools.iter().any(|allowed| allowed == tool)
480}
481
482fn policy_allows_capability(policy: &CapabilityPolicy, capability: &str, op: &str) -> bool {
483    policy.capabilities.is_empty()
484        || policy
485            .capabilities
486            .get(capability)
487            .is_some_and(|ops| ops.is_empty() || ops.iter().any(|allowed| allowed == op))
488}
489
490fn policy_allows_side_effect(policy: &CapabilityPolicy, requested: &str) -> bool {
491    fn rank(v: &str) -> usize {
492        match v {
493            "none" => 0,
494            "read_only" => 1,
495            "workspace_write" => 2,
496            "process_exec" => 3,
497            "network" => 4,
498            _ => 5,
499        }
500    }
501    policy
502        .side_effect_level
503        .as_ref()
504        .map(|allowed| rank(allowed) >= rank(requested))
505        .unwrap_or(true)
506}
507
508fn reject_policy(reason: String) -> Result<(), VmError> {
509    Err(VmError::CategorizedError {
510        message: reason,
511        category: crate::value::ErrorCategory::ToolRejected,
512    })
513}
514
/// Guess a mutation classification from the tool name alone
/// (ASCII-case-insensitive). Rules are tried in order; the first hit wins:
///
/// 1. `mcp_` prefix → `"host_defined"`
/// 2. `exec`, `shell`, `exec_at`, `shell_at`, `run`, or a `run_` prefix →
///    `"ambient_side_effect"`
/// 3. `delete` / `remove` / `move` / `rename` prefix → `"destructive"`
/// 4. contains `write` / `edit` / `patch` / `create` / `scaffold`, starts
///    with `insert` / `replace`, or equals `add_import` → `"apply_workspace"`
/// 5. anything else → `"read_only"`
fn fallback_mutation_classification(tool_name: &str) -> String {
    const DESTRUCTIVE_PREFIXES: [&str; 4] = ["delete", "remove", "move", "rename"];
    const WRITE_FRAGMENTS: [&str; 5] = ["write", "edit", "patch", "create", "scaffold"];
    const WRITE_PREFIXES: [&str; 2] = ["insert", "replace"];

    fn starts_with_any(name: &str, prefixes: &[&str]) -> bool {
        prefixes.iter().any(|prefix| name.starts_with(prefix))
    }

    let name = tool_name.to_ascii_lowercase();
    let is_process_tool = matches!(
        name.as_str(),
        "exec" | "shell" | "exec_at" | "shell_at" | "run"
    ) || name.starts_with("run_");

    let classification = if name.starts_with("mcp_") {
        "host_defined"
    } else if is_process_tool {
        "ambient_side_effect"
    } else if starts_with_any(&name, &DESTRUCTIVE_PREFIXES) {
        "destructive"
    } else if WRITE_FRAGMENTS
        .iter()
        .any(|fragment| name.contains(fragment))
        || starts_with_any(&name, &WRITE_PREFIXES)
        || name == "add_import"
    {
        "apply_workspace"
    } else {
        "read_only"
    };
    classification.to_string()
}
549
550pub fn current_tool_mutation_classification(tool_name: &str) -> String {
551    current_tool_metadata(tool_name)
552        .and_then(|metadata| metadata.mutation_classification)
553        .unwrap_or_else(|| fallback_mutation_classification(tool_name))
554}
555
556pub fn current_tool_declared_paths(tool_name: &str, args: &serde_json::Value) -> Vec<String> {
557    let Some(map) = args.as_object() else {
558        return Vec::new();
559    };
560    let path_keys = current_tool_metadata(tool_name)
561        .map(|metadata| metadata.path_params)
562        .filter(|keys| !keys.is_empty())
563        .unwrap_or_else(|| {
564            vec![
565                "path".to_string(),
566                "file".to_string(),
567                "cwd".to_string(),
568                "repo".to_string(),
569                "target".to_string(),
570                "destination".to_string(),
571            ]
572        });
573    let mut paths = Vec::new();
574    for key in path_keys {
575        if let Some(value) = map.get(&key).and_then(|value| value.as_str()) {
576            if !value.is_empty() {
577                paths.push(value.to_string());
578            }
579        }
580    }
581    if let Some(items) = map.get("paths").and_then(|value| value.as_array()) {
582        for item in items {
583            if let Some(value) = item.as_str() {
584                if !value.is_empty() {
585                    paths.push(value.to_string());
586                }
587            }
588        }
589    }
590    paths.sort();
591    paths.dedup();
592    paths
593}
594
595pub fn enforce_current_policy_for_builtin(name: &str, args: &[VmValue]) -> Result<(), VmError> {
596    let Some(policy) = current_execution_policy() else {
597        return Ok(());
598    };
599    match name {
600        "read" | "read_file" => {
601            if !policy_allows_tool(&policy, name)
602                || !policy_allows_capability(&policy, "workspace", "read_text")
603            {
604                return reject_policy(format!(
605                    "builtin '{name}' exceeds workspace.read_text ceiling"
606                ));
607            }
608        }
609        "search" | "list_dir" => {
610            if !policy_allows_tool(&policy, name)
611                || !policy_allows_capability(&policy, "workspace", "list")
612            {
613                return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
614            }
615        }
616        "file_exists" | "stat" => {
617            if !policy_allows_capability(&policy, "workspace", "exists") {
618                return reject_policy(format!("builtin '{name}' exceeds workspace.exists ceiling"));
619            }
620        }
621        "edit" | "write_file" | "append_file" | "mkdir" | "copy_file" => {
622            if !policy_allows_tool(&policy, "edit")
623                || !policy_allows_capability(&policy, "workspace", "write_text")
624                || !policy_allows_side_effect(&policy, "workspace_write")
625            {
626                return reject_policy(format!("builtin '{name}' exceeds workspace write ceiling"));
627            }
628        }
629        "delete_file" => {
630            if !policy_allows_capability(&policy, "workspace", "delete")
631                || !policy_allows_side_effect(&policy, "workspace_write")
632            {
633                return reject_policy(
634                    "builtin 'delete_file' exceeds workspace.delete ceiling".to_string(),
635                );
636            }
637        }
638        "apply_edit" => {
639            if !policy_allows_capability(&policy, "workspace", "apply_edit")
640                || !policy_allows_side_effect(&policy, "workspace_write")
641            {
642                return reject_policy(
643                    "builtin 'apply_edit' exceeds workspace.apply_edit ceiling".to_string(),
644                );
645            }
646        }
647        "exec" | "exec_at" | "shell" | "shell_at" | "run_command" => {
648            if !policy_allows_tool(&policy, "run")
649                || !policy_allows_capability(&policy, "process", "exec")
650                || !policy_allows_side_effect(&policy, "process_exec")
651            {
652                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
653            }
654        }
655        "http_get" | "http_post" | "http_put" | "http_patch" | "http_delete" | "http_request" => {
656            if !policy_allows_side_effect(&policy, "network") {
657                return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
658            }
659        }
660        "mcp_connect"
661        | "mcp_call"
662        | "mcp_list_tools"
663        | "mcp_list_resources"
664        | "mcp_list_resource_templates"
665        | "mcp_read_resource"
666        | "mcp_list_prompts"
667        | "mcp_get_prompt"
668        | "mcp_server_info"
669        | "mcp_disconnect" => {
670            if !policy_allows_tool(&policy, "run")
671                || !policy_allows_capability(&policy, "process", "exec")
672                || !policy_allows_side_effect(&policy, "process_exec")
673            {
674                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
675            }
676        }
677        "host_call" => {
678            let name = args.first().map(|v| v.display()).unwrap_or_default();
679            let Some((capability, op)) = name.split_once('.') else {
680                return reject_policy(format!(
681                    "host_call '{name}' must use capability.operation naming"
682                ));
683            };
684            if !policy_allows_capability(&policy, capability, op) {
685                return reject_policy(format!(
686                    "host_call {capability}.{op} exceeds capability ceiling"
687                ));
688            }
689            let requested_side_effect = match (capability, op) {
690                ("workspace", "write_text" | "apply_edit" | "delete") => "workspace_write",
691                ("process", "exec") => "process_exec",
692                _ => "read_only",
693            };
694            if !policy_allows_side_effect(&policy, requested_side_effect) {
695                return reject_policy(format!(
696                    "host_call {capability}.{op} exceeds side-effect ceiling"
697                ));
698            }
699        }
700        _ => {}
701    }
702    Ok(())
703}
704
705pub fn enforce_current_policy_for_bridge_builtin(name: &str) -> Result<(), VmError> {
706    if current_execution_policy().is_some() {
707        return reject_policy(format!(
708            "bridged builtin '{name}' exceeds execution policy; declare an explicit capability/tool surface instead"
709        ));
710    }
711    Ok(())
712}
713
714pub fn enforce_current_policy_for_tool(tool_name: &str) -> Result<(), VmError> {
715    let Some(policy) = current_execution_policy() else {
716        return Ok(());
717    };
718    if !policy_allows_tool(&policy, tool_name) {
719        return reject_policy(format!("tool '{tool_name}' exceeds tool ceiling"));
720    }
721    if let Some(metadata) = policy.tool_metadata.get(tool_name) {
722        for (capability, ops) in &metadata.capabilities {
723            for op in ops {
724                if !policy_allows_capability(&policy, capability, op) {
725                    return reject_policy(format!(
726                        "tool '{tool_name}' exceeds capability ceiling: {capability}.{op}"
727                    ));
728                }
729            }
730        }
731        if let Some(side_effect_level) = metadata.side_effect_level.as_deref() {
732            if !policy_allows_side_effect(&policy, side_effect_level) {
733                return reject_policy(format!(
734                    "tool '{tool_name}' exceeds side-effect ceiling: {side_effect_level}"
735                ));
736            }
737        }
738    }
739    Ok(())
740}
741
742// ── Transcript policy helpers ───────────────────────────────────────
743
744fn compact_transcript(transcript: &VmValue, keep_last: usize) -> Option<VmValue> {
745    let dict = transcript.as_dict()?;
746    let messages = match dict.get("messages") {
747        Some(VmValue::List(list)) => list.iter().cloned().collect::<Vec<_>>(),
748        _ => Vec::new(),
749    };
750    let retained = messages
751        .into_iter()
752        .rev()
753        .take(keep_last)
754        .collect::<Vec<_>>()
755        .into_iter()
756        .rev()
757        .collect::<Vec<_>>();
758    let mut compacted = dict.clone();
759    compacted.insert(
760        "messages".to_string(),
761        VmValue::List(Rc::new(retained.clone())),
762    );
763    compacted.insert(
764        "events".to_string(),
765        VmValue::List(Rc::new(
766            crate::llm::helpers::transcript_events_from_messages(&retained),
767        )),
768    );
769    Some(VmValue::Dict(Rc::new(compacted)))
770}
771
772fn redact_transcript_visibility(transcript: &VmValue, visibility: Option<&str>) -> Option<VmValue> {
773    let Some(visibility) = visibility else {
774        return Some(transcript.clone());
775    };
776    if visibility != "public" && visibility != "public_only" {
777        return Some(transcript.clone());
778    }
779    let dict = transcript.as_dict()?;
780    let public_messages = match dict.get("messages") {
781        Some(VmValue::List(list)) => list
782            .iter()
783            .filter(|message| {
784                message
785                    .as_dict()
786                    .and_then(|d| d.get("role"))
787                    .map(|v| v.display())
788                    .map(|role| role != "tool_result")
789                    .unwrap_or(true)
790            })
791            .cloned()
792            .collect::<Vec<_>>(),
793        _ => Vec::new(),
794    };
795    let public_events = match dict.get("events") {
796        Some(VmValue::List(list)) => list
797            .iter()
798            .filter(|event| {
799                event
800                    .as_dict()
801                    .and_then(|d| d.get("visibility"))
802                    .map(|v| v.display())
803                    .map(|value| value == "public")
804                    .unwrap_or(true)
805            })
806            .cloned()
807            .collect::<Vec<_>>(),
808        _ => Vec::new(),
809    };
810    let mut redacted = dict.clone();
811    redacted.insert(
812        "messages".to_string(),
813        VmValue::List(Rc::new(public_messages)),
814    );
815    redacted.insert("events".to_string(), VmValue::List(Rc::new(public_events)));
816    Some(VmValue::Dict(Rc::new(redacted)))
817}
818
819pub(crate) fn apply_input_transcript_policy(
820    transcript: Option<VmValue>,
821    policy: &TranscriptPolicy,
822) -> Option<VmValue> {
823    let mut transcript = transcript;
824    match policy.mode.as_deref() {
825        Some("reset") => return None,
826        Some("fork") => {
827            if let Some(VmValue::Dict(dict)) = transcript.as_ref() {
828                let mut forked = dict.as_ref().clone();
829                forked.insert(
830                    "id".to_string(),
831                    VmValue::String(Rc::from(new_id("transcript"))),
832                );
833                transcript = Some(VmValue::Dict(Rc::new(forked)));
834            }
835        }
836        _ => {}
837    }
838    if policy.compact {
839        let keep_last = policy.keep_last.unwrap_or(6);
840        transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
841    }
842    transcript
843}
844
845pub(crate) fn apply_output_transcript_policy(
846    transcript: Option<VmValue>,
847    policy: &TranscriptPolicy,
848) -> Option<VmValue> {
849    let mut transcript = transcript;
850    if policy.compact {
851        let keep_last = policy.keep_last.unwrap_or(6);
852        transcript = transcript.and_then(|value| compact_transcript(&value, keep_last));
853    }
854    transcript.and_then(|value| redact_transcript_visibility(&value, policy.visibility.as_deref()))
855}
856
857pub fn builtin_ceiling() -> CapabilityPolicy {
858    CapabilityPolicy {
859        // Capabilities left empty — the host capability manifest is the sole
860        // authority on which operations are available.  An explicit allowlist
861        // here would silently block any capability the host adds later.
862        tools: Vec::new(),
863        capabilities: BTreeMap::new(),
864        workspace_roots: Vec::new(),
865        side_effect_level: Some("network".to_string()),
866        recursion_limit: Some(8),
867        tool_arg_constraints: Vec::new(),
868        tool_metadata: BTreeMap::new(),
869    }
870}
871
872// ── Tool approval policy ─────────────────────────────────────────────
873
874/// Declarative policy for tool approval gating. Allows pipelines to
875/// specify which tools are auto-approved, auto-denied, or require
876/// host confirmation, plus write-path allowlists.
877#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
878#[serde(default)]
879pub struct ToolApprovalPolicy {
880    /// Glob patterns for tools that should be auto-approved.
881    #[serde(default)]
882    pub auto_approve: Vec<String>,
883    /// Glob patterns for tools that should always be denied.
884    #[serde(default)]
885    pub auto_deny: Vec<String>,
886    /// Glob patterns for tools that require host confirmation.
887    #[serde(default)]
888    pub require_approval: Vec<String>,
889    /// Glob patterns for writable paths.
890    #[serde(default)]
891    pub write_path_allowlist: Vec<String>,
892}
893
/// Outcome of running a tool call through a [`ToolApprovalPolicy`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ToolApprovalDecision {
    /// The policy lets the call proceed without asking anyone.
    AutoApproved,
    /// The policy refuses the call; `reason` is a human-readable explanation
    /// naming the tool and the rule that rejected it.
    AutoDenied { reason: String },
    /// The host must explicitly approve the call. No payload is carried: the
    /// caller already owns the tool name and args and forwards them to the
    /// host bridge itself.
    RequiresHostApproval,
}
905
906impl ToolApprovalPolicy {
907    /// Evaluate whether a tool call should be approved, denied, or needs
908    /// host confirmation.
909    pub fn evaluate(&self, tool_name: &str, args: &serde_json::Value) -> ToolApprovalDecision {
910        // Auto-deny takes precedence.
911        for pattern in &self.auto_deny {
912            if glob_match(pattern, tool_name) {
913                return ToolApprovalDecision::AutoDenied {
914                    reason: format!("tool '{tool_name}' matches deny pattern '{pattern}'"),
915                };
916            }
917        }
918
919        // Check write-path allowlist for tools that declare paths.
920        if !self.write_path_allowlist.is_empty() {
921            let paths = super::current_tool_declared_paths(tool_name, args);
922            for path in &paths {
923                let allowed = self
924                    .write_path_allowlist
925                    .iter()
926                    .any(|pattern| glob_match(pattern, path));
927                if !allowed {
928                    return ToolApprovalDecision::AutoDenied {
929                        reason: format!(
930                            "tool '{tool_name}' writes to '{path}' which is not in the write-path allowlist"
931                        ),
932                    };
933                }
934            }
935        }
936
937        // Auto-approve.
938        for pattern in &self.auto_approve {
939            if glob_match(pattern, tool_name) {
940                return ToolApprovalDecision::AutoApproved;
941            }
942        }
943
944        // Require approval.
945        for pattern in &self.require_approval {
946            if glob_match(pattern, tool_name) {
947                return ToolApprovalDecision::RequiresHostApproval;
948            }
949        }
950
951        // Default: auto-approve if no pattern matched.
952        ToolApprovalDecision::AutoApproved
953    }
954
955    /// Merge two approval policies, taking the most restrictive combination.
956    /// - auto_approve: only tools approved by BOTH policies stay approved
957    ///   (if either policy has no patterns, the other's patterns are used)
958    /// - auto_deny / require_approval: union (either policy can deny/gate)
959    /// - write_path_allowlist: intersection (both must allow the path)
960    pub fn intersect(&self, other: &ToolApprovalPolicy) -> ToolApprovalPolicy {
961        // auto_approve: intersection semantics — a tool should only be
962        // auto-approved if both policies agree. If one side has no patterns,
963        // defer to the other.
964        let auto_approve = if self.auto_approve.is_empty() {
965            other.auto_approve.clone()
966        } else if other.auto_approve.is_empty() {
967            self.auto_approve.clone()
968        } else {
969            // Keep only patterns that appear in both lists.
970            self.auto_approve
971                .iter()
972                .filter(|p| other.auto_approve.contains(p))
973                .cloned()
974                .collect()
975        };
976        // auto_deny / require_approval: union (more restrictive).
977        let mut auto_deny = self.auto_deny.clone();
978        auto_deny.extend(other.auto_deny.iter().cloned());
979        let mut require_approval = self.require_approval.clone();
980        require_approval.extend(other.require_approval.iter().cloned());
981        // write_path_allowlist: intersection (both must allow the path).
982        let write_path_allowlist = if self.write_path_allowlist.is_empty() {
983            other.write_path_allowlist.clone()
984        } else if other.write_path_allowlist.is_empty() {
985            self.write_path_allowlist.clone()
986        } else {
987            self.write_path_allowlist
988                .iter()
989                .filter(|p| other.write_path_allowlist.contains(p))
990                .cloned()
991                .collect()
992        };
993        ToolApprovalPolicy {
994            auto_approve,
995            auto_deny,
996            require_approval,
997            write_path_allowlist,
998        }
999    }
1000}
1001
#[cfg(test)]
mod approval_policy_tests {
    use super::*;

    /// Turns string literals into the owned pattern list the policy expects.
    fn globs(patterns: &[&str]) -> Vec<String> {
        patterns.iter().map(|pattern| pattern.to_string()).collect()
    }

    /// An empty JSON object, for calls whose arguments are irrelevant.
    fn no_args() -> serde_json::Value {
        serde_json::json!({})
    }

    #[test]
    fn auto_deny_takes_precedence_over_auto_approve() {
        // The tool matches both lists; the deny list must win.
        let policy = ToolApprovalPolicy {
            auto_approve: globs(&["*"]),
            auto_deny: globs(&["dangerous_*"]),
            ..Default::default()
        };
        let expected = ToolApprovalDecision::AutoDenied {
            reason: "tool 'dangerous_rm' matches deny pattern 'dangerous_*'".to_string(),
        };
        assert_eq!(policy.evaluate("dangerous_rm", &no_args()), expected);
    }

    #[test]
    fn auto_approve_matches_glob() {
        let policy = ToolApprovalPolicy {
            auto_approve: globs(&["read*", "search*"]),
            ..Default::default()
        };
        for tool in ["read_file", "search"] {
            assert_eq!(
                policy.evaluate(tool, &no_args()),
                ToolApprovalDecision::AutoApproved
            );
        }
    }

    #[test]
    fn require_approval_emits_decision() {
        let policy = ToolApprovalPolicy {
            require_approval: globs(&["edit*"]),
            ..Default::default()
        };
        let args = serde_json::json!({"path": "foo.rs"});
        assert_eq!(
            policy.evaluate("edit_file", &args),
            ToolApprovalDecision::RequiresHostApproval
        );
    }

    #[test]
    fn unmatched_tool_defaults_to_approved() {
        // Neither list mentions the tool, so the fallback decision applies.
        let policy = ToolApprovalPolicy {
            auto_approve: globs(&["read*"]),
            require_approval: globs(&["edit*"]),
            ..Default::default()
        };
        assert_eq!(
            policy.evaluate("unknown_tool", &no_args()),
            ToolApprovalDecision::AutoApproved
        );
    }

    #[test]
    fn intersect_merges_deny_lists() {
        let left = ToolApprovalPolicy {
            auto_deny: globs(&["rm*"]),
            ..Default::default()
        };
        let right = ToolApprovalPolicy {
            auto_deny: globs(&["drop*"]),
            ..Default::default()
        };
        assert_eq!(left.intersect(&right).auto_deny.len(), 2);
    }

    #[test]
    fn intersect_restricts_auto_approve_to_common_patterns() {
        let left = ToolApprovalPolicy {
            auto_approve: globs(&["read*", "search*"]),
            ..Default::default()
        };
        let right = ToolApprovalPolicy {
            auto_approve: globs(&["read*", "write*"]),
            ..Default::default()
        };
        // "read*" is the only pattern both sides list; the rest is dropped.
        assert_eq!(left.intersect(&right).auto_approve, globs(&["read*"]));
    }

    #[test]
    fn intersect_defers_auto_approve_when_one_side_empty() {
        let left = ToolApprovalPolicy {
            auto_approve: globs(&["read*"]),
            ..Default::default()
        };
        let right = ToolApprovalPolicy::default();
        assert_eq!(left.intersect(&right).auto_approve, globs(&["read*"]));
    }
}
1103
#[cfg(test)]
mod turn_policy_tests {
    use super::TurnPolicy;

    /// Deserializes a `TurnPolicy` from a JSON value, panicking on failure.
    fn parse(value: serde_json::Value) -> TurnPolicy {
        serde_json::from_value(value).expect("deserialize")
    }

    #[test]
    fn default_allows_done_sentinel() {
        let defaults = TurnPolicy::default();
        assert!(defaults.allow_done_sentinel);
        assert!(!defaults.require_action_or_yield);
        assert!(defaults.max_prose_chars.is_none());
    }

    #[test]
    fn deserializing_partial_dict_preserves_done_sentinel_pathway() {
        // Older workflows pass `turn_policy: { require_action_or_yield: true }`
        // and know nothing about `allow_done_sentinel`. Such a dict must still
        // deserialize with the done-sentinel pathway enabled, otherwise
        // persistent agent loops lose their completion signal in this release.
        let parsed = parse(serde_json::json!({ "require_action_or_yield": true }));
        assert!(parsed.require_action_or_yield);
        assert!(parsed.allow_done_sentinel);
    }

    #[test]
    fn deserializing_explicit_false_disables_done_sentinel() {
        let parsed = parse(serde_json::json!({
            "require_action_or_yield": true,
            "allow_done_sentinel": false,
        }));
        assert!(parsed.require_action_or_yield);
        assert!(!parsed.allow_done_sentinel);
    }
}
1140
#[cfg(test)]
mod transcript_policy_tests {
    use super::*;
    use crate::value::VmValue;

    /// Builds a transcript with id `test-id` holding `message_count`
    /// messages that alternate between the user and assistant roles.
    fn mock_transcript(message_count: usize) -> VmValue {
        let mut messages: Vec<serde_json::Value> = Vec::with_capacity(message_count);
        for index in 0..message_count {
            let role = if index % 2 == 0 { "user" } else { "assistant" };
            messages.push(serde_json::json!({
                "role": role,
                "content": format!("message {index}"),
            }));
        }
        crate::llm::helpers::transcript_to_vm_with_events(
            Some("test-id".to_string()),
            None,
            None,
            &messages,
            Vec::new(),
            Vec::new(),
            Some("active"),
        )
    }

    /// Length of the transcript's `messages` list, or 0 when it is absent
    /// or not a list.
    fn message_count(transcript: &VmValue) -> usize {
        match transcript.as_dict().and_then(|dict| dict.get("messages")) {
            Some(VmValue::List(items)) => items.len(),
            _ => 0,
        }
    }

    /// A default policy with only `mode` set.
    fn policy_with_mode(mode: &str) -> TranscriptPolicy {
        TranscriptPolicy {
            mode: Some(mode.to_string()),
            ..Default::default()
        }
    }

    #[test]
    fn continue_mode_passes_transcript_through() {
        let policy = policy_with_mode("continue");
        let output = apply_input_transcript_policy(Some(mock_transcript(4)), &policy);
        assert!(output.is_some());
        assert_eq!(message_count(&output.unwrap()), 4);
    }

    #[test]
    fn default_mode_passes_transcript_through() {
        let policy = TranscriptPolicy::default();
        let output = apply_input_transcript_policy(Some(mock_transcript(3)), &policy);
        assert!(output.is_some());
        assert_eq!(message_count(&output.unwrap()), 3);
    }

    #[test]
    fn reset_mode_clears_transcript() {
        let policy = policy_with_mode("reset");
        let output = apply_input_transcript_policy(Some(mock_transcript(4)), &policy);
        assert!(output.is_none());
    }

    #[test]
    fn fork_mode_assigns_new_id() {
        let policy = policy_with_mode("fork");
        let forked = apply_input_transcript_policy(Some(mock_transcript(3)), &policy)
            .expect("fork should return a transcript");
        let fields = forked.as_dict().expect("must be a dict");
        let id = fields
            .get("id")
            .map(|value| value.display())
            .unwrap_or_default();
        assert_ne!(id, "test-id", "fork should assign a new transcript ID");
        assert_eq!(message_count(&forked), 3, "fork should preserve messages");
    }

    #[test]
    fn none_input_stays_none_for_all_modes() {
        for mode in ["continue", "reset", "fork"] {
            let policy = policy_with_mode(mode);
            let output = apply_input_transcript_policy(None, &policy);
            assert!(
                output.is_none(),
                "mode {mode} with None input should return None"
            );
        }
    }
}