Skip to main content

harn_vm/orchestration/policy/
mod.rs

1//! Policy types and capability-ceiling enforcement.
2
3mod types;
4
5use std::cell::RefCell;
6use std::collections::BTreeMap;
7use std::rc::Rc;
8use std::thread_local;
9
10use serde::{Deserialize, Serialize};
11
12use super::glob_match;
13use crate::tool_annotations::{SideEffectLevel, ToolAnnotations};
14use crate::value::{VmError, VmValue};
15
16pub use crate::tool_annotations::{ToolArgSchema, ToolKind};
17pub use types::{
18    enforce_tool_arg_constraints, AutoCompactPolicy, BranchSemantics, CapabilityPolicy,
19    ContextPolicy, EqIgnored, EscalationPolicy, JoinPolicy, MapPolicy, ModelPolicy, ReducePolicy,
20    RetryPolicy, StageContract, ToolArgConstraint, TurnPolicy,
21};
22
23thread_local! {
24    static EXECUTION_POLICY_STACK: RefCell<Vec<CapabilityPolicy>> = const { RefCell::new(Vec::new()) };
25    static EXECUTION_APPROVAL_POLICY_STACK: RefCell<Vec<ToolApprovalPolicy>> = const { RefCell::new(Vec::new()) };
26}
27
28pub fn push_execution_policy(policy: CapabilityPolicy) {
29    EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
30}
31
32pub fn pop_execution_policy() {
33    EXECUTION_POLICY_STACK.with(|stack| {
34        stack.borrow_mut().pop();
35    });
36}
37
38pub fn current_execution_policy() -> Option<CapabilityPolicy> {
39    EXECUTION_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
40}
41
42pub fn push_approval_policy(policy: ToolApprovalPolicy) {
43    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
44}
45
46pub fn pop_approval_policy() {
47    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| {
48        stack.borrow_mut().pop();
49    });
50}
51
52pub fn current_approval_policy() -> Option<ToolApprovalPolicy> {
53    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
54}
55
56pub fn current_tool_annotations(tool: &str) -> Option<ToolAnnotations> {
57    current_execution_policy().and_then(|policy| policy.tool_annotations.get(tool).cloned())
58}
59
60fn policy_allows_tool(policy: &CapabilityPolicy, tool: &str) -> bool {
61    policy.tools.is_empty() || policy.tools.iter().any(|allowed| allowed == tool)
62}
63
64fn policy_allows_capability(policy: &CapabilityPolicy, capability: &str, op: &str) -> bool {
65    policy.capabilities.is_empty()
66        || policy
67            .capabilities
68            .get(capability)
69            .is_some_and(|ops| ops.is_empty() || ops.iter().any(|allowed| allowed == op))
70}
71
72fn policy_allows_side_effect(policy: &CapabilityPolicy, requested: &str) -> bool {
73    fn rank(v: &str) -> usize {
74        match v {
75            "none" => 0,
76            "read_only" => 1,
77            "workspace_write" => 2,
78            "process_exec" => 3,
79            "network" => 4,
80            _ => 5,
81        }
82    }
83    policy
84        .side_effect_level
85        .as_ref()
86        .map(|allowed| rank(allowed) >= rank(requested))
87        .unwrap_or(true)
88}
89
90pub(super) fn reject_policy(reason: String) -> Result<(), VmError> {
91    Err(VmError::CategorizedError {
92        message: reason,
93        category: crate::value::ErrorCategory::ToolRejected,
94    })
95}
96
97/// Mutation classification for a tool, derived from the pipeline's
98/// declared `ToolKind`. Used in telemetry and pre/post-bridge payloads
99/// while those methods still exist. Returns `"other"` for unannotated
100/// tools (fail-safe; unknown tools don't auto-classify).
101pub fn current_tool_mutation_classification(tool_name: &str) -> String {
102    current_tool_annotations(tool_name)
103        .map(|annotations| annotations.kind.mutation_class().to_string())
104        .unwrap_or_else(|| "other".to_string())
105}
106
107/// Workspace paths declared by this tool call, read from the tool's
108/// annotated `arg_schema.path_params`. Unannotated tools declare no
109/// paths — the VM no longer guesses by common argument names.
110pub fn current_tool_declared_paths(tool_name: &str, args: &serde_json::Value) -> Vec<String> {
111    let Some(map) = args.as_object() else {
112        return Vec::new();
113    };
114    let Some(annotations) = current_tool_annotations(tool_name) else {
115        return Vec::new();
116    };
117    let mut paths = Vec::new();
118    for key in &annotations.arg_schema.path_params {
119        if let Some(value) = map.get(key).and_then(|value| value.as_str()) {
120            if !value.is_empty() {
121                paths.push(value.to_string());
122            }
123        }
124    }
125    if let Some(items) = map.get("paths").and_then(|value| value.as_array()) {
126        for item in items {
127            if let Some(value) = item.as_str() {
128                if !value.is_empty() {
129                    paths.push(value.to_string());
130                }
131            }
132        }
133    }
134    paths.sort();
135    paths.dedup();
136    paths
137}
138
139pub fn enforce_current_policy_for_builtin(name: &str, args: &[VmValue]) -> Result<(), VmError> {
140    let Some(policy) = current_execution_policy() else {
141        return Ok(());
142    };
143    match name {
144        "read_file" if !policy_allows_capability(&policy, "workspace", "read_text") => {
145            return reject_policy(format!(
146                "builtin '{name}' exceeds workspace.read_text ceiling"
147            ));
148        }
149        "list_dir" if !policy_allows_capability(&policy, "workspace", "list") => {
150            return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
151        }
152        "file_exists" | "stat" if !policy_allows_capability(&policy, "workspace", "exists") => {
153            return reject_policy(format!("builtin '{name}' exceeds workspace.exists ceiling"));
154        }
155        "write_file" | "append_file" | "mkdir" | "copy_file"
156            if !policy_allows_capability(&policy, "workspace", "write_text")
157                || !policy_allows_side_effect(&policy, "workspace_write") =>
158        {
159            return reject_policy(format!("builtin '{name}' exceeds workspace write ceiling"));
160        }
161        "delete_file"
162            if !policy_allows_capability(&policy, "workspace", "delete")
163                || !policy_allows_side_effect(&policy, "workspace_write") =>
164        {
165            return reject_policy(
166                "builtin 'delete_file' exceeds workspace.delete ceiling".to_string(),
167            );
168        }
169        "apply_edit"
170            if !policy_allows_capability(&policy, "workspace", "apply_edit")
171                || !policy_allows_side_effect(&policy, "workspace_write") =>
172        {
173            return reject_policy(
174                "builtin 'apply_edit' exceeds workspace.apply_edit ceiling".to_string(),
175            );
176        }
177        "exec" | "exec_at" | "shell" | "shell_at"
178            if !policy_allows_capability(&policy, "process", "exec")
179                || !policy_allows_side_effect(&policy, "process_exec") =>
180        {
181            return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
182        }
183        "http_get" | "http_post" | "http_put" | "http_patch" | "http_delete" | "http_request"
184            if !policy_allows_side_effect(&policy, "network") =>
185        {
186            return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
187        }
188        "mcp_connect"
189        | "mcp_call"
190        | "mcp_list_tools"
191        | "mcp_list_resources"
192        | "mcp_list_resource_templates"
193        | "mcp_read_resource"
194        | "mcp_list_prompts"
195        | "mcp_get_prompt"
196        | "mcp_server_info"
197        | "mcp_disconnect"
198            if !policy_allows_capability(&policy, "process", "exec")
199                || !policy_allows_side_effect(&policy, "process_exec") =>
200        {
201            return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
202        }
203        "host_call" => {
204            let name = args.first().map(|v| v.display()).unwrap_or_default();
205            let Some((capability, op)) = name.split_once('.') else {
206                return reject_policy(format!(
207                    "host_call '{name}' must use capability.operation naming"
208                ));
209            };
210            if !policy_allows_capability(&policy, capability, op) {
211                return reject_policy(format!(
212                    "host_call {capability}.{op} exceeds capability ceiling"
213                ));
214            }
215            let requested_side_effect = match (capability, op) {
216                ("workspace", "write_text" | "apply_edit" | "delete") => "workspace_write",
217                ("process", "exec") => "process_exec",
218                _ => "read_only",
219            };
220            if !policy_allows_side_effect(&policy, requested_side_effect) {
221                return reject_policy(format!(
222                    "host_call {capability}.{op} exceeds side-effect ceiling"
223                ));
224            }
225        }
226        _ => {}
227    }
228    Ok(())
229}
230
231pub fn enforce_current_policy_for_bridge_builtin(name: &str) -> Result<(), VmError> {
232    if current_execution_policy().is_some() {
233        return reject_policy(format!(
234            "bridged builtin '{name}' exceeds execution policy; declare an explicit capability/tool surface instead"
235        ));
236    }
237    Ok(())
238}
239
240pub fn enforce_current_policy_for_tool(tool_name: &str) -> Result<(), VmError> {
241    let Some(policy) = current_execution_policy() else {
242        return Ok(());
243    };
244    if !policy_allows_tool(&policy, tool_name) {
245        return reject_policy(format!("tool '{tool_name}' exceeds tool ceiling"));
246    }
247    if let Some(annotations) = policy.tool_annotations.get(tool_name) {
248        for (capability, ops) in &annotations.capabilities {
249            for op in ops {
250                if !policy_allows_capability(&policy, capability, op) {
251                    return reject_policy(format!(
252                        "tool '{tool_name}' exceeds capability ceiling: {capability}.{op}"
253                    ));
254                }
255            }
256        }
257        let requested_level = annotations.side_effect_level;
258        if requested_level != SideEffectLevel::None
259            && !policy_allows_side_effect(&policy, requested_level.as_str())
260        {
261            return reject_policy(format!(
262                "tool '{tool_name}' exceeds side-effect ceiling: {}",
263                requested_level.as_str()
264            ));
265        }
266    }
267    Ok(())
268}
269
270// ── Output visibility redaction ─────────────────────────────────────
271//
272// Transcript lifecycle (reset, fork, trim, compact) now lives on
273// `crate::agent_sessions` as explicit imperative builtins. All that
274// remains here is the per-call visibility filter, which is
275// output-shaping (not lifecycle).
276
277/// Filter a transcript dict down to the caller-visible subset, based
278/// on the `output_visibility` node option. `None` or any unknown
279/// visibility returns the transcript unchanged — callers are expected
280/// to validate the string against a known set upstream.
281pub fn redact_transcript_visibility(
282    transcript: &VmValue,
283    visibility: Option<&str>,
284) -> Option<VmValue> {
285    let Some(visibility) = visibility else {
286        return Some(transcript.clone());
287    };
288    if visibility != "public" && visibility != "public_only" {
289        return Some(transcript.clone());
290    }
291    let dict = transcript.as_dict()?;
292    let public_messages = match dict.get("messages") {
293        Some(VmValue::List(list)) => list
294            .iter()
295            .filter(|message| {
296                message
297                    .as_dict()
298                    .and_then(|d| d.get("role"))
299                    .map(|v| v.display())
300                    .map(|role| role != "tool_result")
301                    .unwrap_or(true)
302            })
303            .cloned()
304            .collect::<Vec<_>>(),
305        _ => Vec::new(),
306    };
307    let public_events = match dict.get("events") {
308        Some(VmValue::List(list)) => list
309            .iter()
310            .filter(|event| {
311                event
312                    .as_dict()
313                    .and_then(|d| d.get("visibility"))
314                    .map(|v| v.display())
315                    .map(|value| value == "public")
316                    .unwrap_or(true)
317            })
318            .cloned()
319            .collect::<Vec<_>>(),
320        _ => Vec::new(),
321    };
322    let mut redacted = dict.clone();
323    redacted.insert(
324        "messages".to_string(),
325        VmValue::List(Rc::new(public_messages)),
326    );
327    redacted.insert("events".to_string(), VmValue::List(Rc::new(public_events)));
328    Some(VmValue::Dict(Rc::new(redacted)))
329}
330
331pub fn builtin_ceiling() -> CapabilityPolicy {
332    CapabilityPolicy {
333        // `capabilities` is intentionally empty: the host capability manifest
334        // is the sole authority, and an allowlist here would silently block
335        // any capability the host adds later.
336        tools: Vec::new(),
337        capabilities: BTreeMap::new(),
338        workspace_roots: Vec::new(),
339        side_effect_level: Some("network".to_string()),
340        recursion_limit: Some(8),
341        tool_arg_constraints: Vec::new(),
342        tool_annotations: BTreeMap::new(),
343    }
344}
345
346/// Declarative policy for tool approval gating. Allows pipelines to
347/// specify which tools are auto-approved, auto-denied, or require
348/// host confirmation, plus write-path allowlists.
349#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
350#[serde(default)]
351pub struct ToolApprovalPolicy {
352    /// Glob patterns for tools that should be auto-approved.
353    #[serde(default)]
354    pub auto_approve: Vec<String>,
355    /// Glob patterns for tools that should always be denied.
356    #[serde(default)]
357    pub auto_deny: Vec<String>,
358    /// Glob patterns for tools that require host confirmation.
359    #[serde(default)]
360    pub require_approval: Vec<String>,
361    /// Glob patterns for writable paths.
362    #[serde(default)]
363    pub write_path_allowlist: Vec<String>,
364}
365
/// Outcome of checking one tool call against a [`ToolApprovalPolicy`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ToolApprovalDecision {
    /// The policy lets the call proceed without asking the host.
    AutoApproved,
    /// The policy refuses the call; `reason` explains which rule fired.
    AutoDenied { reason: String },
    /// The host must confirm the call. No payload is carried because the
    /// caller already holds the tool name and args it forwards to the host
    /// bridge.
    RequiresHostApproval,
}
377
378impl ToolApprovalPolicy {
379    /// Evaluate whether a tool call should be approved, denied, or needs
380    /// host confirmation.
381    pub fn evaluate(&self, tool_name: &str, args: &serde_json::Value) -> ToolApprovalDecision {
382        // Auto-deny takes precedence over every other pattern list.
383        for pattern in &self.auto_deny {
384            if glob_match(pattern, tool_name) {
385                return ToolApprovalDecision::AutoDenied {
386                    reason: format!("tool '{tool_name}' matches deny pattern '{pattern}'"),
387                };
388            }
389        }
390
391        if !self.write_path_allowlist.is_empty() {
392            let paths = super::current_tool_declared_paths(tool_name, args);
393            for path in &paths {
394                let allowed = self
395                    .write_path_allowlist
396                    .iter()
397                    .any(|pattern| glob_match(pattern, path));
398                if !allowed {
399                    return ToolApprovalDecision::AutoDenied {
400                        reason: format!(
401                            "tool '{tool_name}' writes to '{path}' which is not in the write-path allowlist"
402                        ),
403                    };
404                }
405            }
406        }
407
408        for pattern in &self.auto_approve {
409            if glob_match(pattern, tool_name) {
410                return ToolApprovalDecision::AutoApproved;
411            }
412        }
413
414        for pattern in &self.require_approval {
415            if glob_match(pattern, tool_name) {
416                return ToolApprovalDecision::RequiresHostApproval;
417            }
418        }
419
420        ToolApprovalDecision::AutoApproved
421    }
422
423    /// Merge two approval policies, taking the most restrictive combination.
424    /// - auto_approve: only tools approved by BOTH policies stay approved
425    ///   (if either policy has no patterns, the other's patterns are used)
426    /// - auto_deny / require_approval: union (either policy can deny/gate)
427    /// - write_path_allowlist: intersection (both must allow the path)
428    pub fn intersect(&self, other: &ToolApprovalPolicy) -> ToolApprovalPolicy {
429        let auto_approve = if self.auto_approve.is_empty() {
430            other.auto_approve.clone()
431        } else if other.auto_approve.is_empty() {
432            self.auto_approve.clone()
433        } else {
434            self.auto_approve
435                .iter()
436                .filter(|p| other.auto_approve.contains(p))
437                .cloned()
438                .collect()
439        };
440        let mut auto_deny = self.auto_deny.clone();
441        auto_deny.extend(other.auto_deny.iter().cloned());
442        let mut require_approval = self.require_approval.clone();
443        require_approval.extend(other.require_approval.iter().cloned());
444        let write_path_allowlist = if self.write_path_allowlist.is_empty() {
445            other.write_path_allowlist.clone()
446        } else if other.write_path_allowlist.is_empty() {
447            self.write_path_allowlist.clone()
448        } else {
449            self.write_path_allowlist
450                .iter()
451                .filter(|p| other.write_path_allowlist.contains(p))
452                .cloned()
453                .collect()
454        };
455        ToolApprovalPolicy {
456            auto_approve,
457            auto_deny,
458            require_approval,
459            write_path_allowlist,
460        }
461    }
462}
463
#[cfg(test)]
mod approval_policy_tests {
    use super::*;

    /// Turns string literals into the owned pattern lists the policy stores.
    fn patterns(globs: &[&str]) -> Vec<String> {
        globs.iter().map(|glob| glob.to_string()).collect()
    }

    /// Argument payload for calls that pass no arguments.
    fn no_args() -> serde_json::Value {
        serde_json::json!({})
    }

    #[test]
    fn auto_deny_takes_precedence_over_auto_approve() {
        let policy = ToolApprovalPolicy {
            auto_approve: patterns(&["*"]),
            auto_deny: patterns(&["dangerous_*"]),
            ..Default::default()
        };
        let expected = ToolApprovalDecision::AutoDenied {
            reason: "tool 'dangerous_rm' matches deny pattern 'dangerous_*'".to_string(),
        };
        assert_eq!(policy.evaluate("dangerous_rm", &no_args()), expected);
    }

    #[test]
    fn auto_approve_matches_glob() {
        let policy = ToolApprovalPolicy {
            auto_approve: patterns(&["read*", "search*"]),
            ..Default::default()
        };
        for tool in ["read_file", "search"] {
            assert_eq!(
                policy.evaluate(tool, &no_args()),
                ToolApprovalDecision::AutoApproved
            );
        }
    }

    #[test]
    fn require_approval_emits_decision() {
        let policy = ToolApprovalPolicy {
            require_approval: patterns(&["edit*"]),
            ..Default::default()
        };
        let call_args = serde_json::json!({"path": "foo.rs"});
        assert_eq!(
            policy.evaluate("edit_file", &call_args),
            ToolApprovalDecision::RequiresHostApproval
        );
    }

    #[test]
    fn unmatched_tool_defaults_to_approved() {
        let policy = ToolApprovalPolicy {
            auto_approve: patterns(&["read*"]),
            require_approval: patterns(&["edit*"]),
            ..Default::default()
        };
        assert_eq!(
            policy.evaluate("unknown_tool", &no_args()),
            ToolApprovalDecision::AutoApproved
        );
    }

    #[test]
    fn intersect_merges_deny_lists() {
        let left = ToolApprovalPolicy {
            auto_deny: patterns(&["rm*"]),
            ..Default::default()
        };
        let right = ToolApprovalPolicy {
            auto_deny: patterns(&["drop*"]),
            ..Default::default()
        };
        assert_eq!(left.intersect(&right).auto_deny.len(), 2);
    }

    #[test]
    fn intersect_restricts_auto_approve_to_common_patterns() {
        let left = ToolApprovalPolicy {
            auto_approve: patterns(&["read*", "search*"]),
            ..Default::default()
        };
        let right = ToolApprovalPolicy {
            auto_approve: patterns(&["read*", "write*"]),
            ..Default::default()
        };
        assert_eq!(left.intersect(&right).auto_approve, patterns(&["read*"]));
    }

    #[test]
    fn intersect_defers_auto_approve_when_one_side_empty() {
        let left = ToolApprovalPolicy {
            auto_approve: patterns(&["read*"]),
            ..Default::default()
        };
        let right = ToolApprovalPolicy::default();
        assert_eq!(left.intersect(&right).auto_approve, patterns(&["read*"]));
    }
}
564
#[cfg(test)]
mod turn_policy_tests {
    use super::TurnPolicy;

    /// Deserializes a `TurnPolicy` from a JSON value, panicking on failure.
    fn parse(raw: serde_json::Value) -> TurnPolicy {
        serde_json::from_value(raw).expect("deserialize")
    }

    #[test]
    fn default_allows_done_sentinel() {
        let TurnPolicy {
            allow_done_sentinel,
            require_action_or_yield,
            max_prose_chars,
            ..
        } = TurnPolicy::default();
        assert!(allow_done_sentinel);
        assert!(!require_action_or_yield);
        assert!(max_prose_chars.is_none());
    }

    #[test]
    fn deserializing_partial_dict_preserves_done_sentinel_pathway() {
        // Older workflows pass `turn_policy: { require_action_or_yield: true }`
        // and never mention `allow_done_sentinel`. Such a dict must still
        // deserialize with the done-sentinel pathway enabled, so persistent
        // agent loops keep their completion signal.
        let policy = parse(serde_json::json!({ "require_action_or_yield": true }));
        assert!(policy.require_action_or_yield);
        assert!(policy.allow_done_sentinel);
    }

    #[test]
    fn deserializing_explicit_false_disables_done_sentinel() {
        let policy = parse(serde_json::json!({
            "require_action_or_yield": true,
            "allow_done_sentinel": false,
        }));
        assert!(policy.require_action_or_yield);
        assert!(!policy.allow_done_sentinel);
    }
}
601
#[cfg(test)]
mod visibility_redaction_tests {
    use super::*;
    use crate::value::VmValue;

    /// Builds a three-message transcript: user, assistant, and one
    /// `tool_result` entry that public views must not expose.
    fn mock_transcript() -> VmValue {
        let messages = vec![
            serde_json::json!({"role": "user", "content": "hi"}),
            serde_json::json!({"role": "assistant", "content": "hello"}),
            serde_json::json!({"role": "tool_result", "content": "internal tool output"}),
        ];
        crate::llm::helpers::transcript_to_vm_with_events(
            Some("test-id".to_string()),
            None,
            None,
            &messages,
            Vec::new(),
            Vec::new(),
            Some("active"),
        )
    }

    /// Length of the transcript's `messages` list, or 0 when it is absent
    /// or not a list.
    fn message_count(transcript: &VmValue) -> usize {
        match transcript.as_dict().and_then(|dict| dict.get("messages")) {
            Some(VmValue::List(messages)) => messages.len(),
            _ => 0,
        }
    }

    /// Redacts the mock transcript with `visibility` and counts what is left.
    fn messages_left(visibility: Option<&str>) -> usize {
        let transcript = mock_transcript();
        let redacted = redact_transcript_visibility(&transcript, visibility).unwrap();
        message_count(&redacted)
    }

    #[test]
    fn visibility_none_returns_unchanged() {
        assert_eq!(messages_left(None), 3);
    }

    #[test]
    fn visibility_public_drops_tool_results() {
        assert_eq!(messages_left(Some("public")), 2);
    }

    #[test]
    fn visibility_unknown_string_is_pass_through() {
        assert_eq!(messages_left(Some("internal")), 3);
    }
}
655}