Skip to main content

harn_vm/orchestration/policy/
mod.rs

//! Policy types and capability-ceiling enforcement.
2
3mod types;
4
5use std::cell::RefCell;
6use std::collections::BTreeMap;
7use std::rc::Rc;
8use std::thread_local;
9
10use serde::{Deserialize, Serialize};
11
12use super::glob_match;
13use crate::tool_annotations::{SideEffectLevel, ToolAnnotations};
14use crate::value::{VmError, VmValue};
15
16pub use crate::tool_annotations::{ToolArgSchema, ToolKind};
17pub use types::{
18    enforce_tool_arg_constraints, AutoCompactPolicy, BranchSemantics, CapabilityPolicy,
19    ContextPolicy, EqIgnored, EscalationPolicy, JoinPolicy, MapPolicy, ModelPolicy, ReducePolicy,
20    RetryPolicy, StageContract, ToolArgConstraint, TurnPolicy,
21};
22
23thread_local! {
24    static EXECUTION_POLICY_STACK: RefCell<Vec<CapabilityPolicy>> = const { RefCell::new(Vec::new()) };
25    static EXECUTION_APPROVAL_POLICY_STACK: RefCell<Vec<ToolApprovalPolicy>> = const { RefCell::new(Vec::new()) };
26}
27
28pub fn push_execution_policy(policy: CapabilityPolicy) {
29    EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
30}
31
32pub fn pop_execution_policy() {
33    EXECUTION_POLICY_STACK.with(|stack| {
34        stack.borrow_mut().pop();
35    });
36}
37
38pub fn current_execution_policy() -> Option<CapabilityPolicy> {
39    EXECUTION_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
40}
41
42pub fn push_approval_policy(policy: ToolApprovalPolicy) {
43    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
44}
45
46pub fn pop_approval_policy() {
47    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| {
48        stack.borrow_mut().pop();
49    });
50}
51
52pub fn current_approval_policy() -> Option<ToolApprovalPolicy> {
53    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
54}
55
56pub fn current_tool_annotations(tool: &str) -> Option<ToolAnnotations> {
57    current_execution_policy().and_then(|policy| policy.tool_annotations.get(tool).cloned())
58}
59
60fn policy_allows_tool(policy: &CapabilityPolicy, tool: &str) -> bool {
61    policy.tools.is_empty() || policy.tools.iter().any(|allowed| allowed == tool)
62}
63
64fn policy_allows_capability(policy: &CapabilityPolicy, capability: &str, op: &str) -> bool {
65    policy.capabilities.is_empty()
66        || policy
67            .capabilities
68            .get(capability)
69            .is_some_and(|ops| ops.is_empty() || ops.iter().any(|allowed| allowed == op))
70}
71
72fn policy_allows_side_effect(policy: &CapabilityPolicy, requested: &str) -> bool {
73    fn rank(v: &str) -> usize {
74        match v {
75            "none" => 0,
76            "read_only" => 1,
77            "workspace_write" => 2,
78            "process_exec" => 3,
79            "network" => 4,
80            _ => 5,
81        }
82    }
83    policy
84        .side_effect_level
85        .as_ref()
86        .map(|allowed| rank(allowed) >= rank(requested))
87        .unwrap_or(true)
88}
89
90pub(super) fn reject_policy(reason: String) -> Result<(), VmError> {
91    Err(VmError::CategorizedError {
92        message: reason,
93        category: crate::value::ErrorCategory::ToolRejected,
94    })
95}
96
97/// Mutation classification for a tool, derived from the pipeline's
98/// declared `ToolKind`. Used in telemetry and pre/post-bridge payloads
99/// while those methods still exist. Returns `"other"` for unannotated
100/// tools (fail-safe; unknown tools don't auto-classify).
101pub fn current_tool_mutation_classification(tool_name: &str) -> String {
102    current_tool_annotations(tool_name)
103        .map(|annotations| annotations.kind.mutation_class().to_string())
104        .unwrap_or_else(|| "other".to_string())
105}
106
107/// Workspace paths declared by this tool call, read from the tool's
108/// annotated `arg_schema.path_params`. Unannotated tools declare no
109/// paths — the VM no longer guesses by common argument names.
110pub fn current_tool_declared_paths(tool_name: &str, args: &serde_json::Value) -> Vec<String> {
111    let Some(map) = args.as_object() else {
112        return Vec::new();
113    };
114    let Some(annotations) = current_tool_annotations(tool_name) else {
115        return Vec::new();
116    };
117    let mut paths = Vec::new();
118    for key in &annotations.arg_schema.path_params {
119        if let Some(value) = map.get(key).and_then(|value| value.as_str()) {
120            if !value.is_empty() {
121                paths.push(value.to_string());
122            }
123        }
124    }
125    if let Some(items) = map.get("paths").and_then(|value| value.as_array()) {
126        for item in items {
127            if let Some(value) = item.as_str() {
128                if !value.is_empty() {
129                    paths.push(value.to_string());
130                }
131            }
132        }
133    }
134    paths.sort();
135    paths.dedup();
136    paths
137}
138
139pub fn enforce_current_policy_for_builtin(name: &str, args: &[VmValue]) -> Result<(), VmError> {
140    let Some(policy) = current_execution_policy() else {
141        return Ok(());
142    };
143    match name {
144        "read_file" => {
145            if !policy_allows_capability(&policy, "workspace", "read_text") {
146                return reject_policy(format!(
147                    "builtin '{name}' exceeds workspace.read_text ceiling"
148                ));
149            }
150        }
151        "list_dir" => {
152            if !policy_allows_capability(&policy, "workspace", "list") {
153                return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
154            }
155        }
156        "file_exists" | "stat" => {
157            if !policy_allows_capability(&policy, "workspace", "exists") {
158                return reject_policy(format!("builtin '{name}' exceeds workspace.exists ceiling"));
159            }
160        }
161        "write_file" | "append_file" | "mkdir" | "copy_file" => {
162            if !policy_allows_capability(&policy, "workspace", "write_text")
163                || !policy_allows_side_effect(&policy, "workspace_write")
164            {
165                return reject_policy(format!("builtin '{name}' exceeds workspace write ceiling"));
166            }
167        }
168        "delete_file" => {
169            if !policy_allows_capability(&policy, "workspace", "delete")
170                || !policy_allows_side_effect(&policy, "workspace_write")
171            {
172                return reject_policy(
173                    "builtin 'delete_file' exceeds workspace.delete ceiling".to_string(),
174                );
175            }
176        }
177        "apply_edit" => {
178            if !policy_allows_capability(&policy, "workspace", "apply_edit")
179                || !policy_allows_side_effect(&policy, "workspace_write")
180            {
181                return reject_policy(
182                    "builtin 'apply_edit' exceeds workspace.apply_edit ceiling".to_string(),
183                );
184            }
185        }
186        "exec" | "exec_at" | "shell" | "shell_at" => {
187            if !policy_allows_capability(&policy, "process", "exec")
188                || !policy_allows_side_effect(&policy, "process_exec")
189            {
190                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
191            }
192        }
193        "http_get" | "http_post" | "http_put" | "http_patch" | "http_delete" | "http_request" => {
194            if !policy_allows_side_effect(&policy, "network") {
195                return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
196            }
197        }
198        "mcp_connect"
199        | "mcp_call"
200        | "mcp_list_tools"
201        | "mcp_list_resources"
202        | "mcp_list_resource_templates"
203        | "mcp_read_resource"
204        | "mcp_list_prompts"
205        | "mcp_get_prompt"
206        | "mcp_server_info"
207        | "mcp_disconnect" => {
208            if !policy_allows_capability(&policy, "process", "exec")
209                || !policy_allows_side_effect(&policy, "process_exec")
210            {
211                return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
212            }
213        }
214        "host_call" => {
215            let name = args.first().map(|v| v.display()).unwrap_or_default();
216            let Some((capability, op)) = name.split_once('.') else {
217                return reject_policy(format!(
218                    "host_call '{name}' must use capability.operation naming"
219                ));
220            };
221            if !policy_allows_capability(&policy, capability, op) {
222                return reject_policy(format!(
223                    "host_call {capability}.{op} exceeds capability ceiling"
224                ));
225            }
226            let requested_side_effect = match (capability, op) {
227                ("workspace", "write_text" | "apply_edit" | "delete") => "workspace_write",
228                ("process", "exec") => "process_exec",
229                _ => "read_only",
230            };
231            if !policy_allows_side_effect(&policy, requested_side_effect) {
232                return reject_policy(format!(
233                    "host_call {capability}.{op} exceeds side-effect ceiling"
234                ));
235            }
236        }
237        _ => {}
238    }
239    Ok(())
240}
241
242pub fn enforce_current_policy_for_bridge_builtin(name: &str) -> Result<(), VmError> {
243    if current_execution_policy().is_some() {
244        return reject_policy(format!(
245            "bridged builtin '{name}' exceeds execution policy; declare an explicit capability/tool surface instead"
246        ));
247    }
248    Ok(())
249}
250
251pub fn enforce_current_policy_for_tool(tool_name: &str) -> Result<(), VmError> {
252    let Some(policy) = current_execution_policy() else {
253        return Ok(());
254    };
255    if !policy_allows_tool(&policy, tool_name) {
256        return reject_policy(format!("tool '{tool_name}' exceeds tool ceiling"));
257    }
258    if let Some(annotations) = policy.tool_annotations.get(tool_name) {
259        for (capability, ops) in &annotations.capabilities {
260            for op in ops {
261                if !policy_allows_capability(&policy, capability, op) {
262                    return reject_policy(format!(
263                        "tool '{tool_name}' exceeds capability ceiling: {capability}.{op}"
264                    ));
265                }
266            }
267        }
268        let requested_level = annotations.side_effect_level;
269        if requested_level != SideEffectLevel::None
270            && !policy_allows_side_effect(&policy, requested_level.as_str())
271        {
272            return reject_policy(format!(
273                "tool '{tool_name}' exceeds side-effect ceiling: {}",
274                requested_level.as_str()
275            ));
276        }
277    }
278    Ok(())
279}
280
// ── Output visibility redaction ─────────────────────────────────────
//
// Transcript lifecycle (reset, fork, trim, compact) now lives on
// `crate::agent_sessions` as explicit imperative builtins. All that
// remains here is the per-call visibility filter, which is
// output-shaping (not lifecycle).
287
288/// Filter a transcript dict down to the caller-visible subset, based
289/// on the `output_visibility` node option. `None` or any unknown
290/// visibility returns the transcript unchanged — callers are expected
291/// to validate the string against a known set upstream.
292pub fn redact_transcript_visibility(
293    transcript: &VmValue,
294    visibility: Option<&str>,
295) -> Option<VmValue> {
296    let Some(visibility) = visibility else {
297        return Some(transcript.clone());
298    };
299    if visibility != "public" && visibility != "public_only" {
300        return Some(transcript.clone());
301    }
302    let dict = transcript.as_dict()?;
303    let public_messages = match dict.get("messages") {
304        Some(VmValue::List(list)) => list
305            .iter()
306            .filter(|message| {
307                message
308                    .as_dict()
309                    .and_then(|d| d.get("role"))
310                    .map(|v| v.display())
311                    .map(|role| role != "tool_result")
312                    .unwrap_or(true)
313            })
314            .cloned()
315            .collect::<Vec<_>>(),
316        _ => Vec::new(),
317    };
318    let public_events = match dict.get("events") {
319        Some(VmValue::List(list)) => list
320            .iter()
321            .filter(|event| {
322                event
323                    .as_dict()
324                    .and_then(|d| d.get("visibility"))
325                    .map(|v| v.display())
326                    .map(|value| value == "public")
327                    .unwrap_or(true)
328            })
329            .cloned()
330            .collect::<Vec<_>>(),
331        _ => Vec::new(),
332    };
333    let mut redacted = dict.clone();
334    redacted.insert(
335        "messages".to_string(),
336        VmValue::List(Rc::new(public_messages)),
337    );
338    redacted.insert("events".to_string(), VmValue::List(Rc::new(public_events)));
339    Some(VmValue::Dict(Rc::new(redacted)))
340}
341
342pub fn builtin_ceiling() -> CapabilityPolicy {
343    CapabilityPolicy {
344        // `capabilities` is intentionally empty: the host capability manifest
345        // is the sole authority, and an allowlist here would silently block
346        // any capability the host adds later.
347        tools: Vec::new(),
348        capabilities: BTreeMap::new(),
349        workspace_roots: Vec::new(),
350        side_effect_level: Some("network".to_string()),
351        recursion_limit: Some(8),
352        tool_arg_constraints: Vec::new(),
353        tool_annotations: BTreeMap::new(),
354    }
355}
356
357/// Declarative policy for tool approval gating. Allows pipelines to
358/// specify which tools are auto-approved, auto-denied, or require
359/// host confirmation, plus write-path allowlists.
360#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
361#[serde(default)]
362pub struct ToolApprovalPolicy {
363    /// Glob patterns for tools that should be auto-approved.
364    #[serde(default)]
365    pub auto_approve: Vec<String>,
366    /// Glob patterns for tools that should always be denied.
367    #[serde(default)]
368    pub auto_deny: Vec<String>,
369    /// Glob patterns for tools that require host confirmation.
370    #[serde(default)]
371    pub require_approval: Vec<String>,
372    /// Glob patterns for writable paths.
373    #[serde(default)]
374    pub write_path_allowlist: Vec<String>,
375}
376
/// Outcome of checking a tool call against a `ToolApprovalPolicy`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ToolApprovalDecision {
    /// The policy approves the call without asking the host.
    AutoApproved,
    /// The policy denies the call; `reason` explains which rule matched.
    AutoDenied { reason: String },
    /// The host must explicitly approve the call. The caller already owns
    /// the tool name and args and forwards them to the host bridge.
    RequiresHostApproval,
}
388
389impl ToolApprovalPolicy {
390    /// Evaluate whether a tool call should be approved, denied, or needs
391    /// host confirmation.
392    pub fn evaluate(&self, tool_name: &str, args: &serde_json::Value) -> ToolApprovalDecision {
393        // Auto-deny takes precedence over every other pattern list.
394        for pattern in &self.auto_deny {
395            if glob_match(pattern, tool_name) {
396                return ToolApprovalDecision::AutoDenied {
397                    reason: format!("tool '{tool_name}' matches deny pattern '{pattern}'"),
398                };
399            }
400        }
401
402        if !self.write_path_allowlist.is_empty() {
403            let paths = super::current_tool_declared_paths(tool_name, args);
404            for path in &paths {
405                let allowed = self
406                    .write_path_allowlist
407                    .iter()
408                    .any(|pattern| glob_match(pattern, path));
409                if !allowed {
410                    return ToolApprovalDecision::AutoDenied {
411                        reason: format!(
412                            "tool '{tool_name}' writes to '{path}' which is not in the write-path allowlist"
413                        ),
414                    };
415                }
416            }
417        }
418
419        for pattern in &self.auto_approve {
420            if glob_match(pattern, tool_name) {
421                return ToolApprovalDecision::AutoApproved;
422            }
423        }
424
425        for pattern in &self.require_approval {
426            if glob_match(pattern, tool_name) {
427                return ToolApprovalDecision::RequiresHostApproval;
428            }
429        }
430
431        ToolApprovalDecision::AutoApproved
432    }
433
434    /// Merge two approval policies, taking the most restrictive combination.
435    /// - auto_approve: only tools approved by BOTH policies stay approved
436    ///   (if either policy has no patterns, the other's patterns are used)
437    /// - auto_deny / require_approval: union (either policy can deny/gate)
438    /// - write_path_allowlist: intersection (both must allow the path)
439    pub fn intersect(&self, other: &ToolApprovalPolicy) -> ToolApprovalPolicy {
440        let auto_approve = if self.auto_approve.is_empty() {
441            other.auto_approve.clone()
442        } else if other.auto_approve.is_empty() {
443            self.auto_approve.clone()
444        } else {
445            self.auto_approve
446                .iter()
447                .filter(|p| other.auto_approve.contains(p))
448                .cloned()
449                .collect()
450        };
451        let mut auto_deny = self.auto_deny.clone();
452        auto_deny.extend(other.auto_deny.iter().cloned());
453        let mut require_approval = self.require_approval.clone();
454        require_approval.extend(other.require_approval.iter().cloned());
455        let write_path_allowlist = if self.write_path_allowlist.is_empty() {
456            other.write_path_allowlist.clone()
457        } else if other.write_path_allowlist.is_empty() {
458            self.write_path_allowlist.clone()
459        } else {
460            self.write_path_allowlist
461                .iter()
462                .filter(|p| other.write_path_allowlist.contains(p))
463                .cloned()
464                .collect()
465        };
466        ToolApprovalPolicy {
467            auto_approve,
468            auto_deny,
469            require_approval,
470            write_path_allowlist,
471        }
472    }
473}
474
#[cfg(test)]
mod approval_policy_tests {
    use super::*;

    /// Builds an owned pattern list from string literals.
    fn patterns(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Evaluates `tool` against `policy` with an empty JSON object as args.
    fn evaluate_without_args(policy: &ToolApprovalPolicy, tool: &str) -> ToolApprovalDecision {
        policy.evaluate(tool, &serde_json::json!({}))
    }

    #[test]
    fn auto_deny_takes_precedence_over_auto_approve() {
        let policy = ToolApprovalPolicy {
            auto_approve: patterns(&["*"]),
            auto_deny: patterns(&["dangerous_*"]),
            ..Default::default()
        };
        let expected = ToolApprovalDecision::AutoDenied {
            reason: "tool 'dangerous_rm' matches deny pattern 'dangerous_*'".to_string(),
        };
        assert_eq!(evaluate_without_args(&policy, "dangerous_rm"), expected);
    }

    #[test]
    fn auto_approve_matches_glob() {
        let policy = ToolApprovalPolicy {
            auto_approve: patterns(&["read*", "search*"]),
            ..Default::default()
        };
        for tool in ["read_file", "search"] {
            assert_eq!(
                evaluate_without_args(&policy, tool),
                ToolApprovalDecision::AutoApproved
            );
        }
    }

    #[test]
    fn require_approval_emits_decision() {
        let policy = ToolApprovalPolicy {
            require_approval: patterns(&["edit*"]),
            ..Default::default()
        };
        let args = serde_json::json!({"path": "foo.rs"});
        let decision = policy.evaluate("edit_file", &args);
        assert!(matches!(
            decision,
            ToolApprovalDecision::RequiresHostApproval
        ));
    }

    #[test]
    fn unmatched_tool_defaults_to_approved() {
        let policy = ToolApprovalPolicy {
            auto_approve: patterns(&["read*"]),
            require_approval: patterns(&["edit*"]),
            ..Default::default()
        };
        assert_eq!(
            evaluate_without_args(&policy, "unknown_tool"),
            ToolApprovalDecision::AutoApproved
        );
    }

    #[test]
    fn intersect_merges_deny_lists() {
        let first = ToolApprovalPolicy {
            auto_deny: patterns(&["rm*"]),
            ..Default::default()
        };
        let second = ToolApprovalPolicy {
            auto_deny: patterns(&["drop*"]),
            ..Default::default()
        };
        assert_eq!(first.intersect(&second).auto_deny.len(), 2);
    }

    #[test]
    fn intersect_restricts_auto_approve_to_common_patterns() {
        let first = ToolApprovalPolicy {
            auto_approve: patterns(&["read*", "search*"]),
            ..Default::default()
        };
        let second = ToolApprovalPolicy {
            auto_approve: patterns(&["read*", "write*"]),
            ..Default::default()
        };
        assert_eq!(first.intersect(&second).auto_approve, patterns(&["read*"]));
    }

    #[test]
    fn intersect_defers_auto_approve_when_one_side_empty() {
        let restricted = ToolApprovalPolicy {
            auto_approve: patterns(&["read*"]),
            ..Default::default()
        };
        let unrestricted = ToolApprovalPolicy::default();
        assert_eq!(
            restricted.intersect(&unrestricted).auto_approve,
            patterns(&["read*"])
        );
    }
}
575
#[cfg(test)]
mod turn_policy_tests {
    use super::TurnPolicy;

    /// Deserializes a `TurnPolicy` from a JSON value, panicking on failure.
    fn parse(raw: serde_json::Value) -> TurnPolicy {
        serde_json::from_value(raw).expect("deserialize")
    }

    #[test]
    fn default_allows_done_sentinel() {
        let TurnPolicy {
            allow_done_sentinel,
            require_action_or_yield,
            max_prose_chars,
            ..
        } = TurnPolicy::default();
        assert!(allow_done_sentinel);
        assert!(!require_action_or_yield);
        assert!(max_prose_chars.is_none());
    }

    #[test]
    fn deserializing_partial_dict_preserves_done_sentinel_pathway() {
        // Workflows written before `allow_done_sentinel` existed pass only
        // `turn_policy: { require_action_or_yield: true }`. Such a dict must
        // still deserialize with the done-sentinel pathway enabled so
        // persistent agent loops keep their completion signal.
        let policy = parse(serde_json::json!({ "require_action_or_yield": true }));
        assert!(policy.require_action_or_yield);
        assert!(policy.allow_done_sentinel);
    }

    #[test]
    fn deserializing_explicit_false_disables_done_sentinel() {
        let policy = parse(serde_json::json!({
            "require_action_or_yield": true,
            "allow_done_sentinel": false,
        }));
        assert!(policy.require_action_or_yield);
        assert!(!policy.allow_done_sentinel);
    }
}
612
#[cfg(test)]
mod visibility_redaction_tests {
    use super::*;
    use crate::value::VmValue;

    /// Builds a three-message transcript: user, assistant, and one
    /// `tool_result` message that public visibility should hide.
    fn mock_transcript() -> VmValue {
        let messages = vec![
            serde_json::json!({"role": "user", "content": "hi"}),
            serde_json::json!({"role": "assistant", "content": "hello"}),
            serde_json::json!({"role": "tool_result", "content": "internal tool output"}),
        ];
        crate::llm::helpers::transcript_to_vm_with_events(
            Some("test-id".to_string()),
            None,
            None,
            &messages,
            Vec::new(),
            Vec::new(),
            Some("active"),
        )
    }

    /// Number of entries in the transcript's `messages` list, or 0 when the
    /// value is not a dict or has no message list.
    fn message_count(transcript: &VmValue) -> usize {
        match transcript.as_dict().and_then(|d| d.get("messages")) {
            Some(VmValue::List(list)) => list.len(),
            _ => 0,
        }
    }

    /// Redacts the mock transcript under `visibility` and counts what is left.
    fn remaining_messages(visibility: Option<&str>) -> usize {
        let transcript = mock_transcript();
        let redacted = redact_transcript_visibility(&transcript, visibility).unwrap();
        message_count(&redacted)
    }

    #[test]
    fn visibility_none_returns_unchanged() {
        assert_eq!(remaining_messages(None), 3);
    }

    #[test]
    fn visibility_public_drops_tool_results() {
        assert_eq!(remaining_messages(Some("public")), 2);
    }

    #[test]
    fn visibility_unknown_string_is_pass_through() {
        assert_eq!(remaining_messages(Some("internal")), 3);
    }
}
666}