Skip to main content

harn_vm/orchestration/policy/
mod.rs

1//! Policy types and capability-ceiling enforcement.
2
3mod types;
4
5use std::cell::RefCell;
6use std::collections::BTreeMap;
7use std::rc::Rc;
8use std::thread_local;
9
10use serde::{Deserialize, Serialize};
11
12use super::glob_match;
13use crate::tool_annotations::{SideEffectLevel, ToolAnnotations};
14use crate::value::{VmError, VmValue};
15use crate::workspace_path::{classify_workspace_path, WorkspacePathInfo};
16
17pub use crate::tool_annotations::{ToolArgSchema, ToolKind};
18pub use types::{
19    enforce_tool_arg_constraints, AutoCompactPolicy, BranchSemantics, CapabilityPolicy,
20    ContextPolicy, EqIgnored, EscalationPolicy, JoinPolicy, MapPolicy, ModelPolicy,
21    NativeToolFallbackPolicy, ReducePolicy, RetryPolicy, StageContract, ToolArgConstraint,
22    TurnPolicy,
23};
24
25thread_local! {
26    static EXECUTION_POLICY_STACK: RefCell<Vec<CapabilityPolicy>> = const { RefCell::new(Vec::new()) };
27    static EXECUTION_APPROVAL_POLICY_STACK: RefCell<Vec<ToolApprovalPolicy>> = const { RefCell::new(Vec::new()) };
28    static TRUSTED_BRIDGE_CALL_DEPTH: RefCell<usize> = const { RefCell::new(0) };
29}
30
31pub fn push_execution_policy(policy: CapabilityPolicy) {
32    EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
33}
34
35pub fn pop_execution_policy() {
36    EXECUTION_POLICY_STACK.with(|stack| {
37        stack.borrow_mut().pop();
38    });
39}
40
41pub fn clear_execution_policy_stacks() {
42    EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().clear());
43    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| stack.borrow_mut().clear());
44    TRUSTED_BRIDGE_CALL_DEPTH.with(|depth| *depth.borrow_mut() = 0);
45}
46
47pub fn current_execution_policy() -> Option<CapabilityPolicy> {
48    EXECUTION_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
49}
50
51pub fn push_approval_policy(policy: ToolApprovalPolicy) {
52    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
53}
54
55pub fn pop_approval_policy() {
56    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| {
57        stack.borrow_mut().pop();
58    });
59}
60
61pub fn current_approval_policy() -> Option<ToolApprovalPolicy> {
62    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
63}
64
65pub fn current_tool_annotations(tool: &str) -> Option<ToolAnnotations> {
66    current_execution_policy().and_then(|policy| policy.tool_annotations.get(tool).cloned())
67}
68
69fn tool_kind_participates_in_write_allowlist(tool_name: &str) -> bool {
70    current_tool_annotations(tool_name)
71        .map(|annotations| !annotations.kind.is_read_only())
72        .unwrap_or(true)
73}
74
75pub struct TrustedBridgeCallGuard;
76
77pub fn allow_trusted_bridge_calls() -> TrustedBridgeCallGuard {
78    TRUSTED_BRIDGE_CALL_DEPTH.with(|depth| {
79        *depth.borrow_mut() += 1;
80    });
81    TrustedBridgeCallGuard
82}
83
84impl Drop for TrustedBridgeCallGuard {
85    fn drop(&mut self) {
86        TRUSTED_BRIDGE_CALL_DEPTH.with(|depth| {
87            let mut depth = depth.borrow_mut();
88            *depth = depth.saturating_sub(1);
89        });
90    }
91}
92
93fn policy_allows_tool(policy: &CapabilityPolicy, tool: &str) -> bool {
94    policy.tools.is_empty() || policy.tools.iter().any(|allowed| allowed == tool)
95}
96
97fn policy_allows_capability(policy: &CapabilityPolicy, capability: &str, op: &str) -> bool {
98    policy.capabilities.is_empty()
99        || policy
100            .capabilities
101            .get(capability)
102            .is_some_and(|ops| ops.is_empty() || ops.iter().any(|allowed| allowed == op))
103}
104
105fn policy_allows_side_effect(policy: &CapabilityPolicy, requested: &str) -> bool {
106    fn rank(v: &str) -> usize {
107        match v {
108            "none" => 0,
109            "read_only" => 1,
110            "workspace_write" => 2,
111            "process_exec" => 3,
112            "network" => 4,
113            _ => 5,
114        }
115    }
116    policy
117        .side_effect_level
118        .as_ref()
119        .map(|allowed| rank(allowed) >= rank(requested))
120        .unwrap_or(true)
121}
122
123pub(super) fn reject_policy(reason: String) -> Result<(), VmError> {
124    Err(VmError::CategorizedError {
125        message: reason,
126        category: crate::value::ErrorCategory::ToolRejected,
127    })
128}
129
130/// Mutation classification for a tool, derived from the pipeline's
131/// declared `ToolKind`. Used in telemetry and pre/post-bridge payloads
132/// while those methods still exist. Returns `"other"` for unannotated
133/// tools (fail-safe; unknown tools don't auto-classify).
134pub fn current_tool_mutation_classification(tool_name: &str) -> String {
135    current_tool_annotations(tool_name)
136        .map(|annotations| annotations.kind.mutation_class().to_string())
137        .unwrap_or_else(|| "other".to_string())
138}
139
140/// Workspace paths declared by this tool call, read from the tool's
141/// annotated `arg_schema.path_params`. Unannotated tools declare no
142/// paths — the VM no longer guesses by common argument names.
143pub fn current_tool_declared_paths(tool_name: &str, args: &serde_json::Value) -> Vec<String> {
144    current_tool_declared_path_entries(tool_name, args)
145        .into_iter()
146        .map(|entry| entry.display_path().to_string())
147        .collect()
148}
149
150/// Rich workspace-path descriptors declared by this tool call. Each
151/// entry preserves the original input while also projecting the path
152/// into workspace-relative and host-absolute forms when that mapping is
153/// known.
154pub fn current_tool_declared_path_entries(
155    tool_name: &str,
156    args: &serde_json::Value,
157) -> Vec<WorkspacePathInfo> {
158    let Some(map) = args.as_object() else {
159        return Vec::new();
160    };
161    let Some(annotations) = current_tool_annotations(tool_name) else {
162        return Vec::new();
163    };
164    let workspace_root = crate::stdlib::process::execution_root_path();
165    let mut entries = Vec::new();
166    for key in &annotations.arg_schema.path_params {
167        if let Some(value) = map.get(key) {
168            match value {
169                serde_json::Value::String(path) if !path.is_empty() => {
170                    entries.push(classify_workspace_path(path, Some(&workspace_root)));
171                }
172                serde_json::Value::Array(items) => {
173                    for item in items.iter().filter_map(|item| item.as_str()) {
174                        if !item.is_empty() {
175                            entries.push(classify_workspace_path(item, Some(&workspace_root)));
176                        }
177                    }
178                }
179                _ => {}
180            }
181        }
182    }
183    entries.sort_by(|a, b| a.display_path().cmp(b.display_path()));
184    entries.dedup_by(|left, right| left.policy_candidates() == right.policy_candidates());
185    entries
186}
187
188pub fn enforce_current_policy_for_builtin(name: &str, args: &[VmValue]) -> Result<(), VmError> {
189    let Some(policy) = current_execution_policy() else {
190        return Ok(());
191    };
192    match name {
193        "read_file" | "read_file_result" | "read_file_bytes"
194            if !policy_allows_capability(&policy, "workspace", "read_text") =>
195        {
196            return reject_policy(format!(
197                "builtin '{name}' exceeds workspace.read_text ceiling"
198            ));
199        }
200        "list_dir" if !policy_allows_capability(&policy, "workspace", "list") => {
201            return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
202        }
203        "file_exists" | "stat" if !policy_allows_capability(&policy, "workspace", "exists") => {
204            return reject_policy(format!("builtin '{name}' exceeds workspace.exists ceiling"));
205        }
206        "write_file" | "write_file_bytes" | "append_file" | "mkdir" | "copy_file"
207            if !policy_allows_capability(&policy, "workspace", "write_text")
208                || !policy_allows_side_effect(&policy, "workspace_write") =>
209        {
210            return reject_policy(format!("builtin '{name}' exceeds workspace write ceiling"));
211        }
212        "delete_file"
213            if !policy_allows_capability(&policy, "workspace", "delete")
214                || !policy_allows_side_effect(&policy, "workspace_write") =>
215        {
216            return reject_policy(
217                "builtin 'delete_file' exceeds workspace.delete ceiling".to_string(),
218            );
219        }
220        "apply_edit"
221            if !policy_allows_capability(&policy, "workspace", "apply_edit")
222                || !policy_allows_side_effect(&policy, "workspace_write") =>
223        {
224            return reject_policy(
225                "builtin 'apply_edit' exceeds workspace.apply_edit ceiling".to_string(),
226            );
227        }
228        "exec"
229        | "exec_at"
230        | "shell"
231        | "shell_at"
232        | "git.repo.discover"
233        | "git.worktree.create"
234        | "git.worktree.remove"
235        | "git.fetch"
236        | "git.rebase"
237        | "git.status"
238        | "git.conflicts"
239        | "git.push"
240        | "git.diff"
241        | "git.merge_base"
242            if !policy_allows_capability(&policy, "process", "exec")
243                || !policy_allows_side_effect(&policy, "process_exec") =>
244        {
245            return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
246        }
247        "http_get" | "http_post" | "http_put" | "http_patch" | "http_delete" | "http_download"
248        | "http_request"
249            if !policy_allows_side_effect(&policy, "network") =>
250        {
251            return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
252        }
253        "http_session_request"
254        | "http_stream_open"
255        | "http_stream_read"
256        | "http_stream_close"
257        | "http_stream_info"
258        | "sse_connect"
259        | "sse_receive"
260        | "websocket_accept"
261        | "websocket_connect"
262        | "websocket_route"
263        | "websocket_send"
264        | "websocket_receive"
265        | "websocket_server"
266            if !policy_allows_side_effect(&policy, "network") =>
267        {
268            return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
269        }
270        "llm_call" | "llm_call_safe" | "llm_completion" | "llm_stream" | "llm_stream_call"
271        | "llm_healthcheck" | "agent_loop"
272            if !policy_allows_capability(&policy, "llm", "call")
273                || !policy_allows_side_effect(&policy, "network") =>
274        {
275            return reject_policy(format!("builtin '{name}' exceeds LLM/network ceiling"));
276        }
277        "connector_call"
278            if !policy_allows_capability(&policy, "connector", "call")
279                || !policy_allows_side_effect(&policy, "network") =>
280        {
281            return reject_policy(
282                "builtin 'connector_call' exceeds connector.call/network ceiling".to_string(),
283            );
284        }
285        "secret_get" if !policy_allows_capability(&policy, "connector", "secret_get") => {
286            return reject_policy(
287                "builtin 'secret_get' exceeds connector.secret_get ceiling".to_string(),
288            );
289        }
290        "event_log_emit" if !policy_allows_capability(&policy, "connector", "event_log_emit") => {
291            return reject_policy(
292                "builtin 'event_log_emit' exceeds connector.event_log_emit ceiling".to_string(),
293            );
294        }
295        "metrics_inc" if !policy_allows_capability(&policy, "connector", "metrics_inc") => {
296            return reject_policy(
297                "builtin 'metrics_inc' exceeds connector.metrics_inc ceiling".to_string(),
298            );
299        }
300        "project_fingerprint"
301        | "project_scan_native"
302        | "project_scan_tree_native"
303        | "project_walk_tree_native"
304        | "project_catalog_native"
305            if !policy_allows_capability(&policy, "workspace", "list")
306                || !policy_allows_side_effect(&policy, "read_only") =>
307        {
308            return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
309        }
310        "__agent_state_init"
311        | "__agent_state_resume"
312        | "__agent_state_write"
313        | "__agent_state_read"
314        | "__agent_state_list"
315        | "__agent_state_delete"
316        | "__agent_state_handoff"
317            if !policy_allows_capability(&policy, "agent_state", "access") =>
318        {
319            return reject_policy(format!(
320                "builtin '{name}' exceeds agent_state.access ceiling"
321            ));
322        }
323        "vision_ocr"
324            if !policy_allows_capability(&policy, "vision", "ocr")
325                || !policy_allows_side_effect(&policy, "process_exec") =>
326        {
327            return reject_policy(format!(
328                "builtin '{name}' exceeds vision.ocr/process ceiling"
329            ));
330        }
331        "mcp_connect"
332        | "mcp_ensure_active"
333        | "mcp_call"
334        | "mcp_list_tools"
335        | "mcp_list_resources"
336        | "mcp_list_resource_templates"
337        | "mcp_read_resource"
338        | "mcp_list_prompts"
339        | "mcp_get_prompt"
340        | "mcp_server_info"
341        | "mcp_disconnect"
342            if !policy_allows_capability(&policy, "process", "exec")
343                || !policy_allows_side_effect(&policy, "process_exec") =>
344        {
345            return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
346        }
347        "host_call" => {
348            let name = args.first().map(|v| v.display()).unwrap_or_default();
349            let Some((capability, op)) = name.split_once('.') else {
350                return reject_policy(format!(
351                    "host_call '{name}' must use capability.operation naming"
352                ));
353            };
354            if !policy_allows_capability(&policy, capability, op) {
355                return reject_policy(format!(
356                    "host_call {capability}.{op} exceeds capability ceiling"
357                ));
358            }
359            let requested_side_effect = match (capability, op) {
360                ("workspace", "write_text" | "apply_edit" | "delete") => "workspace_write",
361                ("process", "exec") => "process_exec",
362                _ => "read_only",
363            };
364            if !policy_allows_side_effect(&policy, requested_side_effect) {
365                return reject_policy(format!(
366                    "host_call {capability}.{op} exceeds side-effect ceiling"
367                ));
368            }
369        }
370        "host_tool_list" | "host_tool_call"
371            if !policy_allows_capability(&policy, "host", "tool_call") =>
372        {
373            return reject_policy(format!("builtin '{name}' exceeds host.tool_call ceiling"));
374        }
375        _ => {}
376    }
377    Ok(())
378}
379
380pub fn enforce_current_policy_for_bridge_builtin(name: &str) -> Result<(), VmError> {
381    let trusted = TRUSTED_BRIDGE_CALL_DEPTH.with(|depth| *depth.borrow() > 0);
382    if trusted {
383        return Ok(());
384    }
385    if current_execution_policy().is_some() {
386        return reject_policy(format!(
387            "bridged builtin '{name}' exceeds execution policy; declare an explicit capability/tool surface instead"
388        ));
389    }
390    Ok(())
391}
392
393pub fn enforce_current_policy_for_tool(tool_name: &str) -> Result<(), VmError> {
394    let Some(policy) = current_execution_policy() else {
395        return Ok(());
396    };
397    if !policy_allows_tool(&policy, tool_name) {
398        return reject_policy(format!("tool '{tool_name}' exceeds tool ceiling"));
399    }
400    if let Some(annotations) = policy.tool_annotations.get(tool_name) {
401        for (capability, ops) in &annotations.capabilities {
402            for op in ops {
403                if !policy_allows_capability(&policy, capability, op) {
404                    return reject_policy(format!(
405                        "tool '{tool_name}' exceeds capability ceiling: {capability}.{op}"
406                    ));
407                }
408            }
409        }
410        let requested_level = annotations.side_effect_level;
411        if requested_level != SideEffectLevel::None
412            && !policy_allows_side_effect(&policy, requested_level.as_str())
413        {
414            return reject_policy(format!(
415                "tool '{tool_name}' exceeds side-effect ceiling: {}",
416                requested_level.as_str()
417            ));
418        }
419    }
420    Ok(())
421}
422
423// ── Output visibility redaction ─────────────────────────────────────
424//
425// Transcript lifecycle (reset, fork, trim, compact) now lives on
426// `crate::agent_sessions` as explicit imperative builtins. All that
427// remains here is the per-call visibility filter, which is
428// output-shaping (not lifecycle).
429
430/// Filter a transcript dict down to the caller-visible subset, based
431/// on the `output_visibility` node option. `None` or any unknown
432/// visibility returns the transcript unchanged — callers are expected
433/// to validate the string against a known set upstream.
434pub fn redact_transcript_visibility(
435    transcript: &VmValue,
436    visibility: Option<&str>,
437) -> Option<VmValue> {
438    let Some(visibility) = visibility else {
439        return Some(transcript.clone());
440    };
441    if visibility != "public" && visibility != "public_only" {
442        return Some(transcript.clone());
443    }
444    let dict = transcript.as_dict()?;
445    let public_messages = match dict.get("messages") {
446        Some(VmValue::List(list)) => list
447            .iter()
448            .filter(|message| {
449                message
450                    .as_dict()
451                    .and_then(|d| d.get("role"))
452                    .map(|v| v.display())
453                    .map(|role| role != "tool_result")
454                    .unwrap_or(true)
455            })
456            .cloned()
457            .collect::<Vec<_>>(),
458        _ => Vec::new(),
459    };
460    let public_events = match dict.get("events") {
461        Some(VmValue::List(list)) => list
462            .iter()
463            .filter(|event| {
464                event
465                    .as_dict()
466                    .and_then(|d| d.get("visibility"))
467                    .map(|v| v.display())
468                    .map(|value| value == "public")
469                    .unwrap_or(true)
470            })
471            .cloned()
472            .collect::<Vec<_>>(),
473        _ => Vec::new(),
474    };
475    let mut redacted = dict.clone();
476    redacted.insert(
477        "messages".to_string(),
478        VmValue::List(Rc::new(public_messages)),
479    );
480    redacted.insert("events".to_string(), VmValue::List(Rc::new(public_events)));
481    Some(VmValue::Dict(Rc::new(redacted)))
482}
483
484pub fn builtin_ceiling() -> CapabilityPolicy {
485    CapabilityPolicy {
486        // `capabilities` is intentionally empty: the host capability manifest
487        // is the sole authority, and an allowlist here would silently block
488        // any capability the host adds later.
489        tools: Vec::new(),
490        capabilities: BTreeMap::new(),
491        workspace_roots: Vec::new(),
492        side_effect_level: Some("network".to_string()),
493        recursion_limit: Some(8),
494        tool_arg_constraints: Vec::new(),
495        tool_annotations: BTreeMap::new(),
496    }
497}
498
499/// Declarative policy for tool approval gating. Allows pipelines to
500/// specify which tools are auto-approved, auto-denied, or require
501/// host confirmation, plus write-path allowlists.
502#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
503#[serde(default)]
504pub struct ToolApprovalPolicy {
505    /// Glob patterns for tools that should be auto-approved.
506    #[serde(default)]
507    pub auto_approve: Vec<String>,
508    /// Glob patterns for tools that should always be denied.
509    #[serde(default)]
510    pub auto_deny: Vec<String>,
511    /// Glob patterns for tools that require host confirmation.
512    #[serde(default)]
513    pub require_approval: Vec<String>,
514    /// Glob patterns for writable paths.
515    #[serde(default)]
516    pub write_path_allowlist: Vec<String>,
517}
518
/// Outcome of checking one tool call against a `ToolApprovalPolicy`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ToolApprovalDecision {
    /// The policy lets the call proceed without host involvement.
    AutoApproved,
    /// The policy refuses the call; `reason` says which rule refused it.
    AutoDenied { reason: String },
    /// The host must confirm the call. No payload is carried: the caller
    /// already holds the tool name and args and forwards them to the host
    /// bridge itself.
    RequiresHostApproval,
}
530
531impl ToolApprovalPolicy {
532    /// Evaluate whether a tool call should be approved, denied, or needs
533    /// host confirmation.
534    pub fn evaluate(&self, tool_name: &str, args: &serde_json::Value) -> ToolApprovalDecision {
535        // Auto-deny takes precedence over every other pattern list.
536        for pattern in &self.auto_deny {
537            if glob_match(pattern, tool_name) {
538                return ToolApprovalDecision::AutoDenied {
539                    reason: format!("tool '{tool_name}' matches deny pattern '{pattern}'"),
540                };
541            }
542        }
543
544        if !self.write_path_allowlist.is_empty()
545            && tool_kind_participates_in_write_allowlist(tool_name)
546        {
547            let paths = super::current_tool_declared_path_entries(tool_name, args);
548            for path in &paths {
549                let allowed = self.write_path_allowlist.iter().any(|pattern| {
550                    path.policy_candidates()
551                        .iter()
552                        .any(|candidate| glob_match(pattern, candidate))
553                });
554                if !allowed {
555                    return ToolApprovalDecision::AutoDenied {
556                        reason: format!(
557                            "tool '{tool_name}' targets '{}' which is not in the write-path allowlist",
558                            path.display_path()
559                        ),
560                    };
561                }
562            }
563        }
564
565        for pattern in &self.auto_approve {
566            if glob_match(pattern, tool_name) {
567                return ToolApprovalDecision::AutoApproved;
568            }
569        }
570
571        for pattern in &self.require_approval {
572            if glob_match(pattern, tool_name) {
573                return ToolApprovalDecision::RequiresHostApproval;
574            }
575        }
576
577        ToolApprovalDecision::AutoApproved
578    }
579
580    /// Merge two approval policies, taking the most restrictive combination.
581    /// - auto_approve: only tools approved by BOTH policies stay approved
582    ///   (if either policy has no patterns, the other's patterns are used)
583    /// - auto_deny / require_approval: union (either policy can deny/gate)
584    /// - write_path_allowlist: intersection (both must allow the path)
585    pub fn intersect(&self, other: &ToolApprovalPolicy) -> ToolApprovalPolicy {
586        let auto_approve = if self.auto_approve.is_empty() {
587            other.auto_approve.clone()
588        } else if other.auto_approve.is_empty() {
589            self.auto_approve.clone()
590        } else {
591            self.auto_approve
592                .iter()
593                .filter(|p| other.auto_approve.contains(p))
594                .cloned()
595                .collect()
596        };
597        let mut auto_deny = self.auto_deny.clone();
598        auto_deny.extend(other.auto_deny.iter().cloned());
599        let mut require_approval = self.require_approval.clone();
600        require_approval.extend(other.require_approval.iter().cloned());
601        let write_path_allowlist = if self.write_path_allowlist.is_empty() {
602            other.write_path_allowlist.clone()
603        } else if other.write_path_allowlist.is_empty() {
604            self.write_path_allowlist.clone()
605        } else {
606            self.write_path_allowlist
607                .iter()
608                .filter(|p| other.write_path_allowlist.contains(p))
609                .cloned()
610                .collect()
611        };
612        ToolApprovalPolicy {
613            auto_approve,
614            auto_deny,
615            require_approval,
616            write_path_allowlist,
617        }
618    }
619}
620
#[cfg(test)]
mod approval_policy_tests {
    use super::*;
    use crate::orchestration::{pop_execution_policy, push_execution_policy, CapabilityPolicy};
    use crate::tool_annotations::{ToolAnnotations, ToolArgSchema, ToolKind};

    /// Owned pattern list from string literals.
    fn patterns(globs: &[&str]) -> Vec<String> {
        globs.iter().map(|glob| glob.to_string()).collect()
    }

    /// Points this thread's execution context (cwd and source dir) at `root`.
    fn enter_workspace(root: &std::path::Path) {
        let root = root.to_string_lossy().into_owned();
        crate::stdlib::process::set_thread_execution_context(Some(
            crate::orchestration::RunExecutionRecord {
                cwd: Some(root.clone()),
                source_dir: Some(root),
                env: BTreeMap::new(),
                adapter: None,
                repo_path: None,
                worktree_path: None,
                branch: None,
                base_ref: None,
                cleanup: None,
            },
        ));
    }

    /// Pushes an execution policy that annotates `tool` with the given kind
    /// and a single path parameter named `path`.
    fn push_path_tool_policy(tool: &str, kind: ToolKind) {
        let mut tool_annotations = BTreeMap::new();
        tool_annotations.insert(
            tool.to_string(),
            ToolAnnotations {
                kind,
                arg_schema: ToolArgSchema {
                    path_params: vec!["path".to_string()],
                    ..Default::default()
                },
                ..Default::default()
            },
        );
        push_execution_policy(CapabilityPolicy {
            tool_annotations,
            ..Default::default()
        });
    }

    /// Undoes `push_path_tool_policy` and `enter_workspace`.
    fn leave_workspace() {
        pop_execution_policy();
        crate::stdlib::process::set_thread_execution_context(None);
    }

    #[test]
    fn auto_deny_takes_precedence_over_auto_approve() {
        let policy = ToolApprovalPolicy {
            auto_approve: patterns(&["*"]),
            auto_deny: patterns(&["dangerous_*"]),
            ..Default::default()
        };
        let expected = ToolApprovalDecision::AutoDenied {
            reason: "tool 'dangerous_rm' matches deny pattern 'dangerous_*'".to_string(),
        };
        assert_eq!(
            policy.evaluate("dangerous_rm", &serde_json::json!({})),
            expected
        );
    }

    #[test]
    fn auto_approve_matches_glob() {
        let policy = ToolApprovalPolicy {
            auto_approve: patterns(&["read*", "search*"]),
            ..Default::default()
        };
        let no_args = serde_json::json!({});
        assert_eq!(
            policy.evaluate("read_file", &no_args),
            ToolApprovalDecision::AutoApproved
        );
        assert_eq!(
            policy.evaluate("search", &no_args),
            ToolApprovalDecision::AutoApproved
        );
    }

    #[test]
    fn require_approval_emits_decision() {
        let policy = ToolApprovalPolicy {
            require_approval: patterns(&["edit*"]),
            ..Default::default()
        };
        let decision = policy.evaluate("edit_file", &serde_json::json!({"path": "foo.rs"}));
        assert_eq!(decision, ToolApprovalDecision::RequiresHostApproval);
    }

    #[test]
    fn unmatched_tool_defaults_to_approved() {
        let policy = ToolApprovalPolicy {
            auto_approve: patterns(&["read*"]),
            require_approval: patterns(&["edit*"]),
            ..Default::default()
        };
        let decision = policy.evaluate("unknown_tool", &serde_json::json!({}));
        assert_eq!(decision, ToolApprovalDecision::AutoApproved);
    }

    #[test]
    fn intersect_merges_deny_lists() {
        let first = ToolApprovalPolicy {
            auto_deny: patterns(&["rm*"]),
            ..Default::default()
        };
        let second = ToolApprovalPolicy {
            auto_deny: patterns(&["drop*"]),
            ..Default::default()
        };
        assert_eq!(first.intersect(&second).auto_deny.len(), 2);
    }

    #[test]
    fn intersect_restricts_auto_approve_to_common_patterns() {
        let first = ToolApprovalPolicy {
            auto_approve: patterns(&["read*", "search*"]),
            ..Default::default()
        };
        let second = ToolApprovalPolicy {
            auto_approve: patterns(&["read*", "write*"]),
            ..Default::default()
        };
        assert_eq!(first.intersect(&second).auto_approve, patterns(&["read*"]));
    }

    #[test]
    fn intersect_defers_auto_approve_when_one_side_empty() {
        let restricted = ToolApprovalPolicy {
            auto_approve: patterns(&["read*"]),
            ..Default::default()
        };
        let unrestricted = ToolApprovalPolicy::default();
        assert_eq!(
            restricted.intersect(&unrestricted).auto_approve,
            patterns(&["read*"])
        );
    }

    #[test]
    fn write_path_allowlist_matches_recovered_workspace_relative_path() {
        let temp = tempfile::tempdir().unwrap();
        let demo_dir = temp.path().join("packages/demo");
        std::fs::create_dir_all(&demo_dir).unwrap();
        std::fs::write(demo_dir.join("file.txt"), "ok").unwrap();
        enter_workspace(temp.path());
        push_path_tool_policy("write_file", ToolKind::Edit);

        let policy = ToolApprovalPolicy {
            write_path_allowlist: patterns(&["packages/demo/file.txt"]),
            ..Default::default()
        };
        // The argument is rooted at `/`, not at the temp dir; the allowlist
        // entry is workspace-relative.
        let decision = policy.evaluate(
            "write_file",
            &serde_json::json!({"path": "/packages/demo/file.txt"}),
        );
        assert_eq!(decision, ToolApprovalDecision::AutoApproved);

        leave_workspace();
    }

    #[test]
    fn write_path_allowlist_does_not_block_read_only_tools() {
        let temp = tempfile::tempdir().unwrap();
        let demo_dir = temp.path().join("packages/demo");
        std::fs::create_dir_all(&demo_dir).unwrap();
        std::fs::write(demo_dir.join("context.txt"), "ok").unwrap();
        enter_workspace(temp.path());
        push_path_tool_policy("read_file", ToolKind::Read);

        // The allowlist names a different file than the one being read.
        let policy = ToolApprovalPolicy {
            write_path_allowlist: patterns(&["packages/demo/file.txt"]),
            ..Default::default()
        };
        let decision = policy.evaluate(
            "read_file",
            &serde_json::json!({"path": "/packages/demo/context.txt"}),
        );
        assert_eq!(decision, ToolApprovalDecision::AutoApproved);

        leave_workspace();
    }
}
823
/// Tests for `TurnPolicy` defaults and for how partially specified JSON
/// dictionaries deserialize into it.
#[cfg(test)]
mod turn_policy_tests {
    use super::TurnPolicy;

    /// Deserializes a JSON value into a `TurnPolicy`, panicking on failure.
    fn parse(raw: serde_json::Value) -> TurnPolicy {
        serde_json::from_value(raw).expect("deserialize")
    }

    #[test]
    fn default_allows_done_sentinel() {
        let defaults = TurnPolicy::default();
        assert!(defaults.allow_done_sentinel);
        assert!(!defaults.require_action_or_yield);
        assert!(defaults.max_prose_chars.is_none());
    }

    #[test]
    fn deserializing_partial_dict_preserves_done_sentinel_pathway() {
        // Older workflows supplied `turn_policy: { require_action_or_yield: true }`
        // and never mentioned `allow_done_sentinel`. Such a dict has to
        // deserialize with the done-sentinel pathway still enabled; otherwise
        // loop-until-done agents would lose their completion signal.
        let parsed = parse(serde_json::json!({ "require_action_or_yield": true }));
        assert!(parsed.require_action_or_yield);
        assert!(parsed.allow_done_sentinel);
    }

    #[test]
    fn deserializing_explicit_false_disables_done_sentinel() {
        // An explicit `false` must be honored rather than replaced by the default.
        let parsed = parse(serde_json::json!({
            "require_action_or_yield": true,
            "allow_done_sentinel": false,
        }));
        assert!(parsed.require_action_or_yield);
        assert!(!parsed.allow_done_sentinel);
    }
}
860
/// Tests for `redact_transcript_visibility` against a small three-message
/// transcript (user, assistant, tool_result).
#[cfg(test)]
mod visibility_redaction_tests {
    use super::*;
    use crate::value::VmValue;

    /// Builds a transcript value with one `user`, one `assistant`, and one
    /// `tool_result` message.
    fn mock_transcript() -> VmValue {
        let user = serde_json::json!({"role": "user", "content": "hi"});
        let assistant = serde_json::json!({"role": "assistant", "content": "hello"});
        let tool_result =
            serde_json::json!({"role": "tool_result", "content": "internal tool output"});
        let messages = vec![user, assistant, tool_result];
        crate::llm::helpers::transcript_to_vm_with_events(
            Some(String::from("test-id")),
            None,
            None,
            &messages,
            Vec::new(),
            Vec::new(),
            Some("active"),
        )
    }

    /// Returns the length of the transcript's `messages` list, or 0 when the
    /// value is not a dict, has no `messages` key, or that entry is not a list.
    fn message_count(transcript: &VmValue) -> usize {
        match transcript
            .as_dict()
            .and_then(|fields| fields.get("messages"))
        {
            Some(VmValue::List(items)) => items.len(),
            _ => 0,
        }
    }

    /// Redacts the mock transcript with the given visibility and reports how
    /// many messages remain.
    fn redacted_count(visibility: Option<&str>) -> usize {
        let transcript = mock_transcript();
        let redacted = redact_transcript_visibility(&transcript, visibility).unwrap();
        message_count(&redacted)
    }

    #[test]
    fn visibility_none_returns_unchanged() {
        assert_eq!(redacted_count(None), 3);
    }

    #[test]
    fn visibility_public_drops_tool_results() {
        // Two of the three mock messages are expected to remain.
        assert_eq!(redacted_count(Some("public")), 2);
    }

    #[test]
    fn visibility_unknown_string_is_pass_through() {
        assert_eq!(redacted_count(Some("internal")), 3);
    }
}