Skip to main content

harn_vm/orchestration/policy/
mod.rs

1//! Policy types and capability-ceiling enforcement.
2
3mod types;
4
5use std::cell::RefCell;
6use std::collections::BTreeMap;
7use std::rc::Rc;
8use std::thread_local;
9
10use serde::{Deserialize, Serialize};
11
12use super::glob_match;
13use crate::tool_annotations::{SideEffectLevel, ToolAnnotations};
14use crate::value::{VmError, VmValue};
15use crate::workspace_path::{classify_workspace_path, WorkspacePathInfo};
16
17pub use crate::tool_annotations::{ToolArgSchema, ToolKind};
18pub use types::{
19    enforce_tool_arg_constraints, AutoCompactPolicy, BranchSemantics, CapabilityPolicy,
20    ContextPolicy, EqIgnored, EscalationPolicy, JoinPolicy, MapPolicy, ModelPolicy,
21    NativeToolFallbackPolicy, ReducePolicy, RetryPolicy, StageContract, ToolArgConstraint,
22    TurnPolicy,
23};
24
thread_local! {
    // Per-thread stack of capability ceilings. `current_execution_policy`
    // reads the top entry; an empty stack means no policy is in effect.
    static EXECUTION_POLICY_STACK: RefCell<Vec<CapabilityPolicy>> = const { RefCell::new(Vec::new()) };
    // Per-thread stack of tool-approval policies, managed independently of
    // the capability stack above.
    static EXECUTION_APPROVAL_POLICY_STACK: RefCell<Vec<ToolApprovalPolicy>> = const { RefCell::new(Vec::new()) };
    // Incremented by `allow_trusted_bridge_calls` and decremented when the
    // returned guard drops; bridged builtins skip the policy check while
    // this is non-zero.
    static TRUSTED_BRIDGE_CALL_DEPTH: RefCell<usize> = const { RefCell::new(0) };
}
30
31pub fn push_execution_policy(policy: CapabilityPolicy) {
32    EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
33}
34
35pub fn pop_execution_policy() {
36    EXECUTION_POLICY_STACK.with(|stack| {
37        stack.borrow_mut().pop();
38    });
39}
40
41pub fn clear_execution_policy_stacks() {
42    EXECUTION_POLICY_STACK.with(|stack| stack.borrow_mut().clear());
43    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| stack.borrow_mut().clear());
44    TRUSTED_BRIDGE_CALL_DEPTH.with(|depth| *depth.borrow_mut() = 0);
45}
46
47pub fn current_execution_policy() -> Option<CapabilityPolicy> {
48    EXECUTION_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
49}
50
51pub fn push_approval_policy(policy: ToolApprovalPolicy) {
52    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| stack.borrow_mut().push(policy));
53}
54
55pub fn pop_approval_policy() {
56    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| {
57        stack.borrow_mut().pop();
58    });
59}
60
61pub fn current_approval_policy() -> Option<ToolApprovalPolicy> {
62    EXECUTION_APPROVAL_POLICY_STACK.with(|stack| stack.borrow().last().cloned())
63}
64
65pub fn current_tool_annotations(tool: &str) -> Option<ToolAnnotations> {
66    current_execution_policy().and_then(|policy| policy.tool_annotations.get(tool).cloned())
67}
68
69fn tool_kind_participates_in_write_allowlist(tool_name: &str) -> bool {
70    current_tool_annotations(tool_name)
71        .map(|annotations| !annotations.kind.is_read_only())
72        .unwrap_or(true)
73}
74
/// Scope guard returned by `allow_trusted_bridge_calls`. While at least one
/// guard obtained that way is alive on the current thread, bridged builtins
/// bypass the execution-policy check; dropping the guard ends its
/// contribution to that trusted scope.
///
/// Marked `#[must_use]` because discarding the guard immediately (for
/// example by calling `allow_trusted_bridge_calls();` as a bare statement)
/// drops it on the spot and grants no trust at all.
#[must_use = "trusted bridge calls are only allowed while this guard is alive; bind it to a variable"]
pub struct TrustedBridgeCallGuard;
76
77pub fn allow_trusted_bridge_calls() -> TrustedBridgeCallGuard {
78    TRUSTED_BRIDGE_CALL_DEPTH.with(|depth| {
79        *depth.borrow_mut() += 1;
80    });
81    TrustedBridgeCallGuard
82}
83
84impl Drop for TrustedBridgeCallGuard {
85    fn drop(&mut self) {
86        TRUSTED_BRIDGE_CALL_DEPTH.with(|depth| {
87            let mut depth = depth.borrow_mut();
88            *depth = depth.saturating_sub(1);
89        });
90    }
91}
92
93fn policy_allows_tool(policy: &CapabilityPolicy, tool: &str) -> bool {
94    policy.tools.is_empty() || policy.tools.iter().any(|allowed| allowed == tool)
95}
96
97fn policy_allows_capability(policy: &CapabilityPolicy, capability: &str, op: &str) -> bool {
98    policy.capabilities.is_empty()
99        || policy
100            .capabilities
101            .get(capability)
102            .is_some_and(|ops| ops.is_empty() || ops.iter().any(|allowed| allowed == op))
103}
104
105fn policy_allows_side_effect(policy: &CapabilityPolicy, requested: &str) -> bool {
106    fn rank(v: &str) -> usize {
107        match v {
108            "none" => 0,
109            "read_only" => 1,
110            "workspace_write" => 2,
111            "process_exec" => 3,
112            "network" => 4,
113            _ => 5,
114        }
115    }
116    policy
117        .side_effect_level
118        .as_ref()
119        .map(|allowed| rank(allowed) >= rank(requested))
120        .unwrap_or(true)
121}
122
123pub(super) fn reject_policy(reason: String) -> Result<(), VmError> {
124    Err(VmError::CategorizedError {
125        message: reason,
126        category: crate::value::ErrorCategory::ToolRejected,
127    })
128}
129
130/// Mutation classification for a tool, derived from the pipeline's
131/// declared `ToolKind`. Used in telemetry and pre/post-bridge payloads
132/// while those methods still exist. Returns `"other"` for unannotated
133/// tools (fail-safe; unknown tools don't auto-classify).
134pub fn current_tool_mutation_classification(tool_name: &str) -> String {
135    current_tool_annotations(tool_name)
136        .map(|annotations| annotations.kind.mutation_class().to_string())
137        .unwrap_or_else(|| "other".to_string())
138}
139
140/// Workspace paths declared by this tool call, read from the tool's
141/// annotated `arg_schema.path_params`. Unannotated tools declare no
142/// paths — the VM no longer guesses by common argument names.
143pub fn current_tool_declared_paths(tool_name: &str, args: &serde_json::Value) -> Vec<String> {
144    current_tool_declared_path_entries(tool_name, args)
145        .into_iter()
146        .map(|entry| entry.display_path().to_string())
147        .collect()
148}
149
150/// Rich workspace-path descriptors declared by this tool call. Each
151/// entry preserves the original input while also projecting the path
152/// into workspace-relative and host-absolute forms when that mapping is
153/// known.
154pub fn current_tool_declared_path_entries(
155    tool_name: &str,
156    args: &serde_json::Value,
157) -> Vec<WorkspacePathInfo> {
158    let Some(map) = args.as_object() else {
159        return Vec::new();
160    };
161    let Some(annotations) = current_tool_annotations(tool_name) else {
162        return Vec::new();
163    };
164    let workspace_root = crate::stdlib::process::execution_root_path();
165    let mut entries = Vec::new();
166    for key in &annotations.arg_schema.path_params {
167        if let Some(value) = map.get(key) {
168            match value {
169                serde_json::Value::String(path) if !path.is_empty() => {
170                    entries.push(classify_workspace_path(path, Some(&workspace_root)));
171                }
172                serde_json::Value::Array(items) => {
173                    for item in items.iter().filter_map(|item| item.as_str()) {
174                        if !item.is_empty() {
175                            entries.push(classify_workspace_path(item, Some(&workspace_root)));
176                        }
177                    }
178                }
179                _ => {}
180            }
181        }
182    }
183    entries.sort_by(|a, b| a.display_path().cmp(b.display_path()));
184    entries.dedup_by(|left, right| left.policy_candidates() == right.policy_candidates());
185    entries
186}
187
/// Checks a VM builtin call against the current execution policy.
///
/// Returns `Ok(())` when no policy is active, when the builtin is not one
/// of the names listed below, or when the policy's capability and
/// side-effect ceilings permit it. Otherwise returns the `ToolRejected`
/// error produced by `reject_policy`, naming the ceiling that was exceeded.
///
/// `args` is only consulted for `host_call`, whose first argument names the
/// `capability.operation` being invoked.
pub fn enforce_current_policy_for_builtin(name: &str, args: &[VmValue]) -> Result<(), VmError> {
    let Some(policy) = current_execution_policy() else {
        return Ok(());
    };
    // Each guarded arm's `if` is the *rejection* condition. When a guard is
    // false the remaining arms are tried, none of them match the same name,
    // and control falls through to the final `_` arm (allowed).
    match name {
        "read_file" | "read_file_result" | "read_file_bytes"
            if !policy_allows_capability(&policy, "workspace", "read_text") =>
        {
            return reject_policy(format!(
                "builtin '{name}' exceeds workspace.read_text ceiling"
            ));
        }
        "list_dir" if !policy_allows_capability(&policy, "workspace", "list") => {
            return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
        }
        "file_exists" | "stat" if !policy_allows_capability(&policy, "workspace", "exists") => {
            return reject_policy(format!("builtin '{name}' exceeds workspace.exists ceiling"));
        }
        // Mutating workspace builtins need both the capability and a
        // side-effect ceiling of at least `workspace_write`.
        "write_file" | "write_file_bytes" | "append_file" | "mkdir" | "copy_file"
            if !policy_allows_capability(&policy, "workspace", "write_text")
                || !policy_allows_side_effect(&policy, "workspace_write") =>
        {
            return reject_policy(format!("builtin '{name}' exceeds workspace write ceiling"));
        }
        "delete_file"
            if !policy_allows_capability(&policy, "workspace", "delete")
                || !policy_allows_side_effect(&policy, "workspace_write") =>
        {
            return reject_policy(
                "builtin 'delete_file' exceeds workspace.delete ceiling".to_string(),
            );
        }
        "apply_edit"
            if !policy_allows_capability(&policy, "workspace", "apply_edit")
                || !policy_allows_side_effect(&policy, "workspace_write") =>
        {
            return reject_policy(
                "builtin 'apply_edit' exceeds workspace.apply_edit ceiling".to_string(),
            );
        }
        // Process-spawning builtins, including every `git.*` builtin listed
        // here, are gated on `process.exec`.
        "exec"
        | "exec_at"
        | "shell"
        | "shell_at"
        | "git.repo.discover"
        | "git.worktree.create"
        | "git.worktree.remove"
        | "git.fetch"
        | "git.rebase"
        | "git.status"
        | "git.conflicts"
        | "git.push"
        | "git.diff"
        | "git.merge_base"
            if !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec") =>
        {
            return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
        }
        // Network builtins are gated on the side-effect ceiling only; no
        // capability entry is checked for them.
        "http_get" | "http_post" | "http_put" | "http_patch" | "http_delete" | "http_download"
        | "http_request"
            if !policy_allows_side_effect(&policy, "network") =>
        {
            return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
        }
        "http_session_request"
        | "http_stream_open"
        | "http_stream_read"
        | "http_stream_close"
        | "http_stream_info"
        | "sse_connect"
        | "sse_receive"
        | "websocket_accept"
        | "websocket_connect"
        | "websocket_route"
        | "websocket_send"
        | "websocket_receive"
        | "websocket_server"
            if !policy_allows_side_effect(&policy, "network") =>
        {
            return reject_policy(format!("builtin '{name}' exceeds network ceiling"));
        }
        "llm_call" | "llm_call_safe" | "llm_completion" | "llm_stream" | "llm_stream_call"
        | "llm_healthcheck" | "agent_loop"
            if !policy_allows_capability(&policy, "llm", "call") =>
        {
            return reject_policy(format!("builtin '{name}' exceeds llm.call ceiling"));
        }
        "connector_call"
            if !policy_allows_capability(&policy, "connector", "call")
                || !policy_allows_side_effect(&policy, "network") =>
        {
            return reject_policy(
                "builtin 'connector_call' exceeds connector.call/network ceiling".to_string(),
            );
        }
        "secret_get" if !policy_allows_capability(&policy, "connector", "secret_get") => {
            return reject_policy(
                "builtin 'secret_get' exceeds connector.secret_get ceiling".to_string(),
            );
        }
        "event_log_emit" if !policy_allows_capability(&policy, "connector", "event_log_emit") => {
            return reject_policy(
                "builtin 'event_log_emit' exceeds connector.event_log_emit ceiling".to_string(),
            );
        }
        "metrics_inc" if !policy_allows_capability(&policy, "connector", "metrics_inc") => {
            return reject_policy(
                "builtin 'metrics_inc' exceeds connector.metrics_inc ceiling".to_string(),
            );
        }
        // Project scanning reuses the `workspace.list` capability and also
        // requires a side-effect ceiling of at least `read_only`.
        "project_fingerprint"
        | "project_scan_native"
        | "project_scan_tree_native"
        | "project_walk_tree_native"
        | "project_catalog_native"
            if !policy_allows_capability(&policy, "workspace", "list")
                || !policy_allows_side_effect(&policy, "read_only") =>
        {
            return reject_policy(format!("builtin '{name}' exceeds workspace.list ceiling"));
        }
        "__agent_state_init"
        | "__agent_state_resume"
        | "__agent_state_write"
        | "__agent_state_read"
        | "__agent_state_list"
        | "__agent_state_delete"
        | "__agent_state_handoff"
            if !policy_allows_capability(&policy, "agent_state", "access") =>
        {
            return reject_policy(format!(
                "builtin '{name}' exceeds agent_state.access ceiling"
            ));
        }
        "vision_ocr"
            if !policy_allows_capability(&policy, "vision", "ocr")
                || !policy_allows_side_effect(&policy, "process_exec") =>
        {
            return reject_policy(format!(
                "builtin '{name}' exceeds vision.ocr/process ceiling"
            ));
        }
        // MCP builtins are gated on the same ceilings as process execution.
        "mcp_connect"
        | "mcp_ensure_active"
        | "mcp_call"
        | "mcp_list_tools"
        | "mcp_list_resources"
        | "mcp_list_resource_templates"
        | "mcp_read_resource"
        | "mcp_list_prompts"
        | "mcp_get_prompt"
        | "mcp_server_info"
        | "mcp_disconnect"
            if !policy_allows_capability(&policy, "process", "exec")
                || !policy_allows_side_effect(&policy, "process_exec") =>
        {
            return reject_policy(format!("builtin '{name}' exceeds process.exec ceiling"));
        }
        "host_call" => {
            // NOTE: `name` is shadowed here by the display form of the first
            // argument, which must look like `capability.operation`. A
            // missing argument yields an empty string and is rejected by
            // the `split_once` check below.
            let name = args.first().map(|v| v.display()).unwrap_or_default();
            let Some((capability, op)) = name.split_once('.') else {
                return reject_policy(format!(
                    "host_call '{name}' must use capability.operation naming"
                ));
            };
            if !policy_allows_capability(&policy, capability, op) {
                return reject_policy(format!(
                    "host_call {capability}.{op} exceeds capability ceiling"
                ));
            }
            // Only these known capability/operation pairs are mapped to an
            // elevated side-effect level; every other host call is checked
            // as `read_only`.
            let requested_side_effect = match (capability, op) {
                ("workspace", "write_text" | "apply_edit" | "delete") => "workspace_write",
                ("process", "exec") => "process_exec",
                _ => "read_only",
            };
            if !policy_allows_side_effect(&policy, requested_side_effect) {
                return reject_policy(format!(
                    "host_call {capability}.{op} exceeds side-effect ceiling"
                ));
            }
        }
        "host_tool_list" | "host_tool_call"
            if !policy_allows_capability(&policy, "host", "tool_call") =>
        {
            return reject_policy(format!("builtin '{name}' exceeds host.tool_call ceiling"));
        }
        // Builtins not listed above are not subject to a ceiling here.
        _ => {}
    }
    Ok(())
}
378
379pub fn enforce_current_policy_for_bridge_builtin(name: &str) -> Result<(), VmError> {
380    let trusted = TRUSTED_BRIDGE_CALL_DEPTH.with(|depth| *depth.borrow() > 0);
381    if trusted {
382        return Ok(());
383    }
384    if current_execution_policy().is_some() {
385        return reject_policy(format!(
386            "bridged builtin '{name}' exceeds execution policy; declare an explicit capability/tool surface instead"
387        ));
388    }
389    Ok(())
390}
391
392pub fn enforce_current_policy_for_tool(tool_name: &str) -> Result<(), VmError> {
393    let Some(policy) = current_execution_policy() else {
394        return Ok(());
395    };
396    if !policy_allows_tool(&policy, tool_name) {
397        return reject_policy(format!("tool '{tool_name}' exceeds tool ceiling"));
398    }
399    if let Some(annotations) = policy.tool_annotations.get(tool_name) {
400        for (capability, ops) in &annotations.capabilities {
401            for op in ops {
402                if !policy_allows_capability(&policy, capability, op) {
403                    return reject_policy(format!(
404                        "tool '{tool_name}' exceeds capability ceiling: {capability}.{op}"
405                    ));
406                }
407            }
408        }
409        let requested_level = annotations.side_effect_level;
410        if requested_level != SideEffectLevel::None
411            && !policy_allows_side_effect(&policy, requested_level.as_str())
412        {
413            return reject_policy(format!(
414                "tool '{tool_name}' exceeds side-effect ceiling: {}",
415                requested_level.as_str()
416            ));
417        }
418    }
419    Ok(())
420}
421
422// ── Output visibility redaction ─────────────────────────────────────
423//
424// Transcript lifecycle (reset, fork, trim, compact) now lives on
425// `crate::agent_sessions` as explicit imperative builtins. All that
426// remains here is the per-call visibility filter, which is
427// output-shaping (not lifecycle).
428
429/// Filter a transcript dict down to the caller-visible subset, based
430/// on the `output_visibility` node option. `None` or any unknown
431/// visibility returns the transcript unchanged — callers are expected
432/// to validate the string against a known set upstream.
433pub fn redact_transcript_visibility(
434    transcript: &VmValue,
435    visibility: Option<&str>,
436) -> Option<VmValue> {
437    let Some(visibility) = visibility else {
438        return Some(transcript.clone());
439    };
440    if visibility != "public" && visibility != "public_only" {
441        return Some(transcript.clone());
442    }
443    let dict = transcript.as_dict()?;
444    let public_messages = match dict.get("messages") {
445        Some(VmValue::List(list)) => list
446            .iter()
447            .filter(|message| {
448                message
449                    .as_dict()
450                    .and_then(|d| d.get("role"))
451                    .map(|v| v.display())
452                    .map(|role| role != "tool_result")
453                    .unwrap_or(true)
454            })
455            .cloned()
456            .collect::<Vec<_>>(),
457        _ => Vec::new(),
458    };
459    let public_events = match dict.get("events") {
460        Some(VmValue::List(list)) => list
461            .iter()
462            .filter(|event| {
463                event
464                    .as_dict()
465                    .and_then(|d| d.get("visibility"))
466                    .map(|v| v.display())
467                    .map(|value| value == "public")
468                    .unwrap_or(true)
469            })
470            .cloned()
471            .collect::<Vec<_>>(),
472        _ => Vec::new(),
473    };
474    let mut redacted = dict.clone();
475    redacted.insert(
476        "messages".to_string(),
477        VmValue::List(Rc::new(public_messages)),
478    );
479    redacted.insert("events".to_string(), VmValue::List(Rc::new(public_events)));
480    Some(VmValue::Dict(Rc::new(redacted)))
481}
482
483pub fn builtin_ceiling() -> CapabilityPolicy {
484    CapabilityPolicy {
485        // `capabilities` is intentionally empty: the host capability manifest
486        // is the sole authority, and an allowlist here would silently block
487        // any capability the host adds later.
488        tools: Vec::new(),
489        capabilities: BTreeMap::new(),
490        workspace_roots: Vec::new(),
491        side_effect_level: Some("network".to_string()),
492        recursion_limit: Some(8),
493        tool_arg_constraints: Vec::new(),
494        tool_annotations: BTreeMap::new(),
495    }
496}
497
498/// Declarative policy for tool approval gating. Allows pipelines to
499/// specify which tools are auto-approved, auto-denied, or require
500/// host confirmation, plus write-path allowlists.
501#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
502#[serde(default)]
503pub struct ToolApprovalPolicy {
504    /// Glob patterns for tools that should be auto-approved.
505    #[serde(default)]
506    pub auto_approve: Vec<String>,
507    /// Glob patterns for tools that should always be denied.
508    #[serde(default)]
509    pub auto_deny: Vec<String>,
510    /// Glob patterns for tools that require host confirmation.
511    #[serde(default)]
512    pub require_approval: Vec<String>,
513    /// Glob patterns for writable paths.
514    #[serde(default)]
515    pub write_path_allowlist: Vec<String>,
516}
517
/// Outcome of checking one tool call against a `ToolApprovalPolicy`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ToolApprovalDecision {
    /// The policy lets the call proceed without confirmation.
    AutoApproved,
    /// The policy refuses the call; `reason` explains which rule applied.
    AutoDenied { reason: String },
    /// The host must confirm the call. No payload is carried because the
    /// caller already holds the tool name and arguments and forwards them
    /// to the host bridge itself.
    RequiresHostApproval,
}
529
530impl ToolApprovalPolicy {
531    /// Evaluate whether a tool call should be approved, denied, or needs
532    /// host confirmation.
533    pub fn evaluate(&self, tool_name: &str, args: &serde_json::Value) -> ToolApprovalDecision {
534        // Auto-deny takes precedence over every other pattern list.
535        for pattern in &self.auto_deny {
536            if glob_match(pattern, tool_name) {
537                return ToolApprovalDecision::AutoDenied {
538                    reason: format!("tool '{tool_name}' matches deny pattern '{pattern}'"),
539                };
540            }
541        }
542
543        if !self.write_path_allowlist.is_empty()
544            && tool_kind_participates_in_write_allowlist(tool_name)
545        {
546            let paths = super::current_tool_declared_path_entries(tool_name, args);
547            for path in &paths {
548                let allowed = self.write_path_allowlist.iter().any(|pattern| {
549                    path.policy_candidates()
550                        .iter()
551                        .any(|candidate| glob_match(pattern, candidate))
552                });
553                if !allowed {
554                    return ToolApprovalDecision::AutoDenied {
555                        reason: format!(
556                            "tool '{tool_name}' targets '{}' which is not in the write-path allowlist",
557                            path.display_path()
558                        ),
559                    };
560                }
561            }
562        }
563
564        for pattern in &self.auto_approve {
565            if glob_match(pattern, tool_name) {
566                return ToolApprovalDecision::AutoApproved;
567            }
568        }
569
570        for pattern in &self.require_approval {
571            if glob_match(pattern, tool_name) {
572                return ToolApprovalDecision::RequiresHostApproval;
573            }
574        }
575
576        ToolApprovalDecision::AutoApproved
577    }
578
579    /// Merge two approval policies, taking the most restrictive combination.
580    /// - auto_approve: only tools approved by BOTH policies stay approved
581    ///   (if either policy has no patterns, the other's patterns are used)
582    /// - auto_deny / require_approval: union (either policy can deny/gate)
583    /// - write_path_allowlist: intersection (both must allow the path)
584    pub fn intersect(&self, other: &ToolApprovalPolicy) -> ToolApprovalPolicy {
585        let auto_approve = if self.auto_approve.is_empty() {
586            other.auto_approve.clone()
587        } else if other.auto_approve.is_empty() {
588            self.auto_approve.clone()
589        } else {
590            self.auto_approve
591                .iter()
592                .filter(|p| other.auto_approve.contains(p))
593                .cloned()
594                .collect()
595        };
596        let mut auto_deny = self.auto_deny.clone();
597        auto_deny.extend(other.auto_deny.iter().cloned());
598        let mut require_approval = self.require_approval.clone();
599        require_approval.extend(other.require_approval.iter().cloned());
600        let write_path_allowlist = if self.write_path_allowlist.is_empty() {
601            other.write_path_allowlist.clone()
602        } else if other.write_path_allowlist.is_empty() {
603            self.write_path_allowlist.clone()
604        } else {
605            self.write_path_allowlist
606                .iter()
607                .filter(|p| other.write_path_allowlist.contains(p))
608                .cloned()
609                .collect()
610        };
611        ToolApprovalPolicy {
612            auto_approve,
613            auto_deny,
614            require_approval,
615            write_path_allowlist,
616        }
617    }
618}
619
#[cfg(test)]
mod approval_policy_tests {
    use super::*;
    use crate::orchestration::{pop_execution_policy, push_execution_policy, CapabilityPolicy};
    use crate::tool_annotations::{ToolAnnotations, ToolArgSchema, ToolKind};

    /// Empty JSON object used as tool arguments where the args don't matter.
    fn no_args() -> serde_json::Value {
        serde_json::json!({})
    }

    /// Builds an owned pattern list from string literals.
    fn patterns(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Creates `packages/demo/<file_name>` inside a fresh temp dir and makes
    /// that dir the thread's execution context. The returned handle must be
    /// kept alive for the duration of the test.
    fn enter_demo_workspace(file_name: &str) -> tempfile::TempDir {
        let temp = tempfile::tempdir().unwrap();
        let demo_dir = temp.path().join("packages/demo");
        std::fs::create_dir_all(&demo_dir).unwrap();
        std::fs::write(demo_dir.join(file_name), "ok").unwrap();

        let root = temp.path().to_string_lossy().into_owned();
        crate::stdlib::process::set_thread_execution_context(Some(
            crate::orchestration::RunExecutionRecord {
                cwd: Some(root.clone()),
                source_dir: Some(root),
                env: BTreeMap::new(),
                adapter: None,
                repo_path: None,
                worktree_path: None,
                branch: None,
                base_ref: None,
                cleanup: None,
            },
        ));
        temp
    }

    /// Pushes an execution policy that annotates `tool` with `kind` and a
    /// single path parameter named `path`.
    fn push_path_tool_policy(tool: &str, kind: ToolKind) {
        let annotations = ToolAnnotations {
            kind,
            arg_schema: ToolArgSchema {
                path_params: vec!["path".to_string()],
                ..Default::default()
            },
            ..Default::default()
        };
        push_execution_policy(CapabilityPolicy {
            tool_annotations: BTreeMap::from([(tool.to_string(), annotations)]),
            ..Default::default()
        });
    }

    /// Undoes `push_path_tool_policy` and `enter_demo_workspace`.
    fn leave_demo_workspace() {
        pop_execution_policy();
        crate::stdlib::process::set_thread_execution_context(None);
    }

    #[test]
    fn auto_deny_takes_precedence_over_auto_approve() {
        let policy = ToolApprovalPolicy {
            auto_approve: patterns(&["*"]),
            auto_deny: patterns(&["dangerous_*"]),
            ..Default::default()
        };
        let expected = ToolApprovalDecision::AutoDenied {
            reason: "tool 'dangerous_rm' matches deny pattern 'dangerous_*'".to_string(),
        };
        assert_eq!(policy.evaluate("dangerous_rm", &no_args()), expected);
    }

    #[test]
    fn auto_approve_matches_glob() {
        let policy = ToolApprovalPolicy {
            auto_approve: patterns(&["read*", "search*"]),
            ..Default::default()
        };
        for tool in ["read_file", "search"] {
            assert_eq!(
                policy.evaluate(tool, &no_args()),
                ToolApprovalDecision::AutoApproved
            );
        }
    }

    #[test]
    fn require_approval_emits_decision() {
        let policy = ToolApprovalPolicy {
            require_approval: patterns(&["edit*"]),
            ..Default::default()
        };
        let decision = policy.evaluate("edit_file", &serde_json::json!({"path": "foo.rs"}));
        assert_eq!(decision, ToolApprovalDecision::RequiresHostApproval);
    }

    #[test]
    fn unmatched_tool_defaults_to_approved() {
        let policy = ToolApprovalPolicy {
            auto_approve: patterns(&["read*"]),
            require_approval: patterns(&["edit*"]),
            ..Default::default()
        };
        let decision = policy.evaluate("unknown_tool", &no_args());
        assert_eq!(decision, ToolApprovalDecision::AutoApproved);
    }

    #[test]
    fn intersect_merges_deny_lists() {
        let left = ToolApprovalPolicy {
            auto_deny: patterns(&["rm*"]),
            ..Default::default()
        };
        let right = ToolApprovalPolicy {
            auto_deny: patterns(&["drop*"]),
            ..Default::default()
        };
        assert_eq!(left.intersect(&right).auto_deny.len(), 2);
    }

    #[test]
    fn intersect_restricts_auto_approve_to_common_patterns() {
        let left = ToolApprovalPolicy {
            auto_approve: patterns(&["read*", "search*"]),
            ..Default::default()
        };
        let right = ToolApprovalPolicy {
            auto_approve: patterns(&["read*", "write*"]),
            ..Default::default()
        };
        assert_eq!(left.intersect(&right).auto_approve, patterns(&["read*"]));
    }

    #[test]
    fn intersect_defers_auto_approve_when_one_side_empty() {
        let left = ToolApprovalPolicy {
            auto_approve: patterns(&["read*"]),
            ..Default::default()
        };
        let right = ToolApprovalPolicy::default();
        assert_eq!(left.intersect(&right).auto_approve, patterns(&["read*"]));
    }

    #[test]
    fn write_path_allowlist_matches_recovered_workspace_relative_path() {
        let _workspace = enter_demo_workspace("file.txt");
        push_path_tool_policy("write_file", ToolKind::Edit);

        let policy = ToolApprovalPolicy {
            write_path_allowlist: patterns(&["packages/demo/file.txt"]),
            ..Default::default()
        };
        let decision = policy.evaluate(
            "write_file",
            &serde_json::json!({"path": "/packages/demo/file.txt"}),
        );
        assert_eq!(decision, ToolApprovalDecision::AutoApproved);

        leave_demo_workspace();
    }

    #[test]
    fn write_path_allowlist_does_not_block_read_only_tools() {
        let _workspace = enter_demo_workspace("context.txt");
        push_path_tool_policy("read_file", ToolKind::Read);

        let policy = ToolApprovalPolicy {
            write_path_allowlist: patterns(&["packages/demo/file.txt"]),
            ..Default::default()
        };
        let decision = policy.evaluate(
            "read_file",
            &serde_json::json!({"path": "/packages/demo/context.txt"}),
        );
        assert_eq!(decision, ToolApprovalDecision::AutoApproved);

        leave_demo_workspace();
    }
}
822
#[cfg(test)]
mod turn_policy_tests {
    use super::TurnPolicy;

    /// Deserializes a JSON value into a `TurnPolicy`, panicking on failure.
    fn parse_policy(raw: serde_json::Value) -> TurnPolicy {
        serde_json::from_value::<TurnPolicy>(raw).expect("deserialize")
    }

    /// The default policy keeps the done sentinel enabled, does not require an
    /// action or yield, and sets no prose limit.
    #[test]
    fn default_allows_done_sentinel() {
        let defaults = TurnPolicy::default();
        assert!(defaults.allow_done_sentinel);
        assert!(!defaults.require_action_or_yield);
        assert!(defaults.max_prose_chars.is_none());
    }

    /// Older workflows supplied `turn_policy: { require_action_or_yield: true }`
    /// with no `allow_done_sentinel` key. Such a partial dict has to leave the
    /// done-sentinel pathway switched on, otherwise loop-until-done agents
    /// would lose their completion signal.
    #[test]
    fn deserializing_partial_dict_preserves_done_sentinel_pathway() {
        let parsed = parse_policy(serde_json::json!({ "require_action_or_yield": true }));
        assert!(parsed.require_action_or_yield);
        assert!(parsed.allow_done_sentinel);
    }

    /// An explicit `allow_done_sentinel: false` in the input is honored.
    #[test]
    fn deserializing_explicit_false_disables_done_sentinel() {
        let parsed = parse_policy(serde_json::json!({
            "require_action_or_yield": true,
            "allow_done_sentinel": false,
        }));
        assert!(parsed.require_action_or_yield);
        assert!(!parsed.allow_done_sentinel);
    }
}
859
#[cfg(test)]
mod visibility_redaction_tests {
    use super::*;
    use crate::value::VmValue;

    /// Builds a three-message transcript (user, assistant, tool_result) as a
    /// `VmValue` via `transcript_to_vm_with_events`.
    fn mock_transcript() -> VmValue {
        use serde_json::json;

        let messages = vec![
            json!({"role": "user", "content": "hi"}),
            json!({"role": "assistant", "content": "hello"}),
            json!({"role": "tool_result", "content": "internal tool output"}),
        ];
        let transcript_id = Some("test-id".to_string());
        crate::llm::helpers::transcript_to_vm_with_events(
            transcript_id,
            None,
            None,
            &messages,
            Vec::new(),
            Vec::new(),
            Some("active"),
        )
    }

    /// Returns the length of the transcript's `messages` list, or 0 when the
    /// value is not a dict or `messages` is missing or not a list.
    fn message_count(transcript: &VmValue) -> usize {
        match transcript.as_dict().and_then(|dict| dict.get("messages")) {
            Some(VmValue::List(items)) => items.len(),
            _ => 0,
        }
    }

    /// Redacts a fresh mock transcript under `visibility` and reports how many
    /// messages remain.
    fn redacted_message_count(visibility: Option<&str>) -> usize {
        let transcript = mock_transcript();
        let redacted = redact_transcript_visibility(&transcript, visibility).unwrap();
        message_count(&redacted)
    }

    /// With no visibility requested, all three messages are kept.
    #[test]
    fn visibility_none_returns_unchanged() {
        assert_eq!(redacted_message_count(None), 3);
    }

    /// `"public"` visibility removes the tool_result message, leaving two.
    #[test]
    fn visibility_public_drops_tool_results() {
        assert_eq!(redacted_message_count(Some("public")), 2);
    }

    /// A visibility string other than `"public"` (here `"internal"`) keeps
    /// all three messages.
    #[test]
    fn visibility_unknown_string_is_pass_through() {
        assert_eq!(redacted_message_count(Some("internal")), 3);
    }
}