harn_vm/
tool_annotations.rs

//! Tool annotations — the single source of truth for tool semantics.
//!
//! These types describe what a tool does at a semantic level. The VM
//! consumes them to make policy decisions (read-only vs mutating, which
//! argument holds the workspace path, which aliases to normalize, etc.)
//! without hardcoding tool names or file-extension lists. Pipeline
//! authors declare a `ToolAnnotations` value per tool in their
//! `CapabilityPolicy.tool_annotations` registry; everything downstream
//! is driven by that declaration.
//!
//! This alignment is ACP-compliant: `ToolKind` matches the canonical
//! tool-kind vocabulary from the
//! [Agent Client Protocol schema](https://agentclientprotocol.com/protocol/schema)
//! one-for-one.
14
15use std::collections::BTreeMap;
16
17use serde::{Deserialize, Serialize};
18
19/// Canonical tool-kind vocabulary. Matches the ACP `ToolKind` enum so
20/// harn-cli's ACP server can forward the value unchanged in
21/// `sessionUpdate` variants.
22///
23/// The VM treats `Read`, `Search`, `Think`, and `Fetch` as read-only
24/// for concurrent-dispatch purposes. `Other` is intentionally NOT
25/// treated as read-only — unannotated tools should not slip through
26/// as auto-approved by default (fail-safe).
27#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Hash, Serialize, Deserialize)]
28#[serde(rename_all = "snake_case")]
29pub enum ToolKind {
30    /// Reads file/workspace content without mutation.
31    Read,
32    /// Mutates workspace content (write, patch, edit).
33    Edit,
34    /// Removes content irreversibly.
35    Delete,
36    /// Relocates or renames content.
37    Move,
38    /// Queries indexes or directories; no mutation.
39    Search,
40    /// Runs a subprocess or a shell command.
41    Execute,
42    /// Pure reasoning/thought invocation, no side effects.
43    Think,
44    /// Retrieves remote content (HTTP, MCP fetch, etc.).
45    Fetch,
46    /// Anything that doesn't map cleanly into the canonical kinds.
47    /// Not treated as read-only — the fail-safe default.
48    #[default]
49    Other,
50}
51
52impl ToolKind {
53    pub const ALL: [Self; 9] = [
54        Self::Read,
55        Self::Edit,
56        Self::Delete,
57        Self::Move,
58        Self::Search,
59        Self::Execute,
60        Self::Think,
61        Self::Fetch,
62        Self::Other,
63    ];
64
65    /// Read-only tools can dispatch concurrently without risking
66    /// conflicting state mutations. `Other` is excluded by design —
67    /// unannotated tools must not auto-approve as read-only.
68    pub fn is_read_only(&self) -> bool {
69        matches!(self, Self::Read | Self::Search | Self::Think | Self::Fetch)
70    }
71
72    /// Coarse mutation-classification string used in tool-call
73    /// telemetry and pre/post bridge payloads. Derived directly from
74    /// the kind — the VM no longer guesses from tool names.
75    pub fn mutation_class(&self) -> &'static str {
76        match self {
77            Self::Read | Self::Search | Self::Think | Self::Fetch => "read_only",
78            Self::Edit => "workspace_write",
79            Self::Delete | Self::Move => "destructive",
80            Self::Execute => "ambient_side_effect",
81            Self::Other => "other",
82        }
83    }
84}
85
86/// Rough side-effect taxonomy for the capability-ceiling check.
87#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Hash, Serialize, Deserialize)]
88#[serde(rename_all = "snake_case")]
89pub enum SideEffectLevel {
90    /// No side effect declared (conservative default; permission logic
91    /// treats this as "unknown → deny unless explicitly allowed").
92    #[default]
93    None,
94    /// Pure reads only.
95    ReadOnly,
96    /// Writes to workspace files.
97    WorkspaceWrite,
98    /// Runs subprocesses.
99    ProcessExec,
100    /// Reaches external services over the network.
101    Network,
102    /// Drives the physical desktop — synthetic mouse/keyboard input and screen
103    /// capture. The most invasive local class: it can operate ANY application
104    /// (not just a sandboxed subprocess or a single network sink), inject
105    /// keystrokes that paste secrets or dismiss dialogs, and every screenshot
106    /// exfiltrates whatever is on screen to the model. It therefore sits at the
107    /// top of the ceiling ladder — a policy must opt into it explicitly, above
108    /// even network access.
109    DesktopControl,
110}
111
112impl SideEffectLevel {
113    pub const ALL: [Self; 6] = [
114        Self::None,
115        Self::ReadOnly,
116        Self::WorkspaceWrite,
117        Self::ProcessExec,
118        Self::Network,
119        Self::DesktopControl,
120    ];
121
122    /// The most-permissive side-effect level — the TOP of the ladder. This is
123    /// the single source of truth for "the outermost / most-autonomous ceiling":
124    /// the runtime's builtin ceiling and the top autonomy tier both reference it,
125    /// so adding a new most-invasive level (as `desktop_control` was added above
126    /// `network`) automatically raises every permissive bound instead of leaving
127    /// hardcoded `"network"` strings that silently cap the new level out. NEVER
128    /// hardcode a specific top level as "the max"; call this.
129    pub const MAX: Self = Self::DesktopControl;
130
131    /// Numeric rank used by the policy intersector and side-effect
132    /// ceiling check. Higher rank ⇒ more invasive.
133    pub fn rank(&self) -> usize {
134        match self {
135            Self::None => 0,
136            Self::ReadOnly => 1,
137            Self::WorkspaceWrite => 2,
138            Self::ProcessExec => 3,
139            Self::Network => 4,
140            Self::DesktopControl => 5,
141        }
142    }
143
144    /// Short string used in policy documents, bridge payloads, and
145    /// error messages. Stable wire identifier.
146    pub fn as_str(&self) -> &'static str {
147        match self {
148            Self::None => "none",
149            Self::ReadOnly => "read_only",
150            Self::WorkspaceWrite => "workspace_write",
151            Self::ProcessExec => "process_exec",
152            Self::Network => "network",
153            Self::DesktopControl => "desktop_control",
154        }
155    }
156
157    /// Rank a level given as a string, through the canonical ladder — the single
158    /// source of truth for every ceiling/effect comparison that works with the
159    /// wire strings instead of the typed enum. An unrecognized value ranks as
160    /// `None` (0): tool levels always come from [`Self::as_str`] so they are
161    /// never unknown, and for a ceiling a typo then grants nothing above `none`
162    /// rather than silently widening the ceiling.
163    pub fn rank_str(level: &str) -> usize {
164        Self::parse(level).rank()
165    }
166
167    /// Parse from the stable string used in policy documents. Unknown
168    /// values deserialize to `None` (the conservative default).
169    pub fn parse(value: &str) -> Self {
170        match value {
171            "none" => Self::None,
172            "read_only" => Self::ReadOnly,
173            "workspace_write" => Self::WorkspaceWrite,
174            "process_exec" => Self::ProcessExec,
175            "network" => Self::Network,
176            "desktop_control" => Self::DesktopControl,
177            _ => Self::None,
178        }
179    }
180}
181
182/// Declarative description of a tool's argument shape. The VM uses
183/// this to:
184///
185/// - resolve `ToolArgConstraint` lookups (`path_params`),
186/// - rewrite high-level aliases to canonical keys without any
187///   per-tool hardcoded branches (`arg_aliases`),
188/// - validate presence of required arguments at the dispatch boundary
189///   (`required`).
190#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
191#[serde(default)]
192pub struct ToolArgSchema {
193    /// Argument keys whose values are workspace-relative paths.
194    /// First matching key whose value is a string wins.
195    pub path_params: Vec<String>,
196    /// Alias → canonical key. When a tool call arrives with an alias
197    /// in its argument object, the VM rewrites the key to the canonical
198    /// form before dispatch (generic; no tool-name branches).
199    pub arg_aliases: BTreeMap<String, String>,
200    /// Argument keys that must be present (non-null) on every call.
201    pub required: Vec<String>,
202}
203
204/// Full annotations for one tool. Pipelines populate one of these per
205/// tool in the capability-policy registry; the VM consults the registry
206/// on every tool call.
207#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
208#[serde(default)]
209pub struct ToolAnnotations {
210    /// ACP-aligned tool-kind classification.
211    pub kind: ToolKind,
212    /// Required side-effect level for the capability ceiling check.
213    pub side_effect_level: SideEffectLevel,
214    /// Argument shape declarations.
215    pub arg_schema: ToolArgSchema,
216    /// Capability operations requested by this tool (e.g.
217    /// `"workspace": ["read_text", "list"]`).
218    pub capabilities: BTreeMap<String, Vec<String>>,
219    /// True when the tool may return only a handle/reference to a large
220    /// output artifact instead of inline output. Execute tools with this
221    /// flag must also declare an inspection route.
222    pub emits_artifacts: bool,
223    /// Tool names that can inspect artifacts/results emitted by this tool.
224    pub result_readers: Vec<String>,
225    /// Explicit escape hatch for tools whose results are always complete
226    /// inline, even though they are execute-like.
227    pub inline_result: bool,
228    /// MCP `readOnlyHint`. This remains advisory; policy decides whether
229    /// the server that supplied it is trusted enough to rely on it.
230    #[serde(rename = "readOnlyHint", skip_serializing_if = "Option::is_none")]
231    pub read_only_hint: Option<bool>,
232    /// MCP `destructiveHint`. This remains advisory; policy decides whether
233    /// the server that supplied it is trusted enough to rely on it.
234    #[serde(rename = "destructiveHint", skip_serializing_if = "Option::is_none")]
235    pub destructive_hint: Option<bool>,
236    /// MCP `idempotentHint`. This remains advisory; policy decides whether
237    /// the server that supplied it is trusted enough to rely on it.
238    #[serde(rename = "idempotentHint", skip_serializing_if = "Option::is_none")]
239    pub idempotent_hint: Option<bool>,
240    /// MCP `openWorldHint`. This remains advisory; policy decides whether
241    /// the server that supplied it is trusted enough to rely on it.
242    #[serde(rename = "openWorldHint", skip_serializing_if = "Option::is_none")]
243    pub open_world_hint: Option<bool>,
244}
245
#[cfg(test)]
mod tests {
    use super::*;

    /// Each kind serializes to its snake_case wire name and parses back.
    #[test]
    fn tool_kind_serde_roundtrip() {
        let cases = [
            (ToolKind::Read, "\"read\""),
            (ToolKind::Edit, "\"edit\""),
            (ToolKind::Delete, "\"delete\""),
            (ToolKind::Move, "\"move\""),
            (ToolKind::Search, "\"search\""),
            (ToolKind::Execute, "\"execute\""),
            (ToolKind::Think, "\"think\""),
            (ToolKind::Fetch, "\"fetch\""),
            (ToolKind::Other, "\"other\""),
        ];
        // The table is hand-maintained; fail loudly if a kind is missing.
        assert_eq!(cases.len(), ToolKind::ALL.len());
        for (kind, expected) in cases {
            let encoded = serde_json::to_string(&kind).unwrap();
            assert_eq!(encoded, expected);
            let decoded: ToolKind = serde_json::from_str(expected).unwrap();
            assert_eq!(decoded, kind);
        }
    }

    /// `ToolKind::ALL` is hand-maintained: the exhaustive match breaks the
    /// build when a variant is added, and the count/uniqueness assertions
    /// then force that variant into `ALL`.
    #[test]
    fn tool_kind_all_lists_every_variant_once() {
        use std::collections::HashSet;

        fn _every_variant_accounted_for(kind: ToolKind) {
            match kind {
                ToolKind::Read
                | ToolKind::Edit
                | ToolKind::Delete
                | ToolKind::Move
                | ToolKind::Search
                | ToolKind::Execute
                | ToolKind::Think
                | ToolKind::Fetch
                | ToolKind::Other => {}
            }
        }
        assert_eq!(
            ToolKind::ALL.len(),
            9,
            "a ToolKind variant was added; list it in ALL and bump this count"
        );
        let distinct: HashSet<ToolKind> = ToolKind::ALL.iter().copied().collect();
        assert_eq!(distinct.len(), ToolKind::ALL.len(), "ALL contains a duplicate");
    }

    #[test]
    fn only_read_search_think_fetch_are_read_only() {
        assert!(ToolKind::Read.is_read_only());
        assert!(ToolKind::Search.is_read_only());
        assert!(ToolKind::Think.is_read_only());
        assert!(ToolKind::Fetch.is_read_only());
        // Fail-safe: Other is NOT read-only.
        assert!(!ToolKind::Other.is_read_only());
        assert!(!ToolKind::Edit.is_read_only());
        assert!(!ToolKind::Delete.is_read_only());
        assert!(!ToolKind::Move.is_read_only());
        assert!(!ToolKind::Execute.is_read_only());
    }

    #[test]
    fn mutation_class_derived_from_kind() {
        assert_eq!(ToolKind::Read.mutation_class(), "read_only");
        assert_eq!(ToolKind::Search.mutation_class(), "read_only");
        assert_eq!(ToolKind::Think.mutation_class(), "read_only");
        assert_eq!(ToolKind::Fetch.mutation_class(), "read_only");
        assert_eq!(ToolKind::Edit.mutation_class(), "workspace_write");
        assert_eq!(ToolKind::Delete.mutation_class(), "destructive");
        assert_eq!(ToolKind::Move.mutation_class(), "destructive");
        assert_eq!(ToolKind::Execute.mutation_class(), "ambient_side_effect");
        assert_eq!(ToolKind::Other.mutation_class(), "other");
    }

    /// The `#[default]` variants are the fail-safe ones.
    #[test]
    fn defaults_are_fail_safe() {
        assert_eq!(ToolKind::default(), ToolKind::Other);
        assert_eq!(SideEffectLevel::default(), SideEffectLevel::None);
    }

    #[test]
    fn side_effect_level_round_trip() {
        // Iterate `ALL` rather than a hand-written list so that the top
        // level (`DesktopControl`) and any future level are covered.
        for level in SideEffectLevel::ALL {
            assert_eq!(SideEffectLevel::parse(level.as_str()), level);
            let encoded = serde_json::to_string(&level).unwrap();
            // serde's snake_case rename must agree with the hand-written
            // wire string, otherwise policy documents and payloads diverge.
            assert_eq!(encoded, format!("\"{}\"", level.as_str()));
            let decoded: SideEffectLevel = serde_json::from_str(&encoded).unwrap();
            assert_eq!(decoded, level);
        }
    }

    /// String ranking agrees with typed ranking, and anything unrecognized
    /// falls to the bottom of the ladder instead of widening a ceiling.
    #[test]
    fn rank_str_matches_rank_and_unknown_ranks_lowest() {
        for level in SideEffectLevel::ALL {
            assert_eq!(SideEffectLevel::rank_str(level.as_str()), level.rank());
        }
        for unknown in ["", "netwrok", "Network", "desktop-control"] {
            assert_eq!(SideEffectLevel::parse(unknown), SideEffectLevel::None);
            assert_eq!(
                SideEffectLevel::rank_str(unknown),
                SideEffectLevel::None.rank()
            );
        }
    }

    #[test]
    fn side_effect_level_rank_orders() {
        assert!(SideEffectLevel::None.rank() < SideEffectLevel::ReadOnly.rank());
        assert!(SideEffectLevel::ReadOnly.rank() < SideEffectLevel::WorkspaceWrite.rank());
        assert!(SideEffectLevel::WorkspaceWrite.rank() < SideEffectLevel::ProcessExec.rank());
        assert!(SideEffectLevel::ProcessExec.rank() < SideEffectLevel::Network.rank());
        // Desktop control is the most invasive local class — top of the ladder,
        // above even network egress.
        assert!(SideEffectLevel::Network.rank() < SideEffectLevel::DesktopControl.rank());
        assert_eq!(
            SideEffectLevel::parse("desktop_control"),
            SideEffectLevel::DesktopControl
        );
        assert_eq!(SideEffectLevel::DesktopControl.as_str(), "desktop_control");
    }

    #[test]
    fn max_is_the_unique_top_of_the_ladder() {
        // Guardrail: `SideEffectLevel::MAX` MUST be the strictly-highest-ranked
        // level. Adding a new most-invasive variant without updating `MAX` (the
        // single "most-permissive ceiling" the builtin ceiling and top autonomy
        // tier both reference) fails here — so the "network was the top" footgun
        // that silently capped `desktop_control` cannot recur.
        for level in SideEffectLevel::ALL {
            assert!(
                level.rank() <= SideEffectLevel::MAX.rank(),
                "{level:?} outranks MAX ({:?}); update SideEffectLevel::MAX",
                SideEffectLevel::MAX
            );
        }
        // And MAX is uniquely the top (exactly one level at the max rank).
        let at_top = SideEffectLevel::ALL
            .iter()
            .filter(|l| l.rank() == SideEffectLevel::MAX.rank())
            .count();
        assert_eq!(at_top, 1, "MAX must be the unique top of the ladder");

        // Compiler guardrail on `ALL` completeness: this match is exhaustive
        // over the TYPE, so adding a variant fails the build here — and the
        // count assertion then forces that variant into `ALL`. Without both,
        // a variant omitted from the (hand-maintained) `ALL` array would
        // silently escape the uniqueness check above.
        fn _every_variant_accounted_for(level: SideEffectLevel) {
            match level {
                SideEffectLevel::None
                | SideEffectLevel::ReadOnly
                | SideEffectLevel::WorkspaceWrite
                | SideEffectLevel::ProcessExec
                | SideEffectLevel::Network
                | SideEffectLevel::DesktopControl => {}
            }
        }
        assert_eq!(
            SideEffectLevel::ALL.len(),
            6,
            "a SideEffectLevel variant was added; list it in ALL and bump this count"
        );
    }

    #[test]
    fn arg_schema_defaults_empty() {
        let schema = ToolArgSchema::default();
        assert!(schema.path_params.is_empty());
        assert!(schema.arg_aliases.is_empty());
        assert!(schema.required.is_empty());
    }

    #[test]
    fn annotations_default_result_routes_empty() {
        let annotations = ToolAnnotations::default();
        assert!(!annotations.emits_artifacts);
        assert!(annotations.result_readers.is_empty());
        assert!(!annotations.inline_result);
    }

    #[test]
    fn mcp_annotation_hints_round_trip() {
        let annotations: ToolAnnotations = serde_json::from_value(serde_json::json!({
            "readOnlyHint": true,
            "destructiveHint": false,
            "idempotentHint": true,
            "openWorldHint": false
        }))
        .expect("MCP hints should deserialize");
        assert_eq!(annotations.read_only_hint, Some(true));
        assert_eq!(annotations.destructive_hint, Some(false));
        assert_eq!(annotations.idempotent_hint, Some(true));
        assert_eq!(annotations.open_world_hint, Some(false));

        // All four hints must survive serialization under their camelCase
        // MCP names — including the `false` ones, which are set, not absent.
        let encoded = serde_json::to_value(&annotations).expect("serialize annotations");
        assert_eq!(encoded["readOnlyHint"], true);
        assert_eq!(encoded["destructiveHint"], false);
        assert_eq!(encoded["idempotentHint"], true);
        assert_eq!(encoded["openWorldHint"], false);
    }

    /// Hints that were never supplied are left out of the serialized form
    /// rather than being emitted as `null`.
    #[test]
    fn unset_mcp_hints_are_omitted_when_serialized() {
        let encoded =
            serde_json::to_value(ToolAnnotations::default()).expect("serialize annotations");
        for key in [
            "readOnlyHint",
            "destructiveHint",
            "idempotentHint",
            "openWorldHint",
        ] {
            assert!(
                encoded.get(key).is_none(),
                "{key} should be omitted when unset"
            );
        }
    }
}
harn_vm/tool_annotations.rs

harn_vm/
tool_annotations.rs