Skip to main content

agent_sdk_foundation/
audit.rs

1//! Authoritative tool audit records.
2//!
3//! The audit surface that the server uses to explain **every** tool
4//! lifecycle outcome — not just successful completion. This replaces the
5//! `post_tool_use` hook as the sole audit surface on the authoritative
6//! (server) execution path.
7//!
8//! # Why this exists
9//!
//! `post_tool_use` only fires once per tool call and only describes the
//! terminal [`ToolResult`]. The server has to explain every lifecycle
//! path, including ones that never reach a successful result:
13//!
14//! - **Blocked** — the policy hook rejected the tool.
15//! - **`RequiresConfirmation`** — the policy hook yielded for user approval.
16//! - **Cached** — an earlier completed execution was replayed from the
17//!   execution store.
18//! - **Replayed** — the caller resubmitted external tool results for an
19//!   already-processed handoff.
20//! - **Invalidated** — a listen-tool snapshot expired or was invalidated
21//!   before the user could confirm.
22//! - **Completed** — the tool ran to completion (success or failure).
23//! - **`PersistenceFailed`** — the tool ran but the event / execution
24//!   store refused to durably record the outcome.
25//!
26//! These outcomes are modelled as [`ToolAuditOutcome`] variants on a
27//! single [`ToolAuditRecord`]. Sinks receive one record per lifecycle
28//! transition and can persist them to a durable audit table without
29//! having to reconstruct the path from scattered hook calls.
30//!
31//! # Trait location
32//!
33//! Only the **record shape** lives in `agent-sdk-foundation` (this module is
34//! data-only). The async [`ToolAuditSink`](../../agent_sdk_tools/audit/trait.ToolAuditSink.html)
35//! trait lives in `agent-sdk-tools` so `agent-sdk-foundation` stays free of
36//! async-trait dependencies.
37
38use crate::types::{ListenExecutionContext, ToolResult, ToolTier};
39use serde::{Deserialize, Serialize};
40use time::OffsetDateTime;
41
42/// Provider / model provenance for an audit record.
43///
44/// Captured at the moment the record is emitted so that durable audit
45/// rows survive provider/model rotations. Present on every record
46/// because every tool-call lifecycle event happens in the context of
47/// the LLM turn that requested the tool.
48#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
49pub struct AuditProvenance {
50    /// Provider identifier (e.g. `"anthropic"`, `"openai"`, `"vertex"`).
51    pub provider: String,
52    /// Model identifier (e.g. `"claude-sonnet-4-5-20250929"`).
53    pub model: String,
54}
55
56impl AuditProvenance {
57    /// Construct a provenance record from borrowed strings.
58    #[must_use]
59    pub fn new(provider: impl Into<String>, model: impl Into<String>) -> Self {
60        Self {
61            provider: provider.into(),
62            model: model.into(),
63        }
64    }
65}
66
67/// Lifecycle outcome for a single tool call.
68///
69/// Every variant is an **authoritative** terminal state the server must
70/// persist — including paths that bypass tool execution entirely (blocked,
71/// confirmation, cached replay) or that fail persistence after the tool
72/// already ran.
73///
74/// Variants are ordered roughly by lifecycle position: policy check → cache
75/// lookup → execution → post-execution persistence.
76#[derive(Clone, Debug, Serialize, Deserialize)]
77#[serde(tag = "kind", rename_all = "snake_case")]
78#[non_exhaustive]
79pub enum ToolAuditOutcome {
80    /// The policy hook rejected the tool call.
81    ///
82    /// The tool never executed. The reason is the string returned by
83    /// [`ToolDecision::Block`](../../agent_sdk_tools/hooks/enum.ToolDecision.html#variant.Block).
84    Blocked {
85        /// Reason provided by the policy hook.
86        reason: String,
87    },
88
89    /// The policy hook yielded for user approval.
90    ///
91    /// The tool is paused pending a resume decision. The turn loop will
92    /// emit a follow-up record on resume (either [`Completed`](Self::Completed)
93    /// after execution or [`Blocked`](Self::Blocked) if policy now rejects).
94    RequiresConfirmation {
95        /// Human-readable confirmation description shown to the user.
96        description: String,
97        /// Optional listen-context captured at confirmation time.
98        listen_context: Option<ListenExecutionContext>,
99    },
100
101    /// The execution store already held a completed result for this
102    /// tool call — the idempotency layer replayed the cached outcome
103    /// instead of calling the tool again.
104    Cached {
105        /// The cached [`ToolResult`] that was replayed.
106        result: ToolResult,
107    },
108
109    /// The caller resubmitted external tool results for an already
110    /// processed handoff, and the SDK served the previously recorded
111    /// result rather than re-accepting the payload.
112    ///
113    /// Distinct from [`Cached`](Self::Cached) in that this fires on the
114    /// **external** runtime path where the SDK did not execute the tool
115    /// itself in any attempt.
116    Replayed {
117        /// The [`ToolResult`] previously recorded for this tool call.
118        result: ToolResult,
119    },
120
121    /// A listen-tool snapshot expired or was invalidated before the
122    /// user could confirm it.
123    ///
124    /// This is a non-completion path: no final [`ToolResult`] is
125    /// produced because the confirmation window closed.
126    Invalidated {
127        /// Reason the listen-tool invalidated its snapshot.
128        reason: String,
129    },
130
131    /// The tool ran to completion (success or failure).
132    ///
133    /// `result.success` indicates whether the tool itself succeeded;
134    /// even a failing run is considered a completed lifecycle.
135    Completed {
136        /// Final [`ToolResult`] produced by the tool.
137        result: ToolResult,
138    },
139
140    /// The tool executed but the server could not durably persist the
141    /// outcome (event store, execution store, or message append failed).
142    ///
143    /// The record preserves the in-memory [`ToolResult`] so that audit
144    /// consumers can reason about divergence between what the tool
145    /// produced and what made it to durable storage.
146    PersistenceFailed {
147        /// The [`ToolResult`] that would have been persisted, if any.
148        ///
149        /// `None` when the persistence layer failed before a result was
150        /// produced (e.g. a `tool_call_start` event failed to append).
151        result: Option<ToolResult>,
152        /// Short, human-readable description of the persistence failure.
153        error: String,
154    },
155}
156
157impl ToolAuditOutcome {
158    /// Static discriminant string used for metrics, tracing attributes,
159    /// and durable audit rows.
160    #[must_use]
161    pub const fn kind(&self) -> &'static str {
162        match self {
163            Self::Blocked { .. } => "blocked",
164            Self::RequiresConfirmation { .. } => "requires_confirmation",
165            Self::Cached { .. } => "cached",
166            Self::Replayed { .. } => "replayed",
167            Self::Invalidated { .. } => "invalidated",
168            Self::Completed { .. } => "completed",
169            Self::PersistenceFailed { .. } => "persistence_failed",
170        }
171    }
172
173    /// Returns the [`ToolResult`] associated with this outcome, if one
174    /// is available.
175    ///
176    /// Present for [`Cached`](Self::Cached), [`Replayed`](Self::Replayed),
177    /// [`Completed`](Self::Completed), and most
178    /// [`PersistenceFailed`](Self::PersistenceFailed) paths. Absent for
179    /// [`Blocked`](Self::Blocked), [`RequiresConfirmation`](Self::RequiresConfirmation),
180    /// and [`Invalidated`](Self::Invalidated).
181    #[must_use]
182    pub const fn result(&self) -> Option<&ToolResult> {
183        match self {
184            Self::Cached { result } | Self::Replayed { result } | Self::Completed { result } => {
185                Some(result)
186            }
187            Self::PersistenceFailed { result, .. } => result.as_ref(),
188            Self::Blocked { .. } | Self::RequiresConfirmation { .. } | Self::Invalidated { .. } => {
189                None
190            }
191        }
192    }
193}
194
195/// Single authoritative audit record for one tool-call lifecycle event.
196///
197/// A tool call may produce **multiple** records over its lifetime — for
198/// example a `RequiresConfirmation` followed by a `Completed` after the
199/// user approves, or a `Completed` followed by a `PersistenceFailed` if
200/// the event store rejects the terminal event.
201///
202/// Records are self-describing: consumers do **not** need to correlate
203/// them with hook calls or event-store rows to understand what happened.
204#[derive(Clone, Debug, Serialize, Deserialize)]
205pub struct ToolAuditRecord {
206    /// Unique tool call ID (from the LLM's `tool_use`).
207    pub tool_call_id: String,
208    /// Wire-format tool name.
209    pub tool_name: String,
210    /// Human-readable display name.
211    pub display_name: String,
212    /// Permission tier of the tool at the moment the record was emitted.
213    pub tier: ToolTier,
214    /// Input as requested by the LLM (audit trail).
215    pub requested_input: serde_json::Value,
216    /// Effective input after SDK preparation (may differ for listen-tools).
217    pub effective_input: serde_json::Value,
218    /// Turn number this record belongs to.
219    pub turn: usize,
220    /// Provider / model provenance for this turn's LLM call.
221    pub provenance: AuditProvenance,
222    /// Lifecycle outcome carrying the variant-specific payload.
223    pub outcome: ToolAuditOutcome,
224    /// UTC timestamp when the record was produced.
225    #[serde(with = "time::serde::rfc3339")]
226    pub recorded_at: OffsetDateTime,
227}
228
229/// Arguments for building a [`ToolAuditRecord`] via [`ToolAuditRecord::new`].
230///
231/// Replaces a 9-parameter positional constructor so each field is named
232/// at the call site — three of the positional parameters were
233/// `impl Into<String>` and two were `serde_json::Value`, which made
234/// positional confusion a real risk for a struct that lands in the
235/// durable audit log.
236///
237/// Every field is required; the timestamp (`recorded_at`) is the only
238/// value [`ToolAuditRecord::new`] fills in automatically.
239#[derive(Clone, Debug)]
240pub struct ToolAuditRecordParams {
241    /// Unique tool call ID (from the LLM's `tool_use`).
242    pub tool_call_id: String,
243    /// Wire-format tool name.
244    pub tool_name: String,
245    /// Human-readable display name.
246    pub display_name: String,
247    /// Permission tier of the tool at the moment the record was emitted.
248    pub tier: ToolTier,
249    /// Input as requested by the LLM (audit trail).
250    pub requested_input: serde_json::Value,
251    /// Effective input after SDK preparation (may differ for listen-tools).
252    pub effective_input: serde_json::Value,
253    /// Turn number this record belongs to.
254    pub turn: usize,
255    /// Provider / model provenance for this turn's LLM call.
256    pub provenance: AuditProvenance,
257    /// Lifecycle outcome carrying the variant-specific payload.
258    pub outcome: ToolAuditOutcome,
259}
260
261impl ToolAuditRecord {
262    /// Build a record using the current wall-clock time.
263    ///
264    /// See [`ToolAuditRecordParams`] for the field list.
265    #[must_use]
266    pub fn new(params: ToolAuditRecordParams) -> Self {
267        let ToolAuditRecordParams {
268            tool_call_id,
269            tool_name,
270            display_name,
271            tier,
272            requested_input,
273            effective_input,
274            turn,
275            provenance,
276            outcome,
277        } = params;
278        Self {
279            tool_call_id,
280            tool_name,
281            display_name,
282            tier,
283            requested_input,
284            effective_input,
285            turn,
286            provenance,
287            outcome,
288            recorded_at: OffsetDateTime::now_utc(),
289        }
290    }
291
292    /// Return the outcome's discriminant string.
293    #[must_use]
294    pub const fn outcome_kind(&self) -> &'static str {
295        self.outcome.kind()
296    }
297}
298
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a record around `outcome` with fixed, arbitrary metadata.
    fn sample_record(outcome: ToolAuditOutcome) -> ToolAuditRecord {
        ToolAuditRecord::new(ToolAuditRecordParams {
            tool_call_id: "call_1".into(),
            tool_name: "read_file".into(),
            display_name: "Read File".into(),
            tier: ToolTier::Observe,
            requested_input: serde_json::json!({"path": "/tmp/x"}),
            effective_input: serde_json::json!({"path": "/tmp/x"}),
            turn: 2,
            provenance: AuditProvenance::new("anthropic", "claude-sonnet-4-5-20250929"),
            outcome,
        })
    }

    /// One instance of each [`ToolAuditOutcome`] variant.
    ///
    /// Maintained by hand: extend this list whenever a variant is added.
    fn all_outcomes() -> [ToolAuditOutcome; 7] {
        [
            ToolAuditOutcome::Blocked {
                reason: "policy".into(),
            },
            ToolAuditOutcome::RequiresConfirmation {
                description: "confirm".into(),
                listen_context: None,
            },
            ToolAuditOutcome::Cached {
                result: ToolResult::success("ok"),
            },
            ToolAuditOutcome::Replayed {
                result: ToolResult::success("ok"),
            },
            ToolAuditOutcome::Invalidated {
                reason: "expired".into(),
            },
            ToolAuditOutcome::Completed {
                result: ToolResult::success("ok"),
            },
            ToolAuditOutcome::PersistenceFailed {
                result: None,
                error: "boom".into(),
            },
        ]
    }

    #[test]
    fn outcome_kind_matches_variant() {
        assert_eq!(
            ToolAuditOutcome::Blocked {
                reason: "no".into(),
            }
            .kind(),
            "blocked",
        );
        assert_eq!(
            ToolAuditOutcome::RequiresConfirmation {
                description: "pls".into(),
                listen_context: None,
            }
            .kind(),
            "requires_confirmation",
        );
        assert_eq!(
            ToolAuditOutcome::Cached {
                result: ToolResult::success("ok"),
            }
            .kind(),
            "cached",
        );
        assert_eq!(
            ToolAuditOutcome::Replayed {
                result: ToolResult::success("ok"),
            }
            .kind(),
            "replayed",
        );
        assert_eq!(
            ToolAuditOutcome::Invalidated {
                reason: "expired".into(),
            }
            .kind(),
            "invalidated",
        );
        assert_eq!(
            ToolAuditOutcome::Completed {
                result: ToolResult::success("ok"),
            }
            .kind(),
            "completed",
        );
        assert_eq!(
            ToolAuditOutcome::PersistenceFailed {
                result: None,
                error: "boom".into(),
            }
            .kind(),
            "persistence_failed",
        );
    }

    #[test]
    fn outcome_result_accessor() {
        let ok = ToolResult::success("ok");

        // Variants that never carry a result.
        assert!(
            ToolAuditOutcome::Blocked { reason: "n".into() }
                .result()
                .is_none()
        );
        assert!(
            ToolAuditOutcome::RequiresConfirmation {
                description: "d".into(),
                listen_context: None,
            }
            .result()
            .is_none()
        );
        assert!(
            ToolAuditOutcome::Invalidated { reason: "n".into() }
                .result()
                .is_none()
        );

        // Variants that always carry a result.
        assert_eq!(
            ToolAuditOutcome::Cached { result: ok.clone() }
                .result()
                .map(|r| r.output.as_str()),
            Some("ok"),
        );
        assert_eq!(
            ToolAuditOutcome::Replayed { result: ok.clone() }
                .result()
                .map(|r| r.output.as_str()),
            Some("ok"),
        );
        assert_eq!(
            ToolAuditOutcome::Completed { result: ok.clone() }
                .result()
                .map(|r| r.output.as_str()),
            Some("ok"),
        );

        // `PersistenceFailed` carries a result only when one was produced.
        assert!(
            ToolAuditOutcome::PersistenceFailed {
                result: None,
                error: "e".into(),
            }
            .result()
            .is_none()
        );
        assert_eq!(
            ToolAuditOutcome::PersistenceFailed {
                result: Some(ok),
                error: "e".into(),
            }
            .result()
            .map(|r| r.output.as_str()),
            Some("ok"),
        );
    }

    #[test]
    fn record_round_trips_through_json() {
        let record = sample_record(ToolAuditOutcome::Completed {
            result: ToolResult::success("hello"),
        });
        let json = serde_json::to_string(&record).unwrap();
        let back: ToolAuditRecord = serde_json::from_str(&json).unwrap();
        assert_eq!(back.tool_call_id, "call_1");
        assert_eq!(back.tool_name, "read_file");
        assert_eq!(back.display_name, "Read File");
        assert_eq!(back.turn, 2);
        assert_eq!(back.requested_input, record.requested_input);
        assert_eq!(back.effective_input, record.effective_input);
        assert_eq!(back.outcome_kind(), "completed");
        assert_eq!(back.provenance.provider, "anthropic");
        assert_eq!(back.provenance.model, "claude-sonnet-4-5-20250929");
    }

    #[test]
    fn every_outcome_serialises_with_snake_case_tag() {
        // The serialised `kind` tag must stay stable for durable audit
        // tables and dashboards, and must agree with `kind()` so the two
        // discriminant sources cannot drift apart.
        for outcome in all_outcomes() {
            let expected_kind = outcome.kind();
            let json = serde_json::to_value(sample_record(outcome)).unwrap();
            assert_eq!(json["outcome"]["kind"], expected_kind);

            let back: ToolAuditRecord = serde_json::from_value(json).unwrap();
            assert_eq!(back.outcome_kind(), expected_kind);
        }

        // The tag sits next to the variant's own fields (internal tagging).
        let record = sample_record(ToolAuditOutcome::Blocked {
            reason: "policy".into(),
        });
        let json = serde_json::to_value(&record).unwrap();
        assert_eq!(json["outcome"]["kind"], "blocked");
        assert_eq!(json["outcome"]["reason"], "policy");
    }
}