agent_sdk_foundation/audit.rs
1//! Authoritative tool audit records.
2//!
3//! The audit surface that the server uses to explain **every** tool
4//! lifecycle outcome — not just successful completion. This replaces the
5//! `post_tool_use` hook as the sole audit surface on the authoritative
6//! (server) execution path.
7//!
8//! # Why this exists
9//!
10//! `post_tool_use` only fires once per tool call and only describes the
11//! terminal [`ToolResult`]. The server has to explain paths that never
12//! reach a successful result, including:
13//!
14//! - **Blocked** — the policy hook rejected the tool.
15//! - **`RequiresConfirmation`** — the policy hook yielded for user approval.
16//! - **Cached** — an earlier completed execution was replayed from the
17//! execution store.
18//! - **Replayed** — the caller resubmitted external tool results for an
19//! already-processed handoff.
20//! - **Invalidated** — a listen-tool snapshot expired or was invalidated
21//! before the user could confirm.
22//! - **Completed** — the tool ran to completion (success or failure).
23//! - **`PersistenceFailed`** — the tool ran but the event / execution
24//! store refused to durably record the outcome.
25//!
26//! These outcomes are modelled as [`ToolAuditOutcome`] variants on a
27//! single [`ToolAuditRecord`]. Sinks receive one record per lifecycle
28//! transition and can persist them to a durable audit table without
29//! having to reconstruct the path from scattered hook calls.
30//!
31//! # Trait location
32//!
33//! Only the **record shape** lives in `agent-sdk-foundation` (this module is
34//! data-only). The async [`ToolAuditSink`](../../agent_sdk_tools/audit/trait.ToolAuditSink.html)
35//! trait lives in `agent-sdk-tools` so `agent-sdk-foundation` stays free of
36//! async-trait dependencies.
37
38use crate::types::{ListenExecutionContext, ToolResult, ToolTier};
39use serde::{Deserialize, Serialize};
40use time::OffsetDateTime;
41
42/// Provider / model provenance for an audit record.
43///
44/// Captured at the moment the record is emitted so that durable audit
45/// rows survive provider/model rotations. Present on every record
46/// because every tool-call lifecycle event happens in the context of
47/// the LLM turn that requested the tool.
48#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
49pub struct AuditProvenance {
50 /// Provider identifier (e.g. `"anthropic"`, `"openai"`, `"vertex"`).
51 pub provider: String,
52 /// Model identifier (e.g. `"claude-sonnet-4-5-20250929"`).
53 pub model: String,
54}
55
56impl AuditProvenance {
57 /// Construct a provenance record from borrowed strings.
58 #[must_use]
59 pub fn new(provider: impl Into<String>, model: impl Into<String>) -> Self {
60 Self {
61 provider: provider.into(),
62 model: model.into(),
63 }
64 }
65}
66
67/// Lifecycle outcome for a single tool call.
68///
69/// Every variant is an **authoritative** terminal state the server must
70/// persist — including paths that bypass tool execution entirely (blocked,
71/// confirmation, cached replay) or that fail persistence after the tool
72/// already ran.
73///
74/// Variants are ordered roughly by lifecycle position: policy check → cache
75/// lookup → execution → post-execution persistence.
76#[derive(Clone, Debug, Serialize, Deserialize)]
77#[serde(tag = "kind", rename_all = "snake_case")]
78#[non_exhaustive]
79pub enum ToolAuditOutcome {
80 /// The policy hook rejected the tool call.
81 ///
82 /// The tool never executed. The reason is the string returned by
83 /// [`ToolDecision::Block`](../../agent_sdk_tools/hooks/enum.ToolDecision.html#variant.Block).
84 Blocked {
85 /// Reason provided by the policy hook.
86 reason: String,
87 },
88
89 /// The policy hook yielded for user approval.
90 ///
91 /// The tool is paused pending a resume decision. The turn loop will
92 /// emit a follow-up record on resume (either [`Completed`](Self::Completed)
93 /// after execution or [`Blocked`](Self::Blocked) if policy now rejects).
94 RequiresConfirmation {
95 /// Human-readable confirmation description shown to the user.
96 description: String,
97 /// Optional listen-context captured at confirmation time.
98 listen_context: Option<ListenExecutionContext>,
99 },
100
101 /// The execution store already held a completed result for this
102 /// tool call — the idempotency layer replayed the cached outcome
103 /// instead of calling the tool again.
104 Cached {
105 /// The cached [`ToolResult`] that was replayed.
106 result: ToolResult,
107 },
108
109 /// The caller resubmitted external tool results for an already
110 /// processed handoff, and the SDK served the previously recorded
111 /// result rather than re-accepting the payload.
112 ///
113 /// Distinct from [`Cached`](Self::Cached) in that this fires on the
114 /// **external** runtime path where the SDK did not execute the tool
115 /// itself in any attempt.
116 Replayed {
117 /// The [`ToolResult`] previously recorded for this tool call.
118 result: ToolResult,
119 },
120
121 /// A listen-tool snapshot expired or was invalidated before the
122 /// user could confirm it.
123 ///
124 /// This is a non-completion path: no final [`ToolResult`] is
125 /// produced because the confirmation window closed.
126 Invalidated {
127 /// Reason the listen-tool invalidated its snapshot.
128 reason: String,
129 },
130
131 /// The tool ran to completion (success or failure).
132 ///
133 /// `result.success` indicates whether the tool itself succeeded;
134 /// even a failing run is considered a completed lifecycle.
135 Completed {
136 /// Final [`ToolResult`] produced by the tool.
137 result: ToolResult,
138 },
139
140 /// The tool executed but the server could not durably persist the
141 /// outcome (event store, execution store, or message append failed).
142 ///
143 /// The record preserves the in-memory [`ToolResult`] so that audit
144 /// consumers can reason about divergence between what the tool
145 /// produced and what made it to durable storage.
146 PersistenceFailed {
147 /// The [`ToolResult`] that would have been persisted, if any.
148 ///
149 /// `None` when the persistence layer failed before a result was
150 /// produced (e.g. a `tool_call_start` event failed to append).
151 result: Option<ToolResult>,
152 /// Short, human-readable description of the persistence failure.
153 error: String,
154 },
155}
156
157impl ToolAuditOutcome {
158 /// Static discriminant string used for metrics, tracing attributes,
159 /// and durable audit rows.
160 #[must_use]
161 pub const fn kind(&self) -> &'static str {
162 match self {
163 Self::Blocked { .. } => "blocked",
164 Self::RequiresConfirmation { .. } => "requires_confirmation",
165 Self::Cached { .. } => "cached",
166 Self::Replayed { .. } => "replayed",
167 Self::Invalidated { .. } => "invalidated",
168 Self::Completed { .. } => "completed",
169 Self::PersistenceFailed { .. } => "persistence_failed",
170 }
171 }
172
173 /// Returns the [`ToolResult`] associated with this outcome, if one
174 /// is available.
175 ///
176 /// Present for [`Cached`](Self::Cached), [`Replayed`](Self::Replayed),
177 /// [`Completed`](Self::Completed), and most
178 /// [`PersistenceFailed`](Self::PersistenceFailed) paths. Absent for
179 /// [`Blocked`](Self::Blocked), [`RequiresConfirmation`](Self::RequiresConfirmation),
180 /// and [`Invalidated`](Self::Invalidated).
181 #[must_use]
182 pub const fn result(&self) -> Option<&ToolResult> {
183 match self {
184 Self::Cached { result } | Self::Replayed { result } | Self::Completed { result } => {
185 Some(result)
186 }
187 Self::PersistenceFailed { result, .. } => result.as_ref(),
188 Self::Blocked { .. } | Self::RequiresConfirmation { .. } | Self::Invalidated { .. } => {
189 None
190 }
191 }
192 }
193}
194
195/// Single authoritative audit record for one tool-call lifecycle event.
196///
197/// A tool call may produce **multiple** records over its lifetime — for
198/// example a `RequiresConfirmation` followed by a `Completed` after the
199/// user approves, or a `Completed` followed by a `PersistenceFailed` if
200/// the event store rejects the terminal event.
201///
202/// Records are self-describing: consumers do **not** need to correlate
203/// them with hook calls or event-store rows to understand what happened.
204#[derive(Clone, Debug, Serialize, Deserialize)]
205pub struct ToolAuditRecord {
206 /// Unique tool call ID (from the LLM's `tool_use`).
207 pub tool_call_id: String,
208 /// Wire-format tool name.
209 pub tool_name: String,
210 /// Human-readable display name.
211 pub display_name: String,
212 /// Permission tier of the tool at the moment the record was emitted.
213 pub tier: ToolTier,
214 /// Input as requested by the LLM (audit trail).
215 pub requested_input: serde_json::Value,
216 /// Effective input after SDK preparation (may differ for listen-tools).
217 pub effective_input: serde_json::Value,
218 /// Turn number this record belongs to.
219 pub turn: usize,
220 /// Provider / model provenance for this turn's LLM call.
221 pub provenance: AuditProvenance,
222 /// Lifecycle outcome carrying the variant-specific payload.
223 pub outcome: ToolAuditOutcome,
224 /// UTC timestamp when the record was produced.
225 #[serde(with = "time::serde::rfc3339")]
226 pub recorded_at: OffsetDateTime,
227}
228
229/// Arguments for building a [`ToolAuditRecord`] via [`ToolAuditRecord::new`].
230///
231/// Replaces a 9-parameter positional constructor so each field is named
232/// at the call site — three of the positional parameters were
233/// `impl Into<String>` and two were `serde_json::Value`, which made
234/// positional confusion a real risk for a struct that lands in the
235/// durable audit log.
236///
237/// Every field is required; the timestamp (`recorded_at`) is the only
238/// value [`ToolAuditRecord::new`] fills in automatically.
239#[derive(Clone, Debug)]
240pub struct ToolAuditRecordParams {
241 /// Unique tool call ID (from the LLM's `tool_use`).
242 pub tool_call_id: String,
243 /// Wire-format tool name.
244 pub tool_name: String,
245 /// Human-readable display name.
246 pub display_name: String,
247 /// Permission tier of the tool at the moment the record was emitted.
248 pub tier: ToolTier,
249 /// Input as requested by the LLM (audit trail).
250 pub requested_input: serde_json::Value,
251 /// Effective input after SDK preparation (may differ for listen-tools).
252 pub effective_input: serde_json::Value,
253 /// Turn number this record belongs to.
254 pub turn: usize,
255 /// Provider / model provenance for this turn's LLM call.
256 pub provenance: AuditProvenance,
257 /// Lifecycle outcome carrying the variant-specific payload.
258 pub outcome: ToolAuditOutcome,
259}
260
261impl ToolAuditRecord {
262 /// Build a record using the current wall-clock time.
263 ///
264 /// See [`ToolAuditRecordParams`] for the field list.
265 #[must_use]
266 pub fn new(params: ToolAuditRecordParams) -> Self {
267 let ToolAuditRecordParams {
268 tool_call_id,
269 tool_name,
270 display_name,
271 tier,
272 requested_input,
273 effective_input,
274 turn,
275 provenance,
276 outcome,
277 } = params;
278 Self {
279 tool_call_id,
280 tool_name,
281 display_name,
282 tier,
283 requested_input,
284 effective_input,
285 turn,
286 provenance,
287 outcome,
288 recorded_at: OffsetDateTime::now_utc(),
289 }
290 }
291
292 /// Return the outcome's discriminant string.
293 #[must_use]
294 pub const fn outcome_kind(&self) -> &'static str {
295 self.outcome.kind()
296 }
297}
298
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps `outcome` in a record with fixed, arbitrary metadata.
    fn sample_record(outcome: ToolAuditOutcome) -> ToolAuditRecord {
        ToolAuditRecord::new(ToolAuditRecordParams {
            tool_call_id: "call_1".into(),
            tool_name: "read_file".into(),
            display_name: "Read File".into(),
            tier: ToolTier::Observe,
            requested_input: serde_json::json!({"path": "/tmp/x"}),
            effective_input: serde_json::json!({"path": "/tmp/x"}),
            turn: 2,
            provenance: AuditProvenance::new("anthropic", "claude-sonnet-4-5-20250929"),
            outcome,
        })
    }

    /// One representative value per variant, paired with the discriminant
    /// string it must report from `kind()` and carry on the wire.
    fn outcome_per_variant() -> Vec<(ToolAuditOutcome, &'static str)> {
        vec![
            (
                ToolAuditOutcome::Blocked {
                    reason: "no".into(),
                },
                "blocked",
            ),
            (
                ToolAuditOutcome::RequiresConfirmation {
                    description: "pls".into(),
                    listen_context: None,
                },
                "requires_confirmation",
            ),
            (
                ToolAuditOutcome::Cached {
                    result: ToolResult::success("ok"),
                },
                "cached",
            ),
            (
                ToolAuditOutcome::Replayed {
                    result: ToolResult::success("ok"),
                },
                "replayed",
            ),
            (
                ToolAuditOutcome::Invalidated {
                    reason: "expired".into(),
                },
                "invalidated",
            ),
            (
                ToolAuditOutcome::Completed {
                    result: ToolResult::success("ok"),
                },
                "completed",
            ),
            (
                ToolAuditOutcome::PersistenceFailed {
                    result: None,
                    error: "boom".into(),
                },
                "persistence_failed",
            ),
        ]
    }

    #[test]
    fn outcome_kind_matches_variant() {
        for (outcome, expected) in outcome_per_variant() {
            assert_eq!(outcome.kind(), expected);
        }
    }

    #[test]
    fn outcome_result_accessor() {
        // Paths that never carry a result.
        let without_result = [
            ToolAuditOutcome::Blocked { reason: "n".into() },
            ToolAuditOutcome::RequiresConfirmation {
                description: "d".into(),
                listen_context: None,
            },
            ToolAuditOutcome::Invalidated { reason: "r".into() },
            ToolAuditOutcome::PersistenceFailed {
                result: None,
                error: "e".into(),
            },
        ];
        for outcome in &without_result {
            assert!(
                outcome.result().is_none(),
                "{} must not expose a result",
                outcome.kind(),
            );
        }

        // Paths that expose the stored result.
        let ok = ToolResult::success("ok");
        let with_result = [
            ToolAuditOutcome::Cached { result: ok.clone() },
            ToolAuditOutcome::Replayed { result: ok.clone() },
            ToolAuditOutcome::Completed { result: ok.clone() },
            ToolAuditOutcome::PersistenceFailed {
                result: Some(ok),
                error: "e".into(),
            },
        ];
        for outcome in &with_result {
            assert_eq!(
                outcome.result().map(|r| r.output.as_str()),
                Some("ok"),
                "{} must expose its result",
                outcome.kind(),
            );
        }
    }

    #[test]
    fn record_round_trips_through_json() {
        let record = sample_record(ToolAuditOutcome::Completed {
            result: ToolResult::success("hello"),
        });
        let json = serde_json::to_string(&record).unwrap();
        let back: ToolAuditRecord = serde_json::from_str(&json).unwrap();
        assert_eq!(back.tool_call_id, "call_1");
        assert_eq!(back.tool_name, "read_file");
        assert_eq!(back.display_name, "Read File");
        assert_eq!(back.turn, 2);
        assert_eq!(back.requested_input, serde_json::json!({"path": "/tmp/x"}));
        assert_eq!(back.effective_input, serde_json::json!({"path": "/tmp/x"}));
        assert_eq!(back.outcome_kind(), "completed");
        assert_eq!(
            back.outcome.result().map(|r| r.output.as_str()),
            Some("hello"),
        );
        assert_eq!(back.provenance.provider, "anthropic");
        assert_eq!(back.provenance.model, "claude-sonnet-4-5-20250929");
    }

    #[test]
    fn every_outcome_serialises_with_snake_case_tag() {
        // The outcome is internally tagged (`#[serde(tag = "kind")]`): the
        // snake_case tag sits next to the variant's own fields. Durable
        // audit tables and dashboards key on that tag, so it must stay
        // stable and must agree with `kind()` for every variant.
        for (outcome, expected) in outcome_per_variant() {
            let text = serde_json::to_string(&sample_record(outcome)).unwrap();

            let json: serde_json::Value = serde_json::from_str(&text).unwrap();
            assert_eq!(json["outcome"]["kind"], expected);

            // The tag must also be enough to select the variant on the way
            // back in.
            let back: ToolAuditRecord = serde_json::from_str(&text).unwrap();
            assert_eq!(back.outcome_kind(), expected);
        }

        // Variant fields are flattened beside the tag, not nested under it.
        let record = sample_record(ToolAuditOutcome::Blocked {
            reason: "policy".into(),
        });
        let json = serde_json::to_value(&record).unwrap();
        assert_eq!(json["outcome"]["kind"], "blocked");
        assert_eq!(json["outcome"]["reason"], "policy");
    }
}