entelix_agents/agent/event.rs
1//! `AgentEvent<S>` — runtime events the agent emits during a turn.
2//!
3//! ## Two surfaces, one type
4//!
5//! - **LLM-facing**: state inside `Complete { state }` round-trips
6//! into the next graph turn when an agent is composed inside a
7//! larger graph. Sinks render the same value for observability.
8//! - **Observability-facing**: every variant carries a `run_id` for
9//! correlation; `OTel` sinks stamp it onto `entelix.run_id` span
10//! attributes without the agent itself reading the field.
11//!
12//! ## Lifecycle contract
13//!
14//! Each run attempts to emit `Started{run_id}` followed by exactly
15//! one of `Complete{run_id, ...}`, `Failed{run_id, ...}`, or
16//! `Interrupted{run_id, ...}` with the same `run_id`. The three
17//! terminal variants are mutually exclusive — a successful run
18//! sends `Complete`, an interruption sends `Interrupted`, every
19//! other failure sends `Failed`. Tool variants (`ToolStart` /
20//! `ToolComplete` / `ToolError`) are interleaved between the
21//! book-ends as the agent's inner graph dispatches tools.
22//!
23//! Sink-delivery semantics: `Started` and `Complete` are
24//! must-succeed (`?` on the send), so a sink that errors on
25//! delivery aborts the run before its terminal partner fires —
26//! the caller still receives the typed sink error via
27//! `Result::Err`. `Failed` and `Interrupted` are best-effort
28//! (`let _ = …`) so the original error never gets swallowed by a
29//! secondary sink failure. Consumers that need lossless lifecycle
30//! delivery wire a `FailOpenSink` adapter or a `BroadcastSink` so
31//! a single subscriber drop never affects the run.
32//!
33//! The three terminal variants are mutually exclusive and carry
34//! distinct semantics:
35//! - `Complete` — successful termination, the agent produced a final
36//! state.
37//! - `Failed` — unsuccessful termination, the agent ran out of
38//! options. `kind: FailureKind` discriminates the cause.
39//! - `Interrupted` — *resumable* pause. A graph node, tool, or
40//! middleware layer raised `Error::Interrupted` (HITL gate,
41//! scheduled pause, operator-defined interrupt). The caller
42//! persists a checkpoint and resumes later via
43//! `CompiledGraph::resume_with(Command)`. Distinct from `Failed`
44//! because the run is *not* over — calling it a failure would
45//! pollute dashboards and trigger spurious alerting.
46//!
47//! `#[non_exhaustive]` keeps adding variants forward-compatible —
48//! consumer `match` arms always need a fallback.
49//!
50//! ## Relationship to [`entelix_session::GraphEvent`]
51//!
52//! `AgentEvent<S>` is the **runtime-side superset** of the durable
53//! audit log entry [`entelix_session::GraphEvent`]:
54//!
//! - **Runtime-only variants** — `Started`, `Complete`, `Failed`,
//!   `Interrupted`, `ToolCallApproved`, `ToolCallDenied`, plus the
//!   `tool_version` / `duration_ms` metric fields on the tool
//!   variants — exist for telemetry and per-run correlation. They
//!   have no audit projection: durable audit for interrupts rides
//!   [`entelix_core::AuditSink::record_interrupted`] fired by the
//!   graph dispatch loop, so an `AgentEventSink` consumer that
//!   calls `to_graph_event` cannot double-emit the pause event.
63//! - **Audit-projecting variants** — `ToolStart` / `ToolComplete` /
64//! `ToolError` — map onto `GraphEvent::ToolCall` /
65//! `GraphEvent::ToolResult` via [`AgentEvent::to_graph_event`].
66//!
67//! The projection is the single source of truth: an operator
68//! wiring both an `AgentEventSink` (for telemetry) and a
69//! `SessionGraph` (for durable audit) routes tool emissions
70//! through this method rather than constructing `GraphEvent`
71//! independently — the two channels record the same fact through
72//! one construction path.
73
74use chrono::{DateTime, Utc};
75use serde_json::Value;
76
77use entelix_core::Error;
78use entelix_core::ErrorEnvelope;
79use entelix_core::InterruptionKind;
80use entelix_core::RenderedForLlm;
81use entelix_core::TenantId;
82use entelix_core::ToolErrorKind;
83use entelix_core::UsageSnapshot;
84use entelix_core::ir::ToolResultContent;
85use entelix_session::GraphEvent;
86
/// Cause of an `AgentEvent::Failed` event — the typed discriminator
/// dashboards split on instead of pattern-matching the inner `Error`.
///
/// Variants are populated from [`Self::from_error`] at the single
/// `Failed`-emit site inside `Agent::execute`. Every classifier arm
/// maps to exactly one variant; the catch-all arm required by
/// `Error`'s `#[non_exhaustive]` shape routes to
/// [`Self::Unclassified`] *with* a `tracing::warn!` so SDK-version
/// drift surfaces as its own dashboard bucket (not absorbed into
/// the legitimate [`Self::Internal`] count) and as an operator log
/// line — invariant 15: no silent fallback that masks the drift.
///
/// Each variant has a stable snake-case string identifier exposed
/// by [`Self::wire_id`].
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum FailureKind {
    /// A classified tool failure was escalated to terminal by
    /// `ToolErrorPolicyLayer`. Carries the matched
    /// [`ToolErrorKind`] (the operator's policy input) and the
    /// dispatched tool's name so dashboards can split
    /// "BigQuery auth rotated" from "Salesforce quota cap reached"
    /// without joining against a separate event.
    ToolErrorTerminal {
        /// Classified kind of the underlying tool error.
        kind: ToolErrorKind,
        /// Name of the tool that produced the failure.
        tool_name: String,
    },
    /// `RunBudget` axis cap fired — operator-class throttling rather
    /// than tool / model failure.
    UsageLimitExceeded,
    /// `ExecutionContext` cancellation token fired.
    Cancelled,
    /// `ExecutionContext` deadline hit.
    DeadlineExceeded,
    /// Model call failed at transport, codec, or auth layer. The
    /// model never produced a complete turn.
    ModelDispatch,
    /// Caller / programmer-error category: invalid request shape,
    /// schema mismatch, internal config drift, JSON serialisation
    /// failure at an entelix-managed boundary. Distinct from
    /// [`Self::ToolErrorTerminal`] carrying
    /// `kind == ToolErrorKind::Internal` — that variant is
    /// operator-policy-driven (the operator chose to terminate on
    /// the `Internal` bucket); this one is the raw programmer
    /// signal.
    Internal,
    /// `Error` variant the classifier does not recognise — landed
    /// in `entelix-core` without a paired classifier arm here.
    /// Surfaces as its own dashboard bucket so SDK upgrades that
    /// introduce a new `Error` shape are immediately visible
    /// without polluting the [`Self::Internal`] count. Operator
    /// action: update [`Self::from_error`] to classify the new
    /// variant explicitly.
    Unclassified,
}
141
142impl FailureKind {
143 /// Classify an `Error` into a `FailureKind`. Used at the single
144 /// `AgentEvent::Failed` emit site so the event's `kind` field
145 /// stays in sync with the typed error returned to the caller.
146 ///
147 /// `Error::Interrupted` is **not** classified here — the agent
148 /// run loop routes it through [`AgentEvent::Interrupted`]
149 /// instead of `Failed`, because HITL pause-and-resume is
150 /// resumable and "failure" semantics would pollute dashboards.
151 /// Reaching the catch-all with `Error::Interrupted` means the
152 /// run loop missed it, which is the same SDK-drift signal as
153 /// any other unrecognised variant.
154 ///
155 /// `Error` is `#[non_exhaustive]` so a `_` arm is mandatory —
156 /// the arm routes to [`Self::Unclassified`] (not
157 /// [`Self::Internal`]) so SDK-version drift surfaces as a
158 /// distinct dashboard bucket rather than masquerading as a
159 /// programmer-error.
160 #[must_use]
161 pub fn from_error(err: &Error) -> Self {
162 match err {
163 Error::ToolErrorTerminal {
164 kind, tool_name, ..
165 } => Self::ToolErrorTerminal {
166 kind: *kind,
167 tool_name: tool_name.clone(),
168 },
169 Error::UsageLimitExceeded(_) => Self::UsageLimitExceeded,
170 Error::Cancelled => Self::Cancelled,
171 Error::DeadlineExceeded => Self::DeadlineExceeded,
172 Error::Provider { .. } | Error::Auth(_) | Error::ModelRetry { .. } => {
173 Self::ModelDispatch
174 }
175 Error::InvalidRequest(_) | Error::Config(_) | Error::Serde(_) => Self::Internal,
176 other => {
177 tracing::warn!(
178 target: "entelix_agents::failure_kind",
179 error = ?other,
180 "FailureKind::from_error catalog drift — add an explicit classifier arm"
181 );
182 Self::Unclassified
183 }
184 }
185 }
186
187 /// Stable snake-case identifier surfaced through OTel
188 /// (`entelix.failure.kind`), structured logs, and dashboards.
189 /// Renaming a value is a breaking change for downstream
190 /// consumers keying off the string.
191 #[must_use]
192 pub const fn wire_id(&self) -> &'static str {
193 match self {
194 Self::ToolErrorTerminal { .. } => "tool_error_terminal",
195 Self::UsageLimitExceeded => "usage_limit_exceeded",
196 Self::Cancelled => "cancelled",
197 Self::DeadlineExceeded => "deadline_exceeded",
198 Self::ModelDispatch => "model_dispatch",
199 Self::Internal => "internal",
200 Self::Unclassified => "unclassified",
201 }
202 }
203}
204
/// Runtime events emitted by the agent during a single
/// `execute` / `execute_stream` call.
///
/// Every variant carries a `run_id` and a `tenant_id`; only
/// `Complete` carries the agent state `S`.
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum AgentEvent<S> {
    /// Run opened. Sinks use this to mark span beginnings, allocate
    /// per-run state, and emit "session opened" telemetry.
    Started {
        /// Per-run correlation id (UUID v7). Stable for the
        /// duration of the run; matches the id on every subsequent
        /// event for this same call.
        run_id: String,
        /// Tenant scope this event belongs to (invariant 11 —
        /// every emit site stamps `ctx.tenant_id().clone()`). Audit /
        /// billing / replay consumers key off this field directly
        /// instead of correlating through a separate `run_id` →
        /// `tenant_id` lookup.
        tenant_id: TenantId,
        /// Run id of the calling agent when this run was dispatched
        /// from a parent (sub-agent fan-out, supervisor handoff).
        /// `None` for top-level runs. LangSmith-style trace-tree
        /// consumers reconstruct the hierarchy from
        /// `(run_id, parent_run_id)` edges across these events.
        parent_run_id: Option<String>,
        /// Agent identifier configured on `AgentBuilder::name(...)`.
        agent: String,
    },

    /// One tool dispatch began. Emitted by
    /// [`crate::agent::tool_event_layer::ToolEventLayer`] when wired
    /// into the tool registry. Absent when the layer is not wired
    /// (the agent runtime itself does not generate tool events).
    ToolStart {
        /// Run correlation id.
        run_id: String,
        /// Tenant scope this event belongs to (see `Started`).
        tenant_id: TenantId,
        /// Stable tool-use id matching the originating
        /// `ContentPart::ToolUse`.
        tool_use_id: String,
        /// Tool name being dispatched.
        tool: String,
        /// Tool version (`Tool::version()`) when the tool advertises
        /// one — useful for distinguishing behaviour changes between
        /// otherwise-identically-named tool revisions.
        tool_version: Option<String>,
        /// Tool input (already JSON-validated by the tool's schema).
        input: Value,
    },

    /// One tool dispatch finished successfully.
    ToolComplete {
        /// Run correlation id.
        run_id: String,
        /// Tenant scope this event belongs to (see `Started`).
        tenant_id: TenantId,
        /// Stable tool-use id matching the corresponding `ToolStart`.
        tool_use_id: String,
        /// Tool name (echoed for sink convenience).
        tool: String,
        /// Tool version echoed from the matching `ToolStart` so sinks
        /// can correlate completion telemetry without retaining
        /// per-`tool_use_id` state.
        tool_version: Option<String>,
        /// Wall-clock duration measured by the layer.
        duration_ms: u64,
        /// JSON output the tool produced. Sinks that persist tool
        /// audit logs read this directly; PII redaction happens at
        /// the policy layer before this event is emitted, so the
        /// payload is safe for storage.
        output: Value,
    },

    /// One tool dispatch failed.
    ToolError {
        /// Run correlation id.
        run_id: String,
        /// Tenant scope this event belongs to (see `Started`).
        tenant_id: TenantId,
        /// Stable tool-use id matching the corresponding `ToolStart`.
        tool_use_id: String,
        /// Tool name (echoed for sink convenience).
        tool: String,
        /// Tool version echoed from the matching `ToolStart` so sinks
        /// see the same provenance on the failure path as on success.
        tool_version: Option<String>,
        /// Operator-facing error message (`Display` form, includes
        /// vendor status, source chain). Sinks, OTel, and log
        /// destinations consume this.
        error: String,
        /// LLM-facing error message wrapped in a sealed
        /// [`RenderedForLlm`] carrier. The carrier's constructor is
        /// `pub(crate)` to `entelix-core`, so the only path from a
        /// raw `String` to this field is
        /// [`entelix_core::LlmRenderable::for_llm`] — emit sites
        /// cannot fabricate model-facing content. The audit-log
        /// projection ([`Self::to_graph_event`]) extracts the inner
        /// rendering into `GraphEvent::ToolResult` so replay
        /// reconstructs the model's view without re-leaking
        /// operator content (invariant #16).
        error_for_llm: RenderedForLlm<String>,
        /// Typed wire shape produced by
        /// [`entelix_core::Error::envelope`]. Bundles `wire_code`
        /// (i18n key / metric label), `wire_class` (responsibility
        /// split), `retry_after_secs` (vendor `Retry-After` hint),
        /// and `provider_status` (raw HTTP status) so sinks, audit
        /// replay, SSE adapters, and FE rate-limit timers all read
        /// one `Copy` value instead of pattern-matching the inner
        /// error variant. Patch-version-stable.
        envelope: ErrorEnvelope,
        /// Classified [`ToolErrorKind`] (`ToolErrorKind::classify`
        /// of the inner error). Dashboards split error volumes by
        /// kind without re-classifying from the envelope; the
        /// reasoning-loop layer reads the same classification to
        /// decide whether to escalate to terminal
        /// (`ToolErrorPolicyLayer`).
        kind: ToolErrorKind,
        /// Wall-clock duration measured by the layer.
        duration_ms: u64,
    },

    /// Run terminated with the inner runnable's error. The matching
    /// `Started{run_id}` is always present in the same stream.
    /// Caller-facing streams additionally surface the typed error
    /// via `Result::Err`; sinks see only this event.
    Failed {
        /// Run correlation id.
        run_id: String,
        /// Tenant scope this event belongs to (see `Started`).
        tenant_id: TenantId,
        /// Lean error message (`Display` form).
        error: String,
        /// Typed wire shape produced by
        /// [`entelix_core::Error::envelope`] — see `ToolError` for
        /// the field roster. Replay / audit / metric / SSE consumers
        /// route off this field instead of parsing `error` prose.
        envelope: ErrorEnvelope,
        /// Typed cause discriminator — `ToolErrorTerminal {kind, ..}`,
        /// `UsageLimitExceeded`, `Cancelled`, `DeadlineExceeded`,
        /// `ModelDispatch`, `Internal`, `Unclassified`. Dashboards
        /// split "operator-class failure caught early" from "model
        /// dispatch transport blip" without parsing `error` prose.
        /// Populated by [`FailureKind::from_error`] at the emit site.
        kind: FailureKind,
    },

    /// Run paused at an [`entelix_core::Error::Interrupted`] raise — a HITL
    /// gate, a scheduled `interrupt_before` / `interrupt_after`, or
    /// any tool / graph node calling `interrupt(payload)`. Distinct
    /// from `Failed` because the run is **resumable** — the runtime
    /// has persisted a checkpoint at the pre-interrupt state and the
    /// caller continues via `CompiledGraph::resume_with(Command)`.
    /// Dashboards split "human waiting" from "run failed" without
    /// pattern-matching the inner error.
    ///
    /// Caller-facing streams additionally surface the typed
    /// `Error::Interrupted` via `Result::Err` after this event;
    /// sinks see only this event.
    Interrupted {
        /// Run correlation id.
        run_id: String,
        /// Tenant scope this event belongs to (see `Started`).
        tenant_id: TenantId,
        /// Typed reason — `Custom` for operator-defined pauses,
        /// `ApprovalPending { tool_use_id }` for tool-approval
        /// pauses raised by `ApprovalLayer`, `ScheduledPause` for
        /// `interrupt_before` / `interrupt_after` graph schedules.
        kind: InterruptionKind,
        /// Operator free-form data describing what the resumer needs
        /// to know — `Value::Null` for typed kinds, carrier for
        /// `Custom`.
        payload: Value,
    },

    /// Run terminated successfully with the agent's terminal state.
    Complete {
        /// Run correlation id.
        run_id: String,
        /// Tenant scope this event belongs to (see `Started`).
        tenant_id: TenantId,
        /// Final state returned by the inner runnable.
        state: S,
        /// Frozen [`UsageSnapshot`] of the [`entelix_core::RunBudget`]
        /// counters at the moment the inner runnable returned.
        /// `None` when no budget was attached to the
        /// [`entelix_core::ExecutionContext`]. Mirrors the
        /// `usage` field on
        /// [`crate::AgentRunResult`] so streaming and one-shot
        /// surfaces observe the same terminal artifact.
        usage: Option<UsageSnapshot>,
    },

    /// HITL approver decided to permit one tool dispatch. Emitted by
    /// [`crate::agent::ApprovalLayer`] before the matching `ToolStart`
    /// fires. Only present when an `Approver` is wired (default
    /// agents skip approval and never emit this variant).
    ToolCallApproved {
        /// Run correlation id.
        run_id: String,
        /// Tenant scope this event belongs to (see `Started`).
        tenant_id: TenantId,
        /// Stable tool-use id matching the originating
        /// `ContentPart::ToolUse`. Pairs with the subsequent
        /// `ToolStart` / `ToolComplete` / `ToolError`.
        tool_use_id: String,
        /// Tool name being approved.
        tool: String,
    },

    /// HITL approver decided to reject one tool dispatch. The
    /// matching `ToolStart` does NOT fire — denial short-circuits
    /// the dispatch path. The agent observes the rejection as
    /// `Error::InvalidRequest` carrying the same reason.
    ToolCallDenied {
        /// Run correlation id.
        run_id: String,
        /// Tenant scope this event belongs to (see `Started`).
        tenant_id: TenantId,
        /// Stable tool-use id of the rejected dispatch.
        tool_use_id: String,
        /// Tool name being denied.
        tool: String,
        /// Approver-supplied rationale.
        reason: String,
    },
}
431
432impl<S> AgentEvent<S> {
433 /// Project this runtime event onto the durable audit-log shape
434 /// `GraphEvent`. Returns `None` when the variant has no audit
435 /// projection — `Started`, `Complete`, `Failed` are runtime-only
436 /// lifecycle markers that do not belong in the per-thread audit
437 /// trail.
438 ///
439 /// The `timestamp` argument is supplied by the caller (typically
440 /// `Utc::now()` at emit time) so this method stays pure: a single
441 /// runtime event projected at two different points in time
442 /// produces two distinct (but otherwise equal) `GraphEvent`s.
443 ///
444 /// Lossy projection notes — `run_id`, `tool_version`, and
445 /// `duration_ms` are dropped because the audit log keys
446 /// correlation by `tool_use_id` + `timestamp` and is not the
447 /// home for runtime metrics. Operators who need run-level
448 /// correlation in audit do it at the sink layer (e.g. by
449 /// stamping a thread tag prior to append).
450 ///
451 /// `ToolError` is mapped onto a `GraphEvent::ToolResult` with
452 /// `is_error: true` and the error message carried as text
453 /// content — preserving the same correlation key
454 /// (`tool_use_id`) so a session replay can pair the failed
455 /// dispatch back with the originating `ToolCall`.
456 pub fn to_graph_event(&self, timestamp: DateTime<Utc>) -> Option<GraphEvent> {
457 match self {
458 // Lifecycle / approval markers are runtime-only — the
459 // audit log records the actual `ToolCall` / `ToolResult`
460 // pair, not the surrounding gate decisions.
461 // Lifecycle markers are runtime-only — the durable
462 // audit channel for `Interrupted` rides
463 // `AuditSink::record_interrupted` fired by the graph
464 // dispatch loop, so an AgentEventSink consumer that
465 // routes every event through this projection cannot
466 // double-write `GraphEvent::Interrupt`.
467 Self::Started { .. }
468 | Self::Complete { .. }
469 | Self::Failed { .. }
470 | Self::Interrupted { .. }
471 | Self::ToolCallApproved { .. }
472 | Self::ToolCallDenied { .. } => None,
473 Self::ToolStart {
474 tool_use_id,
475 tool,
476 input,
477 ..
478 } => Some(GraphEvent::ToolCall {
479 id: tool_use_id.clone(),
480 name: tool.clone(),
481 input: input.clone(),
482 timestamp,
483 }),
484 Self::ToolComplete {
485 tool_use_id,
486 tool,
487 output,
488 ..
489 } => Some(GraphEvent::ToolResult {
490 tool_use_id: tool_use_id.clone(),
491 name: tool.clone(),
492 content: ToolResultContent::Json(output.clone()),
493 is_error: false,
494 timestamp,
495 }),
496 Self::ToolError {
497 tool_use_id,
498 tool,
499 error_for_llm,
500 ..
501 } => Some(GraphEvent::ToolResult {
502 tool_use_id: tool_use_id.clone(),
503 name: tool.clone(),
504 // Audit log carries the LLM-facing rendering — replay
505 // and resume paths reconstruct conversation history
506 // from `GraphEvent::ToolResult`, so the content here
507 // becomes the model's view (invariant #16). The full
508 // operator-facing `error` continues to flow through
509 // the event sink and OTel.
510 content: ToolResultContent::Text(error_for_llm.as_inner().clone()),
511 is_error: true,
512 timestamp,
513 }),
514 }
515 }
516
517 /// Erase the agent-state type parameter, replacing
518 /// [`Self::Complete::state`] with the unit value. Every other
519 /// variant rebuilds with identical field values — they carry no
520 /// state. Enables a single audit / SSE / OTel sink (typed
521 /// [`AgentEventSink<()>`](crate::agent::AgentEventSink)) to fan
522 /// in from heterogeneous agents (`Agent<ReActState>`,
523 /// `Agent<SupervisorState>`, …) through the
524 /// [`StateErasureSink`](crate::agent::StateErasureSink) adapter.
525 ///
526 /// Operators consuming the post-erasure event tree retain access
527 /// to every header field (`run_id`, `tenant_id`, `parent_run_id`)
528 /// and every per-variant payload (tool inputs / outputs, error
529 /// envelope, usage snapshot) — only the agent's terminal state
530 /// is dropped, which is the field a state-agnostic sink could
531 /// not type-erase anyway.
532 ///
533 /// # Examples
534 ///
535 /// ```
536 /// use entelix_agents::AgentEvent;
537 /// use entelix_core::TenantId;
538 ///
539 /// let typed: AgentEvent<u32> = AgentEvent::Complete {
540 /// run_id: "r1".into(),
541 /// tenant_id: TenantId::new("t1"),
542 /// state: 42_u32,
543 /// usage: None,
544 /// };
545 /// let erased: AgentEvent<()> = typed.erase_state();
546 /// match erased {
547 /// AgentEvent::Complete { state, .. } => assert_eq!(state, ()),
548 /// _ => unreachable!(),
549 /// }
550 /// ```
551 #[allow(clippy::too_many_lines)]
552 // 1-to-1 exhaustive variant rebuild — splitting hurts readability and the line count is structural, not accidental.
553 #[must_use]
554 pub fn erase_state(self) -> AgentEvent<()> {
555 match self {
556 Self::Started {
557 run_id,
558 tenant_id,
559 parent_run_id,
560 agent,
561 } => AgentEvent::Started {
562 run_id,
563 tenant_id,
564 parent_run_id,
565 agent,
566 },
567 Self::ToolStart {
568 run_id,
569 tenant_id,
570 tool_use_id,
571 tool,
572 tool_version,
573 input,
574 } => AgentEvent::ToolStart {
575 run_id,
576 tenant_id,
577 tool_use_id,
578 tool,
579 tool_version,
580 input,
581 },
582 Self::ToolComplete {
583 run_id,
584 tenant_id,
585 tool_use_id,
586 tool,
587 tool_version,
588 duration_ms,
589 output,
590 } => AgentEvent::ToolComplete {
591 run_id,
592 tenant_id,
593 tool_use_id,
594 tool,
595 tool_version,
596 duration_ms,
597 output,
598 },
599 Self::ToolError {
600 run_id,
601 tenant_id,
602 tool_use_id,
603 tool,
604 tool_version,
605 error,
606 error_for_llm,
607 envelope,
608 kind,
609 duration_ms,
610 } => AgentEvent::ToolError {
611 run_id,
612 tenant_id,
613 tool_use_id,
614 tool,
615 tool_version,
616 error,
617 error_for_llm,
618 envelope,
619 kind,
620 duration_ms,
621 },
622 Self::Failed {
623 run_id,
624 tenant_id,
625 error,
626 envelope,
627 kind,
628 } => AgentEvent::Failed {
629 run_id,
630 tenant_id,
631 error,
632 envelope,
633 kind,
634 },
635 Self::Interrupted {
636 run_id,
637 tenant_id,
638 kind,
639 payload,
640 } => AgentEvent::Interrupted {
641 run_id,
642 tenant_id,
643 kind,
644 payload,
645 },
646 Self::Complete {
647 run_id,
648 tenant_id,
649 state: _,
650 usage,
651 } => AgentEvent::Complete {
652 run_id,
653 tenant_id,
654 state: (),
655 usage,
656 },
657 Self::ToolCallApproved {
658 run_id,
659 tenant_id,
660 tool_use_id,
661 tool,
662 } => AgentEvent::ToolCallApproved {
663 run_id,
664 tenant_id,
665 tool_use_id,
666 tool,
667 },
668 Self::ToolCallDenied {
669 run_id,
670 tenant_id,
671 tool_use_id,
672 tool,
673 reason,
674 } => AgentEvent::ToolCallDenied {
675 run_id,
676 tenant_id,
677 tool_use_id,
678 tool,
679 reason,
680 },
681 }
682 }
683}
684
// Unit tests for the audit projection, failure classification, and
// state erasure. `unwrap` is acceptable here: a failed unwrap is a
// test failure, which is exactly the signal we want.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Fixed timestamp so projections are comparable across calls.
    fn ts() -> DateTime<Utc> {
        chrono::DateTime::parse_from_rfc3339("2026-04-29T12:00:00Z")
            .unwrap()
            .with_timezone(&Utc)
    }

    #[test]
    fn lifecycle_variants_have_no_audit_projection() {
        let tenant = TenantId::new("t-test");
        let started: AgentEvent<u32> = AgentEvent::Started {
            run_id: "r1".into(),
            tenant_id: tenant.clone(),
            parent_run_id: None,
            agent: "a".into(),
        };
        let complete: AgentEvent<u32> = AgentEvent::Complete {
            run_id: "r1".into(),
            tenant_id: tenant.clone(),
            state: 7,
            usage: None,
        };
        let failed: AgentEvent<u32> = AgentEvent::Failed {
            run_id: "r1".into(),
            tenant_id: tenant,
            error: "boom".into(),
            envelope: entelix_core::Error::config("boom").envelope(),
            kind: FailureKind::Internal,
        };
        assert!(started.to_graph_event(ts()).is_none());
        assert!(complete.to_graph_event(ts()).is_none());
        assert!(failed.to_graph_event(ts()).is_none());
    }

    #[test]
    fn approval_variants_have_no_audit_projection() {
        // Gate decisions are runtime-only: the audit log records the
        // `ToolCall` / `ToolResult` pair, not the approval around it.
        let approved: AgentEvent<u32> = AgentEvent::ToolCallApproved {
            run_id: "r1".into(),
            tenant_id: TenantId::new("t-test"),
            tool_use_id: "tu-1".into(),
            tool: "double".into(),
        };
        let denied: AgentEvent<u32> = AgentEvent::ToolCallDenied {
            run_id: "r1".into(),
            tenant_id: TenantId::new("t-test"),
            tool_use_id: "tu-1".into(),
            tool: "double".into(),
            reason: "not allowed".into(),
        };
        assert!(approved.to_graph_event(ts()).is_none());
        assert!(denied.to_graph_event(ts()).is_none());
    }

    #[test]
    fn tool_start_projects_to_graph_event_tool_call() {
        let event: AgentEvent<u32> = AgentEvent::ToolStart {
            run_id: "r1".into(),
            tenant_id: TenantId::new("t-test"),
            tool_use_id: "tu-1".into(),
            tool: "double".into(),
            tool_version: Some("1.2.0".into()),
            input: json!({"n": 21}),
        };
        let projected = event.to_graph_event(ts()).unwrap();
        match projected {
            GraphEvent::ToolCall {
                id,
                name,
                input,
                timestamp,
            } => {
                assert_eq!(id, "tu-1");
                assert_eq!(name, "double");
                assert_eq!(input, json!({"n": 21}));
                assert_eq!(timestamp, ts());
            }
            other => panic!("expected ToolCall, got {other:?}"),
        }
    }

    #[test]
    fn tool_complete_projects_to_successful_tool_result() {
        let event: AgentEvent<u32> = AgentEvent::ToolComplete {
            run_id: "r1".into(),
            tenant_id: TenantId::new("t-test"),
            tool_use_id: "tu-1".into(),
            tool: "double".into(),
            tool_version: Some("1.2.0".into()),
            duration_ms: 42,
            output: json!({"doubled": 42}),
        };
        let projected = event.to_graph_event(ts()).unwrap();
        match projected {
            GraphEvent::ToolResult {
                tool_use_id,
                name,
                content,
                is_error,
                timestamp,
            } => {
                assert_eq!(tool_use_id, "tu-1");
                assert_eq!(name, "double");
                assert!(!is_error, "successful tool dispatch must not flag is_error");
                assert_eq!(timestamp, ts());
                match content {
                    ToolResultContent::Json(v) => assert_eq!(v, json!({"doubled": 42})),
                    other => panic!("expected Json content, got {other:?}"),
                }
            }
            other => panic!("expected ToolResult, got {other:?}"),
        }
    }

    #[test]
    fn tool_error_projects_to_error_flagged_tool_result_using_llm_facing_text() {
        use entelix_core::{Error, LlmRenderable};
        // The carrier `RenderedForLlm<String>` is sealed to
        // `entelix-core` — there is no way to fabricate one from a
        // raw `String` here. The only path to populate
        // `error_for_llm` is `LlmRenderable::for_llm` on a value
        // that implements the trait. `Error::provider_http(503,
        // ...).for_llm()` produces the canonical "upstream model
        // error" rendering through the same code path the
        // production tool-event layer uses, so the test exercises
        // the real boundary instead of stubbing past it.
        let source = Error::provider_http(503, "vendor down");
        let envelope = source.envelope();
        let kind = ToolErrorKind::classify(&source);
        let llm_facing = source.for_llm();
        let event: AgentEvent<u32> = AgentEvent::ToolError {
            run_id: "r1".into(),
            tenant_id: TenantId::new("t-test"),
            tool_use_id: "tu-1".into(),
            tool: "double".into(),
            tool_version: None,
            // Operator-facing text — full Display, includes vendor
            // status / source chain. The audit projection MUST NOT
            // surface this to the model channel.
            error: "provider returned 503: vendor down".into(),
            // LLM-facing rendering — short, actionable, no vendor
            // identifiers. The audit projection picks this.
            error_for_llm: llm_facing,
            envelope,
            kind,
            duration_ms: 7,
        };
        let projected = event.to_graph_event(ts()).unwrap();
        match projected {
            GraphEvent::ToolResult {
                tool_use_id,
                name,
                content,
                is_error,
                ..
            } => {
                assert_eq!(tool_use_id, "tu-1");
                assert_eq!(name, "double");
                assert!(is_error, "ToolError must surface as is_error: true");
                match content {
                    ToolResultContent::Text(s) => {
                        assert_eq!(s, "upstream model error");
                        assert!(
                            !s.contains("provider returned"),
                            "audit log content must use the LLM-facing rendering, not the operator-facing one: {s}"
                        );
                        assert!(
                            !s.contains("503"),
                            "audit log must not leak vendor status code: {s}"
                        );
                    }
                    other => panic!("expected Text content for error, got {other:?}"),
                }
            }
            other => panic!("expected ToolResult, got {other:?}"),
        }
    }

    #[test]
    fn projection_is_deterministic_across_calls() {
        // Same event projected with the same timestamp produces the
        // same GraphEvent — required for replay coherence (two
        // operators running the same projection at the same wall
        // clock get the same audit row).
        let event: AgentEvent<u32> = AgentEvent::ToolStart {
            run_id: "r1".into(),
            tenant_id: TenantId::new("t-test"),
            tool_use_id: "tu-1".into(),
            tool: "double".into(),
            tool_version: None,
            input: json!({"n": 21}),
        };
        let a = event.to_graph_event(ts()).unwrap();
        let b = event.to_graph_event(ts()).unwrap();
        assert_eq!(a, b);
    }

    #[test]
    fn failure_kind_classifies_cancellation_and_deadline() {
        // Unit `Error` variants map one-to-one onto their
        // `FailureKind` counterparts.
        assert_eq!(
            FailureKind::from_error(&Error::Cancelled),
            FailureKind::Cancelled
        );
        assert_eq!(
            FailureKind::from_error(&Error::DeadlineExceeded),
            FailureKind::DeadlineExceeded
        );
    }

    #[test]
    fn failure_kind_wire_ids_are_stable() {
        // Downstream dashboards key off these strings; a rename is a
        // breaking change and must fail this test.
        let terminal = FailureKind::ToolErrorTerminal {
            kind: ToolErrorKind::classify(&Error::Cancelled),
            tool_name: "double".into(),
        };
        assert_eq!(terminal.wire_id(), "tool_error_terminal");
        assert_eq!(
            FailureKind::UsageLimitExceeded.wire_id(),
            "usage_limit_exceeded"
        );
        assert_eq!(FailureKind::Cancelled.wire_id(), "cancelled");
        assert_eq!(FailureKind::DeadlineExceeded.wire_id(), "deadline_exceeded");
        assert_eq!(FailureKind::ModelDispatch.wire_id(), "model_dispatch");
        assert_eq!(FailureKind::Internal.wire_id(), "internal");
        assert_eq!(FailureKind::Unclassified.wire_id(), "unclassified");
    }

    #[test]
    fn erase_state_drops_only_the_terminal_state() {
        let tenant = TenantId::new("t-test");

        // `Complete` loses its typed state but keeps every other field.
        let complete: AgentEvent<u32> = AgentEvent::Complete {
            run_id: "r1".into(),
            tenant_id: tenant.clone(),
            state: 7,
            usage: None,
        };
        let expected_complete: AgentEvent<()> = AgentEvent::Complete {
            run_id: "r1".into(),
            tenant_id: tenant.clone(),
            state: (),
            usage: None,
        };
        assert_eq!(complete.erase_state(), expected_complete);

        // Stateless variants are rebuilt field-for-field.
        let started: AgentEvent<u32> = AgentEvent::Started {
            run_id: "r1".into(),
            tenant_id: tenant.clone(),
            parent_run_id: Some("r0".into()),
            agent: "a".into(),
        };
        let expected_started: AgentEvent<()> = AgentEvent::Started {
            run_id: "r1".into(),
            tenant_id: tenant,
            parent_run_id: Some("r0".into()),
            agent: "a".into(),
        };
        assert_eq!(started.erase_state(), expected_started);
    }
}