// entelix_session/event.rs

//! `GraphEvent` — the audit-trail unit appended to a `SessionGraph`.
//!
//! Every event is timestamped and serializable, so a persisted log can be
//! replayed verbatim by a fresh process (Anthropic-style `wake(thread_id)`).
//! Events are **strictly additive** — once written, never mutated.
6
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9
10use entelix_core::ir::{ContentPart, ModelWarning, ProviderEchoSnapshot, ToolResultContent, Usage};
11use entelix_core::rate_limit::RateLimitSnapshot;
12
/// One audit-log entry.
///
/// Aggregating these (oldest-to-newest) reconstructs the full conversation
/// trace for a thread. Branches and checkpoints are recorded inline so a
/// single linear scan is enough for replay.
///
/// # Wire format
///
/// Serialized as an internally tagged object: the variant name, rendered
/// in `snake_case`, is stored under the `kind` key next to the variant's
/// own fields. Because `kind` is taken by the discriminator, variant
/// fields avoid that name (see `interruption_kind` and `error_kind`).
///
/// # Common shape
///
/// Every variant carries a `timestamp` field, readable without matching
/// on the variant through [`GraphEvent::timestamp`].
///
/// The enum is `#[non_exhaustive]`, so a `match` outside this crate needs
/// a wildcard arm.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
#[non_exhaustive]
pub enum GraphEvent {
    /// User-authored input.
    UserMessage {
        /// Multi-part content (text, image, `tool_result`).
        content: Vec<ContentPart>,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// Assistant reply (after stream aggregation).
    AssistantMessage {
        /// Multi-part content (text, `tool_use`).
        content: Vec<ContentPart>,
        /// Token accounting if reported by the provider; `None`
        /// otherwise.
        usage: Option<Usage>,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// An auto-compaction adapter trimmed the working message slice.
    /// `dropped_chars` is the character cost the compactor removed
    /// (or summarised away); `retained_chars` is the cost the
    /// post-compaction slice carries forward. The pair lets dashboards
    /// detect drift between the threshold the operator wired and the
    /// actual trim each invocation produces.
    ContextCompacted {
        /// Character cost the compactor dropped.
        dropped_chars: usize,
        /// Character cost the post-compaction slice retained.
        retained_chars: usize,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// A tool was dispatched by the assistant.
    ToolCall {
        /// Stable tool-use id matching a future `ToolResult`.
        id: String,
        /// Registered tool name.
        name: String,
        /// Tool input as JSON.
        input: serde_json::Value,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// The dispatched tool returned.
    ToolResult {
        /// `ToolCall::id` this result resolves.
        tool_use_id: String,
        /// `ToolCall::name` this result resolves — required by
        /// codecs whose wire format keys correlation by name
        /// (Gemini's `functionResponse`) rather than id.
        name: String,
        /// Result payload.
        content: ToolResultContent,
        /// True if the tool reported an error.
        is_error: bool,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// A branch was forked off this session at the indicated event index.
    /// The new branch's thread id is recorded alongside.
    BranchCreated {
        /// Identifier of the forked sub-session.
        branch_id: String,
        /// Index in `events` (0-based) the branch diverged at.
        parent_event: usize,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// Marker tying this position in the audit log to a `Checkpointer`
    /// snapshot. Cross-tier reference for crash recovery flows that pair
    /// `SessionGraph` (Tier 2) with `StateGraph` checkpoints (Tier 1).
    CheckpointMarker {
        /// Stringified `entelix_graph::CheckpointId`.
        checkpoint_id: String,
        /// Thread the checkpoint was written under (typically same as
        /// the session's thread).
        thread_id: String,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// Codec / runtime advisory captured into the audit trail.
    Warning {
        /// Underlying advisory.
        warning: ModelWarning,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// Streaming thinking-content fragment captured into the audit
    /// trail. Aggregators fold consecutive deltas into a single
    /// `ContentPart::Thinking` when reconstructing a finalised
    /// message. Recording deltas individually keeps the audit log
    /// faithful to the wire — a replay that needs only the final
    /// block can fold the deltas, while a replay that needs per-token
    /// timing has the data.
    ThinkingDelta {
        /// Token text appended to the in-progress thinking block.
        text: String,
        /// Vendor-opaque round-trip tokens carried on this delta
        /// (Anthropic `signature_delta`, Gemini `thought_signature`
        /// on streamed parts, `OpenAI` Responses reasoning-item
        /// `encrypted_content`). Codecs pre-wrap into
        /// `ProviderEchoSnapshot` on decode; the audit log preserves
        /// the same opaque bytes for replay.
        ///
        /// Omitted from the serialized form when empty, and read back
        /// as an empty `Vec` when the key is absent.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        provider_echoes: Vec<ProviderEchoSnapshot>,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// Provider rate-limit snapshot at this position in the
    /// conversation. Operators reading the audit log can correlate a
    /// later throttling failure with the snapshot that warned them.
    /// Recorded inline rather than on a separate metric channel so
    /// the audit trail is self-contained for compliance review.
    RateLimit {
        /// Snapshot the codec extracted from response headers.
        snapshot: RateLimitSnapshot,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// HITL pause point — the runtime asked the host application for
    /// input. The matching resume signal lands in
    /// `entelix_graph::Command` outside the audit log; this event
    /// records that the pause happened, the typed reason, and what
    /// was visible to the human at the time.
    Interrupt {
        /// Typed pause reason — `Custom` for operator-defined
        /// pauses, `ApprovalPending { tool_use_id }` for tool-
        /// approval pauses raised by `ApprovalLayer`,
        /// `ScheduledPause { phase, node }` for the graph's
        /// `interrupt_before` / `interrupt_after` schedules. Audit
        /// consumers split compliance-reportable HITL approvals
        /// from scheduled pauses without parsing the free-form
        /// payload.
        ///
        /// The field is named `interruption_kind` (not `kind`)
        /// because the surrounding `#[serde(tag = "kind")]` already
        /// consumes `kind` as the variant discriminator — same
        /// reason the parallel `GraphEvent::ToolErrorTerminal::
        /// error_kind` field is renamed.
        interruption_kind: entelix_core::InterruptionKind,
        /// Operator-supplied payload describing the pause point.
        /// Free-form JSON so the agent recipe owns the schema; the
        /// audit log just persists it. `Value::Null` for typed
        /// `interruption_kind`s that carry their own structured
        /// detail.
        payload: serde_json::Value,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// The run was cancelled — either via cancellation token or via
    /// a deadline elapsing. Recording the reason inline lets a
    /// replay reconstruct partial-run audit traces faithfully.
    Cancelled {
        /// Lean reason string. Human-readable; not parsed downstream.
        reason: String,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// A sub-agent was dispatched from the parent's run. The parent
    /// `run_id` (recorded on the surrounding `AgentEvent::Started`)
    /// scopes the audit trail; this event ties the parent's
    /// position to the child's `sub_thread_id` so a replay can walk
    /// from parent to child without keying on heuristic timing.
    /// Managed-agent shape — every `Subagent::execute`
    /// call surfaces here as the canonical "brain passes hand"
    /// audit boundary.
    SubAgentInvoked {
        /// Stable identifier the parent uses to refer to the
        /// sub-agent (typically the `Subagent`'s configured name).
        agent_id: String,
        /// Thread the sub-agent ran under. Same as the parent's
        /// thread when the sub-agent shares state; a fresh value
        /// when the sub-agent runs in its own scope.
        sub_thread_id: String,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// A supervisor recipe handed control between named agents.
    /// Distinct from `SubAgentInvoked` — supervisor handoffs route
    /// inside one logical conversation, while sub-agent invocations
    /// open a child run.
    AgentHandoff {
        /// Agent name that finished this turn (`None` on the first
        /// supervisor turn where no agent has spoken yet).
        from: Option<String>,
        /// Agent name the supervisor routed to next.
        to: String,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// A run resumed from a prior checkpoint — either via
    /// `wake(thread_id)` after a crash or via `Command::Resume` from
    /// a HITL pause. Pairs with the `CheckpointMarker` whose id is
    /// referenced so a single linear replay stays coherent across
    /// the suspend / resume seam.
    Resumed {
        /// `CheckpointMarker::checkpoint_id` the resume hydrated
        /// from. Empty string when the resume happened from a fresh
        /// state (operator built the resume payload by hand).
        from_checkpoint: String,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// A long-term memory tier returned hits to the agent. Records
    /// which tier was queried (`semantic` / `entity` / `graph` /
    /// caller-defined), the namespace key (operator identifier for
    /// the slice queried), and the number of hits returned. The
    /// hits themselves stay outside the audit log — the model-facing
    /// content already lands in `AssistantMessage` / `ToolResult`,
    /// and storing the full retrieved corpus inline would balloon
    /// the audit trail.
    MemoryRecall {
        /// Memory tier identifier (typically `"semantic"`,
        /// `"entity"`, `"graph"`, or an operator-supplied label).
        tier: String,
        /// Rendered namespace key the query targeted.
        namespace_key: String,
        /// Number of records returned to the agent.
        hits: usize,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// An [`entelix_core::RunBudget`] axis hit its cap and
    /// short-circuited the run with
    /// `entelix_core::Error::UsageLimitExceeded`. Compliance and
    /// billing audits replay this to attribute breaches per-tenant
    /// per-run; the operator-facing `Error` continues to flow
    /// through the typed dispatch return as well, so the audit
    /// channel's role here is the durable record, not the only
    /// breach signal.
    UsageLimitExceeded {
        /// Typed axis-and-magnitude pair carried straight through
        /// from the matching `Error::UsageLimitExceeded(breach)`.
        /// The axis variant carries its own magnitude shape
        /// (`u64` for counts, `Decimal` for cost).
        breach: entelix_core::UsageLimitBreach,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// A classified tool-dispatch failure was escalated to terminal
    /// by [`entelix_core::tools::ToolErrorPolicyLayer`]. The reasoning
    /// loop returns `Error::ToolErrorTerminal` to the caller without
    /// consulting the model — distinct from the matching `ToolResult
    /// { is_error: true }` event (which records the tool's *response*)
    /// because the loop-termination decision is a separate
    /// observability fact dashboards need to split from generic
    /// retry-loop exhaustion.
    ToolErrorTerminal {
        /// Classified `ToolErrorKind` (`"auth"`, `"quota"`,
        /// `"permanent"`, ...) that matched the active
        /// `ToolErrorPolicy::terminate_on` set.
        ///
        /// The field is named `error_kind` (not `kind`) because the
        /// surrounding `#[serde(tag = "kind")]` already consumes
        /// `kind` as the variant discriminator.
        error_kind: entelix_core::ToolErrorKind,
        /// Dispatched tool's name.
        tool_name: String,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// A failure surfaced from the model / tool / graph runtime.
    /// Errors that the agent recovers from internally are still
    /// recorded so post-mortems see the full picture.
    Error {
        /// Coarse classification matching `entelix_core::Error`
        /// variants (`"provider"`, `"invalid_request"`, `"config"`,
        /// `"auth"`, `"interrupted"`, `"cancelled"`, `"serde"`,
        /// `"transport"`). Stable wire strings — dashboards key off
        /// these without the SDK leaking internal error layout.
        class: String,
        /// Human-readable summary (`Display` form).
        message: String,
        /// Wall-clock time the event was appended.
        timestamp: DateTime<Utc>,
    },
}
297
298impl GraphEvent {
299    /// Borrow the timestamp of any event variant.
300    pub const fn timestamp(&self) -> &DateTime<Utc> {
301        match self {
302            Self::UserMessage { timestamp, .. }
303            | Self::AssistantMessage { timestamp, .. }
304            | Self::ToolCall { timestamp, .. }
305            | Self::ToolResult { timestamp, .. }
306            | Self::BranchCreated { timestamp, .. }
307            | Self::CheckpointMarker { timestamp, .. }
308            | Self::Warning { timestamp, .. }
309            | Self::ThinkingDelta { timestamp, .. }
310            | Self::RateLimit { timestamp, .. }
311            | Self::Interrupt { timestamp, .. }
312            | Self::Cancelled { timestamp, .. }
313            | Self::SubAgentInvoked { timestamp, .. }
314            | Self::AgentHandoff { timestamp, .. }
315            | Self::Resumed { timestamp, .. }
316            | Self::MemoryRecall { timestamp, .. }
317            | Self::UsageLimitExceeded { timestamp, .. }
318            | Self::ContextCompacted { timestamp, .. }
319            | Self::ToolErrorTerminal { timestamp, .. }
320            | Self::Error { timestamp, .. } => timestamp,
321        }
322    }
323}