entelix_session/event.rs
1//! `GraphEvent` — the audit-trail unit appended to a `SessionGraph`.
2//!
3//! Every event is timestamped and serializable, so a persisted log can be
4//! replayed verbatim by a fresh process (Anthropic-style `wake(thread_id)`).
5//! Events are **strictly additive** — once written, never mutated.
6
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9
10use entelix_core::ir::{ContentPart, ModelWarning, ProviderEchoSnapshot, ToolResultContent, Usage};
11use entelix_core::rate_limit::RateLimitSnapshot;
12
13/// One audit-log entry.
14///
15/// Aggregating these (oldest-to-newest) reconstructs the full conversation
16/// trace for a thread. Branches and checkpoints are recorded inline so a
17/// single linear scan is enough for replay.
18#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
19#[serde(tag = "kind", rename_all = "snake_case")]
20#[non_exhaustive]
21pub enum GraphEvent {
22 /// User-authored input.
23 UserMessage {
24 /// Multi-part content (text, image, `tool_result`).
25 content: Vec<ContentPart>,
26 /// Wall-clock time the event was appended.
27 timestamp: DateTime<Utc>,
28 },
29 /// Assistant reply (after stream aggregation).
30 AssistantMessage {
31 /// Multi-part content (text, `tool_use`).
32 content: Vec<ContentPart>,
33 /// Token accounting if reported by the provider.
34 usage: Option<Usage>,
35 /// Wall-clock time the event was appended.
36 timestamp: DateTime<Utc>,
37 },
38 /// An auto-compaction adapter trimmed the working message slice.
39 /// `dropped_chars` is the character cost the compactor removed
40 /// (or summarised away); `retained_chars` is the cost the
41 /// post-compaction slice carries forward. The pair lets dashboards
42 /// detect drift between the threshold the operator wired and the
43 /// actual trim each invocation produces.
44 ContextCompacted {
45 /// Character cost the compactor dropped.
46 dropped_chars: usize,
47 /// Character cost the post-compaction slice retained.
48 retained_chars: usize,
49 /// Wall-clock time the event was appended.
50 timestamp: DateTime<Utc>,
51 },
52 /// A tool was dispatched by the assistant.
53 ToolCall {
54 /// Stable tool-use id matching a future `ToolResult`.
55 id: String,
56 /// Registered tool name.
57 name: String,
58 /// Tool input as JSON.
59 input: serde_json::Value,
60 /// Wall-clock time the event was appended.
61 timestamp: DateTime<Utc>,
62 },
63 /// The dispatched tool returned.
64 ToolResult {
65 /// `ToolCall::id` this result resolves.
66 tool_use_id: String,
67 /// `ToolCall::name` this result resolves — required by
68 /// codecs whose wire format keys correlation by name
69 /// (Gemini's `functionResponse`) rather than id.
70 name: String,
71 /// Result payload.
72 content: ToolResultContent,
73 /// True if the tool reported an error.
74 is_error: bool,
75 /// Wall-clock time the event was appended.
76 timestamp: DateTime<Utc>,
77 },
78 /// A branch was forked off this session at the indicated event index.
79 /// The new branch's thread id is recorded alongside.
80 BranchCreated {
81 /// Identifier of the forked sub-session.
82 branch_id: String,
83 /// Index in `events` (0-based) the branch diverged at.
84 parent_event: usize,
85 /// Wall-clock time the event was appended.
86 timestamp: DateTime<Utc>,
87 },
88 /// Marker tying this position in the audit log to a `Checkpointer`
89 /// snapshot. Cross-tier reference for crash recovery flows that pair
90 /// `SessionGraph` (Tier 2) with `StateGraph` checkpoints (Tier 1).
91 CheckpointMarker {
92 /// Stringified `entelix_graph::CheckpointId`.
93 checkpoint_id: String,
94 /// Thread the checkpoint was written under (typically same as
95 /// the session's thread).
96 thread_id: String,
97 /// Wall-clock time the event was appended.
98 timestamp: DateTime<Utc>,
99 },
100 /// Codec / runtime advisory captured into the audit trail.
101 Warning {
102 /// Underlying advisory.
103 warning: ModelWarning,
104 /// Wall-clock time the event was appended.
105 timestamp: DateTime<Utc>,
106 },
107 /// Streaming thinking-content fragment captured into the audit
108 /// trail. Aggregators fold consecutive deltas into a single
109 /// `ContentPart::Thinking` when reconstructing a finalised
110 /// message. Recording deltas individually keeps the audit log
111 /// faithful to the wire — a replay that needs only the final
112 /// block can fold the deltas, while a replay that needs per-token
113 /// timing has the data.
114 ThinkingDelta {
115 /// Token text appended to the in-progress thinking block.
116 text: String,
117 /// Vendor opaque round-trip tokens carried on this delta
118 /// (Anthropic `signature_delta`, Gemini `thought_signature`
119 /// on streamed parts, `OpenAI` Responses reasoning-item
120 /// `encrypted_content`). Codecs pre-wrap into
121 /// `ProviderEchoSnapshot` on decode; the audit log preserves
122 /// the same opaque bytes for replay.
123 #[serde(default, skip_serializing_if = "Vec::is_empty")]
124 provider_echoes: Vec<ProviderEchoSnapshot>,
125 /// Wall-clock time the event was appended.
126 timestamp: DateTime<Utc>,
127 },
128 /// Provider rate-limit snapshot at this position in the
129 /// conversation. Operators reading the audit log can correlate a
130 /// later throttling failure with the snapshot that warned them.
131 /// Recorded inline rather than on a separate metric channel so
132 /// the audit trail is self-contained for compliance review.
133 RateLimit {
134 /// Snapshot the codec extracted from response headers.
135 snapshot: RateLimitSnapshot,
136 /// Wall-clock time the event was appended.
137 timestamp: DateTime<Utc>,
138 },
139 /// HITL pause point — the runtime asked the host application for
140 /// input. The matching resume signal lands in
141 /// `entelix_graph::Command` outside the audit log; this event
142 /// records that the pause happened and what was visible to the
143 /// human at the time.
144 Interrupt {
145 /// Operator-supplied payload describing the pause point.
146 /// Free-form JSON so the agent recipe owns the schema; the
147 /// audit log just persists it.
148 payload: serde_json::Value,
149 /// Wall-clock time the event was appended.
150 timestamp: DateTime<Utc>,
151 },
152 /// The run was cancelled — either via cancellation token or via
153 /// a deadline elapsing. Recording the reason inline lets a
154 /// replay reconstruct partial-run audit traces faithfully.
155 Cancelled {
156 /// Lean reason string. Human-readable; not parsed downstream.
157 reason: String,
158 /// Wall-clock time the event was appended.
159 timestamp: DateTime<Utc>,
160 },
161 /// A sub-agent was dispatched from the parent's run. The parent
162 /// `run_id` (recorded on the surrounding `AgentEvent::Started`)
163 /// scopes the audit trail; this event ties the parent's
164 /// position to the child's `sub_thread_id` so a replay can walk
165 /// from parent to child without keying on heuristic timing.
166 /// Managed-agent shape — every `Subagent::execute`
167 /// call surfaces here as the canonical "brain passes hand"
168 /// audit boundary.
169 SubAgentInvoked {
170 /// Stable identifier the parent uses to refer to the
171 /// sub-agent (typically the `Subagent`'s configured name).
172 agent_id: String,
173 /// Thread the sub-agent ran under. Same as the parent's
174 /// thread when the sub-agent shares state; a fresh value
175 /// when the sub-agent runs in its own scope.
176 sub_thread_id: String,
177 /// Wall-clock time the event was appended.
178 timestamp: DateTime<Utc>,
179 },
180 /// A supervisor recipe handed control between named agents.
181 /// Distinct from `SubAgentInvoked` — supervisor handoffs route
182 /// inside one logical conversation, while sub-agent invocations
183 /// open a child run.
184 AgentHandoff {
185 /// Agent name that finished this turn (`None` on the first
186 /// supervisor turn where no agent has spoken yet).
187 from: Option<String>,
188 /// Agent name the supervisor routed to next.
189 to: String,
190 /// Wall-clock time the event was appended.
191 timestamp: DateTime<Utc>,
192 },
193 /// A run resumed from a prior checkpoint — either via
194 /// `wake(thread_id)` after a crash or via `Command::Resume` from
195 /// a HITL pause. Pairs with the `CheckpointMarker` whose id is
196 /// referenced so a single linear replay stays coherent across
197 /// the suspend / resume seam.
198 Resumed {
199 /// `CheckpointMarker::checkpoint_id` the resume hydrated
200 /// from. Empty string when the resume happened from a fresh
201 /// state (operator built the resume payload by hand).
202 from_checkpoint: String,
203 /// Wall-clock time the event was appended.
204 timestamp: DateTime<Utc>,
205 },
206 /// A long-term memory tier returned hits to the agent. Records
207 /// which tier was queried (`semantic` / `entity` / `graph` /
208 /// caller-defined), the namespace key (operator identifier for
209 /// the slice queried), and the number of hits returned. The
210 /// hits themselves stay outside the audit log — the model-facing
211 /// content already lands in `AssistantMessage` / `ToolResult`,
212 /// and storing the full retrieved corpus inline would balloon
213 /// the audit trail.
214 MemoryRecall {
215 /// Memory tier identifier (typically `"semantic"`,
216 /// `"entity"`, `"graph"`, or an operator-supplied label).
217 tier: String,
218 /// Rendered namespace key the query targeted.
219 namespace_key: String,
220 /// Number of records returned to the agent.
221 hits: usize,
222 /// Wall-clock time the event was appended.
223 timestamp: DateTime<Utc>,
224 },
225 /// An [`entelix_core::RunBudget`] axis hit its cap and
226 /// short-circuited the run with
227 /// `entelix_core::Error::UsageLimitExceeded`. Compliance and
228 /// billing audits replay this to attribute breaches per-tenant
229 /// per-run; the operator-facing `Error` continues to flow
230 /// through the typed dispatch return as well, so the audit
231 /// channel's role here is the durable record, not the only
232 /// breach signal.
233 UsageLimitExceeded {
234 /// Typed axis-and-magnitude pair carried straight through
235 /// from the matching `Error::UsageLimitExceeded(breach)`.
236 /// The axis variant carries its own magnitude shape
237 /// (`u64` for counts, `Decimal` for cost).
238 breach: entelix_core::UsageLimitBreach,
239 /// Wall-clock time the event was appended.
240 timestamp: DateTime<Utc>,
241 },
242 /// A failure surfaced from the model / tool / graph runtime.
243 /// Errors that the agent recovers from internally are still
244 /// recorded so post-mortems see the full picture.
245 Error {
246 /// Coarse classification matching `entelix_core::Error`
247 /// variants (`"provider"`, `"invalid_request"`, `"config"`,
248 /// `"auth"`, `"interrupted"`, `"cancelled"`, `"serde"`,
249 /// `"transport"`). Stable wire strings — dashboards key off
250 /// these without the SDK leaking internal error layout.
251 class: String,
252 /// Human-readable summary (`Display` form).
253 message: String,
254 /// Wall-clock time the event was appended.
255 timestamp: DateTime<Utc>,
256 },
257}
258
259impl GraphEvent {
260 /// Borrow the timestamp of any event variant.
261 pub const fn timestamp(&self) -> &DateTime<Utc> {
262 match self {
263 Self::UserMessage { timestamp, .. }
264 | Self::AssistantMessage { timestamp, .. }
265 | Self::ToolCall { timestamp, .. }
266 | Self::ToolResult { timestamp, .. }
267 | Self::BranchCreated { timestamp, .. }
268 | Self::CheckpointMarker { timestamp, .. }
269 | Self::Warning { timestamp, .. }
270 | Self::ThinkingDelta { timestamp, .. }
271 | Self::RateLimit { timestamp, .. }
272 | Self::Interrupt { timestamp, .. }
273 | Self::Cancelled { timestamp, .. }
274 | Self::SubAgentInvoked { timestamp, .. }
275 | Self::AgentHandoff { timestamp, .. }
276 | Self::Resumed { timestamp, .. }
277 | Self::MemoryRecall { timestamp, .. }
278 | Self::UsageLimitExceeded { timestamp, .. }
279 | Self::ContextCompacted { timestamp, .. }
280 | Self::Error { timestamp, .. } => timestamp,
281 }
282 }
283}