1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
//! `GraphEvent` — the audit-trail unit appended to a `SessionGraph`.
//!
//! Every event is timestamped and serializable, so a persisted log can be
//! replayed verbatim by a fresh process (Anthropic-style `wake(thread_id)`).
//! Events are **strictly additive** — once written, never mutated.
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use entelix_core::ir::{ContentPart, ModelWarning, ProviderEchoSnapshot, ToolResultContent, Usage};
use entelix_core::rate_limit::RateLimitSnapshot;
/// A single record in the audit log.
///
/// Reading the records oldest-to-newest rebuilds the complete conversation
/// trace of a thread. Branch and checkpoint records sit inline with
/// everything else, so replay needs nothing more than one linear pass.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
#[non_exhaustive]
pub enum GraphEvent {
    /// Input authored by the user.
    UserMessage {
        /// Content parts of the message (text, image, `tool_result`).
        content: Vec<ContentPart>,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// Reply from the assistant, recorded after stream aggregation.
    AssistantMessage {
        /// Content parts of the reply (text, `tool_use`).
        content: Vec<ContentPart>,
        /// Token accounting, present when the provider reported it.
        usage: Option<Usage>,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// The working message slice was trimmed by an auto-compaction
    /// adapter. `dropped_chars` is the character cost removed (or
    /// summarised away) and `retained_chars` is the cost carried
    /// forward by the post-compaction slice. Together they let
    /// dashboards spot drift between the threshold the operator
    /// wired and the trim each invocation actually produces.
    ContextCompacted {
        /// Character cost removed by the compactor.
        dropped_chars: usize,
        /// Character cost kept by the post-compaction slice.
        retained_chars: usize,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// The assistant dispatched a tool.
    ToolCall {
        /// Stable tool-use id that a later `ToolResult` refers back to.
        id: String,
        /// Name the tool is registered under.
        name: String,
        /// JSON input handed to the tool.
        input: serde_json::Value,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// A previously dispatched tool returned.
    ToolResult {
        /// The `ToolCall::id` resolved by this result.
        tool_use_id: String,
        /// The `ToolCall::name` resolved by this result. Needed by
        /// codecs whose wire format correlates by name rather than
        /// by id (Gemini's `functionResponse`).
        name: String,
        /// Payload the tool produced.
        content: ToolResultContent,
        /// `true` when the tool reported an error.
        is_error: bool,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// A branch was forked from this session at the given event
    /// index; the thread id of the new branch is stored with it.
    BranchCreated {
        /// Identifier of the forked sub-session.
        branch_id: String,
        /// 0-based index into `events` where the branch diverged.
        parent_event: usize,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// Ties this position in the audit log to a `Checkpointer`
    /// snapshot. Serves as the cross-tier reference for crash
    /// recovery flows that combine `SessionGraph` (Tier 2) with
    /// `StateGraph` checkpoints (Tier 1).
    CheckpointMarker {
        /// An `entelix_graph::CheckpointId` in string form.
        checkpoint_id: String,
        /// Thread under which the checkpoint was written (usually
        /// the session's own thread).
        thread_id: String,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// An advisory from the codec or runtime, kept in the audit trail.
    Warning {
        /// The advisory itself.
        warning: ModelWarning,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// One fragment of streamed thinking content. When a finalised
    /// message is reconstructed, aggregators fold consecutive deltas
    /// into a single `ContentPart::Thinking`. Deltas are stored one
    /// by one so the log stays faithful to the wire: a replay that
    /// only wants the final block folds them, and a replay that
    /// wants per-token timing still has the data.
    ThinkingDelta {
        /// Token text appended to the thinking block in progress.
        text: String,
        /// Opaque vendor round-trip tokens attached to this delta
        /// (Anthropic `signature_delta`, Gemini `thought_signature`
        /// on streamed parts, `OpenAI` Responses reasoning-item
        /// `encrypted_content`). Codecs wrap them into
        /// `ProviderEchoSnapshot` while decoding, and the audit log
        /// keeps those same opaque bytes for replay.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        provider_echoes: Vec<ProviderEchoSnapshot>,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// The provider's rate-limit snapshot at this point in the
    /// conversation. It lets operators reading the log connect a
    /// later throttling failure to the snapshot that warned of it.
    /// It is stored inline, not on a separate metric channel, so
    /// the audit trail is self-contained for compliance review.
    RateLimit {
        /// Snapshot extracted by the codec from response headers.
        snapshot: RateLimitSnapshot,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// A HITL pause point: the runtime asked the host application
    /// for input. The matching resume signal arrives through
    /// `entelix_graph::Command`, outside the audit log; this event
    /// records that the pause occurred and what the human could see
    /// at that moment.
    Interrupt {
        /// Operator-supplied description of the pause point. It is
        /// free-form JSON: the agent recipe owns the schema and the
        /// audit log only persists it.
        payload: serde_json::Value,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// The run was cancelled, by a cancellation token or by an
    /// elapsed deadline. Storing the reason inline lets a replay
    /// rebuild partial-run audit traces faithfully.
    Cancelled {
        /// Short human-readable reason; nothing downstream parses it.
        reason: String,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// The parent's run dispatched a sub-agent. The parent `run_id`
    /// (recorded on the surrounding `AgentEvent::Started`) scopes
    /// the audit trail, and this event links the parent's position
    /// to the child's `sub_thread_id`, so a replay can walk from
    /// parent to child without relying on heuristic timing.
    /// Managed-agent shape: every `Subagent::execute` call surfaces
    /// here as the canonical "brain passes hand" audit boundary.
    SubAgentInvoked {
        /// Stable identifier the parent uses for the sub-agent
        /// (usually the name configured on the `Subagent`).
        agent_id: String,
        /// Thread the sub-agent ran under: the parent's thread when
        /// state is shared, a fresh value when the sub-agent runs
        /// in a scope of its own.
        sub_thread_id: String,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// A supervisor recipe passed control from one named agent to
    /// another. This differs from `SubAgentInvoked`: a supervisor
    /// handoff routes within one logical conversation, whereas a
    /// sub-agent invocation opens a child run.
    AgentHandoff {
        /// Name of the agent that finished this turn; `None` on the
        /// first supervisor turn, before any agent has spoken.
        from: Option<String>,
        /// Name of the agent the supervisor routed to next.
        to: String,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// A run picked up again from an earlier checkpoint, either
    /// through `wake(thread_id)` after a crash or through
    /// `Command::Resume` after a HITL pause. It references the id
    /// of the matching `CheckpointMarker`, which keeps a single
    /// linear replay coherent across the suspend / resume seam.
    Resumed {
        /// The `CheckpointMarker::checkpoint_id` the resume was
        /// hydrated from. An empty string means the resume started
        /// from fresh state (the operator built the resume payload
        /// by hand).
        from_checkpoint: String,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// A long-term memory tier handed hits back to the agent. The
    /// event stores the tier queried (`semantic` / `entity` /
    /// `graph` / caller-defined), the namespace key (the operator's
    /// identifier for the slice queried) and how many hits came
    /// back. The hits themselves are kept out of the audit log:
    /// the model-facing content already lands in
    /// `AssistantMessage` / `ToolResult`, and inlining the whole
    /// retrieved corpus would balloon the audit trail.
    MemoryRecall {
        /// Identifier of the memory tier (usually `"semantic"`,
        /// `"entity"`, `"graph"`, or a label chosen by the operator).
        tier: String,
        /// Rendered namespace key that the query targeted.
        namespace_key: String,
        /// Count of records returned to the agent.
        hits: usize,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// One axis of an [`entelix_core::RunBudget`] reached its cap
    /// and short-circuited the run with
    /// `entelix_core::Error::UsageLimitExceeded`. Compliance and
    /// billing audits replay this event to attribute breaches
    /// per-tenant and per-run. The operator-facing `Error` still
    /// flows through the typed dispatch return, so the audit
    /// channel acts as the durable record here and is not the
    /// only breach signal.
    UsageLimitExceeded {
        /// Typed axis-and-magnitude pair taken unchanged from the
        /// matching `Error::UsageLimitExceeded(breach)`. Each axis
        /// variant has its own magnitude shape (`u64` for counts,
        /// `Decimal` for cost).
        breach: entelix_core::UsageLimitBreach,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
    /// The model, tool or graph runtime surfaced a failure. Errors
    /// the agent recovers from internally are recorded as well, so
    /// post-mortems see the full picture.
    Error {
        /// Coarse classification that mirrors the variants of
        /// `entelix_core::Error` (`"provider"`, `"invalid_request"`,
        /// `"config"`, `"auth"`, `"interrupted"`, `"cancelled"`,
        /// `"serde"`, `"transport"`). The strings are stable on the
        /// wire, so dashboards can key off them without the SDK
        /// exposing its internal error layout.
        class: String,
        /// Human-readable summary (the `Display` form).
        message: String,
        /// Wall-clock instant at which the event was appended.
        timestamp: DateTime<Utc>,
    },
}
impl GraphEvent {
    /// Returns a reference to the append time stored on this event.
    ///
    /// Every variant carries a `timestamp` field, so the accessor is
    /// total. Arms follow the variant declaration order and there is
    /// deliberately no wildcard arm: a variant added to the enum
    /// without a matching arm here fails to compile.
    pub const fn timestamp(&self) -> &DateTime<Utc> {
        match self {
            Self::UserMessage { timestamp: appended_at, .. }
            | Self::AssistantMessage { timestamp: appended_at, .. }
            | Self::ContextCompacted { timestamp: appended_at, .. }
            | Self::ToolCall { timestamp: appended_at, .. }
            | Self::ToolResult { timestamp: appended_at, .. }
            | Self::BranchCreated { timestamp: appended_at, .. }
            | Self::CheckpointMarker { timestamp: appended_at, .. }
            | Self::Warning { timestamp: appended_at, .. }
            | Self::ThinkingDelta { timestamp: appended_at, .. }
            | Self::RateLimit { timestamp: appended_at, .. }
            | Self::Interrupt { timestamp: appended_at, .. }
            | Self::Cancelled { timestamp: appended_at, .. }
            | Self::SubAgentInvoked { timestamp: appended_at, .. }
            | Self::AgentHandoff { timestamp: appended_at, .. }
            | Self::Resumed { timestamp: appended_at, .. }
            | Self::MemoryRecall { timestamp: appended_at, .. }
            | Self::UsageLimitExceeded { timestamp: appended_at, .. }
            | Self::Error { timestamp: appended_at, .. } => appended_at,
        }
    }
}