// entelix_session/event.rs

//! `GraphEvent` — the audit-trail unit appended to a `SessionGraph`.
//!
//! Every event is timestamped and serializable, so a persisted log can be
//! replayed verbatim by a fresh process (Anthropic-style `wake(thread_id)`).
//! Events are **strictly additive** — once written, never mutated.

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};

use entelix_core::ir::{ContentPart, ModelWarning, ProviderEchoSnapshot, ToolResultContent, Usage};
use entelix_core::rate_limit::RateLimitSnapshot;

13/// One audit-log entry.
14///
15/// Aggregating these (oldest-to-newest) reconstructs the full conversation
16/// trace for a thread. Branches and checkpoints are recorded inline so a
17/// single linear scan is enough for replay.
18#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
19#[serde(tag = "kind", rename_all = "snake_case")]
20#[non_exhaustive]
21pub enum GraphEvent {
22    /// User-authored input.
23    UserMessage {
24        /// Multi-part content (text, image, `tool_result`).
25        content: Vec<ContentPart>,
26        /// Wall-clock time the event was appended.
27        timestamp: DateTime<Utc>,
28    },
29    /// Assistant reply (after stream aggregation).
30    AssistantMessage {
31        /// Multi-part content (text, `tool_use`).
32        content: Vec<ContentPart>,
33        /// Token accounting if reported by the provider.
34        usage: Option<Usage>,
35        /// Wall-clock time the event was appended.
36        timestamp: DateTime<Utc>,
37    },
38    /// An auto-compaction adapter trimmed the working message slice.
39    /// `dropped_chars` is the character cost the compactor removed
40    /// (or summarised away); `retained_chars` is the cost the
41    /// post-compaction slice carries forward. The pair lets dashboards
42    /// detect drift between the threshold the operator wired and the
43    /// actual trim each invocation produces.
44    ContextCompacted {
45        /// Character cost the compactor dropped.
46        dropped_chars: usize,
47        /// Character cost the post-compaction slice retained.
48        retained_chars: usize,
49        /// Wall-clock time the event was appended.
50        timestamp: DateTime<Utc>,
51    },
52    /// A tool was dispatched by the assistant.
53    ToolCall {
54        /// Stable tool-use id matching a future `ToolResult`.
55        id: String,
56        /// Registered tool name.
57        name: String,
58        /// Tool input as JSON.
59        input: serde_json::Value,
60        /// Wall-clock time the event was appended.
61        timestamp: DateTime<Utc>,
62    },
63    /// The dispatched tool returned.
64    ToolResult {
65        /// `ToolCall::id` this result resolves.
66        tool_use_id: String,
67        /// `ToolCall::name` this result resolves — required by
68        /// codecs whose wire format keys correlation by name
69        /// (Gemini's `functionResponse`) rather than id.
70        name: String,
71        /// Result payload.
72        content: ToolResultContent,
73        /// True if the tool reported an error.
74        is_error: bool,
75        /// Wall-clock time the event was appended.
76        timestamp: DateTime<Utc>,
77    },
78    /// A branch was forked off this session at the indicated event index.
79    /// The new branch's thread id is recorded alongside.
80    BranchCreated {
81        /// Identifier of the forked sub-session.
82        branch_id: String,
83        /// Index in `events` (0-based) the branch diverged at.
84        parent_event: usize,
85        /// Wall-clock time the event was appended.
86        timestamp: DateTime<Utc>,
87    },
88    /// Marker tying this position in the audit log to a `Checkpointer`
89    /// snapshot. Cross-tier reference for crash recovery flows that pair
90    /// `SessionGraph` (Tier 2) with `StateGraph` checkpoints (Tier 1).
91    CheckpointMarker {
92        /// Stringified `entelix_graph::CheckpointId`.
93        checkpoint_id: String,
94        /// Thread the checkpoint was written under (typically same as
95        /// the session's thread).
96        thread_id: String,
97        /// Wall-clock time the event was appended.
98        timestamp: DateTime<Utc>,
99    },
100    /// Codec / runtime advisory captured into the audit trail.
101    Warning {
102        /// Underlying advisory.
103        warning: ModelWarning,
104        /// Wall-clock time the event was appended.
105        timestamp: DateTime<Utc>,
106    },
107    /// Streaming thinking-content fragment captured into the audit
108    /// trail. Aggregators fold consecutive deltas into a single
109    /// `ContentPart::Thinking` when reconstructing a finalised
110    /// message. Recording deltas individually keeps the audit log
111    /// faithful to the wire — a replay that needs only the final
112    /// block can fold the deltas, while a replay that needs per-token
113    /// timing has the data.
114    ThinkingDelta {
115        /// Token text appended to the in-progress thinking block.
116        text: String,
117        /// Vendor opaque round-trip tokens carried on this delta
118        /// (Anthropic `signature_delta`, Gemini `thought_signature`
119        /// on streamed parts, `OpenAI` Responses reasoning-item
120        /// `encrypted_content`). Codecs pre-wrap into
121        /// `ProviderEchoSnapshot` on decode; the audit log preserves
122        /// the same opaque bytes for replay.
123        #[serde(default, skip_serializing_if = "Vec::is_empty")]
124        provider_echoes: Vec<ProviderEchoSnapshot>,
125        /// Wall-clock time the event was appended.
126        timestamp: DateTime<Utc>,
127    },
128    /// Provider rate-limit snapshot at this position in the
129    /// conversation. Operators reading the audit log can correlate a
130    /// later throttling failure with the snapshot that warned them.
131    /// Recorded inline rather than on a separate metric channel so
132    /// the audit trail is self-contained for compliance review.
133    RateLimit {
134        /// Snapshot the codec extracted from response headers.
135        snapshot: RateLimitSnapshot,
136        /// Wall-clock time the event was appended.
137        timestamp: DateTime<Utc>,
138    },
139    /// HITL pause point — the runtime asked the host application for
140    /// input. The matching resume signal lands in
141    /// `entelix_graph::Command` outside the audit log; this event
142    /// records that the pause happened and what was visible to the
143    /// human at the time.
144    Interrupt {
145        /// Operator-supplied payload describing the pause point.
146        /// Free-form JSON so the agent recipe owns the schema; the
147        /// audit log just persists it.
148        payload: serde_json::Value,
149        /// Wall-clock time the event was appended.
150        timestamp: DateTime<Utc>,
151    },
152    /// The run was cancelled — either via cancellation token or via
153    /// a deadline elapsing. Recording the reason inline lets a
154    /// replay reconstruct partial-run audit traces faithfully.
155    Cancelled {
156        /// Lean reason string. Human-readable; not parsed downstream.
157        reason: String,
158        /// Wall-clock time the event was appended.
159        timestamp: DateTime<Utc>,
160    },
161    /// A sub-agent was dispatched from the parent's run. The parent
162    /// `run_id` (recorded on the surrounding `AgentEvent::Started`)
163    /// scopes the audit trail; this event ties the parent's
164    /// position to the child's `sub_thread_id` so a replay can walk
165    /// from parent to child without keying on heuristic timing.
166    /// Managed-agent shape — every `Subagent::execute`
167    /// call surfaces here as the canonical "brain passes hand"
168    /// audit boundary.
169    SubAgentInvoked {
170        /// Stable identifier the parent uses to refer to the
171        /// sub-agent (typically the `Subagent`'s configured name).
172        agent_id: String,
173        /// Thread the sub-agent ran under. Same as the parent's
174        /// thread when the sub-agent shares state; a fresh value
175        /// when the sub-agent runs in its own scope.
176        sub_thread_id: String,
177        /// Wall-clock time the event was appended.
178        timestamp: DateTime<Utc>,
179    },
180    /// A supervisor recipe handed control between named agents.
181    /// Distinct from `SubAgentInvoked` — supervisor handoffs route
182    /// inside one logical conversation, while sub-agent invocations
183    /// open a child run.
184    AgentHandoff {
185        /// Agent name that finished this turn (`None` on the first
186        /// supervisor turn where no agent has spoken yet).
187        from: Option<String>,
188        /// Agent name the supervisor routed to next.
189        to: String,
190        /// Wall-clock time the event was appended.
191        timestamp: DateTime<Utc>,
192    },
193    /// A run resumed from a prior checkpoint — either via
194    /// `wake(thread_id)` after a crash or via `Command::Resume` from
195    /// a HITL pause. Pairs with the `CheckpointMarker` whose id is
196    /// referenced so a single linear replay stays coherent across
197    /// the suspend / resume seam.
198    Resumed {
199        /// `CheckpointMarker::checkpoint_id` the resume hydrated
200        /// from. Empty string when the resume happened from a fresh
201        /// state (operator built the resume payload by hand).
202        from_checkpoint: String,
203        /// Wall-clock time the event was appended.
204        timestamp: DateTime<Utc>,
205    },
206    /// A long-term memory tier returned hits to the agent. Records
207    /// which tier was queried (`semantic` / `entity` / `graph` /
208    /// caller-defined), the namespace key (operator identifier for
209    /// the slice queried), and the number of hits returned. The
210    /// hits themselves stay outside the audit log — the model-facing
211    /// content already lands in `AssistantMessage` / `ToolResult`,
212    /// and storing the full retrieved corpus inline would balloon
213    /// the audit trail.
214    MemoryRecall {
215        /// Memory tier identifier (typically `"semantic"`,
216        /// `"entity"`, `"graph"`, or an operator-supplied label).
217        tier: String,
218        /// Rendered namespace key the query targeted.
219        namespace_key: String,
220        /// Number of records returned to the agent.
221        hits: usize,
222        /// Wall-clock time the event was appended.
223        timestamp: DateTime<Utc>,
224    },
225    /// An [`entelix_core::RunBudget`] axis hit its cap and
226    /// short-circuited the run with
227    /// `entelix_core::Error::UsageLimitExceeded`. Compliance and
228    /// billing audits replay this to attribute breaches per-tenant
229    /// per-run; the operator-facing `Error` continues to flow
230    /// through the typed dispatch return as well, so the audit
231    /// channel's role here is the durable record, not the only
232    /// breach signal.
233    UsageLimitExceeded {
234        /// Typed axis-and-magnitude pair carried straight through
235        /// from the matching `Error::UsageLimitExceeded(breach)`.
236        /// The axis variant carries its own magnitude shape
237        /// (`u64` for counts, `Decimal` for cost).
238        breach: entelix_core::UsageLimitBreach,
239        /// Wall-clock time the event was appended.
240        timestamp: DateTime<Utc>,
241    },
242    /// A failure surfaced from the model / tool / graph runtime.
243    /// Errors that the agent recovers from internally are still
244    /// recorded so post-mortems see the full picture.
245    Error {
246        /// Coarse classification matching `entelix_core::Error`
247        /// variants (`"provider"`, `"invalid_request"`, `"config"`,
248        /// `"auth"`, `"interrupted"`, `"cancelled"`, `"serde"`,
249        /// `"transport"`). Stable wire strings — dashboards key off
250        /// these without the SDK leaking internal error layout.
251        class: String,
252        /// Human-readable summary (`Display` form).
253        message: String,
254        /// Wall-clock time the event was appended.
255        timestamp: DateTime<Utc>,
256    },
257}
258
259impl GraphEvent {
260    /// Borrow the timestamp of any event variant.
261    pub const fn timestamp(&self) -> &DateTime<Utc> {
262        match self {
263            Self::UserMessage { timestamp, .. }
264            | Self::AssistantMessage { timestamp, .. }
265            | Self::ToolCall { timestamp, .. }
266            | Self::ToolResult { timestamp, .. }
267            | Self::BranchCreated { timestamp, .. }
268            | Self::CheckpointMarker { timestamp, .. }
269            | Self::Warning { timestamp, .. }
270            | Self::ThinkingDelta { timestamp, .. }
271            | Self::RateLimit { timestamp, .. }
272            | Self::Interrupt { timestamp, .. }
273            | Self::Cancelled { timestamp, .. }
274            | Self::SubAgentInvoked { timestamp, .. }
275            | Self::AgentHandoff { timestamp, .. }
276            | Self::Resumed { timestamp, .. }
277            | Self::MemoryRecall { timestamp, .. }
278            | Self::UsageLimitExceeded { timestamp, .. }
279            | Self::ContextCompacted { timestamp, .. }
280            | Self::Error { timestamp, .. } => timestamp,
281        }
282    }
283}