clark_agent/types.rs
//! Agent message shapes.
//!
//! `AgentMessage` is the canonical typed conversation transcript. Apps that
//! need richer shapes either extend the `Custom` variant (kind-tagged JSON
//! payload) or wrap the entire enum in their own outer enum. The loop never
//! peeks into `Custom` — it's pass-through context.
//!
//! The discriminator lives on the role tag and the content is typed, so the
//! loop never has to walk a `Value` by field-name strings.
10
11use serde::{Deserialize, Serialize};
12use serde_json::Value;
13use std::time::SystemTime;
14
15use crate::tool::ToolCall;
16
17/// One message in the conversation transcript.
18///
19/// Discriminated by `role`. Each variant carries its own payload shape;
20/// the loop pattern-matches, never field-walks.
21#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
22#[serde(tag = "role", rename_all = "snake_case")]
23pub enum AgentMessage {
24 /// System prompt. Typically only one, at the head of the transcript.
25 System {
26 content: String,
27 #[serde(default = "default_timestamp", skip_serializing_if = "Option::is_none")]
28 timestamp: Option<u64>,
29 },
30 /// User input. May be a single text block or rich blocks (text + images).
31 User {
32 content: UserContent,
33 #[serde(default = "default_timestamp", skip_serializing_if = "Option::is_none")]
34 timestamp: Option<u64>,
35 },
36 /// Model output. Carries text, thinking blocks, and tool calls.
37 Assistant {
38 content: AssistantContent,
39 stop_reason: StopReason,
40 #[serde(default, skip_serializing_if = "Option::is_none")]
41 error_message: Option<String>,
42 #[serde(default = "default_timestamp", skip_serializing_if = "Option::is_none")]
43 timestamp: Option<u64>,
44 /// Provider-reported token accounting for the call that produced
45 /// this message. Populated by streaming transports that request
46 /// `stream_options.include_usage`; consumed by cost/billing
47 /// observers (e.g. eval matrix). `None` when the transport
48 /// didn't surface usage data.
49 #[serde(default, skip_serializing_if = "Option::is_none")]
50 usage: Option<Usage>,
51 },
52 /// Output of a tool call. Always paired with a prior assistant message
53 /// that contains the corresponding `ToolCall` block.
54 ToolResult {
55 tool_call_id: String,
56 tool_name: String,
57 content: ToolResultContent,
58 #[serde(default)]
59 is_error: bool,
60 /// Tool-side prose summary — the row-caption sentence the UI
61 /// renders ("Ran `ls -la`.", "Wrote `index.html` (4 KB).",
62 /// "Searched: `rust async` — 8 results."). The loop fills this
63 /// from `ToolResult::narration` when the typed result is
64 /// appended to history; tools may set it deterministically
65 /// from their own structured signals. Optional for
66 /// backward-compatibility with persisted histories that
67 /// pre-date this field.
68 ///
69 /// `working_memory_anchor` and other history-aware plugins
70 /// consume this in preference to walking the content blocks
71 /// for a preview; the model's first-line peek of a densified
72 /// shell result is opaque metadata, while narration carries
73 /// the actual prose every other surface already shows.
74 #[serde(default, skip_serializing_if = "Option::is_none")]
75 narration: Option<String>,
76 /// Host-side structured payload carried from the tool's
77 /// `ToolResult::details`. Stripped from provider wire formats
78 /// (the model sees `content` only) but preserved into history
79 /// so host-side plugins — delivery gates, artifact dispatchers,
80 /// UI projectors — can read structured fields without
81 /// text-grepping. Typed producers (`create_slides`,
82 /// `create_website`, `publish`) put canonical artifact metadata
83 /// here (`html_url`, `artifacts: [...]`, …). `None` when the
84 /// tool returned no structured payload, or for messages
85 /// deserialized from histories persisted before this field
86 /// existed.
87 #[serde(default, skip_serializing_if = "Option::is_none")]
88 details: Option<Value>,
89 #[serde(default = "default_timestamp", skip_serializing_if = "Option::is_none")]
90 timestamp: Option<u64>,
91 },
92 /// Escape hatch for app-specific message types (UI notifications, hidden
93 /// runtime feedback, replay markers). The loop ignores these for tool
94 /// dispatch but apps can route them through plugins or the event sink.
95 Custom {
96 kind: String,
97 #[serde(default)]
98 payload: Value,
99 #[serde(default = "default_timestamp", skip_serializing_if = "Option::is_none")]
100 timestamp: Option<u64>,
101 },
102}
103
/// Serde default for the `timestamp` fields of `AgentMessage`.
///
/// Returns the current wall-clock time as whole milliseconds since the UNIX
/// epoch. Returns `None` when the system clock reads earlier than the epoch
/// or when the millisecond count does not fit in a `u64`.
fn default_timestamp() -> Option<u64> {
    let since_epoch = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .ok()?;
    // Build the millisecond count with checked arithmetic instead of
    // `as_millis() as u64`: the cast would silently truncate a `u128` that
    // is out of range, whereas this yields `None`.
    since_epoch
        .as_secs()
        .checked_mul(1_000)?
        .checked_add(u64::from(since_epoch.subsec_millis()))
}
110
111/// Provider-reported token accounting for one LLM call.
112///
113/// All counts are in tokens; field names mirror the OpenAI-shape
114/// `usage` block (input/output) plus the cache-related fields the
115/// OpenRouter and Anthropic streams expose. Cost aggregators
116/// (`evals/rust/src/cost.rs`) read `input_tokens`,
117/// `output_tokens`, `cache_creation_input_tokens`,
118/// `cache_read_input_tokens` directly.
119#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
120pub struct Usage {
121 #[serde(default)]
122 pub input_tokens: i64,
123 #[serde(default)]
124 pub output_tokens: i64,
125 #[serde(default)]
126 pub cache_creation_input_tokens: i64,
127 #[serde(default)]
128 pub cache_read_input_tokens: i64,
129}
130
131/// Why an assistant turn ended.
132#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
133#[serde(rename_all = "snake_case")]
134pub enum StopReason {
135 /// Model emitted a natural end-of-turn (no tool calls).
136 EndTurn,
137 /// Model emitted one or more tool calls; loop will dispatch and continue.
138 ToolUse,
139 /// Provider hit max output tokens.
140 MaxTokens,
141 /// Provider raised an error during streaming.
142 Error,
143 /// Caller cancelled via the abort signal.
144 Aborted,
145 /// Other / provider-specific stop. Use the model's own value.
146 Other,
147}
148
149/// User-message content. Plain text is the common case; the block form
150/// supports images, attachments, and other multimodal inputs.
151#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
152#[serde(untagged)]
153pub enum UserContent {
154 Text(String),
155 Blocks(Vec<UserBlock>),
156}
157
158#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
159#[serde(tag = "type", rename_all = "snake_case")]
160pub enum UserBlock {
161 Text(TextContent),
162 Image(ImageContent),
163}
164
165/// Assistant-message content. Carries text, hidden reasoning blocks, and
166/// tool call requests in source order.
167#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
168#[serde(transparent)]
169pub struct AssistantContent {
170 pub blocks: Vec<AssistantBlock>,
171}
172
173impl AssistantContent {
174 pub fn text(text: impl Into<String>) -> Self {
175 Self {
176 blocks: vec![AssistantBlock::Text(TextContent { text: text.into() })],
177 }
178 }
179
180 pub fn with_tool_calls(text: Option<String>, tool_calls: Vec<ToolCall>) -> Self {
181 let mut blocks = Vec::new();
182 if let Some(t) = text.filter(|s| !s.trim().is_empty()) {
183 blocks.push(AssistantBlock::Text(TextContent { text: t }));
184 }
185 for call in tool_calls {
186 blocks.push(AssistantBlock::ToolCall(call));
187 }
188 Self { blocks }
189 }
190
191 /// Concatenate all text blocks into a single string.
192 pub fn plain_text(&self) -> String {
193 self.blocks
194 .iter()
195 .filter_map(|b| match b {
196 AssistantBlock::Text(t) => Some(t.text.as_str()),
197 _ => None,
198 })
199 .collect::<Vec<_>>()
200 .join("")
201 }
202
203 /// Return all tool call blocks in source order.
204 pub fn tool_calls(&self) -> Vec<&ToolCall> {
205 self.blocks
206 .iter()
207 .filter_map(|b| match b {
208 AssistantBlock::ToolCall(c) => Some(c),
209 _ => None,
210 })
211 .collect()
212 }
213
214 pub fn thinking_text(&self) -> String {
215 self.blocks
216 .iter()
217 .filter_map(|b| match b {
218 AssistantBlock::Thinking(t) => Some(t.text.as_str()),
219 _ => None,
220 })
221 .collect::<Vec<_>>()
222 .join("\n")
223 }
224
225 pub fn reasoning_text(&self) -> String {
226 self.blocks
227 .iter()
228 .filter_map(|b| match b {
229 AssistantBlock::Reasoning(t) => Some(t.text.as_str()),
230 _ => None,
231 })
232 .collect::<Vec<_>>()
233 .join("")
234 }
235
236 pub fn reasoning_details_values(&self) -> Vec<Value> {
237 self.blocks
238 .iter()
239 .filter_map(|b| match b {
240 AssistantBlock::ReasoningDetails(d) => Some(d.details.as_slice()),
241 _ => None,
242 })
243 .flatten()
244 .cloned()
245 .collect()
246 }
247}
248
249/// Blocks an assistant message can carry.
250///
251/// ## Channel separation contract
252///
253/// `Thinking` and `Reasoning` are two **independent** channels and the
254/// loop must never mix them:
255///
256/// - [`Thinking`](AssistantBlock::Thinking) is **prompt-elicited
257/// tag-text**. The model wraps reasoning inside
258/// `<thought>...</thought>` (or one of the synonym tags handled by
259/// [`crate::ThinkingTagStreamFilter`]) inside its visible text
260/// stream. The bridge parses those tags out of the visible-text
261/// channel and stores the captured content here. On the next
262/// provider request it is **rewoven into the `content` field as a
263/// `<thought>...</thought>` tag** — never as the wire `reasoning`
264/// field.
265///
266/// - [`Reasoning`](AssistantBlock::Reasoning) and
267/// [`ReasoningDetails`](AssistantBlock::ReasoningDetails) are
268/// **provider-native reasoning**. They arrive on a dedicated
269/// sideband (`delta.reasoning` / `delta.reasoning_details` on the
270/// OpenRouter wire) and represent the provider's own
271/// chain-of-thought tokens. On the next provider request they are
272/// replayed verbatim through the typed `reasoning` /
273/// `reasoning_details` fields — never wrapped in a
274/// `<thought>...</thought>` tag in the `content` field.
275///
276/// The two processes are not interchangeable: tag-elicited scratch is
277/// the model writing into its visible output by convention, and the
278/// loop strips it before the user sees anything. Provider-native
279/// reasoning is the upstream API delivering structured thinking
280/// alongside the message. Conflating them risks (a) shipping the
281/// model's hidden scratch as if it were native reasoning (some
282/// providers reject unknown content there or refuse to bill it as
283/// cached input) and (b) leaking native reasoning into visible text
284/// by way of `<thought>` rewrap (would round-trip native tokens
285/// through the visible channel and double-count them).
286///
287/// The invariants are pinned by tests in
288/// `openrouter_request_tests.rs::channel_separation_invariants` and
289/// `openrouter_stream::tests::stream_chunk_routing_invariants`.
290#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
291#[serde(tag = "type", rename_all = "snake_case")]
292pub enum AssistantBlock {
293 Text(TextContent),
294 /// Prompt-elicited hidden scratchpad. Captured from
295 /// `<thought>...</thought>` tags the model writes inside its
296 /// visible text stream; rewoven into the wire `content` field as
297 /// `<thought>...</thought>` on the next request. **Never** flows
298 /// into the wire `reasoning` field — see the type-level docs for
299 /// the channel-separation contract.
300 Thinking(TextContent),
301 /// Provider-native reasoning (xAI Grok, OpenAI o-series, Anthropic
302 /// native thinking). Arrives on the dedicated `delta.reasoning`
303 /// sideband and replayed via the wire `reasoning` field. **Never**
304 /// wrapped in a `<thought>...</thought>` tag inside the `content`
305 /// field — see the type-level docs.
306 Reasoning(TextContent),
307 /// Native provider reasoning detail blocks (xAI's
308 /// `reasoning.encrypted` envelopes, etc.). Replayed unmodified on
309 /// tool-continuation requests for reasoning models that rely on
310 /// signed/encrypted thinking continuity. Same channel contract as
311 /// [`Reasoning`](AssistantBlock::Reasoning).
312 ReasoningDetails(ReasoningDetailsContent),
313 /// Tool call request. Loop dispatches via the registry.
314 ToolCall(ToolCall),
315}
316
317/// Persistent envelope for provider-native reasoning items on an
318/// assistant turn. The wire shape is `Vec<Value>` matching
319/// OpenRouter's `reasoning_details[]` schema (the broadest typed
320/// surface across providers); `as_items` lifts it to typed
321/// [`crate::reasoning::ReasoningItem`]s for codec operations and `from_items` projects
322/// typed items back. The `details` field stays the source of truth so
323/// persisted trajectories round-trip byte-exact, even when a future
324/// provider sends shapes the typed enum doesn't yet recognize.
325#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
326pub struct ReasoningDetailsContent {
327 pub details: Vec<Value>,
328}
329
330impl ReasoningDetailsContent {
331 pub fn new(details: Vec<Value>) -> Self {
332 Self { details }
333 }
334
335 /// Lift the stored `details` array into typed
336 /// [`crate::reasoning::ReasoningItem`]s. Items that don't match a known variant
337 /// are preserved in `details` but elided from the typed view —
338 /// so consumers iterating typed items never see corrupt data
339 /// while replay-via-`details` still ships the original bytes.
340 pub fn as_items(&self) -> Vec<crate::reasoning::ReasoningItem> {
341 self.details
342 .iter()
343 .filter_map(crate::reasoning::ReasoningItem::from_openrouter_value)
344 .collect()
345 }
346
347 /// Build from typed items. Used when a codec produces typed
348 /// items from a non-OpenRouter provider response.
349 pub fn from_items(items: &[crate::reasoning::ReasoningItem]) -> Self {
350 Self {
351 details: items
352 .iter()
353 .map(crate::reasoning::ReasoningItem::to_openrouter_value)
354 .collect(),
355 }
356 }
357
358 /// True iff any item carries a signed/encrypted payload that a
359 /// strict-replay provider would reject if missing on next turn.
360 pub fn has_signed_payload(&self) -> bool {
361 self.as_items()
362 .iter()
363 .any(crate::reasoning::ReasoningItem::carries_signed_payload)
364 }
365}
366
367/// Tool result content. Multiple blocks support image-returning tools.
368#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
369#[serde(transparent)]
370pub struct ToolResultContent {
371 pub blocks: Vec<ToolResultBlock>,
372}
373
374impl ToolResultContent {
375 pub fn text(text: impl Into<String>) -> Self {
376 Self {
377 blocks: vec![ToolResultBlock::Text(TextContent { text: text.into() })],
378 }
379 }
380
381 pub fn plain_text(&self) -> String {
382 self.blocks
383 .iter()
384 .filter_map(|b| match b {
385 ToolResultBlock::Text(t) => Some(t.text.as_str()),
386 _ => None,
387 })
388 .collect::<Vec<_>>()
389 .join("")
390 }
391}
392
393#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
394#[serde(tag = "type", rename_all = "snake_case")]
395pub enum ToolResultBlock {
396 Text(TextContent),
397 Image(ImageContent),
398}
399
400#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
401pub struct TextContent {
402 pub text: String,
403}
404
405#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
406pub struct ImageContent {
407 /// Either a data: URL or an external URL the provider can fetch.
408 pub source: String,
409 #[serde(default, skip_serializing_if = "Option::is_none")]
410 pub media_type: Option<String>,
411 #[serde(default, skip_serializing_if = "Option::is_none")]
412 pub alt: Option<String>,
413}
414
415/// Identity of one agent run.
416///
417/// Threaded through child spawns so the loop, its plugins, and any
418/// trajectory sink can answer "who am I, who is my parent, how deep am
419/// I, what conversation, when do I expire" without consulting a
420/// side-channel. The fields are typed at the same level as
421/// [`AgentMessage`] — every run has identity, full stop. Today's bridge
422/// scatters these across `LoopConfig.conversation_id`,
423/// `RunnerJob.depth`, `ChildScope.parent_conversation_id`, and
424/// `parent_deadline`; `RunIdentity` is the merge target.
425///
426/// Identity is serializable so trajectory writers can pin every event
427/// to its run without inventing a parallel key store.
428#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
429pub struct RunIdentity {
430 /// Stable identifier for this run. UUIDv4 by default; callers may
431 /// supply their own value (e.g. to keep run ids aligned with an
432 /// external trace system).
433 pub run_id: String,
434 /// `Some(parent.run_id)` when this run was spawned by another run;
435 /// `None` for top-level runs initiated by a user-facing entry
436 /// point.
437 #[serde(default, skip_serializing_if = "Option::is_none")]
438 pub parent_run_id: Option<String>,
439 /// 0 for top-level runs; +1 per nested spawn.
440 #[serde(default)]
441 pub depth: usize,
442 /// Conversation this run belongs to (when the host runtime has
443 /// one). `None` for tests and isolated runs that don't carry
444 /// conversation identity.
445 #[serde(default, skip_serializing_if = "Option::is_none")]
446 pub conversation_id: Option<String>,
447 /// Wall-clock deadline as milliseconds since the UNIX epoch. The
448 /// loop does not enforce this directly — plugins that care
449 /// (wall-clock steering, soft-cancel) read it. `None` means no
450 /// parent-imposed deadline.
451 #[serde(default, skip_serializing_if = "Option::is_none")]
452 pub deadline_unix_ms: Option<u64>,
453}
454
455impl RunIdentity {
456 /// Construct a top-level identity. Generates a fresh UUIDv4 run id.
457 pub fn root() -> Self {
458 Self {
459 run_id: uuid::Uuid::new_v4().to_string(),
460 parent_run_id: None,
461 depth: 0,
462 conversation_id: None,
463 deadline_unix_ms: None,
464 }
465 }
466
467 /// Construct a child identity from a parent. Inherits
468 /// `conversation_id` and `deadline_unix_ms`, bumps `depth`, sets
469 /// `parent_run_id`, and generates a fresh `run_id`.
470 pub fn child_of(parent: &Self) -> Self {
471 Self {
472 run_id: uuid::Uuid::new_v4().to_string(),
473 parent_run_id: Some(parent.run_id.clone()),
474 depth: parent.depth + 1,
475 conversation_id: parent.conversation_id.clone(),
476 deadline_unix_ms: parent.deadline_unix_ms,
477 }
478 }
479
480 pub fn with_run_id(mut self, id: impl Into<String>) -> Self {
481 self.run_id = id.into();
482 self
483 }
484
485 pub fn with_conversation_id(mut self, id: impl Into<String>) -> Self {
486 self.conversation_id = Some(id.into());
487 self
488 }
489
490 pub fn with_deadline_unix_ms(mut self, ms: u64) -> Self {
491 self.deadline_unix_ms = Some(ms);
492 self
493 }
494}
495
496/// Snapshot of agent state passed into the loop.
497///
498/// Carries the system prompt, the current transcript, and an optional
499/// [`RunIdentity`]. Plain data — the loop builds an internal mutable
500/// copy and returns the new tail.
501///
502/// `identity` is optional for backward compatibility with callers that
503/// don't yet thread one through. When `None`, the loop treats the run
504/// as an anonymous root; plugins that key on identity see `None` and
505/// degrade gracefully.
506#[derive(Debug, Clone)]
507pub struct AgentContext {
508 pub system_prompt: String,
509 pub messages: Vec<AgentMessage>,
510 pub identity: Option<RunIdentity>,
511}
512
513impl AgentContext {
514 pub fn new(system_prompt: impl Into<String>) -> Self {
515 Self {
516 system_prompt: system_prompt.into(),
517 messages: Vec::new(),
518 identity: None,
519 }
520 }
521
522 pub fn with_messages(mut self, messages: Vec<AgentMessage>) -> Self {
523 self.messages = messages;
524 self
525 }
526
527 /// Attach a [`RunIdentity`] to this context. Use
528 /// [`RunIdentity::root`] for top-level runs and
529 /// [`RunIdentity::child_of`] for spawned children.
530 pub fn with_identity(mut self, identity: RunIdentity) -> Self {
531 self.identity = Some(identity);
532 self
533 }
534
535 /// Convenience: produce a child `AgentContext` for a spawned run.
536 /// Returns a fresh context with the supplied `system_prompt`, no
537 /// messages, and a child identity derived from this context's
538 /// identity (or a fresh root if this context has none).
539 pub fn spawn_child(&self, system_prompt: impl Into<String>) -> Self {
540 let parent_identity = self.identity.clone().unwrap_or_else(RunIdentity::root);
541 Self {
542 system_prompt: system_prompt.into(),
543 messages: Vec::new(),
544 identity: Some(RunIdentity::child_of(&parent_identity)),
545 }
546 }
547}
548
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain-text user message serializes to the role-tagged wire shape
    /// and deserializes back to an equal value.
    #[test]
    fn user_text_round_trip() {
        let msg = AgentMessage::User {
            content: UserContent::Text("hello".into()),
            // An explicit timestamp keeps the round trip deterministic; a
            // missing one would be filled with the current time on read.
            timestamp: Some(0),
        };
        let json = serde_json::to_value(&msg).unwrap();
        assert_eq!(json["role"], "user");
        assert_eq!(json["content"], "hello");

        let back: AgentMessage = serde_json::from_value(json).unwrap();
        assert_eq!(back, msg);
    }

    /// `with_tool_calls` keeps the leading text and the tool call, and the
    /// accessors return each of them.
    #[test]
    fn assistant_with_tool_call_blocks() {
        let content = AssistantContent::with_tool_calls(
            Some("calling…".into()),
            vec![ToolCall {
                id: "call_1".into(),
                name: "shell".into(),
                arguments: serde_json::json!({"cmd": "ls"}),
            }],
        );
        assert_eq!(content.tool_calls().len(), 1);
        assert_eq!(content.plain_text(), "calling…");
    }

    /// A `Custom` message serializes with `role = "custom"` and keeps its
    /// `kind` at the top level.
    #[test]
    fn custom_message_passthrough() {
        let msg = AgentMessage::Custom {
            kind: "ui_notification".into(),
            payload: serde_json::json!({"text": "build started"}),
            timestamp: None,
        };
        let json = serde_json::to_value(&msg).unwrap();
        assert_eq!(json["role"], "custom");
        assert_eq!(json["kind"], "ui_notification");
    }
}