Skip to main content

clark_agent/
types.rs

1//! Agent message shapes.
2//!
3//! `AgentMessage` is the canonical typed conversation transcript. Apps that
4//! need richer shapes either extend the `Custom` variant (kind-tagged JSON
5//! payload) or wrap the entire enum in their own outer enum. The loop never
6//! peeks into `Custom` — it's pass-through context.
7//!
8//! The discriminator lives on the role tag, content is typed, and the loop
9//! avoids `Value` walking via field-name strings.
10
11use serde::{Deserialize, Serialize};
12use serde_json::Value;
13use std::time::SystemTime;
14
15use crate::tool::ToolCall;
16
17/// One message in the conversation transcript.
18///
19/// Discriminated by `role`. Each variant carries its own payload shape;
20/// the loop pattern-matches, never field-walks.
21#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
22#[serde(tag = "role", rename_all = "snake_case")]
23pub enum AgentMessage {
24    /// System prompt. Typically only one, at the head of the transcript.
25    System {
26        content: String,
27        #[serde(default = "default_timestamp", skip_serializing_if = "Option::is_none")]
28        timestamp: Option<u64>,
29    },
30    /// User input. May be a single text block or rich blocks (text + images).
31    User {
32        content: UserContent,
33        #[serde(default = "default_timestamp", skip_serializing_if = "Option::is_none")]
34        timestamp: Option<u64>,
35    },
36    /// Model output. Carries text, thinking blocks, and tool calls.
37    Assistant {
38        content: AssistantContent,
39        stop_reason: StopReason,
40        #[serde(default, skip_serializing_if = "Option::is_none")]
41        error_message: Option<String>,
42        #[serde(default = "default_timestamp", skip_serializing_if = "Option::is_none")]
43        timestamp: Option<u64>,
44        /// Provider-reported token accounting for the call that produced
45        /// this message. Populated by streaming transports that request
46        /// `stream_options.include_usage`; consumed by cost/billing
47        /// observers (e.g. eval matrix). `None` when the transport
48        /// didn't surface usage data.
49        #[serde(default, skip_serializing_if = "Option::is_none")]
50        usage: Option<Usage>,
51    },
52    /// Output of a tool call. Always paired with a prior assistant message
53    /// that contains the corresponding `ToolCall` block.
54    ToolResult {
55        tool_call_id: String,
56        tool_name: String,
57        content: ToolResultContent,
58        #[serde(default)]
59        is_error: bool,
60        /// Tool-side prose summary — the row-caption sentence the UI
61        /// renders ("Ran `ls -la`.", "Wrote `index.html` (4 KB).",
62        /// "Searched: `rust async` — 8 results."). The loop fills this
63        /// from `ToolResult::narration` when the typed result is
64        /// appended to history; tools may set it deterministically
65        /// from their own structured signals. Optional for
66        /// backward-compatibility with persisted histories that
67        /// pre-date this field.
68        ///
69        /// `working_memory_anchor` and other history-aware plugins
70        /// consume this in preference to walking the content blocks
71        /// for a preview; the model's first-line peek of a densified
72        /// shell result is opaque metadata, while narration carries
73        /// the actual prose every other surface already shows.
74        #[serde(default, skip_serializing_if = "Option::is_none")]
75        narration: Option<String>,
76        /// Host-side structured payload carried from the tool's
77        /// `ToolResult::details`. Stripped from provider wire formats
78        /// (the model sees `content` only) but preserved into history
79        /// so host-side plugins — delivery gates, artifact dispatchers,
80        /// UI projectors — can read structured fields without
81        /// text-grepping. Typed producers (`create_slides`,
82        /// `create_website`, `publish`) put canonical artifact metadata
83        /// here (`html_url`, `artifacts: [...]`, …). `None` when the
84        /// tool returned no structured payload, or for messages
85        /// deserialized from histories persisted before this field
86        /// existed.
87        #[serde(default, skip_serializing_if = "Option::is_none")]
88        details: Option<Value>,
89        #[serde(default = "default_timestamp", skip_serializing_if = "Option::is_none")]
90        timestamp: Option<u64>,
91    },
92    /// Escape hatch for app-specific message types (UI notifications, hidden
93    /// runtime feedback, replay markers). The loop ignores these for tool
94    /// dispatch but apps can route them through plugins or the event sink.
95    Custom {
96        kind: String,
97        #[serde(default)]
98        payload: Value,
99        #[serde(default = "default_timestamp", skip_serializing_if = "Option::is_none")]
100        timestamp: Option<u64>,
101    },
102}
103
/// Serde default for the `timestamp` fields: the current wall-clock time
/// as milliseconds since the UNIX epoch.
///
/// Returns `None` when the system clock reports a time before the epoch,
/// or when the millisecond count does not fit in a `u64`.
fn default_timestamp() -> Option<u64> {
    let elapsed = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .ok()?;
    // `as_millis` yields a `u128`; convert with a checked conversion so an
    // out-of-range value becomes `None` instead of being silently
    // truncated. The trait is named in full so this compiles on editions
    // whose prelude does not include `TryFrom`.
    <u64 as std::convert::TryFrom<u128>>::try_from(elapsed.as_millis()).ok()
}
110
111/// Provider-reported token accounting for one LLM call.
112///
113/// All counts are in tokens; field names mirror the OpenAI-shape
114/// `usage` block (input/output) plus the cache-related fields the
115/// OpenRouter and Anthropic streams expose. Cost aggregators
116/// (`evals/rust/src/cost.rs`) read `input_tokens`,
117/// `output_tokens`, `cache_creation_input_tokens`,
118/// `cache_read_input_tokens` directly.
119#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
120pub struct Usage {
121    #[serde(default)]
122    pub input_tokens: i64,
123    #[serde(default)]
124    pub output_tokens: i64,
125    #[serde(default)]
126    pub cache_creation_input_tokens: i64,
127    #[serde(default)]
128    pub cache_read_input_tokens: i64,
129}
130
131/// Why an assistant turn ended.
132#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
133#[serde(rename_all = "snake_case")]
134pub enum StopReason {
135    /// Model emitted a natural end-of-turn (no tool calls).
136    EndTurn,
137    /// Model emitted one or more tool calls; loop will dispatch and continue.
138    ToolUse,
139    /// Provider hit max output tokens.
140    MaxTokens,
141    /// Provider raised an error during streaming.
142    Error,
143    /// Caller cancelled via the abort signal.
144    Aborted,
145    /// Other / provider-specific stop. Use the model's own value.
146    Other,
147}
148
149/// User-message content. Plain text is the common case; the block form
150/// supports images, attachments, and other multimodal inputs.
151#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
152#[serde(untagged)]
153pub enum UserContent {
154    Text(String),
155    Blocks(Vec<UserBlock>),
156}
157
158#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
159#[serde(tag = "type", rename_all = "snake_case")]
160pub enum UserBlock {
161    Text(TextContent),
162    Image(ImageContent),
163}
164
165/// Assistant-message content. Carries text, hidden reasoning blocks, and
166/// tool call requests in source order.
167#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
168#[serde(transparent)]
169pub struct AssistantContent {
170    pub blocks: Vec<AssistantBlock>,
171}
172
173impl AssistantContent {
174    pub fn text(text: impl Into<String>) -> Self {
175        Self {
176            blocks: vec![AssistantBlock::Text(TextContent { text: text.into() })],
177        }
178    }
179
180    pub fn with_tool_calls(text: Option<String>, tool_calls: Vec<ToolCall>) -> Self {
181        let mut blocks = Vec::new();
182        if let Some(t) = text.filter(|s| !s.trim().is_empty()) {
183            blocks.push(AssistantBlock::Text(TextContent { text: t }));
184        }
185        for call in tool_calls {
186            blocks.push(AssistantBlock::ToolCall(call));
187        }
188        Self { blocks }
189    }
190
191    /// Concatenate all text blocks into a single string.
192    pub fn plain_text(&self) -> String {
193        self.blocks
194            .iter()
195            .filter_map(|b| match b {
196                AssistantBlock::Text(t) => Some(t.text.as_str()),
197                _ => None,
198            })
199            .collect::<Vec<_>>()
200            .join("")
201    }
202
203    /// Return all tool call blocks in source order.
204    pub fn tool_calls(&self) -> Vec<&ToolCall> {
205        self.blocks
206            .iter()
207            .filter_map(|b| match b {
208                AssistantBlock::ToolCall(c) => Some(c),
209                _ => None,
210            })
211            .collect()
212    }
213
214    pub fn thinking_text(&self) -> String {
215        self.blocks
216            .iter()
217            .filter_map(|b| match b {
218                AssistantBlock::Thinking(t) => Some(t.text.as_str()),
219                _ => None,
220            })
221            .collect::<Vec<_>>()
222            .join("\n")
223    }
224
225    pub fn reasoning_text(&self) -> String {
226        self.blocks
227            .iter()
228            .filter_map(|b| match b {
229                AssistantBlock::Reasoning(t) => Some(t.text.as_str()),
230                _ => None,
231            })
232            .collect::<Vec<_>>()
233            .join("")
234    }
235
236    pub fn reasoning_details_values(&self) -> Vec<Value> {
237        self.blocks
238            .iter()
239            .filter_map(|b| match b {
240                AssistantBlock::ReasoningDetails(d) => Some(d.details.as_slice()),
241                _ => None,
242            })
243            .flatten()
244            .cloned()
245            .collect()
246    }
247}
248
249/// Blocks an assistant message can carry.
250///
251/// ## Channel separation contract
252///
253/// `Thinking` and `Reasoning` are two **independent** channels and the
254/// loop must never mix them:
255///
256/// - [`Thinking`](AssistantBlock::Thinking) is **prompt-elicited
257///   tag-text**. The model wraps reasoning inside
258///   `<thought>...</thought>` (or one of the synonym tags handled by
259///   [`crate::ThinkingTagStreamFilter`]) inside its visible text
260///   stream. The bridge parses those tags out of the visible-text
261///   channel and stores the captured content here. On the next
262///   provider request it is **rewoven into the `content` field as a
263///   `<thought>...</thought>` tag** — never as the wire `reasoning`
264///   field.
265///
266/// - [`Reasoning`](AssistantBlock::Reasoning) and
267///   [`ReasoningDetails`](AssistantBlock::ReasoningDetails) are
268///   **provider-native reasoning**. They arrive on a dedicated
269///   sideband (`delta.reasoning` / `delta.reasoning_details` on the
270///   OpenRouter wire) and represent the provider's own
271///   chain-of-thought tokens. On the next provider request they are
272///   replayed verbatim through the typed `reasoning` /
273///   `reasoning_details` fields — never wrapped in a
274///   `<thought>...</thought>` tag in the `content` field.
275///
276/// The two processes are not interchangeable: tag-elicited scratch is
277/// the model writing into its visible output by convention, and the
278/// loop strips it before the user sees anything. Provider-native
279/// reasoning is the upstream API delivering structured thinking
280/// alongside the message. Conflating them risks (a) shipping the
281/// model's hidden scratch as if it were native reasoning (some
282/// providers reject unknown content there or refuse to bill it as
283/// cached input) and (b) leaking native reasoning into visible text
284/// by way of `<thought>` rewrap (would round-trip native tokens
285/// through the visible channel and double-count them).
286///
287/// The invariants are pinned by tests in
288/// `openrouter_request_tests.rs::channel_separation_invariants` and
289/// `openrouter_stream::tests::stream_chunk_routing_invariants`.
290#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
291#[serde(tag = "type", rename_all = "snake_case")]
292pub enum AssistantBlock {
293    Text(TextContent),
294    /// Prompt-elicited hidden scratchpad. Captured from
295    /// `<thought>...</thought>` tags the model writes inside its
296    /// visible text stream; rewoven into the wire `content` field as
297    /// `<thought>...</thought>` on the next request. **Never** flows
298    /// into the wire `reasoning` field — see the type-level docs for
299    /// the channel-separation contract.
300    Thinking(TextContent),
301    /// Provider-native reasoning (xAI Grok, OpenAI o-series, Anthropic
302    /// native thinking). Arrives on the dedicated `delta.reasoning`
303    /// sideband and replayed via the wire `reasoning` field. **Never**
304    /// wrapped in a `<thought>...</thought>` tag inside the `content`
305    /// field — see the type-level docs.
306    Reasoning(TextContent),
307    /// Native provider reasoning detail blocks (xAI's
308    /// `reasoning.encrypted` envelopes, etc.). Replayed unmodified on
309    /// tool-continuation requests for reasoning models that rely on
310    /// signed/encrypted thinking continuity. Same channel contract as
311    /// [`Reasoning`](AssistantBlock::Reasoning).
312    ReasoningDetails(ReasoningDetailsContent),
313    /// Tool call request. Loop dispatches via the registry.
314    ToolCall(ToolCall),
315}
316
317/// Persistent envelope for provider-native reasoning items on an
318/// assistant turn. The wire shape is `Vec<Value>` matching
319/// OpenRouter's `reasoning_details[]` schema (the broadest typed
320/// surface across providers); `as_items` lifts it to typed
321/// [`crate::reasoning::ReasoningItem`]s for codec operations and `from_items` projects
322/// typed items back. The `details` field stays the source of truth so
323/// persisted trajectories round-trip byte-exact, even when a future
324/// provider sends shapes the typed enum doesn't yet recognize.
325#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
326pub struct ReasoningDetailsContent {
327    pub details: Vec<Value>,
328}
329
330impl ReasoningDetailsContent {
331    pub fn new(details: Vec<Value>) -> Self {
332        Self { details }
333    }
334
335    /// Lift the stored `details` array into typed
336    /// [`crate::reasoning::ReasoningItem`]s. Items that don't match a known variant
337    /// are preserved in `details` but elided from the typed view —
338    /// so consumers iterating typed items never see corrupt data
339    /// while replay-via-`details` still ships the original bytes.
340    pub fn as_items(&self) -> Vec<crate::reasoning::ReasoningItem> {
341        self.details
342            .iter()
343            .filter_map(crate::reasoning::ReasoningItem::from_openrouter_value)
344            .collect()
345    }
346
347    /// Build from typed items. Used when a codec produces typed
348    /// items from a non-OpenRouter provider response.
349    pub fn from_items(items: &[crate::reasoning::ReasoningItem]) -> Self {
350        Self {
351            details: items
352                .iter()
353                .map(crate::reasoning::ReasoningItem::to_openrouter_value)
354                .collect(),
355        }
356    }
357
358    /// True iff any item carries a signed/encrypted payload that a
359    /// strict-replay provider would reject if missing on next turn.
360    pub fn has_signed_payload(&self) -> bool {
361        self.as_items()
362            .iter()
363            .any(crate::reasoning::ReasoningItem::carries_signed_payload)
364    }
365}
366
367/// Tool result content. Multiple blocks support image-returning tools.
368#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
369#[serde(transparent)]
370pub struct ToolResultContent {
371    pub blocks: Vec<ToolResultBlock>,
372}
373
374impl ToolResultContent {
375    pub fn text(text: impl Into<String>) -> Self {
376        Self {
377            blocks: vec![ToolResultBlock::Text(TextContent { text: text.into() })],
378        }
379    }
380
381    pub fn plain_text(&self) -> String {
382        self.blocks
383            .iter()
384            .filter_map(|b| match b {
385                ToolResultBlock::Text(t) => Some(t.text.as_str()),
386                _ => None,
387            })
388            .collect::<Vec<_>>()
389            .join("")
390    }
391}
392
393#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
394#[serde(tag = "type", rename_all = "snake_case")]
395pub enum ToolResultBlock {
396    Text(TextContent),
397    Image(ImageContent),
398}
399
400#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
401pub struct TextContent {
402    pub text: String,
403}
404
405#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
406pub struct ImageContent {
407    /// Either a data: URL or an external URL the provider can fetch.
408    pub source: String,
409    #[serde(default, skip_serializing_if = "Option::is_none")]
410    pub media_type: Option<String>,
411    #[serde(default, skip_serializing_if = "Option::is_none")]
412    pub alt: Option<String>,
413}
414
415/// Identity of one agent run.
416///
417/// Threaded through child spawns so the loop, its plugins, and any
418/// trajectory sink can answer "who am I, who is my parent, how deep am
419/// I, what conversation, when do I expire" without consulting a
420/// side-channel. The fields are typed at the same level as
421/// [`AgentMessage`] — every run has identity, full stop. Today's bridge
422/// scatters these across `LoopConfig.conversation_id`,
423/// `RunnerJob.depth`, `ChildScope.parent_conversation_id`, and
424/// `parent_deadline`; `RunIdentity` is the merge target.
425///
426/// Identity is serializable so trajectory writers can pin every event
427/// to its run without inventing a parallel key store.
428#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
429pub struct RunIdentity {
430    /// Stable identifier for this run. UUIDv4 by default; callers may
431    /// supply their own value (e.g. to keep run ids aligned with an
432    /// external trace system).
433    pub run_id: String,
434    /// `Some(parent.run_id)` when this run was spawned by another run;
435    /// `None` for top-level runs initiated by a user-facing entry
436    /// point.
437    #[serde(default, skip_serializing_if = "Option::is_none")]
438    pub parent_run_id: Option<String>,
439    /// 0 for top-level runs; +1 per nested spawn.
440    #[serde(default)]
441    pub depth: usize,
442    /// Conversation this run belongs to (when the host runtime has
443    /// one). `None` for tests and isolated runs that don't carry
444    /// conversation identity.
445    #[serde(default, skip_serializing_if = "Option::is_none")]
446    pub conversation_id: Option<String>,
447    /// Wall-clock deadline as milliseconds since the UNIX epoch. The
448    /// loop does not enforce this directly — plugins that care
449    /// (wall-clock steering, soft-cancel) read it. `None` means no
450    /// parent-imposed deadline.
451    #[serde(default, skip_serializing_if = "Option::is_none")]
452    pub deadline_unix_ms: Option<u64>,
453}
454
455impl RunIdentity {
456    /// Construct a top-level identity. Generates a fresh UUIDv4 run id.
457    pub fn root() -> Self {
458        Self {
459            run_id: uuid::Uuid::new_v4().to_string(),
460            parent_run_id: None,
461            depth: 0,
462            conversation_id: None,
463            deadline_unix_ms: None,
464        }
465    }
466
467    /// Construct a child identity from a parent. Inherits
468    /// `conversation_id` and `deadline_unix_ms`, bumps `depth`, sets
469    /// `parent_run_id`, and generates a fresh `run_id`.
470    pub fn child_of(parent: &Self) -> Self {
471        Self {
472            run_id: uuid::Uuid::new_v4().to_string(),
473            parent_run_id: Some(parent.run_id.clone()),
474            depth: parent.depth + 1,
475            conversation_id: parent.conversation_id.clone(),
476            deadline_unix_ms: parent.deadline_unix_ms,
477        }
478    }
479
480    pub fn with_run_id(mut self, id: impl Into<String>) -> Self {
481        self.run_id = id.into();
482        self
483    }
484
485    pub fn with_conversation_id(mut self, id: impl Into<String>) -> Self {
486        self.conversation_id = Some(id.into());
487        self
488    }
489
490    pub fn with_deadline_unix_ms(mut self, ms: u64) -> Self {
491        self.deadline_unix_ms = Some(ms);
492        self
493    }
494}
495
496/// Snapshot of agent state passed into the loop.
497///
498/// Carries the system prompt, the current transcript, and an optional
499/// [`RunIdentity`]. Plain data — the loop builds an internal mutable
500/// copy and returns the new tail.
501///
502/// `identity` is optional for backward compatibility with callers that
503/// don't yet thread one through. When `None`, the loop treats the run
504/// as an anonymous root; plugins that key on identity see `None` and
505/// degrade gracefully.
506#[derive(Debug, Clone)]
507pub struct AgentContext {
508    pub system_prompt: String,
509    pub messages: Vec<AgentMessage>,
510    pub identity: Option<RunIdentity>,
511}
512
513impl AgentContext {
514    pub fn new(system_prompt: impl Into<String>) -> Self {
515        Self {
516            system_prompt: system_prompt.into(),
517            messages: Vec::new(),
518            identity: None,
519        }
520    }
521
522    pub fn with_messages(mut self, messages: Vec<AgentMessage>) -> Self {
523        self.messages = messages;
524        self
525    }
526
527    /// Attach a [`RunIdentity`] to this context. Use
528    /// [`RunIdentity::root`] for top-level runs and
529    /// [`RunIdentity::child_of`] for spawned children.
530    pub fn with_identity(mut self, identity: RunIdentity) -> Self {
531        self.identity = Some(identity);
532        self
533    }
534
535    /// Convenience: produce a child `AgentContext` for a spawned run.
536    /// Returns a fresh context with the supplied `system_prompt`, no
537    /// messages, and a child identity derived from this context's
538    /// identity (or a fresh root if this context has none).
539    pub fn spawn_child(&self, system_prompt: impl Into<String>) -> Self {
540        let parent_identity = self.identity.clone().unwrap_or_else(RunIdentity::root);
541        Self {
542            system_prompt: system_prompt.into(),
543            messages: Vec::new(),
544            identity: Some(RunIdentity::child_of(&parent_identity)),
545        }
546    }
547}
548
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain-text user message serializes with `role = "user"` and a
    /// bare-string `content`, and deserializes back to an equal value.
    #[test]
    fn user_text_round_trip() {
        let msg = AgentMessage::User {
            content: UserContent::Text("hello".into()),
            timestamp: Some(0),
        };
        let json = serde_json::to_value(&msg).unwrap();
        assert_eq!(json["role"], "user");
        assert_eq!(json["content"], "hello");

        // Return leg of the round trip. The timestamp is explicit, so the
        // deserialized value must equal the original exactly.
        let back: AgentMessage = serde_json::from_value(json).unwrap();
        assert_eq!(back, msg);
    }

    /// `with_tool_calls` keeps the leading text and every tool call.
    #[test]
    fn assistant_with_tool_call_blocks() {
        let content = AssistantContent::with_tool_calls(
            Some("calling…".into()),
            vec![ToolCall {
                id: "call_1".into(),
                name: "shell".into(),
                arguments: serde_json::json!({"cmd": "ls"}),
            }],
        );
        assert_eq!(content.tool_calls().len(), 1);
        assert_eq!(content.plain_text(), "calling…");
    }

    /// A `Custom` message serializes its kind and payload untouched and
    /// omits a `None` timestamp.
    #[test]
    fn custom_message_passthrough() {
        let msg = AgentMessage::Custom {
            kind: "ui_notification".into(),
            payload: serde_json::json!({"text": "build started"}),
            timestamp: None,
        };
        let json = serde_json::to_value(&msg).unwrap();
        assert_eq!(json["role"], "custom");
        assert_eq!(json["kind"], "ui_notification");
        assert_eq!(json["payload"]["text"], "build started");
        // `skip_serializing_if = "Option::is_none"` drops the field.
        assert!(json.get("timestamp").is_none());
    }
}