1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
//! Core types for LLM provider abstraction
//!
//! Defines common types used across all LLM providers.
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use uuid::Uuid;
/// Role of a message in the conversation
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Role {
/// User message
User,
/// Assistant message
Assistant,
/// System message (not all providers support this)
System,
}
/// A message in the conversation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
/// Role of the message sender
pub role: Role,
/// Content blocks of the message
pub content: Vec<ContentBlock>,
}
impl Message {
/// Create a new user message with text content
pub fn user(text: impl Into<String>) -> Self {
Self {
role: Role::User,
content: vec![ContentBlock::Text { text: text.into() }],
}
}
/// Create a new assistant message with text content
pub fn assistant(text: impl Into<String>) -> Self {
Self {
role: Role::Assistant,
content: vec![ContentBlock::Text { text: text.into() }],
}
}
/// Create a new system message with text content
pub fn system(text: impl Into<String>) -> Self {
Self {
role: Role::System,
content: vec![ContentBlock::Text { text: text.into() }],
}
}
}
/// Content block in a message
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentBlock {
/// Plain text content
Text { text: String },
/// Image content (base64 or URL)
Image { source: ImageSource },
/// Tool use request from assistant
ToolUse {
id: String,
name: String,
input: serde_json::Value,
},
/// Tool result from user
ToolResult {
tool_use_id: String,
content: String,
#[serde(skip_serializing_if = "Option::is_none")]
is_error: Option<bool>,
},
/// Thinking/reasoning content (extended thinking)
Thinking {
thinking: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
signature: Option<String>,
},
}
/// Image source for image content blocks
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ImageSource {
/// Base64-encoded image
Base64 { media_type: String, data: String },
/// Image URL
Url { url: String },
}
/// LLM request parameters
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMRequest {
/// Model to use
pub model: String,
/// Conversation messages
pub messages: Vec<Message>,
/// System brain content (if supported)
#[serde(skip_serializing_if = "Option::is_none")]
pub system: Option<String>,
/// Available tools
#[serde(skip_serializing_if = "Option::is_none")]
pub tools: Option<Vec<Tool>>,
/// Temperature (0.0-1.0)
#[serde(skip_serializing_if = "Option::is_none")]
pub temperature: Option<f32>,
/// Maximum tokens to generate
#[serde(skip_serializing_if = "Option::is_none")]
pub max_tokens: Option<u32>,
/// Whether to stream the response
#[serde(skip)]
pub stream: bool,
/// Additional metadata
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata: Option<HashMap<String, String>>,
/// Working directory for proxy-aware providers (not serialized to API)
#[serde(skip)]
pub working_directory: Option<String>,
/// Session ID — used by CLI providers to isolate sessions via --session-id
#[serde(skip)]
pub session_id: Option<Uuid>,
}
impl LLMRequest {
/// Create a new LLM request
pub fn new(model: impl Into<String>, messages: Vec<Message>) -> Self {
Self {
model: model.into(),
messages,
system: None,
tools: None,
temperature: None,
max_tokens: None,
stream: false,
metadata: None,
working_directory: None,
session_id: None,
}
}
/// Set system brain content
pub fn with_system(mut self, system: impl Into<String>) -> Self {
self.system = Some(system.into());
self
}
/// Set tools
pub fn with_tools(mut self, tools: Vec<Tool>) -> Self {
self.tools = Some(tools);
self
}
/// Set temperature
pub fn with_temperature(mut self, temperature: f32) -> Self {
self.temperature = Some(temperature);
self
}
/// Set max tokens
pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
self.max_tokens = Some(max_tokens);
self
}
/// Enable streaming
pub fn with_streaming(mut self) -> Self {
self.stream = true;
self
}
}
/// Tool definition for LLM
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tool {
/// Tool name
pub name: String,
/// Tool description
pub description: String,
/// Input schema (JSON Schema)
pub input_schema: serde_json::Value,
}
/// LLM response
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMResponse {
/// Response ID
pub id: String,
/// Model used
pub model: String,
/// Content blocks
pub content: Vec<ContentBlock>,
/// Stop reason
pub stop_reason: Option<StopReason>,
/// Token usage
pub usage: TokenUsage,
/// Sum of active-streaming windows for this response, in seconds.
/// Excludes idle gaps >1s between content deltas (which would be
/// tool execution, approval waits, or between-message-block
/// round-trips). Used as the denominator for an accurate
/// tokens-per-second display so the channel footer reports the
/// model's actual sustained generation rate, not output-tokens
/// divided by full turn wall-clock (which silently halves the rate
/// on every tool-heavy turn). Only the streaming path populates
/// this; non-streaming `parse_response` sites leave it `None`.
#[serde(default)]
pub streaming_active_secs: Option<f64>,
}
/// Reason why the model stopped generating
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum StopReason {
/// Natural end of response
EndTurn,
/// Hit max tokens
MaxTokens,
/// Stop sequence encountered
StopSequence,
/// Tool use requested
ToolUse,
}
/// Token usage information
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
pub struct TokenUsage {
/// Input tokens (non-cached)
pub input_tokens: u32,
/// Output tokens
pub output_tokens: u32,
/// Cache creation input tokens (Anthropic-specific)
#[serde(default)]
pub cache_creation_tokens: u32,
/// Cache read input tokens (Anthropic-specific)
#[serde(default)]
pub cache_read_tokens: u32,
/// Billing: cumulative cache creation across all CLI tool rounds.
/// For non-CLI providers or single-round calls, this stays 0 and
/// cache_creation_tokens is used for both context and billing.
#[serde(default)]
pub billing_cache_creation: u32,
/// Billing: cumulative cache read across all CLI tool rounds.
#[serde(default)]
pub billing_cache_read: u32,
}
impl TokenUsage {
/// Non-cached tokens only — for context window tracking.
pub fn total(&self) -> u32 {
self.input_tokens + self.output_tokens
}
/// All tokens including cache — for billing/usage display.
/// Uses cumulative billing fields when available (CLI multi-round),
/// falls back to per-call cache fields for single-round providers.
pub fn billable_input(&self) -> u32 {
let cc = if self.billing_cache_creation > 0 {
self.billing_cache_creation
} else {
self.cache_creation_tokens
};
let cr = if self.billing_cache_read > 0 {
self.billing_cache_read
} else {
self.cache_read_tokens
};
self.input_tokens + cc + cr
}
/// Context window tokens — per-call cache values representing
/// the actual prompt size sent to the model on the last API call.
pub fn context_input(&self) -> u32 {
self.input_tokens + self.cache_creation_tokens + self.cache_read_tokens
}
/// Total billable tokens (input + cache + output).
pub fn billable_total(&self) -> u32 {
self.billable_input() + self.output_tokens
}
}
/// Streaming event from LLM
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum StreamEvent {
/// Stream started
MessageStart { message: StreamMessage },
/// Content block started
ContentBlockStart {
index: usize,
content_block: ContentBlock,
},
/// Content block delta (incremental update)
ContentBlockDelta { index: usize, delta: ContentDelta },
/// Content block stopped
ContentBlockStop { index: usize },
/// Message completed
MessageDelta {
delta: MessageDelta,
usage: TokenUsage,
},
/// Stream finished
MessageStop,
/// Ping event (keep-alive)
Ping,
/// Error event
Error { error: String },
}
/// Partial message information at stream start
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamMessage {
pub id: String,
pub model: String,
pub role: Role,
pub usage: TokenUsage,
}
/// Content delta for streaming updates
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentDelta {
/// Text delta
TextDelta { text: String },
/// Tool input delta (JSON)
InputJsonDelta { partial_json: String },
/// Reasoning/thinking content delta (display-only, not part of response text)
ReasoningDelta { text: String },
/// Anthropic native thinking delta (extended thinking — same as reasoning but uses `thinking` field)
ThinkingDelta { thinking: String },
}
/// Message delta for final updates
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MessageDelta {
pub stop_reason: Option<StopReason>,
pub stop_sequence: Option<String>,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_message_creation() {
let user_msg = Message::user("Hello");
assert_eq!(user_msg.role, Role::User);
assert_eq!(user_msg.content.len(), 1);
let assistant_msg = Message::assistant("Hi there");
assert_eq!(assistant_msg.role, Role::Assistant);
}
#[test]
fn test_llm_request_builder() {
let request = LLMRequest::new("claude-3-sonnet-20240229", vec![Message::user("Test")])
.with_system("You are helpful")
.with_temperature(0.7)
.with_max_tokens(1000)
.with_streaming();
assert_eq!(request.model, "claude-3-sonnet-20240229");
assert!(request.system.is_some());
assert_eq!(request.temperature, Some(0.7));
assert_eq!(request.max_tokens, Some(1000));
assert!(request.stream);
}
#[test]
fn test_token_usage() {
let usage = TokenUsage {
input_tokens: 100,
output_tokens: 200,
..Default::default()
};
assert_eq!(usage.total(), 300);
}
}