iron-providers 0.2.10

Semantic provider boundary for protocol-oriented LLM providers
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
//! Semantic provider request/response models
//!
//! These types define the normalized boundary between iron-core and
//! provider implementations. They are intentionally domain-oriented
//! rather than mirroring any specific provider's wire format.

use serde::{Deserialize, Serialize};
use serde_json::Value;

/// Reserved internal tool name used by providers to normalize model-originated
/// choice requests into first-class [`ProviderEvent::ChoiceRequest`] events.
///
/// NOTE(review): presumably a tool call carrying this name is intercepted by
/// the provider adapter rather than surfaced as a regular
/// `ProviderEvent::ToolCall` — confirm against the adapter implementations.
pub const CHOICE_REQUEST_TOOL_NAME: &str = "runtime.request_choice";

/// A transcript of conversation messages.
///
/// Wraps an ordered list of [`Message`] values. The derived `Default`
/// produces a transcript with no messages.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct Transcript {
    /// Ordered conversation messages.
    pub messages: Vec<Message>,
}

impl Transcript {
    /// Create an empty transcript.
    pub fn new() -> Self {
        Self { messages: vec![] }
    }

    /// Create a transcript with the provided messages.
    pub fn with_messages(messages: Vec<Message>) -> Self {
        Self { messages }
    }

    /// Append a message to the transcript.
    pub fn add_message(&mut self, message: Message) {
        self.messages.push(message);
    }

    /// Return whether the transcript contains no messages.
    pub fn is_empty(&self) -> bool {
        self.messages.is_empty()
    }
}

/// A message in the conversation transcript.
///
/// Serialized with an internal `role` tag whose value is the snake_case
/// variant name (for example `user` or `assistant_tool_call`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "role", rename_all = "snake_case")]
pub enum Message {
    /// User message; `content` holds the message text.
    User { content: String },
    /// Assistant message; `content` holds the message text.
    Assistant { content: String },
    /// Assistant tool call (the model requesting to call a tool)
    AssistantToolCall {
        /// Stable tool call identifier.
        call_id: String,
        /// Requested tool name.
        tool_name: String,
        /// Parsed tool arguments.
        arguments: Value,
    },
    /// Tool result message
    Tool {
        /// Stable tool call identifier.
        // NOTE(review): presumably matches the `call_id` of the
        // `AssistantToolCall` this result answers — confirm with callers.
        call_id: String,
        /// Tool name associated with the result.
        tool_name: String,
        /// Structured tool result.
        result: Value,
    },
}

impl Message {
    /// Create a user message.
    ///
    /// `content` accepts anything convertible into a `String`.
    pub fn user<S: Into<String>>(content: S) -> Self {
        Self::User {
            content: content.into(),
        }
    }

    /// Create an assistant message.
    ///
    /// `content` accepts anything convertible into a `String`.
    pub fn assistant<S: Into<String>>(content: S) -> Self {
        Self::Assistant {
            content: content.into(),
        }
    }

    /// Create an assistant tool-call message.
    ///
    /// Provided so every variant has a matching constructor; `call_id` and
    /// `tool_name` accept anything convertible into a `String`, and
    /// `arguments` is stored unchanged.
    pub fn assistant_tool_call<S1: Into<String>, S2: Into<String>>(
        call_id: S1,
        tool_name: S2,
        arguments: Value,
    ) -> Self {
        Self::AssistantToolCall {
            call_id: call_id.into(),
            tool_name: tool_name.into(),
            arguments,
        }
    }

    /// Create a tool result message.
    ///
    /// `call_id` and `tool_name` accept anything convertible into a
    /// `String`; `result` is stored unchanged.
    pub fn tool<S1: Into<String>, S2: Into<String>>(
        call_id: S1,
        tool_name: S2,
        result: Value,
    ) -> Self {
        Self::Tool {
            call_id: call_id.into(),
            tool_name: tool_name.into(),
            result,
        }
    }
}

/// Selection cardinality for a provider-originated choice request.
///
/// Serialized as the snake_case variant name (`single` / `multiple`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ChoiceSelectionMode {
    /// Single-selection mode.
    // NOTE(review): presumably exactly one item may be chosen — confirm
    // with the code that resolves choice requests.
    Single,
    /// Multiple-selection mode.
    // NOTE(review): presumably more than one item may be chosen — confirm
    // with the code that resolves choice requests.
    Multiple,
}

/// One selectable item in a provider-originated choice request.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ChoiceItem {
    /// Identifier of the item.
    pub id: String,
    /// Label text of the item (presumably what is presented to the user —
    /// confirm with the consumer).
    pub label: String,
    /// Optional description; left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
}

/// A first-class model-originated choice request surfaced by the provider/runtime layer.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ChoiceRequest {
    /// Prompt text accompanying the choice.
    pub prompt: String,
    /// Selection cardinality requested for this choice.
    pub selection_mode: ChoiceSelectionMode,
    /// Items that can be selected.
    pub items: Vec<ChoiceItem>,
}

impl ChoiceRequest {
    /// Deserialize a choice request out of an already-parsed JSON value.
    ///
    /// # Errors
    ///
    /// Returns the `serde_json::Error` produced when `value` does not match
    /// the serialized shape of [`ChoiceRequest`].
    pub fn from_value(value: Value) -> Result<Self, serde_json::Error> {
        serde_json::from_value::<Self>(value)
    }
}

/// Model-facing tool definition.
///
/// Describes one tool by its name, a natural-language description, and a
/// JSON value holding the schema of its arguments.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ToolDefinition {
    /// Unique tool name.
    pub name: String,
    /// Natural-language tool description.
    pub description: String,
    /// JSON Schema describing tool arguments.
    pub input_schema: Value,
}

impl ToolDefinition {
    /// Create a new tool definition.
    pub fn new<S1: Into<String>, S2: Into<String>>(
        name: S1,
        description: S2,
        input_schema: Value,
    ) -> Self {
        Self {
            name: name.into(),
            description: description.into(),
            input_schema,
        }
    }
}

/// Tool choice policy.
///
/// Variant names are serialized in snake_case. The derived `Default` is
/// [`ToolPolicy::Auto`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum ToolPolicy {
    /// No tools allowed
    None,
    /// Model can choose to use tools
    #[default]
    Auto,
    /// Model must use a tool
    Required,
    /// Model must use a specific tool; the payload presumably carries that
    /// tool's name (confirm against the provider adapters).
    Specific(String),
}

/// Normalized generation configuration.
///
/// Every field is optional; fields that are `None` are left out of the
/// serialized form. The derived `Default` leaves every field unset.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct GenerationConfig {
    /// Temperature for sampling (0.0 to 2.0)
    // The documented range is not enforced by this type: the field is a
    // plain `Option<f32>`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    /// Maximum tokens to generate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,
    /// Top-p sampling parameter
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
    /// Stop sequences
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Vec<String>>,
}

impl GenerationConfig {
    /// Create an empty generation configuration (every field unset).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Set the sampling temperature.
    ///
    /// The value is stored as given; no range check is performed here.
    #[must_use]
    pub fn with_temperature(mut self, temp: f32) -> Self {
        self.temperature = Some(temp);
        self
    }

    /// Set the maximum output token count.
    #[must_use]
    pub fn with_max_tokens(mut self, max: u32) -> Self {
        self.max_tokens = Some(max);
        self
    }

    /// Set the top-p sampling value.
    ///
    /// The value is stored as given; no range check is performed here.
    #[must_use]
    pub fn with_top_p(mut self, top_p: f32) -> Self {
        self.top_p = Some(top_p);
        self
    }

    /// Set the stop sequences, replacing any previously configured ones.
    ///
    /// Accepts any iterable of string-like items. An empty iterable stores
    /// `Some(vec![])`, which is distinct from leaving the field unset.
    #[must_use]
    pub fn with_stop<I, S>(mut self, stop: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: Into<String>,
    {
        self.stop = Some(stop.into_iter().map(Into::into).collect());
        self
    }
}

/// A completed tool call with structured JSON arguments.
///
/// Carries the same three pieces of data as `Message::AssistantToolCall`:
/// a call identifier, the tool name, and the parsed arguments.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ToolCall {
    /// Stable tool call identifier.
    pub call_id: String,
    /// Tool name selected by the model.
    pub tool_name: String,
    /// Parsed tool arguments.
    pub arguments: Value,
}

impl ToolCall {
    /// Create a normalized tool call record.
    pub fn new<S1: Into<String>, S2: Into<String>>(
        call_id: S1,
        tool_name: S2,
        arguments: Value,
    ) -> Self {
        Self {
            call_id: call_id.into(),
            tool_name: tool_name.into(),
            arguments,
        }
    }
}

/// Normalized provider-reported token usage for a single inference request.
///
/// All fields are optional because provider families differ in what they
/// return.  When present, each value represents the provider's cumulative
/// snapshot for the current request, not an incremental delta.
///
/// Fields that are `None` are left out of the serialized form, and the
/// derived `Default` leaves every field unset.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct TokenUsage {
    /// Input or prompt tokens reported by the provider.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    /// Output or completion tokens reported by the provider.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    /// Total tokens reported by the provider.
    // NOTE(review): stored as reported; nothing in this type derives it
    // from `input_tokens` + `output_tokens`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub total_tokens: Option<u64>,
    /// Cached input tokens reported by OpenAI-style providers.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_input_tokens: Option<u64>,
    /// Cache creation input tokens reported by Anthropic-style providers.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    /// Cache read input tokens reported by Anthropic-style providers.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
    /// Reasoning or thinking output tokens reported by the provider.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_output_tokens: Option<u64>,
}

impl TokenUsage {
    /// Build a usage snapshot in which no counter has been reported yet.
    pub fn new() -> Self {
        <Self as Default>::default()
    }
}

/// Events emitted by the provider during streaming
///
/// Serialized with an internal `type` tag whose value is the snake_case
/// variant name (for example `tool_call` or `choice_request`).
///
/// ## Stream termination contract
///
/// - `Complete` is emitted **only** on successful stream termination.
/// - If a provider encounters an unrecoverable error, the stream ends
///   with `Error` and does **not** emit `Complete`.
/// - `Status` events are informational and do not affect termination.
/// - `Usage` events carry cumulative provider-reported token usage.  When
///   multiple `Usage` events appear for the same request, the latest one
///   supersedes earlier snapshots rather than being additive.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ProviderEvent {
    /// Status update; `message` holds the status text.
    Status { message: String },
    /// Incremental text output; `content` holds the text of this increment.
    Output { content: String },
    /// Completed tool call; `call` holds the normalized [`ToolCall`].
    ToolCall { call: ToolCall },
    /// Structured model-originated choice request.
    ChoiceRequest { request: ChoiceRequest },
    /// Provider-reported token usage snapshot.
    ///
    /// Represents the provider's cumulative usage for the current request.
    /// Consumers should treat later `Usage` events as superseding earlier
    /// ones rather than adding them together.
    Usage { usage: TokenUsage },
    /// Stream completed successfully.
    ///
    /// This event is emitted exactly once per successful stream and is
    /// never emitted after an unrecoverable error.
    Complete,
    /// Error occurred during streaming.
    ///
    /// Carries a structured [`ProviderError`](crate::ProviderError) so
    /// downstream consumers can programmatically classify the failure
    /// (authentication, rate-limit, transport, etc.).
    ///
    /// If this represents an unrecoverable error, the stream ends
    /// without a subsequent `Complete` event.
    Error { source: crate::ProviderError },
}

/// A runtime-owned record that is **not** model-visible.
///
/// Runtime records carry structured context (e.g. resolved interaction
/// records, session metadata) that should be available to provider
/// adapters for request assembly but must not be projected into the
/// model-visible conversation transcript.
///
/// The type itself is a plain `kind` string plus an arbitrary JSON
/// payload; it places no constraint on either.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RuntimeRecord {
    /// Stable record kind (e.g. "interaction", "session_state").
    pub kind: String,
    /// Structured payload.
    pub payload: Value,
}

impl RuntimeRecord {
    /// Create a new runtime record.
    pub fn new<S: Into<String>>(kind: S, payload: Value) -> Self {
        Self {
            kind: kind.into(),
            payload,
        }
    }
}

/// Inference context separating model-visible conversation from runtime-only state.
///
/// Provider adapters receive the full context but must only project the
/// `transcript` into model-visible request fields. Runtime records may
/// influence request assembly (e.g. system instructions, metadata headers)
/// through explicit provider-specific mapping logic.
///
/// The derived `Default` is an empty transcript with no runtime records.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct InferenceContext {
    /// Model-visible conversation transcript.
    pub transcript: Transcript,
    /// Runtime-only records that are not replayed into model context.
    ///
    /// Omitted from the serialized form when empty, and defaulted to an
    /// empty list when absent during deserialization.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub runtime_records: Vec<RuntimeRecord>,
}

impl InferenceContext {
    /// Create an empty context.
    pub fn new() -> Self {
        Self::default()
    }

    /// Create a context with only a transcript (no runtime records).
    pub fn from_transcript(transcript: Transcript) -> Self {
        Self {
            transcript,
            runtime_records: vec![],
        }
    }

    /// Add a runtime record.
    pub fn add_record(&mut self, record: RuntimeRecord) {
        self.runtime_records.push(record);
    }
}

/// Semantic inference request.
///
/// Bundles the model identifier, optional instructions, the inference
/// context, the available tools, the tool policy and generation settings.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InferenceRequest {
    /// Model identifier
    pub model: String,
    /// Optional top-level instructions (system prompt)
    ///
    /// Left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub instructions: Option<String>,
    /// Inference context containing model-visible transcript and runtime-only records.
    pub context: InferenceContext,
    /// Available tools
    ///
    /// Omitted from the serialized form when empty, and defaulted to an
    /// empty list when absent during deserialization.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub tools: Vec<ToolDefinition>,
    /// Tool usage policy
    ///
    /// Falls back to `ToolPolicy::default()` when absent during
    /// deserialization.
    #[serde(default)]
    pub tool_policy: ToolPolicy,
    /// Generation settings
    ///
    /// Falls back to `GenerationConfig::default()` when absent during
    /// deserialization.
    #[serde(default)]
    pub generation: GenerationConfig,
}

impl InferenceRequest {
    /// Create a new inference request for the provided model and transcript.
    ///
    /// The request starts with no instructions, no tools, the default
    /// [`ToolPolicy`] and the default [`GenerationConfig`]. The transcript
    /// is wrapped via [`InferenceContext::from_transcript`].
    #[must_use]
    pub fn new<S: Into<String>>(model: S, transcript: Transcript) -> Self {
        Self {
            model: model.into(),
            instructions: None,
            context: InferenceContext::from_transcript(transcript),
            tools: vec![],
            tool_policy: ToolPolicy::default(),
            generation: GenerationConfig::default(),
        }
    }

    /// Validate that the model identifier is present and non-empty.
    ///
    /// Called by all provider adapters before constructing a request.
    ///
    /// # Errors
    ///
    /// Returns an invalid-request [`ProviderError`](crate::ProviderError)
    /// when `model` is empty or consists only of whitespace.
    pub fn validate_model(&self) -> crate::ProviderResult<()> {
        // Whitespace-only identifiers are treated the same as empty ones.
        if self.model.trim().is_empty() {
            return Err(crate::ProviderError::invalid_request(
                "InferenceRequest.model must be a non-empty model identifier",
            ));
        }
        Ok(())
    }

    /// Set top-level instructions for the request, replacing any previous value.
    #[must_use]
    pub fn with_instructions<S: Into<String>>(mut self, instructions: S) -> Self {
        self.instructions = Some(instructions.into());
        self
    }

    /// Attach tool definitions to the request, replacing any previous list.
    #[must_use]
    pub fn with_tools(mut self, tools: Vec<ToolDefinition>) -> Self {
        self.tools = tools;
        self
    }

    /// Set the tool policy for the request.
    #[must_use]
    pub fn with_tool_policy(mut self, policy: ToolPolicy) -> Self {
        self.tool_policy = policy;
        self
    }

    /// Set generation parameters for the request.
    #[must_use]
    pub fn with_generation(mut self, generation: GenerationConfig) -> Self {
        self.generation = generation;
        self
    }
}