// multi_llm/messages.rs
1//! Unified message architecture for LLM interactions.
2//!
3//! This module provides the core [`UnifiedMessage`] type that works across all LLM providers.
4//! It's the primary abstraction that makes multi-llm provider-agnostic.
5//!
6//! # Overview
7//!
8//! The unified message system provides:
9//! - **Provider-agnostic messages**: Same format works with OpenAI, Anthropic, Ollama, and LM Studio
10//! - **Caching hints**: Native support for Anthropic prompt caching via [`MessageAttributes`]
11//! - **Priority ordering**: Control message ordering with priority-based sorting
12//! - **Rich content types**: Text, JSON, tool calls, and tool results via [`MessageContent`]
13//!
14//! # Quick Start
15//!
16//! ```rust
17//! use multi_llm::{UnifiedMessage, MessageRole};
18//!
19//! // Simple messages using convenience constructors
20//! let user_msg = UnifiedMessage::user("Hello, how are you?");
21//! let system_msg = UnifiedMessage::system("You are a helpful assistant.");
22//! let assistant_msg = UnifiedMessage::assistant("I'm doing well, thank you!");
23//!
24//! // Build a conversation
25//! let messages = vec![system_msg, user_msg, assistant_msg];
26//! ```
27//!
28//! # Caching Support
29//!
30//! For Anthropic's prompt caching (90% cost savings on cache reads):
31//!
32//! ```rust
33//! use multi_llm::UnifiedMessage;
34//!
35//! // Mark a system prompt for caching (5-minute TTL)
36//! let cached_system = UnifiedMessage::system("You are a helpful assistant.")
37//! .with_ephemeral_cache();
38//!
39//! // For longer sessions, use extended caching (1-hour TTL)
40//! let long_context = UnifiedMessage::system("Large context here...")
41//! .with_extended_cache();
42//! ```
43//!
44//! # Message Categories
45//!
46//! Use semantic constructors to get appropriate caching and priority defaults:
47//!
48//! ```rust
49//! use multi_llm::UnifiedMessage;
50//!
51//! // System instructions (cacheable, highest priority)
52//! let system = UnifiedMessage::system_instruction(
53//! "You are a helpful assistant.".to_string(),
54//! Some("system-v1".to_string())
55//! );
56//!
57//! // Context information (cacheable, medium priority)
58//! let context = UnifiedMessage::context(
59//! "User preferences: dark mode, verbose output".to_string(),
60//! None
61//! );
62//!
63//! // Current user input (not cached, lowest priority)
64//! let current = UnifiedMessage::current_user("What's the weather?".to_string());
65//! ```
66
67use serde::{Deserialize, Serialize};
68use std::collections::HashMap;
69
/// Role of a message in an LLM conversation.
///
/// Each role has specific semantics that LLM providers understand:
/// - [`MessageRole::System`]: Instructions that guide the model's behavior
/// - [`MessageRole::User`]: Input from the human user
/// - [`MessageRole::Assistant`]: Responses from the AI model
/// - [`MessageRole::Tool`]: Results from tool/function executions
///
/// The [`Display`](std::fmt::Display) impl renders each variant as its
/// lowercase wire-format name (`"system"`, `"user"`, `"assistant"`, `"tool"`).
///
/// # Example
///
/// ```rust
/// use multi_llm::{MessageRole, UnifiedMessage};
///
/// // Explicit role usage
/// let msg = UnifiedMessage::simple(MessageRole::User, "Hello!");
///
/// // Or use convenience constructors (preferred)
/// let msg = UnifiedMessage::user("Hello!");
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum MessageRole {
    /// System instructions that guide the model's behavior.
    ///
    /// System messages typically contain:
    /// - Persona definitions ("You are a helpful assistant")
    /// - Behavioral constraints ("Never reveal confidential information")
    /// - Output format instructions ("Respond in JSON format")
    System,

    /// Input from the human user.
    ///
    /// User messages contain the actual queries, questions, or commands
    /// that the model should respond to.
    User,

    /// Response from the AI assistant.
    ///
    /// Assistant messages are typically generated by the LLM but can also
    /// be provided as conversation history or to demonstrate expected output.
    Assistant,

    /// Result from a tool/function execution.
    ///
    /// Tool messages contain the output of function calls. They must reference
    /// the `tool_call_id` of the corresponding tool call from an assistant message.
    Tool,
}
118
119impl std::fmt::Display for MessageRole {
120 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
121 match self {
122 MessageRole::System => write!(f, "system"),
123 MessageRole::User => write!(f, "user"),
124 MessageRole::Assistant => write!(f, "assistant"),
125 MessageRole::Tool => write!(f, "tool"),
126 }
127 }
128}
129
/// Content of a message, supporting text, JSON, and tool interactions.
///
/// Most messages use [`MessageContent::Text`], but tool calling workflows
/// require [`MessageContent::ToolCall`] and [`MessageContent::ToolResult`].
///
/// The [`Display`](std::fmt::Display) impl renders `Text` verbatim, `Json`
/// pretty-printed, and the tool variants as human-readable summaries.
///
/// # Examples
///
/// ```rust
/// use multi_llm::MessageContent;
///
/// // Plain text (most common)
/// let text = MessageContent::Text("Hello, world!".to_string());
///
/// // Structured JSON content
/// let json = MessageContent::Json(serde_json::json!({
///     "intent": "greeting",
///     "confidence": 0.95
/// }));
///
/// // Tool call from assistant
/// let tool_call = MessageContent::ToolCall {
///     id: "call_123".to_string(),
///     name: "get_weather".to_string(),
///     arguments: serde_json::json!({"city": "London"}),
/// };
///
/// // Tool result to send back
/// let tool_result = MessageContent::ToolResult {
///     tool_call_id: "call_123".to_string(),
///     content: "Sunny, 22°C".to_string(),
///     is_error: false,
/// };
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum MessageContent {
    /// Plain text content.
    ///
    /// This is the most common content type, used for regular conversation.
    Text(String),

    /// Structured JSON content.
    ///
    /// Useful for passing structured data that doesn't fit the tool calling model,
    /// or for internal processing of parsed LLM outputs.
    Json(serde_json::Value),

    /// Tool/function call request from the assistant.
    ///
    /// When the LLM decides to call a tool, it returns this content type.
    /// Your application should execute the tool and return a [`MessageContent::ToolResult`].
    ToolCall {
        /// Unique identifier for this tool call (generated by the LLM).
        id: String,
        /// Name of the tool to invoke (must match a defined [`Tool`](crate::Tool)).
        name: String,
        /// Arguments to pass to the tool as JSON.
        arguments: serde_json::Value,
    },

    /// Result from executing a tool.
    ///
    /// After executing a tool call, send the result back using this content type.
    /// The `tool_call_id` must match the `id` from the corresponding [`ToolCall`](MessageContent::ToolCall).
    ToolResult {
        /// ID of the tool call this result responds to.
        tool_call_id: String,
        /// Result content from tool execution (typically a string or JSON string).
        content: String,
        /// Whether the tool execution failed. If `true`, `content` contains error details.
        is_error: bool,
    },
}
203
204impl std::fmt::Display for MessageContent {
205 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
206 match self {
207 MessageContent::Text(text) => write!(f, "{}", text),
208 MessageContent::Json(value) => {
209 write!(
210 f,
211 "{}",
212 serde_json::to_string_pretty(value).unwrap_or_default()
213 )
214 }
215 MessageContent::ToolCall {
216 name, arguments, ..
217 } => {
218 write!(
219 f,
220 "Tool call: {} with args: {}",
221 name,
222 serde_json::to_string(arguments).unwrap_or_default()
223 )
224 }
225 MessageContent::ToolResult {
226 content, is_error, ..
227 } => {
228 if *is_error {
229 write!(f, "Error: {}", content)
230 } else {
231 write!(f, "{}", content)
232 }
233 }
234 }
235 }
236}
237
/// Semantic category of a message for provider-specific handling.
///
/// Categories help providers optimize message processing, especially for caching.
/// Messages with different categories may be grouped, cached, or prioritized differently.
///
/// # Priority Defaults
///
/// When using semantic constructors like [`UnifiedMessage::system_instruction()`],
/// categories automatically set appropriate priority values:
///
/// | Category | Default Priority | Cacheable |
/// |----------|-----------------|-----------|
/// | SystemInstruction | 0 (highest) | Yes |
/// | ToolDefinition | 1 | Yes |
/// | Context | 5 | Yes |
/// | History | 20 | Yes |
/// | ToolResult | 26 | No |
/// | Current | 30 (lowest) | No |
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum MessageCategory {
    /// Core system prompts and instructions.
    ///
    /// These are the foundational instructions that define the model's behavior.
    /// They're almost always cached since they rarely change within a session.
    SystemInstruction,

    /// Tool/function definitions.
    ///
    /// Contains the schema definitions for available tools. Typically cached
    /// since tool definitions are static for a given application.
    ToolDefinition,

    /// Contextual information (user preferences, session state, etc.).
    ///
    /// Background context that informs responses but isn't part of the
    /// direct conversation. Often cached for the session duration.
    Context,

    /// Conversation history (previous turns).
    ///
    /// Past messages in the conversation. May be partially cached for
    /// long conversations to save on repeated processing.
    History,

    /// Current user input (the active turn).
    ///
    /// The message being responded to right now. Never cached since
    /// it changes with each request.
    Current,

    /// Results from tool executions.
    ///
    /// Output from function calls. Not cached since tool results
    /// are dynamic and request-specific.
    ///
    /// NOTE(review): [`UnifiedMessage::tool_call()`] also uses this category
    /// (at priority 25) since there is no dedicated tool-call category —
    /// confirm this is intentional before relying on category to distinguish
    /// calls from results.
    ToolResult,
}
295
/// Cache type for prompt caching (Anthropic-specific feature).
///
/// Controls the time-to-live (TTL) for cached prompt content. Both types offer
/// 90% savings on cache reads, but differ in write costs and duration.
/// The [`Default`] variant is [`CacheType::Ephemeral`].
///
/// # Pricing Model
/// - **Ephemeral writes**: 1.25x base input token cost (25% premium)
/// - **Extended writes**: 2x base input token cost (100% premium)
/// - **Cache reads (both)**: 0.1x base input token cost (90% savings)
///
/// # When to Use
/// - **Ephemeral**: Quick iterations, development sessions (< 5 minutes)
/// - **Extended**: Long documentation, repeated workflows (< 1 hour)
///
/// # Example
/// ```rust
/// use multi_llm::{MessageAttributes, CacheType};
///
/// // Ephemeral: lower write cost, shorter TTL
/// let ephemeral = MessageAttributes {
///     cacheable: true,
///     cache_type: Some(CacheType::Ephemeral),
///     ..Default::default()
/// };
///
/// // Extended: higher write cost, longer TTL
/// let extended = MessageAttributes {
///     cacheable: true,
///     cache_type: Some(CacheType::Extended),
///     ..Default::default()
/// };
/// ```
///
/// # Break-Even Analysis
/// For 1000 tokens cached and reused N times:
/// - **Ephemeral**: Profitable after 1-2 reads (breaks even quickly)
/// - **Extended**: Profitable after 5-6 reads (higher initial cost)
///
/// See: <https://platform.claude.com/docs/en/build-with-claude/prompt-caching>
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[non_exhaustive]
pub enum CacheType {
    /// Ephemeral cache (5-minute TTL, 1.25x write cost).
    ///
    /// Best for development, quick iterations, and short sessions where you'll
    /// reuse the same context multiple times within 5 minutes.
    #[default]
    Ephemeral,

    /// Extended cache (1-hour TTL, 2x write cost).
    ///
    /// Best for long documentation contexts, extended workflows, or situations
    /// where you need the cache to persist across longer time periods (up to 1 hour).
    Extended,
}
351
/// Attributes that guide how providers handle a message.
///
/// These attributes control caching behavior, message ordering, and provide
/// metadata that providers can use for optimization.
///
/// # Caching
///
/// For Anthropic's prompt caching, set `cacheable: true` and optionally
/// specify a [`CacheType`]. The `cache_key` helps identify content for
/// deduplication across requests.
///
/// # Priority
///
/// Priority determines message ordering when using [`UnifiedLLMRequest::sort_messages()`].
/// Lower values = higher priority (processed first). Range: 0-255.
///
/// # Example
///
/// ```rust
/// use multi_llm::{MessageAttributes, MessageCategory, CacheType};
/// use std::collections::HashMap;
///
/// // Cacheable system instruction with highest priority
/// let system_attrs = MessageAttributes {
///     priority: 0,
///     cacheable: true,
///     cache_type: Some(CacheType::Extended),
///     cache_key: Some("system-v1".to_string()),
///     category: MessageCategory::SystemInstruction,
///     metadata: HashMap::new(),
/// };
///
/// // Current user message (not cached, lowest priority)
/// let user_attrs = MessageAttributes::default(); // priority=50, cacheable=false
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MessageAttributes {
    /// Priority for message ordering (lower = higher priority).
    ///
    /// Default is 50. Range: 0 (highest) to 255 (lowest).
    /// Used by [`UnifiedLLMRequest::sort_messages()`] to order messages.
    pub priority: u8,

    /// Whether this message content is static and can be cached.
    ///
    /// When `true`, providers that support caching (like Anthropic) will
    /// attempt to cache this content for subsequent requests.
    /// Defaults to `false`.
    pub cacheable: bool,

    /// Cache TTL type for Anthropic prompt caching.
    ///
    /// - [`CacheType::Ephemeral`]: 5-minute TTL, 1.25x write cost
    /// - [`CacheType::Extended`]: 1-hour TTL, 2x write cost
    ///
    /// Only meaningful when `cacheable` is `true`. Ignored by providers
    /// that don't support prompt caching.
    pub cache_type: Option<CacheType>,

    /// Optional cache key for content deduplication.
    ///
    /// When provided, helps identify identical content across requests.
    /// Useful for versioning system prompts (e.g., "system-v2").
    pub cache_key: Option<String>,

    /// Semantic category for provider-specific handling.
    ///
    /// See [`MessageCategory`] for details on how categories affect
    /// caching and priority defaults. Defaults to [`MessageCategory::Current`].
    pub category: MessageCategory,

    /// Custom metadata for application-specific extensions.
    ///
    /// This data is passed through but not interpreted by multi-llm.
    /// Useful for tracking, logging, or application-specific processing.
    pub metadata: HashMap<String, serde_json::Value>,
}
428
429impl Default for MessageAttributes {
430 fn default() -> Self {
431 Self {
432 priority: 50,
433 cacheable: false,
434 cache_type: None,
435 cache_key: None,
436 category: MessageCategory::Current,
437 metadata: HashMap::new(),
438 }
439 }
440}
441
/// A provider-agnostic message for LLM interactions.
///
/// This is the core type of multi-llm. `UnifiedMessage` works across all supported
/// providers (OpenAI, Anthropic, Ollama, LM Studio) and provides built-in support
/// for caching hints and priority-based ordering.
///
/// Note: the derived `PartialEq` compares all fields including `timestamp`,
/// so two otherwise-identical messages created at different instants are
/// not equal.
///
/// # Creating Messages
///
/// Use the convenience constructors for common cases:
///
/// ```rust
/// use multi_llm::UnifiedMessage;
///
/// // Simple messages
/// let user = UnifiedMessage::user("What's the weather?");
/// let system = UnifiedMessage::system("You are a helpful assistant.");
/// let assistant = UnifiedMessage::assistant("The weather is sunny.");
///
/// // Semantic messages with caching defaults
/// let instruction = UnifiedMessage::system_instruction(
///     "You are a weather bot.".to_string(),
///     Some("weather-system-v1".to_string())
/// );
/// ```
///
/// # Caching
///
/// For Anthropic prompt caching (90% cost savings), use the builder methods:
///
/// ```rust
/// use multi_llm::UnifiedMessage;
///
/// // 5-minute cache (good for development/testing)
/// let cached = UnifiedMessage::system("Large context...")
///     .with_ephemeral_cache();
///
/// // 1-hour cache (good for production)
/// let long_cached = UnifiedMessage::system("Large context...")
///     .with_extended_cache();
/// ```
///
/// # Tool Calling
///
/// For function calling workflows:
///
/// ```rust
/// use multi_llm::UnifiedMessage;
///
/// // Assistant requests a tool call
/// let tool_request = UnifiedMessage::tool_call(
///     "call_abc123".to_string(),
///     "get_weather".to_string(),
///     serde_json::json!({"city": "London"})
/// );
///
/// // Send the tool result back
/// let tool_response = UnifiedMessage::tool_result(
///     "call_abc123".to_string(),
///     "Sunny, 22°C".to_string(),
///     false // not an error
/// );
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct UnifiedMessage {
    /// Role of this message (system, user, assistant, or tool).
    pub role: MessageRole,

    /// Content of this message.
    pub content: MessageContent,

    /// Attributes controlling caching, priority, and metadata.
    pub attributes: MessageAttributes,

    /// Timestamp for secondary ordering (after priority).
    ///
    /// Set to the creation time by all constructors. When messages have
    /// equal priority, they're sorted by timestamp.
    pub timestamp: chrono::DateTime<chrono::Utc>,
}
520
521impl UnifiedMessage {
522 /// Create a new message with default attributes
523 pub fn new(role: MessageRole, content: MessageContent) -> Self {
524 Self {
525 role,
526 content,
527 attributes: MessageAttributes::default(),
528 timestamp: chrono::Utc::now(),
529 }
530 }
531
532 /// Create a new message with custom attributes
533 pub fn with_attributes(
534 role: MessageRole,
535 content: MessageContent,
536 attributes: MessageAttributes,
537 ) -> Self {
538 Self {
539 role,
540 content,
541 attributes,
542 timestamp: chrono::Utc::now(),
543 }
544 }
545
546 /// Create a system instruction message (cacheable, high priority)
547 pub fn system_instruction(content: String, cache_key: Option<String>) -> Self {
548 Self::with_attributes(
549 MessageRole::System,
550 MessageContent::Text(content),
551 MessageAttributes {
552 priority: 0,
553 cacheable: true,
554 cache_type: None,
555 cache_key,
556 category: MessageCategory::SystemInstruction,
557 metadata: HashMap::new(),
558 },
559 )
560 }
561
562 /// Create a tool definition message (cacheable, high priority)
563 pub fn tool_definition(content: String, cache_key: Option<String>) -> Self {
564 Self::with_attributes(
565 MessageRole::System,
566 MessageContent::Text(content),
567 MessageAttributes {
568 priority: 1,
569 cacheable: true,
570 cache_type: None,
571 cache_key,
572 category: MessageCategory::ToolDefinition,
573 metadata: HashMap::new(),
574 },
575 )
576 }
577
578 /// Create a context message (cacheable, medium priority)
579 pub fn context(content: String, cache_key: Option<String>) -> Self {
580 Self::with_attributes(
581 MessageRole::System,
582 MessageContent::Text(content),
583 MessageAttributes {
584 priority: 5,
585 cacheable: true,
586 cache_type: None,
587 cache_key,
588 category: MessageCategory::Context,
589 metadata: HashMap::new(),
590 },
591 )
592 }
593
594 /// Create a history message (cacheable, lower priority)
595 pub fn history(role: MessageRole, content: String) -> Self {
596 Self::with_attributes(
597 role,
598 MessageContent::Text(content),
599 MessageAttributes {
600 priority: 20,
601 cacheable: true,
602 cache_type: None,
603 cache_key: None,
604 category: MessageCategory::History,
605 metadata: HashMap::new(),
606 },
607 )
608 }
609
610 /// Create a current user message (not cacheable, lowest priority)
611 pub fn current_user(content: String) -> Self {
612 Self::with_attributes(
613 MessageRole::User,
614 MessageContent::Text(content),
615 MessageAttributes {
616 priority: 30,
617 cacheable: false,
618 cache_type: None,
619 cache_key: None,
620 category: MessageCategory::Current,
621 metadata: HashMap::new(),
622 },
623 )
624 }
625
626 /// Create a tool call message
627 pub fn tool_call(id: String, name: String, arguments: serde_json::Value) -> Self {
628 Self::with_attributes(
629 MessageRole::Assistant,
630 MessageContent::ToolCall {
631 id,
632 name,
633 arguments,
634 },
635 MessageAttributes {
636 priority: 25,
637 cacheable: false,
638 cache_type: None,
639 cache_key: None,
640 category: MessageCategory::ToolResult,
641 metadata: HashMap::new(),
642 },
643 )
644 }
645
646 /// Create a tool result message
647 pub fn tool_result(tool_call_id: String, content: String, is_error: bool) -> Self {
648 Self::with_attributes(
649 MessageRole::Tool,
650 MessageContent::ToolResult {
651 tool_call_id,
652 content,
653 is_error,
654 },
655 MessageAttributes {
656 priority: 26,
657 cacheable: false,
658 cache_type: None,
659 cache_key: None,
660 category: MessageCategory::ToolResult,
661 metadata: HashMap::new(),
662 },
663 )
664 }
665
666 // Convenience constructors
667
668 /// Create a simple text message
669 pub fn simple(role: MessageRole, content: impl Into<String>) -> Self {
670 Self::new(role, MessageContent::Text(content.into()))
671 }
672
673 /// Create a simple user message
674 pub fn user(content: impl Into<String>) -> Self {
675 Self::simple(MessageRole::User, content)
676 }
677
678 /// Create a simple assistant message
679 pub fn assistant(content: impl Into<String>) -> Self {
680 Self::simple(MessageRole::Assistant, content)
681 }
682
683 /// Create a simple system message
684 pub fn system(content: impl Into<String>) -> Self {
685 Self::simple(MessageRole::System, content)
686 }
687
688 // Cache control methods
689
690 /// Mark this message for ephemeral caching (5-minute TTL)
691 pub fn with_ephemeral_cache(mut self) -> Self {
692 self.attributes.cacheable = true;
693 self.attributes.cache_type = Some(CacheType::Ephemeral);
694 self
695 }
696
697 /// Mark this message for extended caching (1-hour TTL)
698 pub fn with_extended_cache(mut self) -> Self {
699 self.attributes.cacheable = true;
700 self.attributes.cache_type = Some(CacheType::Extended);
701 self
702 }
703}
704
/// A complete request to an LLM provider.
///
/// Bundles messages, optional response schema, and request configuration
/// into a single structure that can be passed to any provider.
///
/// # Basic Usage
///
/// ```rust
/// use multi_llm::{UnifiedLLMRequest, UnifiedMessage};
///
/// let messages = vec![
///     UnifiedMessage::system("You are a helpful assistant."),
///     UnifiedMessage::user("Hello!"),
/// ];
///
/// let request = UnifiedLLMRequest::new(messages);
/// ```
///
/// # With Configuration
///
/// ```rust
/// use multi_llm::{UnifiedLLMRequest, UnifiedMessage, RequestConfig};
///
/// let messages = vec![UnifiedMessage::user("Hello!")];
/// let config = RequestConfig {
///     temperature: Some(0.7),
///     max_tokens: Some(1000),
///     ..Default::default()
/// };
///
/// let request = UnifiedLLMRequest::with_config(messages, config);
/// ```
///
/// # Structured Output
///
/// For JSON-structured responses (useful for data extraction):
///
/// ```rust
/// use multi_llm::{UnifiedLLMRequest, UnifiedMessage};
///
/// let messages = vec![UnifiedMessage::user("Extract the name and age.")];
/// let schema = serde_json::json!({
///     "type": "object",
///     "properties": {
///         "name": {"type": "string"},
///         "age": {"type": "integer"}
///     },
///     "required": ["name", "age"]
/// });
///
/// let request = UnifiedLLMRequest::with_schema(messages, schema);
/// ```
///
/// # Message Ordering
///
/// Messages are kept in insertion order until you explicitly call
/// [`sort_messages()`](Self::sort_messages) to order by priority and timestamp:
///
/// ```rust
/// use multi_llm::{UnifiedLLMRequest, UnifiedMessage};
///
/// let messages = vec![
///     UnifiedMessage::current_user("Hello!".to_string()), // priority 30
///     UnifiedMessage::system_instruction("Be helpful.".to_string(), None), // priority 0
/// ];
///
/// let mut request = UnifiedLLMRequest::new(messages);
/// request.sort_messages(); // System instruction now first
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct UnifiedLLMRequest {
    /// All messages for this request.
    ///
    /// Use [`sort_messages()`](Self::sort_messages) to order by priority.
    pub messages: Vec<UnifiedMessage>,

    /// Optional JSON schema for structured output.
    ///
    /// When provided, the LLM will attempt to return a response that
    /// conforms to this schema. Useful for data extraction tasks.
    pub response_schema: Option<serde_json::Value>,

    /// Optional configuration overrides for this request.
    ///
    /// When `None`, the provider's default configuration is used.
    pub config: Option<crate::provider::RequestConfig>,
}
791
792impl UnifiedLLMRequest {
793 /// Create a new request with messages
794 pub fn new(messages: Vec<UnifiedMessage>) -> Self {
795 Self {
796 messages,
797 response_schema: None,
798 config: None,
799 }
800 }
801
802 /// Create a new request with schema
803 pub fn with_schema(messages: Vec<UnifiedMessage>, schema: serde_json::Value) -> Self {
804 Self {
805 messages,
806 response_schema: Some(schema),
807 config: None,
808 }
809 }
810
811 /// Create a new request with config
812 pub fn with_config(
813 messages: Vec<UnifiedMessage>,
814 config: crate::provider::RequestConfig,
815 ) -> Self {
816 Self {
817 messages,
818 response_schema: None,
819 config: Some(config),
820 }
821 }
822
823 /// Sort messages by priority and timestamp
824 pub fn sort_messages(&mut self) {
825 self.messages.sort_by(|a, b| {
826 a.attributes
827 .priority
828 .cmp(&b.attributes.priority)
829 .then_with(|| a.timestamp.cmp(&b.timestamp))
830 });
831 }
832
833 /// Get messages sorted by priority (does not modify original)
834 pub fn get_sorted_messages(&self) -> Vec<&UnifiedMessage> {
835 let mut sorted: Vec<&UnifiedMessage> = self.messages.iter().collect();
836 sorted.sort_by(|a, b| {
837 a.attributes
838 .priority
839 .cmp(&b.attributes.priority)
840 .then_with(|| a.timestamp.cmp(&b.timestamp))
841 });
842 sorted
843 }
844
845 /// Get system messages
846 pub fn get_system_messages(&self) -> Vec<&UnifiedMessage> {
847 self.messages
848 .iter()
849 .filter(|msg| msg.role == MessageRole::System)
850 .collect()
851 }
852
853 /// Get non-system messages
854 pub fn get_conversation_messages(&self) -> Vec<&UnifiedMessage> {
855 self.messages
856 .iter()
857 .filter(|msg| msg.role != MessageRole::System)
858 .collect()
859 }
860
861 /// Get cacheable messages
862 pub fn get_cacheable_messages(&self) -> Vec<&UnifiedMessage> {
863 self.messages
864 .iter()
865 .filter(|msg| msg.attributes.cacheable)
866 .collect()
867 }
868}