// multi-llm 1.0.0

//! Unified message architecture for LLM interactions.
//!
//! This module provides the core [`UnifiedMessage`] type that works across all LLM providers.
//! It's the primary abstraction that makes multi-llm provider-agnostic.
//!
//! # Overview
//!
//! The unified message system provides:
//! - **Provider-agnostic messages**: Same format works with OpenAI, Anthropic, Ollama, and LM Studio
//! - **Caching hints**: Native support for Anthropic prompt caching via [`MessageAttributes`]
//! - **Priority ordering**: Control message ordering with priority-based sorting
//! - **Rich content types**: Text, JSON, tool calls, and tool results via [`MessageContent`]
//!
//! # Quick Start
//!
//! ```rust
//! use multi_llm::{UnifiedMessage, MessageRole};
//!
//! // Simple messages using convenience constructors
//! let user_msg = UnifiedMessage::user("Hello, how are you?");
//! let system_msg = UnifiedMessage::system("You are a helpful assistant.");
//! let assistant_msg = UnifiedMessage::assistant("I'm doing well, thank you!");
//!
//! // Build a conversation
//! let messages = vec![system_msg, user_msg, assistant_msg];
//! ```
//!
//! # Caching Support
//!
//! For Anthropic's prompt caching (90% cost savings on cache reads):
//!
//! ```rust
//! use multi_llm::UnifiedMessage;
//!
//! // Mark a system prompt for caching (5-minute TTL)
//! let cached_system = UnifiedMessage::system("You are a helpful assistant.")
//!     .with_ephemeral_cache();
//!
//! // For longer sessions, use extended caching (1-hour TTL)
//! let long_context = UnifiedMessage::system("Large context here...")
//!     .with_extended_cache();
//! ```
//!
//! # Message Categories
//!
//! Use semantic constructors to get appropriate caching and priority defaults:
//!
//! ```rust
//! use multi_llm::UnifiedMessage;
//!
//! // System instructions (cacheable, highest priority)
//! let system = UnifiedMessage::system_instruction(
//!     "You are a helpful assistant.".to_string(),
//!     Some("system-v1".to_string())
//! );
//!
//! // Context information (cacheable, medium priority)
//! let context = UnifiedMessage::context(
//!     "User preferences: dark mode, verbose output".to_string(),
//!     None
//! );
//!
//! // Current user input (not cached; priority 30, the lowest of these semantic constructors)
//! let current = UnifiedMessage::current_user("What's the weather?".to_string());
//! ```

use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Role of a message in an LLM conversation.
///
/// Each role has specific semantics that LLM providers understand:
/// - [`MessageRole::System`]: Instructions that guide the model's behavior
/// - [`MessageRole::User`]: Input from the human user
/// - [`MessageRole::Assistant`]: Responses from the AI model
/// - [`MessageRole::Tool`]: Results from tool/function executions
///
/// # Textual forms
///
/// The [`Display`](std::fmt::Display) implementation renders the lowercase
/// names `system`, `user`, `assistant` and `tool`. The derived serde
/// implementations carry no rename attributes, so serialized data uses the
/// Rust variant names (for example `"System"` in JSON) rather than the
/// lowercase `Display` form.
///
/// # Forward compatibility
///
/// The enum is `#[non_exhaustive]`: code outside this crate that matches on it
/// must include a wildcard arm, because further roles may be added.
///
/// # Example
///
/// ```rust
/// use multi_llm::{MessageRole, UnifiedMessage};
///
/// // Explicit role usage
/// let msg = UnifiedMessage::simple(MessageRole::User, "Hello!");
///
/// // Or use convenience constructors (preferred)
/// let msg = UnifiedMessage::user("Hello!");
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum MessageRole {
    /// System instructions that guide the model's behavior.
    ///
    /// System messages typically contain:
    /// - Persona definitions ("You are a helpful assistant")
    /// - Behavioral constraints ("Never reveal confidential information")
    /// - Output format instructions ("Respond in JSON format")
    System,

    /// Input from the human user.
    ///
    /// User messages contain the actual queries, questions, or commands
    /// that the model should respond to.
    User,

    /// Response from the AI assistant.
    ///
    /// Assistant messages are typically generated by the LLM but can also
    /// be provided as conversation history or to demonstrate expected output.
    Assistant,

    /// Result from a tool/function execution.
    ///
    /// Tool messages contain the output of function calls. They must reference
    /// the `tool_call_id` of the corresponding tool call from an assistant message.
    Tool,
}

impl std::fmt::Display for MessageRole {
    /// Writes the lowercase role name (`system`, `user`, `assistant`, `tool`).
    ///
    /// The label is written verbatim; width, fill and precision flags on the
    /// surrounding format spec are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first so there is a single write site.
        let label = match self {
            MessageRole::System => "system",
            MessageRole::User => "user",
            MessageRole::Assistant => "assistant",
            MessageRole::Tool => "tool",
        };
        f.write_str(label)
    }
}

/// Content of a message, supporting text, JSON, and tool interactions.
///
/// Most messages use [`MessageContent::Text`], but tool calling workflows
/// require [`MessageContent::ToolCall`] and [`MessageContent::ToolResult`].
///
/// # Display
///
/// The [`Display`](std::fmt::Display) implementation produces a
/// human-readable rendering:
/// - `Text`: the text unchanged
/// - `Json`: the value pretty-printed
/// - `ToolCall`: `Tool call: <name> with args: <compact JSON>` (the call `id` is omitted)
/// - `ToolResult`: the content, prefixed with `Error: ` when `is_error` is `true`
///
/// # Forward compatibility
///
/// The enum is `#[non_exhaustive]`: code outside this crate that matches on it
/// must include a wildcard arm, because further content kinds may be added.
///
/// # Examples
///
/// ```rust
/// use multi_llm::MessageContent;
///
/// // Plain text (most common)
/// let text = MessageContent::Text("Hello, world!".to_string());
///
/// // Structured JSON content
/// let json = MessageContent::Json(serde_json::json!({
///     "intent": "greeting",
///     "confidence": 0.95
/// }));
///
/// // Tool call from assistant
/// let tool_call = MessageContent::ToolCall {
///     id: "call_123".to_string(),
///     name: "get_weather".to_string(),
///     arguments: serde_json::json!({"city": "London"}),
/// };
///
/// // Tool result to send back
/// let tool_result = MessageContent::ToolResult {
///     tool_call_id: "call_123".to_string(),
///     content: "Sunny, 22°C".to_string(),
///     is_error: false,
/// };
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum MessageContent {
    /// Plain text content.
    ///
    /// This is the most common content type, used for regular conversation.
    Text(String),

    /// Structured JSON content.
    ///
    /// Useful for passing structured data that doesn't fit the tool calling model,
    /// or for internal processing of parsed LLM outputs.
    Json(serde_json::Value),

    /// Tool/function call request from the assistant.
    ///
    /// When the LLM decides to call a tool, it returns this content type.
    /// Your application should execute the tool and return a [`MessageContent::ToolResult`].
    ToolCall {
        /// Unique identifier for this tool call (generated by the LLM).
        id: String,
        /// Name of the tool to invoke (must match a defined [`Tool`](crate::Tool)).
        name: String,
        /// Arguments to pass to the tool as JSON.
        arguments: serde_json::Value,
    },

    /// Result from executing a tool.
    ///
    /// After executing a tool call, send the result back using this content type.
    /// The `tool_call_id` must match the `id` from the corresponding [`ToolCall`](MessageContent::ToolCall).
    ToolResult {
        /// ID of the tool call this result responds to.
        tool_call_id: String,
        /// Result content from tool execution (typically a string or JSON string).
        content: String,
        /// Whether the tool execution failed. If `true`, `content` contains error details.
        is_error: bool,
    },
}

impl std::fmt::Display for MessageContent {
    /// Renders the content in a human-readable form.
    ///
    /// - `Text` is written unchanged.
    /// - `Json` is pretty-printed.
    /// - `ToolCall` becomes `Tool call: <name> with args: <compact JSON>`.
    /// - `ToolResult` is its content, prefixed with `Error: ` when flagged as an error.
    ///
    /// JSON serialization is best-effort: if it fails, an empty string is
    /// written in its place instead of returning a formatting error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            MessageContent::Text(body) => f.write_str(body),
            MessageContent::Json(document) => {
                let rendered = serde_json::to_string_pretty(document).unwrap_or_default();
                f.write_str(&rendered)
            }
            MessageContent::ToolCall {
                name, arguments, ..
            } => {
                let encoded_args = serde_json::to_string(arguments).unwrap_or_default();
                write!(f, "Tool call: {} with args: {}", name, encoded_args)
            }
            MessageContent::ToolResult {
                content, is_error, ..
            } => {
                // Failed tool executions are flagged with a fixed prefix.
                if *is_error {
                    f.write_str("Error: ")?;
                }
                f.write_str(content)
            }
        }
    }
}

/// Semantic category of a message for provider-specific handling.
///
/// Categories help providers optimize message processing, especially for caching.
/// Messages with different categories may be grouped, cached, or prioritized differently.
///
/// # Priority Defaults
///
/// A category does not carry a priority by itself. The semantic constructors on
/// [`UnifiedMessage`] set the category together with a priority and a
/// cacheability flag:
///
/// | Constructor | Category | Priority | Cacheable |
/// |-------------|----------|----------|-----------|
/// | `system_instruction()` | SystemInstruction | 0 (highest) | Yes |
/// | `tool_definition()` | ToolDefinition | 1 | Yes |
/// | `context()` | Context | 5 | Yes |
/// | `history()` | History | 20 | Yes |
/// | `tool_call()` | ToolResult | 25 | No |
/// | `tool_result()` | ToolResult | 26 | No |
/// | `current_user()` | Current | 30 | No |
///
/// Messages built with [`MessageAttributes::default()`] (as `UnifiedMessage::new`,
/// `simple`, `user`, `assistant` and `system` do) use `Current` with priority 50
/// and are not cacheable, so they sort after all of the rows above.
///
/// The enum is `#[non_exhaustive]`: code outside this crate that matches on it
/// must include a wildcard arm.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum MessageCategory {
    /// Core system prompts and instructions.
    ///
    /// These are the foundational instructions that define the model's behavior.
    /// They're almost always cached since they rarely change within a session.
    SystemInstruction,

    /// Tool/function definitions.
    ///
    /// Contains the schema definitions for available tools. Typically cached
    /// since tool definitions are static for a given application.
    ToolDefinition,

    /// Contextual information (user preferences, session state, etc.).
    ///
    /// Background context that informs responses but isn't part of the
    /// direct conversation. Often cached for the session duration.
    Context,

    /// Conversation history (previous turns).
    ///
    /// Past messages in the conversation. May be partially cached for
    /// long conversations to save on repeated processing.
    History,

    /// Current user input (the active turn).
    ///
    /// The message being responded to right now. The `current_user()`
    /// constructor and the default attributes mark it as not cacheable,
    /// since it changes with each request.
    Current,

    /// Results from tool executions.
    ///
    /// Output from function calls. The `tool_result()` constructor marks these
    /// as not cacheable since tool results are dynamic and request-specific.
    /// `tool_call()` also files assistant tool-call requests under this
    /// category, as there is no dedicated tool-call variant.
    ToolResult,
}

/// Cache type for prompt caching (Anthropic-specific feature)
///
/// Controls the time-to-live (TTL) for cached prompt content. Both types offer
/// 90% savings on cache reads, but differ in write costs and duration.
///
/// # Pricing Model
/// - **Ephemeral writes**: 1.25x base input token cost (25% premium)
/// - **Extended writes**: 2x base input token cost (100% premium)
/// - **Cache reads (both)**: 0.1x base input token cost (90% savings)
///
/// # When to Use
/// - **Ephemeral**: Quick iterations, development sessions (< 5 minutes)
/// - **Extended**: Long documentation, repeated workflows (< 1 hour)
///
/// # Example
/// ```rust
/// use multi_llm::{MessageAttributes, CacheType};
///
/// // Ephemeral: lower write cost, shorter TTL
/// let ephemeral = MessageAttributes {
///     cacheable: true,
///     cache_type: Some(CacheType::Ephemeral),
///     ..Default::default()
/// };
///
/// // Extended: higher write cost, longer TTL
/// let extended = MessageAttributes {
///     cacheable: true,
///     cache_type: Some(CacheType::Extended),
///     ..Default::default()
/// };
/// ```
///
/// # Break-Even Analysis
/// Using the multipliers above, one cache write followed by N cache reads is
/// compared with sending the same tokens uncached on all N + 1 requests
/// (cost `1 + N`), assuming every read lands within the TTL:
/// - **Ephemeral**: `1.25 + 0.1 * N` — cheaper from the first read
///   (N = 1: 1.35 vs 2)
/// - **Extended**: `2 + 0.1 * N` — cheaper from the second read
///   (N = 1: 2.1 vs 2; N = 2: 2.2 vs 3)
///
/// See: <https://platform.claude.com/docs/en/build-with-claude/prompt-caching>
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[non_exhaustive]
pub enum CacheType {
    /// Ephemeral cache (5-minute TTL, 1.25x write cost)
    ///
    /// Best for development, quick iterations, and short sessions where you'll
    /// reuse the same context multiple times within 5 minutes.
    ///
    /// This is the `Default` variant.
    #[default]
    Ephemeral,

    /// Extended cache (1-hour TTL, 2x write cost)
    ///
    /// Best for long documentation contexts, extended workflows, or situations
    /// where you need the cache to persist across longer time periods (up to 1 hour).
    Extended,
}

/// Attributes that guide how providers handle a message.
///
/// These attributes control caching behavior, message ordering, and provide
/// metadata that providers can use for optimization.
///
/// # Caching
///
/// For Anthropic's prompt caching, set `cacheable: true` and optionally
/// specify a [`CacheType`]. The `cache_key` helps identify content for
/// deduplication across requests.
///
/// # Priority
///
/// Priority determines message ordering when using [`UnifiedLLMRequest::sort_messages()`].
/// Lower values = higher priority (processed first). Range: 0-255.
///
/// # Example
///
/// ```rust
/// use multi_llm::{MessageAttributes, MessageCategory, CacheType};
/// use std::collections::HashMap;
///
/// // Cacheable system instruction with highest priority
/// let system_attrs = MessageAttributes {
///     priority: 0,
///     cacheable: true,
///     cache_type: Some(CacheType::Extended),
///     cache_key: Some("system-v1".to_string()),
///     category: MessageCategory::SystemInstruction,
///     metadata: HashMap::new(),
/// };
///
/// // Defaults: category `Current`, not cached, priority 50 (sorts after the
/// // semantic constructors, which use priorities 0-30)
/// let user_attrs = MessageAttributes::default();  // priority=50, cacheable=false
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MessageAttributes {
    /// Priority for message ordering (lower = higher priority).
    ///
    /// Default is 50. Range: 0 (highest) to 255 (lowest).
    /// Used by [`UnifiedLLMRequest::sort_messages()`] to order messages.
    pub priority: u8,

    /// Whether this message content is static and can be cached.
    ///
    /// When `true`, providers that support caching (like Anthropic) will
    /// attempt to cache this content for subsequent requests.
    /// Default is `false`.
    pub cacheable: bool,

    /// Cache TTL type for Anthropic prompt caching.
    ///
    /// - [`CacheType::Ephemeral`]: 5-minute TTL, 1.25x write cost
    /// - [`CacheType::Extended`]: 1-hour TTL, 2x write cost
    ///
    /// Only meaningful when `cacheable` is `true`. Ignored by providers
    /// that don't support prompt caching.
    ///
    /// Default is `None`. The semantic constructors on `UnifiedMessage` also
    /// leave this `None`, even for cacheable messages; it is set by
    /// `with_ephemeral_cache()` / `with_extended_cache()` or by hand.
    pub cache_type: Option<CacheType>,

    /// Optional cache key for content deduplication.
    ///
    /// When provided, helps identify identical content across requests.
    /// Useful for versioning system prompts (e.g., "system-v2").
    pub cache_key: Option<String>,

    /// Semantic category for provider-specific handling.
    ///
    /// See [`MessageCategory`] for details on how categories affect
    /// caching and priority defaults. Default is [`MessageCategory::Current`].
    pub category: MessageCategory,

    /// Custom metadata for application-specific extensions.
    ///
    /// This data is passed through but not interpreted by multi-llm.
    /// Useful for tracking, logging, or application-specific processing.
    /// Default is an empty map.
    pub metadata: HashMap<String, serde_json::Value>,
}

impl Default for MessageAttributes {
    /// Attributes for an ordinary, uncached message.
    ///
    /// Yields priority 50, `cacheable = false`, no cache type or cache key,
    /// category [`MessageCategory::Current`] and empty metadata.
    fn default() -> Self {
        // Priority assigned to messages that do not ask for a specific slot.
        const DEFAULT_PRIORITY: u8 = 50;

        Self {
            metadata: HashMap::default(),
            category: MessageCategory::Current,
            cache_key: None,
            cache_type: None,
            cacheable: false,
            priority: DEFAULT_PRIORITY,
        }
    }
}

/// A provider-agnostic message for LLM interactions.
///
/// This is the core type of multi-llm. `UnifiedMessage` works across all supported
/// providers (OpenAI, Anthropic, Ollama, LM Studio) and provides built-in support
/// for caching hints and priority-based ordering.
///
/// # Creating Messages
///
/// Use the convenience constructors for common cases:
///
/// ```rust
/// use multi_llm::UnifiedMessage;
///
/// // Simple messages
/// let user = UnifiedMessage::user("What's the weather?");
/// let system = UnifiedMessage::system("You are a helpful assistant.");
/// let assistant = UnifiedMessage::assistant("The weather is sunny.");
///
/// // Semantic messages with caching defaults
/// let instruction = UnifiedMessage::system_instruction(
///     "You are a weather bot.".to_string(),
///     Some("weather-system-v1".to_string())
/// );
/// ```
///
/// # Caching
///
/// For Anthropic prompt caching (90% cost savings), use the builder methods:
///
/// ```rust
/// use multi_llm::UnifiedMessage;
///
/// // 5-minute cache (good for development/testing)
/// let cached = UnifiedMessage::system("Large context...")
///     .with_ephemeral_cache();
///
/// // 1-hour cache (good for production)
/// let long_cached = UnifiedMessage::system("Large context...")
///     .with_extended_cache();
/// ```
///
/// # Tool Calling
///
/// For function calling workflows:
///
/// ```rust
/// use multi_llm::UnifiedMessage;
///
/// // Assistant requests a tool call
/// let tool_request = UnifiedMessage::tool_call(
///     "call_abc123".to_string(),
///     "get_weather".to_string(),
///     serde_json::json!({"city": "London"})
/// );
///
/// // Send the tool result back
/// let tool_response = UnifiedMessage::tool_result(
///     "call_abc123".to_string(),
///     "Sunny, 22°C".to_string(),
///     false  // not an error
/// );
/// ```
///
/// # Equality
///
/// The derived `PartialEq` compares every field, including `timestamp`. Since
/// the constructors stamp each message with the current time, two messages
/// built from identical arguments at different moments generally do not
/// compare equal.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct UnifiedMessage {
    /// Role of this message (system, user, assistant, or tool).
    pub role: MessageRole,

    /// Content of this message.
    pub content: MessageContent,

    /// Attributes controlling caching, priority, and metadata.
    pub attributes: MessageAttributes,

    /// Timestamp for secondary ordering (after priority).
    ///
    /// When messages have equal priority, they're sorted by timestamp
    /// (earlier first). The constructors on this type set it to
    /// `chrono::Utc::now()` at creation time.
    pub timestamp: chrono::DateTime<chrono::Utc>,
}

impl UnifiedMessage {
    /// Create a new message with default attributes
    pub fn new(role: MessageRole, content: MessageContent) -> Self {
        Self {
            role,
            content,
            attributes: MessageAttributes::default(),
            timestamp: chrono::Utc::now(),
        }
    }

    /// Create a new message with custom attributes
    pub fn with_attributes(
        role: MessageRole,
        content: MessageContent,
        attributes: MessageAttributes,
    ) -> Self {
        Self {
            role,
            content,
            attributes,
            timestamp: chrono::Utc::now(),
        }
    }

    /// Create a system instruction message (cacheable, high priority)
    pub fn system_instruction(content: String, cache_key: Option<String>) -> Self {
        Self::with_attributes(
            MessageRole::System,
            MessageContent::Text(content),
            MessageAttributes {
                priority: 0,
                cacheable: true,
                cache_type: None,
                cache_key,
                category: MessageCategory::SystemInstruction,
                metadata: HashMap::new(),
            },
        )
    }

    /// Create a tool definition message (cacheable, high priority)
    pub fn tool_definition(content: String, cache_key: Option<String>) -> Self {
        Self::with_attributes(
            MessageRole::System,
            MessageContent::Text(content),
            MessageAttributes {
                priority: 1,
                cacheable: true,
                cache_type: None,
                cache_key,
                category: MessageCategory::ToolDefinition,
                metadata: HashMap::new(),
            },
        )
    }

    /// Create a context message (cacheable, medium priority)
    pub fn context(content: String, cache_key: Option<String>) -> Self {
        Self::with_attributes(
            MessageRole::System,
            MessageContent::Text(content),
            MessageAttributes {
                priority: 5,
                cacheable: true,
                cache_type: None,
                cache_key,
                category: MessageCategory::Context,
                metadata: HashMap::new(),
            },
        )
    }

    /// Create a history message (cacheable, lower priority)
    pub fn history(role: MessageRole, content: String) -> Self {
        Self::with_attributes(
            role,
            MessageContent::Text(content),
            MessageAttributes {
                priority: 20,
                cacheable: true,
                cache_type: None,
                cache_key: None,
                category: MessageCategory::History,
                metadata: HashMap::new(),
            },
        )
    }

    /// Create a current user message (not cacheable, lowest priority)
    pub fn current_user(content: String) -> Self {
        Self::with_attributes(
            MessageRole::User,
            MessageContent::Text(content),
            MessageAttributes {
                priority: 30,
                cacheable: false,
                cache_type: None,
                cache_key: None,
                category: MessageCategory::Current,
                metadata: HashMap::new(),
            },
        )
    }

    /// Create a tool call message
    pub fn tool_call(id: String, name: String, arguments: serde_json::Value) -> Self {
        Self::with_attributes(
            MessageRole::Assistant,
            MessageContent::ToolCall {
                id,
                name,
                arguments,
            },
            MessageAttributes {
                priority: 25,
                cacheable: false,
                cache_type: None,
                cache_key: None,
                category: MessageCategory::ToolResult,
                metadata: HashMap::new(),
            },
        )
    }

    /// Create a tool result message
    pub fn tool_result(tool_call_id: String, content: String, is_error: bool) -> Self {
        Self::with_attributes(
            MessageRole::Tool,
            MessageContent::ToolResult {
                tool_call_id,
                content,
                is_error,
            },
            MessageAttributes {
                priority: 26,
                cacheable: false,
                cache_type: None,
                cache_key: None,
                category: MessageCategory::ToolResult,
                metadata: HashMap::new(),
            },
        )
    }

    // Convenience constructors

    /// Create a simple text message
    pub fn simple(role: MessageRole, content: impl Into<String>) -> Self {
        Self::new(role, MessageContent::Text(content.into()))
    }

    /// Create a simple user message
    pub fn user(content: impl Into<String>) -> Self {
        Self::simple(MessageRole::User, content)
    }

    /// Create a simple assistant message
    pub fn assistant(content: impl Into<String>) -> Self {
        Self::simple(MessageRole::Assistant, content)
    }

    /// Create a simple system message
    pub fn system(content: impl Into<String>) -> Self {
        Self::simple(MessageRole::System, content)
    }

    // Cache control methods

    /// Mark this message for ephemeral caching (5-minute TTL)
    pub fn with_ephemeral_cache(mut self) -> Self {
        self.attributes.cacheable = true;
        self.attributes.cache_type = Some(CacheType::Ephemeral);
        self
    }

    /// Mark this message for extended caching (1-hour TTL)
    pub fn with_extended_cache(mut self) -> Self {
        self.attributes.cacheable = true;
        self.attributes.cache_type = Some(CacheType::Extended);
        self
    }
}

/// A complete request to an LLM provider.
///
/// Bundles messages, optional response schema, and request configuration
/// into a single structure that can be passed to any provider.
///
/// Each constructor sets at most one of the optional fields: `with_schema`
/// leaves `config` as `None` and `with_config` leaves `response_schema` as
/// `None`. To use both, build the request with one constructor and assign the
/// other public field directly.
///
/// # Basic Usage
///
/// ```rust
/// use multi_llm::{UnifiedLLMRequest, UnifiedMessage};
///
/// let messages = vec![
///     UnifiedMessage::system("You are a helpful assistant."),
///     UnifiedMessage::user("Hello!"),
/// ];
///
/// let request = UnifiedLLMRequest::new(messages);
/// ```
///
/// # With Configuration
///
/// ```rust
/// use multi_llm::{UnifiedLLMRequest, UnifiedMessage, RequestConfig};
///
/// let messages = vec![UnifiedMessage::user("Hello!")];
/// let config = RequestConfig {
///     temperature: Some(0.7),
///     max_tokens: Some(1000),
///     ..Default::default()
/// };
///
/// let request = UnifiedLLMRequest::with_config(messages, config);
/// ```
///
/// # Structured Output
///
/// For JSON-structured responses (useful for data extraction):
///
/// ```rust
/// use multi_llm::{UnifiedLLMRequest, UnifiedMessage};
///
/// let messages = vec![UnifiedMessage::user("Extract the name and age.")];
/// let schema = serde_json::json!({
///     "type": "object",
///     "properties": {
///         "name": {"type": "string"},
///         "age": {"type": "integer"}
///     },
///     "required": ["name", "age"]
/// });
///
/// let request = UnifiedLLMRequest::with_schema(messages, schema);
/// ```
///
/// # Message Ordering
///
/// Use [`sort_messages()`](Self::sort_messages) to order by priority and timestamp.
/// Messages are not sorted automatically; the sort is stable, so messages with
/// equal priority and timestamp keep their relative order:
///
/// ```rust
/// use multi_llm::{UnifiedLLMRequest, UnifiedMessage};
///
/// let messages = vec![
///     UnifiedMessage::current_user("Hello!".to_string()),  // priority 30
///     UnifiedMessage::system_instruction("Be helpful.".to_string(), None),  // priority 0
/// ];
///
/// let mut request = UnifiedLLMRequest::new(messages);
/// request.sort_messages();  // System instruction now first
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct UnifiedLLMRequest {
    /// All messages for this request, in the order supplied by the caller.
    ///
    /// Use [`sort_messages()`](Self::sort_messages) to order by priority.
    pub messages: Vec<UnifiedMessage>,

    /// Optional JSON schema for structured output.
    ///
    /// When provided, the LLM will attempt to return a response that
    /// conforms to this schema. Useful for data extraction tasks.
    pub response_schema: Option<serde_json::Value>,

    /// Optional configuration overrides for this request.
    ///
    /// When `None`, the provider's default configuration is used.
    pub config: Option<crate::provider::RequestConfig>,
}

impl UnifiedLLMRequest {
    /// Build a request from messages alone; no schema, no config overrides.
    pub fn new(messages: Vec<UnifiedMessage>) -> Self {
        Self {
            messages,
            response_schema: None,
            config: None,
        }
    }

    /// Build a request that asks for output conforming to `schema`.
    ///
    /// `config` is left as `None`.
    pub fn with_schema(messages: Vec<UnifiedMessage>, schema: serde_json::Value) -> Self {
        Self {
            response_schema: Some(schema),
            ..Self::new(messages)
        }
    }

    /// Build a request carrying per-request configuration overrides.
    ///
    /// `response_schema` is left as `None`.
    pub fn with_config(
        messages: Vec<UnifiedMessage>,
        config: crate::provider::RequestConfig,
    ) -> Self {
        Self {
            config: Some(config),
            ..Self::new(messages)
        }
    }

    /// Ordering shared by both sort methods: ascending priority value first,
    /// then ascending timestamp for messages of equal priority.
    fn delivery_order(left: &UnifiedMessage, right: &UnifiedMessage) -> std::cmp::Ordering {
        match left.attributes.priority.cmp(&right.attributes.priority) {
            std::cmp::Ordering::Equal => left.timestamp.cmp(&right.timestamp),
            decided => decided,
        }
    }

    /// Collects references to the messages accepted by `keep`, in stored order.
    fn select<P>(&self, keep: P) -> Vec<&UnifiedMessage>
    where
        P: Fn(&UnifiedMessage) -> bool,
    {
        self.messages.iter().filter(|&msg| keep(msg)).collect()
    }

    /// Sort the stored messages in place by priority, then timestamp.
    pub fn sort_messages(&mut self) {
        self.messages
            .sort_by(|left, right| Self::delivery_order(left, right));
    }

    /// Return references to the messages ordered by priority, then timestamp.
    ///
    /// The stored `messages` vector is left untouched.
    pub fn get_sorted_messages(&self) -> Vec<&UnifiedMessage> {
        let mut ordered: Vec<&UnifiedMessage> = self.messages.iter().collect();
        ordered.sort_by(|left, right| Self::delivery_order(left, right));
        ordered
    }

    /// Return the messages whose role is [`MessageRole::System`], in stored order.
    pub fn get_system_messages(&self) -> Vec<&UnifiedMessage> {
        self.select(|msg| msg.role == MessageRole::System)
    }

    /// Return every message whose role is not [`MessageRole::System`], in stored order.
    pub fn get_conversation_messages(&self) -> Vec<&UnifiedMessage> {
        self.select(|msg| msg.role != MessageRole::System)
    }

    /// Return the messages flagged `cacheable` in their attributes, in stored order.
    pub fn get_cacheable_messages(&self) -> Vec<&UnifiedMessage> {
        self.select(|msg| msg.attributes.cacheable)
    }
}