modelmux 1.0.0

ModelMux is a high-performance Rust gateway that translates OpenAI-compatible API requests to Vertex AI (Claude), with streaming, tool calling, and production-grade reliability.
//!
//! Anthropic to OpenAI format converter for API response translation.
//!
//! Converts Anthropic/Vertex AI chat completion responses to OpenAI-compatible format.
//! Handles streaming and non-streaming responses, tool calls, and message formatting
//! while maintaining semantic equivalence between the two API formats.
//!
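//! # Example
//!
//! A minimal end-to-end sketch (the import paths and the `LogLevel::Info`
//! variant name are assumptions; adjust to the crate's actual exports):
//!
//! ```ignore
//! use modelmux::{AnthropicResponse, AnthropicToOpenAiConverter, LogLevel};
//!
//! // Parse a raw Anthropic response body, then translate it to OpenAI format.
//! let raw = r#"{
//!     "content": [{"type": "text", "text": "Hello!"}],
//!     "stop_reason": "end_turn",
//!     "usage": {"input_tokens": 10, "output_tokens": 3}
//! }"#;
//! let anthropic: AnthropicResponse = serde_json::from_str(raw).expect("valid JSON");
//!
//! let converter = AnthropicToOpenAiConverter::new(LogLevel::Info);
//! let openai = converter.convert(anthropic, "claude-sonnet-4");
//! assert_eq!(openai.choices[0].finish_reason, "stop");
//! ```
//!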
//! Authors:
//!   Jaro <yarenty@gmail.com>
//!
//! Copyright (c) 2026 SkyCorp

/* --- uses ------------------------------------------------------------------------------------ */

use chrono::Utc;
use serde::{Deserialize, Serialize};

use crate::config::LogLevel;

/* --- types ----------------------------------------------------------------------------------- */

///
/// Anthropic chat completion response structure.
///
/// Represents a complete response from Anthropic's Claude API containing
/// generated content, usage statistics, and completion status.
#[derive(Debug, Deserialize)]
pub struct AnthropicResponse {
    /** content blocks generated by the model */
    pub content: Vec<AnthropicContentBlock>,
    /** reason why generation stopped */
    #[serde(rename = "stop_reason")]
    pub stop_reason: Option<String>,
    /** token usage statistics */
    pub usage: Option<AnthropicUsage>,
}

///
/// Anthropic content block within a response.
///
/// Represents individual content elements that can be either text
/// or tool usage instructions with appropriate type tagging.
#[derive(Debug, Deserialize)]
#[serde(tag = "type")]
pub enum AnthropicContentBlock {
    /** text content block */
    #[serde(rename = "text")]
    Text {
        /** the generated text content */
        text: String,
    },
    /** tool usage block for function calls */
    #[serde(rename = "tool_use")]
    ToolUse {
        /** unique tool call identifier */
        id: String,
        /** function name to call */
        name: String,
        /** function input parameters */
        input: serde_json::Value,
    },
}

///
/// Anthropic usage statistics for token consumption.
///
/// Tracks input and output token counts for billing and monitoring.
#[derive(Debug, Deserialize)]
pub struct AnthropicUsage {
    /** number of tokens in the input prompt */
    #[serde(rename = "input_tokens")]
    pub input_tokens: Option<u32>,
    /** number of tokens in the generated output */
    #[serde(rename = "output_tokens")]
    pub output_tokens: Option<u32>,
}

///
/// Anthropic streaming event for real-time responses.
///
/// Represents various event types during streaming response generation,
/// including content deltas, block boundaries, and completion status.
#[derive(Debug, Deserialize)]
#[serde(tag = "type")]
pub enum AnthropicStreamEvent {
    /** content block delta with incremental updates */
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta {
        /** the incremental content delta */
        delta: AnthropicDelta,
    },
    /** content block start notification */
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        /** content block metadata */
        #[serde(rename = "content_block")]
        content_block: AnthropicStreamContentBlock,
    },
    /** content block stop notification */
    #[serde(rename = "content_block_stop")]
    ContentBlockStop,
    /** message stop with completion status */
    #[serde(rename = "message_stop")]
    MessageStop {
        /** reason why message generation stopped */
        #[serde(rename = "stop_reason")]
        stop_reason: Option<String>,
    },
    /** message start notification */
    #[serde(rename = "message_start")]
    MessageStart {
        /** message metadata */
        #[allow(dead_code)]
        message: serde_json::Value,
    },
    /** message delta with status updates */
    #[serde(rename = "message_delta")]
    MessageDelta {
        /** the message delta information */
        delta: MessageDelta,
    },
    /** ping event for connection keep-alive */
    #[serde(rename = "ping")]
    Ping,
}

///
/// Anthropic delta content for streaming updates.
///
/// Contains incremental content updates during streaming response generation.
#[derive(Debug, Deserialize)]
pub struct AnthropicDelta {
    /** incremental text content */
    pub text: Option<String>,
    /** partial JSON for tool call arguments */
    #[serde(rename = "partial_json")]
    pub partial_json: Option<String>,
}

///
/// Anthropic streaming content block metadata.
///
/// Provides information about content blocks during streaming initialization.
#[derive(Debug, Deserialize)]
pub struct AnthropicStreamContentBlock {
    /** content block type identifier */
    #[serde(rename = "type")]
    pub block_type: String,
    /** optional block identifier */
    pub id: Option<String>,
    /** optional block name for tool usage */
    pub name: Option<String>,
}

///
/// Anthropic message delta for streaming status updates.
///
/// Contains status information during streaming message generation.
#[derive(Debug, Deserialize)]
pub struct MessageDelta {
    /** reason why generation stopped */
    #[serde(rename = "stop_reason")]
    pub stop_reason: Option<String>,
    /** stop sequence that triggered completion */
    #[serde(rename = "stop_sequence")]
    #[allow(dead_code)]
    pub stop_sequence: Option<String>,
}

///
/// OpenAI chat completion response structure.
///
/// Target format for responses compatible with OpenAI's chat completions API.
/// Contains choices, usage statistics, and proper OpenAI formatting.
#[derive(Debug, Serialize)]
pub struct OpenAiResponse {
    /** unique response identifier */
    pub id: String,
    /** response object type */
    pub object: String,
    /** creation timestamp */
    pub created: i64,
    /** model identifier */
    pub model: String,
    /** response choices array */
    pub choices: Vec<OpenAiChoice>,
    /** token usage statistics */
    pub usage: OpenAiUsage,
}

///
/// OpenAI response choice containing the generated message.
///
/// Represents a single completion choice with the generated content
/// and completion status information.
#[derive(Debug, Serialize)]
pub struct OpenAiChoice {
    /** choice index in the array */
    pub index: u32,
    /** generated message content */
    pub message: OpenAiResponseMessage,
    /** reason why generation finished */
    #[serde(rename = "finish_reason")]
    pub finish_reason: String,
}

///
/// OpenAI response message containing generated content.
///
/// Contains the role, text content, and optional tool calls
/// in OpenAI's expected format.
#[derive(Debug, Serialize)]
pub struct OpenAiResponseMessage {
    /** message role, typically "assistant" */
    pub role: String,
    /** generated text content */
    pub content: Option<String>,
    /** tool calls made during generation */
    #[serde(rename = "tool_calls", skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<OpenAiToolCall>>,
}

///
/// OpenAI tool call structure for function invocations.
///
/// Represents a function call made by the model during response generation.
#[derive(Debug, Serialize)]
pub struct OpenAiToolCall {
    /** unique tool call identifier */
    pub id: String,
    /** tool call type, typically "function" */
    #[serde(rename = "type")]
    pub call_type: String,
    /** function call details */
    pub function: OpenAiFunctionCall,
}

///
/// OpenAI function call details within a tool call.
///
/// Contains the function name and serialized arguments.
#[derive(Debug, Serialize)]
pub struct OpenAiFunctionCall {
    /** function name */
    pub name: String,
    /** JSON-serialized function arguments */
    pub arguments: String,
}

///
/// OpenAI usage statistics for token consumption.
///
/// Tracks prompt, completion, and total token counts.
#[derive(Debug, Serialize)]
pub struct OpenAiUsage {
    /** number of tokens in the prompt */
    #[serde(rename = "prompt_tokens")]
    pub prompt_tokens: u32,
    /** number of tokens in the completion */
    #[serde(rename = "completion_tokens")]
    pub completion_tokens: u32,
    /** total number of tokens used */
    #[serde(rename = "total_tokens")]
    pub total_tokens: u32,
}

///
/// OpenAI streaming response chunk for real-time updates.
///
/// Represents incremental updates during streaming response generation
/// compatible with OpenAI's streaming format.
#[derive(Debug, Serialize)]
pub struct OpenAiStreamChunk {
    /** unique chunk identifier */
    pub id: String,
    /** chunk object type */
    pub object: String,
    /** creation timestamp */
    pub created: i64,
    /** model identifier */
    pub model: String,
    /** streaming choices array */
    pub choices: Vec<OpenAiStreamChoice>,
}

///
/// OpenAI streaming choice containing delta updates.
///
/// Contains incremental content updates and completion status
/// for streaming responses.
#[derive(Debug, Serialize)]
pub struct OpenAiStreamChoice {
    /** choice index in the array */
    pub index: u32,
    /** incremental content delta */
    pub delta: OpenAiStreamDelta,
    /** reason why generation finished, if complete */
    #[serde(rename = "finish_reason", skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<String>,
}

///
/// OpenAI streaming delta content for incremental updates.
///
/// Contains incremental text content and tool call updates
/// during streaming generation.
#[derive(Debug, Serialize)]
pub struct OpenAiStreamDelta {
    /** incremental text content */
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,
    /** incremental tool call updates */
    #[serde(rename = "tool_calls", skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<OpenAiStreamToolCall>>,
}

///
/// OpenAI streaming tool call for incremental function updates.
///
/// Represents incremental updates to tool calls during streaming.
#[derive(Debug, Serialize)]
pub struct OpenAiStreamToolCall {
    /** tool call index in the array */
    pub index: u32,
    /** tool call identifier, if starting new call */
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /** tool call type, if starting new call */
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub call_type: Option<String>,
    /** function call updates */
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function: Option<OpenAiStreamFunctionCall>,
}

///
/// OpenAI streaming function call for incremental updates.
///
/// Contains incremental function name and argument updates.
#[derive(Debug, Serialize)]
pub struct OpenAiStreamFunctionCall {
    /** function name, if starting new call */
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /** incremental function arguments */
    #[serde(skip_serializing_if = "Option::is_none")]
    pub arguments: Option<String>,
}

///
/// Helper struct for tracking streaming tool calls.
///
/// Maintains state for tool calls being built incrementally
/// during streaming response generation.
#[derive(Debug)]
pub struct StreamingToolCall {
    /** unique tool call identifier */
    #[allow(dead_code)]
    pub id: String,
    /** function name being called */
    pub name: String,
    /** accumulated function arguments */
    pub arguments: String,
}

///
/// Converter from Anthropic format to OpenAI format.
///
/// Follows the Single Responsibility Principle: it handles only format conversion
/// from Anthropic responses to OpenAI chat completions format.
pub struct AnthropicToOpenAiConverter {
    /** logging level for debug output */
    log_level: LogLevel,
}

/* --- constants ------------------------------------------------------------------------------ */

/** OpenAI object type for chat completions */
const CHAT_COMPLETION_OBJECT: &str = "chat.completion";

/** OpenAI object type for streaming chunks */
const CHAT_COMPLETION_CHUNK_OBJECT: &str = "chat.completion.chunk";

/** Default assistant role for responses */
const ASSISTANT_ROLE: &str = "assistant";

/** Function tool call type */
const FUNCTION_TOOL_TYPE: &str = "function";

/* --- start of code -------------------------------------------------------------------------- */

impl AnthropicToOpenAiConverter {
    ///
    /// Create a new Anthropic to OpenAI converter.
    ///
    /// # Arguments
    ///  * `log_level` - logging level for debug output
    ///
    /// # Returns
    ///  * New converter instance
    pub fn new(log_level: LogLevel) -> Self {
        Self { log_level }
    }

    ///
    /// Convert Anthropic response to OpenAI response format.
    ///
    /// Transforms the complete response structure including content blocks,
    /// usage statistics, and completion status into OpenAI format.
    ///
    /// # Arguments
    ///  * `response` - Anthropic response to convert
    ///  * `model` - model identifier for the response
    ///
    /// # Returns
    ///  * Converted OpenAI response
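    ///
    /// # Examples
    ///
    /// A minimal sketch. `AnthropicResponse` derives only `Deserialize`, so it is
    /// built here from a JSON body; the `LogLevel::Info` variant name is an
    /// assumption about `crate::config::LogLevel`:
    ///
    /// ```ignore
    /// let raw = r#"{"content":[{"type":"tool_use","id":"call_1","name":"get_weather","input":{"city":"Paris"}}],"stop_reason":"tool_use"}"#;
    /// let response: AnthropicResponse = serde_json::from_str(raw).unwrap();
    ///
    /// let converter = AnthropicToOpenAiConverter::new(LogLevel::Info);
    /// let openai = converter.convert(response, "claude-sonnet-4");
    ///
    /// // A tool_use stop reason maps to OpenAI's "tool_calls" finish reason.
    /// assert_eq!(openai.choices[0].finish_reason, "tool_calls");
    /// let calls = openai.choices[0].message.tool_calls.as_ref().unwrap();
    /// assert_eq!(calls[0].function.name, "get_weather");
    /// ```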
    pub fn convert(&self, response: AnthropicResponse, model: &str) -> OpenAiResponse {
        let mut message = OpenAiResponseMessage {
            role: ASSISTANT_ROLE.to_string(),
            content: None,
            tool_calls: None,
        };

        self.extract_text_content(&mut message, &response.content);
        self.extract_tool_calls(&mut message, &response.content);

        let finish_reason =
            self.determine_finish_reason(&response.stop_reason, &message.tool_calls);
        let usage = self.convert_usage(response.usage);

        OpenAiResponse {
            id: self.generate_response_id(),
            object: CHAT_COMPLETION_OBJECT.to_string(),
            created: Utc::now().timestamp(),
            model: model.to_string(),
            choices: vec![OpenAiChoice { index: 0, message, finish_reason }],
            usage,
        }
    }

    ///
    /// Convert Anthropic streaming event to OpenAI streaming chunk.
    ///
    /// Transforms streaming events into OpenAI-compatible chunks while maintaining
    /// state for tool calls and completion status.
    ///
    /// # Arguments
    ///  * `event` - Anthropic streaming event to convert
    ///  * `model` - model identifier for the chunk
    ///  * `current_tool_call` - mutable state for current tool call
    ///  * `has_tool_calls` - mutable flag for tool call presence
    ///  * `stop_reason_from_delta` - mutable stop reason from message delta
    ///
    /// # Returns
    ///  * OpenAI streaming chunk if conversion produces output
    ///  * None if event should not generate output
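    ///
    /// # Examples
    ///
    /// A minimal sketch of driving the converter over a decoded stream. `events`
    /// is assumed to be an iterator of already-parsed `AnthropicStreamEvent`s
    /// (SSE decoding happens upstream), and `LogLevel::Info` is an assumed
    /// variant name:
    ///
    /// ```ignore
    /// let converter = AnthropicToOpenAiConverter::new(LogLevel::Info);
    ///
    /// // Mutable state threaded through the whole stream.
    /// let mut current_tool_call: Option<StreamingToolCall> = None;
    /// let mut has_tool_calls = false;
    /// let mut stop_reason_from_delta: Option<String> = None;
    ///
    /// for event in events {
    ///     if let Some(chunk) = converter.convert_stream_event(
    ///         &event,
    ///         "claude-sonnet-4",
    ///         &mut current_tool_call,
    ///         &mut has_tool_calls,
    ///         &mut stop_reason_from_delta,
    ///     ) {
    ///         // Forward each chunk to the client as an SSE `data:` line.
    ///         println!("data: {}", serde_json::to_string(&chunk).unwrap());
    ///     }
    /// }
    /// ```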
    pub fn convert_stream_event(
        &self,
        event: &AnthropicStreamEvent,
        model: &str,
        current_tool_call: &mut Option<StreamingToolCall>,
        has_tool_calls: &mut bool,
        stop_reason_from_delta: &mut Option<String>,
    ) -> Option<OpenAiStreamChunk> {
        match event {
            AnthropicStreamEvent::ContentBlockDelta { delta } => {
                self.handle_content_delta(delta, model, current_tool_call)
            }
            AnthropicStreamEvent::ContentBlockStart { content_block } => {
                self.handle_content_start(content_block, model, current_tool_call, has_tool_calls)
            }
            AnthropicStreamEvent::ContentBlockStop => self.handle_content_stop(current_tool_call),
            AnthropicStreamEvent::MessageStart { .. } => self.handle_message_start(),
            AnthropicStreamEvent::MessageDelta { delta } => {
                self.handle_message_delta(delta, stop_reason_from_delta)
            }
            AnthropicStreamEvent::Ping => None,
            AnthropicStreamEvent::MessageStop { stop_reason } => self.handle_message_stop(
                stop_reason,
                model,
                current_tool_call,
                has_tool_calls,
                stop_reason_from_delta,
            ),
        }
    }

    ///
    /// Extract text content from Anthropic content blocks.
    ///
    /// # Arguments
    ///  * `message` - OpenAI message to populate with text content
    ///  * `content_blocks` - Anthropic content blocks to extract from
    fn extract_text_content(
        &self,
        message: &mut OpenAiResponseMessage,
        content_blocks: &[AnthropicContentBlock],
    ) {
        let text_content: Vec<&str> = content_blocks
            .iter()
            .filter_map(|block| {
                if let AnthropicContentBlock::Text { text } = block {
                    Some(text.as_str())
                } else {
                    None
                }
            })
            .collect();

        if !text_content.is_empty() {
            message.content = Some(text_content.join(""));
        }
    }

    ///
    /// Extract tool calls from Anthropic content blocks.
    ///
    /// # Arguments
    ///  * `message` - OpenAI message to populate with tool calls
    ///  * `content_blocks` - Anthropic content blocks to extract from
    fn extract_tool_calls(
        &self,
        message: &mut OpenAiResponseMessage,
        content_blocks: &[AnthropicContentBlock],
    ) {
        let tool_use_blocks: Vec<_> = content_blocks
            .iter()
            .filter_map(|block| {
                if let AnthropicContentBlock::ToolUse { id, name, input } = block {
                    Some((id, name, input))
                } else {
                    None
                }
            })
            .collect();

        if !tool_use_blocks.is_empty() {
            self.debug(&format!(
                "Found {} tool call(s) in Anthropic response",
                tool_use_blocks.len()
            ));
            message.tool_calls = Some(
                tool_use_blocks
                    .into_iter()
                    .map(|(id, name, input)| {
                        let args_str =
                            serde_json::to_string(input).unwrap_or_else(|_| "{}".to_string());
                        self.debug(&format!("Tool call: {}({})", name, args_str));
                        OpenAiToolCall {
                            id: id.clone(),
                            call_type: FUNCTION_TOOL_TYPE.to_string(),
                            function: OpenAiFunctionCall {
                                name: name.clone(),
                                arguments: args_str,
                            },
                        }
                    })
                    .collect(),
            );
        }
    }

    ///
    /// Determine the appropriate finish reason based on stop reason and tool calls.
    ///
    /// # Arguments
    ///  * `stop_reason` - Anthropic stop reason
    ///  * `tool_calls` - optional tool calls in the response
    ///
    /// # Returns
    ///  * OpenAI finish reason string
    fn determine_finish_reason(
        &self,
        stop_reason: &Option<String>,
        tool_calls: &Option<Vec<OpenAiToolCall>>,
    ) -> String {
        match stop_reason.as_deref() {
            Some("end_turn") => "stop",
            Some("tool_use") => "tool_calls",
            Some("max_tokens") => "length",
            _ => {
                if tool_calls.is_some() {
                    "tool_calls"
                } else {
                    "length"
                }
            }
        }
        .to_string()
    }

    ///
    /// Convert Anthropic usage statistics to OpenAI format.
    ///
    /// # Arguments
    ///  * `usage` - optional Anthropic usage statistics
    ///
    /// # Returns
    ///  * OpenAI usage statistics
    fn convert_usage(&self, usage: Option<AnthropicUsage>) -> OpenAiUsage {
        let usage = usage.unwrap_or(AnthropicUsage { input_tokens: None, output_tokens: None });

        let prompt_tokens = usage.input_tokens.unwrap_or(0);
        let completion_tokens = usage.output_tokens.unwrap_or(0);

        OpenAiUsage {
            prompt_tokens,
            completion_tokens,
            total_tokens: prompt_tokens + completion_tokens,
        }
    }

    ///
    /// Generate a unique response identifier.
    ///
    /// # Returns
    ///  * Unique response ID string
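    ///
    /// The ID follows OpenAI's `chatcmpl-` prefix convention with a millisecond
    /// timestamp suffix, e.g. `chatcmpl-1700000000000`.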
    fn generate_response_id(&self) -> String {
        format!("chatcmpl-{}", Utc::now().timestamp_millis())
    }

    ///
    /// Handle content block delta events for streaming.
    ///
    /// # Arguments
    ///  * `delta` - content delta to process
    ///  * `model` - model identifier
    ///  * `current_tool_call` - current tool call state
    ///
    /// # Returns
    ///  * OpenAI stream chunk if content should be output
    fn handle_content_delta(
        &self,
        delta: &AnthropicDelta,
        model: &str,
        current_tool_call: &mut Option<StreamingToolCall>,
    ) -> Option<OpenAiStreamChunk> {
        if let Some(text) = &delta.text {
            self.create_text_chunk(text, model)
        } else if let Some(partial_json) = &delta.partial_json {
            self.handle_tool_argument_delta(partial_json, model, current_tool_call)
        } else {
            None
        }
    }

    ///
    /// Create a text content streaming chunk.
    ///
    /// # Arguments
    ///  * `text` - text content to include
    ///  * `model` - model identifier
    ///
    /// # Returns
    ///  * OpenAI stream chunk with text content
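    ///
    /// # Examples
    ///
    /// A minimal sketch (`LogLevel::Info` is an assumed variant name):
    ///
    /// ```ignore
    /// let converter = AnthropicToOpenAiConverter::new(LogLevel::Info);
    /// let chunk = converter.create_text_chunk("Hello", "claude-sonnet-4").unwrap();
    /// assert_eq!(chunk.choices[0].delta.content.as_deref(), Some("Hello"));
    /// ```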
    pub fn create_text_chunk(&self, text: &str, model: &str) -> Option<OpenAiStreamChunk> {
        Some(OpenAiStreamChunk {
            id: self.generate_response_id(),
            object: CHAT_COMPLETION_CHUNK_OBJECT.to_string(),
            created: Utc::now().timestamp(),
            model: model.to_string(),
            choices: vec![OpenAiStreamChoice {
                index: 0,
                delta: OpenAiStreamDelta { content: Some(text.to_string()), tool_calls: None },
                finish_reason: None,
            }],
        })
    }

    ///
    /// Handle tool call argument delta for streaming.
    ///
    /// # Arguments
    ///  * `partial_json` - partial JSON arguments
    ///  * `model` - model identifier
    ///  * `current_tool_call` - current tool call state
    ///
    /// # Returns
    ///  * OpenAI stream chunk with tool call delta
    fn handle_tool_argument_delta(
        &self,
        partial_json: &str,
        model: &str,
        current_tool_call: &mut Option<StreamingToolCall>,
    ) -> Option<OpenAiStreamChunk> {
        if let Some(tool_call) = current_tool_call.as_mut() {
            self.debug(&format!(
                "[STREAM] Tool call arguments delta for {}: {}",
                tool_call.name, partial_json
            ));
            tool_call.arguments.push_str(partial_json);

            Some(self.create_tool_argument_chunk(partial_json, model))
        } else {
            None
        }
    }

    ///
    /// Create a tool call argument streaming chunk.
    ///
    /// # Arguments
    ///  * `partial_json` - partial JSON arguments
    ///  * `model` - model identifier
    ///
    /// # Returns
    ///  * OpenAI stream chunk with tool call arguments
    fn create_tool_argument_chunk(&self, partial_json: &str, model: &str) -> OpenAiStreamChunk {
        OpenAiStreamChunk {
            id: self.generate_response_id(),
            object: CHAT_COMPLETION_CHUNK_OBJECT.to_string(),
            created: Utc::now().timestamp(),
            model: model.to_string(),
            choices: vec![OpenAiStreamChoice {
                index: 0,
                delta: OpenAiStreamDelta {
                    content: None,
                    tool_calls: Some(vec![OpenAiStreamToolCall {
                        index: 0,
                        id: None,
                        call_type: None,
                        function: Some(OpenAiStreamFunctionCall {
                            name: None,
                            arguments: Some(partial_json.to_string()),
                        }),
                    }]),
                },
                finish_reason: None,
            }],
        }
    }

    ///
    /// Handle content block start events for streaming.
    ///
    /// # Arguments
    ///  * `content_block` - content block metadata
    ///  * `model` - model identifier
    ///  * `current_tool_call` - current tool call state
    ///  * `has_tool_calls` - tool call presence flag
    ///
    /// # Returns
    ///  * OpenAI stream chunk if tool call is starting
    fn handle_content_start(
        &self,
        content_block: &AnthropicStreamContentBlock,
        model: &str,
        current_tool_call: &mut Option<StreamingToolCall>,
        has_tool_calls: &mut bool,
    ) -> Option<OpenAiStreamChunk> {
        if content_block.block_type == "tool_use" {
            if let (Some(id), Some(name)) = (&content_block.id, &content_block.name) {
                self.debug(&format!("[STREAM] Tool call started: {} (id: {})", name, id));
                *has_tool_calls = true;
                *current_tool_call = Some(StreamingToolCall {
                    id: id.clone(),
                    name: name.clone(),
                    arguments: String::new(),
                });

                Some(self.create_tool_start_chunk(id, name, model))
            } else {
                None
            }
        } else {
            None
        }
    }

    ///
    /// Create a tool call start streaming chunk.
    ///
    /// # Arguments
    ///  * `id` - tool call identifier
    ///  * `name` - function name
    ///  * `model` - model identifier
    ///
    /// # Returns
    ///  * OpenAI stream chunk with tool call start
    fn create_tool_start_chunk(&self, id: &str, name: &str, model: &str) -> OpenAiStreamChunk {
        OpenAiStreamChunk {
            id: self.generate_response_id(),
            object: CHAT_COMPLETION_CHUNK_OBJECT.to_string(),
            created: Utc::now().timestamp(),
            model: model.to_string(),
            choices: vec![OpenAiStreamChoice {
                index: 0,
                delta: OpenAiStreamDelta {
                    content: None,
                    tool_calls: Some(vec![OpenAiStreamToolCall {
                        index: 0,
                        id: Some(id.to_string()),
                        call_type: Some(FUNCTION_TOOL_TYPE.to_string()),
                        function: Some(OpenAiStreamFunctionCall {
                            name: Some(name.to_string()),
                            arguments: Some(String::new()),
                        }),
                    }]),
                },
                finish_reason: None,
            }],
        }
    }

    ///
    /// Handle content block stop events for streaming.
    ///
    /// # Arguments
    ///  * `current_tool_call` - current tool call state
    ///
    /// # Returns
    ///  * None (no output needed for content stop)
    fn handle_content_stop(
        &self,
        current_tool_call: &Option<StreamingToolCall>,
    ) -> Option<OpenAiStreamChunk> {
        if let Some(tool_call) = current_tool_call {
            self.debug(&format!("[STREAM] Tool call block stopped: {}", tool_call.name));
        }
        None
    }

    ///
    /// Handle message start events for streaming.
    ///
    /// # Returns
    ///  * None (no output needed for message start)
    fn handle_message_start(&self) -> Option<OpenAiStreamChunk> {
        self.debug("[STREAM] Message start");
        None
    }

    ///
    /// Handle message delta events for streaming.
    ///
    /// # Arguments
    ///  * `delta` - message delta information
    ///  * `stop_reason_from_delta` - mutable stop reason storage
    ///
    /// # Returns
    ///  * None (no output needed for message delta)
    fn handle_message_delta(
        &self,
        delta: &MessageDelta,
        stop_reason_from_delta: &mut Option<String>,
    ) -> Option<OpenAiStreamChunk> {
        if let Some(stop_reason) = &delta.stop_reason {
            *stop_reason_from_delta = Some(stop_reason.clone());
            self.debug(&format!("[STREAM] Message delta - stop_reason: {:?}", stop_reason));
        }
        None
    }

    ///
    /// Handle message stop events for streaming.
    ///
    /// The stop reason previously captured from a `message_delta` event takes
    /// precedence over any stop reason carried on `message_stop` itself, and an
    /// in-flight or completed tool call forces a `tool_calls` finish reason.
    ///
    /// # Arguments
    ///  * `stop_reason` - stop reason from message stop
    ///  * `model` - model identifier
    ///  * `current_tool_call` - current tool call state
    ///  * `has_tool_calls` - tool call presence flag
    ///  * `stop_reason_from_delta` - stop reason from message delta
    ///
    /// # Returns
    ///  * OpenAI stream chunk with finish reason
    fn handle_message_stop(
        &self,
        stop_reason: &Option<String>,
        model: &str,
        current_tool_call: &mut Option<StreamingToolCall>,
        has_tool_calls: &bool,
        stop_reason_from_delta: &mut Option<String>,
    ) -> Option<OpenAiStreamChunk> {
        let effective_stop_reason = stop_reason_from_delta.as_deref().or(stop_reason.as_deref());

        let finish_reason = if *has_tool_calls || current_tool_call.is_some() {
            "tool_calls"
        } else {
            match effective_stop_reason {
                Some("tool_use") => "tool_calls",
                Some("end_turn") => "stop",
                Some("max_tokens") => "length",
                _ => "stop",
            }
        };

        self.debug(&format!(
            "[STREAM] Message stop - reason: {:?}, hasToolCalls: {}, finish_reason: {}",
            effective_stop_reason, has_tool_calls, finish_reason
        ));

        *stop_reason_from_delta = None;

        if let Some(tool_call) = current_tool_call.take() {
            self.debug(&format!(
                "[STREAM] Completed tool call: {}({})",
                tool_call.name, tool_call.arguments
            ));
        }

        Some(OpenAiStreamChunk {
            id: self.generate_response_id(),
            object: CHAT_COMPLETION_CHUNK_OBJECT.to_string(),
            created: Utc::now().timestamp(),
            model: model.to_string(),
            choices: vec![OpenAiStreamChoice {
                index: 0,
                delta: OpenAiStreamDelta { content: None, tool_calls: None },
                finish_reason: Some(finish_reason.to_string()),
            }],
        })
    }

    ///
    /// Log debug message if trace logging is enabled.
    ///
    /// # Arguments
    ///  * `msg` - debug message to log
    pub(crate) fn debug(&self, msg: &str) {
        if self.log_level.is_trace_enabled() {
            tracing::debug!("[TRACE] {}", msg);
        }
    }
}