//! llm_interface 0.0.3
//!
//! The backend for the `llm_client` crate: Anthropic completion response
//! types and their conversion into the crate's generic completion types.
use crate::requests::*;
use serde::{Deserialize, Serialize};

impl CompletionResponse {
    /// Builds a [`CompletionResponse`] from a raw Anthropic API response.
    ///
    /// Maps Anthropic's `stop_reason` onto [`CompletionFinishReason`],
    /// checking any reported stop sequence against the sequences configured
    /// on the request, and extracts the single expected content block.
    ///
    /// # Errors
    ///
    /// * [`CompletionError::StopReasonUnsupported`] if the model stopped to
    ///   use a tool (`StopReason::ToolUse`), which this path does not handle.
    /// * [`CompletionError::ReponseContentEmpty`] if the response does not
    ///   contain exactly one content block. NOTE(review): the original code
    ///   reused this "empty" variant for the multiple-blocks case as well;
    ///   that behavior is preserved here, but a dedicated variant would be
    ///   clearer — confirm against the error enum's definition.
    pub fn new_from_anthropic(
        req: &CompletionRequest,
        res: AnthropicCompletionResponse,
    ) -> Result<Self, CompletionError> {
        let finish_reason = match res.stop_reason {
            StopReason::EndTurn => CompletionFinishReason::Eos,
            StopReason::StopSequence => match &res.stop_sequence {
                // The API reported a custom stop sequence; classify it by
                // whether it is one the request actually asked for.
                Some(stopping_string) => {
                    match req.stop_sequences.parse_string_response(stopping_string) {
                        Some(stop_sequence) => {
                            CompletionFinishReason::MatchingStoppingSequence(stop_sequence)
                        }
                        None => CompletionFinishReason::NonMatchingStoppingSequence(Some(
                            stopping_string.clone(),
                        )),
                    }
                }
                // stop_reason said "stop_sequence" but no sequence was returned.
                None => CompletionFinishReason::NonMatchingStoppingSequence(None),
            },
            StopReason::MaxTokens => CompletionFinishReason::StopLimit,
            StopReason::ToolUse => {
                return Err(CompletionError::StopReasonUnsupported(
                    "StopReason::ToolUse is not supported".to_owned(),
                ))
            }
        };

        // Exactly one content block is expected. A single slice pattern
        // replaces the original chain of is_empty / len > 1 / first checks.
        let content = match res.content.as_slice() {
            [block] => block.text.clone(),
            _ => return Err(CompletionError::ReponseContentEmpty),
        };

        Ok(Self {
            // `res` is still borrowed below, so the id must be cloned rather
            // than moved out.
            id: res.id.to_owned(),
            index: None,
            content,
            finish_reason,
            completion_probabilities: None,
            truncated: false,
            generation_settings: GenerationSettings::new_from_anthropic(req, &res),
            timing_usage: TimingUsage::new_from_generic(req.start_time),
            token_usage: TokenUsage::new_from_anthropic(&res),
        })
    }
}

/// Represents a chat completion response returned by the model, based on the provided input.
///
/// Mirrors the JSON body of an Anthropic Messages API response; deserialized
/// via serde.
#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
pub struct AnthropicCompletionResponse {
    /// Unique object identifier.
    ///
    /// The format and length of IDs may change over time.
    pub id: String,
    /// Content generated by the model.
    ///
    /// This is an array of content blocks, each of which has a type that determines its shape. Currently, the only type in responses is "text".
    pub content: Vec<CompletionContent>,
    /// The model that handled the request.
    pub model: String,
    /// The reason that we stopped.
    ///
    /// This may be one of the following values:
    ///
    /// "end_turn": the model reached a natural stopping point
    /// "max_tokens": we exceeded the requested max_tokens or the model's maximum
    /// "stop_sequence": one of your provided custom stop_sequences was generated
    /// "tool_use": the model wants to use an external tool (see [`StopReason::ToolUse`])
    pub stop_reason: StopReason,
    /// Which custom stop sequence was generated, if any.
    ///
    /// This value will be a non-null string if one of your custom stop sequences was generated.
    pub stop_sequence: Option<String>,
    /// Billing and rate-limit usage.
    ///
    /// Anthropic's API bills and rate-limits by token counts, as tokens represent the underlying cost to our systems.
    ///
    /// Under the hood, the API transforms requests into a format suitable for the model. The model's output then goes through a parsing stage before becoming an API response. As a result, the token counts in usage will not match one-to-one with the exact visible content of an API request or response.
    ///
    /// For example, output_tokens will be non-zero, even for an empty string response from Claude.
    pub usage: CompletionUsage,
}

/// A single content block from an Anthropic response.
///
/// Only the `text` field is modeled; any other fields in the JSON block
/// (such as the block's `type` tag) are ignored by serde's default
/// unknown-field handling.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct CompletionContent {
    /// The single text content.
    pub text: String,
}

/// Usage statistics for the completion request.
///
/// Token counts as reported by the API; used for billing and rate-limit
/// accounting (see [`AnthropicCompletionResponse::usage`]).
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct CompletionUsage {
    /// The number of input tokens which were used.
    pub input_tokens: u32,
    /// The number of output tokens which were used.
    pub output_tokens: u32,
}

/// The reason the model stopped generating.
///
/// Deserialized from the API's snake_case strings via
/// `#[serde(rename_all = "snake_case")]` — e.g. `"end_turn"` maps to
/// [`StopReason::EndTurn`].
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum StopReason {
    /// The model reached a natural stopping point.
    EndTurn,
    /// We exceeded the requested max_tokens or the model's maximum.
    MaxTokens,
    /// One of your provided custom stop_sequences was generated.
    StopSequence,
    /// Claude wants to use an external tool.
    ToolUse,
}