llm_interface 0.0.3

llm_interface: The backend for the llm_client crate
Documentation
use crate::requests::*;
use serde::{Deserialize, Serialize};

impl CompletionResponse {
    pub fn new_from_openai(
        req: &CompletionRequest,
        res: OpenAiCompletionResponse,
    ) -> Result<Self, CompletionError> {
        let choice = if res.choices.is_empty() || res.choices[0].message.content.is_none() {
            return Err(CompletionError::ReponseContentEmpty);
        } else {
            &res.choices[0]
        };
        let finish_reason = match choice.finish_reason {
            Some(FinishReason::Stop) => CompletionFinishReason::Eos,
            Some(FinishReason::Length) => CompletionFinishReason::StopLimit,
            Some(FinishReason::ToolCalls) => {
                return Err(CompletionError::StopReasonUnsupported(
                    "FinishReason::ToolCalls is not supported".to_owned(),
                ))
            }
            Some(FinishReason::ContentFilter) => {
                return Err(CompletionError::StopReasonUnsupported(
                    "FinishReason::ContentFilter is not supported".to_owned(),
                ))
            }
            Some(FinishReason::FunctionCall) => {
                return Err(CompletionError::StopReasonUnsupported(
                    "FinishReason::FunctionCall is not supported".to_owned(),
                ))
            }
            None => CompletionFinishReason::Eos,
        };
        Ok(Self {
            id: res.id.to_owned(),
            index: None,
            content: choice.message.content.as_ref().unwrap().to_owned(),
            finish_reason,
            completion_probabilities: None,
            truncated: false,
            generation_settings: GenerationSettings::new_from_openai(req, &res),
            timing_usage: TimingUsage::new_from_generic(req.start_time),
            token_usage: TokenUsage::new_from_generic(&res),
        })
    }
}

/// Represents a chat completion response returned by model, based on the provided input.
#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
pub struct OpenAiCompletionResponse {
    /// A unique identifier for the chat completion.
    pub id: String,
    /// A list of chat completion choices. Can be more than one if `n` is greater than 1.
    pub choices: Vec<ChatChoice>,
    /// The Unix timestamp (in seconds) of when the chat completion was created.
    pub created: u32,
    /// The model used for the chat completion.
    pub model: String,
    pub usage: Option<CompletionUsage>,
}

#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,
    pub message: ChatCompletionResponseMessage,
    /// The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence,
    /// `length` if the maximum number of tokens specified in the request was reached,
    /// `content_filter` if content was omitted due to a flag from our content filters,
    /// `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function.
    pub finish_reason: Option<FinishReason>,
    /// Log probability information for the choice.
    pub logprobs: Option<ChatChoiceLogprobs>,
}

/// Usage statistics for the completion request.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct CompletionUsage {
    /// Number of tokens in the prompt.
    pub prompt_tokens: u32,
    /// Number of tokens in the generated completion.
    pub completion_tokens: u32,
    /// Total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
}

/// A chat completion message generated by the model.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatCompletionResponseMessage {
    /// The contents of the message.
    pub content: Option<String>,

    /// The role of the author of this message.
    pub role: Role,
}

#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum FinishReason {
    Stop,
    Length,
    ToolCalls,
    ContentFilter,
    FunctionCall,
}

#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatChoiceLogprobs {
    /// A list of message content tokens with log probability information.
    pub content: Option<Vec<ChatCompletionTokenLogprob>>,
}

#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatCompletionTokenLogprob {
    /// The token.
    pub token: String,
    /// The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value `-9999.0` is used to signify that the token is very unlikely.
    pub logprob: f32,
    /// A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token.
    pub bytes: Option<Vec<u8>>,
    ///  List of the most likely tokens and their log probability, at this token position. In rare cases, there may be fewer than the number of requested `top_logprobs` returned.
    pub top_logprobs: Vec<TopLogprobs>,
}

#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct TopLogprobs {
    /// The token.
    pub token: String,
    /// The log probability of this token.
    pub logprob: f32,
    /// A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token.
    pub bytes: Option<Vec<u8>>,
}

#[derive(Debug, Serialize, Deserialize, Clone, Copy, Default, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    System,
    #[default]
    User,
    Assistant,
    Tool,
    Function,
}