//! llmservice-flows 0.5.0
//!
//! LLM Service integration for flows.network.
use crate::LLMApi;
use crate::Retry;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use urlencoding::encode;

/// Response struct for the chat completion.
#[derive(Debug, Default, Deserialize)]
pub struct ChatResponse {
    /// The response from ChatGPT.
    pub choice: String,
}

/// Struct for setting the chat options.
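///
/// A minimal sketch of building options (the field values here are
/// illustrative assumptions, not crate defaults):
///
///```rust,ignore
/// let options = ChatOptions {
///     model: Some("gpt-3.5-turbo"),
///     token_limit: 4096,
///     system_prompt: Some("You are a helpful assistant."),
///     ..Default::default()
/// };
/// ```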
#[derive(Debug, Default, Serialize)]
pub struct ChatOptions<'a> {
    /// The ID or name of the model to use for completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<&'a str>,

    /// The token limit of the model.
    pub token_limit: u32,

    /// When true, a new conversation will be created.
    pub restart: bool,

    /// The prompt of the system role.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system_prompt: Option<&'a str>,

    /// The prompt that will be prepended to the user's prompt without being saved in the history.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pre_prompt: Option<&'a str>,

    /// The prompt that will be appended to the user's prompt without being saved in the history.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub post_prompt: Option<&'a str>,

    /// What sampling temperature to use, between 0 and 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// An alternative to sampling with temperature, called nucleus sampling.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// Up to 4 sequences where the API will stop generating further tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Vec<String>>,

    /// The maximum number of tokens to generate in the chat completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u16>,

    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f32>,

    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f32>,

    /// Modify the likelihood of specified tokens appearing in the completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logit_bias: Option<HashMap<String, i8>>,
}

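// Implementing the crate's `LLMApi` trait for the (conversation_id, sentence,
// options) tuple is what lets `LLMServiceFlows::keep_trying` drive this request
// with its retry policy: judging from `chat_completion_inner` below, `Retry::Yes`
// signals a transient failure worth retrying, while `Retry::No` is final.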
impl LLMApi for (&str, &str, &ChatOptions<'_>) {
    type Output = ChatResponse;
    async fn api(&self, endpoint: &str, api_key: &str) -> Retry<Self::Output> {
        chat_completion_inner(endpoint, api_key, self.0, self.1, self.2).await
    }
}

impl<'a> crate::LLMServiceFlows<'a> {
    /// Create chat completion with the provided sentence.
    /// It uses OpenAI's [GPT-4](https://platform.openai.com/docs/models/gpt-4) model to carry on a conversation.
    ///
    /// `conversation_id` is the identifier of the conversation.
    /// The history will be fetched and attached to the `sentence` as a whole prompt for ChatGPT.
    ///
    /// `sentence` is the current utterance of the conversation.
    ///
    ///```rust,ignore
    /// // `llm` is an initialized `LLMServiceFlows` instance and `news_body`
    /// // holds the text to summarize; both are defined elsewhere.
    ///
    /// // Create a conversation_id.
    /// // Only numbers, letters, underscores, dashes, and pound signs are allowed, up to 50 characters.
    /// let chat_id = "news-summary-N";
    /// // The system prompt content in text.
    /// let system = "You're a news editor AI.";
    ///
    /// // Create the ChatOptions.
    /// let co = ChatOptions {
    ///     model: Some("gpt-4"),
    ///     token_limit: 8192,
    ///     restart: true,
    ///     system_prompt: Some(system),
    ///     // Use `..` to fill in the default values for the remaining fields.
    ///     ..Default::default()
    /// };
    ///
    /// // Create the `sentence`: the user prompt followed by the text to work on.
    /// let question = format!("Make a concise summary within 200 words on this: {news_body}.");
    ///
    /// // Run the chat completion and handle the failure.
    /// match llm.chat_completion(chat_id, &question, &co).await {
    ///     Ok(r) => Ok(r.choice),
    ///     Err(e) => Err(e.into()),
    /// }
    /// ```
    pub async fn chat_completion(
        &self,
        conversation_id: &str,
        sentence: &str,
        options: &ChatOptions<'_>,
    ) -> Result<ChatResponse, String> {
        self.keep_trying((conversation_id, sentence, options)).await
    }
}

async fn chat_completion_inner(
    endpoint: &str,
    api_key: &str,
    conversation_id: &str,
    sentence: &str,
    options: &ChatOptions<'_>,
) -> Retry<ChatResponse> {
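    // `_get_flows_user` / `_get_flow_id` are presumably host-provided bindings
    // that identify the current flow; their values scope the request route below.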
    let flows_user = unsafe { crate::_get_flows_user() };
    let flow_id = unsafe { crate::_get_flow_id() };

    let uri = format!(
        "{}/{}/{}/chat_completion?endpoint={}&api_key={}&conversation={}",
        crate::LLM_API_PREFIX.as_str(),
        flows_user,
        flow_id,
        encode(endpoint),
        encode(api_key),
        encode(conversation_id),
    );
    let body = serde_json::to_vec(&serde_json::json!({
        "sentence": sentence,
        "params": options
    }))
    .unwrap_or_default();
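    // The serialized body has the shape (illustrative):
    //   {"sentence":"...","params":{"token_limit":8192,"restart":true,...}}
    // where `params` carries `ChatOptions` with its `None` fields omitted.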

    match reqwest::Client::new()
        .post(uri)
        .header("Content-Type", "application/json")
        .header("Content-Length", body.len())
        .body(body)
        .send()
        .await
    {
        Ok(res) => {
            let status = res.status();
            let body = match res.bytes().await {
                Ok(body) => body,
                Err(e) => return Retry::No(Err(e.to_string())),
            };
            match status.is_success() {
                true => Retry::No(
                    serde_json::from_slice::<ChatResponse>(body.as_ref())
                        .map_err(|e| format!("failed to parse chat response: {e}")),
                ),
                false => match status.as_u16() {
                    // 409 TryAgain, 429 RateLimitError, 503 ServiceUnavailable:
                    // transient errors that are worth retrying.
                    409 | 429 | 503 => {
                        Retry::Yes(String::from_utf8_lossy(body.as_ref()).into_owned())
                    }
                    _ => Retry::No(Err(String::from_utf8_lossy(body.as_ref()).into_owned())),
                },
            }
        }
        Err(e) => Retry::No(Err(e.to_string())),
    }
}

/// The role of a message author in a conversation.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum ChatRole {
    User,
    Assistant,
}

/// A single message in a conversation history.
#[derive(Debug, Deserialize)]
pub struct ChatMessage {
    /// Who authored the message.
    pub role: ChatRole,
    /// The text content of the message.
    pub content: String,
}

/// Fetch the chat history of `conversation_id`.
///
/// The result is a vector of `ChatMessage` whose length is
/// restricted by `limit`. When `limit` is 0, the full history is returned.
///
///```rust,no_run
/// # use llmservice_flows::chat::{chat_history, ChatRole};
/// # async fn example() {
/// // The conversation_id we are interested in.
/// let conversation_id = "unique_conversation_id";
/// // Limit the number of messages returned.
/// let limit: u8 = 10;
/// // Call `chat_history` to fetch the conversation history.
/// let history = chat_history(conversation_id, limit).await;
///
/// match history {
///     Some(messages) => {
///         println!("Chat history (most recent {} messages):", limit);
///         for message in messages.iter().rev() {
///             let role = match message.role {
///                 ChatRole::User => "User",
///                 ChatRole::Assistant => "Assistant",
///             };
///             println!("{}: {}", role, message.content);
///         }
///     }
///     None => {
///         println!(
///             "Failed to fetch chat history for conversation {}",
///             conversation_id
///         );
///     }
/// }
/// # }
/// ```
pub async fn chat_history(conversation_id: &str, limit: u8) -> Option<Vec<ChatMessage>> {
    let flows_user = unsafe { crate::_get_flows_user() };
    let flow_id = unsafe { crate::_get_flow_id() };

    let uri = format!(
        "{}/{}/{}/chat_history?conversation={}&limit={}",
        crate::LLM_API_PREFIX.as_str(),
        flows_user,
        flow_id,
        encode(conversation_id),
        limit
    );
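    // The service is expected to reply with a JSON array of messages, e.g.
    //   [{"role":"user","content":"..."},{"role":"assistant","content":"..."}]
    // (shape inferred from the `ChatMessage` deserialization below).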
    match reqwest::get(&uri).await {
        Ok(res) => match res.status().is_success() {
            true => match res.bytes().await {
                Ok(body) => serde_json::from_slice::<Vec<ChatMessage>>(body.as_ref()).ok(),
                Err(_) => None,
            },
            false => None,
        },
        Err(_) => None,
    }
}