llmservice_flows/
chat.rs

1use crate::LLMApi;
2use crate::Retry;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use urlencoding::encode;
6
7/// Response struct for the chat completion.
8#[derive(Debug, Deserialize)]
9pub struct ChatResponse {
10    /// The response from ChatGPT.
11    pub choice: String,
12}
13
14impl Default for ChatResponse {
15    fn default() -> ChatResponse {
16        ChatResponse {
17            choice: String::new(),
18        }
19    }
20}
21
/// struct for setting the chat options.
///
/// Every `Option` field is omitted from the serialized request when `None`
/// (via `skip_serializing_if`), so the service-side defaults apply;
/// `token_limit` and `restart` are always sent.
#[derive(Debug, Default, Serialize)]
pub struct ChatOptions<'a> {
    /// The ID or name of the model to use for completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<&'a str>,

    /// The token limit of the model.
    /// NOTE(review): presumably the context-window size used to trim history
    /// server-side — confirm against the service implementation.
    pub token_limit: u32,

    /// When true, a new conversation will be created.
    pub restart: bool,

    /// The prompt of the system role.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system_prompt: Option<&'a str>,

    /// The prompt that will be prepended to user's prompt without saving in history.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pre_prompt: Option<&'a str>,

    /// The prompt that will be appended to user's prompt without saving in history.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub post_prompt: Option<&'a str>,

    /// What sampling temperature to use, between 0 and 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// An alternative to sampling with temperature (nucleus sampling).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// Up to 4 sequences where the API will stop generating further tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Vec<String>>,

    /// The maximum number of tokens to generate in the chat completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u16>,

    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f32>,

    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f32>,

    /// Modify the likelihood of specified tokens appearing in the completion.
    /// Keys are token ids (as strings), values are bias amounts.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logit_bias: Option<HashMap<String, i8>>,
}
75
/// Adapter that lets a `(conversation_id, sentence, options)` tuple be fed
/// through the shared retry machinery (see `keep_trying`): each retry
/// attempt re-invokes [`chat_completion_inner`] with the same arguments.
impl LLMApi for (&str, &str, &ChatOptions<'_>) {
    type Output = ChatResponse;
    async fn api(&self, endpoint: &str, api_key: &str) -> Retry<Self::Output> {
        chat_completion_inner(endpoint, api_key, self.0, self.1, self.2).await
    }
}
82
impl<'a> crate::LLMServiceFlows<'a> {
    /// Create chat completion with the provided sentence.
    /// It uses OpenAI's [GPT-4](https://platform.openai.com/docs/models/gpt-4) model to make a conversation.
    ///
    /// `conversation_id` is the identifier of the conversation.
    /// The history will be fetched and attached to the `sentence` as a whole prompt for ChatGPT.
    ///
    /// `sentence` is a String that represents the current utterance of the conversation.
    ///
    /// On failure, the `Err` carries the error text returned by the service
    /// (transient errors are retried internally before surfacing).
    ///
    /// ```rust,ignore
    ///     // Create a conversation_id.
    ///     // Only numbers, letters, underscores, dashes, and pound signs are allowed, up to 50 characters.
    ///     let chat_id = format!("news-summary-N");
    ///     // System_prompt content in text.
    ///     let system = &format!("You're a news editor AI.");
    ///
    ///     // Create ChatOptions.
    ///     let co = ChatOptions {
    ///         model: Some("gpt-4"),
    ///         token_limit: 8192,
    ///         restart: true,
    ///         system_prompt: Some(system),
    ///     // Use .. to extract the default value for the remaining fields.
    ///         ..Default::default()
    ///     };
    ///
    ///     // Create a `sentence`, the concatenation of user prompt and the text to work with.
    ///     let question = format!("Make a concise summary within 200 words on this: {news_body}.");
    ///
    ///     // Chat completion to get the result and handle the failure.
    ///     match llm.chat_completion(&chat_id, &question, &co).await {
    ///         Ok(r) => Ok(r.choice),
    ///         Err(e) =>  Err(e.into()),
    ///     }
    /// ```
    pub async fn chat_completion(
        &self,
        conversation_id: &str,
        sentence: &str,
        options: &ChatOptions<'_>,
    ) -> Result<ChatResponse, String> {
        self.keep_trying((conversation_id, sentence, options)).await
    }
}
127
128async fn chat_completion_inner(
129    endpoint: &str,
130    api_key: &str,
131    conversation_id: &str,
132    sentence: &str,
133    options: &ChatOptions<'_>,
134) -> Retry<ChatResponse> {
135    let flows_user = unsafe { crate::_get_flows_user() };
136    let flow_id = unsafe { crate::_get_flow_id() };
137
138    let uri = format!(
139        "{}/{}/{}/chat_completion?endpoint={}&api_key={}&conversation={}",
140        crate::LLM_API_PREFIX.as_str(),
141        flows_user,
142        flow_id,
143        encode(endpoint),
144        encode(api_key),
145        encode(conversation_id),
146    );
147    let body = serde_json::to_vec(&serde_json::json!({
148        "sentence": sentence,
149        "params": options
150    }))
151    .unwrap_or_default();
152
153    match reqwest::Client::new()
154        .post(uri)
155        .header("Content-Type", "application/json")
156        .header("Content-Length", body.len())
157        .body(body)
158        .send()
159        .await
160    {
161        Ok(res) => {
162            let status = res.status();
163            let body = res.bytes().await.unwrap();
164            match status.is_success() {
165                true => Retry::No(
166                    serde_json::from_slice::<ChatResponse>(body.as_ref())
167                        .or(Err(String::from("Unexpected error"))),
168                ),
169                false => {
170                    match status.into() {
171                        409 | 429 | 503 => {
172                            // 409 TryAgain 429 RateLimitError
173                            // 503 ServiceUnavailable
174                            Retry::Yes(String::from_utf8_lossy(body.as_ref()).into_owned())
175                        }
176                        _ => Retry::No(Err(String::from_utf8_lossy(body.as_ref()).into_owned())),
177                    }
178                }
179            }
180        }
181        Err(e) => Retry::No(Err(e.to_string())),
182    }
183}
184
/// The author of a [`ChatMessage`].
#[derive(Debug, Deserialize)]
// NOTE(review): camelCase lowercases single-word variants, so this
// deserializes from "user" / "assistant" on the wire.
#[serde(rename_all = "camelCase")]
pub enum ChatRole {
    /// A message written by the end user.
    User,
    /// A message produced by the model.
    Assistant,
}
191
/// A single utterance in a conversation history, as returned by `chat_history`.
#[derive(Debug, Deserialize)]
pub struct ChatMessage {
    /// Who authored the message.
    pub role: ChatRole,
    /// The text content of the message.
    pub content: String,
}
197
198/// Fetch the question history of conversation_id
199/// Result will be an array of string whose length is
200/// restricted by limit.
201/// When limit is 0, all history will be returned.
202///
203///```rust,no_run
204/// // The conversation_id we are interested in.
205/// let conversation_id = "unique_conversation_id";
206/// // Limit the number of messages returned.
207/// let limit: u8 = 10;
208/// // Call `chat_history` to fetch the conversation history.
209/// let history = chat_history(conversation_id, limit);
210///
211/// match history {
212///     Some(messages) => {
213///         println!("Chat history (most recent {} messages):", limit);
214///         for message in messages.iter().rev() {
215///             let role = match message.role {
216///                 ChatRole::User => "User",
217///                 ChatRole::Assistant => "Assistant",
218///             };
219///             println!("{}: {}", role, message.content);
220///         }
221///     }
222///     None => {
223///         println!(
224///             "Failed to fetch chat history for conversation {}",
225///             conversation_id
226///         );
227///     }
228/// }
229/// ```
230
231pub async fn chat_history(conversation_id: &str, limit: u8) -> Option<Vec<ChatMessage>> {
232    let flows_user = unsafe { crate::_get_flows_user() };
233    let flow_id = unsafe { crate::_get_flow_id() };
234
235    let uri = format!(
236        "{}/{}/{}/chat_history?conversation={}&limit={}",
237        crate::LLM_API_PREFIX.as_str(),
238        flows_user,
239        flow_id,
240        encode(conversation_id),
241        limit
242    );
243    match reqwest::get(&uri).await {
244        Ok(res) => match res.status().is_success() {
245            true => {
246                serde_json::from_slice::<Vec<ChatMessage>>(&res.bytes().await.unwrap().as_ref())
247                    .ok()
248            }
249            false => None,
250        },
251        Err(_) => None,
252    }
253}