// llmservice_flows/chat.rs
1use crate::LLMApi;
2use crate::Retry;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use urlencoding::encode;
6
7/// Response struct for the chat completion.
8#[derive(Debug, Deserialize)]
9pub struct ChatResponse {
10 /// The response from ChatGPT.
11 pub choice: String,
12}
13
14impl Default for ChatResponse {
15 fn default() -> ChatResponse {
16 ChatResponse {
17 choice: String::new(),
18 }
19 }
20}
21
/// Options controlling a single chat-completion request.
///
/// Optional fields are skipped during serialization when unset (via
/// `skip_serializing_if`), so the service applies its own defaults for them.
#[derive(Debug, Default, Serialize)]
pub struct ChatOptions<'a> {
    /// The ID or name of the model to use for completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<&'a str>,

    /// The token limit of the model.
    pub token_limit: u32,

    /// When true, a new conversation will be created.
    pub restart: bool,

    /// The prompt of the system role.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system_prompt: Option<&'a str>,

    /// The prompt that will be prepended to the user's prompt without being saved in history.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pre_prompt: Option<&'a str>,

    /// The prompt that will be appended to the user's prompt without being saved in history.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub post_prompt: Option<&'a str>,

    /// What sampling temperature to use, between 0 and 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// An alternative to sampling with temperature.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// Up to 4 sequences where the API will stop generating further tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Vec<String>>,

    /// The maximum number of tokens to generate in the chat completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u16>,

    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on
    /// whether they appear in the text so far, increasing the model's likelihood
    /// to talk about new topics.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f32>,

    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on
    /// their existing frequency in the text so far, decreasing the model's
    /// likelihood to repeat the same line verbatim.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f32>,

    /// Modify the likelihood of specified tokens appearing in the completion.
    /// Map of token (as string) to bias value; presumably -100..=100 per the
    /// OpenAI API, though `i8` caps it at 127 — confirm against the service.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logit_bias: Option<HashMap<String, i8>>,
}
75
/// Adapts the `(conversation_id, sentence, options)` triple to the [`LLMApi`]
/// trait so it can be driven by the retry machinery (see
/// `LLMServiceFlows::chat_completion`, which passes this tuple to `keep_trying`).
impl LLMApi for (&str, &str, &ChatOptions<'_>) {
    type Output = ChatResponse;
    async fn api(&self, endpoint: &str, api_key: &str) -> Retry<Self::Output> {
        // Tuple layout: .0 = conversation_id, .1 = sentence, .2 = options.
        chat_completion_inner(endpoint, api_key, self.0, self.1, self.2).await
    }
}
82
impl<'a> crate::LLMServiceFlows<'a> {
    /// Create chat completion with the provided sentence.
    /// It uses OpenAI's [GPT-4](https://platform.openai.com/docs/models/gpt-4) model to make a conversation.
    ///
    /// `conversation_id` is the identifier of the conversation.
    /// The history will be fetched and attached to the `sentence` as a whole prompt for ChatGPT.
    ///
    /// `sentence` is a String that represents the current utterance of the conversation.
    ///
    /// Returns the model's reply as a [`ChatResponse`], or an error `String`
    /// once the internal retry logic gives up.
    ///
    ///```rust,ignore
    /// // Create a conversation_id.
    /// // Only numbers, letters, underscores, dashes, and pound signs are allowed, up to 50 characters.
    /// let chat_id = format!("news-summary-N");
    /// // System_prompt content in text.
    /// let system = &format!("You're a news editor AI.");
    ///
    /// // Create ChatOptions.
    /// let co = ChatOptions {
    ///     model: Some("gpt-4"),
    ///     token_limit: 8192,
    ///     restart: true,
    ///     system_prompt: Some(system),
    ///     // Use .. to extract the default value for the remaining fields.
    ///     ..Default::default()
    /// };
    ///
    /// // Create a `sentence`, the concatenation of user prompt and the text to work with.
    /// let question = format!("Make a concise summary within 200 words on this: {news_body}.");
    ///
    /// // Chat completion to get the result and handle the failure.
    /// match llm.chat_completion(&chat_id, &question, &co).await {
    ///     Ok(r) => Ok(r.choice),
    ///     Err(e) => Err(e.into()),
    /// }
    /// ```
    pub async fn chat_completion(
        &self,
        conversation_id: &str,
        sentence: &str,
        options: &ChatOptions<'_>,
    ) -> Result<ChatResponse, String> {
        // Delegate to the retrying driver: the tuple implements `LLMApi`,
        // whose `api` method calls `chat_completion_inner`.
        self.keep_trying((conversation_id, sentence, options)).await
    }
}
127
128async fn chat_completion_inner(
129 endpoint: &str,
130 api_key: &str,
131 conversation_id: &str,
132 sentence: &str,
133 options: &ChatOptions<'_>,
134) -> Retry<ChatResponse> {
135 let flows_user = unsafe { crate::_get_flows_user() };
136 let flow_id = unsafe { crate::_get_flow_id() };
137
138 let uri = format!(
139 "{}/{}/{}/chat_completion?endpoint={}&api_key={}&conversation={}",
140 crate::LLM_API_PREFIX.as_str(),
141 flows_user,
142 flow_id,
143 encode(endpoint),
144 encode(api_key),
145 encode(conversation_id),
146 );
147 let body = serde_json::to_vec(&serde_json::json!({
148 "sentence": sentence,
149 "params": options
150 }))
151 .unwrap_or_default();
152
153 match reqwest::Client::new()
154 .post(uri)
155 .header("Content-Type", "application/json")
156 .header("Content-Length", body.len())
157 .body(body)
158 .send()
159 .await
160 {
161 Ok(res) => {
162 let status = res.status();
163 let body = res.bytes().await.unwrap();
164 match status.is_success() {
165 true => Retry::No(
166 serde_json::from_slice::<ChatResponse>(body.as_ref())
167 .or(Err(String::from("Unexpected error"))),
168 ),
169 false => {
170 match status.into() {
171 409 | 429 | 503 => {
172 // 409 TryAgain 429 RateLimitError
173 // 503 ServiceUnavailable
174 Retry::Yes(String::from_utf8_lossy(body.as_ref()).into_owned())
175 }
176 _ => Retry::No(Err(String::from_utf8_lossy(body.as_ref()).into_owned())),
177 }
178 }
179 }
180 }
181 Err(e) => Retry::No(Err(e.to_string())),
182 }
183}
184
/// The author of a [`ChatMessage`].
///
/// Deserialized with `rename_all = "camelCase"`, so the wire values are
/// `"user"` and `"assistant"`.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum ChatRole {
    /// A message written by the human user.
    User,
    /// A message produced by the model.
    Assistant,
}
191
/// One message of a conversation's history, as returned by [`chat_history`].
#[derive(Debug, Deserialize)]
pub struct ChatMessage {
    /// Who authored this message.
    pub role: ChatRole,
    /// The message text.
    pub content: String,
}
197
198/// Fetch the question history of conversation_id
199/// Result will be an array of string whose length is
200/// restricted by limit.
201/// When limit is 0, all history will be returned.
202///
203///```rust,no_run
204/// // The conversation_id we are interested in.
205/// let conversation_id = "unique_conversation_id";
206/// // Limit the number of messages returned.
207/// let limit: u8 = 10;
208/// // Call `chat_history` to fetch the conversation history.
209/// let history = chat_history(conversation_id, limit);
210///
211/// match history {
212/// Some(messages) => {
213/// println!("Chat history (most recent {} messages):", limit);
214/// for message in messages.iter().rev() {
215/// let role = match message.role {
216/// ChatRole::User => "User",
217/// ChatRole::Assistant => "Assistant",
218/// };
219/// println!("{}: {}", role, message.content);
220/// }
221/// }
222/// None => {
223/// println!(
224/// "Failed to fetch chat history for conversation {}",
225/// conversation_id
226/// );
227/// }
228/// }
229/// ```
230
231pub async fn chat_history(conversation_id: &str, limit: u8) -> Option<Vec<ChatMessage>> {
232 let flows_user = unsafe { crate::_get_flows_user() };
233 let flow_id = unsafe { crate::_get_flow_id() };
234
235 let uri = format!(
236 "{}/{}/{}/chat_history?conversation={}&limit={}",
237 crate::LLM_API_PREFIX.as_str(),
238 flows_user,
239 flow_id,
240 encode(conversation_id),
241 limit
242 );
243 match reqwest::get(&uri).await {
244 Ok(res) => match res.status().is_success() {
245 true => {
246 serde_json::from_slice::<Vec<ChatMessage>>(&res.bytes().await.unwrap().as_ref())
247 .ok()
248 }
249 false => None,
250 },
251 Err(_) => None,
252 }
253}