aleph_alpha_client/lib.rs
//! Usage sample
//!
//! ```no_run
//! use aleph_alpha_client::{Client, TaskCompletion, How};
//!
//! #[tokio::main(flavor = "current_thread")]
//! async fn main() {
//!     // Authenticate against API. Fetches token.
//!     let client = Client::from_env().unwrap();
//!
//!     // Name of the model we want to use. Large models usually give better answers, but are
//!     // also more costly.
//!     let model = "luminous-base";
//!
//!     // The task we want to perform. Here we want to continue the sentence: "An apple a day ..."
//!     let task = TaskCompletion::from_text("An apple a day");
//!
//!     // Retrieve the answer from the API
//!     let response = client.completion(&task, model, &How::default()).await.unwrap();
//!
//!     // Print the entire sentence with completion
//!     println!("An apple a day{}", response.completion);
//! }
//! ```

mod chat;
mod completion;
mod detokenization;
mod explanation;
mod http;
mod image_preprocessing;
mod logprobs;
mod prompt;
mod semantic_embedding;
mod stream;
mod tokenization;
use dotenvy::dotenv;
use futures_util::Stream;
use http::HttpClient;
use semantic_embedding::{BatchSemanticEmbeddingOutput, SemanticEmbeddingOutput};
use std::env;
use std::{pin::Pin, time::Duration};
use tokenizers::Tokenizer;

pub use self::{
    chat::{
        ChatChunk, ChatOutput, ChatSampling, Distribution, Message, StreamChatEvent, StreamMessage,
        TaskChat, Usage,
    },
    completion::{
        CompletionEvent, CompletionOutput, CompletionSummary, Sampling, Stopping, StreamChunk,
        StreamSummary, TaskCompletion,
    },
    detokenization::{DetokenizationOutput, TaskDetokenization},
    explanation::{
        Explanation, ExplanationOutput, Granularity, ImageScore, ItemExplanation,
        PromptGranularity, TaskExplanation, TextScore,
    },
    http::{Error, Job, Task},
    logprobs::{Logprob, Logprobs},
    prompt::{Modality, Prompt},
    semantic_embedding::{
        SemanticRepresentation, TaskBatchSemanticEmbedding, TaskSemanticEmbedding,
    },
    stream::{StreamJob, StreamTask},
    tokenization::{TaskTokenization, TokenizationOutput},
};

/// Execute Jobs against the Aleph Alpha API
pub struct Client {
    /// This client does all the work of sending the requests and talking to the AA API. The only
    /// additional knowledge added by this layer is that of the individual jobs which can be
    /// executed, which allows for an alternative non-generic interface that may produce
    /// easier-to-read code for the end user in many use cases.
    http_client: HttpClient,
}

impl Client {
    /// A new instance of an Aleph Alpha client helping you interact with the Aleph Alpha API.
    ///
    /// Setting the token to `None` allows specifying it on a per-request basis.
    /// You may want to only use request-based authentication and skip default authentication. This
    /// is useful if writing an application which invokes the client on behalf of many different
    /// users. Having neither request nor default authentication is considered a bug and will cause
    /// a panic.
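    ///
    /// A minimal sketch of per-request authentication; the host URL and token values below are
    /// placeholders, not real endpoints or credentials.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, Error, How, TaskCompletion};
    ///
    /// async fn completion_with_per_request_token() -> Result<(), Error> {
    ///     // No default token is configured, so each request must carry its own.
    ///     let client = Client::new("https://inference-api.example.com", None)?;
    ///
    ///     let task = TaskCompletion::from_text("An apple a day");
    ///     let how = How {
    ///         api_token: Some("token-of-the-current-user".to_owned()),
    ///         ..How::default()
    ///     };
    ///     let response = client.completion(&task, "luminous-base", &how).await?;
    ///     println!("An apple a day{}", response.completion);
    ///     Ok(())
    /// }
    /// ```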
    pub fn new(host: impl Into<String>, api_token: Option<String>) -> Result<Self, Error> {
        let http_client = HttpClient::new(host.into(), api_token)?;
        Ok(Self { http_client })
    }

    /// A client instance that always uses the same token for all requests.
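    ///
    /// A usage sketch; host and token are placeholders for your own deployment and credentials.
    ///
    /// ```no_run
    /// use aleph_alpha_client::Client;
    ///
    /// let client = Client::with_auth("https://inference-api.example.com", "YOUR_API_TOKEN").unwrap();
    /// ```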
    pub fn with_auth(host: impl Into<String>, api_token: impl Into<String>) -> Result<Self, Error> {
        Self::new(host, Some(api_token.into()))
    }

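    /// A client configured from the `PHARIA_AI_TOKEN` and `INFERENCE_URL` environment variables.
    /// A `.env` file is loaded first if one is present. Panics if either variable is not set.
    ///
    /// A minimal sketch, assuming both environment variables are set:
    ///
    /// ```no_run
    /// use aleph_alpha_client::Client;
    ///
    /// let client = Client::from_env().unwrap();
    /// ```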
    pub fn from_env() -> Result<Self, Error> {
        let _ = dotenv();
        let api_token = env::var("PHARIA_AI_TOKEN").unwrap();
        let inference_url = env::var("INFERENCE_URL").unwrap();
        Self::with_auth(inference_url, api_token)
    }

    /// Execute a task with the Aleph Alpha API and fetch its result.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, How, TaskCompletion, Error};
    ///
    /// async fn print_completion() -> Result<(), Error> {
    ///     // Authenticate against API. Fetches token.
    ///     let client = Client::from_env()?;
    ///
    ///     // Name of the model we want to use. Large models usually give better answers, but are
    ///     // also slower and more costly.
    ///     let model = "luminous-base";
    ///
    ///     // The task we want to perform. Here we want to continue the sentence: "An apple a day
    ///     // ..."
    ///     let task = TaskCompletion::from_text("An apple a day");
    ///
    ///     // Retrieve answer from API
    ///     let response = client.execute(model, &task, &How::default()).await?;
    ///
    ///     // Print entire sentence with completion
    ///     println!("An apple a day{}", response.completion);
    ///     Ok(())
    /// }
    /// ```
    #[deprecated = "Please use output_of instead."]
    pub async fn execute<T: Task>(
        &self,
        model: &str,
        task: &T,
        how: &How,
    ) -> Result<T::Output, Error> {
        self.output_of(&task.with_model(model), how).await
    }

    /// Execute any task with the Aleph Alpha API and fetch its result. This is most useful in
    /// generic code when you want to execute arbitrary task types. Otherwise prefer methods taking
    /// concrete tasks like [`Self::completion`] for improved readability.
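    ///
    /// A sketch of driving a concrete task through the generic interface; the model name is
    /// illustrative.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, Error, How, Task, TaskCompletion};
    ///
    /// async fn completion_via_output_of() -> Result<(), Error> {
    ///     let client = Client::from_env()?;
    ///     let task = TaskCompletion::from_text("An apple a day");
    ///     // `Task::with_model` turns any task into a job bound to a concrete model.
    ///     let output = client
    ///         .output_of(&task.with_model("luminous-base"), &How::default())
    ///         .await?;
    ///     println!("An apple a day{}", output.completion);
    ///     Ok(())
    /// }
    /// ```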
    pub async fn output_of<T: Job>(&self, task: &T, how: &How) -> Result<T::Output, Error> {
        self.http_client.output_of(task, how).await
    }

    /// An embedding trying to capture the semantic meaning of a text. Cosine similarity can be
    /// used to find out how well two texts (or multimodal prompts) match. Useful for search use
    /// cases.
    ///
    /// See the example for [`cosine_similarity`].
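    ///
    /// A shorter sketch of embedding a single query; the prompt text and compression size are
    /// illustrative.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{
    ///     Client, Error, How, Prompt, SemanticRepresentation, TaskSemanticEmbedding,
    /// };
    ///
    /// async fn embed_query() -> Result<(), Error> {
    ///     let client = Client::from_env()?;
    ///     // `Query` is intended for short search queries; use `Document` for the texts searched over.
    ///     let task = TaskSemanticEmbedding {
    ///         prompt: Prompt::from_text("What is Pizza?"),
    ///         representation: SemanticRepresentation::Query,
    ///         compress_to_size: Some(128),
    ///     };
    ///     let embedding = client.semantic_embedding(&task, &How::default()).await?.embedding;
    ///     println!("embedding dimensions: {}", embedding.len());
    ///     Ok(())
    /// }
    /// ```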
    pub async fn semantic_embedding(
        &self,
        task: &TaskSemanticEmbedding<'_>,
        how: &How,
    ) -> Result<SemanticEmbeddingOutput, Error> {
        self.http_client.output_of(task, how).await
    }

    /// A batch of embeddings, each trying to capture the semantic meaning of a text.
    pub async fn batch_semantic_embedding(
        &self,
        task: &TaskBatchSemanticEmbedding<'_>,
        how: &How,
    ) -> Result<BatchSemanticEmbeddingOutput, Error> {
        self.http_client.output_of(task, how).await
    }

    /// Instruct a model served by the Aleph Alpha API to continue writing a piece of text (or
    /// multimodal document).
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, How, TaskCompletion, Task, Error};
    ///
    /// async fn print_completion() -> Result<(), Error> {
    ///     // Authenticate against API. Fetches token.
    ///     let client = Client::from_env()?;
    ///
    ///     // Name of the model we want to use. Large models usually give better answers, but are
    ///     // also slower and more costly.
    ///     let model = "luminous-base";
    ///
    ///     // The task we want to perform. Here we want to continue the sentence: "An apple a day
    ///     // ..."
    ///     let task = TaskCompletion::from_text("An apple a day");
    ///
    ///     // Retrieve answer from API
    ///     let response = client.completion(&task, model, &How::default()).await?;
    ///
    ///     // Print entire sentence with completion
    ///     println!("An apple a day{}", response.completion);
    ///     Ok(())
    /// }
    /// ```
    pub async fn completion(
        &self,
        task: &TaskCompletion<'_>,
        model: &str,
        how: &How,
    ) -> Result<CompletionOutput, Error> {
        self.http_client
            .output_of(&Task::with_model(task, model), how)
            .await
    }

    /// Instruct a model served by the Aleph Alpha API to continue writing a piece of text.
    /// Stream the response as a series of events.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, How, TaskCompletion, Error, CompletionEvent};
    /// use futures_util::StreamExt;
    ///
    /// async fn print_stream_completion() -> Result<(), Error> {
    ///     // Authenticate against API. Fetches token.
    ///     let client = Client::from_env()?;
    ///
    ///     // Name of the model we want to use. Large models usually give better answers, but are
    ///     // also slower and more costly.
    ///     let model = "luminous-base";
    ///
    ///     // The task we want to perform. Here we want to continue the sentence: "An apple a day
    ///     // ..."
    ///     let task = TaskCompletion::from_text("An apple a day");
    ///
    ///     // Retrieve stream from API
    ///     let mut stream = client.stream_completion(&task, model, &How::default()).await?;
    ///     while let Some(Ok(event)) = stream.next().await {
    ///         if let CompletionEvent::StreamChunk(chunk) = event {
    ///             println!("{}", chunk.completion);
    ///         }
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub async fn stream_completion(
        &self,
        task: &TaskCompletion<'_>,
        model: &str,
        how: &How,
    ) -> Result<Pin<Box<dyn Stream<Item = Result<CompletionEvent, Error>> + Send>>, Error> {
        self.http_client
            .stream_output_of(&Task::with_model(task, model), how)
            .await
    }

    /// Send a chat message to a model.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, How, TaskChat, Error, Message};
    ///
    /// async fn print_chat() -> Result<(), Error> {
    ///     // Authenticate against API. Fetches token.
    ///     let client = Client::from_env()?;
    ///
    ///     // Name of a model that supports chat.
    ///     let model = "pharia-1-llm-7b-control";
    ///
    ///     // Create a chat task with a user message.
    ///     let message = Message::user("Hello, how are you?");
    ///     let task = TaskChat::with_message(message);
    ///
    ///     // Send the message to the model.
    ///     let response = client.chat(&task, model, &How::default()).await?;
    ///
    ///     // Print the model response
    ///     println!("{}", response.message.content);
    ///     Ok(())
    /// }
    /// ```
    pub async fn chat(
        &self,
        task: &TaskChat<'_>,
        model: &str,
        how: &How,
    ) -> Result<ChatOutput, Error> {
        self.http_client
            .output_of(&Task::with_model(task, model), how)
            .await
    }

    /// Send a chat message to a model. Stream the response as a series of events.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, How, TaskChat, Error, Message, ChatChunk, StreamChatEvent};
    /// use futures_util::StreamExt;
    ///
    /// async fn print_stream_chat() -> Result<(), Error> {
    ///     // Authenticate against API. Fetches token.
    ///     let client = Client::from_env()?;
    ///
    ///     // Name of a model that supports chat.
    ///     let model = "pharia-1-llm-7b-control";
    ///
    ///     // Create a chat task with a user message.
    ///     let message = Message::user("Hello, how are you?");
    ///     let task = TaskChat::with_message(message);
    ///
    ///     // Send the message to the model.
    ///     let mut stream = client.stream_chat(&task, model, &How::default()).await?;
    ///     while let Some(Ok(event)) = stream.next().await {
    ///         if let StreamChatEvent::Chunk(ChatChunk::Delta { delta }) = event {
    ///             println!("{}", delta.content);
    ///         }
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub async fn stream_chat(
        &self,
        task: &TaskChat<'_>,
        model: &str,
        how: &How,
    ) -> Result<Pin<Box<dyn Stream<Item = Result<StreamChatEvent, Error>> + Send>>, Error> {
        self.http_client
            .stream_output_of(&StreamTask::with_model(task, model), how)
            .await
    }

    /// Returns an explanation given a prompt and a target (typically generated
    /// by a previous completion request). The explanation describes how individual parts
    /// of the prompt influenced the target.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, How, TaskCompletion, Task, Error, Granularity,
    ///     TaskExplanation, Stopping, Prompt, Sampling, Logprobs};
    ///
    /// async fn print_explanation() -> Result<(), Error> {
    ///     let client = Client::from_env()?;
    ///
    ///     // Name of the model we want to use. Large models usually give better answers, but are
    ///     // also slower and more costly.
    ///     let model = "luminous-base";
    ///
    ///     // Input for the completion
    ///     let prompt = Prompt::from_text("An apple a day");
    ///
    ///     let task = TaskCompletion {
    ///         prompt: prompt.clone(),
    ///         stopping: Stopping::from_maximum_tokens(10),
    ///         sampling: Sampling::MOST_LIKELY,
    ///         special_tokens: false,
    ///         logprobs: Logprobs::No,
    ///     };
    ///     let response = client.completion(&task, model, &How::default()).await?;
    ///
    ///     let task = TaskExplanation {
    ///         prompt,                       // same input as for the completion
    ///         target: &response.completion, // output of the completion
    ///         granularity: Granularity::default(),
    ///     };
    ///     let response = client.explanation(&task, model, &How::default()).await?;
    ///
    ///     dbg!(&response);
    ///     Ok(())
    /// }
    /// ```
    pub async fn explanation(
        &self,
        task: &TaskExplanation<'_>,
        model: &str,
        how: &How,
    ) -> Result<ExplanationOutput, Error> {
        self.http_client
            .output_of(&task.with_model(model), how)
            .await
    }

    /// Tokenize a prompt for a specific model.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, Error, How, TaskTokenization};
    ///
    /// async fn tokenize() -> Result<(), Error> {
    ///     let client = Client::from_env()?;
    ///
    ///     // Name of the model for which we want to tokenize text.
    ///     let model = "luminous-base";
    ///
    ///     // Text prompt to be tokenized.
    ///     let prompt = "An apple a day";
    ///
    ///     let task = TaskTokenization {
    ///         prompt,
    ///         tokens: true,    // return text-tokens
    ///         token_ids: true, // return numeric token-ids
    ///     };
    ///     let responses = client.tokenize(&task, model, &How::default()).await?;
    ///
    ///     dbg!(&responses);
    ///     Ok(())
    /// }
    /// ```
    pub async fn tokenize(
        &self,
        task: &TaskTokenization<'_>,
        model: &str,
        how: &How,
    ) -> Result<TokenizationOutput, Error> {
        self.http_client
            .output_of(&task.with_model(model), how)
            .await
    }

    /// Detokenize a list of token ids into a string.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, Error, How, TaskDetokenization};
    ///
    /// async fn detokenize() -> Result<(), Error> {
    ///     let client = Client::from_env()?;
    ///
    ///     // Specify the name of the model whose tokenizer was used to generate the input token ids.
    ///     let model = "luminous-base";
    ///
    ///     // Token ids to convert into text.
    ///     let token_ids: Vec<u32> = vec![556, 48741, 247, 2983];
    ///
    ///     let task = TaskDetokenization {
    ///         token_ids: &token_ids,
    ///     };
    ///     let responses = client.detokenize(&task, model, &How::default()).await?;
    ///
    ///     dbg!(&responses);
    ///     Ok(())
    /// }
    /// ```
    pub async fn detokenize(
        &self,
        task: &TaskDetokenization<'_>,
        model: &str,
        how: &How,
    ) -> Result<DetokenizationOutput, Error> {
        self.http_client
            .output_of(&task.with_model(model), how)
            .await
    }

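    /// Fetch the tokenizer for the given model. An optional `api_token` overrides the client's
    /// default token for this request.
    ///
    /// A usage sketch; the model name is illustrative, and `encode`/`get_ids` come from the
    /// `tokenizers` crate's `Tokenizer` and `Encoding` types.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, Error};
    ///
    /// async fn local_token_count() -> Result<(), Error> {
    ///     let client = Client::from_env()?;
    ///     // Passing `None` falls back to the token the client was configured with.
    ///     let tokenizer = client.tokenizer_by_model("luminous-base", None).await?;
    ///     let encoding = tokenizer.encode("An apple a day", false).unwrap();
    ///     println!("{} tokens", encoding.get_ids().len());
    ///     Ok(())
    /// }
    /// ```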
    pub async fn tokenizer_by_model(
        &self,
        model: &str,
        api_token: Option<String>,
    ) -> Result<Tokenizer, Error> {
        self.http_client.tokenizer_by_model(model, api_token).await
    }
}

/// Controls how to execute a task.
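///
/// A sketch of overriding individual fields; the timeout value is illustrative.
///
/// ```
/// use aleph_alpha_client::How;
/// use std::time::Duration;
///
/// // Give up on the client side after 30 seconds and keep the remaining defaults.
/// let how = How {
///     client_timeout: Duration::from_secs(30),
///     ..How::default()
/// };
/// ```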
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct How {
    /// The be-nice flag is used to reduce load for the models you intend to use.
    /// This is commonly used if you are conducting experiments
    /// or trying things out that create a large load on the aleph-alpha-api
    /// and you do not want to increase queue time for other users too much.
    ///
    /// (!) This increases how often you get a `Busy` response.
    pub be_nice: bool,

    /// The maximum duration of a request before the client cancels it. This is not passed on to
    /// the server but handled by the client locally, i.e. the client will not wait longer than
    /// this duration for a response.
    pub client_timeout: Duration,

    /// API token used to authenticate the request; overwrites the default token provided on setup.
    /// The default token may not provide the tracking or permissions wanted for the request.
    pub api_token: Option<String>,
}

impl Default for How {
    fn default() -> Self {
        // The aleph-alpha-api cancels requests after 5 minutes.
        let api_timeout = Duration::from_secs(300);
        Self {
            be_nice: Default::default(),
            // On the client side a request can take longer in case of network errors,
            // therefore by default we wait slightly longer.
            client_timeout: api_timeout + Duration::from_secs(5),
            api_token: None,
        }
    }
}

/// Intended to compare embeddings.
///
/// ```no_run
/// use aleph_alpha_client::{
///     Client, Prompt, TaskSemanticEmbedding, cosine_similarity, SemanticRepresentation, How
/// };
///
/// async fn semantic_search_with_luminous_base(client: &Client) {
///     // Given
///     let robot_fact = Prompt::from_text(
///         "A robot is a machine—especially one programmable by a computer—capable of carrying out a \
///         complex series of actions automatically.",
///     );
///     let pizza_fact = Prompt::from_text(
///         "Pizza (Italian: [ˈpittsa], Neapolitan: [ˈpittsə]) is a dish of Italian origin consisting \
///         of a usually round, flat base of leavened wheat-based dough topped with tomatoes, cheese, \
///         and often various other ingredients (such as various types of sausage, anchovies, \
///         mushrooms, onions, olives, vegetables, meat, ham, etc.), which is then baked at a high \
///         temperature, traditionally in a wood-fired oven.",
///     );
///     let query = Prompt::from_text("What is Pizza?");
///     let how = How::default();
///
///     // When
///     let robot_embedding_task = TaskSemanticEmbedding {
///         prompt: robot_fact,
///         representation: SemanticRepresentation::Document,
///         compress_to_size: Some(128),
///     };
///     let robot_embedding = client.semantic_embedding(
///         &robot_embedding_task,
///         &how,
///     ).await.unwrap().embedding;
///
///     let pizza_embedding_task = TaskSemanticEmbedding {
///         prompt: pizza_fact,
///         representation: SemanticRepresentation::Document,
///         compress_to_size: Some(128),
///     };
///     let pizza_embedding = client.semantic_embedding(
///         &pizza_embedding_task,
///         &how,
///     ).await.unwrap().embedding;
///
///     let query_embedding_task = TaskSemanticEmbedding {
///         prompt: query,
///         representation: SemanticRepresentation::Query,
///         compress_to_size: Some(128),
///     };
///     let query_embedding = client.semantic_embedding(
///         &query_embedding_task,
///         &how,
///     ).await.unwrap().embedding;
///     let similarity_pizza = cosine_similarity(&query_embedding, &pizza_embedding);
///     println!("similarity pizza: {similarity_pizza}");
///     let similarity_robot = cosine_similarity(&query_embedding, &robot_embedding);
///     println!("similarity robot: {similarity_robot}");
///
///     // Then
///
///     // The fact about pizza should be more relevant to the "What is Pizza?" question than a fact
///     // about robots.
///     assert!(similarity_pizza > similarity_robot);
/// }
/// ```
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Dot product of the two vectors.
    let ab: f32 = a.iter().zip(b).map(|(a, b)| a * b).sum();
    // Squared lengths of each vector.
    let aa: f32 = a.iter().map(|a| a * a).sum();
    let bb: f32 = b.iter().map(|b| b * b).sum();
    // Normalize the dot product by the product of the vector lengths.
    let prod_len = (aa * bb).sqrt();
    ab / prod_len
}

#[cfg(test)]
mod tests {
    use crate::Prompt;

    #[test]
    fn ability_to_generate_prompt_in_local_function() {
        fn local_function() -> Prompt<'static> {
            Prompt::from_text(String::from("My test prompt"))
        }

        assert_eq!(Prompt::from_text("My test prompt"), local_function())
    }
}