aleph_alpha_client/lib.rs

//! Usage sample
//!
//! ```no_run
//! use aleph_alpha_client::{Client, TaskCompletion, How};
//!
//! #[tokio::main(flavor = "current_thread")]
//! async fn main() {
//!     // Authenticate against API. Fetches token.
//!     let client = Client::from_env().unwrap();
//!
//!     // Name of the model we want to use. Large models usually give better answers, but are
//!     // also more costly.
//!     let model = "luminous-base";
//!
//!     // The task we want to perform. Here we want to continue the sentence: "An apple a day ..."
//!     let task = TaskCompletion::from_text("An apple a day");
//!
//!     // Retrieve the answer from the API
//!     let response = client.completion(&task, model, &How::default()).await.unwrap();
//!
//!     // Print entire sentence with completion
//!     println!("An apple a day{}", response.completion);
//! }
//! ```

mod chat;
mod completion;
mod detokenization;
mod explanation;
mod http;
mod image_preprocessing;
mod logprobs;
mod prompt;
mod semantic_embedding;
mod stream;
mod tokenization;
use dotenvy::dotenv;
use futures_util::Stream;
use http::HttpClient;
use semantic_embedding::{BatchSemanticEmbeddingOutput, SemanticEmbeddingOutput};
use std::env;
use std::{pin::Pin, time::Duration};
use tokenizers::Tokenizer;

pub use self::{
    chat::{
        ChatChunk, ChatOutput, ChatSampling, Distribution, Message, StreamChatEvent, StreamMessage,
        TaskChat, Usage,
    },
    completion::{
        CompletionEvent, CompletionOutput, CompletionSummary, Sampling, Stopping, StreamChunk,
        StreamSummary, TaskCompletion,
    },
    detokenization::{DetokenizationOutput, TaskDetokenization},
    explanation::{
        Explanation, ExplanationOutput, Granularity, ImageScore, ItemExplanation,
        PromptGranularity, TaskExplanation, TextScore,
    },
    http::{Error, Job, Task},
    logprobs::{Logprob, Logprobs},
    prompt::{Modality, Prompt},
    semantic_embedding::{
        SemanticRepresentation, TaskBatchSemanticEmbedding, TaskSemanticEmbedding,
    },
    stream::{StreamJob, StreamTask},
    tokenization::{TaskTokenization, TokenizationOutput},
};

/// Execute Jobs against the Aleph Alpha API
pub struct Client {
    /// This client does all the work of sending the requests and talking to the AA API. The only
    /// additional knowledge added by this layer is that it knows about the individual jobs which
    /// can be executed, allowing for an alternative, non-generic interface that may produce
    /// easier-to-read code for the end user in many use cases.
    http_client: HttpClient,
}

impl Client {
    /// A new instance of an Aleph Alpha client helping you interact with the Aleph Alpha API.
    ///
    /// Setting the token to None allows specifying it on a per-request basis.
    /// You may want to only use request-based authentication and skip default authentication. This
    /// is useful when writing an application which invokes the client on behalf of many different
    /// users. Having neither request nor default authentication is considered a bug and will cause
    /// a panic.
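    ///
    /// A minimal construction sketch; the host URL is a placeholder for your inference endpoint:
    ///
    /// ```no_run
    /// use aleph_alpha_client::Client;
    ///
    /// // No default token: every request must then provide one via `How::api_token`.
    /// let client = Client::new("https://inference-api.example.com", None).unwrap();
    /// ```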
    pub fn new(host: impl Into<String>, api_token: Option<String>) -> Result<Self, Error> {
        let http_client = HttpClient::new(host.into(), api_token)?;
        Ok(Self { http_client })
    }

    /// A client instance that always uses the same token for all requests.
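    ///
    /// A usage sketch; host URL and token are placeholders:
    ///
    /// ```no_run
    /// use aleph_alpha_client::Client;
    ///
    /// let client = Client::with_auth("https://inference-api.example.com", "YOUR_TOKEN").unwrap();
    /// ```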
    pub fn with_auth(host: impl Into<String>, api_token: impl Into<String>) -> Result<Self, Error> {
        Self::new(host, Some(api_token.into()))
    }

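    /// A client configured from the process environment, loading a `.env` file if present.
    ///
    /// Reads the API token from `PHARIA_AI_TOKEN` and the base URL from `INFERENCE_URL`. Note
    /// that, as written, missing variables cause a panic rather than an `Err`.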
    pub fn from_env() -> Result<Self, Error> {
        let _ = dotenv();
        let api_token = env::var("PHARIA_AI_TOKEN").unwrap();
        let inference_url = env::var("INFERENCE_URL").unwrap();
        Self::with_auth(inference_url, api_token)
    }

    /// Execute a task with the Aleph Alpha API and fetch its result.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, How, TaskCompletion, Error};
    ///
    /// async fn print_completion() -> Result<(), Error> {
    ///     // Authenticate against API. Fetches token.
    ///     let client = Client::from_env()?;
    ///
    ///     // Name of the model we want to use. Large models usually give better answers, but are
    ///     // also slower and more costly.
    ///     let model = "luminous-base";
    ///
    ///     // The task we want to perform. Here we want to continue the sentence: "An apple a day
    ///     // ..."
    ///     let task = TaskCompletion::from_text("An apple a day");
    ///
    ///     // Retrieve answer from API
    ///     let response = client.execute(model, &task, &How::default()).await?;
    ///
    ///     // Print entire sentence with completion
    ///     println!("An apple a day{}", response.completion);
    ///     Ok(())
    /// }
    /// ```
    #[deprecated = "Please use output_of instead."]
    pub async fn execute<T: Task>(
        &self,
        model: &str,
        task: &T,
        how: &How,
    ) -> Result<T::Output, Error> {
        self.output_of(&task.with_model(model), how).await
    }

    /// Execute any task with the Aleph Alpha API and fetch its result. This is most useful in
    /// generic code when you want to execute arbitrary task types. Otherwise prefer methods taking
    /// concrete tasks like [`Self::completion`] for improved readability.
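    ///
    /// A sketch of the generic interface, mirroring what [`Self::completion`] does internally:
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, Error, How, Task, TaskCompletion};
    ///
    /// async fn print_completion() -> Result<(), Error> {
    ///     let client = Client::from_env()?;
    ///     // Attaching a model to a task yields a job which `output_of` can execute.
    ///     let task = TaskCompletion::from_text("An apple a day");
    ///     let job = Task::with_model(&task, "luminous-base");
    ///     let response = client.output_of(&job, &How::default()).await?;
    ///     println!("An apple a day{}", response.completion);
    ///     Ok(())
    /// }
    /// ```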
    pub async fn output_of<T: Job>(&self, task: &T, how: &How) -> Result<T::Output, Error> {
        self.http_client.output_of(task, how).await
    }

    /// An embedding trying to capture the semantic meaning of a text. Cosine similarity can be
    /// used to find out how well two texts (or multimodal prompts) match. Useful for search
    /// use cases.
    ///
    /// See the example for [`cosine_similarity`].
    pub async fn semantic_embedding(
        &self,
        task: &TaskSemanticEmbedding<'_>,
        how: &How,
    ) -> Result<SemanticEmbeddingOutput, Error> {
        self.http_client.output_of(task, how).await
    }

    /// A batch of embeddings, each trying to capture the semantic meaning of a text.
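    ///
    /// A sketch, assuming [`TaskBatchSemanticEmbedding`] mirrors [`TaskSemanticEmbedding`] with a
    /// vector of prompts, and that the output exposes one embedding per prompt:
    ///
    /// ```no_run
    /// use aleph_alpha_client::{
    ///     Client, Error, How, Prompt, SemanticRepresentation, TaskBatchSemanticEmbedding,
    /// };
    ///
    /// async fn embed_batch() -> Result<(), Error> {
    ///     let client = Client::from_env()?;
    ///     let task = TaskBatchSemanticEmbedding {
    ///         prompts: vec![Prompt::from_text("An apple a day"), Prompt::from_text("What is Pizza?")],
    ///         representation: SemanticRepresentation::Document,
    ///         compress_to_size: Some(128),
    ///     };
    ///     let output = client.batch_semantic_embedding(&task, &How::default()).await?;
    ///     println!("{} embeddings", output.embeddings.len());
    ///     Ok(())
    /// }
    /// ```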
    pub async fn batch_semantic_embedding(
        &self,
        task: &TaskBatchSemanticEmbedding<'_>,
        how: &How,
    ) -> Result<BatchSemanticEmbeddingOutput, Error> {
        self.http_client.output_of(task, how).await
    }

    /// Instruct a model served by the Aleph Alpha API to continue writing a piece of text (or
    /// multimodal document).
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, How, TaskCompletion, Task, Error};
    ///
    /// async fn print_completion() -> Result<(), Error> {
    ///     // Authenticate against API. Fetches token.
    ///     let client = Client::from_env()?;
    ///
    ///     // Name of the model we want to use. Large models usually give better answers, but are
    ///     // also slower and more costly.
    ///     let model = "luminous-base";
    ///
    ///     // The task we want to perform. Here we want to continue the sentence: "An apple a day
    ///     // ..."
    ///     let task = TaskCompletion::from_text("An apple a day");
    ///
    ///     // Retrieve answer from API
    ///     let response = client.completion(&task, model, &How::default()).await?;
    ///
    ///     // Print entire sentence with completion
    ///     println!("An apple a day{}", response.completion);
    ///     Ok(())
    /// }
    /// ```
    pub async fn completion(
        &self,
        task: &TaskCompletion<'_>,
        model: &str,
        how: &How,
    ) -> Result<CompletionOutput, Error> {
        self.http_client
            .output_of(&Task::with_model(task, model), how)
            .await
    }

    /// Instruct a model served by the Aleph Alpha API to continue writing a piece of text.
    /// Stream the response as a series of events.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, How, TaskCompletion, Error, CompletionEvent};
    /// use futures_util::StreamExt;
    ///
    /// async fn print_stream_completion() -> Result<(), Error> {
    ///     // Authenticate against API. Fetches token.
    ///     let client = Client::from_env()?;
    ///
    ///     // Name of the model we want to use. Large models usually give better answers, but are
    ///     // also slower and more costly.
    ///     let model = "luminous-base";
    ///
    ///     // The task we want to perform. Here we want to continue the sentence: "An apple a day
    ///     // ..."
    ///     let task = TaskCompletion::from_text("An apple a day");
    ///
    ///     // Retrieve stream from API
    ///     let mut stream = client.stream_completion(&task, model, &How::default()).await?;
    ///     while let Some(Ok(event)) = stream.next().await {
    ///         if let CompletionEvent::StreamChunk(chunk) = event {
    ///             println!("{}", chunk.completion);
    ///         }
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub async fn stream_completion(
        &self,
        task: &TaskCompletion<'_>,
        model: &str,
        how: &How,
    ) -> Result<Pin<Box<dyn Stream<Item = Result<CompletionEvent, Error>> + Send>>, Error> {
        self.http_client
            .stream_output_of(&Task::with_model(task, model), how)
            .await
    }

    /// Send a chat message to a model.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, How, TaskChat, Error, Message};
    ///
    /// async fn print_chat() -> Result<(), Error> {
    ///     // Authenticate against API. Fetches token.
    ///     let client = Client::from_env()?;
    ///
    ///     // Name of a model that supports chat.
    ///     let model = "pharia-1-llm-7b-control";
    ///
    ///     // Create a chat task with a user message.
    ///     let message = Message::user("Hello, how are you?");
    ///     let task = TaskChat::with_message(message);
    ///
    ///     // Send the message to the model.
    ///     let response = client.chat(&task, model, &How::default()).await?;
    ///
    ///     // Print the model response
    ///     println!("{}", response.message.content);
    ///     Ok(())
    /// }
    /// ```
    pub async fn chat(
        &self,
        task: &TaskChat<'_>,
        model: &str,
        how: &How,
    ) -> Result<ChatOutput, Error> {
        self.http_client
            .output_of(&Task::with_model(task, model), how)
            .await
    }

    /// Send a chat message to a model. Stream the response as a series of events.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, How, TaskChat, Error, Message, ChatChunk, StreamChatEvent};
    /// use futures_util::StreamExt;
    ///
    /// async fn print_stream_chat() -> Result<(), Error> {
    ///     // Authenticate against API. Fetches token.
    ///     let client = Client::from_env()?;
    ///
    ///     // Name of a model that supports chat.
    ///     let model = "pharia-1-llm-7b-control";
    ///
    ///     // Create a chat task with a user message.
    ///     let message = Message::user("Hello, how are you?");
    ///     let task = TaskChat::with_message(message);
    ///
    ///     // Send the message to the model.
    ///     let mut stream = client.stream_chat(&task, model, &How::default()).await?;
    ///     while let Some(Ok(event)) = stream.next().await {
    ///         if let StreamChatEvent::Chunk(ChatChunk::Delta { delta }) = event {
    ///             println!("{}", delta.content);
    ///         }
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub async fn stream_chat(
        &self,
        task: &TaskChat<'_>,
        model: &str,
        how: &How,
    ) -> Result<Pin<Box<dyn Stream<Item = Result<StreamChatEvent, Error>> + Send>>, Error> {
        self.http_client
            .stream_output_of(&StreamTask::with_model(task, model), how)
            .await
    }

    /// Returns an explanation given a prompt and a target (typically generated
    /// by a previous completion request). The explanation describes how individual parts
    /// of the prompt influenced the target.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, How, TaskCompletion, Task, Error, Granularity,
    ///     TaskExplanation, Stopping, Prompt, Sampling, Logprobs};
    ///
    /// async fn print_explanation() -> Result<(), Error> {
    ///     let client = Client::from_env()?;
    ///
    ///     // Name of the model we want to use. Large models usually give better answers, but are
    ///     // also slower and more costly.
    ///     let model = "luminous-base";
    ///
    ///     // Input for the completion
    ///     let prompt = Prompt::from_text("An apple a day");
    ///
    ///     let task = TaskCompletion {
    ///         prompt: prompt.clone(),
    ///         stopping: Stopping::from_maximum_tokens(10),
    ///         sampling: Sampling::MOST_LIKELY,
    ///         special_tokens: false,
    ///         logprobs: Logprobs::No,
    ///     };
    ///     let response = client.completion(&task, model, &How::default()).await?;
    ///
    ///     let task = TaskExplanation {
    ///         prompt,                        // same input as for completion
    ///         target: &response.completion,  // output of completion
    ///         granularity: Granularity::default(),
    ///     };
    ///     let response = client.explanation(&task, model, &How::default()).await?;
    ///
    ///     dbg!(&response);
    ///     Ok(())
    /// }
    /// ```
    pub async fn explanation(
        &self,
        task: &TaskExplanation<'_>,
        model: &str,
        how: &How,
    ) -> Result<ExplanationOutput, Error> {
        self.http_client
            .output_of(&task.with_model(model), how)
            .await
    }

    /// Tokenize a prompt for a specific model.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, Error, How, TaskTokenization};
    ///
    /// async fn tokenize() -> Result<(), Error> {
    ///     let client = Client::from_env()?;
    ///
    ///     // Name of the model for which we want to tokenize text.
    ///     let model = "luminous-base";
    ///
    ///     // Text prompt to be tokenized.
    ///     let prompt = "An apple a day";
    ///
    ///     let task = TaskTokenization {
    ///         prompt,
    ///         tokens: true,       // return text-tokens
    ///         token_ids: true,    // return numeric token-ids
    ///     };
    ///     let responses = client.tokenize(&task, model, &How::default()).await?;
    ///
    ///     dbg!(&responses);
    ///     Ok(())
    /// }
    /// ```
    pub async fn tokenize(
        &self,
        task: &TaskTokenization<'_>,
        model: &str,
        how: &How,
    ) -> Result<TokenizationOutput, Error> {
        self.http_client
            .output_of(&task.with_model(model), how)
            .await
    }

    /// Detokenize a list of token ids into a string.
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, Error, How, TaskDetokenization};
    ///
    /// async fn detokenize() -> Result<(), Error> {
    ///     let client = Client::from_env()?;
    ///
    ///     // Specify the name of the model whose tokenizer was used to generate the input token ids.
    ///     let model = "luminous-base";
    ///
    ///     // Token ids to convert into text.
    ///     let token_ids: Vec<u32> = vec![556, 48741, 247, 2983];
    ///
    ///     let task = TaskDetokenization {
    ///         token_ids: &token_ids,
    ///     };
    ///     let responses = client.detokenize(&task, model, &How::default()).await?;
    ///
    ///     dbg!(&responses);
    ///     Ok(())
    /// }
    /// ```
    pub async fn detokenize(
        &self,
        task: &TaskDetokenization<'_>,
        model: &str,
        how: &How,
    ) -> Result<DetokenizationOutput, Error> {
        self.http_client
            .output_of(&task.with_model(model), how)
            .await
    }

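    /// Fetch the tokenizer for a given model, so prompts can be tokenized locally without further
    /// round trips to the API. The optional `api_token` overrides the token configured on the
    /// client.
    ///
    /// A usage sketch (`encode` and `get_ids` come from the `tokenizers` crate):
    ///
    /// ```no_run
    /// use aleph_alpha_client::{Client, Error};
    ///
    /// async fn print_token_ids() -> Result<(), Error> {
    ///     let client = Client::from_env()?;
    ///     let tokenizer = client.tokenizer_by_model("luminous-base", None).await?;
    ///     // Tokenize locally; `false` skips special tokens.
    ///     let encoding = tokenizer.encode("An apple a day", false).unwrap();
    ///     println!("{:?}", encoding.get_ids());
    ///     Ok(())
    /// }
    /// ```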
    pub async fn tokenizer_by_model(
        &self,
        model: &str,
        api_token: Option<String>,
    ) -> Result<Tokenizer, Error> {
        self.http_client.tokenizer_by_model(model, api_token).await
    }
}

/// Controls how to execute a task.
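///
/// A construction sketch; the token value is a placeholder:
///
/// ```no_run
/// use aleph_alpha_client::How;
/// use std::time::Duration;
///
/// let how = How {
///     // Ask the API to deprioritize these requests.
///     be_nice: true,
///     // Give up waiting for a response after one minute.
///     client_timeout: Duration::from_secs(60),
///     // Authenticate this request with its own token.
///     api_token: Some("YOUR_TOKEN".to_owned()),
/// };
/// ```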
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct How {
    /// The be-nice flag is used to reduce load for the models you intend to use.
    /// This is commonly used if you are conducting experiments
    /// or trying things out that create a large load on the aleph-alpha-api
    /// and you do not want to increase queue time for other users too much.
    ///
    /// (!) This increases how often you get a `Busy` response.
    pub be_nice: bool,

    /// The maximum duration of a request before the client cancels it. This is not passed on to
    /// the server but only handled by the client locally, i.e. the client will not wait longer
    /// than this duration for a response.
    pub client_timeout: Duration,

    /// API token used to authenticate the request; overrides the default token provided on setup.
    /// The default token may not provide the tracking or permissions wanted for the request.
    pub api_token: Option<String>,
}

impl Default for How {
    fn default() -> Self {
        // The aleph-alpha-api cancels requests after 5 minutes.
        let api_timeout = Duration::from_secs(300);
        Self {
            be_nice: Default::default(),
            // On the client side a request can take longer, e.g. in case of network errors,
            // therefore by default we wait slightly longer.
            client_timeout: api_timeout + Duration::from_secs(5),
            api_token: None,
        }
    }
}

/// Intended to compare embeddings.
///
/// ```no_run
/// use aleph_alpha_client::{
///     Client, Prompt, TaskSemanticEmbedding, cosine_similarity, SemanticRepresentation, How
/// };
///
/// async fn semantic_search_with_luminous_base(client: &Client) {
///     // Given
///     let robot_fact = Prompt::from_text(
///         "A robot is a machine—especially one programmable by a computer—capable of carrying out a \
///         complex series of actions automatically.",
///     );
///     let pizza_fact = Prompt::from_text(
///         "Pizza (Italian: [ˈpittsa], Neapolitan: [ˈpittsə]) is a dish of Italian origin consisting \
///         of a usually round, flat base of leavened wheat-based dough topped with tomatoes, cheese, \
///         and often various other ingredients (such as various types of sausage, anchovies, \
///         mushrooms, onions, olives, vegetables, meat, ham, etc.), which is then baked at a high \
///         temperature, traditionally in a wood-fired oven.",
///     );
///     let query = Prompt::from_text("What is Pizza?");
///     let how = How::default();
///
///     // When
///     let robot_embedding_task = TaskSemanticEmbedding {
///         prompt: robot_fact,
///         representation: SemanticRepresentation::Document,
///         compress_to_size: Some(128),
///     };
///     let robot_embedding = client.semantic_embedding(
///         &robot_embedding_task,
///         &how,
///     ).await.unwrap().embedding;
///
///     let pizza_embedding_task = TaskSemanticEmbedding {
///         prompt: pizza_fact,
///         representation: SemanticRepresentation::Document,
///         compress_to_size: Some(128),
///     };
///     let pizza_embedding = client.semantic_embedding(
///         &pizza_embedding_task,
///         &how,
///     ).await.unwrap().embedding;
///
///     let query_embedding_task = TaskSemanticEmbedding {
///         prompt: query,
///         representation: SemanticRepresentation::Query,
///         compress_to_size: Some(128),
///     };
///     let query_embedding = client.semantic_embedding(
///         &query_embedding_task,
///         &how,
///     ).await.unwrap().embedding;
///     let similarity_pizza = cosine_similarity(&query_embedding, &pizza_embedding);
///     println!("similarity pizza: {similarity_pizza}");
///     let similarity_robot = cosine_similarity(&query_embedding, &robot_embedding);
///     println!("similarity robot: {similarity_robot}");
///
///     // Then
///
///     // The fact about pizza should be more relevant to the "What is Pizza?" question than a fact
///     // about robots.
///     assert!(similarity_pizza > similarity_robot);
/// }
/// ```
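///
/// Computes `a · b / (|a| * |b|)`. Note that this returns `NaN` if either vector is all zeros.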
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let ab: f32 = a.iter().zip(b).map(|(a, b)| a * b).sum();
    let aa: f32 = a.iter().map(|a| a * a).sum();
    let bb: f32 = b.iter().map(|b| b * b).sum();
    let prod_len = (aa * bb).sqrt();
    ab / prod_len
}

#[cfg(test)]
mod tests {
    use crate::Prompt;

    #[test]
    fn ability_to_generate_prompt_in_local_function() {
        fn local_function() -> Prompt<'static> {
            Prompt::from_text(String::from("My test prompt"))
        }

        assert_eq!(Prompt::from_text("My test prompt"), local_function())
    }
}