aleph_alpha_client/
tokenization.rs

1use crate::Task;
2use serde::{Deserialize, Serialize};
3
/// Input for a [crate::Client::tokenize] request.
///
/// The task only borrows the prompt, so it is cheap to clone and compare.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TaskTokenization<'a> {
    /// The text prompt which should be converted into tokens
    pub prompt: &'a str,

    /// Specify `true` to return text-tokens.
    pub tokens: bool,

    /// Specify `true` to return numeric token-ids.
    pub token_ids: bool,
}
15
16impl<'a> From<&'a str> for TaskTokenization<'a> {
17    fn from(prompt: &str) -> TaskTokenization {
18        TaskTokenization {
19            prompt,
20            tokens: true,
21            token_ids: true,
22        }
23    }
24}
25
26impl TaskTokenization<'_> {
27    pub fn new(prompt: &str, tokens: bool, token_ids: bool) -> TaskTokenization {
28        TaskTokenization {
29            prompt,
30            tokens,
31            token_ids,
32        }
33    }
34}
35
36#[derive(Serialize, Debug)]
37struct BodyTokenization<'a> {
38    /// Name of the model tasked with completing the prompt. E.g. `luminous-base`.
39    pub model: &'a str,
40    /// String to tokenize.
41    pub prompt: &'a str,
42    /// Set this value to `true` to return text-tokens.
43    pub tokens: bool,
44    /// Set this value to `true` to return numeric token-ids.
45    pub token_ids: bool,
46}
47
48#[derive(Deserialize, Debug, PartialEq, Eq)]
49pub struct ResponseTokenization {
50    pub tokens: Option<Vec<String>>,
51    pub token_ids: Option<Vec<u32>>,
52}
53
/// Result of a tokenization task, converted from [`ResponseTokenization`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenizationOutput {
    /// Text-tokens, if the response contained them.
    pub tokens: Option<Vec<String>>,
    /// Numeric token-ids, if the response contained them.
    pub token_ids: Option<Vec<u32>>,
}
59
60impl From<ResponseTokenization> for TokenizationOutput {
61    fn from(response: ResponseTokenization) -> Self {
62        Self {
63            tokens: response.tokens,
64            token_ids: response.token_ids,
65        }
66    }
67}
68
69impl Task for TaskTokenization<'_> {
70    type Output = TokenizationOutput;
71    type ResponseBody = ResponseTokenization;
72
73    fn build_request(
74        &self,
75        client: &reqwest::Client,
76        base: &str,
77        model: &str,
78    ) -> reqwest::RequestBuilder {
79        let body = BodyTokenization {
80            model,
81            prompt: self.prompt,
82            tokens: self.tokens,
83            token_ids: self.token_ids,
84        };
85        client.post(format!("{base}/tokenize")).json(&body)
86    }
87
88    fn body_to_output(&self, response: Self::ResponseBody) -> Self::Output {
89        TokenizationOutput::from(response)
90    }
91}