1use serde::{Deserialize, Serialize};
7
8use super::UNKNOWN_MODEL_ID;
9
10#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
18pub struct TokenizeRequest {
19 #[serde(default = "default_model_name")]
21 pub model: String,
22
23 pub prompt: StringOrArray,
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
29pub struct TokenizeResponse {
30 pub tokens: TokensResult,
32
33 pub count: CountResult,
35
36 pub char_count: CountResult,
38}
39
40#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
42#[serde(untagged)]
43pub enum TokensResult {
44 Single(Vec<u32>),
45 Batch(Vec<Vec<u32>>),
46}
47
48#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
50#[serde(untagged)]
51pub enum CountResult {
52 Single(i32),
53 Batch(Vec<i32>),
54}
55
56#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
64pub struct DetokenizeRequest {
65 #[serde(default = "default_model_name")]
67 pub model: String,
68
69 pub tokens: TokensInput,
71
72 #[serde(default = "default_true")]
74 pub skip_special_tokens: bool,
75}
76
77#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
79#[serde(untagged)]
80pub enum TokensInput {
81 Single(Vec<u32>),
83 Batch(Vec<Vec<u32>>),
85}
86
87impl TokensInput {
88 pub fn is_batch(&self) -> bool {
90 matches!(self, TokensInput::Batch(_))
91 }
92
93 pub fn sequences(&self) -> Vec<&[u32]> {
95 match self {
96 TokensInput::Single(seq) => vec![seq.as_slice()],
97 TokensInput::Batch(seqs) => seqs.iter().map(|s| s.as_slice()).collect(),
98 }
99 }
100}
101
102#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
104pub struct DetokenizeResponse {
105 pub text: TextResult,
107}
108
109#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
111#[serde(untagged)]
112pub enum TextResult {
113 Single(String),
114 Batch(Vec<String>),
115}
116
117#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
123pub struct AddTokenizerRequest {
124 pub name: String,
126
127 pub source: String,
129
130 #[serde(skip_serializing_if = "Option::is_none")]
132 pub chat_template_path: Option<String>,
133}
134
135#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
137pub struct AddTokenizerResponse {
138 pub id: String,
140 pub status: String,
142 pub message: String,
143 #[serde(skip_serializing_if = "Option::is_none")]
145 pub vocab_size: Option<usize>,
146}
147
148#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
150pub struct ListTokenizersResponse {
151 pub tokenizers: Vec<TokenizerInfo>,
152}
153
154#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
156pub struct TokenizerInfo {
157 pub id: String,
159 pub name: String,
161 pub source: String,
163 pub vocab_size: usize,
164}
165
166#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
168pub struct RemoveTokenizerRequest {
169 pub name: String,
171}
172
173#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
175pub struct RemoveTokenizerResponse {
176 pub success: bool,
177 pub message: String,
178}
179
180#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
186#[schemars(rename = "TokenizeStringOrArray")]
187#[serde(untagged)]
188pub enum StringOrArray {
189 Single(String),
190 Array(Vec<String>),
191}
192
193impl StringOrArray {
194 pub fn is_batch(&self) -> bool {
196 matches!(self, StringOrArray::Array(_))
197 }
198
199 pub fn as_strings(&self) -> Vec<&str> {
201 match self {
202 StringOrArray::Single(s) => vec![s.as_str()],
203 StringOrArray::Array(arr) => arr.iter().map(|s| s.as_str()).collect(),
204 }
205 }
206}
207
208fn default_model_name() -> String {
213 UNKNOWN_MODEL_ID.to_string()
214}
215
/// Serde default for boolean fields that should be `true` when omitted from
/// the payload (used by `DetokenizeRequest::skip_special_tokens`).
fn default_true() -> bool {
    true
}
219
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    /// Serializes `value` into a JSON tree so assertions compare structure
    /// (field names and values) instead of matching substrings.
    fn to_json<T: serde::Serialize>(value: &T) -> serde_json::Value {
        serde_json::to_value(value).expect("protocol types serialize to JSON")
    }

    /// A bare string prompt parses as `Single` and the model falls back to
    /// the default name.
    #[test]
    fn test_tokenize_request_single() {
        let json = r#"{"prompt": "Hello world"}"#;
        let req: TokenizeRequest = serde_json::from_str(json).unwrap();
        assert_eq!(req.model, "unknown");
        assert!(matches!(req.prompt, StringOrArray::Single(_)));
        assert!(!req.prompt.is_batch());
        assert_eq!(req.prompt.as_strings(), vec!["Hello world"]);
    }

    /// An array prompt parses as `Array` and keeps element order.
    #[test]
    fn test_tokenize_request_batch() {
        let json = r#"{"model": "llama", "prompt": ["Hello", "World"]}"#;
        let req: TokenizeRequest = serde_json::from_str(json).unwrap();
        assert_eq!(req.model, "llama");
        assert!(matches!(req.prompt, StringOrArray::Array(_)));
        assert!(req.prompt.is_batch());
        assert_eq!(req.prompt.as_strings(), vec!["Hello", "World"]);
    }

    /// A flat token array parses as `Single`; omitted fields take defaults.
    #[test]
    fn test_detokenize_request_single() {
        let json = r#"{"tokens": [1, 2, 3]}"#;
        let req: DetokenizeRequest = serde_json::from_str(json).unwrap();
        assert_eq!(req.model, "unknown");
        assert!(matches!(req.tokens, TokensInput::Single(_)));
        assert!(!req.tokens.is_batch());
        assert_eq!(req.tokens.sequences(), vec![&[1u32, 2, 3][..]]);
        assert!(req.skip_special_tokens);
    }

    /// A nested token array parses as `Batch`; explicit flags are honored.
    #[test]
    fn test_detokenize_request_batch() {
        let json = r#"{"tokens": [[1, 2], [3, 4, 5]], "skip_special_tokens": false}"#;
        let req: DetokenizeRequest = serde_json::from_str(json).unwrap();
        assert!(matches!(req.tokens, TokensInput::Batch(_)));
        assert!(req.tokens.is_batch());
        assert_eq!(
            req.tokens.sequences(),
            vec![&[1u32, 2][..], &[3u32, 4, 5][..]]
        );
        assert!(!req.skip_special_tokens);
    }

    /// Single-valued responses serialize as a flat array and bare numbers.
    #[test]
    fn test_tokenize_response_single() {
        let resp = TokenizeResponse {
            tokens: TokensResult::Single(vec![1, 2, 3]),
            count: CountResult::Single(3),
            char_count: CountResult::Single(11),
        };
        assert_eq!(
            to_json(&resp),
            json!({"tokens": [1, 2, 3], "count": 3, "char_count": 11})
        );
    }

    /// Batch responses serialize every field as an array, including
    /// `char_count`.
    #[test]
    fn test_tokenize_response_batch() {
        let resp = TokenizeResponse {
            tokens: TokensResult::Batch(vec![vec![1, 2], vec![3, 4, 5]]),
            count: CountResult::Batch(vec![2, 3]),
            char_count: CountResult::Batch(vec![5, 5]),
        };
        assert_eq!(
            to_json(&resp),
            json!({
                "tokens": [[1, 2], [3, 4, 5]],
                "count": [2, 3],
                "char_count": [5, 5]
            })
        );
    }
}