1use serde::{Deserialize, Serialize};
7
/// Request payload for a tokenize call.
///
/// Both fields are mandatory: neither carries a serde default, so
/// deserialization fails when `model` or `prompt` is missing.
#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
pub struct TokenizeRequest {
    /// Model name supplied by the caller.
    // NOTE(review): presumably selects which registered tokenizer handles the
    // request — confirm against the handler that consumes this type.
    pub model: String,

    /// Text to tokenize: either one string or an array of strings.
    pub prompt: StringOrArray,
}
23
/// Response payload for a tokenize call.
///
/// Every field is an untagged single-or-batch value.
// NOTE(review): the types do not force all three fields to use the same shape
// (all single or all batch); that is presumably the producer's responsibility.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
pub struct TokenizeResponse {
    /// Token IDs: one flat list of `u32`, or a list of such lists.
    pub tokens: TokensResult,

    /// One integer, or one integer per batch entry.
    // NOTE(review): presumably the number of token IDs per sequence — the
    // tests in this file pair `[1, 2, 3]` with a count of 3.
    pub count: CountResult,

    /// One integer, or one integer per batch entry.
    // NOTE(review): presumably the character length of each input text —
    // confirm how the producer measures it (bytes vs. chars).
    pub char_count: CountResult,
}
36
/// Token IDs in single or batch form.
///
/// Untagged: serialized as a bare JSON array (`[1,2,3]`) or an array of
/// arrays (`[[1,2],[3,4,5]]`), with no variant name in the output.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(untagged)]
pub enum TokensResult {
    /// One flat sequence of token IDs.
    Single(Vec<u32>),
    /// Several sequences of token IDs.
    Batch(Vec<Vec<u32>>),
}
44
/// A count in single or batch form.
///
/// Untagged: serialized as a bare JSON number (`3`) or an array of numbers
/// (`[2,3]`), with no variant name in the output.
// NOTE(review): the payload type is a signed `i32`; if these values can never
// be negative an unsigned type would express that, but switching would change
// the public type and the generated schema.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(untagged)]
pub enum CountResult {
    /// One count.
    Single(i32),
    /// One count per batch entry.
    Batch(Vec<i32>),
}
52
/// Request payload for a detokenize call.
///
/// `model` and `tokens` are mandatory; `skip_special_tokens` may be omitted
/// and then defaults to `true`.
#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
pub struct DetokenizeRequest {
    /// Model name supplied by the caller.
    // NOTE(review): presumably selects which registered tokenizer handles the
    // request — confirm against the handler that consumes this type.
    pub model: String,

    /// Token IDs: one sequence or a batch of sequences.
    pub tokens: TokensInput,

    /// Special-token flag; `true` when absent from the payload.
    // NOTE(review): presumably drops special tokens from the decoded text when
    // set — the effect is implemented outside this file.
    #[serde(default = "default_true")]
    pub skip_special_tokens: bool,
}
72
/// Token-ID input in single or batch form.
///
/// Untagged: the JSON is a bare array (`[1, 2, 3]`) or an array of arrays
/// (`[[1, 2], [3, 4, 5]]`), with no variant name.
// Variant order matters: serde tries untagged variants top to bottom and keeps
// the first one that deserializes, so an empty array `[]` is read as `Single`
// (an empty sequence), never as an empty `Batch`.
#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
#[serde(untagged)]
pub enum TokensInput {
    /// One flat sequence of token IDs.
    Single(Vec<u32>),
    /// Several sequences of token IDs.
    Batch(Vec<Vec<u32>>),
}
82
83impl TokensInput {
84 pub fn is_batch(&self) -> bool {
86 matches!(self, TokensInput::Batch(_))
87 }
88
89 pub fn sequences(&self) -> Vec<&[u32]> {
91 match self {
92 TokensInput::Single(seq) => vec![seq.as_slice()],
93 TokensInput::Batch(seqs) => seqs.iter().map(|s| s.as_slice()).collect(),
94 }
95 }
96}
97
/// Response payload for a detokenize call.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
pub struct DetokenizeResponse {
    /// Text result: one string, or an array of strings.
    pub text: TextResult,
}
104
/// Text in single or batch form.
///
/// Untagged: serialized as a bare JSON string or an array of strings, with no
/// variant name in the output.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(untagged)]
pub enum TextResult {
    /// One string.
    Single(String),
    /// One string per batch entry.
    Batch(Vec<String>),
}
112
/// Request payload for registering a tokenizer.
///
/// `name` and `source` are mandatory; `chat_template_path` is optional.
#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
pub struct AddTokenizerRequest {
    /// Name given to the tokenizer.
    pub name: String,

    /// Where the tokenizer comes from.
    // NOTE(review): the accepted forms (local path, hub identifier, URL, ...)
    // are not visible in this file — confirm with the code that loads it.
    pub source: String,

    /// Optional chat-template path; left out of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chat_template_path: Option<String>,
}
130
/// Response payload for a tokenizer registration.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
pub struct AddTokenizerResponse {
    /// Identifier of the tokenizer.
    pub id: String,
    /// Status of the operation, as a free-form string.
    // NOTE(review): the set of values producers emit is not defined in this
    // file; an enum would make it explicit if the set is closed.
    pub status: String,
    /// Human-readable message accompanying the status.
    pub message: String,
    /// Vocabulary size, when available; left out of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub vocab_size: Option<usize>,
}
143
/// Response payload listing tokenizers.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
pub struct ListTokenizersResponse {
    /// One entry per tokenizer; may be empty.
    pub tokenizers: Vec<TokenizerInfo>,
}
149
/// Descriptive record for one tokenizer, used as the element type of
/// [`ListTokenizersResponse::tokenizers`].
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
pub struct TokenizerInfo {
    /// Identifier of the tokenizer.
    pub id: String,
    /// Name of the tokenizer.
    pub name: String,
    /// Where the tokenizer came from.
    pub source: String,
    /// Vocabulary size. Unlike `AddTokenizerResponse::vocab_size`, this is not optional.
    pub vocab_size: usize,
}
161
/// Request payload for removing a tokenizer.
#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
pub struct RemoveTokenizerRequest {
    /// Name of the tokenizer to remove; mandatory.
    pub name: String,
}
168
/// Response payload for a tokenizer removal.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
pub struct RemoveTokenizerResponse {
    /// Success flag for the removal.
    pub success: bool,
    /// Human-readable message accompanying the result.
    pub message: String,
}
175
/// Text input in single or batch form.
///
/// Untagged: the JSON is a bare string (`"Hello"`) or an array of strings
/// (`["Hello", "World"]`), with no variant name.
// The generated JSON schema calls this type `TokenizeStringOrArray` instead of
// `StringOrArray`.
// NOTE(review): presumably renamed to avoid a schema-name clash with another
// `StringOrArray` elsewhere in the project — confirm before changing it.
#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
#[schemars(rename = "TokenizeStringOrArray")]
#[serde(untagged)]
pub enum StringOrArray {
    /// One string.
    Single(String),
    /// Several strings.
    Array(Vec<String>),
}
188
189impl StringOrArray {
190 pub fn is_batch(&self) -> bool {
192 matches!(self, StringOrArray::Array(_))
193 }
194
195 pub fn as_strings(&self) -> Vec<&str> {
197 match self {
198 StringOrArray::Single(s) => vec![s.as_str()],
199 StringOrArray::Array(arr) => arr.iter().map(|s| s.as_str()).collect(),
200 }
201 }
202}
203
/// Serde default provider that always yields `true`.
///
/// Referenced by `#[serde(default = "default_true")]` on
/// `DetokenizeRequest::skip_special_tokens`; serde needs a function path
/// because a plain `default` would give `bool::default()`, i.e. `false`.
fn default_true() -> bool {
    true
}
211
#[cfg(test)]
mod tests {
    //! Serialization and deserialization checks for the tokenizer API types,
    //! plus coverage of the single/batch helper methods.

    use super::*;

    /// `model` has no serde default, so a payload without it is rejected.
    #[test]
    fn test_tokenize_request_requires_model() {
        let json = r#"{"prompt": "Hello world"}"#;
        let result = serde_json::from_str::<TokenizeRequest>(json);
        assert!(result.is_err(), "Should fail without model field");
    }

    /// A bare JSON string maps to `Single` and is exposed as one slice.
    #[test]
    fn test_tokenize_request_single() {
        let json = r#"{"model": "llama", "prompt": "Hello world"}"#;
        let req: TokenizeRequest = serde_json::from_str(json).unwrap();
        assert_eq!(req.model, "llama");
        assert!(matches!(req.prompt, StringOrArray::Single(_)));
        assert!(!req.prompt.is_batch());
        assert_eq!(req.prompt.as_strings(), vec!["Hello world"]);
    }

    /// A JSON array of strings maps to `Array` and preserves element order.
    #[test]
    fn test_tokenize_request_batch() {
        let json = r#"{"model": "llama", "prompt": ["Hello", "World"]}"#;
        let req: TokenizeRequest = serde_json::from_str(json).unwrap();
        assert_eq!(req.model, "llama");
        assert!(matches!(req.prompt, StringOrArray::Array(_)));
        assert!(req.prompt.is_batch());
        assert_eq!(req.prompt.as_strings(), vec!["Hello", "World"]);
    }

    #[test]
    fn test_detokenize_request_requires_model() {
        let json = r#"{"tokens": [1, 2, 3]}"#;
        let result = serde_json::from_str::<DetokenizeRequest>(json);
        assert!(result.is_err(), "Should fail without model field");
    }

    /// A flat array maps to `Single`; an omitted flag falls back to `true`.
    #[test]
    fn test_detokenize_request_single() {
        let json = r#"{"model": "test-model", "tokens": [1, 2, 3]}"#;
        let req: DetokenizeRequest = serde_json::from_str(json).unwrap();
        assert!(matches!(req.tokens, TokensInput::Single(_)));
        assert!(!req.tokens.is_batch());
        assert_eq!(req.tokens.sequences(), vec![&[1u32, 2, 3][..]]);
        assert!(req.skip_special_tokens);
    }

    /// A nested array maps to `Batch`; an explicit flag overrides the default.
    #[test]
    fn test_detokenize_request_batch() {
        let json = r#"{"model": "test-model", "tokens": [[1, 2], [3, 4, 5]], "skip_special_tokens": false}"#;
        let req: DetokenizeRequest = serde_json::from_str(json).unwrap();
        assert!(matches!(req.tokens, TokensInput::Batch(_)));
        assert!(req.tokens.is_batch());
        assert_eq!(
            req.tokens.sequences(),
            vec![&[1u32, 2][..], &[3, 4, 5][..]]
        );
        assert!(!req.skip_special_tokens);
    }

    /// Untagged variants are tried in declaration order, so `[]` is an empty
    /// `Single` sequence rather than an empty `Batch`.
    #[test]
    fn test_detokenize_request_empty_tokens_is_single() {
        let json = r#"{"model": "test-model", "tokens": []}"#;
        let req: DetokenizeRequest = serde_json::from_str(json).unwrap();
        assert!(matches!(&req.tokens, TokensInput::Single(seq) if seq.is_empty()));
        assert_eq!(req.tokens.sequences().len(), 1);
    }

    #[test]
    fn test_tokenize_response_single() {
        let resp = TokenizeResponse {
            tokens: TokensResult::Single(vec![1, 2, 3]),
            count: CountResult::Single(3),
            char_count: CountResult::Single(11),
        };
        let json = serde_json::to_string(&resp).unwrap();
        // Key-qualified checks so a value cannot match under the wrong field.
        assert!(json.contains("\"tokens\":[1,2,3]"));
        assert!(json.contains("\"count\":3"));
        assert!(json.contains("\"char_count\":11"));
    }

    #[test]
    fn test_tokenize_response_batch() {
        let resp = TokenizeResponse {
            tokens: TokensResult::Batch(vec![vec![1, 2], vec![3, 4, 5]]),
            count: CountResult::Batch(vec![2, 3]),
            char_count: CountResult::Batch(vec![5, 5]),
        };
        let json = serde_json::to_string(&resp).unwrap();
        assert!(json.contains("\"tokens\":[[1,2],[3,4,5]]"));
        assert!(json.contains("\"count\":[2,3]"));
        assert!(json.contains("\"char_count\":[5,5]"));
    }

    /// `chat_template_path` is optional on input and omitted on output when `None`.
    #[test]
    fn test_add_tokenizer_request_optional_chat_template() {
        let json = r#"{"name": "tok", "source": "src"}"#;
        let req: AddTokenizerRequest = serde_json::from_str(json).unwrap();
        assert!(req.chat_template_path.is_none());

        let out = serde_json::to_string(&req).unwrap();
        assert!(!out.contains("chat_template_path"));
    }
}