use crate::registry::ModelInfo;
use serde::{Deserialize, Serialize};
/// Serializable health payload.
///
/// Presumably returned by a health-check endpoint; the handler is not part of
/// this file, so confirm there how each field is populated.
///
/// `Debug` and `Clone` are derived so the payload can be logged and copied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthResponse {
    /// Status string; the set of possible values is not defined in this file.
    pub status: String,
    /// Version string (presumably the service/crate version — confirm at the call site).
    pub version: String,
    /// Compute mode label; the accepted values are not defined in this file.
    pub compute_mode: String,
    /// Whether a model is loaded, as reported by whoever builds this response.
    pub model_loaded: bool,
    /// Uptime, presumably in seconds given the `_sec` suffix — confirm where it is computed.
    pub uptime_sec: f64,
}
/// Request body carrying a single text to tokenize.
///
/// `Debug` and `Clone` are derived so the request can be logged and copied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TokenizeRequest {
    /// The text to tokenize.
    pub text: String,
    /// Optional model identifier; deserializes to `None` when the field is absent.
    /// How `None` is resolved to a concrete model is decided outside this file.
    pub model_id: Option<String>,
}
/// Tokenization result: the token ids plus a count.
///
/// `Debug` and `Clone` are derived so the result can be logged and copied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TokenizeResponse {
    /// Token ids produced for the input.
    pub token_ids: Vec<u32>,
    /// Token count; presumably equal to `token_ids.len()`, but nothing in this
    /// type enforces that — confirm where the response is built.
    pub num_tokens: usize,
}
/// Request body for generating text from a single prompt.
///
/// Only `prompt` is required when deserializing: the sampling fields fall back
/// to their `default_*` functions and the `Option` fields to `None` when absent.
///
/// `Debug` and `Clone` are derived so the request can be logged and copied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerateRequest {
    /// Prompt text to generate from.
    pub prompt: String,
    /// Token limit for generation; falls back to `default_max_tokens` when absent.
    #[serde(default = "default_max_tokens")]
    pub max_tokens: usize,
    /// Sampling temperature; falls back to `default_temperature` when absent.
    #[serde(default = "default_temperature")]
    pub temperature: f32,
    /// Decoding strategy name; falls back to `default_strategy` when absent.
    /// The set of accepted names is not defined in this file.
    #[serde(default = "default_strategy")]
    pub strategy: String,
    /// Top-k parameter; falls back to `default_top_k` when absent.
    #[serde(default = "default_top_k")]
    pub top_k: usize,
    /// Top-p parameter; falls back to `default_top_p` when absent.
    #[serde(default = "default_top_p")]
    pub top_p: f32,
    /// Optional seed (presumably for the sampling RNG — confirm in the generator);
    /// `None` when absent.
    pub seed: Option<u64>,
    /// Optional model identifier; `None` when absent.
    pub model_id: Option<String>,
}
/// Serde fallback for `max_tokens`, referenced via
/// `#[serde(default = "default_max_tokens")]` on the request types.
pub fn default_max_tokens() -> usize {
    const FALLBACK_MAX_TOKENS: usize = 50;
    FALLBACK_MAX_TOKENS
}
/// Serde fallback for `temperature`, referenced via
/// `#[serde(default = "default_temperature")]` on the request types.
pub(crate) fn default_temperature() -> f32 {
    const FALLBACK_TEMPERATURE: f32 = 1.0;
    FALLBACK_TEMPERATURE
}
/// Serde fallback for `strategy`, referenced via
/// `#[serde(default = "default_strategy")]` on the request types.
pub(crate) fn default_strategy() -> String {
    String::from("greedy")
}
/// Serde fallback for `top_k`, referenced via
/// `#[serde(default = "default_top_k")]` on the request types.
pub fn default_top_k() -> usize {
    const FALLBACK_TOP_K: usize = 50;
    FALLBACK_TOP_K
}
/// Serde fallback for `top_p`, referenced via
/// `#[serde(default = "default_top_p")]` on the request types.
pub(crate) fn default_top_p() -> f32 {
    const FALLBACK_TOP_P: f32 = 0.9;
    FALLBACK_TOP_P
}
/// Generation result: token ids, text, and a generated-token count.
///
/// `Debug` and `Clone` are derived so the result can be logged and copied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerateResponse {
    /// Token ids of the result. Whether this includes the prompt tokens is
    /// decided where the response is built — confirm there.
    pub token_ids: Vec<u32>,
    /// Text of the result (presumably the decoded form of `token_ids` — confirm).
    pub text: String,
    /// Number of generated tokens as reported by the producer; not enforced
    /// against `token_ids` by this type.
    pub num_generated: usize,
}
/// Error payload carrying a single message string.
///
/// `Debug` and `Clone` are derived so errors can be logged and copied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorResponse {
    /// Error message text.
    pub error: String,
}
/// Request body carrying several texts to tokenize in one call.
///
/// Unlike `TokenizeRequest`, this type has no `model_id` field.
///
/// `Debug` and `Clone` are derived so the request can be logged and copied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchTokenizeRequest {
    /// The texts to tokenize.
    pub texts: Vec<String>,
}
/// Response wrapping a list of `TokenizeResponse` values.
#[derive(Serialize, Deserialize)]
pub struct BatchTokenizeResponse {
    /// Tokenization results; presumably one per input text in request order —
    /// confirm in the handler that builds this response.
    pub results: Vec<TokenizeResponse>,
}
/// Request body for generating text from several prompts with shared settings.
///
/// Only `prompts` is required when deserializing: the sampling fields fall back
/// to their `default_*` functions and `seed` to `None` when absent. Unlike
/// `GenerateRequest`, this type has no `model_id` field.
///
/// `Debug` and `Clone` are derived so the request can be logged and copied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchGenerateRequest {
    /// Prompt texts to generate from.
    pub prompts: Vec<String>,
    /// Token limit for generation; falls back to `default_max_tokens` when absent.
    /// Presumably applied per prompt — confirm in the handler.
    #[serde(default = "default_max_tokens")]
    pub max_tokens: usize,
    /// Sampling temperature; falls back to `default_temperature` when absent.
    #[serde(default = "default_temperature")]
    pub temperature: f32,
    /// Decoding strategy name; falls back to `default_strategy` when absent.
    /// The set of accepted names is not defined in this file.
    #[serde(default = "default_strategy")]
    pub strategy: String,
    /// Top-k parameter; falls back to `default_top_k` when absent.
    #[serde(default = "default_top_k")]
    pub top_k: usize,
    /// Top-p parameter; falls back to `default_top_p` when absent.
    #[serde(default = "default_top_p")]
    pub top_p: f32,
    /// Optional seed (presumably for the sampling RNG — confirm in the generator);
    /// `None` when absent.
    pub seed: Option<u64>,
}
/// Response wrapping a list of `GenerateResponse` values.
#[derive(Serialize, Deserialize)]
pub struct BatchGenerateResponse {
    /// Generation results; presumably one per prompt in request order —
    /// confirm in the handler that builds this response.
    pub results: Vec<GenerateResponse>,
}
/// Event payload carrying one token id together with a text fragment.
///
/// Presumably emitted once per generated token on a streaming endpoint —
/// confirm in the streaming handler.
///
/// `Debug` and `Clone` are derived so events can be logged and copied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamTokenEvent {
    /// Id of the token this event reports.
    pub token_id: u32,
    /// Text associated with the token (presumably its decoded form — confirm).
    pub text: String,
}
/// Event payload carrying a generated-token count.
///
/// Presumably the final event of a stream, judging by the name — confirm in
/// the streaming handler.
///
/// `Debug` and `Clone` are derived so events can be logged and copied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamDoneEvent {
    /// Number of generated tokens as reported by the producer.
    pub num_generated: usize,
}
/// Response wrapping a list of `ModelInfo` entries from `crate::registry`.
#[derive(Serialize, Deserialize)]
pub struct ModelsResponse {
    /// Model descriptors; which models are listed is decided by whoever builds
    /// this response (presumably the registry — confirm).
    pub models: Vec<ModelInfo>,
}