batuta/serve/banco/
handlers_models.rs1use axum::{extract::State, http::StatusCode, response::Json};
4
5use super::state::BancoState;
6use super::types::{ErrorResponse, ModelLoadRequest, ModelStatusResponse};
7
8pub async fn model_load_handler(
10 State(state): State<BancoState>,
11 Json(request): Json<ModelLoadRequest>,
12) -> Result<Json<ModelStatusResponse>, (StatusCode, Json<ErrorResponse>)> {
13 let info = state.model.load(&request.model).map_err(|e| {
14 (
15 StatusCode::INTERNAL_SERVER_ERROR,
16 Json(ErrorResponse::new(e.to_string(), "model_load_error", 500)),
17 )
18 })?;
19
20 state.events.emit(&super::events::BancoEvent::ModelLoaded {
21 model_id: info.model_id.clone(),
22 format: format!("{:?}", info.format).to_lowercase(),
23 });
24
25 let tokenizer_mode = {
26 #[cfg(feature = "aprender")]
27 {
28 if state.model.has_bpe_tokenizer() { "bpe" } else { "greedy" }.to_string()
29 }
30 #[cfg(not(feature = "aprender"))]
31 {
32 "greedy".to_string()
33 }
34 };
35 Ok(Json(ModelStatusResponse {
36 loaded: true,
37 model: Some(info),
38 uptime_secs: Some(0),
39 tokenizer: Some(tokenizer_mode),
40 }))
41}
42
43pub async fn model_unload_handler(
45 State(state): State<BancoState>,
46) -> Result<StatusCode, (StatusCode, Json<ErrorResponse>)> {
47 state
48 .model
49 .unload()
50 .map(|()| {
51 state.events.emit(&super::events::BancoEvent::ModelUnloaded);
52 StatusCode::NO_CONTENT
53 })
54 .map_err(|e| {
55 (StatusCode::BAD_REQUEST, Json(ErrorResponse::new(e.to_string(), "no_model", 400)))
56 })
57}
58
59pub async fn model_status_handler(State(state): State<BancoState>) -> Json<ModelStatusResponse> {
61 let info = state.model.info();
62 let loaded = info.is_some();
63 let tokenizer = if loaded {
64 #[cfg(feature = "aprender")]
65 {
66 Some(if state.model.has_bpe_tokenizer() { "bpe" } else { "greedy" }.to_string())
67 }
68 #[cfg(not(feature = "aprender"))]
69 {
70 Some("greedy".to_string())
71 }
72 } else {
73 None
74 };
75 Json(ModelStatusResponse {
76 loaded,
77 model: info,
78 uptime_secs: if loaded { Some(state.model.uptime_secs()) } else { None },
79 tokenizer,
80 })
81}