1use serde::{Deserialize, Serialize};
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, RwLock};
9use std::time::Instant;
10
/// On-disk model container format, inferred from a file extension.
///
/// Serialized in lowercase (e.g. `"gguf"`) per the `rename_all` attribute.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ModelFormat {
    /// GGUF container (`.gguf`).
    Gguf,
    /// APR container (`.apr`) — presumably the aprender-native format; confirm.
    Apr,
    /// SafeTensors container (`.safetensors`).
    SafeTensors,
    /// Extension missing or unrecognized.
    Unknown,
}
20
21impl ModelFormat {
22 #[must_use]
24 pub fn from_path(path: &Path) -> Self {
25 match path.extension().and_then(|e| e.to_str()) {
26 Some("gguf") => Self::Gguf,
27 Some("apr") => Self::Apr,
28 Some("safetensors") => Self::SafeTensors,
29 _ => Self::Unknown,
30 }
31 }
32}
33
/// Serializable snapshot of the model occupying a slot.
///
/// The optional fields are populated only when metadata extraction succeeds
/// (GGUF/APR with the `realizar` feature) and are omitted from JSON when
/// `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelSlotInfo {
    /// Identifier derived from the model file stem.
    pub model_id: String,
    /// Filesystem path the model was loaded from, as given by the caller.
    pub path: String,
    /// Container format inferred from the file extension.
    pub format: ModelFormat,
    /// File size in bytes (0 when the file could not be stat'ed).
    pub size_bytes: u64,
    /// Unix timestamp (seconds) of when the model was loaded.
    pub loaded_at_secs: u64,
    /// Model architecture name from the file metadata.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub architecture: Option<String>,
    /// Vocabulary size from the file metadata.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub vocab_size: Option<usize>,
    /// Hidden/embedding dimension from the file metadata.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hidden_dim: Option<usize>,
    /// Layer count from the file metadata.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_layers: Option<usize>,
    /// Maximum context length (tokens) from the file metadata.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub context_length: Option<usize>,
    /// Number of tensors stored in the file.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tensor_count: Option<usize>,
}
61
/// Holds at most one loaded model plus the cached state needed to serve it.
///
/// Each field is individually `RwLock`-guarded, so read-heavy accessors
/// (`info`, `is_loaded`, `uptime_secs`, …) can proceed concurrently.
pub struct ModelSlot {
    /// Metadata snapshot for the currently loaded model, if any.
    info: RwLock<Option<ModelSlotInfo>>,
    /// When the current model was loaded; drives `uptime_secs`.
    loaded_at: RwLock<Option<Instant>>,
    /// Quantized weights for inference; `None` when metadata was read but the
    /// model itself failed to build.
    #[cfg(feature = "realizar")]
    quantized_model: RwLock<Option<Arc<realizar::gguf::OwnedQuantizedModel>>>,
    /// Token-id → token-string table (may be synthesized `tokenN` names).
    #[cfg(feature = "realizar")]
    vocab: RwLock<Vec<String>>,
    /// Optional BPE tokenizer loaded from a sibling `tokenizer.json`.
    #[cfg(feature = "aprender")]
    bpe_tokenizer: RwLock<Option<aprender::text::bpe::BpeTokenizer>>,
}
77
78impl ModelSlot {
79 #[must_use]
81 pub fn empty() -> Self {
82 Self {
83 info: RwLock::new(None),
84 loaded_at: RwLock::new(None),
85 #[cfg(feature = "realizar")]
86 quantized_model: RwLock::new(None),
87 #[cfg(feature = "realizar")]
88 vocab: RwLock::new(Vec::new()),
89 #[cfg(feature = "aprender")]
90 bpe_tokenizer: RwLock::new(None),
91 }
92 }
93
94 pub fn load(&self, path: &str) -> Result<ModelSlotInfo, ModelSlotError> {
99 let pb = PathBuf::from(path);
100
101 let model_id = pb.file_stem().and_then(|s| s.to_str()).unwrap_or("unknown").to_string();
102 let format = ModelFormat::from_path(&pb);
103 let size_bytes = std::fs::metadata(&pb).map(|m| m.len()).unwrap_or(0);
104
105 let gguf_meta = extract_model_metadata(&pb, format);
107
108 let info = ModelSlotInfo {
109 model_id,
110 path: path.to_string(),
111 format,
112 size_bytes,
113 loaded_at_secs: epoch_secs(),
114 architecture: gguf_meta.as_ref().map(|m| m.architecture.clone()),
115 vocab_size: gguf_meta.as_ref().map(|m| m.vocab_size),
116 hidden_dim: gguf_meta.as_ref().map(|m| m.hidden_dim),
117 num_layers: gguf_meta.as_ref().map(|m| m.num_layers),
118 context_length: gguf_meta.as_ref().map(|m| m.context_length),
119 tensor_count: gguf_meta.as_ref().map(|m| m.tensor_count),
120 };
121
122 #[cfg(feature = "realizar")]
124 if let Some(ref meta) = gguf_meta {
125 if let Ok(mut m) = self.quantized_model.write() {
126 *m = meta.model.clone();
127 }
128 if let Ok(mut v) = self.vocab.write() {
129 *v = meta.vocab.clone();
130 }
131 }
132
133 #[cfg(feature = "aprender")]
135 {
136 let bpe = load_bpe_tokenizer(&pb);
137 if let Ok(mut t) = self.bpe_tokenizer.write() {
138 *t = bpe;
139 }
140 }
141
142 if let Ok(mut slot) = self.info.write() {
143 *slot = Some(info.clone());
144 }
145 if let Ok(mut t) = self.loaded_at.write() {
146 *t = Some(Instant::now());
147 }
148
149 Ok(info)
150 }
151
152 pub fn unload(&self) -> Result<(), ModelSlotError> {
154 let had_model = self.info.write().map(|mut s| s.take().is_some()).unwrap_or(false);
155 if let Ok(mut t) = self.loaded_at.write() {
156 *t = None;
157 }
158 #[cfg(feature = "realizar")]
159 {
160 if let Ok(mut m) = self.quantized_model.write() {
161 *m = None;
162 }
163 if let Ok(mut v) = self.vocab.write() {
164 v.clear();
165 }
166 }
167 #[cfg(feature = "aprender")]
168 {
169 if let Ok(mut t) = self.bpe_tokenizer.write() {
170 *t = None;
171 }
172 }
173 if had_model {
174 Ok(())
175 } else {
176 Err(ModelSlotError::NoModelLoaded)
177 }
178 }
179
180 #[must_use]
182 pub fn info(&self) -> Option<ModelSlotInfo> {
183 self.info.read().ok()?.clone()
184 }
185
186 #[must_use]
188 pub fn is_loaded(&self) -> bool {
189 self.info.read().map(|s| s.is_some()).unwrap_or(false)
190 }
191
192 #[cfg(feature = "realizar")]
194 #[must_use]
195 pub fn quantized_model(&self) -> Option<Arc<realizar::gguf::OwnedQuantizedModel>> {
196 self.quantized_model.read().ok()?.clone()
197 }
198
199 #[cfg(feature = "realizar")]
201 #[must_use]
202 pub fn vocabulary(&self) -> Vec<String> {
203 self.vocab.read().map(|v| v.clone()).unwrap_or_default()
204 }
205
206 #[cfg(feature = "realizar")]
208 #[must_use]
209 pub fn has_inference_model(&self) -> bool {
210 self.quantized_model.read().map(|m| m.is_some()).unwrap_or(false)
211 }
212
213 #[cfg(feature = "realizar")]
217 #[must_use]
218 pub fn encode_text(&self, text: &str) -> Vec<u32> {
219 if text.is_empty() {
220 return Vec::new();
221 }
222
223 #[cfg(feature = "aprender")]
225 if let Ok(guard) = self.bpe_tokenizer.read() {
226 if let Some(ref bpe) = *guard {
227 return bpe.encode(text);
228 }
229 }
230
231 let vocab = self.vocabulary();
233 super::inference::encode_prompt(&vocab, text)
234 }
235
236 #[cfg(feature = "aprender")]
238 #[must_use]
239 pub fn has_bpe_tokenizer(&self) -> bool {
240 self.bpe_tokenizer.read().map(|t| t.is_some()).unwrap_or(false)
241 }
242
243 #[must_use]
245 pub fn uptime_secs(&self) -> u64 {
246 self.loaded_at.read().ok().and_then(|t| t.map(|i| i.elapsed().as_secs())).unwrap_or(0)
247 }
248}
249
/// Errors produced by [`ModelSlot`] operations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ModelSlotError {
    /// The operation required a loaded model but the slot was empty.
    NoModelLoaded,
}

impl std::fmt::Display for ModelSlotError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::NoModelLoaded => "No model loaded",
        };
        f.write_str(message)
    }
}

impl std::error::Error for ModelSlotError {}
265
/// Current Unix time in whole seconds; 0 if the system clock predates the epoch.
fn epoch_secs() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|elapsed| elapsed.as_secs())
        .unwrap_or(0)
}
269
/// Metadata extracted from a model file, plus (optionally) a ready-to-use
/// quantized model. Despite the name, this is produced for APR files too.
struct GgufMeta {
    // Architecture name from the file's config/metadata.
    architecture: String,
    // Vocabulary size.
    vocab_size: usize,
    // Hidden/embedding dimension.
    hidden_dim: usize,
    // Layer count.
    num_layers: usize,
    // Maximum context length in tokens.
    context_length: usize,
    // Number of tensors stored in the file.
    tensor_count: usize,
    // Built inference model; `None` when construction failed (metadata-only).
    #[cfg(feature = "realizar")]
    model: Option<Arc<realizar::gguf::OwnedQuantizedModel>>,
    // Token table; may be synthesized `token0..tokenN` names.
    #[cfg(feature = "realizar")]
    vocab: Vec<String>,
}
285
/// Route metadata extraction to the parser matching `format`.
///
/// `SafeTensors` and `Unknown` have no extractor, so they yield `None`.
#[cfg(feature = "realizar")]
fn extract_model_metadata(path: &Path, format: ModelFormat) -> Option<GgufMeta> {
    match format {
        ModelFormat::Gguf => extract_gguf_metadata(path),
        ModelFormat::Apr => extract_apr_metadata(path),
        ModelFormat::SafeTensors | ModelFormat::Unknown => None,
    }
}
295
/// Parse a GGUF file: memory-map it, read its config and vocabulary, and
/// attempt to build an owned quantized model for inference.
///
/// Returns `None` when the path is not valid UTF-8 or the file cannot be
/// mapped/parsed. If only the quantized-model build fails, a metadata-only
/// result is returned with `model: None` (inference disabled).
#[cfg(feature = "realizar")]
fn extract_gguf_metadata(path: &Path) -> Option<GgufMeta> {
    let mapped = realizar::gguf::MappedGGUFModel::from_path(path.to_str()?).ok()?;
    let config = realizar::gguf::GGUFConfig::from_gguf(&mapped.model).ok()?;

    // Fall back to synthetic "tokenN" names when the file carries no vocabulary.
    let vocab = mapped
        .model
        .vocabulary()
        .unwrap_or_else(|| (0..config.vocab_size).map(|i| format!("token{i}")).collect());

    // The quantized-model build can fail independently of metadata parsing;
    // keep the metadata and just log that inference is unavailable.
    let quantized = match realizar::gguf::OwnedQuantizedModel::from_mapped(&mapped) {
        Ok(m) => {
            eprintln!("[banco] Quantized model loaded successfully");
            Some(m)
        }
        Err(e) => {
            eprintln!("[banco] WARNING: Failed to build quantized model: {e}");
            eprintln!("[banco] Metadata available but inference disabled for this model");
            None
        }
    };

    Some(GgufMeta {
        architecture: config.architecture.clone(),
        vocab_size: config.vocab_size,
        hidden_dim: config.hidden_dim,
        num_layers: config.num_layers,
        context_length: config.context_length,
        tensor_count: mapped.model.tensors.len(),
        model: quantized.map(Arc::new),
        vocab,
    })
}
333
/// Parse an APR file: map it, read its metadata, and attempt to build a
/// quantized model via the GGUF-compatible loader.
///
/// Missing metadata fields default to 0 (2048 for context length). The
/// vocabulary is always synthesized as `token0..tokenN` — presumably APR
/// files embed no token table; confirm against the `realizar::apr` format.
#[cfg(feature = "realizar")]
fn extract_apr_metadata(path: &Path) -> Option<GgufMeta> {
    let apr = realizar::apr::MappedAprModel::from_path(path).ok()?;

    let meta = &apr.metadata;
    let architecture = meta.architecture.clone().unwrap_or_else(|| "unknown".to_string());
    let hidden_dim = meta.hidden_size.unwrap_or(0);
    let num_layers = meta.num_layers.unwrap_or(0);
    let vocab_size = meta.vocab_size.unwrap_or(0);
    let context_length = meta.max_position_embeddings.unwrap_or(2048);
    let tensor_count = apr.tensor_count();

    eprintln!(
        "[banco] APR model: {architecture} | {num_layers} layers | {hidden_dim}d | {vocab_size} vocab | {tensor_count} tensors"
    );

    // Synthesized placeholder vocabulary (no real token strings available).
    let vocab: Vec<String> = if vocab_size > 0 {
        (0..vocab_size).map(|i| format!("token{i}")).collect()
    } else {
        Vec::new()
    };

    // As with GGUF, a failed model build still yields usable metadata.
    let quantized = match realizar::gguf::OwnedQuantizedModel::from_apr(&apr) {
        Ok(m) => {
            eprintln!("[banco] APR quantized model loaded successfully");
            Some(m)
        }
        Err(e) => {
            eprintln!("[banco] WARNING: Failed to build quantized model from APR: {e}");
            None
        }
    };

    Some(GgufMeta {
        architecture,
        vocab_size,
        hidden_dim,
        num_layers,
        context_length,
        tensor_count,
        model: quantized.map(Arc::new),
        vocab,
    })
}
381
/// Stub used when the `realizar` inference backend is compiled out: no
/// metadata can be extracted from any format.
#[cfg(not(feature = "realizar"))]
fn extract_model_metadata(_path: &Path, _format: ModelFormat) -> Option<GgufMeta> {
    None
}
387
/// Find and load a Hugging Face-style BPE tokenizer for `model_path`.
///
/// Candidates are tried in order:
/// 1. `<stem>.tokenizer.json` next to the model file;
/// 2. `tokenizer.json` in the model file's directory.
///
/// Each attempt is logged. Returns `None` when no candidate loads, in which
/// case callers fall back to greedy tokenization.
#[cfg(feature = "aprender")]
fn load_bpe_tokenizer(model_path: &Path) -> Option<aprender::text::bpe::BpeTokenizer> {
    use aprender::text::bpe::BpeTokenizer;

    // Attempt one candidate path, logging the outcome. Extracted so both
    // search locations share identical load/log behavior.
    let try_candidate = |candidate: PathBuf| -> Option<BpeTokenizer> {
        if !candidate.exists() {
            return None;
        }
        match BpeTokenizer::from_huggingface(&candidate) {
            Ok(tok) => {
                eprintln!("[banco] BPE tokenizer loaded from {}", candidate.display());
                Some(tok)
            }
            Err(e) => {
                eprintln!(
                    "[banco] WARNING: Failed to load tokenizer from {}: {e}",
                    candidate.display()
                );
                None
            }
        }
    };

    // 1. Sibling file named "<stem>.tokenizer.json".
    let stem = model_path.file_stem()?.to_string_lossy();
    let sibling = model_path.with_file_name(format!("{stem}.tokenizer.json"));
    if let Some(tok) = try_candidate(sibling) {
        return Some(tok);
    }

    // 2. Plain "tokenizer.json" in the model's directory.
    if let Some(parent) = model_path.parent() {
        if let Some(tok) = try_candidate(parent.join("tokenizer.json")) {
            return Some(tok);
        }
    }

    eprintln!(
        "[banco] No tokenizer.json found for '{}' — using greedy tokenization",
        model_path.display()
    );
    None
}