use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock};
use std::time::Instant;
/// On-disk model container format, detected from the file extension.
///
/// Serialized in lowercase (e.g. `"gguf"`) per the `serde` rename rule below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ModelFormat {
    /// `.gguf` container.
    Gguf,
    /// `.apr` container.
    Apr,
    /// `.safetensors` container.
    SafeTensors,
    /// Missing or unrecognized extension.
    Unknown,
}
impl ModelFormat {
    /// Detects the model format from `path`'s extension.
    ///
    /// Matching is case-insensitive (`model.GGUF` and `model.gguf` both map
    /// to [`ModelFormat::Gguf`]); anything unrecognized, including a missing
    /// or non-UTF-8 extension, yields [`ModelFormat::Unknown`].
    #[must_use]
    pub fn from_path(path: &Path) -> Self {
        // Lowercase the extension so uppercase/mixed-case files (common on
        // case-insensitive filesystems) are still recognized.
        match path
            .extension()
            .and_then(|e| e.to_str())
            .map(str::to_ascii_lowercase)
            .as_deref()
        {
            Some("gguf") => Self::Gguf,
            Some("apr") => Self::Apr,
            Some("safetensors") => Self::SafeTensors,
            _ => Self::Unknown,
        }
    }
}
/// Snapshot of a loaded model's identity and metadata, suitable for
/// serialization in status/API responses.
///
/// The `Option` fields are populated only when metadata extraction succeeds
/// for the model file; they are omitted from serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelSlotInfo {
    /// Identifier derived from the file stem (e.g. `"llama"` for `llama.gguf`).
    pub model_id: String,
    /// Path string the model was loaded from, as given by the caller.
    pub path: String,
    /// Container format detected from the file extension.
    pub format: ModelFormat,
    /// File size in bytes; 0 when the file could not be stat'ed.
    pub size_bytes: u64,
    /// Unix-epoch seconds at which the model was loaded.
    pub loaded_at_secs: u64,
    /// Architecture name reported by the model file's metadata.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub architecture: Option<String>,
    /// Vocabulary size reported by the model file's metadata.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub vocab_size: Option<usize>,
    /// Hidden dimension reported by the model file's metadata.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hidden_dim: Option<usize>,
    /// Layer count reported by the model file's metadata.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_layers: Option<usize>,
    /// Maximum context length reported by the model file's metadata.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub context_length: Option<usize>,
    /// Number of tensors in the model file.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tensor_count: Option<usize>,
}
/// Holds at most one loaded model plus derived state (quantized weights,
/// vocabulary, tokenizer), each behind its own `RwLock` so a shared slot
/// can be read and reloaded across threads.
pub struct ModelSlot {
    /// Metadata for the currently loaded model; `None` when the slot is empty.
    info: RwLock<Option<ModelSlotInfo>>,
    /// Instant of the last successful load, used to report uptime.
    loaded_at: RwLock<Option<Instant>>,
    /// Quantized inference model, when one could be built from the file.
    #[cfg(feature = "realizar")]
    quantized_model: RwLock<Option<Arc<realizar::gguf::OwnedQuantizedModel>>>,
    /// Token strings indexed by id (may be synthetic placeholders).
    #[cfg(feature = "realizar")]
    vocab: RwLock<Vec<String>>,
    /// BPE tokenizer discovered next to the model file, if any.
    #[cfg(feature = "aprender")]
    bpe_tokenizer: RwLock<Option<aprender::text::bpe::BpeTokenizer>>,
}
impl ModelSlot {
    /// Creates an empty slot with no model loaded.
    #[must_use]
    pub fn empty() -> Self {
        Self {
            info: RwLock::new(None),
            loaded_at: RwLock::new(None),
            #[cfg(feature = "realizar")]
            quantized_model: RwLock::new(None),
            #[cfg(feature = "realizar")]
            vocab: RwLock::new(Vec::new()),
            #[cfg(feature = "aprender")]
            bpe_tokenizer: RwLock::new(None),
        }
    }

    /// Loads the model file at `path` into this slot, replacing any
    /// previously loaded model and all of its derived state.
    ///
    /// Metadata extraction, quantized-model construction, and tokenizer
    /// discovery are best-effort: on failure the corresponding fields end up
    /// `None`/empty rather than erroring. Poisoned locks are skipped
    /// (best-effort writes), matching the rest of this type.
    ///
    /// # Errors
    /// Currently never fails; the `Result` return reserves room for future
    /// validation errors.
    pub fn load(&self, path: &str) -> Result<ModelSlotInfo, ModelSlotError> {
        let pb = PathBuf::from(path);
        let model_id = pb.file_stem().and_then(|s| s.to_str()).unwrap_or("unknown").to_string();
        let format = ModelFormat::from_path(&pb);
        // Size is informational only; a missing/unreadable file reports 0.
        let size_bytes = std::fs::metadata(&pb).map(|m| m.len()).unwrap_or(0);
        let gguf_meta = extract_model_metadata(&pb, format);
        let info = ModelSlotInfo {
            model_id,
            path: path.to_string(),
            format,
            size_bytes,
            loaded_at_secs: epoch_secs(),
            architecture: gguf_meta.as_ref().map(|m| m.architecture.clone()),
            vocab_size: gguf_meta.as_ref().map(|m| m.vocab_size),
            hidden_dim: gguf_meta.as_ref().map(|m| m.hidden_dim),
            num_layers: gguf_meta.as_ref().map(|m| m.num_layers),
            context_length: gguf_meta.as_ref().map(|m| m.context_length),
            tensor_count: gguf_meta.as_ref().map(|m| m.tensor_count),
        };
        #[cfg(feature = "realizar")]
        {
            // BUGFIX: always overwrite the inference state, even when
            // metadata extraction failed. Previously this only wrote on
            // `Some(meta)`, so a failed extraction left the PREVIOUS model's
            // quantized weights and vocabulary in place while `info`
            // described the newly requested model.
            let (model, vocab) = match gguf_meta.as_ref() {
                Some(meta) => (meta.model.clone(), meta.vocab.clone()),
                None => (None, Vec::new()),
            };
            if let Ok(mut m) = self.quantized_model.write() {
                *m = model;
            }
            if let Ok(mut v) = self.vocab.write() {
                *v = vocab;
            }
        }
        #[cfg(feature = "aprender")]
        {
            // Replaces any previous tokenizer; `None` clears it.
            let bpe = load_bpe_tokenizer(&pb);
            if let Ok(mut t) = self.bpe_tokenizer.write() {
                *t = bpe;
            }
        }
        if let Ok(mut slot) = self.info.write() {
            *slot = Some(info.clone());
        }
        if let Ok(mut t) = self.loaded_at.write() {
            *t = Some(Instant::now());
        }
        Ok(info)
    }

    /// Unloads the current model and clears all derived state.
    ///
    /// # Errors
    /// Returns [`ModelSlotError::NoModelLoaded`] if the slot was already
    /// empty; auxiliary state is cleared in either case.
    pub fn unload(&self) -> Result<(), ModelSlotError> {
        let had_model = self.info.write().map(|mut s| s.take().is_some()).unwrap_or(false);
        if let Ok(mut t) = self.loaded_at.write() {
            *t = None;
        }
        #[cfg(feature = "realizar")]
        {
            if let Ok(mut m) = self.quantized_model.write() {
                *m = None;
            }
            if let Ok(mut v) = self.vocab.write() {
                v.clear();
            }
        }
        #[cfg(feature = "aprender")]
        {
            if let Ok(mut t) = self.bpe_tokenizer.write() {
                *t = None;
            }
        }
        if had_model {
            Ok(())
        } else {
            Err(ModelSlotError::NoModelLoaded)
        }
    }

    /// Snapshot of the loaded model's metadata; `None` when the slot is
    /// empty or the lock is poisoned.
    #[must_use]
    pub fn info(&self) -> Option<ModelSlotInfo> {
        self.info.read().ok()?.clone()
    }

    /// Whether a model is currently loaded.
    #[must_use]
    pub fn is_loaded(&self) -> bool {
        self.info.read().map(|s| s.is_some()).unwrap_or(false)
    }

    /// Shared handle to the quantized inference model, if one was built.
    #[cfg(feature = "realizar")]
    #[must_use]
    pub fn quantized_model(&self) -> Option<Arc<realizar::gguf::OwnedQuantizedModel>> {
        self.quantized_model.read().ok()?.clone()
    }

    /// Owned copy of the vocabulary (may contain synthetic placeholder
    /// tokens — see the metadata extractors).
    #[cfg(feature = "realizar")]
    #[must_use]
    pub fn vocabulary(&self) -> Vec<String> {
        self.vocab.read().map(|v| v.clone()).unwrap_or_default()
    }

    /// Whether a quantized model is available for inference.
    #[cfg(feature = "realizar")]
    #[must_use]
    pub fn has_inference_model(&self) -> bool {
        self.quantized_model.read().map(|m| m.is_some()).unwrap_or(false)
    }

    /// Encodes `text` into token ids, preferring the discovered BPE
    /// tokenizer and falling back to greedy vocabulary matching.
    #[cfg(feature = "realizar")]
    #[must_use]
    pub fn encode_text(&self, text: &str) -> Vec<u32> {
        if text.is_empty() {
            return Vec::new();
        }
        #[cfg(feature = "aprender")]
        if let Ok(guard) = self.bpe_tokenizer.read() {
            if let Some(ref bpe) = *guard {
                return bpe.encode(text);
            }
        }
        let vocab = self.vocabulary();
        super::inference::encode_prompt(&vocab, text)
    }

    /// Whether a BPE tokenizer was loaded alongside the model.
    #[cfg(feature = "aprender")]
    #[must_use]
    pub fn has_bpe_tokenizer(&self) -> bool {
        self.bpe_tokenizer.read().map(|t| t.is_some()).unwrap_or(false)
    }

    /// Seconds since the current model was loaded; 0 when the slot is empty.
    #[must_use]
    pub fn uptime_secs(&self) -> u64 {
        self.loaded_at.read().ok().and_then(|t| t.map(|i| i.elapsed().as_secs())).unwrap_or(0)
    }
}
/// Errors returned by [`ModelSlot`] operations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ModelSlotError {
    /// The slot holds no model, so the requested operation cannot proceed.
    NoModelLoaded,
}

impl std::fmt::Display for ModelSlotError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::NoModelLoaded => "No model loaded",
        };
        f.write_str(message)
    }
}

impl std::error::Error for ModelSlotError {}
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Yields 0 if the system clock reports a time before the epoch.
fn epoch_secs() -> u64 {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_secs())
}
/// Metadata extracted from a model file, plus (feature-gated) the quantized
/// model and vocabulary derived from it.
///
/// Named for its GGUF origin but also populated from APR files.
struct GgufMeta {
    /// Architecture name from the file; `"unknown"` when APR metadata omits it.
    architecture: String,
    vocab_size: usize,
    hidden_dim: usize,
    num_layers: usize,
    context_length: usize,
    /// Number of tensors stored in the file.
    tensor_count: usize,
    /// Quantized inference model; `None` when construction failed
    /// (metadata-only mode).
    #[cfg(feature = "realizar")]
    model: Option<Arc<realizar::gguf::OwnedQuantizedModel>>,
    /// Token strings indexed by id; may be synthetic `tokenN` placeholders.
    #[cfg(feature = "realizar")]
    vocab: Vec<String>,
}
/// Dispatches metadata extraction to the reader matching `format`.
///
/// Formats without a reader (`SafeTensors`, `Unknown`) yield `None`.
#[cfg(feature = "realizar")]
fn extract_model_metadata(path: &Path, format: ModelFormat) -> Option<GgufMeta> {
    if format == ModelFormat::Gguf {
        extract_gguf_metadata(path)
    } else if format == ModelFormat::Apr {
        extract_apr_metadata(path)
    } else {
        None
    }
}
/// Maps a GGUF file and extracts its config, vocabulary, and (best-effort)
/// an owned quantized model.
///
/// Returns `None` when the path is not valid UTF-8, the file cannot be
/// mapped, or the config cannot be parsed. Failure to build the quantized
/// model is non-fatal: metadata is still returned with `model: None`, so
/// the slot reports info but disables inference.
#[cfg(feature = "realizar")]
fn extract_gguf_metadata(path: &Path) -> Option<GgufMeta> {
    let mapped = realizar::gguf::MappedGGUFModel::from_path(path.to_str()?).ok()?;
    let config = realizar::gguf::GGUFConfig::from_gguf(&mapped.model).ok()?;
    // Fall back to synthetic "tokenN" placeholders when the file carries no
    // embedded vocabulary (greedy encoding will be meaningless but safe).
    let vocab = mapped
        .model
        .vocabulary()
        .unwrap_or_else(|| (0..config.vocab_size).map(|i| format!("token{i}")).collect());
    let quantized = match realizar::gguf::OwnedQuantizedModel::from_mapped(&mapped) {
        Ok(m) => {
            eprintln!("[banco] Quantized model loaded successfully");
            Some(m)
        }
        Err(e) => {
            // Non-fatal: keep metadata, run without inference for this model.
            eprintln!("[banco] WARNING: Failed to build quantized model: {e}");
            eprintln!("[banco] Metadata available but inference disabled for this model");
            None
        }
    };
    Some(GgufMeta {
        architecture: config.architecture.clone(),
        vocab_size: config.vocab_size,
        hidden_dim: config.hidden_dim,
        num_layers: config.num_layers,
        context_length: config.context_length,
        tensor_count: mapped.model.tensors.len(),
        model: quantized.map(Arc::new),
        vocab,
    })
}
/// Maps an APR file and extracts its metadata, building a quantized model
/// via the GGUF-compatible `from_apr` path when possible.
///
/// Returns `None` only when the file cannot be mapped; every metadata field
/// is individually optional and defaults to 0 (or 2048 for context length)
/// when absent. Quantized-model failure is non-fatal (`model: None`).
#[cfg(feature = "realizar")]
fn extract_apr_metadata(path: &Path) -> Option<GgufMeta> {
    let apr = realizar::apr::MappedAprModel::from_path(path).ok()?;
    let meta = &apr.metadata;
    let architecture = meta.architecture.clone().unwrap_or_else(|| "unknown".to_string());
    let hidden_dim = meta.hidden_size.unwrap_or(0);
    let num_layers = meta.num_layers.unwrap_or(0);
    let vocab_size = meta.vocab_size.unwrap_or(0);
    // NOTE(review): 2048 appears to be a conventional context-length default
    // when the file omits max_position_embeddings — confirm against realizar.
    let context_length = meta.max_position_embeddings.unwrap_or(2048);
    let tensor_count = apr.tensor_count();
    eprintln!(
        "[banco] APR model: {architecture} | {num_layers} layers | {hidden_dim}d | {vocab_size} vocab | {tensor_count} tensors"
    );
    // APR extraction never reads a real vocabulary here; synthesize "tokenN"
    // placeholders so downstream greedy encoding has something to index.
    let vocab: Vec<String> = if vocab_size > 0 {
        (0..vocab_size).map(|i| format!("token{i}")).collect()
    } else {
        Vec::new()
    };
    let quantized = match realizar::gguf::OwnedQuantizedModel::from_apr(&apr) {
        Ok(m) => {
            eprintln!("[banco] APR quantized model loaded successfully");
            Some(m)
        }
        Err(e) => {
            // Non-fatal: metadata still usable without inference.
            eprintln!("[banco] WARNING: Failed to build quantized model from APR: {e}");
            None
        }
    };
    Some(GgufMeta {
        architecture,
        vocab_size,
        hidden_dim,
        num_layers,
        context_length,
        tensor_count,
        model: quantized.map(Arc::new),
        vocab,
    })
}
/// Fallback when the `realizar` feature is disabled: metadata extraction is
/// unavailable, so every load reports format/size only.
#[cfg(not(feature = "realizar"))]
fn extract_model_metadata(_path: &Path, _format: ModelFormat) -> Option<GgufMeta> {
    None
}
/// Searches for a BPE tokenizer file next to `model_path` and loads the
/// first one that parses.
///
/// Candidates, most specific first:
/// 1. `<stem>.tokenizer.json` beside the model file
/// 2. `tokenizer.json` in the model's directory
///
/// A candidate that exists but fails to parse is logged and skipped.
/// Returns `None` (with a log line) when no tokenizer loads, signalling
/// callers to fall back to greedy tokenization.
///
/// BUGFIX: previously a path with no file stem short-circuited via `?`,
/// silently skipping the directory-level `tokenizer.json` fallback and the
/// "not found" log line; candidates are now probed independently.
#[cfg(feature = "aprender")]
fn load_bpe_tokenizer(model_path: &Path) -> Option<aprender::text::bpe::BpeTokenizer> {
    use aprender::text::bpe::BpeTokenizer;
    let mut candidates: Vec<PathBuf> = Vec::new();
    if let Some(stem) = model_path.file_stem() {
        let stem = stem.to_string_lossy();
        candidates.push(model_path.with_file_name(format!("{stem}.tokenizer.json")));
    }
    if let Some(parent) = model_path.parent() {
        candidates.push(parent.join("tokenizer.json"));
    }
    for candidate in candidates.iter().filter(|c| c.exists()) {
        match BpeTokenizer::from_huggingface(candidate) {
            Ok(tok) => {
                eprintln!("[banco] BPE tokenizer loaded from {}", candidate.display());
                return Some(tok);
            }
            Err(e) => {
                eprintln!(
                    "[banco] WARNING: Failed to load tokenizer from {}: {e}",
                    candidate.display()
                );
            }
        }
    }
    eprintln!(
        "[banco] No tokenizer.json found for '{}' — using greedy tokenization",
        model_path.display()
    );
    None
}