1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
//! Model registry for multi-model serving
//!
//! Provides a central registry to manage multiple models in production environments.
//! Supports dynamic model loading/unloading, caching, and thread-safe concurrent access.
//!
//! ## Features
//!
//! - Thread-safe model registry with concurrent access
//! - Holds a `ModelCache` handle for memory management (currently reserved for future use)
//! - Support for multiple model formats (GGUF, Safetensors)
//! - Model metadata and configuration
//! - Graceful loading/unloading with error handling
//!
//! ## Example
//!
//! ```rust,ignore
//! use realizar::registry::{ModelRegistry, ModelConfig};
//!
//! let registry = ModelRegistry::new(5); // Max 5 models cached
//! registry.register("llama-7b", model, tokenizer)?;
//! let model = registry.get("llama-7b")?;
//! ```
use std::{
collections::HashMap,
sync::{Arc, Mutex},
};
use arc_swap::ArcSwap;
use crate::{
cache::ModelCache,
error::{RealizarError, Result},
layers::Model,
tokenizer::BPETokenizer,
};
/// Information about a registered model
///
/// Cloned out to callers by `ModelRegistry::get_info` and `ModelRegistry::list`.
/// Derives the serde `Serialize`/`Deserialize` traits.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ModelInfo {
/// Unique model identifier; also used as the key in the registry map
pub id: String,
/// Human-readable model name (`ModelRegistry::register` sets this to the ID)
pub name: String,
/// Model description (`ModelRegistry::register` leaves this empty)
pub description: String,
/// Model format (GGUF, Safetensors, etc.); `ModelRegistry::register` sets `"unknown"`
pub format: String,
/// Whether the model is currently loaded; set to `true` on registration
pub loaded: bool,
}
/// Entry in the model registry
///
/// Entries are cloned whenever a writer copies the map to build a new snapshot:
/// the model and tokenizer are shared through their `Arc`s, the metadata is copied.
#[derive(Clone)]
struct ModelEntry {
/// Model instance (shared with callers of `ModelRegistry::get`)
model: Arc<Model>,
/// Tokenizer for this model (shared with callers of `ModelRegistry::get`)
tokenizer: Arc<BPETokenizer>,
/// Model metadata
info: ModelInfo,
}
/// Type alias for the models map (immutable snapshot), keyed by model ID.
///
/// Writers never mutate a stored map; they clone it, edit the clone, and store the clone.
type ModelsMap = HashMap<String, ModelEntry>;
/// Type alias for model and tokenizer tuple, as returned by `ModelRegistry::get`
type ModelTuple = (Arc<Model>, Arc<BPETokenizer>);
/// Central registry for managing multiple models
///
/// The `ModelRegistry` provides thread-safe access to multiple models.
/// Readers load the current map snapshot from `models` without taking `write_lock`;
/// writers hold `write_lock`, clone the map, modify the clone, and store it as the
/// new snapshot.
///
/// Uses `ArcSwap` for lock-free reads (per `McKenney` 2011).
pub struct ModelRegistry {
/// Registry of loaded models (lock-free reads via `ArcSwap`)
models: ArcSwap<ModelsMap>,
/// Write lock to serialize modifications; guards no data of its own
write_lock: Mutex<()>,
/// Model cache for memory management (reserved for future use)
// Constructed in `new` but not read by any method in this file, hence the allow.
#[allow(dead_code)]
cache: Arc<ModelCache>,
}
impl ModelRegistry {
    /// Create a new, empty model registry
    ///
    /// # Arguments
    ///
    /// * `cache_capacity` - Maximum number of models to keep in cache
    ///   (passed to `ModelCache::new`)
    #[must_use]
    pub fn new(cache_capacity: usize) -> Self {
        Self {
            models: ArcSwap::from_pointee(HashMap::new()),
            write_lock: Mutex::new(()),
            cache: Arc::new(ModelCache::new(cache_capacity)),
        }
    }

    /// Register a new model with default metadata
    ///
    /// The stored `ModelInfo` uses `id` for both the ID and the name, an empty
    /// description, the format `"unknown"`, and `loaded = true`.
    ///
    /// # Arguments
    ///
    /// * `id` - Unique identifier for the model
    /// * `model` - Model instance
    /// * `tokenizer` - Tokenizer for the model
    ///
    /// # Errors
    ///
    /// Returns `RealizarError::ModelAlreadyExists` if the model ID already exists, or
    /// `RealizarError::RegistryError` if the write lock cannot be acquired.
    pub fn register(&self, id: &str, model: Model, tokenizer: BPETokenizer) -> Result<()> {
        let info = ModelInfo {
            id: id.to_string(),
            name: id.to_string(),
            description: String::new(),
            format: "unknown".to_string(),
            loaded: true,
        };
        self.register_with_info(info, model, tokenizer)
    }

    /// Register a model with full metadata
    ///
    /// `info.loaded` is overwritten with `true` regardless of the value passed in.
    ///
    /// # Arguments
    ///
    /// * `info` - Model metadata; `info.id` is the registry key
    /// * `model` - Model instance
    /// * `tokenizer` - Tokenizer for the model
    ///
    /// # Errors
    ///
    /// Returns `RealizarError::ModelAlreadyExists` if the model ID already exists, or
    /// `RealizarError::RegistryError` if the write lock cannot be acquired.
    pub fn register_with_info(
        &self,
        mut info: ModelInfo,
        model: Model,
        tokenizer: BPETokenizer,
    ) -> Result<()> {
        let _guard = self.lock_writes()?;
        let current = self.models.load();
        if current.contains_key(&info.id) {
            return Err(RealizarError::ModelAlreadyExists(info.id));
        }

        info.loaded = true;
        let id = info.id.clone();
        let entry = ModelEntry {
            model: Arc::new(model),
            tokenizer: Arc::new(tokenizer),
            info,
        };
        self.publish_with(&**current, |map| {
            map.insert(id, entry);
        });
        Ok(())
    }

    /// Get a model and its tokenizer by ID (lock-free)
    ///
    /// # Arguments
    ///
    /// * `id` - Model identifier
    ///
    /// # Errors
    ///
    /// Returns `RealizarError::ModelNotFound` if no model is registered under `id`
    pub fn get(&self, id: &str) -> Result<ModelTuple> {
        // Lock-free read via ArcSwap::load()
        let models = self.models.load();
        let entry = models
            .get(id)
            .ok_or_else(|| RealizarError::ModelNotFound(id.to_string()))?;
        Ok((Arc::clone(&entry.model), Arc::clone(&entry.tokenizer)))
    }

    /// Get a copy of a model's metadata by ID (lock-free)
    ///
    /// # Arguments
    ///
    /// * `id` - Model identifier
    ///
    /// # Errors
    ///
    /// Returns `RealizarError::ModelNotFound` if no model is registered under `id`
    pub fn get_info(&self, id: &str) -> Result<ModelInfo> {
        let models = self.models.load();
        let entry = models
            .get(id)
            .ok_or_else(|| RealizarError::ModelNotFound(id.to_string()))?;
        Ok(entry.info.clone())
    }

    /// List metadata for all registered models (lock-free)
    ///
    /// The order follows `HashMap` iteration order and is therefore unspecified.
    #[must_use]
    pub fn list(&self) -> Vec<ModelInfo> {
        let models = self.models.load();
        models.values().map(|entry| entry.info.clone()).collect()
    }

    /// Unregister a model
    ///
    /// Callers that already obtained the model through `get` keep their `Arc`s;
    /// only the registry's entry is removed.
    ///
    /// # Arguments
    ///
    /// * `id` - Model identifier
    ///
    /// # Errors
    ///
    /// Returns `RealizarError::ModelNotFound` if the model is not registered, or
    /// `RealizarError::RegistryError` if the write lock cannot be acquired.
    pub fn unregister(&self, id: &str) -> Result<()> {
        let _guard = self.lock_writes()?;
        let current = self.models.load();
        // Check before copying so a miss does not pay for cloning the map.
        if !current.contains_key(id) {
            return Err(RealizarError::ModelNotFound(id.to_string()));
        }
        self.publish_with(&**current, |map| {
            map.remove(id);
        });
        Ok(())
    }

    /// Atomically replace a model (for hot-reload)
    ///
    /// The existing metadata for `id` is preserved; only the model and tokenizer
    /// instances are swapped.
    ///
    /// # Arguments
    ///
    /// * `id` - Model identifier to replace
    /// * `model` - New model instance
    /// * `tokenizer` - New tokenizer instance
    ///
    /// # Errors
    ///
    /// Returns `RealizarError::ModelNotFound` if the model is not registered, or
    /// `RealizarError::RegistryError` if the write lock cannot be acquired.
    pub fn replace(&self, id: &str, model: Model, tokenizer: BPETokenizer) -> Result<()> {
        let _guard = self.lock_writes()?;
        let current = self.models.load();
        // Single lookup: a missing entry is the "not found" error, a present one
        // supplies the metadata to carry over.
        let existing = current
            .get(id)
            .ok_or_else(|| RealizarError::ModelNotFound(id.to_string()))?;
        let entry = ModelEntry {
            model: Arc::new(model),
            tokenizer: Arc::new(tokenizer),
            info: existing.info.clone(),
        };
        self.publish_with(&**current, |map| {
            map.insert(id.to_string(), entry);
        });
        Ok(())
    }

    /// Check if a model is registered (lock-free)
    #[must_use]
    pub fn contains(&self, id: &str) -> bool {
        self.models.load().contains_key(id)
    }

    /// Get the number of registered models (lock-free)
    #[must_use]
    pub fn len(&self) -> usize {
        self.models.load().len()
    }

    /// Check if the registry is empty (lock-free)
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Acquire the lock that serializes all registry modifications.
    ///
    /// The returned guard must be held until the new snapshot has been stored, so
    /// that concurrent writers cannot base their copies on the same old snapshot.
    fn lock_writes(&self) -> Result<std::sync::MutexGuard<'_, ()>> {
        self.write_lock.lock().map_err(|_| {
            RealizarError::RegistryError("Failed to acquire write lock".to_string())
        })
    }

    /// Clone `current`, apply `mutate` to the clone, and store it as the new snapshot.
    ///
    /// Callers must hold the guard from `lock_writes` for the duration of the call.
    fn publish_with(&self, current: &ModelsMap, mutate: impl FnOnce(&mut ModelsMap)) {
        let mut next = current.clone();
        mutate(&mut next);
        self.models.store(Arc::new(next));
    }
}
// Textually includes `registry_get_info.rs` into this module at compile time.
// NOTE(review): its contents are not visible here (presumably split-out tests or
// additional items for this module) — confirm before renaming private items above.
include!("registry_get_info.rs");