1#![allow(missing_docs)]
26
27pub mod error;
28pub mod types;
29pub mod xml_parser;
30
31pub use error::{LocalAIError, Result};
32pub use types::*;
33
34use std::path::PathBuf;
35use std::sync::Arc;
36
37use anyhow::Result as AnyhowResult;
38use tokio::sync::Mutex;
39
/// Configuration for the local AI plugin: model file names, directories,
/// and runtime settings passed to the inference backend.
#[derive(Debug, Clone)]
pub struct LocalAIConfig {
    /// Directory where GGUF model files are stored.
    pub models_dir: PathBuf,
    /// Directory used for cached data.
    pub cache_dir: PathBuf,
    /// File name of the small text-generation model (resolved under `models_dir`).
    pub small_model: String,
    /// File name of the large text-generation model (resolved under `models_dir`).
    pub large_model: String,
    /// File name of the embedding model (resolved under `models_dir`).
    pub embedding_model: String,
    /// Expected embedding vector length; a mismatch is only logged, not an error.
    pub embedding_dimensions: usize,
    /// Number of model layers to offload to the GPU (0 = CPU only).
    pub gpu_layers: u32,
    /// Context window size passed to the model loader.
    pub context_size: usize,
}
60
61impl Default for LocalAIConfig {
62 fn default() -> Self {
63 let home = directories::BaseDirs::new()
64 .map(|d| d.home_dir().to_path_buf())
65 .unwrap_or_else(|| PathBuf::from("."));
66
67 Self {
68 models_dir: home.join(".eliza").join("models"),
69 cache_dir: home.join(".eliza").join("cache"),
70 small_model: "Qwen3-4B-Q4_K_M.gguf".to_string(),
71 large_model: "Qwen3-4B-Q4_K_M.gguf".to_string(),
72 embedding_model: "bge-small-en-v1.5.Q4_K_M.gguf".to_string(),
73 embedding_dimensions: 384,
74 gpu_layers: 0,
75 context_size: 8192,
76 }
77 }
78}
79
80impl LocalAIConfig {
81 pub fn new(models_dir: impl Into<PathBuf>) -> Self {
83 let models_dir = models_dir.into();
84 Self {
85 models_dir,
86 ..Default::default()
87 }
88 }
89
90 pub fn small_model(mut self, model: impl Into<String>) -> Self {
92 self.small_model = model.into();
93 self
94 }
95
96 pub fn large_model(mut self, model: impl Into<String>) -> Self {
98 self.large_model = model.into();
99 self
100 }
101
102 pub fn embedding_model(mut self, model: impl Into<String>) -> Self {
104 self.embedding_model = model.into();
105 self
106 }
107
108 pub fn gpu_layers(mut self, layers: u32) -> Self {
110 self.gpu_layers = layers;
111 self
112 }
113
114 pub fn context_size(mut self, size: usize) -> Self {
116 self.context_size = size;
117 self
118 }
119}
120
#[cfg(feature = "llm")]
mod llm_impl {
    //! Real inference backend backed by `llama_cpp_rs`; compiled only when
    //! the `llm` feature is enabled.
    use super::*;
    use llama_cpp_rs::{
        options::{ModelOptions, PredictOptions},
        LLama,
    };

    /// A loaded llama.cpp model together with the file name it came from.
    pub struct ModelHolder {
        /// Handle to the loaded model.
        pub model: LLama,
        /// Model file name; used to tag generation results.
        pub model_name: String,
    }

    impl ModelHolder {
        /// Loads the GGUF model at `path`, applying the GPU-layer and
        /// context-size settings from `config`. Pass `embeddings = true`
        /// when the model will serve [`ModelHolder::embed`] calls.
        ///
        /// # Errors
        /// Returns [`LocalAIError::ModelLoadError`] when llama.cpp cannot
        /// load the file.
        pub fn load(
            path: &std::path::Path,
            config: &LocalAIConfig,
            embeddings: bool,
        ) -> Result<Self> {
            // Display name: the file name, or "unknown" for unprintable paths.
            let model_name = path
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("unknown")
                .to_string();

            tracing::info!("Loading model: {}", path.display());

            let options = ModelOptions {
                n_gpu_layers: config.gpu_layers as i32,
                context_size: config.context_size as i32,
                embeddings,
                ..Default::default()
            };

            let model_path = path.to_string_lossy().to_string();
            let model = LLama::new(model_path, &options)
                .map_err(|e| LocalAIError::ModelLoadError(e.to_string()))?;

            tracing::info!("Model loaded successfully: {}", model_name);

            Ok(Self { model, model_name })
        }

        /// Runs text generation for `params` against the loaded model.
        ///
        /// # Errors
        /// Returns [`LocalAIError::InferenceError`] when prediction fails.
        pub fn generate(&self, params: &TextGenerationParams) -> Result<TextGenerationResult> {
            let predict_options = PredictOptions {
                tokens: params.max_tokens as i32,
                temperature: params.temperature,
                top_p: params.top_p,
                stop_prompts: params.stop_sequences.clone(),
                ..Default::default()
            };

            let result = self
                .model
                .predict(params.prompt.clone(), predict_options)
                .map_err(|e| LocalAIError::InferenceError(e.to_string()))?;

            Ok(TextGenerationResult {
                text: result,
                // NOTE(review): this backend does not report token usage here,
                // so 0 is a placeholder — confirm before relying on this field.
                tokens_used: 0, model: self.model_name.clone(),
            })
        }

        /// Computes the embedding vector for `text`.
        ///
        /// # Errors
        /// Returns [`LocalAIError::InferenceError`] when the backend fails.
        pub fn embed(&self, text: &str) -> Result<Vec<f32>> {
            let mut predict_options = PredictOptions::default();
            self.model
                .embeddings(text.to_string(), &mut predict_options)
                .map_err(|e| LocalAIError::InferenceError(e.to_string()))
        }
    }
}
198
199#[cfg(not(feature = "llm"))]
200mod llm_impl {
201 use super::*;
202
203 pub struct ModelHolder {
205 pub model_name: String,
206 }
207
208 impl ModelHolder {
209 pub fn load(
210 path: &std::path::Path,
211 _config: &LocalAIConfig,
212 _embeddings: bool,
213 ) -> Result<Self> {
214 let model_name = path
215 .file_name()
216 .and_then(|n| n.to_str())
217 .unwrap_or("unknown")
218 .to_string();
219
220 if !path.exists() {
222 return Err(LocalAIError::ModelNotFound(path.display().to_string()));
223 }
224
225 tracing::warn!(
226 "LLM feature not enabled. Model path validated: {} (enable 'llm' feature for actual inference)",
227 path.display()
228 );
229
230 Ok(Self { model_name })
231 }
232
233 pub fn generate(&self, params: &TextGenerationParams) -> Result<TextGenerationResult> {
234 let _ = params;
235 Err(LocalAIError::ConfigError(format!(
236 "Local inference is not available because the `llm` feature is disabled (model: {}). \
237Enable it with Cargo features: `elizaos-plugin-local-ai = {{ ..., features = [\"llm\"] }}`",
238 self.model_name
239 )))
240 }
241
242 pub fn embed(&self, _text: &str) -> Result<Vec<f32>> {
243 Err(LocalAIError::ConfigError(format!(
244 "Embeddings are not available because the `llm` feature is disabled (model: {}). \
245Enable it with Cargo features: `elizaos-plugin-local-ai = {{ ..., features = [\"llm\"] }}`",
246 self.model_name
247 )))
248 }
249 }
250}
251
252use llm_impl::ModelHolder;
253
/// Local AI plugin: lazily loads GGUF models from disk and serves text
/// generation and embedding requests through them.
///
/// Each model slot is an async-mutex-guarded `Option` populated on first use.
pub struct LocalAIPlugin {
    /// Plugin configuration (paths, model names, runtime settings).
    config: LocalAIConfig,
    /// Lazily-loaded small text-generation model.
    small_model: Arc<Mutex<Option<ModelHolder>>>,
    /// Lazily-loaded large text-generation model.
    large_model: Arc<Mutex<Option<ModelHolder>>>,
    /// Lazily-loaded embedding model.
    embedding_model: Arc<Mutex<Option<ModelHolder>>>,
}
267
268impl LocalAIPlugin {
269 pub fn new(config: LocalAIConfig) -> Result<Self> {
271 if config.models_dir.as_os_str().is_empty() {
272 return Err(LocalAIError::ConfigError(
273 "Models directory path cannot be empty".to_string(),
274 ));
275 }
276 if config.cache_dir.as_os_str().is_empty() {
277 return Err(LocalAIError::ConfigError(
278 "Cache directory path cannot be empty".to_string(),
279 ));
280 }
281 std::fs::create_dir_all(&config.models_dir)
282 .map_err(|e| LocalAIError::IoError(e.to_string()))?;
283 std::fs::create_dir_all(&config.cache_dir)
284 .map_err(|e| LocalAIError::IoError(e.to_string()))?;
285
286 Ok(Self {
287 config,
288 small_model: Arc::new(Mutex::new(None)),
289 large_model: Arc::new(Mutex::new(None)),
290 embedding_model: Arc::new(Mutex::new(None)),
291 })
292 }
293
294 pub fn from_env() -> AnyhowResult<Self> {
306 let mut config = LocalAIConfig::default();
307
308 if let Ok(dir) = std::env::var("MODELS_DIR") {
309 config.models_dir = PathBuf::from(dir);
310 }
311
312 if let Ok(dir) = std::env::var("CACHE_DIR") {
313 config.cache_dir = PathBuf::from(dir);
314 }
315
316 if let Ok(model) = std::env::var("LOCAL_SMALL_MODEL") {
317 config.small_model = model;
318 }
319
320 if let Ok(model) = std::env::var("LOCAL_LARGE_MODEL") {
321 config.large_model = model;
322 }
323
324 if let Ok(model) = std::env::var("LOCAL_EMBEDDING_MODEL") {
325 config.embedding_model = model;
326 }
327
328 if let Ok(dims) = std::env::var("LOCAL_EMBEDDING_DIMENSIONS") {
329 config.embedding_dimensions = dims.parse().unwrap_or(384);
330 }
331
332 if let Ok(layers) = std::env::var("GPU_LAYERS") {
333 config.gpu_layers = layers.parse().unwrap_or(0);
334 }
335
336 if let Ok(size) = std::env::var("CONTEXT_SIZE") {
337 config.context_size = size.parse().unwrap_or(8192);
338 }
339
340 Self::new(config).map_err(|e| anyhow::anyhow!("Failed to create Local AI plugin: {}", e))
341 }
342
343 pub fn config(&self) -> &LocalAIConfig {
345 &self.config
346 }
347
348 pub async fn generate_text(&self, prompt: &str) -> Result<String> {
350 let params = TextGenerationParams::new(prompt);
351 let result = self.generate_text_with_params(¶ms).await?;
352 Ok(result.text)
353 }
354
355 pub async fn generate_text_with_params(
357 &self,
358 params: &TextGenerationParams,
359 ) -> Result<TextGenerationResult> {
360 let model_path = if params.use_large_model {
361 self.config.models_dir.join(&self.config.large_model)
362 } else {
363 self.config.models_dir.join(&self.config.small_model)
364 };
365
366 if !model_path.exists() {
367 return Err(LocalAIError::ModelNotFound(
368 model_path.display().to_string(),
369 ));
370 }
371
372 let model_mutex = if params.use_large_model {
374 &self.large_model
375 } else {
376 &self.small_model
377 };
378
379 let mut model_guard = model_mutex.lock().await;
380
381 if model_guard.is_none() {
383 let holder = ModelHolder::load(&model_path, &self.config, false)?;
384 *model_guard = Some(holder);
385 }
386
387 let holder = model_guard.as_ref().unwrap();
388 holder.generate(params)
389 }
390
391 pub async fn create_embedding(&self, text: &str) -> Result<Vec<f32>> {
396 let params = EmbeddingParams::new(text);
397 self.create_embedding_with_params(¶ms).await
398 }
399
400 pub async fn create_embedding_with_params(&self, params: &EmbeddingParams) -> Result<Vec<f32>> {
402 let model_path = self.config.models_dir.join(&self.config.embedding_model);
403
404 if !model_path.exists() {
405 return Err(LocalAIError::ModelNotFound(
406 model_path.display().to_string(),
407 ));
408 }
409
410 let mut model_guard = self.embedding_model.lock().await;
411
412 if model_guard.is_none() {
414 let holder = ModelHolder::load(&model_path, &self.config, true)?;
415 *model_guard = Some(holder);
416 }
417
418 let holder = model_guard.as_ref().unwrap();
419 let embedding = holder.embed(¶ms.text)?;
420
421 if embedding.len() != self.config.embedding_dimensions {
422 tracing::warn!(
423 "Embedding dimensions mismatch: config={} model={}",
424 self.config.embedding_dimensions,
425 embedding.len()
426 );
427 }
428
429 Ok(embedding)
430 }
431
432 pub fn is_llm_enabled() -> bool {
434 cfg!(feature = "llm")
435 }
436}
437
#[cfg(test)]
mod tests {
    //! Unit tests for configuration, builders, and plugin construction.
    use super::*;
    use tempfile::tempdir;

    // Defaults must supply non-empty model names and the documented
    // numeric values.
    #[test]
    fn test_config_defaults() {
        let config = LocalAIConfig::default();
        assert!(!config.small_model.is_empty());
        assert!(!config.large_model.is_empty());
        assert_eq!(config.embedding_dimensions, 384);
        assert_eq!(config.gpu_layers, 0);
        assert_eq!(config.context_size, 8192);
    }

    // Builder methods should override exactly the fields they name.
    #[test]
    fn test_config_builder() {
        let config = LocalAIConfig::new("/tmp/models")
            .small_model("test-small.gguf")
            .large_model("test-large.gguf")
            .gpu_layers(10)
            .context_size(4096);

        assert_eq!(config.models_dir, PathBuf::from("/tmp/models"));
        assert_eq!(config.small_model, "test-small.gguf");
        assert_eq!(config.large_model, "test-large.gguf");
        assert_eq!(config.gpu_layers, 10);
        assert_eq!(config.context_size, 4096);
    }

    // Generation-parameter builder should record every option it sets.
    #[test]
    fn test_text_generation_params() {
        let params = TextGenerationParams::new("Hello")
            .max_tokens(100)
            .temperature(0.5)
            .top_p(0.8)
            .stop("</answer>")
            .large();

        assert_eq!(params.prompt, "Hello");
        assert_eq!(params.max_tokens, 100);
        assert_eq!(params.temperature, 0.5);
        assert_eq!(params.top_p, 0.8);
        assert!(params.stop_sequences.contains(&"</answer>".to_string()));
        assert!(params.use_large_model);
    }

    // Plugin construction should succeed and create missing directories.
    #[tokio::test]
    async fn test_plugin_creation() {
        let dir = tempdir().unwrap();
        let config = LocalAIConfig::new(dir.path().join("models"));
        let plugin = LocalAIPlugin::new(config);

        assert!(plugin.is_ok());
    }

    // Generating against a missing model file must surface ModelNotFound.
    #[tokio::test]
    async fn test_model_not_found() {
        let dir = tempdir().unwrap();
        let config = LocalAIConfig::new(dir.path().join("models")).small_model("nonexistent.gguf");
        let plugin = LocalAIPlugin::new(config).unwrap();

        let result = plugin.generate_text("test").await;
        assert!(result.is_err());

        match result {
            Err(LocalAIError::ModelNotFound(_)) => {}
            _ => panic!("Expected ModelNotFound error"),
        }
    }

    // is_llm_enabled must mirror the compile-time feature flag.
    #[test]
    fn test_llm_feature_flag() {
        let enabled = LocalAIPlugin::is_llm_enabled();

        #[cfg(feature = "llm")]
        assert!(enabled);

        #[cfg(not(feature = "llm"))]
        assert!(!enabled);
    }
}