Skip to main content

elizaos_plugin_local_ai/
lib.rs

1//! elizaOS Local AI Plugin - Rust Implementation
2//!
3//! Provides local LLM inference using GGUF models via llama.cpp bindings.
4//!
5//! # Features
6//!
7//! - `llm` - Enable actual inference with llama_cpp_rs
8//! - `cuda` - Enable CUDA GPU acceleration
9//! - `metal` - Enable Metal GPU acceleration (macOS)
10//!
11//! # Example
12//!
13//! ```no_run
14//! use elizaos_plugin_local_ai::{LocalAIPlugin, LocalAIConfig, TextGenerationParams};
15//!
16//! #[tokio::main]
17//! async fn main() -> anyhow::Result<()> {
18//!     let plugin = LocalAIPlugin::from_env()?;
19//!     let response = plugin.generate_text("Hello, world!").await?;
20//!     println!("{}", response);
21//!     Ok(())
22//! }
23//! ```
24
25#![allow(missing_docs)]
26
27pub mod error;
28pub mod types;
29pub mod xml_parser;
30
31pub use error::{LocalAIError, Result};
32pub use types::*;
33
34use std::path::PathBuf;
35use std::sync::Arc;
36
37use anyhow::Result as AnyhowResult;
38use tokio::sync::Mutex;
39
/// Configuration for the Local AI plugin.
///
/// All model fields are bare filenames resolved relative to `models_dir`.
#[derive(Debug, Clone)]
pub struct LocalAIConfig {
    /// Directory containing GGUF model files.
    pub models_dir: PathBuf,
    /// Cache directory for temporary files.
    pub cache_dir: PathBuf,
    /// Filename of the small (fast) model.
    pub small_model: String,
    /// Filename of the large (higher-quality) model.
    pub large_model: String,
    /// Filename of the embedding model.
    pub embedding_model: String,
    /// Expected length of each embedding vector.
    pub embedding_dimensions: usize,
    /// Number of transformer layers to offload to the GPU (0 = CPU only).
    pub gpu_layers: u32,
    /// Context window size in tokens.
    pub context_size: usize,
}
60
61impl Default for LocalAIConfig {
62    fn default() -> Self {
63        let home = directories::BaseDirs::new()
64            .map(|d| d.home_dir().to_path_buf())
65            .unwrap_or_else(|| PathBuf::from("."));
66
67        Self {
68            models_dir: home.join(".eliza").join("models"),
69            cache_dir: home.join(".eliza").join("cache"),
70            small_model: "Qwen3-4B-Q4_K_M.gguf".to_string(),
71            large_model: "Qwen3-4B-Q4_K_M.gguf".to_string(),
72            embedding_model: "bge-small-en-v1.5.Q4_K_M.gguf".to_string(),
73            embedding_dimensions: 384,
74            gpu_layers: 0,
75            context_size: 8192,
76        }
77    }
78}
79
80impl LocalAIConfig {
81    /// Create a new configuration with the specified models directory.
82    pub fn new(models_dir: impl Into<PathBuf>) -> Self {
83        let models_dir = models_dir.into();
84        Self {
85            models_dir,
86            ..Default::default()
87        }
88    }
89
90    /// Set the small model filename.
91    pub fn small_model(mut self, model: impl Into<String>) -> Self {
92        self.small_model = model.into();
93        self
94    }
95
96    /// Set the large model filename.
97    pub fn large_model(mut self, model: impl Into<String>) -> Self {
98        self.large_model = model.into();
99        self
100    }
101
102    /// Set the embedding model filename.
103    pub fn embedding_model(mut self, model: impl Into<String>) -> Self {
104        self.embedding_model = model.into();
105        self
106    }
107
108    /// Set the number of GPU layers to offload.
109    pub fn gpu_layers(mut self, layers: u32) -> Self {
110        self.gpu_layers = layers;
111        self
112    }
113
114    /// Set the context window size.
115    pub fn context_size(mut self, size: usize) -> Self {
116        self.context_size = size;
117        self
118    }
119}
120
121// ============================================================================
122// LLM Feature Implementation
123// ============================================================================
124
#[cfg(feature = "llm")]
mod llm_impl {
    use super::*;
    use llama_cpp_rs::{
        options::{ModelOptions, PredictOptions},
        LLama,
    };

    /// Wraps a loaded llama.cpp model together with its display name.
    pub struct ModelHolder {
        pub model: LLama,
        pub model_name: String,
    }

    impl ModelHolder {
        /// Load a GGUF model from `path`, applying the GPU-layer and
        /// context-size settings from `config`. Pass `embeddings = true`
        /// when the model will be used for embedding extraction.
        pub fn load(
            path: &std::path::Path,
            config: &LocalAIConfig,
            embeddings: bool,
        ) -> Result<Self> {
            let model_name = path
                .file_name()
                .and_then(|n| n.to_str())
                .map(str::to_string)
                .unwrap_or_else(|| "unknown".to_string());

            tracing::info!("Loading model: {}", path.display());

            let options = ModelOptions {
                n_gpu_layers: config.gpu_layers as i32,
                context_size: config.context_size as i32,
                embeddings,
                ..Default::default()
            };

            let model = LLama::new(path.to_string_lossy().to_string(), &options)
                .map_err(|e| LocalAIError::ModelLoadError(e.to_string()))?;

            tracing::info!("Model loaded successfully: {}", model_name);

            Ok(Self { model, model_name })
        }

        /// Run text generation with the given sampling parameters.
        pub fn generate(&self, params: &TextGenerationParams) -> Result<TextGenerationResult> {
            let opts = PredictOptions {
                tokens: params.max_tokens as i32,
                temperature: params.temperature,
                top_p: params.top_p,
                stop_prompts: params.stop_sequences.clone(),
                ..Default::default()
            };

            match self.model.predict(params.prompt.clone(), opts) {
                Ok(text) => Ok(TextGenerationResult {
                    text,
                    // llama_cpp_rs doesn't expose token count directly
                    tokens_used: 0,
                    model: self.model_name.clone(),
                }),
                Err(e) => Err(LocalAIError::InferenceError(e.to_string())),
            }
        }

        /// Compute an embedding vector for `text`.
        pub fn embed(&self, text: &str) -> Result<Vec<f32>> {
            let mut opts = PredictOptions::default();
            self.model
                .embeddings(text.to_string(), &mut opts)
                .map_err(|e| LocalAIError::InferenceError(e.to_string()))
        }
    }
}
198
199#[cfg(not(feature = "llm"))]
200mod llm_impl {
201    use super::*;
202
203    /// Placeholder model holder when LLM feature is disabled.
204    pub struct ModelHolder {
205        pub model_name: String,
206    }
207
208    impl ModelHolder {
209        pub fn load(
210            path: &std::path::Path,
211            _config: &LocalAIConfig,
212            _embeddings: bool,
213        ) -> Result<Self> {
214            let model_name = path
215                .file_name()
216                .and_then(|n| n.to_str())
217                .unwrap_or("unknown")
218                .to_string();
219
220            // Just verify the model file exists
221            if !path.exists() {
222                return Err(LocalAIError::ModelNotFound(path.display().to_string()));
223            }
224
225            tracing::warn!(
226                "LLM feature not enabled. Model path validated: {} (enable 'llm' feature for actual inference)",
227                path.display()
228            );
229
230            Ok(Self { model_name })
231        }
232
233        pub fn generate(&self, params: &TextGenerationParams) -> Result<TextGenerationResult> {
234            let _ = params;
235            Err(LocalAIError::ConfigError(format!(
236                "Local inference is not available because the `llm` feature is disabled (model: {}). \
237Enable it with Cargo features: `elizaos-plugin-local-ai = {{ ..., features = [\"llm\"] }}`",
238                self.model_name
239            )))
240        }
241
242        pub fn embed(&self, _text: &str) -> Result<Vec<f32>> {
243            Err(LocalAIError::ConfigError(format!(
244                "Embeddings are not available because the `llm` feature is disabled (model: {}). \
245Enable it with Cargo features: `elizaos-plugin-local-ai = {{ ..., features = [\"llm\"] }}`",
246                self.model_name
247            )))
248        }
249    }
250}
251
252use llm_impl::ModelHolder;
253
254// ============================================================================
255// LocalAIPlugin
256// ============================================================================
257
258/// The main Local AI plugin struct.
259///
260/// Manages loading and inference with local GGUF models.
261pub struct LocalAIPlugin {
262    config: LocalAIConfig,
263    small_model: Arc<Mutex<Option<ModelHolder>>>,
264    large_model: Arc<Mutex<Option<ModelHolder>>>,
265    embedding_model: Arc<Mutex<Option<ModelHolder>>>,
266}
267
268impl LocalAIPlugin {
269    /// Create a new plugin with the given configuration.
270    pub fn new(config: LocalAIConfig) -> Result<Self> {
271        if config.models_dir.as_os_str().is_empty() {
272            return Err(LocalAIError::ConfigError(
273                "Models directory path cannot be empty".to_string(),
274            ));
275        }
276        if config.cache_dir.as_os_str().is_empty() {
277            return Err(LocalAIError::ConfigError(
278                "Cache directory path cannot be empty".to_string(),
279            ));
280        }
281        std::fs::create_dir_all(&config.models_dir)
282            .map_err(|e| LocalAIError::IoError(e.to_string()))?;
283        std::fs::create_dir_all(&config.cache_dir)
284            .map_err(|e| LocalAIError::IoError(e.to_string()))?;
285
286        Ok(Self {
287            config,
288            small_model: Arc::new(Mutex::new(None)),
289            large_model: Arc::new(Mutex::new(None)),
290            embedding_model: Arc::new(Mutex::new(None)),
291        })
292    }
293
294    /// Create a plugin from environment variables.
295    ///
296    /// Reads the following environment variables:
297    /// - `MODELS_DIR` - Directory containing model files
298    /// - `CACHE_DIR` - Cache directory
299    /// - `LOCAL_SMALL_MODEL` - Small model filename
300    /// - `LOCAL_LARGE_MODEL` - Large model filename
301    /// - `LOCAL_EMBEDDING_MODEL` - Embedding model filename
302    /// - `LOCAL_EMBEDDING_DIMENSIONS` - Embedding vector dimensions
303    /// - `GPU_LAYERS` - Number of layers to offload to GPU
304    /// - `CONTEXT_SIZE` - Context window size
305    pub fn from_env() -> AnyhowResult<Self> {
306        let mut config = LocalAIConfig::default();
307
308        if let Ok(dir) = std::env::var("MODELS_DIR") {
309            config.models_dir = PathBuf::from(dir);
310        }
311
312        if let Ok(dir) = std::env::var("CACHE_DIR") {
313            config.cache_dir = PathBuf::from(dir);
314        }
315
316        if let Ok(model) = std::env::var("LOCAL_SMALL_MODEL") {
317            config.small_model = model;
318        }
319
320        if let Ok(model) = std::env::var("LOCAL_LARGE_MODEL") {
321            config.large_model = model;
322        }
323
324        if let Ok(model) = std::env::var("LOCAL_EMBEDDING_MODEL") {
325            config.embedding_model = model;
326        }
327
328        if let Ok(dims) = std::env::var("LOCAL_EMBEDDING_DIMENSIONS") {
329            config.embedding_dimensions = dims.parse().unwrap_or(384);
330        }
331
332        if let Ok(layers) = std::env::var("GPU_LAYERS") {
333            config.gpu_layers = layers.parse().unwrap_or(0);
334        }
335
336        if let Ok(size) = std::env::var("CONTEXT_SIZE") {
337            config.context_size = size.parse().unwrap_or(8192);
338        }
339
340        Self::new(config).map_err(|e| anyhow::anyhow!("Failed to create Local AI plugin: {}", e))
341    }
342
343    /// Get a reference to the configuration.
344    pub fn config(&self) -> &LocalAIConfig {
345        &self.config
346    }
347
348    /// Generate text using a simple prompt.
349    pub async fn generate_text(&self, prompt: &str) -> Result<String> {
350        let params = TextGenerationParams::new(prompt);
351        let result = self.generate_text_with_params(&params).await?;
352        Ok(result.text)
353    }
354
355    /// Generate text with full parameters.
356    pub async fn generate_text_with_params(
357        &self,
358        params: &TextGenerationParams,
359    ) -> Result<TextGenerationResult> {
360        let model_path = if params.use_large_model {
361            self.config.models_dir.join(&self.config.large_model)
362        } else {
363            self.config.models_dir.join(&self.config.small_model)
364        };
365
366        if !model_path.exists() {
367            return Err(LocalAIError::ModelNotFound(
368                model_path.display().to_string(),
369            ));
370        }
371
372        // Get or load the appropriate model
373        let model_mutex = if params.use_large_model {
374            &self.large_model
375        } else {
376            &self.small_model
377        };
378
379        let mut model_guard = model_mutex.lock().await;
380
381        // Lazy load the model if not already loaded
382        if model_guard.is_none() {
383            let holder = ModelHolder::load(&model_path, &self.config, false)?;
384            *model_guard = Some(holder);
385        }
386
387        let holder = model_guard.as_ref().unwrap();
388        holder.generate(params)
389    }
390
391    /// Create an embedding for the given text.
392    ///
393    /// Note: Embedding support requires additional implementation with
394    /// a dedicated embedding model. Currently returns zeros.
395    pub async fn create_embedding(&self, text: &str) -> Result<Vec<f32>> {
396        let params = EmbeddingParams::new(text);
397        self.create_embedding_with_params(&params).await
398    }
399
400    /// Create an embedding with full parameters.
401    pub async fn create_embedding_with_params(&self, params: &EmbeddingParams) -> Result<Vec<f32>> {
402        let model_path = self.config.models_dir.join(&self.config.embedding_model);
403
404        if !model_path.exists() {
405            return Err(LocalAIError::ModelNotFound(
406                model_path.display().to_string(),
407            ));
408        }
409
410        let mut model_guard = self.embedding_model.lock().await;
411
412        // Lazy load embedding model if not already loaded
413        if model_guard.is_none() {
414            let holder = ModelHolder::load(&model_path, &self.config, true)?;
415            *model_guard = Some(holder);
416        }
417
418        let holder = model_guard.as_ref().unwrap();
419        let embedding = holder.embed(&params.text)?;
420
421        if embedding.len() != self.config.embedding_dimensions {
422            tracing::warn!(
423                "Embedding dimensions mismatch: config={} model={}",
424                self.config.embedding_dimensions,
425                embedding.len()
426            );
427        }
428
429        Ok(embedding)
430    }
431
432    /// Check if the LLM feature is enabled.
433    pub fn is_llm_enabled() -> bool {
434        cfg!(feature = "llm")
435    }
436}
437
438// ============================================================================
439// Tests
440// ============================================================================
441
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    #[test]
    fn test_config_defaults() {
        let config = LocalAIConfig::default();
        // Model filenames must always be populated.
        assert!(!config.small_model.is_empty());
        assert!(!config.large_model.is_empty());
        assert_eq!(
            (
                config.embedding_dimensions,
                config.gpu_layers,
                config.context_size
            ),
            (384, 0, 8192)
        );
    }

    #[test]
    fn test_config_builder() {
        let config = LocalAIConfig::new("/tmp/models")
            .small_model("test-small.gguf")
            .large_model("test-large.gguf")
            .gpu_layers(10)
            .context_size(4096);

        assert_eq!(config.models_dir, PathBuf::from("/tmp/models"));
        assert_eq!(config.context_size, 4096);
        assert_eq!(config.gpu_layers, 10);
        assert_eq!(config.large_model, "test-large.gguf");
        assert_eq!(config.small_model, "test-small.gguf");
    }

    #[test]
    fn test_text_generation_params() {
        let params = TextGenerationParams::new("Hello")
            .max_tokens(100)
            .temperature(0.5)
            .top_p(0.8)
            .stop("</answer>")
            .large();

        assert_eq!(params.prompt, "Hello");
        assert_eq!(params.max_tokens, 100);
        assert_eq!(params.temperature, 0.5);
        assert_eq!(params.top_p, 0.8);
        assert!(params.stop_sequences.contains(&"</answer>".to_string()));
        assert!(params.use_large_model);
    }

    #[tokio::test]
    async fn test_plugin_creation() {
        let dir = tempdir().unwrap();
        let plugin = LocalAIPlugin::new(LocalAIConfig::new(dir.path().join("models")));
        assert!(plugin.is_ok());
    }

    #[tokio::test]
    async fn test_model_not_found() {
        let dir = tempdir().unwrap();
        let config = LocalAIConfig::new(dir.path().join("models")).small_model("nonexistent.gguf");
        let plugin = LocalAIPlugin::new(config).unwrap();

        let result = plugin.generate_text("test").await;
        assert!(
            matches!(result, Err(LocalAIError::ModelNotFound(_))),
            "Expected ModelNotFound error"
        );
    }

    #[test]
    fn test_llm_feature_flag() {
        // The runtime check must agree with the compile-time cfg.
        let enabled = LocalAIPlugin::is_llm_enabled();

        #[cfg(feature = "llm")]
        assert!(enabled);

        #[cfg(not(feature = "llm"))]
        assert!(!enabled);
    }
}