// symbi_runtime/models/runners.rs
//! SLM runner implementations for executing models with security constraints
//!
//! This module provides the [`SlmRunner`] trait and concrete implementations for
//! executing Small Language Models within Symbiont's security sandbox.
//!
//! # Security Model
//!
//! All runners must respect the [`SandboxProfile`] associated with their execution
//! context. This includes:
//!
//! - Resource limits (memory, CPU, disk)
//! - Filesystem access controls
//! - Network restrictions
//! - Process execution limits
//!
//! # Adding New Runners
//!
//! To add support for a new model format:
//!
//! 1. Implement the [`SlmRunner`] trait
//! 2. Ensure proper sandbox profile enforcement
//! 3. Add comprehensive error handling
//! 4. Include unit tests for both success and failure cases
//!
//! # Usage
//!
//! ```rust,no_run
//! use symbi_runtime::models::{SlmRunner, LocalGgufRunner};
//! use symbi_runtime::config::{SandboxProfile, ModelResourceRequirements};
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! let sandbox_profile = SandboxProfile::secure_default();
//! let resource_requirements = ModelResourceRequirements {
//!     min_memory_mb: 1024,
//!     preferred_cpu_cores: 2.0,
//!     gpu_requirements: None,
//! };
//! let runner = LocalGgufRunner::new("/path/to/model.gguf", sandbox_profile, resource_requirements).await?;
//!
//! let response = runner.execute("Hello, world!", None).await?;
//! println!("Model response: {:?}", response);
//! # Ok(())
//! # }
//! ```
45
46use crate::config::{ModelResourceRequirements, SandboxProfile};
47use async_trait::async_trait;
48use std::collections::HashMap;
49use std::path::PathBuf;
50use std::time::Duration;
51use thiserror::Error;
52use tokio::process::Command;
53use tokio::time::timeout;
54
/// Errors that can occur during SLM execution
///
/// Display strings are rendered by `thiserror`'s `#[error]` attributes; all
/// variants carry a human-readable payload describing the failure.
#[derive(Debug, Error)]
pub enum SlmRunnerError {
    /// Runner setup failed (e.g. the llama.cpp executable could not be found,
    /// or the initial health check did not pass).
    #[error("Model initialization failed: {reason}")]
    InitializationFailed { reason: String },

    /// The model process could not be spawned or exited unsuccessfully.
    #[error("Model execution failed: {reason}")]
    ExecutionFailed { reason: String },

    /// A configured resource limit (memory, CPU, disk, ...) was exceeded.
    #[error("Resource limit exceeded: {limit_type}")]
    ResourceLimitExceeded { limit_type: String },

    /// The sandbox profile failed validation or was violated during execution.
    #[error("Sandbox violation: {violation}")]
    SandboxViolation { violation: String },

    /// The GGUF model file does not exist at the given path.
    #[error("Model file not found: {path}")]
    ModelFileNotFound { path: String },

    /// Execution exceeded the allowed wall-clock time.
    #[error("Execution timeout after {seconds} seconds")]
    ExecutionTimeout { seconds: u64 },

    /// The caller supplied invalid input (e.g. an oversized prompt).
    #[error("Invalid input: {reason}")]
    InvalidInput { reason: String },

    /// An underlying I/O operation failed.
    #[error("IO error: {message}")]
    IoError { message: String },
}
82
/// Execution options for SLM runners
///
/// All fields are optional; `ExecutionOptions::default()` provides
/// conservative values (30 s timeout, temperature 0.7, 256 tokens).
#[derive(Debug, Clone)]
pub struct ExecutionOptions {
    /// Maximum execution time; when `None`, runners fall back to the sandbox
    /// profile's process execution limit
    pub timeout: Option<Duration>,
    /// Temperature for text generation (0.0 - 1.0)
    pub temperature: Option<f32>,
    /// Maximum tokens to generate
    pub max_tokens: Option<u32>,
    /// Additional parameters specific to the model; each entry is forwarded
    /// to the backend as a `--key value` argument pair
    pub custom_parameters: HashMap<String, String>,
}
95
96impl Default for ExecutionOptions {
97    fn default() -> Self {
98        Self {
99            timeout: Some(Duration::from_secs(30)),
100            temperature: Some(0.7),
101            max_tokens: Some(256),
102            custom_parameters: HashMap::new(),
103        }
104    }
105}
106
/// Execution result from an SLM runner
///
/// Bundles the generated text with metadata describing how the run went.
#[derive(Debug, Clone)]
pub struct ExecutionResult {
    /// Generated response text (trailing whitespace trimmed)
    pub response: String,
    /// Execution metadata (timing, token counts, limits hit)
    pub metadata: ExecutionMetadata,
}
115
/// Metadata about model execution
#[derive(Debug, Clone)]
pub struct ExecutionMetadata {
    /// Number of tokens in the input; implementations may provide a rough
    /// estimate rather than an exact count
    pub input_tokens: Option<u32>,
    /// Number of tokens generated; may likewise be an estimate
    pub output_tokens: Option<u32>,
    /// Execution time in milliseconds
    pub execution_time_ms: u64,
    /// Memory usage during execution in MB; `None` when the runner does not
    /// monitor the process
    pub memory_usage_mb: Option<u64>,
    /// Names of any resource limits hit during execution (empty if none)
    pub limits_hit: Vec<String>,
}
130
/// Generic trait for executing Small Language Models
///
/// This trait defines the interface for running SLMs within Symbiont's
/// security constraints. All implementations must respect the associated
/// [`SandboxProfile`] and provide proper resource isolation.
#[async_trait]
pub trait SlmRunner: Send + Sync {
    /// Execute the model with given input and options
    ///
    /// # Arguments
    ///
    /// * `prompt` - Input text to process
    /// * `options` - Execution options (timeout, temperature, etc.);
    ///   implementations fall back to their own defaults when `None`
    ///
    /// # Errors
    ///
    /// Returns [`SlmRunnerError`] if execution fails due to resource limits,
    /// sandbox violations, or model errors.
    async fn execute(
        &self,
        prompt: &str,
        options: Option<ExecutionOptions>,
    ) -> Result<ExecutionResult, SlmRunnerError>;

    /// Get the sandbox profile associated with this runner
    fn get_sandbox_profile(&self) -> &SandboxProfile;

    /// Get the resource requirements for this model
    fn get_resource_requirements(&self) -> &ModelResourceRequirements;

    /// Check if the runner is healthy and ready for execution
    ///
    /// # Errors
    ///
    /// Returns [`SlmRunnerError`] if the runner cannot currently execute.
    async fn health_check(&self) -> Result<(), SlmRunnerError>;

    /// Get runner-specific information
    fn get_info(&self) -> RunnerInfo;
}
167
/// Information about a specific runner implementation
///
/// Returned by [`SlmRunner::get_info`] for diagnostics and discovery.
#[derive(Debug, Clone)]
pub struct RunnerInfo {
    /// Runner type identifier (e.g. "LocalGgufRunner")
    pub runner_type: String,
    /// Model path or identifier
    pub model_path: String,
    /// Supported capabilities (e.g. "text_generation")
    pub capabilities: Vec<String>,
    /// Version information, if the runner reports one
    pub version: Option<String>,
}
180
/// Local GGUF model runner implementation
///
/// This runner executes GGUF-quantized models by shelling out to the
/// llama.cpp CLI (`llama-cli`) within the configured security sandbox.
#[derive(Debug)]
pub struct LocalGgufRunner {
    /// Path to the GGUF model file
    model_path: PathBuf,
    /// Sandbox profile for security constraints
    sandbox_profile: SandboxProfile,
    /// Resource requirements for this model
    resource_requirements: ModelResourceRequirements,
    /// Path to the llama.cpp executable (resolved at construction time)
    llama_cpp_path: PathBuf,
}
196
197impl LocalGgufRunner {
198    /// Create a new GGUF runner
199    ///
200    /// # Arguments
201    ///
202    /// * `model_path` - Path to the GGUF model file
203    /// * `sandbox_profile` - Security constraints for execution
204    /// * `resource_requirements` - Resource requirements for the model
205    ///
206    /// # Errors
207    ///
208    /// Returns [`SlmRunnerError::InitializationFailed`] if the model file
209    /// doesn't exist or isn't accessible.
210    pub async fn new(
211        model_path: impl Into<PathBuf>,
212        sandbox_profile: SandboxProfile,
213        resource_requirements: ModelResourceRequirements,
214    ) -> Result<Self, SlmRunnerError> {
215        let model_path = model_path.into();
216
217        // Validate model file exists
218        if !model_path.exists() {
219            return Err(SlmRunnerError::ModelFileNotFound {
220                path: model_path.display().to_string(),
221            });
222        }
223
224        // Find llama.cpp executable
225        let llama_cpp_path = Self::find_llama_cpp_executable().await?;
226
227        let runner = Self {
228            model_path,
229            sandbox_profile,
230            resource_requirements,
231            llama_cpp_path,
232        };
233
234        // Perform initial health check
235        runner.health_check().await?;
236
237        Ok(runner)
238    }
239
240    /// Find the llama.cpp executable in the system
241    async fn find_llama_cpp_executable() -> Result<PathBuf, SlmRunnerError> {
242        // Common paths where llama.cpp might be installed
243        let candidate_paths = vec![
244            "/usr/local/bin/llama-cli",
245            "/usr/bin/llama-cli",
246            "/opt/llama.cpp/llama-cli",
247            "./bin/llama-cli",
248        ];
249
250        for path in candidate_paths {
251            let path_buf = PathBuf::from(path);
252            if path_buf.exists() {
253                return Ok(path_buf);
254            }
255        }
256
257        // Try to find via which command
258        match Command::new("which").arg("llama-cli").output().await {
259            Ok(output) if output.status.success() => {
260                let path_str = String::from_utf8_lossy(&output.stdout);
261                let trimmed_path = path_str.trim();
262                Ok(PathBuf::from(trimmed_path))
263            }
264            _ => Err(SlmRunnerError::InitializationFailed {
265                reason: "llama.cpp executable not found".to_string(),
266            }),
267        }
268    }
269
270    /// Build command arguments for llama.cpp execution
271    fn build_command_args(&self, prompt: &str, options: &ExecutionOptions) -> Vec<String> {
272        let mut args = vec![
273            "--model".to_string(),
274            self.model_path.display().to_string(),
275            "--prompt".to_string(),
276            prompt.to_string(),
277            "--no-display-prompt".to_string(),
278        ];
279
280        // Add temperature setting
281        if let Some(temp) = options.temperature {
282            args.extend(vec!["--temp".to_string(), temp.to_string()]);
283        }
284
285        // Add max tokens setting
286        if let Some(max_tokens) = options.max_tokens {
287            args.extend(vec!["--n-predict".to_string(), max_tokens.to_string()]);
288        }
289
290        // Apply resource constraints from sandbox profile
291        args.extend(vec![
292            "--threads".to_string(),
293            self.sandbox_profile
294                .resources
295                .max_cpu_cores
296                .floor()
297                .to_string(),
298        ]);
299
300        // Add custom parameters
301        for (key, value) in &options.custom_parameters {
302            args.extend(vec![format!("--{}", key), value.clone()]);
303        }
304
305        args
306    }
307
308    /// Apply sandbox constraints to the command
309    fn apply_sandbox_constraints(&self, command: &mut Command) {
310        // Set memory limits (convert MB to bytes for ulimit)
311        let memory_limit = self.sandbox_profile.resources.max_memory_mb * 1024 * 1024;
312
313        // Use systemd-run or similar for resource constraints in production
314        // For now, we'll use basic process limits
315        command.env("RLIMIT_AS", memory_limit.to_string());
316
317        // Set working directory to a sandboxed location
318        if let Some(write_path) = self.sandbox_profile.filesystem.write_paths.first() {
319            if let Ok(path) = std::fs::canonicalize(write_path.trim_end_matches("/*")) {
320                command.current_dir(path);
321            }
322        }
323
324        // Apply network restrictions by setting environment variables
325        // that llama.cpp would respect (if it supported them)
326        match self.sandbox_profile.network.access_mode {
327            crate::config::NetworkAccessMode::None => {
328                command.env("NO_NETWORK", "1");
329            }
330            crate::config::NetworkAccessMode::Restricted => {
331                // Set allowed hosts if needed
332                if !self.sandbox_profile.network.allowed_destinations.is_empty() {
333                    let hosts: Vec<String> = self
334                        .sandbox_profile
335                        .network
336                        .allowed_destinations
337                        .iter()
338                        .map(|dest| dest.host.clone())
339                        .collect();
340                    command.env("ALLOWED_HOSTS", hosts.join(","));
341                }
342            }
343            crate::config::NetworkAccessMode::Full => {
344                // No restrictions
345            }
346        }
347    }
348
349    /// Validate execution constraints before running
350    fn validate_execution_constraints(&self, prompt: &str) -> Result<(), SlmRunnerError> {
351        // Check prompt length (rough token estimation)
352        let estimated_tokens = prompt.len() / 4; // Rough approximation
353        if estimated_tokens > 4000 {
354            return Err(SlmRunnerError::InvalidInput {
355                reason: "Prompt too long".to_string(),
356            });
357        }
358
359        // Validate sandbox profile constraints
360        self.sandbox_profile
361            .validate()
362            .map_err(|e| SlmRunnerError::SandboxViolation {
363                violation: e.to_string(),
364            })?;
365
366        Ok(())
367    }
368}
369
370#[async_trait]
371impl SlmRunner for LocalGgufRunner {
372    async fn execute(
373        &self,
374        prompt: &str,
375        options: Option<ExecutionOptions>,
376    ) -> Result<ExecutionResult, SlmRunnerError> {
377        let options = options.unwrap_or_default();
378        let start_time = std::time::Instant::now();
379
380        // Validate execution constraints
381        self.validate_execution_constraints(prompt)?;
382
383        // Build command
384        let args = self.build_command_args(prompt, &options);
385        let mut command = Command::new(&self.llama_cpp_path);
386        command.args(&args);
387
388        // Apply sandbox constraints
389        self.apply_sandbox_constraints(&mut command);
390
391        // Set up timeout
392        let execution_timeout = options.timeout.unwrap_or_else(|| {
393            Duration::from_secs(
394                self.sandbox_profile
395                    .process_limits
396                    .max_execution_time_seconds,
397            )
398        });
399
400        // Execute with timeout
401        let output = timeout(execution_timeout, command.output())
402            .await
403            .map_err(|_| SlmRunnerError::ExecutionTimeout {
404                seconds: execution_timeout.as_secs(),
405            })?
406            .map_err(|e| SlmRunnerError::ExecutionFailed {
407                reason: format!("Process execution failed: {}", e),
408            })?;
409
410        // Check if process succeeded
411        if !output.status.success() {
412            let stderr = String::from_utf8_lossy(&output.stderr);
413            return Err(SlmRunnerError::ExecutionFailed {
414                reason: format!("llama.cpp execution failed: {}", stderr),
415            });
416        }
417
418        // Extract response
419        let response = String::from_utf8_lossy(&output.stdout).trim().to_string();
420        let execution_time = start_time.elapsed();
421
422        // Create execution metadata
423        let metadata = ExecutionMetadata {
424            input_tokens: Some((prompt.len() / 4) as u32), // Rough estimation
425            output_tokens: Some((response.len() / 4) as u32), // Rough estimation
426            execution_time_ms: execution_time.as_millis() as u64,
427            memory_usage_mb: None, // Would need process monitoring for accurate measurement
428            limits_hit: Vec::new(), // Would be populated if we detected limit violations
429        };
430
431        Ok(ExecutionResult { response, metadata })
432    }
433
434    fn get_sandbox_profile(&self) -> &SandboxProfile {
435        &self.sandbox_profile
436    }
437
438    fn get_resource_requirements(&self) -> &ModelResourceRequirements {
439        &self.resource_requirements
440    }
441
442    async fn health_check(&self) -> Result<(), SlmRunnerError> {
443        // Check if model file is still accessible
444        if !self.model_path.exists() {
445            return Err(SlmRunnerError::ModelFileNotFound {
446                path: self.model_path.display().to_string(),
447            });
448        }
449
450        // Check if llama.cpp executable is still available
451        if !self.llama_cpp_path.exists() {
452            return Err(SlmRunnerError::InitializationFailed {
453                reason: "llama.cpp executable no longer available".to_string(),
454            });
455        }
456
457        // Test basic execution with a simple prompt
458        let test_prompt = "Hello";
459        let options = ExecutionOptions {
460            timeout: Some(Duration::from_secs(10)),
461            temperature: Some(0.1),
462            max_tokens: Some(1),
463            custom_parameters: HashMap::new(),
464        };
465
466        match self.execute(test_prompt, Some(options)).await {
467            Ok(_) => Ok(()),
468            Err(e) => Err(SlmRunnerError::InitializationFailed {
469                reason: format!("Health check failed: {}", e),
470            }),
471        }
472    }
473
474    fn get_info(&self) -> RunnerInfo {
475        RunnerInfo {
476            runner_type: "LocalGgufRunner".to_string(),
477            model_path: self.model_path.display().to_string(),
478            capabilities: vec!["text_generation".to_string(), "conversation".to_string()],
479            version: Some("1.0.0".to_string()),
480        }
481    }
482}
483
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::SandboxProfile;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Minimal resource requirements shared by the tests below.
    fn create_test_resource_requirements() -> ModelResourceRequirements {
        ModelResourceRequirements {
            min_memory_mb: 512,
            preferred_cpu_cores: 1.0,
            gpu_requirements: None,
        }
    }

    #[tokio::test]
    async fn test_gguf_runner_creation_missing_file() {
        let outcome = LocalGgufRunner::new(
            "/nonexistent/model.gguf",
            SandboxProfile::secure_default(),
            create_test_resource_requirements(),
        )
        .await;

        // A missing model file must surface as ModelFileNotFound.
        assert!(matches!(
            outcome,
            Err(SlmRunnerError::ModelFileNotFound { .. })
        ));
    }

    #[tokio::test]
    async fn test_execution_options_default() {
        let defaults = ExecutionOptions::default();

        assert!(defaults.timeout.is_some());
        assert_eq!(defaults.temperature, Some(0.7));
        assert_eq!(defaults.max_tokens, Some(256));
    }

    #[tokio::test]
    async fn test_command_args_building() {
        // A throwaway temp file stands in for a real GGUF model.
        let mut fake_model = NamedTempFile::new().unwrap();
        writeln!(fake_model, "dummy model content").unwrap();
        let model_path = fake_model.path().to_path_buf();

        // Construct the struct directly (bypassing `new`) so this test does
        // not depend on a llama.cpp installation being present.
        let runner = LocalGgufRunner {
            model_path: model_path.clone(),
            sandbox_profile: SandboxProfile::secure_default(),
            resource_requirements: create_test_resource_requirements(),
            llama_cpp_path: PathBuf::from("/fake/llama-cli"),
        };

        let args = runner.build_command_args("test prompt", &ExecutionOptions::default());

        // The essential model/prompt arguments must all be present.
        for expected in [
            "--model".to_string(),
            model_path.display().to_string(),
            "--prompt".to_string(),
            "test prompt".to_string(),
        ] {
            assert!(args.contains(&expected));
        }
    }

    #[test]
    fn test_validation_long_prompt() {
        let runner = LocalGgufRunner {
            model_path: PathBuf::from("/fake/model.gguf"),
            sandbox_profile: SandboxProfile::secure_default(),
            resource_requirements: create_test_resource_requirements(),
            llama_cpp_path: PathBuf::from("/fake/llama-cli"),
        };

        // 20_000 chars ≈ 5000 estimated tokens — above the 4000-token cap.
        let oversized = "a".repeat(20000);
        assert!(matches!(
            runner.validate_execution_constraints(&oversized),
            Err(SlmRunnerError::InvalidInput { .. })
        ));
    }
}