1use crate::config::{ModelResourceRequirements, SandboxProfile};
47use async_trait::async_trait;
48use std::collections::HashMap;
49use std::path::PathBuf;
50use std::time::Duration;
51use thiserror::Error;
52use tokio::process::Command;
53use tokio::time::timeout;
54
/// Errors produced while initializing or executing a local SLM runner.
#[derive(Debug, Error)]
pub enum SlmRunnerError {
    /// Runner setup failed (executable not found, failed health check, ...).
    #[error("Model initialization failed: {reason}")]
    InitializationFailed { reason: String },

    /// The model process could not be spawned or exited unsuccessfully.
    #[error("Model execution failed: {reason}")]
    ExecutionFailed { reason: String },

    /// A configured resource limit (memory, CPU, ...) was exceeded.
    #[error("Resource limit exceeded: {limit_type}")]
    ResourceLimitExceeded { limit_type: String },

    /// The sandbox profile was violated or failed its own validation.
    #[error("Sandbox violation: {violation}")]
    SandboxViolation { violation: String },

    /// The GGUF model file is missing on disk.
    #[error("Model file not found: {path}")]
    ModelFileNotFound { path: String },

    /// Execution exceeded the allowed wall-clock time.
    #[error("Execution timeout after {seconds} seconds")]
    ExecutionTimeout { seconds: u64 },

    /// The caller supplied invalid input (e.g. an oversized prompt).
    #[error("Invalid input: {reason}")]
    InvalidInput { reason: String },

    /// An underlying I/O operation failed.
    #[error("IO error: {message}")]
    IoError { message: String },
}
82
/// Per-call tuning knobs for a single model execution.
#[derive(Debug, Clone)]
pub struct ExecutionOptions {
    /// Wall-clock limit for the run; `None` defers to the sandbox profile's
    /// process execution-time limit.
    pub timeout: Option<Duration>,
    /// Sampling temperature forwarded to the backend (`--temp`).
    pub temperature: Option<f32>,
    /// Maximum number of tokens to generate (`--n-predict`).
    pub max_tokens: Option<u32>,
    /// Extra backend flags, passed through verbatim as `--key value`.
    pub custom_parameters: HashMap<String, String>,
}
95
96impl Default for ExecutionOptions {
97 fn default() -> Self {
98 Self {
99 timeout: Some(Duration::from_secs(30)),
100 temperature: Some(0.7),
101 max_tokens: Some(256),
102 custom_parameters: HashMap::new(),
103 }
104 }
105}
106
/// Outcome of a single model execution.
#[derive(Debug, Clone)]
pub struct ExecutionResult {
    /// The model's generated text (process stdout, trimmed).
    pub response: String,
    /// Timing and usage details collected for the run.
    pub metadata: ExecutionMetadata,
}
115
/// Measurements collected for one execution.
#[derive(Debug, Clone)]
pub struct ExecutionMetadata {
    /// Estimated prompt token count (byte-length heuristic), if available.
    pub input_tokens: Option<u32>,
    /// Estimated generated token count (byte-length heuristic), if available.
    pub output_tokens: Option<u32>,
    /// Wall-clock execution time in milliseconds.
    pub execution_time_ms: u64,
    /// Memory usage in MB, when the backend can report it.
    pub memory_usage_mb: Option<u64>,
    /// Names of any resource limits that were hit during the run.
    pub limits_hit: Vec<String>,
}
130
/// Common interface for small-language-model execution backends.
///
/// Implementations execute prompts against a locally hosted model while
/// honoring a sandbox profile and declared resource requirements.
#[async_trait]
pub trait SlmRunner: Send + Sync {
    /// Executes `prompt` against the model.
    ///
    /// When `options` is `None`, implementation defaults are used.
    async fn execute(
        &self,
        prompt: &str,
        options: Option<ExecutionOptions>,
    ) -> Result<ExecutionResult, SlmRunnerError>;

    /// Returns the sandbox profile this runner executes under.
    fn get_sandbox_profile(&self) -> &SandboxProfile;

    /// Returns the model's declared resource requirements.
    fn get_resource_requirements(&self) -> &ModelResourceRequirements;

    /// Verifies the runner is currently able to execute.
    async fn health_check(&self) -> Result<(), SlmRunnerError>;

    /// Returns descriptive metadata about this runner instance.
    fn get_info(&self) -> RunnerInfo;
}
167
/// Descriptive metadata about a runner instance.
#[derive(Debug, Clone)]
pub struct RunnerInfo {
    /// Implementation name (e.g. "LocalGgufRunner").
    pub runner_type: String,
    /// Path to the loaded model file.
    pub model_path: String,
    /// Capability tags (e.g. "text_generation").
    pub capabilities: Vec<String>,
    /// Optional runner version string.
    pub version: Option<String>,
}
180
/// Runs GGUF models locally by shelling out to the llama.cpp CLI.
#[derive(Debug)]
pub struct LocalGgufRunner {
    /// Path to the GGUF model file on disk.
    model_path: PathBuf,
    /// Sandbox constraints applied to each child process.
    sandbox_profile: SandboxProfile,
    /// Declared resource needs of the model.
    resource_requirements: ModelResourceRequirements,
    /// Resolved path to the llama.cpp CLI executable.
    llama_cpp_path: PathBuf,
}
196
197impl LocalGgufRunner {
198 pub async fn new(
211 model_path: impl Into<PathBuf>,
212 sandbox_profile: SandboxProfile,
213 resource_requirements: ModelResourceRequirements,
214 ) -> Result<Self, SlmRunnerError> {
215 let model_path = model_path.into();
216
217 if !model_path.exists() {
219 return Err(SlmRunnerError::ModelFileNotFound {
220 path: model_path.display().to_string(),
221 });
222 }
223
224 let llama_cpp_path = Self::find_llama_cpp_executable().await?;
226
227 let runner = Self {
228 model_path,
229 sandbox_profile,
230 resource_requirements,
231 llama_cpp_path,
232 };
233
234 runner.health_check().await?;
236
237 Ok(runner)
238 }
239
240 async fn find_llama_cpp_executable() -> Result<PathBuf, SlmRunnerError> {
242 let candidate_paths = vec![
244 "/usr/local/bin/llama-cli",
245 "/usr/bin/llama-cli",
246 "/opt/llama.cpp/llama-cli",
247 "./bin/llama-cli",
248 ];
249
250 for path in candidate_paths {
251 let path_buf = PathBuf::from(path);
252 if path_buf.exists() {
253 return Ok(path_buf);
254 }
255 }
256
257 match Command::new("which").arg("llama-cli").output().await {
259 Ok(output) if output.status.success() => {
260 let path_str = String::from_utf8_lossy(&output.stdout);
261 let trimmed_path = path_str.trim();
262 Ok(PathBuf::from(trimmed_path))
263 }
264 _ => Err(SlmRunnerError::InitializationFailed {
265 reason: "llama.cpp executable not found".to_string(),
266 }),
267 }
268 }
269
270 fn build_command_args(&self, prompt: &str, options: &ExecutionOptions) -> Vec<String> {
272 let mut args = vec![
273 "--model".to_string(),
274 self.model_path.display().to_string(),
275 "--prompt".to_string(),
276 prompt.to_string(),
277 "--no-display-prompt".to_string(),
278 ];
279
280 if let Some(temp) = options.temperature {
282 args.extend(vec!["--temp".to_string(), temp.to_string()]);
283 }
284
285 if let Some(max_tokens) = options.max_tokens {
287 args.extend(vec!["--n-predict".to_string(), max_tokens.to_string()]);
288 }
289
290 args.extend(vec![
292 "--threads".to_string(),
293 self.sandbox_profile
294 .resources
295 .max_cpu_cores
296 .floor()
297 .to_string(),
298 ]);
299
300 for (key, value) in &options.custom_parameters {
302 args.extend(vec![format!("--{}", key), value.clone()]);
303 }
304
305 args
306 }
307
308 fn apply_sandbox_constraints(&self, command: &mut Command) {
310 let memory_limit = self.sandbox_profile.resources.max_memory_mb * 1024 * 1024;
312
313 command.env("RLIMIT_AS", memory_limit.to_string());
316
317 if let Some(write_path) = self.sandbox_profile.filesystem.write_paths.first() {
319 if let Ok(path) = std::fs::canonicalize(write_path.trim_end_matches("/*")) {
320 command.current_dir(path);
321 }
322 }
323
324 match self.sandbox_profile.network.access_mode {
327 crate::config::NetworkAccessMode::None => {
328 command.env("NO_NETWORK", "1");
329 }
330 crate::config::NetworkAccessMode::Restricted => {
331 if !self.sandbox_profile.network.allowed_destinations.is_empty() {
333 let hosts: Vec<String> = self
334 .sandbox_profile
335 .network
336 .allowed_destinations
337 .iter()
338 .map(|dest| dest.host.clone())
339 .collect();
340 command.env("ALLOWED_HOSTS", hosts.join(","));
341 }
342 }
343 crate::config::NetworkAccessMode::Full => {
344 }
346 }
347 }
348
349 fn validate_execution_constraints(&self, prompt: &str) -> Result<(), SlmRunnerError> {
351 let estimated_tokens = prompt.len() / 4; if estimated_tokens > 4000 {
354 return Err(SlmRunnerError::InvalidInput {
355 reason: "Prompt too long".to_string(),
356 });
357 }
358
359 self.sandbox_profile
361 .validate()
362 .map_err(|e| SlmRunnerError::SandboxViolation {
363 violation: e.to_string(),
364 })?;
365
366 Ok(())
367 }
368}
369
#[async_trait]
impl SlmRunner for LocalGgufRunner {
    /// Runs `prompt` through the llama.cpp CLI under sandbox constraints.
    ///
    /// Validates the prompt and sandbox profile, builds the CLI arguments,
    /// applies sandbox env/cwd constraints, then waits for the process under
    /// a timeout (caller-supplied, or the profile's process limit).
    async fn execute(
        &self,
        prompt: &str,
        options: Option<ExecutionOptions>,
    ) -> Result<ExecutionResult, SlmRunnerError> {
        let options = options.unwrap_or_default();
        let start_time = std::time::Instant::now();

        // Reject oversized prompts / invalid sandbox config before spawning.
        self.validate_execution_constraints(prompt)?;

        let args = self.build_command_args(prompt, &options);
        let mut command = Command::new(&self.llama_cpp_path);
        command.args(&args);

        self.apply_sandbox_constraints(&mut command);

        // Caller-supplied timeout wins; otherwise fall back to the sandbox's
        // per-process execution-time limit.
        let execution_timeout = options.timeout.unwrap_or_else(|| {
            Duration::from_secs(
                self.sandbox_profile
                    .process_limits
                    .max_execution_time_seconds,
            )
        });

        // Outer error: the timeout elapsed. Inner error: spawn/IO failure.
        let output = timeout(execution_timeout, command.output())
            .await
            .map_err(|_| SlmRunnerError::ExecutionTimeout {
                seconds: execution_timeout.as_secs(),
            })?
            .map_err(|e| SlmRunnerError::ExecutionFailed {
                reason: format!("Process execution failed: {}", e),
            })?;

        // Non-zero exit: surface stderr so the caller sees the backend's error.
        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            return Err(SlmRunnerError::ExecutionFailed {
                reason: format!("llama.cpp execution failed: {}", stderr),
            });
        }

        let response = String::from_utf8_lossy(&output.stdout).trim().to_string();
        let execution_time = start_time.elapsed();

        let metadata = ExecutionMetadata {
            // Token counts are rough byte-length estimates (~4 bytes/token);
            // the CLI backend does not report exact counts here.
            input_tokens: Some((prompt.len() / 4) as u32),
            output_tokens: Some((response.len() / 4) as u32),
            execution_time_ms: execution_time.as_millis() as u64,
            memory_usage_mb: None, // not measured for the CLI backend
            limits_hit: Vec::new(),
        };

        Ok(ExecutionResult { response, metadata })
    }

    fn get_sandbox_profile(&self) -> &SandboxProfile {
        &self.sandbox_profile
    }

    fn get_resource_requirements(&self) -> &ModelResourceRequirements {
        &self.resource_requirements
    }

    /// Confirms the model file and executable still exist, then performs a
    /// minimal one-token generation as an end-to-end smoke test.
    async fn health_check(&self) -> Result<(), SlmRunnerError> {
        if !self.model_path.exists() {
            return Err(SlmRunnerError::ModelFileNotFound {
                path: self.model_path.display().to_string(),
            });
        }

        if !self.llama_cpp_path.exists() {
            return Err(SlmRunnerError::InitializationFailed {
                reason: "llama.cpp executable no longer available".to_string(),
            });
        }

        // Cheap end-to-end probe: single token, short timeout.
        let test_prompt = "Hello";
        let options = ExecutionOptions {
            timeout: Some(Duration::from_secs(10)),
            temperature: Some(0.1),
            max_tokens: Some(1),
            custom_parameters: HashMap::new(),
        };

        // Any execution failure is reported as an initialization problem so
        // callers of health_check get a single error variant to match on.
        match self.execute(test_prompt, Some(options)).await {
            Ok(_) => Ok(()),
            Err(e) => Err(SlmRunnerError::InitializationFailed {
                reason: format!("Health check failed: {}", e),
            }),
        }
    }

    fn get_info(&self) -> RunnerInfo {
        RunnerInfo {
            runner_type: "LocalGgufRunner".to_string(),
            model_path: self.model_path.display().to_string(),
            capabilities: vec!["text_generation".to_string(), "conversation".to_string()],
            version: Some("1.0.0".to_string()),
        }
    }
}
483
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::SandboxProfile;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Minimal resource requirements shared by the tests below.
    fn create_test_resource_requirements() -> ModelResourceRequirements {
        ModelResourceRequirements {
            min_memory_mb: 512,
            preferred_cpu_cores: 1.0,
            gpu_requirements: None,
        }
    }

    #[tokio::test]
    async fn test_gguf_runner_creation_missing_file() {
        let sandbox_profile = SandboxProfile::secure_default();
        let resource_requirements = create_test_resource_requirements();

        let result = LocalGgufRunner::new(
            "/nonexistent/model.gguf",
            sandbox_profile,
            resource_requirements,
        )
        .await;

        assert!(matches!(
            result,
            Err(SlmRunnerError::ModelFileNotFound { .. })
        ));
    }

    // Synchronous test: no awaits, so no tokio runtime is needed.
    #[test]
    fn test_execution_options_default() {
        let options = ExecutionOptions::default();
        assert_eq!(options.temperature, Some(0.7));
        assert_eq!(options.max_tokens, Some(256));
        assert!(options.timeout.is_some());
    }

    // Synchronous test: build_command_args does no async work.
    #[test]
    fn test_command_args_building() {
        // A real file is needed because the runner checks path existence.
        let mut temp_file = NamedTempFile::new().unwrap();
        writeln!(temp_file, "dummy model content").unwrap();
        let model_path = temp_file.path().to_path_buf();

        let sandbox_profile = SandboxProfile::secure_default();
        let resource_requirements = create_test_resource_requirements();

        // Construct directly to bypass `new()`'s health check.
        let runner = LocalGgufRunner {
            model_path: model_path.clone(),
            sandbox_profile,
            resource_requirements,
            llama_cpp_path: PathBuf::from("/fake/llama-cli"),
        };

        let options = ExecutionOptions::default();
        let args = runner.build_command_args("test prompt", &options);

        assert!(args.contains(&"--model".to_string()));
        assert!(args.contains(&model_path.display().to_string()));
        assert!(args.contains(&"--prompt".to_string()));
        assert!(args.contains(&"test prompt".to_string()));
    }

    #[test]
    fn test_validation_long_prompt() {
        let sandbox_profile = SandboxProfile::secure_default();
        let resource_requirements = create_test_resource_requirements();

        let runner = LocalGgufRunner {
            model_path: PathBuf::from("/fake/model.gguf"),
            sandbox_profile,
            resource_requirements,
            llama_cpp_path: PathBuf::from("/fake/llama-cli"),
        };

        // 20_000 bytes / 4 = 5_000 estimated tokens — over the 4_000 limit.
        let long_prompt = "a".repeat(20000);
        let result = runner.validate_execution_constraints(&long_prompt);

        assert!(matches!(result, Err(SlmRunnerError::InvalidInput { .. })));
    }
}