1use std::fmt;
43
44#[cfg(feature = "serde")]
45use serde::{Deserialize, Serialize};
46
/// Progress report for a model pull/download operation.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct PullProgress {
    /// Human-readable status label (e.g. "downloading").
    pub status: String,
    /// Units completed so far.
    pub completed: u64,
    /// Total units expected; 0 when not yet known.
    pub total: u64,
}

impl PullProgress {
    /// Builds a progress record from a status label and the two counters.
    #[must_use]
    pub fn new(status: impl Into<String>, completed: u64, total: u64) -> Self {
        let status = status.into();
        Self {
            status,
            completed,
            total,
        }
    }

    /// Completion percentage; returns 0.0 when `total` is 0 (unknown),
    /// avoiding a division by zero.
    #[must_use]
    pub fn percent(&self) -> f64 {
        match self.total {
            0 => 0.0,
            total => (self.completed as f64 / total as f64) * 100.0,
        }
    }

    /// True once at least `total` units are done and `total` is known (> 0).
    #[must_use]
    pub fn is_complete(&self) -> bool {
        self.completed >= self.total && self.total > 0
    }
}

impl fmt::Display for PullProgress {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let pct = self.percent();
        write!(f, "{}: {:.1}%", self.status, pct)
    }
}
92
/// Metadata describing a model available on the backend.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ModelInfo {
    /// Model name/tag.
    pub name: String,
    /// On-disk size in bytes.
    pub size: u64,
    /// Quantization label, when reported (e.g. "Q4_K_M").
    pub quantization: Option<String>,
    /// Parameter-count label, when reported (e.g. "7B").
    pub parameters: Option<String>,
    /// Content digest, when reported.
    pub digest: Option<String>,
}

impl ModelInfo {
    /// Size in decimal gigabytes (10^9 bytes per GB).
    #[must_use]
    pub fn size_gb(&self) -> f64 {
        self.size as f64 / 1e9
    }

    /// Human-readable size: "X.X GB" at one gigabyte or more, otherwise
    /// whole decimal megabytes.
    #[must_use]
    pub fn size_human(&self) -> String {
        let gb = self.size_gb();
        if gb < 1.0 {
            format!("{:.0} MB", self.size as f64 / 1e6)
        } else {
            format!("{gb:.1} GB")
        }
    }
}

impl fmt::Display for ModelInfo {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let size = self.size_human();
        write!(f, "{} ({})", self.name, size)
    }
}
133
/// A model currently loaded/running on the backend.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct RunningModel {
    /// Model name/tag.
    pub name: String,
    /// VRAM in use by this model, in bytes, when known.
    pub vram_used: Option<u64>,
    /// GPU device ids the model is resident on; empty when none reported.
    pub gpu_ids: Vec<u32>,
}

impl RunningModel {
    /// VRAM usage in decimal gigabytes, when reported.
    #[must_use]
    pub fn vram_gb(&self) -> Option<f64> {
        self.vram_used.map(|v| v as f64 / 1_000_000_000.0)
    }
}

impl fmt::Display for RunningModel {
    /// Renders e.g. `llama3.2 [GPU: 0, 1] (4.0 GB VRAM)`, omitting the
    /// GPU and VRAM segments when that information is absent.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.name)?;
        if !self.gpu_ids.is_empty() {
            // Fix: join the ids ourselves instead of leaking Rust's `{:?}`
            // Debug formatting (`[0, 1]`) into user-facing Display output.
            let ids = self
                .gpu_ids
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            write!(f, " [GPU: {ids}]")?;
        }
        if let Some(vram) = self.vram_gb() {
            write!(f, " ({vram:.1} GB VRAM)")?;
        }
        Ok(())
    }
}
166
/// Identity and memory state of a single GPU device.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct GpuInfo {
    /// Device index.
    pub id: u32,
    /// Device name (e.g. "RTX 4090").
    pub name: String,
    /// Total device memory in bytes.
    pub memory_total: u64,
    /// Currently free device memory in bytes.
    pub memory_free: u64,
}

impl GpuInfo {
    /// Total memory in decimal gigabytes.
    #[must_use]
    pub fn memory_total_gb(&self) -> f64 {
        self.memory_total as f64 / 1e9
    }

    /// Free memory in decimal gigabytes.
    #[must_use]
    pub fn memory_free_gb(&self) -> f64 {
        self.memory_free as f64 / 1e9
    }

    /// Used memory as a percentage of total; 0.0 when total is 0.
    #[must_use]
    pub fn memory_used_percent(&self) -> f64 {
        if self.memory_total == 0 {
            return 0.0;
        }
        // saturating_sub keeps `used` at 0 if `memory_free` ever exceeds
        // `memory_total`, instead of panicking on u64 underflow.
        let used = self.memory_total.saturating_sub(self.memory_free);
        (used as f64 / self.memory_total as f64) * 100.0
    }
}

impl fmt::Display for GpuInfo {
    /// Renders e.g. `GPU 0: RTX 4090 (20.0/24.0 GB free)` (free/total).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let free = self.memory_free_gb();
        let total = self.memory_total_gb();
        write!(
            f,
            "GPU {}: {} ({:.1}/{:.1} GB free)",
            self.id, self.name, free, total
        )
    }
}
218
/// Errors surfaced by a model backend.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum BackendError {
    /// The backend server is not running.
    NotRunning,
    /// The named model does not exist.
    ModelNotFound(String),
    /// The named model is already loaded.
    AlreadyLoaded(String),
    /// Not enough memory to load the model.
    InsufficientMemory,
    /// A network-level failure, with detail.
    NetworkError(String),
    /// A process-management failure, with detail.
    ProcessError(String),
    /// Any other backend-reported failure.
    BackendSpecific(String),
}

impl std::error::Error for BackendError {}

impl fmt::Display for BackendError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Payload-free variants write a fixed string; the rest interpolate
        // their detail message.
        match self {
            Self::NotRunning => f.write_str("Backend server is not running"),
            Self::InsufficientMemory => f.write_str("Insufficient memory to load model"),
            Self::ModelNotFound(name) => write!(f, "Model not found: {name}"),
            Self::AlreadyLoaded(name) => write!(f, "Model already loaded: {name}"),
            Self::NetworkError(msg) => write!(f, "Network error: {msg}"),
            Self::ProcessError(msg) => write!(f, "Process error: {msg}"),
            Self::BackendSpecific(msg) => write!(f, "Backend error: {msg}"),
        }
    }
}
254
/// Options controlling how a model is loaded onto hardware.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LoadConfig {
    // GPU device ids to load onto; empty leaves the choice to the backend.
    pub gpu_ids: Vec<u32>,
    // Layers to offload to GPU; negative means all layers, 0 means CPU-only
    // (see `is_full_gpu`/`is_cpu_only` elsewhere in this file).
    pub gpu_layers: i32,
    // Context size to request, if overridden; None uses the backend default.
    pub context_size: Option<u32>,
    // Whether the model should stay resident after use.
    pub keep_alive: bool,
}
268
/// The author of a chat message.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "lowercase"))]
pub enum ChatRole {
    /// System/instruction message.
    System,
    /// End-user message.
    User,
    /// Model-generated message.
    Assistant,
}

impl fmt::Display for ChatRole {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Lowercase labels, matching the serde `rename_all = "lowercase"`
        // wire format above.
        let label = match self {
            Self::System => "system",
            Self::User => "user",
            Self::Assistant => "assistant",
        };
        f.write_str(label)
    }
}
295
296#[derive(Debug, Clone, PartialEq, Eq)]
298#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
299pub struct ChatMessage {
300 pub role: ChatRole,
302 pub content: String,
304}
305
306impl ChatMessage {
307 #[must_use]
309 pub fn system(content: impl Into<String>) -> Self {
310 Self {
311 role: ChatRole::System,
312 content: content.into(),
313 }
314 }
315
316 #[must_use]
318 pub fn user(content: impl Into<String>) -> Self {
319 Self {
320 role: ChatRole::User,
321 content: content.into(),
322 }
323 }
324
325 #[must_use]
327 pub fn assistant(content: impl Into<String>) -> Self {
328 Self {
329 role: ChatRole::Assistant,
330 content: content.into(),
331 }
332 }
333}
334
/// Sampling/generation options for a chat request. Every field defaults to
/// `None` (or empty), meaning "use the backend's default".
#[derive(Debug, Clone, PartialEq, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ChatOptions {
    /// Sampling temperature.
    pub temperature: Option<f32>,
    /// Nucleus-sampling threshold.
    pub top_p: Option<f32>,
    /// Top-k sampling cutoff.
    pub top_k: Option<u32>,
    /// Maximum tokens to generate.
    pub max_tokens: Option<u32>,
    /// Stop sequences, appended one at a time via `with_stop`.
    pub stop: Vec<String>,
    /// RNG seed for reproducible sampling.
    pub seed: Option<u64>,
}

impl ChatOptions {
    /// Equivalent to [`ChatOptions::default`].
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the sampling temperature.
    #[must_use]
    pub fn with_temperature(self, temp: f32) -> Self {
        Self {
            temperature: Some(temp),
            ..self
        }
    }

    /// Sets the nucleus-sampling threshold.
    #[must_use]
    pub fn with_top_p(self, top_p: f32) -> Self {
        Self {
            top_p: Some(top_p),
            ..self
        }
    }

    /// Sets the top-k cutoff.
    #[must_use]
    pub fn with_top_k(self, top_k: u32) -> Self {
        Self {
            top_k: Some(top_k),
            ..self
        }
    }

    /// Caps the number of generated tokens.
    #[must_use]
    pub fn with_max_tokens(self, max: u32) -> Self {
        Self {
            max_tokens: Some(max),
            ..self
        }
    }

    /// Appends one stop sequence; may be called repeatedly to add several.
    #[must_use]
    pub fn with_stop(mut self, stop: impl Into<String>) -> Self {
        self.stop.push(stop.into());
        self
    }

    /// Sets the RNG seed.
    #[must_use]
    pub fn with_seed(self, seed: u64) -> Self {
        Self {
            seed: Some(seed),
            ..self
        }
    }
}
402
403#[derive(Debug, Clone, PartialEq)]
405#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
406pub struct ChatResponse {
407 pub message: ChatMessage,
409 pub done: bool,
411 pub total_duration: Option<u64>,
413 pub eval_count: Option<u32>,
415 pub prompt_eval_count: Option<u32>,
417}
418
419impl ChatResponse {
420 #[must_use]
422 pub fn content(&self) -> &str {
423 &self.message.content
424 }
425
426 #[must_use]
428 pub fn tokens_per_second(&self) -> Option<f64> {
429 match (self.eval_count, self.total_duration) {
430 (Some(count), Some(duration)) if duration > 0 => {
431 Some(count as f64 / (duration as f64 / 1_000_000_000.0))
432 }
433 _ => None,
434 }
435 }
436}
437
/// An embedding vector returned by a backend.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct EmbeddingResponse {
    /// The embedding values.
    pub embedding: Vec<f32>,
    /// Total duration, when reported.
    pub total_duration: Option<u64>,
    /// Number of prompt tokens evaluated, when reported.
    pub prompt_eval_count: Option<u32>,
}

impl EmbeddingResponse {
    /// Number of components in the embedding vector.
    #[must_use]
    pub fn dimension(&self) -> usize {
        self.embedding.len()
    }

    /// Cosine similarity with `other`. Returns 0.0 (rather than panicking)
    /// when the dimensions differ or either vector has zero norm.
    #[must_use]
    pub fn cosine_similarity(&self, other: &Self) -> f32 {
        let lhs = &self.embedding;
        let rhs = &other.embedding;
        if lhs.len() != rhs.len() {
            return 0.0;
        }

        // One pass accumulates the dot product and both squared norms;
        // element order matches a straightforward left-to-right sum.
        let mut dot = 0.0_f32;
        let mut lhs_sq = 0.0_f32;
        let mut rhs_sq = 0.0_f32;
        for (a, b) in lhs.iter().zip(rhs) {
            dot += a * b;
            lhs_sq += a * a;
            rhs_sq += b * b;
        }
        let norm_lhs = lhs_sq.sqrt();
        let norm_rhs = rhs_sq.sqrt();

        if norm_lhs == 0.0 || norm_rhs == 0.0 {
            return 0.0;
        }
        dot / (norm_lhs * norm_rhs)
    }
}
485
486impl Default for LoadConfig {
487 fn default() -> Self {
488 Self {
489 gpu_ids: Vec::new(),
490 gpu_layers: -1, context_size: None,
492 keep_alive: false,
493 }
494 }
495}
496
497impl LoadConfig {
498 #[must_use]
500 pub fn new() -> Self {
501 Self::default()
502 }
503
504 #[must_use]
506 pub fn with_gpus(mut self, gpu_ids: Vec<u32>) -> Self {
507 self.gpu_ids = gpu_ids;
508 self
509 }
510
511 #[must_use]
513 pub fn with_gpu_layers(mut self, layers: i32) -> Self {
514 self.gpu_layers = layers;
515 self
516 }
517
518 #[must_use]
520 pub fn with_context_size(mut self, size: u32) -> Self {
521 self.context_size = Some(size);
522 self
523 }
524
525 #[must_use]
527 pub fn with_keep_alive(mut self, keep: bool) -> Self {
528 self.keep_alive = keep;
529 self
530 }
531
532 #[must_use]
534 pub fn is_cpu_only(&self) -> bool {
535 self.gpu_layers == 0
536 }
537
538 #[must_use]
540 pub fn is_full_gpu(&self) -> bool {
541 self.gpu_layers < 0
542 }
543}
544
#[cfg(test)]
mod tests {
    use super::*;

    // --- PullProgress ---

    #[test]
    fn test_pull_progress() {
        let progress = PullProgress::new("downloading", 500, 1000);
        assert_eq!(progress.percent(), 50.0);
        assert!(!progress.is_complete());

        let complete = PullProgress::new("complete", 1000, 1000);
        assert!(complete.is_complete());
    }

    // Display format is "<status>: <percent>" with one decimal place.
    #[test]
    fn test_pull_progress_display() {
        let progress = PullProgress::new("pulling", 750, 1000);
        assert_eq!(progress.to_string(), "pulling: 75.0%");
    }

    // A zero total must not divide by zero and never counts as complete.
    #[test]
    fn test_pull_progress_zero_total() {
        let progress = PullProgress::new("starting", 0, 0);
        assert_eq!(progress.percent(), 0.0);
        assert!(!progress.is_complete());
    }

    // --- ModelInfo ---

    // Sizes use decimal units: 10^9 bytes per GB, 10^6 per MB.
    #[test]
    fn test_model_info_size() {
        let info = ModelInfo {
            name: "llama3.2:7b".to_string(),
            size: 4_500_000_000,
            quantization: Some("Q4_K_M".to_string()),
            parameters: Some("7B".to_string()),
            digest: None,
        };

        assert!((info.size_gb() - 4.5).abs() < 0.01);
        assert_eq!(info.size_human(), "4.5 GB");
    }

    // Sub-gigabyte sizes fall back to whole megabytes in Display output.
    #[test]
    fn test_model_info_display() {
        let info = ModelInfo {
            name: "test:latest".to_string(),
            size: 500_000_000,
            quantization: None,
            parameters: None,
            digest: None,
        };

        assert!(info.to_string().contains("test:latest"));
        assert!(info.to_string().contains("500 MB"));
    }

    // --- RunningModel ---

    #[test]
    fn test_running_model() {
        let model = RunningModel {
            name: "llama3.2".to_string(),
            vram_used: Some(4_000_000_000),
            gpu_ids: vec![0],
        };

        assert!((model.vram_gb().unwrap() - 4.0).abs() < 0.01);
        assert!(model.to_string().contains("llama3.2"));
        assert!(model.to_string().contains("GPU"));
    }

    // --- GpuInfo ---

    #[test]
    fn test_gpu_info() {
        let gpu = GpuInfo {
            id: 0,
            name: "RTX 4090".to_string(),
            memory_total: 24_000_000_000,
            memory_free: 20_000_000_000,
        };

        assert!((gpu.memory_total_gb() - 24.0).abs() < 0.01);
        assert!((gpu.memory_free_gb() - 20.0).abs() < 0.01);
        // 4 GB used of 24 GB total is ~16.67%.
        assert!((gpu.memory_used_percent() - 16.67).abs() < 0.5);
    }

    // --- BackendError ---

    #[test]
    fn test_backend_error_display() {
        let err = BackendError::NotRunning;
        assert!(err.to_string().contains("not running"));

        let err = BackendError::ModelNotFound("test".to_string());
        assert!(err.to_string().contains("test"));
    }

    // --- LoadConfig ---

    // Default is the "all layers on GPU" sentinel (-1) with nothing pinned.
    #[test]
    fn test_load_config_default() {
        let config = LoadConfig::default();
        assert!(config.gpu_ids.is_empty());
        assert_eq!(config.gpu_layers, -1);
        assert!(config.is_full_gpu());
        assert!(!config.is_cpu_only());
    }

    #[test]
    fn test_load_config_builder() {
        let config = LoadConfig::new()
            .with_gpus(vec![0, 1])
            .with_gpu_layers(32)
            .with_context_size(8192)
            .with_keep_alive(true);

        assert_eq!(config.gpu_ids, vec![0, 1]);
        assert_eq!(config.gpu_layers, 32);
        assert_eq!(config.context_size, Some(8192));
        assert!(config.keep_alive);
        // A positive layer count is neither CPU-only nor full-GPU.
        assert!(!config.is_cpu_only());
        assert!(!config.is_full_gpu());
    }

    #[test]
    fn test_load_config_cpu_only() {
        let config = LoadConfig::new().with_gpu_layers(0);
        assert!(config.is_cpu_only());
        assert!(!config.is_full_gpu());
    }

    // --- Chat types ---

    // Lowercase labels match the serde `rename_all = "lowercase"` wire format.
    #[test]
    fn test_chat_role_display() {
        assert_eq!(ChatRole::System.to_string(), "system");
        assert_eq!(ChatRole::User.to_string(), "user");
        assert_eq!(ChatRole::Assistant.to_string(), "assistant");
    }

    #[test]
    fn test_chat_message_constructors() {
        let system = ChatMessage::system("You are helpful");
        assert_eq!(system.role, ChatRole::System);
        assert_eq!(system.content, "You are helpful");

        let user = ChatMessage::user("Hello");
        assert_eq!(user.role, ChatRole::User);

        let assistant = ChatMessage::assistant("Hi there!");
        assert_eq!(assistant.role, ChatRole::Assistant);
    }

    #[test]
    fn test_chat_options_builder() {
        let options = ChatOptions::new()
            .with_temperature(0.7)
            .with_top_p(0.9)
            .with_top_k(40)
            .with_max_tokens(100)
            .with_stop("END")
            .with_seed(42);

        assert_eq!(options.temperature, Some(0.7));
        assert_eq!(options.top_p, Some(0.9));
        assert_eq!(options.top_k, Some(40));
        assert_eq!(options.max_tokens, Some(100));
        assert_eq!(options.stop, vec!["END"]);
        assert_eq!(options.seed, Some(42));
    }

    #[test]
    fn test_chat_response_content() {
        let response = ChatResponse {
            message: ChatMessage::assistant("Hello!"),
            done: true,
            total_duration: Some(1_000_000_000),
            eval_count: Some(10),
            prompt_eval_count: Some(5),
        };

        assert_eq!(response.content(), "Hello!");
        assert!(response.done);
    }

    // 100 tokens over 2s (durations are in nanoseconds) => 50 tok/s.
    #[test]
    fn test_chat_response_tokens_per_second() {
        let response = ChatResponse {
            message: ChatMessage::assistant("Test"),
            done: true,
            total_duration: Some(2_000_000_000),
            eval_count: Some(100),
            prompt_eval_count: None,
        };

        let tps = response.tokens_per_second().unwrap();
        assert!((tps - 50.0).abs() < 0.1);
    }

    // --- EmbeddingResponse ---

    #[test]
    fn test_embedding_response_dimension() {
        let response = EmbeddingResponse {
            embedding: vec![0.1, 0.2, 0.3, 0.4],
            total_duration: None,
            prompt_eval_count: None,
        };

        assert_eq!(response.dimension(), 4);
    }

    #[test]
    fn test_embedding_cosine_similarity() {
        let a = EmbeddingResponse {
            embedding: vec![1.0, 0.0, 0.0],
            total_duration: None,
            prompt_eval_count: None,
        };

        let b = EmbeddingResponse {
            embedding: vec![1.0, 0.0, 0.0],
            total_duration: None,
            prompt_eval_count: None,
        };

        // Identical vectors have similarity 1.
        assert!((a.cosine_similarity(&b) - 1.0).abs() < 0.001);

        let c = EmbeddingResponse {
            embedding: vec![0.0, 1.0, 0.0],
            total_duration: None,
            prompt_eval_count: None,
        };

        // Orthogonal vectors have similarity 0.
        assert!((a.cosine_similarity(&c)).abs() < 0.001);
    }

    // Mismatched dimensions are defined to yield 0.0 rather than panic.
    #[test]
    fn test_embedding_cosine_similarity_different_dimensions() {
        let a = EmbeddingResponse {
            embedding: vec![1.0, 0.0],
            total_duration: None,
            prompt_eval_count: None,
        };

        let b = EmbeddingResponse {
            embedding: vec![1.0, 0.0, 0.0],
            total_duration: None,
            prompt_eval_count: None,
        };

        assert_eq!(a.cosine_similarity(&b), 0.0);
    }
}