ck_models/
lib.rs

1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use std::path::Path;
5
/// Description of a single embedding model known to the registry.
///
/// Serialized to and from JSON via serde; the field names are the JSON keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelConfig {
    /// Full model identifier (the built-in entries use names such as
    /// `"BAAI/bge-small-en-v1.5"`).
    pub name: String,
    /// Name of the backend that serves the model (all built-in entries use
    /// `"fastembed"`).
    pub provider: String,
    /// Presumably the length of the embedding vectors the model produces
    /// (e.g. 384 or 768 for the built-ins) -- TODO confirm against the embedder.
    pub dimensions: usize,
    /// Presumably the maximum input length in tokens -- TODO confirm.
    pub max_tokens: usize,
    /// Free-form, human-readable summary of the model.
    pub description: String,
}
14
/// Collection of known models plus the key of the one used by default.
///
/// Serialized to and from JSON via serde; the field names are the JSON keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelRegistry {
    /// Models keyed by a short alias (the built-in aliases include
    /// `"bge-small"` and `"minilm"`).
    pub models: HashMap<String, ModelConfig>,
    /// Key into `models` naming the default model. Nothing in this type
    /// enforces that the key is present, so lookups of the default may fail.
    pub default_model: String,
}
20
21impl Default for ModelRegistry {
22    fn default() -> Self {
23        let mut models = HashMap::new();
24
25        models.insert(
26            "bge-small".to_string(),
27            ModelConfig {
28                name: "BAAI/bge-small-en-v1.5".to_string(),
29                provider: "fastembed".to_string(),
30                dimensions: 384,
31                max_tokens: 512,
32                description: "Small, fast English embedding model".to_string(),
33            },
34        );
35
36        models.insert(
37            "minilm".to_string(),
38            ModelConfig {
39                name: "sentence-transformers/all-MiniLM-L6-v2".to_string(),
40                provider: "fastembed".to_string(),
41                dimensions: 384,
42                max_tokens: 256,
43                description: "Lightweight English embedding model".to_string(),
44            },
45        );
46
47        // Add enhanced models
48        models.insert(
49            "nomic-v1.5".to_string(),
50            ModelConfig {
51                name: "nomic-embed-text-v1.5".to_string(),
52                provider: "fastembed".to_string(),
53                dimensions: 768,
54                max_tokens: 8192,
55                description: "High-quality English embedding model with large context window"
56                    .to_string(),
57            },
58        );
59
60        models.insert(
61            "jina-code".to_string(),
62            ModelConfig {
63                name: "jina-embeddings-v2-base-code".to_string(),
64                provider: "fastembed".to_string(),
65                dimensions: 768,
66                max_tokens: 8192,
67                description: "Code-specific embedding model optimized for programming tasks"
68                    .to_string(),
69            },
70        );
71
72        Self {
73            models,
74            default_model: "bge-small".to_string(), // Keep BGE as default for backward compatibility
75        }
76    }
77}
78
79impl ModelRegistry {
80    pub fn load(path: &Path) -> Result<Self> {
81        if path.exists() {
82            let data = std::fs::read_to_string(path)?;
83            Ok(serde_json::from_str(&data)?)
84        } else {
85            Ok(Self::default())
86        }
87    }
88
89    pub fn save(&self, path: &Path) -> Result<()> {
90        let data = serde_json::to_string_pretty(self)?;
91        std::fs::write(path, data)?;
92        Ok(())
93    }
94
95    pub fn get_model(&self, name: &str) -> Option<&ModelConfig> {
96        self.models.get(name)
97    }
98
99    pub fn get_default_model(&self) -> Option<&ModelConfig> {
100        self.models.get(&self.default_model)
101    }
102}
103
/// Per-project settings, serialized to and from JSON via serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectConfig {
    /// Name of the model to use (defaults to `"bge-small"`); presumably a key
    /// in the model registry -- TODO confirm against callers.
    pub model: String,
    /// Chunk size (defaults to 512). NOTE(review): the unit (tokens vs.
    /// characters) is not visible here -- confirm against the chunker.
    pub chunk_size: usize,
    /// Overlap between consecutive chunks (defaults to 128), presumably in the
    /// same unit as `chunk_size` -- TODO confirm.
    pub chunk_overlap: usize,
    /// Name of the index backend (defaults to `"hnsw"`).
    pub index_backend: String,
}
111
112impl Default for ProjectConfig {
113    fn default() -> Self {
114        Self {
115            model: "bge-small".to_string(),
116            chunk_size: 512,
117            chunk_overlap: 128,
118            index_backend: "hnsw".to_string(),
119        }
120    }
121}
122
123impl ProjectConfig {
124    pub fn load(path: &Path) -> Result<Self> {
125        if path.exists() {
126            let data = std::fs::read_to_string(path)?;
127            Ok(serde_json::from_str(&data)?)
128        } else {
129            Ok(Self::default())
130        }
131    }
132
133    pub fn save(&self, path: &Path) -> Result<()> {
134        let data = serde_json::to_string_pretty(self)?;
135        std::fs::write(path, data)?;
136        Ok(())
137    }
138}