// agcodex_core/embeddings/manager.rs

1//! Main embeddings manager - coordinates providers and indexes.
2//!
3//! This is completely independent from chat/LLM models and has zero overhead when disabled.
4
5use super::EmbeddingError;
6use super::EmbeddingProvider;
7use super::EmbeddingVector;
8use super::config::EmbeddingsConfig;
9use super::config::IntelligenceMode;
10use super::config::ProviderSelection;
11use super::index_manager::EmbeddingIndexManager;
12use super::index_manager::SearchResult;
13use super::providers::GeminiProvider;
14use super::providers::OpenAIProvider;
15use super::providers::VoyageProvider;
16use super::providers::voyage::VoyageInputType;
17use std::collections::HashMap;
18use std::path::Path;
19use std::path::PathBuf;
20use std::sync::Arc;
21use tracing::debug;
22use tracing::info;
23use tracing::warn;
24
/// Main embeddings manager.
///
/// Coordinates embedding providers and vector indexes. Completely independent
/// from chat/LLM models; when built disabled, every field is empty so the
/// manager adds zero overhead.
pub struct EmbeddingsManager {
    /// Configuration (None = disabled)
    config: Option<EmbeddingsConfig>,

    /// Available providers, keyed by name ("openai", "gemini", "voyage")
    providers: HashMap<String, Box<dyn EmbeddingProvider>>,

    /// Current active provider (a key into `providers`)
    active_provider: Option<String>,

    /// Index manager for vector storage (None when no provider is registered)
    index_manager: Option<Arc<EmbeddingIndexManager>>,

    /// Current repository path, set via `set_repository`
    current_repo: Option<PathBuf>,

    /// Intelligence mode
    intelligence_mode: IntelligenceMode,
}
45
46impl EmbeddingsManager {
47    /// Create a new embeddings manager
48    pub fn new(config: Option<EmbeddingsConfig>) -> Self {
49        if config.is_none() {
50            info!("Embeddings disabled - zero overhead mode");
51            return Self::disabled();
52        }
53
54        let config = config.unwrap();
55        if !config.enabled {
56            info!("Embeddings explicitly disabled in config");
57            return Self::disabled();
58        }
59
60        info!("Initializing embeddings manager");
61
62        // Initialize providers based on available API keys
63        let mut providers = HashMap::new();
64        let mut active_provider = None;
65
66        // Check OpenAI
67        if let Some(api_key) = super::config::get_embedding_api_key("openai") {
68            debug!("OpenAI embedding API key found");
69            if let Some(openai_config) = &config.openai {
70                providers.insert(
71                    "openai".to_string(),
72                    Box::new(OpenAIProvider::new(
73                        api_key,
74                        openai_config.model.clone(),
75                        openai_config.dimensions,
76                        None, // api_endpoint
77                    )) as Box<dyn EmbeddingProvider>,
78                );
79                if active_provider.is_none() {
80                    active_provider = Some("openai".to_string());
81                }
82            }
83        }
84
85        // Check Gemini
86        if let Some(api_key) = super::config::get_embedding_api_key("gemini") {
87            debug!("Gemini embedding API key found");
88            if let Some(gemini_config) = &config.gemini {
89                providers.insert(
90                    "gemini".to_string(),
91                    Box::new(GeminiProvider::new(api_key, gemini_config.model.clone()))
92                        as Box<dyn EmbeddingProvider>,
93                );
94                if active_provider.is_none() {
95                    active_provider = Some("gemini".to_string());
96                }
97            }
98        }
99
100        // Check Voyage
101        if let Some(api_key) = super::config::get_embedding_api_key("voyage") {
102            debug!("Voyage embedding API key found");
103            if let Some(voyage_config) = &config.voyage {
104                let input_type = match voyage_config.input_type.as_str() {
105                    "query" => VoyageInputType::Query,
106                    _ => VoyageInputType::Document,
107                };
108                providers.insert(
109                    "voyage".to_string(),
110                    Box::new(VoyageProvider::new(
111                        api_key,
112                        voyage_config.model.clone(),
113                        input_type,
114                        None, // api_endpoint
115                    )) as Box<dyn EmbeddingProvider>,
116                );
117                if active_provider.is_none() {
118                    active_provider = Some("voyage".to_string());
119                }
120            }
121        }
122
123        // Select provider based on config
124        if let ProviderSelection::Auto = config.provider {
125            // Auto mode - use first available
126            info!("Auto-selecting embedding provider: {:?}", active_provider);
127        } else {
128            // Specific provider requested
129            let requested = match config.provider {
130                ProviderSelection::OpenAI => "openai",
131                ProviderSelection::Gemini => "gemini",
132                ProviderSelection::Voyage => "voyage",
133                _ => "openai",
134            };
135
136            if providers.contains_key(requested) {
137                active_provider = Some(requested.to_string());
138                info!("Using requested embedding provider: {}", requested);
139            } else {
140                warn!(
141                    "Requested provider {} not available, using: {:?}",
142                    requested, active_provider
143                );
144            }
145        }
146
147        // Initialize index manager
148        let storage_dir = dirs::home_dir()
149            .unwrap_or_default()
150            .join(".agcodex")
151            .join("embeddings");
152
153        let index_manager = if !providers.is_empty() {
154            Some(Arc::new(EmbeddingIndexManager::new(storage_dir)))
155        } else {
156            None
157        };
158
159        Self {
160            config: Some(config),
161            providers,
162            active_provider,
163            index_manager,
164            current_repo: None,
165            intelligence_mode: IntelligenceMode::Medium,
166        }
167    }
168
169    /// Create a disabled manager (zero overhead)
170    pub fn disabled() -> Self {
171        Self {
172            config: None,
173            providers: HashMap::new(),
174            active_provider: None,
175            index_manager: None,
176            current_repo: None,
177            intelligence_mode: IntelligenceMode::Medium,
178        }
179    }
180
181    /// Check if embeddings are enabled
182    pub fn is_enabled(&self) -> bool {
183        self.config.as_ref().map(|c| c.enabled).unwrap_or(false)
184    }
185
186    /// Set the current repository
187    pub fn set_repository(&mut self, repo: PathBuf) {
188        self.current_repo = Some(repo);
189    }
190
    /// Set intelligence mode.
    ///
    /// NOTE(review): currently only stores the mode; providers are not yet
    /// reconfigured from it (see TODO below).
    pub const fn set_intelligence_mode(&mut self, mode: IntelligenceMode) {
        self.intelligence_mode = mode;
        // TODO: Update provider configurations based on mode
    }
196
197    /// Get current model ID
198    pub fn current_model_id(&self) -> Option<String> {
199        self.active_provider
200            .as_ref()
201            .and_then(|name| self.providers.get(name).map(|p| p.model_id()))
202    }
203
204    /// Get current dimensions
205    pub fn current_dimensions(&self) -> Option<usize> {
206        self.active_provider
207            .as_ref()
208            .and_then(|name| self.providers.get(name).map(|p| p.dimensions()))
209    }
210
211    /// Embed a single text (returns None if disabled)
212    pub async fn embed(&self, text: &str) -> Result<Option<EmbeddingVector>, EmbeddingError> {
213        if !self.is_enabled() {
214            return Ok(None); // Fast path - no work done
215        }
216
217        let provider_name = self
218            .active_provider
219            .as_ref()
220            .ok_or(EmbeddingError::NotEnabled)?;
221
222        let provider = self
223            .providers
224            .get(provider_name)
225            .ok_or_else(|| EmbeddingError::ProviderNotAvailable(provider_name.clone()))?;
226
227        let vector = provider.embed(text).await?;
228        Ok(Some(vector))
229    }
230
231    /// Embed multiple texts in batch
232    pub async fn embed_batch(
233        &self,
234        texts: &[String],
235    ) -> Result<Option<Vec<EmbeddingVector>>, EmbeddingError> {
236        if !self.is_enabled() {
237            return Ok(None);
238        }
239
240        let provider_name = self
241            .active_provider
242            .as_ref()
243            .ok_or(EmbeddingError::NotEnabled)?;
244
245        let provider = self
246            .providers
247            .get(provider_name)
248            .ok_or_else(|| EmbeddingError::ProviderNotAvailable(provider_name.clone()))?;
249
250        let vectors = provider.embed_batch(texts).await?;
251        Ok(Some(vectors))
252    }
253
254    /// Search in the appropriate index for current repo/model
255    pub async fn search_in_index(
256        &self,
257        repo: &Path,
258        model_id: &str,
259        dimensions: usize,
260        query: &str,
261    ) -> Result<Vec<SearchResult>, EmbeddingError> {
262        let index_manager = self
263            .index_manager
264            .as_ref()
265            .ok_or(EmbeddingError::NotEnabled)?;
266
267        // Embed the query
268        let query_vector = self.embed(query).await?.ok_or(EmbeddingError::NotEnabled)?;
269
270        // Search in the correct index
271        let results = index_manager.search(
272            repo,
273            model_id,
274            dimensions,
275            &query_vector,
276            10, // Top 10 results
277        )?;
278
279        Ok(results)
280    }
281
282    /// Get statistics about the embeddings system
283    pub fn stats(&self) -> EmbeddingsStats {
284        EmbeddingsStats {
285            enabled: self.is_enabled(),
286            active_provider: self.active_provider.clone(),
287            available_providers: self.providers.keys().cloned().collect(),
288            current_repo: self.current_repo.clone(),
289            intelligence_mode: self.intelligence_mode,
290            index_stats: self.index_manager.as_ref().map(|m| m.stats()),
291        }
292    }
293}
294
/// Statistics about the embeddings system
#[derive(Debug)]
pub struct EmbeddingsStats {
    /// Whether embeddings are currently enabled
    pub enabled: bool,
    /// Name of the provider in use, if any
    pub active_provider: Option<String>,
    /// All providers that were successfully registered
    pub available_providers: Vec<String>,
    /// Repository the manager is pointed at, if set
    pub current_repo: Option<PathBuf>,
    /// Current intelligence mode
    pub intelligence_mode: IntelligenceMode,
    /// Index statistics (None when no index manager exists)
    pub index_stats: Option<super::index_manager::IndexManagerStats>,
}
305
#[cfg(test)]
mod tests {
    use super::*;

    /// A disabled manager must hold no providers and no index state.
    #[test]
    fn test_disabled_manager_has_zero_overhead() {
        let manager = EmbeddingsManager::disabled();
        assert!(!manager.is_enabled());
        assert!(manager.providers.is_empty());
        assert!(manager.index_manager.is_none());
    }

    /// `embed` must short-circuit to Ok(None) when embeddings are disabled.
    #[tokio::test]
    async fn test_disabled_embed_returns_none() {
        let manager = EmbeddingsManager::disabled();
        let result = manager.embed("test").await.unwrap();
        assert!(result.is_none());
    }
}