codeprism_core/repository/
mod.rs

1//! Repository manager for orchestrating scanning and indexing operations
2//!
3//! This module provides high-level repository management functionality,
4//! coordinating the scanner, indexer, and file monitoring components.
5
6use crate::error::{Error, Result};
7use crate::indexer::{BulkIndexer, IndexingConfig, IndexingResult, IndexingStats};
8use crate::parser::{LanguageRegistry, ParserEngine};
9use crate::scanner::{NoOpProgressReporter, ProgressReporter, RepositoryScanner};
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12use std::path::{Path, PathBuf};
13use std::sync::Arc;
14use std::time::{SystemTime, UNIX_EPOCH};
15
/// Repository configuration
///
/// Describes a single repository: its identifier, on-disk root, display
/// information and scanning preferences. Construct one with
/// [`RepositoryConfig::new`] and refine it with the `with_*` builder methods.
///
/// NOTE(review): within this module only `repo_id` and `root_path` are read
/// when registering and indexing a repository; confirm where the remaining
/// preferences (`include_languages`, `max_file_size`, `follow_symlinks`,
/// `exclude_patterns`) are consumed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepositoryConfig {
    /// Repository ID (usually path or name); used as the lookup key when the
    /// repository is registered with a manager
    pub repo_id: String,
    /// Repository root path; must be an existing directory for registration
    /// to succeed
    pub root_path: PathBuf,
    /// Display name; `new` derives it from the last component of `root_path`,
    /// falling back to `repo_id`
    pub name: String,
    /// Description (`None` until set with `with_description`)
    pub description: Option<String>,
    /// Languages to include (None = all supported)
    pub include_languages: Option<Vec<String>>,
    /// Maximum file size to process (bytes); `new` sets this to 10 MiB.
    /// Presumably `None` means "no limit" -- TODO confirm with the consumer
    pub max_file_size: Option<usize>,
    /// Whether to follow symlinks (`new` sets this to `false`)
    pub follow_symlinks: bool,
    /// Custom exclude patterns (empty by default; pattern syntax is defined by
    /// whichever component consumes them)
    pub exclude_patterns: Vec<String>,
    /// Repository metadata: free-form key/value pairs added via `with_metadata`
    pub metadata: HashMap<String, String>,
}
38
39impl RepositoryConfig {
40    /// Create a new repository config
41    pub fn new<P: AsRef<Path>>(repo_id: String, root_path: P) -> Self {
42        let root_path = root_path.as_ref().to_path_buf();
43        let name = root_path
44            .file_name()
45            .and_then(|n| n.to_str())
46            .unwrap_or(&repo_id)
47            .to_string();
48
49        Self {
50            repo_id,
51            root_path,
52            name,
53            description: None,
54            include_languages: None,
55            max_file_size: Some(10 * 1024 * 1024), // 10MB
56            follow_symlinks: false,
57            exclude_patterns: Vec::new(),
58            metadata: HashMap::new(),
59        }
60    }
61
62    /// Set the display name
63    pub fn with_name(mut self, name: String) -> Self {
64        self.name = name;
65        self
66    }
67
68    /// Set the description
69    pub fn with_description(mut self, description: String) -> Self {
70        self.description = Some(description);
71        self
72    }
73
74    /// Add a metadata entry
75    pub fn with_metadata(mut self, key: String, value: String) -> Self {
76        self.metadata.insert(key, value);
77        self
78    }
79}
80
/// Repository health status
///
/// Stored on each `RepositoryInfo` and updated by the manager after indexing
/// and during health checks. `RepositoryInfo::needs_reindexing` treats `Stale`
/// and `Unhealthy` as requiring a new indexing run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum HealthStatus {
    /// Repository is healthy and up to date (the last indexing run reported no
    /// errors)
    Healthy,
    /// Repository needs reindexing; this is the initial state of a newly
    /// created `RepositoryInfo` and is also set by a health check when the
    /// last index is older than 24 hours
    Stale,
    /// Repository has indexing errors, but only in a small share of the
    /// processed files (the exact threshold lives in
    /// `RepositoryManager::index_repository`); `error_count` is the number of
    /// errors reported by that run
    Degraded { error_count: usize },
    /// Repository is corrupted or inaccessible; `reason` is a human-readable
    /// explanation (for example a missing root path or a high error rate)
    Unhealthy { reason: String },
}
93
/// Repository statistics and metadata
///
/// Bookkeeping record the manager keeps for each registered repository: its
/// configuration, current health and the outcome of the most recent scan and
/// indexing run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepositoryInfo {
    /// Repository configuration
    pub config: RepositoryConfig,
    /// Health status
    pub health: HealthStatus,
    /// Last scan timestamp, in seconds since the Unix epoch (`None` until the
    /// repository has been scanned)
    pub last_scan: Option<u64>,
    /// Last successful index timestamp, in seconds since the Unix epoch
    /// (`None` until an indexing run has completed)
    pub last_index: Option<u64>,
    /// Indexing statistics from last run
    pub last_stats: Option<IndexingStats>,
    /// Total files indexed (taken from the scan result's file count)
    pub total_files: usize,
    /// Total nodes in graph (nodes created by the last indexing run)
    pub total_nodes: usize,
    /// Total edges in graph (edges created by the last indexing run)
    pub total_edges: usize,
    /// Repository size in bytes.
    /// NOTE(review): initialised to 0 and never updated anywhere in this
    /// module -- confirm whether another component is meant to fill it in
    pub repo_size_bytes: usize,
}
116
117impl RepositoryInfo {
118    /// Create new repository info
119    pub fn new(config: RepositoryConfig) -> Self {
120        Self {
121            config,
122            health: HealthStatus::Stale, // Needs initial indexing
123            last_scan: None,
124            last_index: None,
125            last_stats: None,
126            total_files: 0,
127            total_nodes: 0,
128            total_edges: 0,
129            repo_size_bytes: 0,
130        }
131    }
132
133    /// Check if repository needs reindexing
134    pub fn needs_reindexing(&self) -> bool {
135        matches!(
136            self.health,
137            HealthStatus::Stale | HealthStatus::Unhealthy { .. }
138        )
139    }
140
141    /// Get time since last index in seconds
142    pub fn time_since_last_index(&self) -> Option<u64> {
143        self.last_index.map(|last| {
144            SystemTime::now()
145                .duration_since(UNIX_EPOCH)
146                .unwrap_or_default()
147                .as_secs()
148                - last
149        })
150    }
151}
152
/// Repository manager for coordinating scanning and indexing
///
/// Owns one scanner and one shared parser engine that are reused for every
/// registered repository, plus the per-repository bookkeeping records keyed by
/// repository ID.
pub struct RepositoryManager {
    /// Scanner used to scan a repository's root path before indexing
    scanner: RepositoryScanner,
    /// Parser engine, shared via `Arc` with each indexer created for a run
    parser_engine: Arc<ParserEngine>,
    /// Registered repositories, keyed by `RepositoryConfig::repo_id`
    repositories: HashMap<String, RepositoryInfo>,
}
159
160impl RepositoryManager {
161    /// Create a new repository manager
162    pub fn new(language_registry: Arc<LanguageRegistry>) -> Self {
163        let parser_engine = Arc::new(ParserEngine::new(language_registry));
164        let scanner = RepositoryScanner::new();
165
166        Self {
167            scanner,
168            parser_engine,
169            repositories: HashMap::new(),
170        }
171    }
172
173    /// Create a new repository manager with custom configuration
174    pub fn new_with_config(
175        language_registry: Arc<LanguageRegistry>,
176        exclude_dirs: Option<Vec<String>>,
177        include_extensions: Option<Vec<String>>,
178        dependency_mode: Option<crate::scanner::DependencyMode>,
179    ) -> Self {
180        let parser_engine = Arc::new(ParserEngine::new(language_registry));
181
182        let mut scanner = if let Some(exclude_dirs) = exclude_dirs {
183            RepositoryScanner::with_exclude_dirs(exclude_dirs)
184        } else {
185            RepositoryScanner::new()
186        };
187
188        if let Some(extensions) = include_extensions {
189            scanner = scanner.with_extensions(extensions);
190        }
191
192        // Apply dependency mode if provided
193        if let Some(dep_mode) = dependency_mode {
194            scanner = scanner.with_dependency_mode(dep_mode);
195        }
196
197        Self {
198            scanner,
199            parser_engine,
200            repositories: HashMap::new(),
201        }
202    }
203
204    /// Register a repository
205    pub fn register_repository(&mut self, config: RepositoryConfig) -> Result<()> {
206        // Validate repository path exists
207        if !config.root_path.exists() {
208            return Err(Error::io(format!(
209                "Repository path does not exist: {}",
210                config.root_path.display()
211            )));
212        }
213
214        if !config.root_path.is_dir() {
215            return Err(Error::io(format!(
216                "Repository path is not a directory: {}",
217                config.root_path.display()
218            )));
219        }
220
221        let repo_info = RepositoryInfo::new(config.clone());
222        self.repositories.insert(config.repo_id.clone(), repo_info);
223
224        Ok(())
225    }
226
227    /// Unregister a repository
228    pub fn unregister_repository(&mut self, repo_id: &str) {
229        self.repositories.remove(repo_id);
230    }
231
232    /// Get repository info
233    pub fn get_repository(&self, repo_id: &str) -> Option<&RepositoryInfo> {
234        self.repositories.get(repo_id)
235    }
236
237    /// Get all registered repositories
238    pub fn list_repositories(&self) -> Vec<&RepositoryInfo> {
239        self.repositories.values().collect()
240    }
241
242    /// Perform full repository scan and indexing
243    pub async fn index_repository(
244        &mut self,
245        repo_id: &str,
246        progress_reporter: Option<Arc<dyn ProgressReporter>>,
247    ) -> Result<IndexingResult> {
248        let repo_info = self
249            .repositories
250            .get_mut(repo_id)
251            .ok_or_else(|| Error::other(format!("Repository not found: {}", repo_id)))?;
252
253        let progress = progress_reporter.unwrap_or_else(|| Arc::new(NoOpProgressReporter));
254
255        // Step 1: Scan repository
256        let scan_result = self
257            .scanner
258            .scan_repository(&repo_info.config.root_path, Arc::clone(&progress))
259            .await?;
260
261        // Update repository info with scan results
262        repo_info.last_scan = Some(
263            SystemTime::now()
264                .duration_since(UNIX_EPOCH)
265                .unwrap_or_default()
266                .as_secs(),
267        );
268        repo_info.total_files = scan_result.total_files;
269
270        // Step 2: Index discovered files
271        let indexing_config = IndexingConfig::new(
272            repo_id.to_string(),
273            format!("scan-{}", chrono::Utc::now().timestamp()),
274        );
275
276        let indexer = BulkIndexer::new(indexing_config, Arc::clone(&self.parser_engine));
277        let indexing_result = indexer.index_scan_result(&scan_result, progress).await?;
278
279        // Update repository info with indexing results
280        repo_info.last_index = Some(
281            SystemTime::now()
282                .duration_since(UNIX_EPOCH)
283                .unwrap_or_default()
284                .as_secs(),
285        );
286        repo_info.last_stats = Some(indexing_result.stats.clone());
287        repo_info.total_nodes = indexing_result.stats.nodes_created;
288        repo_info.total_edges = indexing_result.stats.edges_created;
289
290        // Update health status
291        repo_info.health = if indexing_result.stats.error_count == 0 {
292            HealthStatus::Healthy
293        } else if indexing_result.stats.error_count < indexing_result.stats.files_processed / 10 {
294            HealthStatus::Degraded {
295                error_count: indexing_result.stats.error_count,
296            }
297        } else {
298            HealthStatus::Unhealthy {
299                reason: format!(
300                    "High error rate: {}/{} files failed",
301                    indexing_result.stats.error_count, indexing_result.stats.files_processed
302                ),
303            }
304        };
305
306        Ok(indexing_result)
307    }
308
309    /// Quick repository health check
310    pub async fn health_check(&mut self, repo_id: &str) -> Result<HealthStatus> {
311        let repo_info = self
312            .repositories
313            .get_mut(repo_id)
314            .ok_or_else(|| Error::other(format!("Repository not found: {}", repo_id)))?;
315
316        // Check if repository path still exists
317        if !repo_info.config.root_path.exists() {
318            repo_info.health = HealthStatus::Unhealthy {
319                reason: "Repository path no longer exists".to_string(),
320            };
321            return Ok(repo_info.health.clone());
322        }
323
324        // Check if indexing is stale (older than 24 hours)
325        if let Some(time_since) = repo_info.time_since_last_index() {
326            if time_since > 24 * 60 * 60 {
327                // 24 hours
328                repo_info.health = HealthStatus::Stale;
329            }
330        }
331
332        Ok(repo_info.health.clone())
333    }
334
335    /// Get repository statistics
336    pub fn get_stats(&self, repo_id: &str) -> Option<&IndexingStats> {
337        self.repositories
338            .get(repo_id)
339            .and_then(|info| info.last_stats.as_ref())
340    }
341
342    /// Get total statistics across all repositories
343    pub fn get_total_stats(&self) -> HashMap<String, usize> {
344        let mut stats = HashMap::new();
345
346        let total_repos = self.repositories.len();
347        let total_files: usize = self
348            .repositories
349            .values()
350            .map(|info| info.total_files)
351            .sum();
352        let total_nodes: usize = self
353            .repositories
354            .values()
355            .map(|info| info.total_nodes)
356            .sum();
357        let total_edges: usize = self
358            .repositories
359            .values()
360            .map(|info| info.total_edges)
361            .sum();
362
363        stats.insert("repositories".to_string(), total_repos);
364        stats.insert("files".to_string(), total_files);
365        stats.insert("nodes".to_string(), total_nodes);
366        stats.insert("edges".to_string(), total_edges);
367
368        stats
369    }
370}
371
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::LanguageRegistry;
    use std::fs;
    use tempfile::TempDir;

    /// Build a manager together with a scratch directory that can serve as a
    /// repository root. The directory is removed when the returned `TempDir`
    /// is dropped, so callers must keep it alive for the duration of the test.
    fn create_test_manager() -> (RepositoryManager, TempDir) {
        let scratch = TempDir::new().unwrap();
        let manager = RepositoryManager::new(Arc::new(LanguageRegistry::new()));
        (manager, scratch)
    }

    /// `new` keeps the ID and path and derives the name from the last path
    /// component.
    #[test]
    fn test_repository_config() {
        let RepositoryConfig {
            repo_id,
            root_path,
            name,
            ..
        } = RepositoryConfig::new("test_repo".to_string(), "/tmp/test");

        assert_eq!(repo_id, "test_repo");
        assert_eq!(root_path, PathBuf::from("/tmp/test"));
        assert_eq!(name, "test");
    }

    /// The `with_*` builders override name, description and metadata.
    #[test]
    fn test_repository_config_builder() {
        let built = RepositoryConfig::new("test".to_string(), "/tmp/test")
            .with_name("My Test Repo".to_string())
            .with_description("A test repository".to_string())
            .with_metadata("version".to_string(), "1.0".to_string());

        assert_eq!(built.name, "My Test Repo");
        assert_eq!(built.description.as_deref(), Some("A test repository"));
        assert_eq!(
            built.metadata.get("version").map(String::as_str),
            Some("1.0")
        );
    }

    /// A fresh record is stale, wants indexing and has no files counted.
    #[test]
    fn test_repository_info() {
        let info = RepositoryInfo::new(RepositoryConfig::new("test".to_string(), "/tmp/test"));

        assert!(matches!(info.health, HealthStatus::Stale));
        assert!(info.needs_reindexing());
        assert_eq!(info.total_files, 0);
    }

    /// A new manager has no repositories registered.
    #[test]
    fn test_repository_manager_creation() {
        let manager = RepositoryManager::new(Arc::new(LanguageRegistry::new()));

        assert!(manager.list_repositories().is_empty());
    }

    /// Registering an existing directory succeeds and is listed.
    #[test]
    fn test_register_repository() {
        let (mut manager, scratch) = create_test_manager();

        let outcome = manager.register_repository(RepositoryConfig::new(
            "test_repo".to_string(),
            scratch.path(),
        ));

        assert!(outcome.is_ok());
        assert_eq!(manager.list_repositories().len(), 1);
    }

    /// Registering a path that does not exist is rejected.
    #[test]
    fn test_register_nonexistent_repository() {
        let (mut manager, _scratch) = create_test_manager();

        let outcome = manager.register_repository(RepositoryConfig::new(
            "test_repo".to_string(),
            "/nonexistent/path",
        ));

        assert!(outcome.is_err());
    }

    /// Unregistering removes a previously registered repository.
    #[test]
    fn test_unregister_repository() {
        let (mut manager, scratch) = create_test_manager();

        manager
            .register_repository(RepositoryConfig::new(
                "test_repo".to_string(),
                scratch.path(),
            ))
            .unwrap();
        assert_eq!(manager.list_repositories().len(), 1);

        manager.unregister_repository("test_repo");
        assert!(manager.list_repositories().is_empty());
    }

    /// Indexing an ID that was never registered returns an error.
    #[tokio::test]
    async fn test_index_nonexistent_repository() {
        let (mut manager, _scratch) = create_test_manager();

        let outcome = manager.index_repository("nonexistent", None).await;

        assert!(outcome.is_err());
    }

    /// A registered but never indexed repository reports `Stale`.
    #[tokio::test]
    async fn test_health_check() {
        let (mut manager, scratch) = create_test_manager();

        // Create test file
        let source_file = scratch.path().join("test.js");
        fs::write(source_file, "console.log('hello');").unwrap();

        manager
            .register_repository(RepositoryConfig::new(
                "test_repo".to_string(),
                scratch.path(),
            ))
            .unwrap();

        let status = manager.health_check("test_repo").await.unwrap();
        assert!(matches!(status, HealthStatus::Stale));
    }

    /// Totals count the registered repository and start with zero files.
    #[test]
    fn test_total_stats() {
        let (mut manager, scratch) = create_test_manager();

        manager
            .register_repository(RepositoryConfig::new(
                "test_repo".to_string(),
                scratch.path(),
            ))
            .unwrap();

        let totals = manager.get_total_stats();
        assert_eq!(totals.get("repositories").copied(), Some(1));
        assert_eq!(totals.get("files").copied(), Some(0));
    }
}