codeprism_core/repository/
mod.rs

1//! Repository manager for orchestrating scanning and indexing operations
2//!
3//! This module provides high-level repository management functionality,
4//! coordinating the scanner, indexer, and file monitoring components.
5
6use crate::error::{Error, Result};
7use crate::indexer::{BulkIndexer, IndexingConfig, IndexingResult, IndexingStats};
8use crate::parser::{LanguageRegistry, ParserEngine};
9use crate::scanner::{NoOpProgressReporter, ProgressReporter, RepositoryScanner};
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12use std::path::{Path, PathBuf};
13use std::sync::Arc;
14use std::time::{SystemTime, UNIX_EPOCH};
15
16/// Repository configuration
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct RepositoryConfig {
19    /// Repository ID (usually path or name)
20    pub repo_id: String,
21    /// Repository root path
22    pub root_path: PathBuf,
23    /// Display name
24    pub name: String,
25    /// Description
26    pub description: Option<String>,
27    /// Languages to include (None = all supported)
28    pub include_languages: Option<Vec<String>>,
29    /// Maximum file size to process (bytes)
30    pub max_file_size: Option<usize>,
31    /// Whether to follow symlinks
32    pub follow_symlinks: bool,
33    /// Custom exclude patterns
34    pub exclude_patterns: Vec<String>,
35    /// Repository metadata
36    pub metadata: HashMap<String, String>,
37}
38
39impl RepositoryConfig {
40    /// Create a new repository config
41    pub fn new<P: AsRef<Path>>(repo_id: String, root_path: P) -> Self {
42        let root_path = root_path.as_ref().to_path_buf();
43        let name = root_path
44            .file_name()
45            .and_then(|n| n.to_str())
46            .unwrap_or(&repo_id)
47            .to_string();
48
49        Self {
50            repo_id,
51            root_path,
52            name,
53            description: None,
54            include_languages: None,
55            max_file_size: Some(10 * 1024 * 1024), // 10MB
56            follow_symlinks: false,
57            exclude_patterns: Vec::new(),
58            metadata: HashMap::new(),
59        }
60    }
61
62    /// Set the display name
63    pub fn with_name(mut self, name: String) -> Self {
64        self.name = name;
65        self
66    }
67
68    /// Set the description
69    pub fn with_description(mut self, description: String) -> Self {
70        self.description = Some(description);
71        self
72    }
73
74    /// Add a metadata entry
75    pub fn with_metadata(mut self, key: String, value: String) -> Self {
76        self.metadata.insert(key, value);
77        self
78    }
79}
80
81/// Repository health status
82#[derive(Debug, Clone, Serialize, Deserialize)]
83pub enum HealthStatus {
84    /// Repository is healthy and up to date
85    Healthy,
86    /// Repository needs reindexing
87    Stale,
88    /// Repository has indexing errors
89    Degraded {
90        /// Number of files that failed to index
91        error_count: usize,
92    },
93    /// Repository is corrupted or inaccessible
94    Unhealthy {
95        /// Description of why the repository is unhealthy
96        reason: String,
97    },
98}
99
100/// Repository statistics and metadata
101#[derive(Debug, Clone, Serialize, Deserialize)]
102pub struct RepositoryInfo {
103    /// Repository configuration
104    pub config: RepositoryConfig,
105    /// Health status
106    pub health: HealthStatus,
107    /// Last scan timestamp
108    pub last_scan: Option<u64>,
109    /// Last successful index timestamp
110    pub last_index: Option<u64>,
111    /// Indexing statistics from last run
112    pub last_stats: Option<IndexingStats>,
113    /// Total files indexed
114    pub total_files: usize,
115    /// Total nodes in graph
116    pub total_nodes: usize,
117    /// Total edges in graph
118    pub total_edges: usize,
119    /// Repository size in bytes
120    pub repo_size_bytes: usize,
121}
122
123impl RepositoryInfo {
124    /// Create new repository info
125    pub fn new(config: RepositoryConfig) -> Self {
126        Self {
127            config,
128            health: HealthStatus::Stale, // Needs initial indexing
129            last_scan: None,
130            last_index: None,
131            last_stats: None,
132            total_files: 0,
133            total_nodes: 0,
134            total_edges: 0,
135            repo_size_bytes: 0,
136        }
137    }
138
139    /// Check if repository needs reindexing
140    pub fn needs_reindexing(&self) -> bool {
141        matches!(
142            self.health,
143            HealthStatus::Stale | HealthStatus::Unhealthy { .. }
144        )
145    }
146
147    /// Get time since last index in seconds
148    pub fn time_since_last_index(&self) -> Option<u64> {
149        self.last_index.map(|last| {
150            SystemTime::now()
151                .duration_since(UNIX_EPOCH)
152                .unwrap_or_default()
153                .as_secs()
154                - last
155        })
156    }
157}
158
159/// Repository manager for coordinating scanning and indexing
160pub struct RepositoryManager {
161    scanner: RepositoryScanner,
162    parser_engine: Arc<ParserEngine>,
163    repositories: HashMap<String, RepositoryInfo>,
164}
165
166impl RepositoryManager {
167    /// Create a new repository manager
168    pub fn new(language_registry: Arc<LanguageRegistry>) -> Self {
169        let parser_engine = Arc::new(ParserEngine::new(language_registry));
170        let scanner = RepositoryScanner::new();
171
172        Self {
173            scanner,
174            parser_engine,
175            repositories: HashMap::new(),
176        }
177    }
178
179    /// Create a new repository manager with custom configuration
180    pub fn new_with_config(
181        language_registry: Arc<LanguageRegistry>,
182        exclude_dirs: Option<Vec<String>>,
183        include_extensions: Option<Vec<String>>,
184        dependency_mode: Option<crate::scanner::DependencyMode>,
185    ) -> Self {
186        let parser_engine = Arc::new(ParserEngine::new(language_registry));
187
188        let mut scanner = if let Some(exclude_dirs) = exclude_dirs {
189            RepositoryScanner::with_exclude_dirs(exclude_dirs)
190        } else {
191            RepositoryScanner::new()
192        };
193
194        if let Some(extensions) = include_extensions {
195            scanner = scanner.with_extensions(extensions);
196        }
197
198        // Apply dependency mode if provided
199        if let Some(dep_mode) = dependency_mode {
200            scanner = scanner.with_dependency_mode(dep_mode);
201        }
202
203        Self {
204            scanner,
205            parser_engine,
206            repositories: HashMap::new(),
207        }
208    }
209
210    /// Register a repository
211    pub fn register_repository(&mut self, config: RepositoryConfig) -> Result<()> {
212        // Validate repository path exists
213        if !config.root_path.exists() {
214            return Err(Error::io(format!(
215                "Repository path does not exist: {}",
216                config.root_path.display()
217            )));
218        }
219
220        if !config.root_path.is_dir() {
221            return Err(Error::io(format!(
222                "Repository path is not a directory: {}",
223                config.root_path.display()
224            )));
225        }
226
227        let repo_info = RepositoryInfo::new(config.clone());
228        self.repositories.insert(config.repo_id.clone(), repo_info);
229
230        Ok(())
231    }
232
233    /// Unregister a repository
234    pub fn unregister_repository(&mut self, repo_id: &str) {
235        self.repositories.remove(repo_id);
236    }
237
238    /// Get repository info
239    pub fn get_repository(&self, repo_id: &str) -> Option<&RepositoryInfo> {
240        self.repositories.get(repo_id)
241    }
242
243    /// Get all registered repositories
244    pub fn list_repositories(&self) -> Vec<&RepositoryInfo> {
245        self.repositories.values().collect()
246    }
247
248    /// Perform full repository scan and indexing
249    pub async fn index_repository(
250        &mut self,
251        repo_id: &str,
252        progress_reporter: Option<Arc<dyn ProgressReporter>>,
253    ) -> Result<IndexingResult> {
254        let repo_info = self
255            .repositories
256            .get_mut(repo_id)
257            .ok_or_else(|| Error::other(format!("Repository not found: {repo_id}")))?;
258
259        let progress = progress_reporter.unwrap_or_else(|| Arc::new(NoOpProgressReporter));
260
261        // Step 1: Scan repository
262        let scan_result = self
263            .scanner
264            .scan_repository(&repo_info.config.root_path, Arc::clone(&progress))
265            .await?;
266
267        // Update repository info with scan results
268        repo_info.last_scan = Some(
269            SystemTime::now()
270                .duration_since(UNIX_EPOCH)
271                .unwrap_or_default()
272                .as_secs(),
273        );
274        repo_info.total_files = scan_result.total_files;
275
276        // Step 2: Index discovered files
277        let indexing_config = IndexingConfig::new(
278            repo_id.to_string(),
279            format!("scan-{}", chrono::Utc::now().timestamp()),
280        );
281
282        let indexer = BulkIndexer::new(indexing_config, Arc::clone(&self.parser_engine));
283        let indexing_result = indexer.index_scan_result(&scan_result, progress).await?;
284
285        // Update repository info with indexing results
286        repo_info.last_index = Some(
287            SystemTime::now()
288                .duration_since(UNIX_EPOCH)
289                .unwrap_or_default()
290                .as_secs(),
291        );
292        repo_info.last_stats = Some(indexing_result.stats.clone());
293        repo_info.total_nodes = indexing_result.stats.nodes_created;
294        repo_info.total_edges = indexing_result.stats.edges_created;
295
296        // Update health status
297        repo_info.health = if indexing_result.stats.error_count == 0 {
298            HealthStatus::Healthy
299        } else if indexing_result.stats.error_count < indexing_result.stats.files_processed / 10 {
300            HealthStatus::Degraded {
301                error_count: indexing_result.stats.error_count,
302            }
303        } else {
304            HealthStatus::Unhealthy {
305                reason: format!(
306                    "High error rate: {}/{} files failed",
307                    indexing_result.stats.error_count, indexing_result.stats.files_processed
308                ),
309            }
310        };
311
312        Ok(indexing_result)
313    }
314
315    /// Quick repository health check
316    pub async fn health_check(&mut self, repo_id: &str) -> Result<HealthStatus> {
317        let repo_info = self
318            .repositories
319            .get_mut(repo_id)
320            .ok_or_else(|| Error::other(format!("Repository not found: {repo_id}")))?;
321
322        // Check if repository path still exists
323        if !repo_info.config.root_path.exists() {
324            repo_info.health = HealthStatus::Unhealthy {
325                reason: "Repository path no longer exists".to_string(),
326            };
327            return Ok(repo_info.health.clone());
328        }
329
330        // Check if indexing is stale (older than 24 hours)
331        if let Some(time_since) = repo_info.time_since_last_index() {
332            if time_since > 24 * 60 * 60 {
333                // 24 hours
334                repo_info.health = HealthStatus::Stale;
335            }
336        }
337
338        Ok(repo_info.health.clone())
339    }
340
341    /// Get repository statistics
342    pub fn get_stats(&self, repo_id: &str) -> Option<&IndexingStats> {
343        self.repositories
344            .get(repo_id)
345            .and_then(|info| info.last_stats.as_ref())
346    }
347
348    /// Get total statistics across all repositories
349    pub fn get_total_stats(&self) -> HashMap<String, usize> {
350        let mut stats = HashMap::new();
351
352        let total_repos = self.repositories.len();
353        let total_files: usize = self
354            .repositories
355            .values()
356            .map(|info| info.total_files)
357            .sum();
358        let total_nodes: usize = self
359            .repositories
360            .values()
361            .map(|info| info.total_nodes)
362            .sum();
363        let total_edges: usize = self
364            .repositories
365            .values()
366            .map(|info| info.total_edges)
367            .sum();
368
369        stats.insert("repositories".to_string(), total_repos);
370        stats.insert("files".to_string(), total_files);
371        stats.insert("nodes".to_string(), total_nodes);
372        stats.insert("edges".to_string(), total_edges);
373
374        stats
375    }
376}
377
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::LanguageRegistry;
    use std::fs;
    use tempfile::TempDir;

    /// Build a manager with an empty language registry together with a scratch
    /// directory; the directory is removed when the returned guard is dropped.
    fn create_test_manager() -> (RepositoryManager, TempDir) {
        let scratch = TempDir::new().unwrap();
        let manager = RepositoryManager::new(Arc::new(LanguageRegistry::new()));
        (manager, scratch)
    }

    /// `new` keeps the id and path and derives the name from the last path component.
    #[test]
    fn test_repository_config() {
        let cfg = RepositoryConfig::new("test_repo".to_string(), "/tmp/test");

        assert_eq!(cfg.repo_id, "test_repo");
        assert_eq!(cfg.root_path, PathBuf::from("/tmp/test"));
        assert_eq!(cfg.name, "test");
    }

    /// The `with_*` builders override name, description and metadata.
    #[test]
    fn test_repository_config_builder() {
        let cfg = RepositoryConfig::new("test".to_string(), "/tmp/test")
            .with_name("My Test Repo".to_string())
            .with_description("A test repository".to_string())
            .with_metadata("version".to_string(), "1.0".to_string());

        assert_eq!(cfg.name, "My Test Repo");
        assert_eq!(cfg.description, Some("A test repository".to_string()));
        assert_eq!(cfg.metadata.get("version"), Some(&"1.0".to_string()));
    }

    /// A fresh record is stale, needs indexing and has zeroed counters.
    #[test]
    fn test_repository_info() {
        let info = RepositoryInfo::new(RepositoryConfig::new("test".to_string(), "/tmp/test"));

        assert!(info.needs_reindexing());
        assert!(matches!(info.health, HealthStatus::Stale));
        assert_eq!(info.total_files, 0);
    }

    /// A new manager has no repositories registered.
    #[test]
    fn test_repository_manager_creation() {
        let manager = RepositoryManager::new(Arc::new(LanguageRegistry::new()));

        assert_eq!(
            manager.list_repositories().len(),
            0,
            "New manager should start with no repositories"
        );
        assert!(
            manager.list_repositories().is_empty(),
            "Repository list should be empty initially"
        );
    }

    /// Registering an existing directory succeeds and stores the config.
    #[test]
    fn test_register_repository() {
        let (mut manager, scratch) = create_test_manager();

        let outcome = manager.register_repository(RepositoryConfig::new(
            "test_repo".to_string(),
            scratch.path(),
        ));
        assert!(outcome.is_ok(), "Repository operation should succeed");

        let registered = manager.list_repositories();
        assert_eq!(
            registered.len(),
            1,
            "Should have 1 repository after registration"
        );

        // Verify repository content and properties
        let first = &registered[0];
        assert_eq!(
            first.config.repo_id, "test_repo",
            "Repository should have correct repo_id"
        );
        assert_eq!(
            first.config.root_path,
            scratch.path(),
            "Repository should have correct root_path"
        );
    }

    /// Registering a path that does not exist is rejected.
    #[test]
    fn test_register_nonexistent_repository() {
        let (mut manager, _scratch) = create_test_manager();

        let cfg = RepositoryConfig::new("test_repo".to_string(), "/nonexistent/path");

        assert!(manager.register_repository(cfg).is_err());
    }

    /// Unregistering removes the repository from the manager.
    #[test]
    fn test_unregister_repository() {
        let (mut manager, scratch) = create_test_manager();

        manager
            .register_repository(RepositoryConfig::new(
                "test_repo".to_string(),
                scratch.path(),
            ))
            .unwrap();
        assert_eq!(
            manager.list_repositories().len(),
            1,
            "Should have 1 repository after registration"
        );

        // Verify repository exists with correct name
        assert_eq!(
            manager.list_repositories()[0].config.repo_id,
            "test_repo",
            "Repository should have correct repo_id"
        );

        manager.unregister_repository("test_repo");
        assert_eq!(
            manager.list_repositories().len(),
            0,
            "Should have 0 repositories after unregistration"
        );

        // Verify repository is actually removed
        let remaining = manager.list_repositories();
        assert!(
            remaining.is_empty(),
            "Repository list should be empty after unregistration"
        );
        assert!(
            !remaining
                .iter()
                .any(|info| info.config.repo_id == "test_repo"),
            "test_repo should be completely removed"
        );
    }

    /// Indexing an id that was never registered fails.
    #[tokio::test]
    async fn test_index_nonexistent_repository() {
        let (mut manager, _scratch) = create_test_manager();

        let outcome = manager.index_repository("nonexistent", None).await;

        assert!(outcome.is_err());
    }

    /// A registered but never indexed repository reports `Stale`.
    #[tokio::test]
    async fn test_health_check() {
        let (mut manager, scratch) = create_test_manager();

        // Create test file
        fs::write(scratch.path().join("test.js"), "console.log('hello');").unwrap();

        manager
            .register_repository(RepositoryConfig::new(
                "test_repo".to_string(),
                scratch.path(),
            ))
            .unwrap();

        let status = manager.health_check("test_repo").await.unwrap();
        assert!(matches!(status, HealthStatus::Stale));
    }

    /// Totals count the registered repository and start with zero files.
    #[test]
    fn test_total_stats() {
        let (mut manager, scratch) = create_test_manager();

        manager
            .register_repository(RepositoryConfig::new(
                "test_repo".to_string(),
                scratch.path(),
            ))
            .unwrap();

        let totals = manager.get_total_stats();
        assert_eq!(totals.get("repositories"), Some(&1));
        assert_eq!(totals.get("files"), Some(&0));
    }
}