codeprism_core/repository/
mod.rs1use crate::error::{Error, Result};
7use crate::indexer::{BulkIndexer, IndexingConfig, IndexingResult, IndexingStats};
8use crate::parser::{LanguageRegistry, ParserEngine};
9use crate::scanner::{NoOpProgressReporter, ProgressReporter, RepositoryScanner};
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12use std::path::{Path, PathBuf};
13use std::sync::Arc;
14use std::time::{SystemTime, UNIX_EPOCH};
15
16#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct RepositoryConfig {
19 pub repo_id: String,
21 pub root_path: PathBuf,
23 pub name: String,
25 pub description: Option<String>,
27 pub include_languages: Option<Vec<String>>,
29 pub max_file_size: Option<usize>,
31 pub follow_symlinks: bool,
33 pub exclude_patterns: Vec<String>,
35 pub metadata: HashMap<String, String>,
37}
38
39impl RepositoryConfig {
40 pub fn new<P: AsRef<Path>>(repo_id: String, root_path: P) -> Self {
42 let root_path = root_path.as_ref().to_path_buf();
43 let name = root_path
44 .file_name()
45 .and_then(|n| n.to_str())
46 .unwrap_or(&repo_id)
47 .to_string();
48
49 Self {
50 repo_id,
51 root_path,
52 name,
53 description: None,
54 include_languages: None,
55 max_file_size: Some(10 * 1024 * 1024), follow_symlinks: false,
57 exclude_patterns: Vec::new(),
58 metadata: HashMap::new(),
59 }
60 }
61
62 pub fn with_name(mut self, name: String) -> Self {
64 self.name = name;
65 self
66 }
67
68 pub fn with_description(mut self, description: String) -> Self {
70 self.description = Some(description);
71 self
72 }
73
74 pub fn with_metadata(mut self, key: String, value: String) -> Self {
76 self.metadata.insert(key, value);
77 self
78 }
79}
80
81#[derive(Debug, Clone, Serialize, Deserialize)]
83pub enum HealthStatus {
84 Healthy,
86 Stale,
88 Degraded {
90 error_count: usize,
92 },
93 Unhealthy {
95 reason: String,
97 },
98}
99
100#[derive(Debug, Clone, Serialize, Deserialize)]
102pub struct RepositoryInfo {
103 pub config: RepositoryConfig,
105 pub health: HealthStatus,
107 pub last_scan: Option<u64>,
109 pub last_index: Option<u64>,
111 pub last_stats: Option<IndexingStats>,
113 pub total_files: usize,
115 pub total_nodes: usize,
117 pub total_edges: usize,
119 pub repo_size_bytes: usize,
121}
122
123impl RepositoryInfo {
124 pub fn new(config: RepositoryConfig) -> Self {
126 Self {
127 config,
128 health: HealthStatus::Stale, last_scan: None,
130 last_index: None,
131 last_stats: None,
132 total_files: 0,
133 total_nodes: 0,
134 total_edges: 0,
135 repo_size_bytes: 0,
136 }
137 }
138
139 pub fn needs_reindexing(&self) -> bool {
141 matches!(
142 self.health,
143 HealthStatus::Stale | HealthStatus::Unhealthy { .. }
144 )
145 }
146
147 pub fn time_since_last_index(&self) -> Option<u64> {
149 self.last_index.map(|last| {
150 SystemTime::now()
151 .duration_since(UNIX_EPOCH)
152 .unwrap_or_default()
153 .as_secs()
154 - last
155 })
156 }
157}
158
159pub struct RepositoryManager {
161 scanner: RepositoryScanner,
162 parser_engine: Arc<ParserEngine>,
163 repositories: HashMap<String, RepositoryInfo>,
164}
165
166impl RepositoryManager {
167 pub fn new(language_registry: Arc<LanguageRegistry>) -> Self {
169 let parser_engine = Arc::new(ParserEngine::new(language_registry));
170 let scanner = RepositoryScanner::new();
171
172 Self {
173 scanner,
174 parser_engine,
175 repositories: HashMap::new(),
176 }
177 }
178
179 pub fn new_with_config(
181 language_registry: Arc<LanguageRegistry>,
182 exclude_dirs: Option<Vec<String>>,
183 include_extensions: Option<Vec<String>>,
184 dependency_mode: Option<crate::scanner::DependencyMode>,
185 ) -> Self {
186 let parser_engine = Arc::new(ParserEngine::new(language_registry));
187
188 let mut scanner = if let Some(exclude_dirs) = exclude_dirs {
189 RepositoryScanner::with_exclude_dirs(exclude_dirs)
190 } else {
191 RepositoryScanner::new()
192 };
193
194 if let Some(extensions) = include_extensions {
195 scanner = scanner.with_extensions(extensions);
196 }
197
198 if let Some(dep_mode) = dependency_mode {
200 scanner = scanner.with_dependency_mode(dep_mode);
201 }
202
203 Self {
204 scanner,
205 parser_engine,
206 repositories: HashMap::new(),
207 }
208 }
209
210 pub fn register_repository(&mut self, config: RepositoryConfig) -> Result<()> {
212 if !config.root_path.exists() {
214 return Err(Error::io(format!(
215 "Repository path does not exist: {}",
216 config.root_path.display()
217 )));
218 }
219
220 if !config.root_path.is_dir() {
221 return Err(Error::io(format!(
222 "Repository path is not a directory: {}",
223 config.root_path.display()
224 )));
225 }
226
227 let repo_info = RepositoryInfo::new(config.clone());
228 self.repositories.insert(config.repo_id.clone(), repo_info);
229
230 Ok(())
231 }
232
233 pub fn unregister_repository(&mut self, repo_id: &str) {
235 self.repositories.remove(repo_id);
236 }
237
238 pub fn get_repository(&self, repo_id: &str) -> Option<&RepositoryInfo> {
240 self.repositories.get(repo_id)
241 }
242
243 pub fn list_repositories(&self) -> Vec<&RepositoryInfo> {
245 self.repositories.values().collect()
246 }
247
248 pub async fn index_repository(
250 &mut self,
251 repo_id: &str,
252 progress_reporter: Option<Arc<dyn ProgressReporter>>,
253 ) -> Result<IndexingResult> {
254 let repo_info = self
255 .repositories
256 .get_mut(repo_id)
257 .ok_or_else(|| Error::other(format!("Repository not found: {repo_id}")))?;
258
259 let progress = progress_reporter.unwrap_or_else(|| Arc::new(NoOpProgressReporter));
260
261 let scan_result = self
263 .scanner
264 .scan_repository(&repo_info.config.root_path, Arc::clone(&progress))
265 .await?;
266
267 repo_info.last_scan = Some(
269 SystemTime::now()
270 .duration_since(UNIX_EPOCH)
271 .unwrap_or_default()
272 .as_secs(),
273 );
274 repo_info.total_files = scan_result.total_files;
275
276 let indexing_config = IndexingConfig::new(
278 repo_id.to_string(),
279 format!("scan-{}", chrono::Utc::now().timestamp()),
280 );
281
282 let indexer = BulkIndexer::new(indexing_config, Arc::clone(&self.parser_engine));
283 let indexing_result = indexer.index_scan_result(&scan_result, progress).await?;
284
285 repo_info.last_index = Some(
287 SystemTime::now()
288 .duration_since(UNIX_EPOCH)
289 .unwrap_or_default()
290 .as_secs(),
291 );
292 repo_info.last_stats = Some(indexing_result.stats.clone());
293 repo_info.total_nodes = indexing_result.stats.nodes_created;
294 repo_info.total_edges = indexing_result.stats.edges_created;
295
296 repo_info.health = if indexing_result.stats.error_count == 0 {
298 HealthStatus::Healthy
299 } else if indexing_result.stats.error_count < indexing_result.stats.files_processed / 10 {
300 HealthStatus::Degraded {
301 error_count: indexing_result.stats.error_count,
302 }
303 } else {
304 HealthStatus::Unhealthy {
305 reason: format!(
306 "High error rate: {}/{} files failed",
307 indexing_result.stats.error_count, indexing_result.stats.files_processed
308 ),
309 }
310 };
311
312 Ok(indexing_result)
313 }
314
315 pub async fn health_check(&mut self, repo_id: &str) -> Result<HealthStatus> {
317 let repo_info = self
318 .repositories
319 .get_mut(repo_id)
320 .ok_or_else(|| Error::other(format!("Repository not found: {repo_id}")))?;
321
322 if !repo_info.config.root_path.exists() {
324 repo_info.health = HealthStatus::Unhealthy {
325 reason: "Repository path no longer exists".to_string(),
326 };
327 return Ok(repo_info.health.clone());
328 }
329
330 if let Some(time_since) = repo_info.time_since_last_index() {
332 if time_since > 24 * 60 * 60 {
333 repo_info.health = HealthStatus::Stale;
335 }
336 }
337
338 Ok(repo_info.health.clone())
339 }
340
341 pub fn get_stats(&self, repo_id: &str) -> Option<&IndexingStats> {
343 self.repositories
344 .get(repo_id)
345 .and_then(|info| info.last_stats.as_ref())
346 }
347
348 pub fn get_total_stats(&self) -> HashMap<String, usize> {
350 let mut stats = HashMap::new();
351
352 let total_repos = self.repositories.len();
353 let total_files: usize = self
354 .repositories
355 .values()
356 .map(|info| info.total_files)
357 .sum();
358 let total_nodes: usize = self
359 .repositories
360 .values()
361 .map(|info| info.total_nodes)
362 .sum();
363 let total_edges: usize = self
364 .repositories
365 .values()
366 .map(|info| info.total_edges)
367 .sum();
368
369 stats.insert("repositories".to_string(), total_repos);
370 stats.insert("files".to_string(), total_files);
371 stats.insert("nodes".to_string(), total_nodes);
372 stats.insert("edges".to_string(), total_edges);
373
374 stats
375 }
376}
377
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::LanguageRegistry;
    use std::fs;
    use tempfile::TempDir;

    /// Builds a manager over an empty language registry, plus a temporary directory
    /// that tests can register as a repository root.
    fn create_test_manager() -> (RepositoryManager, TempDir) {
        let temp_dir = TempDir::new().unwrap();
        let manager = RepositoryManager::new(Arc::new(LanguageRegistry::new()));
        (manager, temp_dir)
    }

    /// `new` copies the id and path and derives the name from the path.
    #[test]
    fn test_repository_config() {
        let config = RepositoryConfig::new("test_repo".to_string(), "/tmp/test");

        assert_eq!(config.repo_id, "test_repo");
        assert_eq!(config.root_path, PathBuf::from("/tmp/test"));
        assert_eq!(config.name, "test");
    }

    /// The `with_*` builders override name, description and metadata.
    #[test]
    fn test_repository_config_builder() {
        let base = RepositoryConfig::new("test".to_string(), "/tmp/test");
        let config = base
            .with_name("My Test Repo".to_string())
            .with_description("A test repository".to_string())
            .with_metadata("version".to_string(), "1.0".to_string());

        assert_eq!(config.name, "My Test Repo");
        assert_eq!(config.description, Some("A test repository".to_string()));
        assert_eq!(config.metadata.get("version"), Some(&"1.0".to_string()));
    }

    /// A fresh record is stale, needs indexing and has zero files.
    #[test]
    fn test_repository_info() {
        let info = RepositoryInfo::new(RepositoryConfig::new("test".to_string(), "/tmp/test"));

        assert!(info.needs_reindexing());
        assert!(matches!(info.health, HealthStatus::Stale));
        assert_eq!(info.total_files, 0);
    }

    /// A new manager has no repositories.
    #[test]
    fn test_repository_manager_creation() {
        let manager = RepositoryManager::new(Arc::new(LanguageRegistry::new()));

        let repos = manager.list_repositories();
        assert_eq!(
            repos.len(),
            0,
            "New manager should start with no repositories"
        );
        assert!(
            repos.is_empty(),
            "Repository list should be empty initially"
        );
    }

    /// Registering an existing directory stores its configuration.
    #[test]
    fn test_register_repository() {
        let (mut manager, temp_dir) = create_test_manager();

        let outcome = manager
            .register_repository(RepositoryConfig::new("test_repo".to_string(), temp_dir.path()));
        assert!(outcome.is_ok(), "Repository operation should succeed");

        let repos = manager.list_repositories();
        assert_eq!(repos.len(), 1, "Should have 1 repository after registration");

        let repo = &repos[0];
        assert_eq!(
            repo.config.repo_id, "test_repo",
            "Repository should have correct repo_id"
        );
        assert_eq!(
            repo.config.root_path,
            temp_dir.path(),
            "Repository should have correct root_path"
        );
    }

    /// Registering a path that does not exist is rejected.
    #[test]
    fn test_register_nonexistent_repository() {
        let (mut manager, _temp_dir) = create_test_manager();

        let outcome = manager
            .register_repository(RepositoryConfig::new("test_repo".to_string(), "/nonexistent/path"));
        assert!(outcome.is_err());
    }

    /// Unregistering removes the repository from the manager.
    #[test]
    fn test_unregister_repository() {
        let (mut manager, temp_dir) = create_test_manager();

        manager
            .register_repository(RepositoryConfig::new("test_repo".to_string(), temp_dir.path()))
            .unwrap();

        let repos_before = manager.list_repositories();
        assert_eq!(
            repos_before.len(),
            1,
            "Should have 1 repository after registration"
        );
        assert_eq!(
            repos_before[0].config.repo_id, "test_repo",
            "Repository should have correct repo_id"
        );

        manager.unregister_repository("test_repo");

        let repos_after = manager.list_repositories();
        assert_eq!(
            repos_after.len(),
            0,
            "Should have 0 repositories after unregistration"
        );
        assert!(
            repos_after.is_empty(),
            "Repository list should be empty after unregistration"
        );
        assert!(
            !repos_after.iter().any(|r| r.config.repo_id == "test_repo"),
            "test_repo should be completely removed"
        );
    }

    /// Indexing an id that was never registered fails.
    #[tokio::test]
    async fn test_index_nonexistent_repository() {
        let (mut manager, _temp_dir) = create_test_manager();

        let outcome = manager.index_repository("nonexistent", None).await;
        assert!(outcome.is_err());
    }

    /// A registered but never-indexed repository reports as stale.
    #[tokio::test]
    async fn test_health_check() {
        let (mut manager, temp_dir) = create_test_manager();

        let sample_file = temp_dir.path().join("test.js");
        fs::write(sample_file, "console.log('hello');").unwrap();

        manager
            .register_repository(RepositoryConfig::new("test_repo".to_string(), temp_dir.path()))
            .unwrap();

        let health = manager.health_check("test_repo").await.unwrap();
        assert!(matches!(health, HealthStatus::Stale));
    }

    /// Totals count registered repositories and start with zero files.
    #[test]
    fn test_total_stats() {
        let (mut manager, temp_dir) = create_test_manager();

        manager
            .register_repository(RepositoryConfig::new("test_repo".to_string(), temp_dir.path()))
            .unwrap();

        let stats = manager.get_total_stats();
        assert_eq!(stats.get("repositories"), Some(&1));
        assert_eq!(stats.get("files"), Some(&0));
    }
}