codeprism_core/repository/
mod.rs1use crate::error::{Error, Result};
7use crate::indexer::{BulkIndexer, IndexingConfig, IndexingResult, IndexingStats};
8use crate::parser::{LanguageRegistry, ParserEngine};
9use crate::scanner::{NoOpProgressReporter, ProgressReporter, RepositoryScanner};
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12use std::path::{Path, PathBuf};
13use std::sync::Arc;
14use std::time::{SystemTime, UNIX_EPOCH};
15
16#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct RepositoryConfig {
19 pub repo_id: String,
21 pub root_path: PathBuf,
23 pub name: String,
25 pub description: Option<String>,
27 pub include_languages: Option<Vec<String>>,
29 pub max_file_size: Option<usize>,
31 pub follow_symlinks: bool,
33 pub exclude_patterns: Vec<String>,
35 pub metadata: HashMap<String, String>,
37}
38
39impl RepositoryConfig {
40 pub fn new<P: AsRef<Path>>(repo_id: String, root_path: P) -> Self {
42 let root_path = root_path.as_ref().to_path_buf();
43 let name = root_path
44 .file_name()
45 .and_then(|n| n.to_str())
46 .unwrap_or(&repo_id)
47 .to_string();
48
49 Self {
50 repo_id,
51 root_path,
52 name,
53 description: None,
54 include_languages: None,
55 max_file_size: Some(10 * 1024 * 1024), follow_symlinks: false,
57 exclude_patterns: Vec::new(),
58 metadata: HashMap::new(),
59 }
60 }
61
62 pub fn with_name(mut self, name: String) -> Self {
64 self.name = name;
65 self
66 }
67
68 pub fn with_description(mut self, description: String) -> Self {
70 self.description = Some(description);
71 self
72 }
73
74 pub fn with_metadata(mut self, key: String, value: String) -> Self {
76 self.metadata.insert(key, value);
77 self
78 }
79}
80
81#[derive(Debug, Clone, Serialize, Deserialize)]
83pub enum HealthStatus {
84 Healthy,
86 Stale,
88 Degraded { error_count: usize },
90 Unhealthy { reason: String },
92}
93
94#[derive(Debug, Clone, Serialize, Deserialize)]
96pub struct RepositoryInfo {
97 pub config: RepositoryConfig,
99 pub health: HealthStatus,
101 pub last_scan: Option<u64>,
103 pub last_index: Option<u64>,
105 pub last_stats: Option<IndexingStats>,
107 pub total_files: usize,
109 pub total_nodes: usize,
111 pub total_edges: usize,
113 pub repo_size_bytes: usize,
115}
116
117impl RepositoryInfo {
118 pub fn new(config: RepositoryConfig) -> Self {
120 Self {
121 config,
122 health: HealthStatus::Stale, last_scan: None,
124 last_index: None,
125 last_stats: None,
126 total_files: 0,
127 total_nodes: 0,
128 total_edges: 0,
129 repo_size_bytes: 0,
130 }
131 }
132
133 pub fn needs_reindexing(&self) -> bool {
135 matches!(
136 self.health,
137 HealthStatus::Stale | HealthStatus::Unhealthy { .. }
138 )
139 }
140
141 pub fn time_since_last_index(&self) -> Option<u64> {
143 self.last_index.map(|last| {
144 SystemTime::now()
145 .duration_since(UNIX_EPOCH)
146 .unwrap_or_default()
147 .as_secs()
148 - last
149 })
150 }
151}
152
153pub struct RepositoryManager {
155 scanner: RepositoryScanner,
156 parser_engine: Arc<ParserEngine>,
157 repositories: HashMap<String, RepositoryInfo>,
158}
159
160impl RepositoryManager {
161 pub fn new(language_registry: Arc<LanguageRegistry>) -> Self {
163 let parser_engine = Arc::new(ParserEngine::new(language_registry));
164 let scanner = RepositoryScanner::new();
165
166 Self {
167 scanner,
168 parser_engine,
169 repositories: HashMap::new(),
170 }
171 }
172
173 pub fn new_with_config(
175 language_registry: Arc<LanguageRegistry>,
176 exclude_dirs: Option<Vec<String>>,
177 include_extensions: Option<Vec<String>>,
178 dependency_mode: Option<crate::scanner::DependencyMode>,
179 ) -> Self {
180 let parser_engine = Arc::new(ParserEngine::new(language_registry));
181
182 let mut scanner = if let Some(exclude_dirs) = exclude_dirs {
183 RepositoryScanner::with_exclude_dirs(exclude_dirs)
184 } else {
185 RepositoryScanner::new()
186 };
187
188 if let Some(extensions) = include_extensions {
189 scanner = scanner.with_extensions(extensions);
190 }
191
192 if let Some(dep_mode) = dependency_mode {
194 scanner = scanner.with_dependency_mode(dep_mode);
195 }
196
197 Self {
198 scanner,
199 parser_engine,
200 repositories: HashMap::new(),
201 }
202 }
203
204 pub fn register_repository(&mut self, config: RepositoryConfig) -> Result<()> {
206 if !config.root_path.exists() {
208 return Err(Error::io(format!(
209 "Repository path does not exist: {}",
210 config.root_path.display()
211 )));
212 }
213
214 if !config.root_path.is_dir() {
215 return Err(Error::io(format!(
216 "Repository path is not a directory: {}",
217 config.root_path.display()
218 )));
219 }
220
221 let repo_info = RepositoryInfo::new(config.clone());
222 self.repositories.insert(config.repo_id.clone(), repo_info);
223
224 Ok(())
225 }
226
227 pub fn unregister_repository(&mut self, repo_id: &str) {
229 self.repositories.remove(repo_id);
230 }
231
232 pub fn get_repository(&self, repo_id: &str) -> Option<&RepositoryInfo> {
234 self.repositories.get(repo_id)
235 }
236
237 pub fn list_repositories(&self) -> Vec<&RepositoryInfo> {
239 self.repositories.values().collect()
240 }
241
242 pub async fn index_repository(
244 &mut self,
245 repo_id: &str,
246 progress_reporter: Option<Arc<dyn ProgressReporter>>,
247 ) -> Result<IndexingResult> {
248 let repo_info = self
249 .repositories
250 .get_mut(repo_id)
251 .ok_or_else(|| Error::other(format!("Repository not found: {}", repo_id)))?;
252
253 let progress = progress_reporter.unwrap_or_else(|| Arc::new(NoOpProgressReporter));
254
255 let scan_result = self
257 .scanner
258 .scan_repository(&repo_info.config.root_path, Arc::clone(&progress))
259 .await?;
260
261 repo_info.last_scan = Some(
263 SystemTime::now()
264 .duration_since(UNIX_EPOCH)
265 .unwrap_or_default()
266 .as_secs(),
267 );
268 repo_info.total_files = scan_result.total_files;
269
270 let indexing_config = IndexingConfig::new(
272 repo_id.to_string(),
273 format!("scan-{}", chrono::Utc::now().timestamp()),
274 );
275
276 let indexer = BulkIndexer::new(indexing_config, Arc::clone(&self.parser_engine));
277 let indexing_result = indexer.index_scan_result(&scan_result, progress).await?;
278
279 repo_info.last_index = Some(
281 SystemTime::now()
282 .duration_since(UNIX_EPOCH)
283 .unwrap_or_default()
284 .as_secs(),
285 );
286 repo_info.last_stats = Some(indexing_result.stats.clone());
287 repo_info.total_nodes = indexing_result.stats.nodes_created;
288 repo_info.total_edges = indexing_result.stats.edges_created;
289
290 repo_info.health = if indexing_result.stats.error_count == 0 {
292 HealthStatus::Healthy
293 } else if indexing_result.stats.error_count < indexing_result.stats.files_processed / 10 {
294 HealthStatus::Degraded {
295 error_count: indexing_result.stats.error_count,
296 }
297 } else {
298 HealthStatus::Unhealthy {
299 reason: format!(
300 "High error rate: {}/{} files failed",
301 indexing_result.stats.error_count, indexing_result.stats.files_processed
302 ),
303 }
304 };
305
306 Ok(indexing_result)
307 }
308
309 pub async fn health_check(&mut self, repo_id: &str) -> Result<HealthStatus> {
311 let repo_info = self
312 .repositories
313 .get_mut(repo_id)
314 .ok_or_else(|| Error::other(format!("Repository not found: {}", repo_id)))?;
315
316 if !repo_info.config.root_path.exists() {
318 repo_info.health = HealthStatus::Unhealthy {
319 reason: "Repository path no longer exists".to_string(),
320 };
321 return Ok(repo_info.health.clone());
322 }
323
324 if let Some(time_since) = repo_info.time_since_last_index() {
326 if time_since > 24 * 60 * 60 {
327 repo_info.health = HealthStatus::Stale;
329 }
330 }
331
332 Ok(repo_info.health.clone())
333 }
334
335 pub fn get_stats(&self, repo_id: &str) -> Option<&IndexingStats> {
337 self.repositories
338 .get(repo_id)
339 .and_then(|info| info.last_stats.as_ref())
340 }
341
342 pub fn get_total_stats(&self) -> HashMap<String, usize> {
344 let mut stats = HashMap::new();
345
346 let total_repos = self.repositories.len();
347 let total_files: usize = self
348 .repositories
349 .values()
350 .map(|info| info.total_files)
351 .sum();
352 let total_nodes: usize = self
353 .repositories
354 .values()
355 .map(|info| info.total_nodes)
356 .sum();
357 let total_edges: usize = self
358 .repositories
359 .values()
360 .map(|info| info.total_edges)
361 .sum();
362
363 stats.insert("repositories".to_string(), total_repos);
364 stats.insert("files".to_string(), total_files);
365 stats.insert("nodes".to_string(), total_nodes);
366 stats.insert("edges".to_string(), total_edges);
367
368 stats
369 }
370}
371
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::LanguageRegistry;
    use std::fs;
    use tempfile::TempDir;

    /// Builds a manager backed by an empty language registry, together with a
    /// scratch directory that can serve as a repository root. The directory is
    /// returned so that it outlives the test body.
    fn create_test_manager() -> (RepositoryManager, TempDir) {
        let scratch = TempDir::new().unwrap();
        let manager = RepositoryManager::new(Arc::new(LanguageRegistry::new()));
        (manager, scratch)
    }

    /// `new` keeps the id and path and derives the name from the path.
    #[test]
    fn test_repository_config() {
        let cfg = RepositoryConfig::new("test_repo".to_string(), "/tmp/test");

        assert_eq!(cfg.repo_id, "test_repo");
        assert_eq!(cfg.root_path, PathBuf::from("/tmp/test"));
        assert_eq!(cfg.name, "test");
    }

    /// The `with_*` builders override name, description and metadata.
    #[test]
    fn test_repository_config_builder() {
        let base = RepositoryConfig::new("test".to_string(), "/tmp/test");
        let named = base.with_name("My Test Repo".to_string());
        let described = named.with_description("A test repository".to_string());
        let cfg = described.with_metadata("version".to_string(), "1.0".to_string());

        assert_eq!(cfg.name, "My Test Repo");
        assert_eq!(cfg.description, Some("A test repository".to_string()));
        assert_eq!(cfg.metadata.get("version"), Some(&"1.0".to_string()));
    }

    /// A fresh record is stale, wants indexing and has zeroed counters.
    #[test]
    fn test_repository_info() {
        let info = RepositoryInfo::new(RepositoryConfig::new("test".to_string(), "/tmp/test"));

        assert!(info.needs_reindexing());
        assert!(matches!(info.health, HealthStatus::Stale));
        assert_eq!(info.total_files, 0);
    }

    /// A new manager has no repositories.
    #[test]
    fn test_repository_manager_creation() {
        let manager = RepositoryManager::new(Arc::new(LanguageRegistry::new()));

        assert_eq!(manager.list_repositories().len(), 0);
    }

    /// Registering an existing directory succeeds and adds one entry.
    #[test]
    fn test_register_repository() {
        let (mut manager, scratch) = create_test_manager();
        let cfg = RepositoryConfig::new("test_repo".to_string(), scratch.path());

        let outcome = manager.register_repository(cfg);

        assert!(outcome.is_ok());
        assert_eq!(manager.list_repositories().len(), 1);
    }

    /// Registering a path that does not exist is rejected.
    #[test]
    fn test_register_nonexistent_repository() {
        let (mut manager, _scratch) = create_test_manager();
        let cfg = RepositoryConfig::new("test_repo".to_string(), "/nonexistent/path");

        let outcome = manager.register_repository(cfg);

        assert!(outcome.is_err());
    }

    /// Unregistering removes a previously registered repository.
    #[test]
    fn test_unregister_repository() {
        let (mut manager, scratch) = create_test_manager();
        let cfg = RepositoryConfig::new("test_repo".to_string(), scratch.path());

        manager.register_repository(cfg).unwrap();
        assert_eq!(manager.list_repositories().len(), 1);

        manager.unregister_repository("test_repo");
        assert_eq!(manager.list_repositories().len(), 0);
    }

    /// Indexing an id that was never registered fails.
    #[tokio::test]
    async fn test_index_nonexistent_repository() {
        let (mut manager, _scratch) = create_test_manager();

        let outcome = manager.index_repository("nonexistent", None).await;

        assert!(outcome.is_err());
    }

    /// A registered but never indexed repository reports `Stale`.
    #[tokio::test]
    async fn test_health_check() {
        let (mut manager, scratch) = create_test_manager();

        let sample = scratch.path().join("test.js");
        fs::write(sample, "console.log('hello');").unwrap();

        let cfg = RepositoryConfig::new("test_repo".to_string(), scratch.path());
        manager.register_repository(cfg).unwrap();

        let status = manager.health_check("test_repo").await.unwrap();
        assert!(matches!(status, HealthStatus::Stale));
    }

    /// Totals count the registered repository and report zero files before indexing.
    #[test]
    fn test_total_stats() {
        let (mut manager, scratch) = create_test_manager();
        let cfg = RepositoryConfig::new("test_repo".to_string(), scratch.path());
        manager.register_repository(cfg).unwrap();

        let totals = manager.get_total_stats();

        assert_eq!(totals.get("repositories"), Some(&1));
        assert_eq!(totals.get("files"), Some(&0));
    }
}