ricecoder_github/managers/
repository_analyzer.rs

1//! Repository Analysis Manager
2//!
3//! Analyzes GitHub repositories for context, including metadata, dependencies, and code patterns.
4
5use crate::errors::{GitHubError, Result};
6use crate::models::{Dependency, ProjectStructure, Repository};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
/// A named code pattern or convention reported for a repository.
///
/// Instances are carried in [`CodebaseSummary::patterns`] and are
/// (de)serializable through `serde`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodePattern {
    /// Short name identifying the pattern
    pub name: String,
    /// Human-readable description of the pattern
    pub description: String,
    /// Frequency (how often this pattern appears)
    pub frequency: u32,
    /// Example code illustrating the pattern, or `None` when no example is recorded
    pub example: Option<String>,
}
22
/// High-level summary of a repository's codebase.
///
/// Produced by `RepositoryAnalyzer::generate_codebase_summary` and embedded
/// in [`RepositoryAnalysis`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodebaseSummary {
    /// Total lines of code
    pub total_lines: u64,
    /// Number of files
    pub file_count: u32,
    /// Primary language, or `None` when it is not known
    pub primary_language: Option<String>,
    /// All languages used in the codebase
    pub languages: Vec<String>,
    /// Key code patterns found
    pub patterns: Vec<CodePattern>,
    /// Free-form architecture overview
    pub architecture: String,
    /// Names of key modules/components
    pub components: Vec<String>,
}
41
/// Result of analyzing a single repository.
///
/// Bundles the repository metadata with its codebase summary and records
/// when the analysis was produced.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepositoryAnalysis {
    /// Repository metadata
    pub repository: Repository,
    /// Codebase summary
    pub summary: CodebaseSummary,
    /// UTC timestamp at which the analysis was produced
    pub analyzed_at: chrono::DateTime<chrono::Utc>,
}
52
/// A cached analysis together with the data needed to decide whether it has expired.
///
/// The TTL is stored per entry, so each entry keeps the TTL it was created with.
#[derive(Debug, Clone)]
struct CacheEntry {
    /// The cached analysis
    analysis: RepositoryAnalysis,
    /// UTC time at which the cache entry was created
    created_at: chrono::DateTime<chrono::Utc>,
    /// Cache TTL in seconds (0 = no expiration)
    ttl_seconds: u64,
}
63
64impl CacheEntry {
65    /// Check if this cache entry is still valid
66    fn is_valid(&self) -> bool {
67        if self.ttl_seconds == 0 {
68            return true;
69        }
70        let now = chrono::Utc::now();
71        let elapsed = (now - self.created_at).num_seconds() as u64;
72        elapsed < self.ttl_seconds
73    }
74}
75
/// Repository Analyzer
///
/// Analyzes GitHub repositories for context, including:
/// - Repository metadata and structure
/// - Project dependencies and versions
/// - Code patterns and conventions
/// - Codebase summaries
/// - Analysis result caching with TTL
/// - Forced re-analysis that discards the cached result first
pub struct RepositoryAnalyzer {
    /// Cache for analysis results, keyed by the string `"{owner}/{repo}"`
    cache: HashMap<String, CacheEntry>,
    /// TTL in seconds given to newly cached entries (0 = no expiration).
    /// Each entry stores its own copy, so changing this value does not
    /// affect entries that are already cached.
    default_ttl_seconds: u64,
}
91
92impl RepositoryAnalyzer {
93    /// Create a new RepositoryAnalyzer with default TTL (1 hour)
94    pub fn new() -> Self {
95        Self {
96            cache: HashMap::new(),
97            default_ttl_seconds: 3600,
98        }
99    }
100
101    /// Create a new RepositoryAnalyzer with custom TTL
102    ///
103    /// # Arguments
104    /// * `ttl_seconds` - Cache TTL in seconds (0 = no expiration)
105    pub fn with_ttl(ttl_seconds: u64) -> Self {
106        Self {
107            cache: HashMap::new(),
108            default_ttl_seconds: ttl_seconds,
109        }
110    }
111
112    /// Set the default cache TTL
113    ///
114    /// # Arguments
115    /// * `ttl_seconds` - Cache TTL in seconds (0 = no expiration)
116    pub fn set_ttl(&mut self, ttl_seconds: u64) {
117        self.default_ttl_seconds = ttl_seconds;
118    }
119
120    /// Fetch repository metadata and structure
121    ///
122    /// # Arguments
123    /// * `owner` - Repository owner
124    /// * `repo` - Repository name
125    ///
126    /// # Returns
127    /// Repository metadata including name, owner, description, language, and structure
128    ///
129    /// # Errors
130    /// Returns error if repository cannot be fetched
131    pub async fn fetch_repository_metadata(
132        &self,
133        owner: &str,
134        repo: &str,
135    ) -> Result<Repository> {
136        // Validate inputs
137        if owner.is_empty() {
138            return Err(GitHubError::invalid_input("Owner cannot be empty"));
139        }
140        if repo.is_empty() {
141            return Err(GitHubError::invalid_input("Repository name cannot be empty"));
142        }
143
144        // In a real implementation, this would call the GitHub API
145        // For now, we return a placeholder that demonstrates the structure
146        Ok(Repository {
147            name: repo.to_string(),
148            owner: owner.to_string(),
149            description: format!("Repository {}/{}", owner, repo),
150            url: format!("https://github.com/{}/{}", owner, repo),
151            language: Some("Rust".to_string()),
152            dependencies: Vec::new(),
153            structure: ProjectStructure {
154                language: Some("Rust".to_string()),
155                project_type: "library".to_string(),
156                directories: vec!["src".to_string(), "tests".to_string()],
157                files: vec!["Cargo.toml".to_string(), "README.md".to_string()],
158            },
159        })
160    }
161
162    /// Identify project dependencies and versions
163    ///
164    /// # Arguments
165    /// * `owner` - Repository owner
166    /// * `repo` - Repository name
167    ///
168    /// # Returns
169    /// List of dependencies with their versions
170    ///
171    /// # Errors
172    /// Returns error if dependencies cannot be identified
173    pub async fn identify_dependencies(
174        &self,
175        owner: &str,
176        repo: &str,
177    ) -> Result<Vec<Dependency>> {
178        // Validate inputs
179        if owner.is_empty() {
180            return Err(GitHubError::invalid_input("Owner cannot be empty"));
181        }
182        if repo.is_empty() {
183            return Err(GitHubError::invalid_input("Repository name cannot be empty"));
184        }
185
186        // In a real implementation, this would parse manifest files (Cargo.toml, package.json, etc.)
187        // For now, return an empty list
188        Ok(Vec::new())
189    }
190
191    /// Extract code patterns and conventions
192    ///
193    /// # Arguments
194    /// * `owner` - Repository owner
195    /// * `repo` - Repository name
196    ///
197    /// # Returns
198    /// List of code patterns found in the repository
199    ///
200    /// # Errors
201    /// Returns error if patterns cannot be extracted
202    pub async fn extract_code_patterns(
203        &self,
204        owner: &str,
205        repo: &str,
206    ) -> Result<Vec<CodePattern>> {
207        // Validate inputs
208        if owner.is_empty() {
209            return Err(GitHubError::invalid_input("Owner cannot be empty"));
210        }
211        if repo.is_empty() {
212            return Err(GitHubError::invalid_input("Repository name cannot be empty"));
213        }
214
215        // In a real implementation, this would analyze code files for patterns
216        // For now, return an empty list
217        Ok(Vec::new())
218    }
219
220    /// Generate codebase summary
221    ///
222    /// # Arguments
223    /// * `owner` - Repository owner
224    /// * `repo` - Repository name
225    ///
226    /// # Returns
227    /// Summary of the codebase including lines of code, file count, languages, and patterns
228    ///
229    /// # Errors
230    /// Returns error if summary cannot be generated
231    pub async fn generate_codebase_summary(
232        &self,
233        owner: &str,
234        repo: &str,
235    ) -> Result<CodebaseSummary> {
236        // Validate inputs
237        if owner.is_empty() {
238            return Err(GitHubError::invalid_input("Owner cannot be empty"));
239        }
240        if repo.is_empty() {
241            return Err(GitHubError::invalid_input("Repository name cannot be empty"));
242        }
243
244        // In a real implementation, this would analyze the repository
245        Ok(CodebaseSummary {
246            total_lines: 0,
247            file_count: 0,
248            primary_language: Some("Rust".to_string()),
249            languages: vec!["Rust".to_string()],
250            patterns: Vec::new(),
251            architecture: "Modular architecture".to_string(),
252            components: Vec::new(),
253        })
254    }
255
256    /// Perform complete repository analysis
257    ///
258    /// # Arguments
259    /// * `owner` - Repository owner
260    /// * `repo` - Repository name
261    ///
262    /// # Returns
263    /// Complete analysis including metadata, dependencies, patterns, and summary
264    ///
265    /// # Errors
266    /// Returns error if analysis cannot be performed
267    pub async fn analyze_repository(
268        &mut self,
269        owner: &str,
270        repo: &str,
271    ) -> Result<RepositoryAnalysis> {
272        // Check cache first
273        let cache_key = format!("{}/{}", owner, repo);
274        if let Some(entry) = self.cache.get(&cache_key) {
275            if entry.is_valid() {
276                return Ok(entry.analysis.clone());
277            }
278        }
279
280        // Fetch metadata
281        let repository = self.fetch_repository_metadata(owner, repo).await?;
282
283        // Generate summary
284        let summary = self.generate_codebase_summary(owner, repo).await?;
285
286        let analysis = RepositoryAnalysis {
287            repository,
288            summary,
289            analyzed_at: chrono::Utc::now(),
290        };
291
292        // Cache the result with TTL
293        self.cache.insert(
294            cache_key,
295            CacheEntry {
296                analysis: analysis.clone(),
297                created_at: chrono::Utc::now(),
298                ttl_seconds: self.default_ttl_seconds,
299            },
300        );
301
302        Ok(analysis)
303    }
304
305    /// Perform incremental repository analysis (updates existing analysis)
306    ///
307    /// # Arguments
308    /// * `owner` - Repository owner
309    /// * `repo` - Repository name
310    ///
311    /// # Returns
312    /// Updated analysis
313    ///
314    /// # Errors
315    /// Returns error if analysis cannot be performed
316    pub async fn update_repository_analysis(
317        &mut self,
318        owner: &str,
319        repo: &str,
320    ) -> Result<RepositoryAnalysis> {
321        // Always perform fresh analysis for updates
322        let cache_key = format!("{}/{}", owner, repo);
323        self.cache.remove(&cache_key);
324        self.analyze_repository(owner, repo).await
325    }
326
327    /// Get cached analysis result if valid
328    ///
329    /// # Arguments
330    /// * `owner` - Repository owner
331    /// * `repo` - Repository name
332    ///
333    /// # Returns
334    /// Cached analysis if available and valid
335    pub fn get_cached_analysis(&self, owner: &str, repo: &str) -> Option<RepositoryAnalysis> {
336        let cache_key = format!("{}/{}", owner, repo);
337        self.cache.get(&cache_key).and_then(|entry| {
338            if entry.is_valid() {
339                Some(entry.analysis.clone())
340            } else {
341                None
342            }
343        })
344    }
345
346    /// Get cache statistics
347    ///
348    /// # Returns
349    /// Tuple of (total_entries, valid_entries, expired_entries)
350    pub fn cache_stats(&self) -> (usize, usize, usize) {
351        let total = self.cache.len();
352        let valid = self.cache.values().filter(|e| e.is_valid()).count();
353        let expired = total - valid;
354        (total, valid, expired)
355    }
356
357    /// Clear all cache entries
358    pub fn clear_cache(&mut self) {
359        self.cache.clear();
360    }
361
362    /// Clear specific cache entry
363    ///
364    /// # Arguments
365    /// * `owner` - Repository owner
366    /// * `repo` - Repository name
367    pub fn clear_cache_entry(&mut self, owner: &str, repo: &str) {
368        let cache_key = format!("{}/{}", owner, repo);
369        self.cache.remove(&cache_key);
370    }
371
372    /// Remove expired cache entries
373    pub fn cleanup_expired_entries(&mut self) {
374        self.cache.retain(|_, entry| entry.is_valid());
375    }
376}
377
378impl Default for RepositoryAnalyzer {
379    fn default() -> Self {
380        Self::new()
381    }
382}
383
#[cfg(test)]
mod tests {
    use super::*;

    // --- fetch_repository_metadata -------------------------------------

    #[tokio::test]
    async fn test_fetch_repository_metadata_success() {
        let analyzer = RepositoryAnalyzer::new();
        let result = analyzer.fetch_repository_metadata("owner", "repo").await;
        assert!(result.is_ok());
        let repo = result.unwrap();
        assert_eq!(repo.name, "repo");
        assert_eq!(repo.owner, "owner");
        assert_eq!(repo.url, "https://github.com/owner/repo");
    }

    #[tokio::test]
    async fn test_fetch_repository_metadata_empty_owner() {
        let analyzer = RepositoryAnalyzer::new();
        let result = analyzer.fetch_repository_metadata("", "repo").await;
        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_fetch_repository_metadata_empty_repo() {
        let analyzer = RepositoryAnalyzer::new();
        let result = analyzer.fetch_repository_metadata("owner", "").await;
        assert!(result.is_err());
    }

    // --- identify_dependencies -----------------------------------------

    #[tokio::test]
    async fn test_identify_dependencies_success() {
        let analyzer = RepositoryAnalyzer::new();
        let result = analyzer.identify_dependencies("owner", "repo").await;
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn test_identify_dependencies_empty_owner() {
        let analyzer = RepositoryAnalyzer::new();
        let result = analyzer.identify_dependencies("", "repo").await;
        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_identify_dependencies_empty_repo() {
        let analyzer = RepositoryAnalyzer::new();
        let result = analyzer.identify_dependencies("owner", "").await;
        assert!(result.is_err());
    }

    // --- extract_code_patterns -----------------------------------------

    #[tokio::test]
    async fn test_extract_code_patterns_success() {
        let analyzer = RepositoryAnalyzer::new();
        let result = analyzer.extract_code_patterns("owner", "repo").await;
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn test_extract_code_patterns_empty_owner() {
        let analyzer = RepositoryAnalyzer::new();
        let result = analyzer.extract_code_patterns("", "repo").await;
        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_extract_code_patterns_empty_repo() {
        let analyzer = RepositoryAnalyzer::new();
        let result = analyzer.extract_code_patterns("owner", "").await;
        assert!(result.is_err());
    }

    // --- generate_codebase_summary -------------------------------------

    #[tokio::test]
    async fn test_generate_codebase_summary_success() {
        let analyzer = RepositoryAnalyzer::new();
        let result = analyzer.generate_codebase_summary("owner", "repo").await;
        assert!(result.is_ok());
        let summary = result.unwrap();
        assert_eq!(summary.primary_language, Some("Rust".to_string()));
    }

    #[tokio::test]
    async fn test_generate_codebase_summary_empty_owner() {
        let analyzer = RepositoryAnalyzer::new();
        let result = analyzer.generate_codebase_summary("", "repo").await;
        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_generate_codebase_summary_empty_repo() {
        let analyzer = RepositoryAnalyzer::new();
        let result = analyzer.generate_codebase_summary("owner", "").await;
        assert!(result.is_err());
    }

    // --- analyze_repository and caching --------------------------------

    #[tokio::test]
    async fn test_analyze_repository_success() {
        let mut analyzer = RepositoryAnalyzer::new();
        let result = analyzer.analyze_repository("owner", "repo").await;
        assert!(result.is_ok());
        let analysis = result.unwrap();
        assert_eq!(analysis.repository.owner, "owner");
        assert_eq!(analysis.repository.name, "repo");
    }

    #[tokio::test]
    async fn test_analyze_repository_invalid_input_is_not_cached() {
        let mut analyzer = RepositoryAnalyzer::new();
        assert!(analyzer.analyze_repository("", "repo").await.is_err());
        assert!(analyzer.analyze_repository("owner", "").await.is_err());
        // A failed analysis must not leave anything behind in the cache.
        assert_eq!(analyzer.cache_stats(), (0, 0, 0));
    }

    #[tokio::test]
    async fn test_analyze_repository_caching() {
        let mut analyzer = RepositoryAnalyzer::new();
        let result1 = analyzer.analyze_repository("owner", "repo").await;
        assert!(result1.is_ok());
        let first = result1.unwrap();

        let cached = analyzer.get_cached_analysis("owner", "repo");
        assert!(cached.is_some());
        assert_eq!(cached.unwrap().analyzed_at, first.analyzed_at);

        // A second call is served from the cache, so the timestamp is unchanged.
        let second = analyzer.analyze_repository("owner", "repo").await.unwrap();
        assert_eq!(second.analyzed_at, first.analyzed_at);
    }

    #[tokio::test]
    async fn test_clear_cache() {
        let mut analyzer = RepositoryAnalyzer::new();
        let _ = analyzer.analyze_repository("owner", "repo").await;
        analyzer.clear_cache();
        let cached = analyzer.get_cached_analysis("owner", "repo");
        assert!(cached.is_none());
    }

    #[tokio::test]
    async fn test_clear_cache_entry() {
        let mut analyzer = RepositoryAnalyzer::new();
        let _ = analyzer.analyze_repository("owner", "repo").await;
        let _ = analyzer.analyze_repository("owner", "other").await;
        analyzer.clear_cache_entry("owner", "repo");
        let cached = analyzer.get_cached_analysis("owner", "repo");
        assert!(cached.is_none());
        // Only the requested entry is removed.
        assert!(analyzer.get_cached_analysis("owner", "other").is_some());
    }

    // --- TTL configuration (no async work, so plain #[test]) -----------

    #[test]
    fn test_with_ttl() {
        let analyzer = RepositoryAnalyzer::with_ttl(3600);
        assert_eq!(analyzer.default_ttl_seconds, 3600);
    }

    #[test]
    fn test_set_ttl() {
        let mut analyzer = RepositoryAnalyzer::new();
        analyzer.set_ttl(7200);
        assert_eq!(analyzer.default_ttl_seconds, 7200);
    }

    #[test]
    fn test_default_matches_new() {
        let from_default = RepositoryAnalyzer::default();
        let from_new = RepositoryAnalyzer::new();
        assert_eq!(from_default.default_ttl_seconds, from_new.default_ttl_seconds);
        assert_eq!(from_default.cache_stats(), (0, 0, 0));
    }

    // --- cache statistics and expiry -----------------------------------

    #[tokio::test]
    async fn test_cache_stats() {
        let mut analyzer = RepositoryAnalyzer::new();
        let _ = analyzer.analyze_repository("owner", "repo").await;
        let (total, valid, expired) = analyzer.cache_stats();
        assert_eq!(total, 1);
        assert_eq!(valid, 1);
        assert_eq!(expired, 0);
    }

    #[tokio::test]
    async fn test_update_repository_analysis() {
        let mut analyzer = RepositoryAnalyzer::new();
        let result1 = analyzer.analyze_repository("owner", "repo").await;
        assert!(result1.is_ok());

        let result2 = analyzer.update_repository_analysis("owner", "repo").await;
        assert!(result2.is_ok());

        // The refreshed analysis replaces the old entry rather than adding one.
        let (total, _, _) = analyzer.cache_stats();
        assert_eq!(total, 1);
    }

    #[tokio::test]
    async fn test_cleanup_expired_entries() {
        let mut analyzer = RepositoryAnalyzer::with_ttl(0); // No expiration
        let _ = analyzer.analyze_repository("owner", "repo").await;
        analyzer.cleanup_expired_entries();
        let (total, _, _) = analyzer.cache_stats();
        assert_eq!(total, 1); // Should still be there with TTL 0
    }

    #[tokio::test]
    async fn test_cleanup_expired_entries_removes_stale() {
        let mut analyzer = RepositoryAnalyzer::with_ttl(60);
        let _ = analyzer.analyze_repository("owner", "repo").await;

        // Backdate the entry past its TTL instead of sleeping.
        let entry = analyzer
            .cache
            .get_mut("owner/repo")
            .expect("entry was cached by analyze_repository");
        entry.created_at = chrono::Utc::now() - chrono::Duration::seconds(120);

        // Expired entries are hidden from lookups but still counted...
        assert!(analyzer.get_cached_analysis("owner", "repo").is_none());
        assert_eq!(analyzer.cache_stats(), (1, 0, 1));

        // ...until cleanup evicts them.
        analyzer.cleanup_expired_entries();
        assert_eq!(analyzer.cache_stats(), (0, 0, 0));
    }
}