organizational_intelligence_plugin/
git.rs

1// Git history analyzer
2// Phase 1: Clone repositories and analyze commit history for defect patterns
3// Toyota Way: Simple local cloning, can evolve to distributed if metrics show need
4
5use anyhow::{anyhow, Result};
6use git2::Repository;
7use serde::{Deserialize, Serialize};
8use std::path::{Path, PathBuf};
9use tracing::{debug, info};
10
11/// Information about a single commit with quality metrics
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct CommitInfo {
14    pub hash: String,
15    pub message: String,
16    pub author: String,
17    pub timestamp: i64,
18    /// Number of files changed in this commit
19    pub files_changed: usize,
20    /// Lines added
21    pub lines_added: usize,
22    /// Lines removed
23    pub lines_removed: usize,
24}
25
/// Git repository analyzer.
///
/// Clones git repositories into a local cache directory and walks their
/// commit history to extract per-commit information.
#[derive(Debug, Clone)]
pub struct GitAnalyzer {
    /// Directory under which each repository lives at `<cache_dir>/<name>`.
    cache_dir: PathBuf,
}
31
32impl GitAnalyzer {
33    /// Create a new GitAnalyzer with specified cache directory
34    ///
35    /// # Arguments
36    /// * `cache_dir` - Directory to store cloned repositories
37    ///
38    /// # Examples
39    /// ```
40    /// use organizational_intelligence_plugin::git::GitAnalyzer;
41    /// use std::path::PathBuf;
42    ///
43    /// let analyzer = GitAnalyzer::new(PathBuf::from("/tmp/repos"));
44    /// ```
45    pub fn new<P: AsRef<Path>>(cache_dir: P) -> Self {
46        let cache_dir = cache_dir.as_ref().to_path_buf();
47        Self { cache_dir }
48    }
49
50    /// Clone a repository to the cache directory
51    ///
52    /// # Arguments
53    /// * `repo_url` - Git repository URL (https)
54    /// * `name` - Local name for the repository
55    ///
56    /// # Returns
57    /// * `Ok(())` if successful
58    /// * `Err` if clone fails
59    ///
60    /// # Examples
61    /// ```no_run
62    /// # use organizational_intelligence_plugin::git::GitAnalyzer;
63    /// # use std::path::PathBuf;
64    /// # async fn example() -> Result<(), anyhow::Error> {
65    /// let analyzer = GitAnalyzer::new(PathBuf::from("/tmp/repos"));
66    /// analyzer.clone_repository("https://github.com/rust-lang/rust", "rust")?;
67    /// # Ok(())
68    /// # }
69    /// ```
70    pub fn clone_repository(&self, repo_url: &str, name: &str) -> Result<()> {
71        let repo_path = self.cache_dir.join(name);
72
73        // Skip if already cloned
74        if repo_path.exists() {
75            debug!("Repository {} already exists at {:?}", name, repo_path);
76            return Ok(());
77        }
78
79        info!("Cloning repository {} from {}", name, repo_url);
80
81        // Clone the repository
82        Repository::clone(repo_url, &repo_path).map_err(|e| {
83            anyhow!(
84                "Failed to clone repository {} from {}: {}",
85                name,
86                repo_url,
87                e
88            )
89        })?;
90
91        info!("Successfully cloned {} to {:?}", name, repo_path);
92        Ok(())
93    }
94
95    /// Analyze commits in a cloned repository
96    ///
97    /// # Arguments
98    /// * `name` - Repository name (must be already cloned)
99    /// * `limit` - Maximum number of commits to analyze
100    ///
101    /// # Returns
102    /// * `Ok(Vec<CommitInfo>)` with commit information
103    /// * `Err` if repository not found or analysis fails
104    ///
105    /// # Examples
106    /// ```no_run
107    /// # use organizational_intelligence_plugin::git::GitAnalyzer;
108    /// # use std::path::PathBuf;
109    /// # async fn example() -> Result<(), anyhow::Error> {
110    /// let analyzer = GitAnalyzer::new(PathBuf::from("/tmp/repos"));
111    /// analyzer.clone_repository("https://github.com/rust-lang/rust", "rust")?;
112    /// let commits = analyzer.analyze_commits("rust", 100)?;
113    /// # Ok(())
114    /// # }
115    /// ```
116    pub fn analyze_commits(&self, name: &str, limit: usize) -> Result<Vec<CommitInfo>> {
117        let repo_path = self.cache_dir.join(name);
118
119        if !repo_path.exists() {
120            return Err(anyhow!(
121                "Repository {} not found at {:?}. Clone it first.",
122                name,
123                repo_path
124            ));
125        }
126
127        debug!("Opening repository at {:?}", repo_path);
128        let repo = Repository::open(&repo_path)
129            .map_err(|e| anyhow!("Failed to open repository {}: {}", name, e))?;
130
131        let mut revwalk = repo.revwalk()?;
132        revwalk.push_head()?;
133
134        let mut commits = Vec::new();
135
136        for (i, oid) in revwalk.enumerate() {
137            if i >= limit {
138                break;
139            }
140
141            let oid = oid?;
142            let commit = repo.find_commit(oid)?;
143
144            let hash = commit.id().to_string();
145            let message = commit.message().unwrap_or("").to_string();
146            let author = commit.author().email().unwrap_or("unknown").to_string();
147            let timestamp = commit.time().seconds();
148
149            // Get diff stats
150            let (files_changed, lines_added, lines_removed) = if commit.parent_count() > 0 {
151                let parent = commit.parent(0)?;
152                let diff =
153                    repo.diff_tree_to_tree(Some(&parent.tree()?), Some(&commit.tree()?), None)?;
154                let stats = diff.stats()?;
155                (stats.files_changed(), stats.insertions(), stats.deletions())
156            } else {
157                // Initial commit - count all files as changed
158                let tree = commit.tree()?;
159                (tree.len(), 0, 0)
160            };
161
162            commits.push(CommitInfo {
163                hash,
164                message,
165                author,
166                timestamp,
167                files_changed,
168                lines_added,
169                lines_removed,
170            });
171        }
172
173        debug!("Analyzed {} commits from {}", commits.len(), name);
174        Ok(commits)
175    }
176}
177
#[cfg(test)]
mod tests {
    use super::*;
    use git2::{Commit, Repository, Signature};
    use std::fs;
    use std::path::Path;
    use tempfile::TempDir;

    /// Small public repository used by the network-dependent (ignored) tests.
    const REMOTE_URL: &str = "https://github.com/rust-lang/rustlings";
    /// Local name under which the remote repository is cached.
    const REMOTE_NAME: &str = "rustlings";

    /// Write `contents` to `file_name` in the repository's working directory
    /// and commit it on top of the current `HEAD` (or as the root commit when
    /// the repository has no commits yet).
    fn commit_file(repo: &Repository, file_name: &str, contents: &str, message: &str) {
        let workdir = repo
            .workdir()
            .expect("test repository has a working directory");
        fs::write(workdir.join(file_name), contents).unwrap();

        let mut index = repo.index().unwrap();
        index.add_path(Path::new(file_name)).unwrap();
        index.write().unwrap();
        let tree = repo.find_tree(index.write_tree().unwrap()).unwrap();

        let signature = Signature::now("Test Author", "test@example.com").unwrap();
        // `head()` fails on a repository without commits; that means "no parent".
        let parent: Option<Commit> = repo
            .head()
            .ok()
            .and_then(|head| head.peel_to_commit().ok());
        let parents: Vec<&Commit> = parent.iter().collect();

        repo.commit(
            Some("HEAD"),
            &signature,
            &signature,
            message,
            &tree,
            &parents,
        )
        .unwrap();
    }

    /// Create a two-commit repository at `<cache_dir>/<name>` without any
    /// network access.
    fn init_local_repo(cache_dir: &Path, name: &str) {
        let repo_path = cache_dir.join(name);
        fs::create_dir_all(&repo_path).unwrap();
        let repo = Repository::init(&repo_path).unwrap();

        commit_file(&repo, "notes.txt", "one\n", "first commit");
        commit_file(&repo, "notes.txt", "one\ntwo\n", "second commit");
    }

    #[test]
    fn test_git_analyzer_can_be_created() {
        let temp_dir = TempDir::new().unwrap();
        let analyzer = GitAnalyzer::new(temp_dir.path());

        assert_eq!(analyzer.cache_dir.as_path(), temp_dir.path());
    }

    #[test]
    fn test_commit_info_structure() {
        let commit = CommitInfo {
            hash: "abc123".to_string(),
            message: "fix: null pointer dereference".to_string(),
            author: "test@example.com".to_string(),
            timestamp: 1234567890,
            files_changed: 3,
            lines_added: 15,
            lines_removed: 8,
        };

        assert_eq!(commit.hash, "abc123");
        assert_eq!(commit.message, "fix: null pointer dereference");
        assert_eq!(commit.author, "test@example.com");
        assert_eq!(commit.timestamp, 1234567890);
        assert_eq!(commit.files_changed, 3);
        assert_eq!(commit.lines_added, 15);
        assert_eq!(commit.lines_removed, 8);
    }

    #[test]
    fn test_analyze_nonexistent_repo() {
        let temp_dir = TempDir::new().unwrap();
        let analyzer = GitAnalyzer::new(temp_dir.path());

        let result = analyzer.analyze_commits("nonexistent-repo", 10);

        let error = result.expect_err("analyzing a missing repository must fail");
        assert!(error.to_string().contains("nonexistent-repo"));
    }

    // Hermetic test: builds a tiny repository locally, so it runs by default.
    #[test]
    fn test_analyze_local_repository() {
        let temp_dir = TempDir::new().unwrap();
        init_local_repo(temp_dir.path(), "local");
        let analyzer = GitAnalyzer::new(temp_dir.path());

        let commits = analyzer.analyze_commits("local", 10).unwrap();
        assert_eq!(commits.len(), 2);

        // The walk starts at HEAD, so the newest commit comes first.
        let newest = &commits[0];
        assert_eq!(newest.message, "second commit");
        assert_eq!(newest.author, "test@example.com");
        assert!(!newest.hash.is_empty());
        assert_eq!(newest.files_changed, 1);
        assert_eq!(newest.lines_added, 1);
        assert_eq!(newest.lines_removed, 0);

        let root = &commits[1];
        assert_eq!(root.message, "first commit");
        assert_eq!(root.files_changed, 1);

        // The limit caps the number of commits returned.
        assert_eq!(analyzer.analyze_commits("local", 1).unwrap().len(), 1);
        assert!(analyzer.analyze_commits("local", 0).unwrap().is_empty());

        // An existing path is a cache hit: no clone is attempted, so even an
        // unreachable URL succeeds.
        assert!(analyzer
            .clone_repository("https://invalid.invalid/none", "local")
            .is_ok());
    }

    // Integration tests that require network access are marked as ignored
    // Run with: cargo test -- --ignored
    #[test]
    #[ignore]
    fn test_clone_small_repository() {
        let temp_dir = TempDir::new().unwrap();
        let analyzer = GitAnalyzer::new(temp_dir.path());

        // Use a very small test repository
        let result = analyzer.clone_repository(REMOTE_URL, REMOTE_NAME);

        assert!(result.is_ok());
    }

    #[test]
    #[ignore]
    fn test_analyze_commits_basic() {
        let temp_dir = TempDir::new().unwrap();
        let analyzer = GitAnalyzer::new(temp_dir.path());

        analyzer.clone_repository(REMOTE_URL, REMOTE_NAME).unwrap();

        let commits = analyzer.analyze_commits(REMOTE_NAME, 10).unwrap();

        assert!(!commits.is_empty());
        assert!(commits.len() <= 10);

        let first_commit = &commits[0];
        assert!(!first_commit.hash.is_empty());
        assert!(!first_commit.message.is_empty());
    }

    #[test]
    #[ignore]
    fn test_analyze_commits_respects_limit() {
        let temp_dir = TempDir::new().unwrap();
        let analyzer = GitAnalyzer::new(temp_dir.path());

        analyzer.clone_repository(REMOTE_URL, REMOTE_NAME).unwrap();

        let commits_5 = analyzer.analyze_commits(REMOTE_NAME, 5).unwrap();
        assert!(commits_5.len() <= 5);

        let commits_20 = analyzer.analyze_commits(REMOTE_NAME, 20).unwrap();
        assert!(commits_20.len() <= 20);
    }

    #[test]
    #[ignore]
    fn test_analyzer_caches_cloned_repos() {
        let temp_dir = TempDir::new().unwrap();
        let analyzer = GitAnalyzer::new(temp_dir.path());

        // First clone
        analyzer.clone_repository(REMOTE_URL, REMOTE_NAME).unwrap();

        // Second call should not re-clone
        let result = analyzer.clone_repository(REMOTE_URL, REMOTE_NAME);
        assert!(result.is_ok());

        // Verify we can still analyze
        let commits = analyzer.analyze_commits(REMOTE_NAME, 5).unwrap();
        assert!(!commits.is_empty());
    }
}