git_iris/changes/
change_analyzer.rs

1use super::models::{ChangeMetrics, ChangelogType};
2use crate::context::{ChangeType, RecentCommit};
3use crate::git::GitRepo;
4use anyhow::Result;
5use git2::{Diff, Oid};
6use once_cell::sync::Lazy;
7use regex::Regex;
8use std::sync::Arc;
9
10// Regex for extracting issue numbers (e.g., #123, GH-123)
11static ISSUE_RE: Lazy<Regex> = Lazy::new(|| {
12    Regex::new(r"(?:#|GH-)(\d+)")
13        .expect("Failed to compile issue number regex pattern - this is a bug")
14});
15
16// Regex for extracting pull request numbers (e.g., PR #123, pull request 123)
17static PR_RE: Lazy<Regex> = Lazy::new(|| {
18    Regex::new(r"(?i)(?:pull request|PR)\s*#?(\d+)")
19        .expect("Failed to compile pull request regex pattern - this is a bug")
20});
21
22/// Represents the analyzed changes for a single commit
23#[derive(Debug, Clone)]
24pub struct AnalyzedChange {
25    pub commit_hash: String,
26    pub commit_message: String,
27    pub author: String,
28    pub file_changes: Vec<FileChange>,
29    pub metrics: ChangeMetrics,
30    pub impact_score: f32,
31    pub change_type: ChangelogType,
32    pub is_breaking_change: bool,
33    pub associated_issues: Vec<String>,
34    pub pull_request: Option<String>,
35}
36
37/// Represents changes to a single file
38#[derive(Debug, Clone)]
39pub struct FileChange {
40    pub old_path: String,
41    pub new_path: String,
42    pub change_type: ChangeType,
43    pub analysis: Vec<String>,
44}
45
46/// Analyzer for processing Git commits and generating detailed change information
47pub struct ChangeAnalyzer {
48    git_repo: Arc<GitRepo>,
49}
50
51impl ChangeAnalyzer {
52    /// Create a new `ChangeAnalyzer` instance
53    pub fn new(git_repo: Arc<GitRepo>) -> Result<Self> {
54        Ok(Self { git_repo })
55    }
56
57    /// Analyze commits between two Git references
58    pub fn analyze_commits(&self, from: &str, to: &str) -> Result<Vec<AnalyzedChange>> {
59        self.git_repo
60            .get_commits_between_with_callback(from, to, |commit| self.analyze_commit(commit))
61    }
62
63    /// Analyze changes between two Git references and return the analyzed changes along with total metrics
64    pub fn analyze_changes(
65        &self,
66        from: &str,
67        to: &str,
68    ) -> Result<(Vec<AnalyzedChange>, ChangeMetrics)> {
69        let analyzed_changes = self.analyze_commits(from, to)?;
70        let total_metrics = self.calculate_total_metrics(&analyzed_changes);
71        Ok((analyzed_changes, total_metrics))
72    }
73
74    /// Analyze a single commit
75    fn analyze_commit(&self, commit: &RecentCommit) -> Result<AnalyzedChange> {
76        let repo = self.git_repo.open_repo()?;
77        let commit_obj = repo.find_commit(Oid::from_str(&commit.hash)?)?;
78
79        let parent_tree = if commit_obj.parent_count() > 0 {
80            Some(commit_obj.parent(0)?.tree()?)
81        } else {
82            None
83        };
84
85        let diff = repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&commit_obj.tree()?), None)?;
86
87        let file_changes = Self::analyze_file_changes(&diff)?;
88        let metrics = Self::calculate_metrics(&diff)?;
89        let change_type = Self::classify_change(&commit.message, &file_changes);
90        let is_breaking_change = Self::detect_breaking_change(&commit.message, &file_changes);
91        let associated_issues = Self::extract_associated_issues(&commit.message);
92        let pull_request = Self::extract_pull_request(&commit.message);
93        let impact_score =
94            Self::calculate_impact_score(&metrics, &file_changes, is_breaking_change);
95
96        Ok(AnalyzedChange {
97            commit_hash: commit.hash.clone(),
98            commit_message: commit.message.clone(),
99            author: commit.author.clone(),
100            file_changes,
101            metrics,
102            impact_score,
103            change_type,
104            is_breaking_change,
105            associated_issues,
106            pull_request,
107        })
108    }
109
110    /// Analyze changes for each file in the commit
111    fn analyze_file_changes(diff: &Diff) -> Result<Vec<FileChange>> {
112        let mut file_changes = Vec::new();
113
114        diff.foreach(
115            &mut |delta, _| {
116                let old_file = delta.old_file();
117                let new_file = delta.new_file();
118                let change_type = match delta.status() {
119                    git2::Delta::Added => ChangeType::Added,
120                    git2::Delta::Deleted => ChangeType::Deleted,
121                    _ => ChangeType::Modified,
122                };
123
124                let file_path = new_file.path().map_or_else(
125                    || {
126                        old_file
127                            .path()
128                            .map(|p| p.to_string_lossy().into_owned())
129                            .unwrap_or_default()
130                    },
131                    |p| p.to_string_lossy().into_owned(),
132                );
133
134                // Perform file-specific analysis based on file type
135                let mut analysis = Vec::new();
136
137                // Determine file type and add relevant analysis
138                if let Some(extension) = std::path::Path::new(&file_path).extension() {
139                    if let Some(ext_str) = extension.to_str() {
140                        match ext_str.to_lowercase().as_str() {
141                            "rs" => analysis.push("Rust source code changes".to_string()),
142                            "js" | "ts" => {
143                                analysis.push("JavaScript/TypeScript changes".to_string());
144                            }
145                            "py" => analysis.push("Python code changes".to_string()),
146                            "java" => analysis.push("Java code changes".to_string()),
147                            "c" | "cpp" | "h" => analysis.push("C/C++ code changes".to_string()),
148                            "md" => analysis.push("Documentation changes".to_string()),
149                            "json" | "yml" | "yaml" | "toml" => {
150                                analysis.push("Configuration changes".to_string());
151                            }
152                            _ => {}
153                        }
154                    }
155                }
156
157                // Add analysis based on change type
158                match change_type {
159                    ChangeType::Added => analysis.push("New file added".to_string()),
160                    ChangeType::Deleted => analysis.push("File removed".to_string()),
161                    ChangeType::Modified => {
162                        if file_path.contains("test") || file_path.contains("spec") {
163                            analysis.push("Test modifications".to_string());
164                        } else if file_path.contains("README") || file_path.contains("docs/") {
165                            analysis.push("Documentation updates".to_string());
166                        }
167                    }
168                }
169
170                let file_change = FileChange {
171                    old_path: old_file
172                        .path()
173                        .map(|p| p.to_string_lossy().into_owned())
174                        .unwrap_or_default(),
175                    new_path: new_file
176                        .path()
177                        .map(|p| p.to_string_lossy().into_owned())
178                        .unwrap_or_default(),
179                    change_type,
180                    analysis,
181                };
182
183                file_changes.push(file_change);
184                true
185            },
186            None,
187            None,
188            None,
189        )?;
190
191        Ok(file_changes)
192    }
193
194    /// Calculate metrics for the commit
195    fn calculate_metrics(diff: &Diff) -> Result<ChangeMetrics> {
196        let stats = diff.stats()?;
197        Ok(ChangeMetrics {
198            total_commits: 1,
199            files_changed: stats.files_changed(),
200            insertions: stats.insertions(),
201            deletions: stats.deletions(),
202            total_lines_changed: stats.insertions() + stats.deletions(),
203        })
204    }
205
206    /// Classify the type of change based on commit message and file changes
207    fn classify_change(commit_message: &str, file_changes: &[FileChange]) -> ChangelogType {
208        let message_lower = commit_message.to_lowercase();
209
210        // First, check the commit message
211        if message_lower.contains("add") || message_lower.contains("new") {
212            return ChangelogType::Added;
213        } else if message_lower.contains("deprecat") {
214            return ChangelogType::Deprecated;
215        } else if message_lower.contains("remov") || message_lower.contains("delet") {
216            return ChangelogType::Removed;
217        } else if message_lower.contains("fix") || message_lower.contains("bug") {
218            return ChangelogType::Fixed;
219        } else if message_lower.contains("secur") || message_lower.contains("vulnerab") {
220            return ChangelogType::Security;
221        }
222
223        // If the commit message doesn't give us a clear indication, check the file changes
224        let has_additions = file_changes
225            .iter()
226            .any(|fc| fc.change_type == ChangeType::Added);
227        let has_deletions = file_changes
228            .iter()
229            .any(|fc| fc.change_type == ChangeType::Deleted);
230
231        if has_additions && !has_deletions {
232            ChangelogType::Added
233        } else if has_deletions && !has_additions {
234            ChangelogType::Removed
235        } else {
236            ChangelogType::Changed
237        }
238    }
239
240    /// Detect if the change is a breaking change
241    fn detect_breaking_change(commit_message: &str, file_changes: &[FileChange]) -> bool {
242        let message_lower = commit_message.to_lowercase();
243        if message_lower.contains("breaking change")
244            || message_lower.contains("breaking-change")
245            || message_lower.contains("major version")
246        {
247            return true;
248        }
249
250        // Check file changes for potential breaking changes
251        file_changes.iter().any(|fc| {
252            fc.analysis.iter().any(|analysis| {
253                analysis.to_lowercase().contains("breaking change")
254                    || analysis.to_lowercase().contains("api change")
255                    || analysis.to_lowercase().contains("incompatible")
256            })
257        })
258    }
259
260    /// Extract associated issue numbers from the commit message
261    fn extract_associated_issues(commit_message: &str) -> Vec<String> {
262        // Use the lazily initialized static regex
263        ISSUE_RE
264            .captures_iter(commit_message)
265            .map(|cap| format!("#{}", &cap[1]))
266            .collect()
267    }
268
269    /// Extract pull request number from the commit message
270    fn extract_pull_request(commit_message: &str) -> Option<String> {
271        // Use the lazily initialized static regex
272        PR_RE
273            .captures(commit_message)
274            .map(|cap| format!("PR #{}", &cap[1]))
275    }
276
277    /// Calculate the impact score of the change
278    #[allow(clippy::cast_precision_loss)]
279    #[allow(clippy::as_conversions)]
280    fn calculate_impact_score(
281        metrics: &ChangeMetrics,
282        file_changes: &[FileChange],
283        is_breaking_change: bool,
284    ) -> f32 {
285        let base_score = (metrics.total_lines_changed as f32) / 100.0;
286        let file_score = file_changes.len() as f32 / 10.0;
287        let breaking_change_score = if is_breaking_change { 5.0 } else { 0.0 };
288
289        base_score + file_score + breaking_change_score
290    }
291
292    /// Calculate total metrics for a set of analyzed changes
293    pub fn calculate_total_metrics(&self, changes: &[AnalyzedChange]) -> ChangeMetrics {
294        changes.iter().fold(
295            ChangeMetrics {
296                total_commits: changes.len(),
297                files_changed: 0,
298                insertions: 0,
299                deletions: 0,
300                total_lines_changed: 0,
301            },
302            |mut acc, change| {
303                acc.files_changed += change.metrics.files_changed;
304                acc.insertions += change.metrics.insertions;
305                acc.deletions += change.metrics.deletions;
306                acc.total_lines_changed += change.metrics.total_lines_changed;
307                acc
308            },
309        )
310    }
311}