repo_analyzer/
git.rs

1use anyhow::{Context, Result};
2use chrono::{DateTime, Local, TimeZone};
3use git2::{build::RepoBuilder, FetchOptions, RemoteCallbacks, Repository, Time};
4use std::collections::{HashMap, HashSet};
5use std::io::Write;
6use std::path::{Path, PathBuf};
7
/// Aggregated commit information for a single author identity.
#[derive(Debug, Clone)]
pub struct Contributor {
    /// Author name as recorded in the commit signature.
    pub name: String,
    /// Author e-mail address as recorded in the commit signature.
    pub email: String,
    /// Number of analyzed commits attributed to this author.
    pub commit_count: usize,
    /// Timestamp string for the author's first commit (`YYYY-MM-DD HH:MM:SS`).
    pub first_commit: String,
    /// Timestamp string for the author's last commit (`YYYY-MM-DD HH:MM:SS`).
    pub last_commit: String,
}
16
/// Change statistics accumulated for a single file across the analyzed commits.
#[derive(Debug, Clone)]
pub struct FileStats {
    /// Number of analyzed commits that touched the file.
    pub commit_count: usize,
    /// Timestamp string for the first commit touching the file (`YYYY-MM-DD HH:MM:SS`).
    pub first_commit_date: String,
    /// Timestamp string for the last commit touching the file (`YYYY-MM-DD HH:MM:SS`).
    pub last_commit_date: String,
    /// Distinct author names that changed the file.
    pub authors: Vec<String>,
    /// Total number of lines added to the file.
    pub lines_added: usize,
    /// Total number of lines removed from the file.
    pub lines_removed: usize,
    /// Changes per month.
    pub change_frequency: f64,
    /// Author -> commit count.
    pub author_contributions: HashMap<String, usize>,
    /// Name of the author of the last change to the file.
    pub last_modified_by: String,
    /// `(lines_added + lines_removed) / commit_count`.
    pub avg_changes_per_commit: f64,
}
30
31pub fn clone_repository(url: &str, target_path: &Path) -> Result<Repository> {
32    let mut callbacks = RemoteCallbacks::new();
33    callbacks.transfer_progress(|stats| {
34        if stats.received_objects() == stats.total_objects() {
35            print!(
36                "\rReceiving objects: 100% ({}/{}), {:.2} KiB\r",
37                stats.received_objects(),
38                stats.total_objects(),
39                stats.received_bytes() as f64 / 1024.0
40            );
41        } else if stats.total_objects() > 0 {
42            print!(
43                "\rReceiving objects: {}% ({}/{}), {:.2} KiB\r",
44                (stats.received_objects() * 100) / stats.total_objects(),
45                stats.received_objects(),
46                stats.total_objects(),
47                stats.received_bytes() as f64 / 1024.0
48            );
49        }
50        std::io::stdout().flush().unwrap_or(());
51        true
52    });
53
54    let mut fetch_options = FetchOptions::new();
55    fetch_options.remote_callbacks(callbacks);
56
57    let repo = RepoBuilder::new()
58        .fetch_options(fetch_options)
59        .clone(url, target_path)
60        .context("Failed to clone repository")?;
61
62    println!("\nRepository cloned successfully");
63    Ok(repo)
64}
65
66pub fn analyze_git_repo(
67    repo_path: &Path,
68    depth: usize,
69) -> Result<(usize, Vec<Contributor>, String)> {
70    let (commit_count, contributors, last_activity, _) =
71        analyze_git_repo_extended(repo_path, depth)?;
72    Ok((commit_count, contributors, last_activity))
73}
74
75pub fn analyze_git_repo_extended(
76    repo_path: &Path,
77    depth: usize,
78) -> Result<(usize, Vec<Contributor>, String, HashMap<PathBuf, FileStats>)> {
79    let repo = Repository::open(repo_path).context("Failed to open git repository")?;
80
81    let mut commit_count = 0;
82    let mut contributors_map: HashMap<String, Contributor> = HashMap::new();
83    let mut last_commit_time = None;
84    let mut file_stats: HashMap<PathBuf, FileStats> = HashMap::new();
85
86    // Get the HEAD reference
87    let head = repo.head().context("Failed to get HEAD reference")?;
88
89    // Get the commit that HEAD points to
90    let obj = head
91        .peel(git2::ObjectType::Commit)
92        .context("Failed to peel to commit")?;
93    let commit = obj
94        .into_commit()
95        .map_err(|_| anyhow::anyhow!("Failed to convert to commit"))?;
96
97    // Create a revwalk to iterate through the commit history
98    let mut revwalk = repo.revwalk().context("Failed to create revwalk")?;
99    revwalk
100        .push(commit.id())
101        .context("Failed to push commit to revwalk")?;
102
103    for (i, oid_result) in revwalk.enumerate() {
104        // If depth is set and we've reached it, break
105        if depth > 0 && i >= depth {
106            break;
107        }
108
109        let oid = oid_result.context("Failed to get commit OID")?;
110        let commit = repo.find_commit(oid).context("Failed to find commit")?;
111
112        commit_count += 1;
113
114        // Get commit author
115        let author = commit.author();
116        let time = commit.time();
117        let datetime = format_git_time(&time);
118
119        // Update last commit time
120        if last_commit_time.is_none() || time.seconds() > last_commit_time.unwrap() {
121            last_commit_time = Some(time.seconds());
122        }
123
124        // Update contributor information
125        let key = format!(
126            "{} <{}>",
127            author.name().unwrap_or("Unknown"),
128            author.email().unwrap_or("unknown")
129        );
130
131        contributors_map
132            .entry(key.clone())
133            .and_modify(|contributor| {
134                contributor.commit_count += 1;
135                contributor.last_commit = datetime.clone();
136            })
137            .or_insert_with(|| Contributor {
138                name: author.name().unwrap_or("Unknown").to_string(),
139                email: author.email().unwrap_or("unknown").to_string(),
140                commit_count: 1,
141                first_commit: datetime.clone(),
142                last_commit: datetime.clone(),
143            });
144
145        // Get file changes in this commit
146        if let Ok(parent) = commit.parent(0) {
147            let diff = repo
148                .diff_tree_to_tree(
149                    Some(&parent.tree().unwrap()),
150                    Some(&commit.tree().unwrap()),
151                    None,
152                )
153                .unwrap();
154
155            let mut lines_added_map: HashMap<PathBuf, usize> = HashMap::new();
156            let mut lines_removed_map: HashMap<PathBuf, usize> = HashMap::new();
157            let mut files_changed: HashSet<PathBuf> = HashSet::new();
158
159            diff.foreach(
160                &mut |delta, _| {
161                    if let Some(path) = delta.new_file().path() {
162                        files_changed.insert(repo_path.join(path));
163                    }
164                    true
165                },
166                None,
167                Some(&mut |delta, hunk| {
168                    if let Some(path) = delta.new_file().path() {
169                        let path_buf = repo_path.join(path);
170                        *lines_added_map.entry(path_buf.clone()).or_insert(0) +=
171                            hunk.new_lines() as usize;
172                        *lines_removed_map.entry(path_buf).or_insert(0) +=
173                            hunk.old_lines() as usize;
174                    }
175                    true
176                }),
177                None,
178            )
179            .unwrap();
180
181            // Now update file_stats with the collected information
182            for path in files_changed {
183                let author_name = author.name().unwrap_or("Unknown").to_string();
184                let added = lines_added_map.get(&path).cloned().unwrap_or(0);
185                let removed = lines_removed_map.get(&path).cloned().unwrap_or(0);
186
187                // Check if we already have stats for this file
188                if let Some(stats) = file_stats.get_mut(&path) {
189                    // Update existing stats
190                    stats.commit_count += 1;
191                    stats.last_commit_date = datetime.clone();
192                    stats.last_modified_by = author_name.clone();
193                    stats.lines_added += added;
194                    stats.lines_removed += removed;
195
196                    // Update author contributions
197                    *stats
198                        .author_contributions
199                        .entry(author_name.clone())
200                        .or_insert(0) += 1;
201
202                    if !stats.authors.contains(&author_name) {
203                        stats.authors.push(author_name);
204                    }
205                } else {
206                    // Create new stats
207                    let mut authors = Vec::new();
208                    authors.push(author_name.clone());
209
210                    let mut author_contributions = HashMap::new();
211                    author_contributions.insert(author_name.clone(), 1);
212
213                    let new_stats = FileStats {
214                        commit_count: 1,
215                        first_commit_date: datetime.clone(),
216                        last_commit_date: datetime.clone(),
217                        authors,
218                        lines_added: added,
219                        lines_removed: removed,
220                        change_frequency: 0.0,
221                        author_contributions,
222                        last_modified_by: author_name,
223                        avg_changes_per_commit: 0.0,
224                    };
225
226                    file_stats.insert(path, new_stats);
227                }
228            }
229        }
230    }
231
232    // Calculate additional statistics for each file
233    for stats in file_stats.values_mut() {
234        // Calculate change frequency (changes per month)
235        if let (Ok(first_date), Ok(last_date)) = (
236            chrono::DateTime::parse_from_str(&stats.first_commit_date, "%Y-%m-%d %H:%M:%S %z"),
237            chrono::DateTime::parse_from_str(&stats.last_commit_date, "%Y-%m-%d %H:%M:%S %z"),
238        ) {
239            let duration = last_date.signed_duration_since(first_date);
240            let months = (duration.num_days() as f64) / 30.0;
241
242            if months > 0.0 {
243                stats.change_frequency = stats.commit_count as f64 / months;
244            } else {
245                stats.change_frequency = stats.commit_count as f64; // All changes in less than a month
246            }
247        }
248
249        // Calculate average changes per commit
250        let total_changes = stats.lines_added + stats.lines_removed;
251        if stats.commit_count > 0 {
252            stats.avg_changes_per_commit = total_changes as f64 / stats.commit_count as f64;
253        }
254    }
255
256    // Sort contributors by commit count
257    let mut contributors: Vec<Contributor> = contributors_map.values().cloned().collect();
258    contributors.sort_by(|a, b| b.commit_count.cmp(&a.commit_count));
259
260    // Format last activity time
261    let last_activity = if let Some(time) = last_commit_time {
262        let dt = Local.timestamp_opt(time, 0).unwrap();
263        dt.format("%Y-%m-%d %H:%M:%S").to_string()
264    } else {
265        "Unknown".to_string()
266    };
267
268    Ok((commit_count, contributors, last_activity, file_stats))
269}
270
271fn format_git_time(time: &Time) -> String {
272    let dt: DateTime<Local> = Local.timestamp_opt(time.seconds(), 0).unwrap();
273    dt.format("%Y-%m-%d %H:%M:%S").to_string()
274}