1use std::path::Path;
7
8use argus_core::ArgusError;
9use git2::{Delta, DiffOptions, Repository, Sort};
10
11#[derive(Debug, Clone)]
29pub struct CommitInfo {
30 pub hash: String,
32 pub author: String,
34 pub email: String,
36 pub timestamp: i64,
38 pub message: String,
40 pub files_changed: Vec<FileChange>,
42}
43
44#[derive(Debug, Clone)]
60pub struct FileChange {
61 pub path: String,
63 pub lines_added: u64,
65 pub lines_deleted: u64,
67 pub status: ChangeStatus,
69}
70
/// Kind of change a commit applied to a file.
///
/// Derives `Eq` in addition to `PartialEq`: every payload is a `String`, so
/// equality is total and the type can be used wherever `Eq` is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChangeStatus {
    /// The file did not exist before the commit.
    Added,
    /// The file's content changed in place.
    Modified,
    /// The file was removed by the commit.
    Deleted,
    /// The file was moved; the new location is stored in [`FileChange::path`].
    Renamed {
        /// Path the file had before the rename.
        from: String,
    },
}
95
/// Tunables for [`mine_history`].
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so options can be logged,
/// copied and compared like the other public types in this module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MiningOptions {
    /// Size of the look-back window in days; the walk stops at the first
    /// commit older than `now - since_days`.
    pub since_days: u64,
    /// Commits touching more files than this are left out of the result.
    pub max_files_per_commit: usize,
    /// Short name of the reference to start from; `None` starts from `HEAD`.
    pub branch: Option<String>,
}
115
116impl Default for MiningOptions {
117 fn default() -> Self {
118 Self {
119 since_days: 180,
120 max_files_per_commit: 25,
121 branch: None,
122 }
123 }
124}
125
126pub fn mine_history(
147 repo_path: &Path,
148 options: &MiningOptions,
149) -> Result<Vec<CommitInfo>, ArgusError> {
150 let repo = Repository::open(repo_path)
151 .map_err(|e| ArgusError::Git(format!("failed to open repository: {e}")))?;
152
153 let mut revwalk = repo
154 .revwalk()
155 .map_err(|e| ArgusError::Git(format!("failed to create revwalk: {e}")))?;
156
157 revwalk.set_sorting(Sort::TIME).ok();
158
159 if let Some(ref branch) = options.branch {
161 let reference = repo
162 .resolve_reference_from_short_name(branch)
163 .map_err(|e| ArgusError::Git(format!("failed to resolve branch '{branch}': {e}")))?;
164 let oid = reference
165 .target()
166 .ok_or_else(|| ArgusError::Git("branch has no target".into()))?;
167 revwalk
168 .push(oid)
169 .map_err(|e| ArgusError::Git(format!("failed to push oid: {e}")))?;
170 } else {
171 revwalk
172 .push_head()
173 .map_err(|e| ArgusError::Git(format!("failed to push HEAD: {e}")))?;
174 }
175
176 let cutoff = compute_cutoff(options.since_days);
177 let mut commits = Vec::new();
178
179 for oid_result in revwalk {
180 let oid = oid_result.map_err(|e| ArgusError::Git(format!("revwalk error: {e}")))?;
181
182 let commit = repo
183 .find_commit(oid)
184 .map_err(|e| ArgusError::Git(format!("failed to find commit: {e}")))?;
185
186 let timestamp = commit.time().seconds();
187 if timestamp < cutoff {
188 break;
189 }
190
191 let parent_count = commit.parent_count();
193 if parent_count > 1 {
194 let file_count = count_diff_files(&repo, &commit)?;
196 if file_count > options.max_files_per_commit {
197 continue;
198 }
199 }
200
201 let files_changed = extract_file_changes(&repo, &commit)?;
202
203 if files_changed.len() > options.max_files_per_commit {
205 continue;
206 }
207
208 let author = commit.author();
209 let hash = oid.to_string();
210
211 commits.push(CommitInfo {
212 hash: hash[..hash.len().min(8)].to_string(),
213 author: author.name().unwrap_or("unknown").to_string(),
214 email: author.email().unwrap_or("unknown").to_string(),
215 timestamp,
216 message: commit
217 .message()
218 .unwrap_or("")
219 .lines()
220 .next()
221 .unwrap_or("")
222 .to_string(),
223 files_changed,
224 });
225 }
226
227 Ok(commits)
228}
229
/// Returns the Unix timestamp, in seconds, that lies `since_days` days before
/// the current system time.
///
/// All arithmetic saturates, so an oversized `since_days` yields a cutoff far
/// in the past instead of wrapping around to a value in the future.
/// If the system clock reads earlier than the Unix epoch, "now" is taken as 0.
fn compute_cutoff(since_days: u64) -> i64 {
    const SECS_PER_DAY: i64 = 86_400;

    let now_secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    let now = i64::try_from(now_secs).unwrap_or(i64::MAX);

    // A plain `as i64` cast would turn values above i64::MAX into negatives.
    let window = i64::try_from(since_days)
        .unwrap_or(i64::MAX)
        .saturating_mul(SECS_PER_DAY);

    now.saturating_sub(window)
}
237
238fn count_diff_files(repo: &Repository, commit: &git2::Commit) -> Result<usize, ArgusError> {
239 let commit_tree = commit
240 .tree()
241 .map_err(|e| ArgusError::Git(format!("failed to get commit tree: {e}")))?;
242
243 let parent_tree = if commit.parent_count() > 0 {
244 let parent = commit
245 .parent(0)
246 .map_err(|e| ArgusError::Git(format!("failed to get parent: {e}")))?;
247 Some(
248 parent
249 .tree()
250 .map_err(|e| ArgusError::Git(format!("failed to get parent tree: {e}")))?,
251 )
252 } else {
253 None
254 };
255
256 let mut diff_opts = DiffOptions::new();
257 let diff = repo
258 .diff_tree_to_tree(
259 parent_tree.as_ref(),
260 Some(&commit_tree),
261 Some(&mut diff_opts),
262 )
263 .map_err(|e| ArgusError::Git(format!("failed to compute diff: {e}")))?;
264
265 Ok(diff.deltas().len())
266}
267
268fn extract_file_changes(
269 repo: &Repository,
270 commit: &git2::Commit,
271) -> Result<Vec<FileChange>, ArgusError> {
272 let commit_tree = commit
273 .tree()
274 .map_err(|e| ArgusError::Git(format!("failed to get commit tree: {e}")))?;
275
276 let parent_tree = if commit.parent_count() > 0 {
277 let parent = commit
278 .parent(0)
279 .map_err(|e| ArgusError::Git(format!("failed to get parent: {e}")))?;
280 Some(
281 parent
282 .tree()
283 .map_err(|e| ArgusError::Git(format!("failed to get parent tree: {e}")))?,
284 )
285 } else {
286 None
287 };
288
289 let mut diff_opts = DiffOptions::new();
290 let diff = repo
291 .diff_tree_to_tree(
292 parent_tree.as_ref(),
293 Some(&commit_tree),
294 Some(&mut diff_opts),
295 )
296 .map_err(|e| ArgusError::Git(format!("failed to compute diff: {e}")))?;
297
298 let mut find_opts = git2::DiffFindOptions::new();
300 find_opts.renames(true);
301 let mut diff = diff;
302 diff.find_similar(Some(&mut find_opts))
303 .map_err(|e| ArgusError::Git(format!("failed to find renames: {e}")))?;
304
305 let mut changes = Vec::new();
306 let num_deltas = diff.deltas().len();
307
308 for delta_idx in 0..num_deltas {
309 let delta = diff.get_delta(delta_idx).unwrap();
310
311 let new_file = delta.new_file();
312 let path = new_file
313 .path()
314 .unwrap_or(Path::new(""))
315 .to_string_lossy()
316 .to_string();
317
318 if path.is_empty() {
319 continue;
320 }
321
322 let status = match delta.status() {
323 Delta::Added => ChangeStatus::Added,
324 Delta::Deleted => {
325 let old_path = delta
326 .old_file()
327 .path()
328 .unwrap_or(Path::new(""))
329 .to_string_lossy()
330 .to_string();
331 changes.push(FileChange {
333 path: old_path,
334 lines_added: 0,
335 lines_deleted: 0,
336 status: ChangeStatus::Deleted,
337 });
338 continue;
339 }
340 Delta::Modified => ChangeStatus::Modified,
341 Delta::Renamed => {
342 let old_path = delta
343 .old_file()
344 .path()
345 .unwrap_or(Path::new(""))
346 .to_string_lossy()
347 .to_string();
348 ChangeStatus::Renamed { from: old_path }
349 }
350 _ => ChangeStatus::Modified,
351 };
352
353 changes.push(FileChange {
354 path,
355 lines_added: 0,
356 lines_deleted: 0,
357 status,
358 });
359 }
360
361 let mut line_counts: std::collections::HashMap<String, (u64, u64)> =
363 std::collections::HashMap::new();
364
365 diff.foreach(
366 &mut |_delta, _progress| true,
367 None,
368 None,
369 Some(&mut |delta, _hunk, line| {
370 let path = delta
371 .new_file()
372 .path()
373 .or_else(|| delta.old_file().path())
374 .unwrap_or(Path::new(""))
375 .to_string_lossy()
376 .to_string();
377
378 let entry = line_counts.entry(path).or_insert((0, 0));
379 match line.origin() {
380 '+' => entry.0 += 1,
381 '-' => entry.1 += 1,
382 _ => {}
383 }
384 true
385 }),
386 )
387 .map_err(|e| ArgusError::Git(format!("failed to iterate diff lines: {e}")))?;
388
389 for change in &mut changes {
391 if let Some((added, deleted)) = line_counts.get(&change.path) {
392 change.lines_added = *added;
393 change.lines_deleted = *deleted;
394 }
395 }
396
397 Ok(changes)
398}
399
#[cfg(test)]
mod tests {
    use super::*;

    /// Walks upward from the current directory and returns the first
    /// directory that contains a `.git` entry, if any.
    fn find_repo_root() -> Option<std::path::PathBuf> {
        let start = std::env::current_dir().ok()?;
        start
            .ancestors()
            .find(|dir| dir.join(".git").exists())
            .map(|dir| dir.to_path_buf())
    }

    #[test]
    fn mining_options_defaults_are_correct() {
        let defaults = MiningOptions::default();
        assert_eq!(defaults.since_days, 180);
        assert_eq!(defaults.max_files_per_commit, 25);
        assert!(defaults.branch.is_none());
    }

    // Mines the repository the tests run from, so it needs real history.
    #[test]
    fn mine_argus_repo_returns_commits() {
        let root = find_repo_root().expect("should find repo root");
        let one_year = MiningOptions {
            since_days: 365,
            ..MiningOptions::default()
        };

        let mined = mine_history(&root, &one_year).unwrap();
        assert!(!mined.is_empty(), "argus repo should have commits");

        let newest = &mined[0];
        assert!(!newest.hash.is_empty());
        assert!(!newest.author.is_empty());
        assert!(newest.timestamp > 0);
    }

    // With a tiny file limit, every surviving commit must respect it.
    #[test]
    fn large_commits_are_skipped() {
        let root = find_repo_root().expect("should find repo root");
        let strict = MiningOptions {
            since_days: 365,
            max_files_per_commit: 2,
            ..MiningOptions::default()
        };

        let mined = mine_history(&root, &strict).unwrap();
        mined.iter().for_each(|commit| {
            let touched = commit.files_changed.len();
            assert!(
                touched <= 2,
                "commit {} has {} files, expected <= 2",
                commit.hash,
                touched
            );
        });
    }

    #[test]
    fn change_status_identifies_correctly() {
        let renamed = ChangeStatus::Renamed {
            from: "old.rs".into(),
        };
        let deleted = ChangeStatus::Deleted;
        let modified = ChangeStatus::Modified;
        let added = ChangeStatus::Added;

        assert_eq!(added, ChangeStatus::Added);
        assert_eq!(modified, ChangeStatus::Modified);
        assert_eq!(deleted, ChangeStatus::Deleted);
        assert_ne!(renamed, ChangeStatus::Modified);
    }
}