1use dashmap::DashMap;
10use scribe_core::{GitFileStatus, GitStatus, Result, ScribeError};
11use serde::{Deserialize, Serialize};
12use std::collections::{HashMap, HashSet};
13use std::path::{Path, PathBuf};
14use std::process::Command;
15use std::time::{SystemTime, UNIX_EPOCH};
16use tokio::process::Command as AsyncCommand;
17
18#[derive(Debug)]
20pub struct GitIntegrator {
21 repo_path: PathBuf,
22 git_available: bool,
23 cache: GitCache,
24}
25
26#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct GitFileInfo {
29 pub path: PathBuf,
30 pub status: GitFileStatus,
31 pub last_commit: Option<GitCommitInfo>,
32 pub blame_info: Option<GitBlameInfo>,
33 pub changes_count: usize,
34 pub additions: usize,
35 pub deletions: usize,
36}
37
38#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct GitCommitInfo {
41 pub hash: String,
42 pub author: String,
43 pub email: String,
44 pub timestamp: u64,
45 pub message: String,
46 pub files_changed: usize,
47}
48
49#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct GitBlameInfo {
52 pub lines: Vec<GitBlameLine>,
53 pub contributors: HashMap<String, usize>, pub last_modified: u64,
55 pub age_distribution: AgeDistribution,
56}
57
58#[derive(Debug, Clone, Serialize, Deserialize)]
60pub struct GitBlameLine {
61 pub line_number: usize,
62 pub commit_hash: String,
63 pub author: String,
64 pub timestamp: u64,
65 pub content: String,
66}
67
68#[derive(Debug, Clone, Serialize, Deserialize)]
70pub struct AgeDistribution {
71 pub recent: usize, pub moderate: usize, pub old: usize, pub ancient: usize, }
76
77#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct GitRepositoryStats {
80 pub total_commits: usize,
81 pub contributors: Vec<ContributorStats>,
82 pub branches: Vec<String>,
83 pub tags: Vec<String>,
84 pub file_types: HashMap<String, usize>,
85 pub activity_timeline: Vec<ActivityPeriod>,
86 pub repository_health: RepositoryHealth,
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize)]
91pub struct ContributorStats {
92 pub name: String,
93 pub email: String,
94 pub commits: usize,
95 pub lines_added: usize,
96 pub lines_deleted: usize,
97 pub files_modified: usize,
98 pub first_commit: u64,
99 pub last_commit: u64,
100}
101
102#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct ActivityPeriod {
105 pub period: String, pub commits: usize,
107 pub lines_changed: usize,
108 pub files_touched: usize,
109 pub contributors: HashSet<String>,
110}
111
112#[derive(Debug, Clone, Serialize, Deserialize)]
114pub struct RepositoryHealth {
115 pub commit_frequency: f64, pub contributor_diversity: f64, pub code_churn: f64, pub documentation_ratio: f64, pub test_coverage_estimate: f64, pub branch_health: BranchHealth,
121}
122
123#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct BranchHealth {
126 pub main_branch: String,
127 pub active_branches: usize,
128 pub stale_branches: usize,
129 pub merge_conflicts_risk: f64,
130}
131
132#[derive(Debug)]
134struct GitCache {
135 file_statuses: DashMap<PathBuf, GitFileStatus>,
136 commit_cache: DashMap<String, GitCommitInfo>,
137 blame_cache: DashMap<PathBuf, GitBlameInfo>,
138 files_discovered: parking_lot::RwLock<usize>,
139 cache_timestamp: parking_lot::RwLock<Option<SystemTime>>,
140 cache_ttl: std::time::Duration,
141 batch_status_cache: DashMap<PathBuf, GitFileStatus>,
142}
143
144impl Default for GitCache {
145 fn default() -> Self {
146 Self {
147 file_statuses: DashMap::new(),
148 commit_cache: DashMap::new(),
149 blame_cache: DashMap::new(),
150 files_discovered: parking_lot::RwLock::new(0),
151 cache_timestamp: parking_lot::RwLock::new(None),
152 cache_ttl: std::time::Duration::from_secs(300),
153 batch_status_cache: DashMap::new(),
154 }
155 }
156}
157
158impl GitIntegrator {
159 pub fn new<P: AsRef<Path>>(repo_path: P) -> Result<Self> {
161 let repo_path = repo_path.as_ref().to_path_buf();
162
163 let git_dir = repo_path.join(".git");
165 if !git_dir.exists() {
166 return Err(ScribeError::git("Not a git repository".to_string()));
167 }
168
169 let git_available = Command::new("git")
171 .arg("--version")
172 .output()
173 .map(|output| output.status.success())
174 .unwrap_or(false);
175
176 if !git_available {
177 log::warn!("Git command not available, falling back to filesystem scanning");
178 }
179
180 Ok(Self {
181 repo_path,
182 git_available,
183 cache: GitCache {
184 cache_ttl: std::time::Duration::from_secs(300), ..Default::default()
186 },
187 })
188 }
189
190 pub async fn list_tracked_files(&self) -> Result<Vec<PathBuf>> {
192 if !self.git_available {
193 return Err(ScribeError::git("Git not available".to_string()));
194 }
195
196 let output = AsyncCommand::new("git")
197 .arg("ls-files")
198 .arg("-z") .current_dir(&self.repo_path)
200 .output()
201 .await
202 .map_err(|e| ScribeError::git(format!("Failed to run git ls-files: {}", e)))?;
203
204 if !output.status.success() {
205 let stderr = String::from_utf8_lossy(&output.stderr);
206 return Err(ScribeError::git(format!("git ls-files failed: {}", stderr)));
207 }
208
209 let stdout = String::from_utf8_lossy(&output.stdout);
210 let files: Vec<PathBuf> = stdout
211 .split('\0')
212 .filter(|s| !s.is_empty())
213 .map(|s| self.repo_path.join(s))
214 .collect();
215
216 *self.cache.files_discovered.write() = files.len();
218 *self.cache.cache_timestamp.write() = Some(SystemTime::now());
219
220 log::debug!("Git discovered {} tracked files", files.len());
221 Ok(files)
222 }
223
224 pub async fn load_batch_file_statuses(&self) -> Result<()> {
226 if !self.git_available {
227 return Ok(());
228 }
229
230 let output = AsyncCommand::new("git")
231 .arg("status")
232 .arg("--porcelain")
233 .arg("-z") .current_dir(&self.repo_path)
235 .output()
236 .await
237 .map_err(|e| ScribeError::git(format!("Failed to get batch file status: {}", e)))?;
238
239 if !output.status.success() {
240 log::warn!("Git status failed, batch status unavailable");
241 return Ok(());
242 }
243
244 let stdout = String::from_utf8_lossy(&output.stdout);
245 let mut status_map: HashMap<String, Vec<char>> = HashMap::new();
246
247 for line in stdout.split('\0') {
248 if line.len() < 3 {
249 continue;
250 }
251
252 let status_code = &line[..2];
253 let file_path = &line[3..];
254
255 if file_path.is_empty() {
256 continue;
257 }
258
259 let status = match status_code {
260 " M" | "M " | "MM" => GitFileStatus::Modified,
261 "A " | " A" => GitFileStatus::Added,
262 "D " | " D" => GitFileStatus::Deleted,
263 "R " | " R" => GitFileStatus::Renamed,
264 "C " | " C" => GitFileStatus::Copied,
265 "??" => GitFileStatus::Untracked,
266 "!!" => GitFileStatus::Ignored,
267 _ => GitFileStatus::Unmodified,
268 };
269
270 let full_path = self.repo_path.join(file_path);
271 self.cache.batch_status_cache.insert(full_path, status);
272 }
273
274 *self.cache.cache_timestamp.write() = Some(SystemTime::now());
275
276 log::debug!(
277 "Loaded batch file statuses for {} files",
278 self.cache.batch_status_cache.len()
279 );
280
281 Ok(())
282 }
283
284 pub async fn get_file_info(&self, file_path: &Path) -> Result<GitFileInfo> {
286 if let Some(cached_status) = self.cache.file_statuses.get(file_path) {
288 if self.is_cache_valid() {
289 return Ok(GitFileInfo {
290 path: file_path.to_path_buf(),
291 status: cached_status.clone(),
292 last_commit: None, blame_info: self
294 .cache
295 .blame_cache
296 .get(file_path)
297 .map(|entry| entry.clone()),
298 changes_count: 0,
299 additions: 0,
300 deletions: 0,
301 });
302 }
303 }
304
305 let status = self.get_file_status(file_path).await?;
306 let last_commit = self.get_last_commit_for_file(file_path).await.ok();
307 let blame_info = self.get_blame_info(file_path).await.ok();
308
309 let (changes_count, additions, deletions) = self
311 .get_file_change_stats(file_path)
312 .await
313 .unwrap_or((0, 0, 0));
314
315 self.cache
317 .file_statuses
318 .insert(file_path.to_path_buf(), status.clone());
319 *self.cache.cache_timestamp.write() = Some(SystemTime::now());
320
321 Ok(GitFileInfo {
322 path: file_path.to_path_buf(),
323 status,
324 last_commit,
325 blame_info,
326 changes_count,
327 additions,
328 deletions,
329 })
330 }
331
332 async fn get_file_status(&self, file_path: &Path) -> Result<GitFileStatus> {
334 if !self.git_available {
335 return Ok(GitFileStatus::Untracked);
336 }
337
338 if !self.cache.batch_status_cache.is_empty() {
340 if let Some(status) = self.cache.batch_status_cache.get(file_path) {
341 return Ok(status.clone());
342 }
343 return Ok(GitFileStatus::Unmodified);
345 }
346
347 let relative_path = file_path
349 .strip_prefix(&self.repo_path)
350 .map_err(|_| ScribeError::git("File not in repository".to_string()))?;
351
352 let output = AsyncCommand::new("git")
353 .arg("status")
354 .arg("--porcelain")
355 .arg(relative_path)
356 .current_dir(&self.repo_path)
357 .output()
358 .await
359 .map_err(|e| ScribeError::git(format!("Failed to get file status: {}", e)))?;
360
361 if !output.status.success() {
362 return Ok(GitFileStatus::Unmodified);
363 }
364
365 let stdout = String::from_utf8_lossy(&output.stdout);
366 let status = if stdout.is_empty() {
367 GitFileStatus::Unmodified
368 } else {
369 let status_code = stdout.chars().take(2).collect::<String>();
370 match status_code.as_str() {
371 " M" => GitFileStatus::Modified,
372 "M " => GitFileStatus::Modified,
373 "MM" => GitFileStatus::Modified, "A " => GitFileStatus::Added,
375 "D " => GitFileStatus::Deleted,
376 "R " => GitFileStatus::Renamed,
377 "C " => GitFileStatus::Copied,
378 "??" => GitFileStatus::Untracked,
379 "!!" => GitFileStatus::Ignored,
380 _ => GitFileStatus::Unmodified,
381 }
382 };
383
384 Ok(status)
385 }
386
387 async fn get_last_commit_for_file(&self, file_path: &Path) -> Result<GitCommitInfo> {
389 if !self.git_available {
390 return Err(ScribeError::git("Git not available".to_string()));
391 }
392
393 let relative_path = file_path
394 .strip_prefix(&self.repo_path)
395 .map_err(|_| ScribeError::git("File not in repository".to_string()))?;
396
397 let output = AsyncCommand::new("git")
398 .arg("log")
399 .arg("-1")
400 .arg("--pretty=format:%H|%an|%ae|%at|%s|%H") .arg("--")
402 .arg(relative_path)
403 .current_dir(&self.repo_path)
404 .output()
405 .await
406 .map_err(|e| ScribeError::git(format!("Failed to get commit info: {}", e)))?;
407
408 if !output.status.success() {
409 let stderr = String::from_utf8_lossy(&output.stderr);
410 return Err(ScribeError::git(format!("git log failed: {}", stderr)));
411 }
412
413 let stdout = String::from_utf8_lossy(&output.stdout);
414 let parts: Vec<&str> = stdout.trim().splitn(6, '|').collect();
415
416 if parts.len() < 5 {
417 return Err(ScribeError::git("Invalid git log output".to_string()));
418 }
419
420 let timestamp = parts[3]
421 .parse::<u64>()
422 .map_err(|_| ScribeError::git("Invalid timestamp".to_string()))?;
423
424 Ok(GitCommitInfo {
425 hash: parts[0].to_string(),
426 author: parts[1].to_string(),
427 email: parts[2].to_string(),
428 timestamp,
429 message: parts[4].to_string(),
430 files_changed: 1, })
432 }
433
434 async fn get_blame_info(&self, file_path: &Path) -> Result<GitBlameInfo> {
436 if !self.git_available {
437 return Err(ScribeError::git("Git not available".to_string()));
438 }
439
440 if let Some(cached_blame) = self.cache.blame_cache.get(file_path) {
442 if self.is_cache_valid() {
443 return Ok(cached_blame.clone());
444 }
445 }
446
447 let relative_path = file_path
448 .strip_prefix(&self.repo_path)
449 .map_err(|_| ScribeError::git("File not in repository".to_string()))?;
450
451 let output = AsyncCommand::new("git")
452 .arg("blame")
453 .arg("--porcelain")
454 .arg(relative_path)
455 .current_dir(&self.repo_path)
456 .output()
457 .await
458 .map_err(|e| ScribeError::git(format!("Failed to get blame info: {}", e)))?;
459
460 if !output.status.success() {
461 let stderr = String::from_utf8_lossy(&output.stderr);
462 return Err(ScribeError::git(format!("git blame failed: {}", stderr)));
463 }
464
465 let stdout = String::from_utf8_lossy(&output.stdout);
466 let blame_info = self.parse_blame_output(&stdout)?;
467
468 Ok(blame_info)
469 }
470
471 fn parse_blame_output(&self, blame_output: &str) -> Result<GitBlameInfo> {
473 let mut lines = Vec::new();
474 let mut contributors = HashMap::new();
475 let mut last_modified = 0u64;
476
477 let blame_lines: Vec<&str> = blame_output.lines().collect();
478 let mut i = 0;
479
480 while i < blame_lines.len() {
481 let line = blame_lines[i];
482 if line.is_empty() {
483 i += 1;
484 continue;
485 }
486
487 let parts: Vec<&str> = line.split_whitespace().collect();
489 if parts.len() < 3 {
490 i += 1;
491 continue;
492 }
493
494 let commit_hash = parts[0].to_string();
495 let line_number = parts[2].parse::<usize>().unwrap_or(0);
496
497 let mut author = String::new();
499 let mut timestamp = 0u64;
500 let mut content = String::new();
501
502 i += 1;
503 while i < blame_lines.len() {
504 let info_line = blame_lines[i];
505 if info_line.starts_with("author ") {
506 author = info_line[7..].to_string();
507 } else if info_line.starts_with("author-time ") {
508 timestamp = info_line[12..].parse().unwrap_or(0);
509 last_modified = last_modified.max(timestamp);
510 } else if info_line.starts_with('\t') {
511 content = info_line[1..].to_string();
512 break;
513 }
514 i += 1;
515 }
516
517 *contributors.entry(author.clone()).or_insert(0) += 1;
519
520 lines.push(GitBlameLine {
521 line_number,
522 commit_hash,
523 author,
524 timestamp,
525 content,
526 });
527
528 i += 1;
529 }
530
531 let now = SystemTime::now()
533 .duration_since(UNIX_EPOCH)
534 .unwrap()
535 .as_secs();
536
537 let mut age_distribution = AgeDistribution {
538 recent: 0,
539 moderate: 0,
540 old: 0,
541 ancient: 0,
542 };
543
544 for line in &lines {
545 let age_seconds = now.saturating_sub(line.timestamp);
546 let age_days = age_seconds / 86400; match age_days {
549 0..=30 => age_distribution.recent += 1,
550 31..=180 => age_distribution.moderate += 1,
551 181..=365 => age_distribution.old += 1,
552 _ => age_distribution.ancient += 1,
553 }
554 }
555
556 Ok(GitBlameInfo {
557 lines,
558 contributors,
559 last_modified,
560 age_distribution,
561 })
562 }
563
564 async fn get_file_change_stats(&self, file_path: &Path) -> Result<(usize, usize, usize)> {
566 if !self.git_available {
567 return Err(ScribeError::git("Git not available".to_string()));
568 }
569
570 let relative_path = file_path
571 .strip_prefix(&self.repo_path)
572 .map_err(|_| ScribeError::git("File not in repository".to_string()))?;
573
574 let output = AsyncCommand::new("git")
575 .arg("log")
576 .arg("--numstat")
577 .arg("--pretty=format:")
578 .arg("--")
579 .arg(relative_path)
580 .current_dir(&self.repo_path)
581 .output()
582 .await
583 .map_err(|e| ScribeError::git(format!("Failed to get change stats: {}", e)))?;
584
585 if !output.status.success() {
586 return Ok((0, 0, 0));
587 }
588
589 let stdout = String::from_utf8_lossy(&output.stdout);
590 let mut total_changes = 0;
591 let mut total_additions = 0;
592 let mut total_deletions = 0;
593
594 for line in stdout.lines() {
595 if line.trim().is_empty() {
596 continue;
597 }
598
599 let parts: Vec<&str> = line.split_whitespace().collect();
600 if parts.len() >= 2 {
601 if let (Ok(additions), Ok(deletions)) =
602 (parts[0].parse::<usize>(), parts[1].parse::<usize>())
603 {
604 total_additions += additions;
605 total_deletions += deletions;
606 total_changes += 1;
607 }
608 }
609 }
610
611 Ok((total_changes, total_additions, total_deletions))
612 }
613
614 pub async fn get_repository_stats(&self) -> Result<GitRepositoryStats> {
616 if !self.git_available {
617 return Err(ScribeError::git("Git not available".to_string()));
618 }
619
620 let (total_commits, contributors) = self.get_contributor_stats().await?;
621 let branches = self.get_branches().await?;
622 let tags = self.get_tags().await?;
623 let file_types = self.analyze_file_types().await?;
624 let activity_timeline = self.get_activity_timeline().await?;
625 let repository_health = self
626 .calculate_repository_health(&contributors, &activity_timeline)
627 .await?;
628
629 Ok(GitRepositoryStats {
630 total_commits,
631 contributors,
632 branches,
633 tags,
634 file_types,
635 activity_timeline,
636 repository_health,
637 })
638 }
639
640 async fn get_contributor_stats(&self) -> Result<(usize, Vec<ContributorStats>)> {
642 let output = AsyncCommand::new("git")
643 .arg("shortlog")
644 .arg("-sne")
645 .arg("--all")
646 .current_dir(&self.repo_path)
647 .output()
648 .await
649 .map_err(|e| ScribeError::git(format!("Failed to get contributors: {}", e)))?;
650
651 if !output.status.success() {
652 return Ok((0, vec![]));
653 }
654
655 let stdout = String::from_utf8_lossy(&output.stdout);
656 let mut contributors = Vec::new();
657 let mut total_commits = 0;
658
659 for line in stdout.lines() {
660 if let Some((count_str, name_email)) = line.trim().split_once('\t') {
661 if let Ok(commits) = count_str.trim().parse::<usize>() {
662 total_commits += commits;
663
664 let (name, email) = if let Some((n, e)) = name_email.rsplit_once('<') {
666 let email = e.trim_end_matches('>');
667 (n.trim().to_string(), email.to_string())
668 } else {
669 (name_email.to_string(), String::new())
670 };
671
672 let (lines_added, lines_deleted, files_modified, first_commit, last_commit) =
674 self.get_detailed_contributor_stats(&email)
675 .await
676 .unwrap_or((0, 0, 0, 0, 0));
677
678 contributors.push(ContributorStats {
679 name,
680 email,
681 commits,
682 lines_added,
683 lines_deleted,
684 files_modified,
685 first_commit,
686 last_commit,
687 });
688 }
689 }
690 }
691
692 contributors.sort_by(|a, b| b.commits.cmp(&a.commits));
694
695 Ok((total_commits, contributors))
696 }
697
698 async fn get_detailed_contributor_stats(
700 &self,
701 email: &str,
702 ) -> Result<(usize, usize, usize, u64, u64)> {
703 let output = AsyncCommand::new("git")
704 .arg("log")
705 .arg("--author")
706 .arg(email)
707 .arg("--numstat")
708 .arg("--pretty=format:%at")
709 .current_dir(&self.repo_path)
710 .output()
711 .await
712 .map_err(|e| ScribeError::git(format!("Failed to get detailed stats: {}", e)))?;
713
714 if !output.status.success() {
715 return Ok((0, 0, 0, 0, 0));
716 }
717
718 let stdout = String::from_utf8_lossy(&output.stdout);
719 let mut lines_added = 0;
720 let mut lines_deleted = 0;
721 let mut files_modified = 0;
722 let mut timestamps = Vec::new();
723
724 for line in stdout.lines() {
725 if line.trim().is_empty() {
726 continue;
727 }
728
729 if let Ok(timestamp) = line.parse::<u64>() {
731 timestamps.push(timestamp);
732 continue;
733 }
734
735 let parts: Vec<&str> = line.split_whitespace().collect();
737 if parts.len() >= 3 {
738 if let (Ok(added), Ok(deleted)) =
739 (parts[0].parse::<usize>(), parts[1].parse::<usize>())
740 {
741 lines_added += added;
742 lines_deleted += deleted;
743 files_modified += 1;
744 }
745 }
746 }
747
748 let first_commit = timestamps.iter().min().copied().unwrap_or(0);
749 let last_commit = timestamps.iter().max().copied().unwrap_or(0);
750
751 Ok((
752 lines_added,
753 lines_deleted,
754 files_modified,
755 first_commit,
756 last_commit,
757 ))
758 }
759
760 async fn get_branches(&self) -> Result<Vec<String>> {
762 let output = AsyncCommand::new("git")
763 .arg("branch")
764 .arg("-a")
765 .current_dir(&self.repo_path)
766 .output()
767 .await
768 .map_err(|e| ScribeError::git(format!("Failed to get branches: {}", e)))?;
769
770 if !output.status.success() {
771 return Ok(vec![]);
772 }
773
774 let stdout = String::from_utf8_lossy(&output.stdout);
775 let branches = stdout
776 .lines()
777 .map(|line| line.trim_start_matches("* ").trim())
778 .filter(|line| !line.is_empty())
779 .map(|line| line.to_string())
780 .collect();
781
782 Ok(branches)
783 }
784
785 async fn get_tags(&self) -> Result<Vec<String>> {
787 let output = AsyncCommand::new("git")
788 .arg("tag")
789 .current_dir(&self.repo_path)
790 .output()
791 .await
792 .map_err(|e| ScribeError::git(format!("Failed to get tags: {}", e)))?;
793
794 if !output.status.success() {
795 return Ok(vec![]);
796 }
797
798 let stdout = String::from_utf8_lossy(&output.stdout);
799 let tags = stdout
800 .lines()
801 .filter(|line| !line.trim().is_empty())
802 .map(|line| line.trim().to_string())
803 .collect();
804
805 Ok(tags)
806 }
807
808 async fn analyze_file_types(&self) -> Result<HashMap<String, usize>> {
810 let files = self.list_tracked_files().await?;
811 let mut file_types = HashMap::new();
812
813 for file in files {
814 if let Some(extension) = file.extension().and_then(|ext| ext.to_str()) {
815 *file_types.entry(extension.to_string()).or_insert(0) += 1;
816 } else {
817 *file_types.entry("no_extension".to_string()).or_insert(0) += 1;
818 }
819 }
820
821 Ok(file_types)
822 }
823
824 async fn get_activity_timeline(&self) -> Result<Vec<ActivityPeriod>> {
826 Ok(vec![])
829 }
830
831 async fn calculate_repository_health(
833 &self,
834 contributors: &[ContributorStats],
835 activity_timeline: &[ActivityPeriod],
836 ) -> Result<RepositoryHealth> {
837 let commit_frequency = if !activity_timeline.is_empty() {
839 let total_commits: usize = activity_timeline.iter().map(|p| p.commits).sum();
840 total_commits as f64 / activity_timeline.len() as f64
841 } else {
842 0.0
843 };
844
845 let contributor_diversity = contributors.len() as f64;
846
847 let total_added: usize = contributors.iter().map(|c| c.lines_added).sum();
849 let total_deleted: usize = contributors.iter().map(|c| c.lines_deleted).sum();
850 let code_churn = if total_added > 0 {
851 total_deleted as f64 / total_added as f64
852 } else {
853 0.0
854 };
855
856 let documentation_ratio = 0.0;
858 let test_coverage_estimate = 0.0;
859
860 let branch_health = BranchHealth {
861 main_branch: "main".to_string(),
862 active_branches: 1,
863 stale_branches: 0,
864 merge_conflicts_risk: 0.0,
865 };
866
867 Ok(RepositoryHealth {
868 commit_frequency,
869 contributor_diversity,
870 code_churn,
871 documentation_ratio,
872 test_coverage_estimate,
873 branch_health,
874 })
875 }
876
877 fn is_cache_valid(&self) -> bool {
879 if let Some(cache_time) = *self.cache.cache_timestamp.read() {
880 SystemTime::now()
881 .duration_since(cache_time)
882 .map(|duration| duration < self.cache.cache_ttl)
883 .unwrap_or(false)
884 } else {
885 false
886 }
887 }
888
889 pub fn clear_cache(&self) {
891 self.cache.file_statuses.clear();
892 self.cache.commit_cache.clear();
893 self.cache.blame_cache.clear();
894 self.cache.batch_status_cache.clear();
895 *self.cache.cache_timestamp.write() = None;
896 }
897
898 pub fn files_discovered(&self) -> usize {
900 *self.cache.files_discovered.read()
901 }
902
903 pub fn is_git_available(&self) -> bool {
905 self.git_available
906 }
907
908 pub fn repo_path(&self) -> &Path {
910 &self.repo_path
911 }
912}
913
914#[derive(Debug, Clone, Serialize, Deserialize)]
916pub struct GitDiffEntry {
917 pub file_path: PathBuf,
918 pub change_type: DiffChangeType,
919 pub diff_content: String,
920 pub line_additions: usize,
921 pub line_deletions: usize,
922 pub commit_hash: Option<String>,
923 pub commit_message: Option<String>,
924 pub author: Option<String>,
925 pub timestamp: Option<u64>,
926 pub old_file_path: Option<PathBuf>, }
928
929#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
931pub enum DiffChangeType {
932 Added,
933 Modified,
934 Deleted,
935 Renamed,
936 Copied,
937}
938
/// Options controlling which diffs `GitIntegrator::analyze_diffs` collects.
#[derive(Clone, Debug)]
pub struct DiffAnalysisConfig {
    /// Include changes staged in the index.
    pub include_staged: bool,
    /// Include unstaged working-tree changes.
    pub include_unstaged: bool,
    /// Specific commits whose diffs should be included.
    pub include_commits: Option<Vec<String>>,
    /// Revision range passed to `git log`.
    pub commit_range: Option<String>,
    /// Revision expression passed to `git diff` for a branch comparison.
    pub branch_comparison: Option<String>,
    /// Upper bound on the commits read for `commit_range`.
    pub max_commits: usize,
    /// Size limit for a single diff, in kilobytes.
    // NOTE(review): this and the fields below are consumed by filtering code not shown here; confirm semantics.
    pub max_diff_size_kb: usize,
    /// Glob-like patterns for paths to leave out.
    pub ignore_patterns: Vec<String>,
    /// Minimum relevance a diff needs to be kept.
    pub relevance_threshold: f64,
    /// Whether diffs of binary files are kept.
    pub include_binary_diffs: bool,
    /// Whether diffs of generated files are kept.
    pub include_generated_files: bool,
    /// Line limit for a single diff.
    pub max_lines_per_diff: usize,
}
955
956impl Default for DiffAnalysisConfig {
957 fn default() -> Self {
958 Self {
959 include_staged: true,
960 include_unstaged: true,
961 include_commits: None,
962 commit_range: None,
963 branch_comparison: None,
964 max_commits: 50,
965 max_diff_size_kb: 100,
966 ignore_patterns: vec![
967 "*.lock".to_string(),
968 "*.log".to_string(),
969 "*.tmp".to_string(),
970 "*.cache".to_string(),
971 "node_modules/*".to_string(),
972 ".git/*".to_string(),
973 "__pycache__/*".to_string(),
974 "*.min.js".to_string(),
975 "*.min.css".to_string(),
976 "build/*".to_string(),
977 "dist/*".to_string(),
978 ],
979 relevance_threshold: 0.1,
980 include_binary_diffs: false,
981 include_generated_files: false,
982 max_lines_per_diff: 1000,
983 }
984 }
985}
986
987#[derive(Debug, Clone, Serialize, Deserialize)]
989pub struct DiffAnalysisResult {
990 pub diffs: Vec<GitDiffEntry>,
991 pub total_files_changed: usize,
992 pub total_additions: usize,
993 pub total_deletions: usize,
994 pub commit_range_analyzed: Option<String>,
995 pub analysis_timestamp: u64,
996}
997
998impl GitIntegrator {
999 pub async fn analyze_diffs(&self, config: &DiffAnalysisConfig) -> Result<DiffAnalysisResult> {
1001 if !self.git_available {
1002 return Err(ScribeError::git(
1003 "Git not available for diff analysis".to_string(),
1004 ));
1005 }
1006
1007 let mut all_diffs = Vec::new();
1008
1009 if config.include_staged {
1011 let staged_diffs = self.extract_staged_diffs(config).await?;
1012 all_diffs.extend(staged_diffs);
1013 }
1014
1015 if config.include_unstaged {
1017 let unstaged_diffs = self.extract_unstaged_diffs(config).await?;
1018 all_diffs.extend(unstaged_diffs);
1019 }
1020
1021 if let Some(ref commits) = config.include_commits {
1023 for commit_hash in commits {
1024 let commit_diffs = self.extract_commit_diffs(commit_hash, config).await?;
1025 all_diffs.extend(commit_diffs);
1026 }
1027 }
1028
1029 if let Some(ref range) = config.commit_range {
1031 let range_diffs = self.extract_range_diffs(range, config).await?;
1032 all_diffs.extend(range_diffs);
1033 }
1034
1035 if let Some(ref branch_comp) = config.branch_comparison {
1037 let branch_diffs = self
1038 .extract_branch_comparison_diffs(branch_comp, config)
1039 .await?;
1040 all_diffs.extend(branch_diffs);
1041 }
1042
1043 all_diffs = self.filter_diffs(all_diffs, config).await?;
1045
1046 let total_files_changed = all_diffs.len();
1048 let total_additions = all_diffs.iter().map(|d| d.line_additions).sum();
1049 let total_deletions = all_diffs.iter().map(|d| d.line_deletions).sum();
1050
1051 let analysis_timestamp = SystemTime::now()
1052 .duration_since(UNIX_EPOCH)
1053 .unwrap()
1054 .as_secs();
1055
1056 Ok(DiffAnalysisResult {
1057 diffs: all_diffs,
1058 total_files_changed,
1059 total_additions,
1060 total_deletions,
1061 commit_range_analyzed: config.commit_range.clone(),
1062 analysis_timestamp,
1063 })
1064 }
1065
1066 async fn extract_staged_diffs(&self, config: &DiffAnalysisConfig) -> Result<Vec<GitDiffEntry>> {
1068 let output = AsyncCommand::new("git")
1069 .arg("diff")
1070 .arg("--cached")
1071 .arg("--numstat")
1072 .current_dir(&self.repo_path)
1073 .output()
1074 .await
1075 .map_err(|e| ScribeError::git(format!("Failed to get staged diffs: {}", e)))?;
1076
1077 if !output.status.success() {
1078 return Ok(Vec::new());
1079 }
1080
1081 let stdout = String::from_utf8_lossy(&output.stdout);
1082 self.parse_numstat_output(&stdout, DiffSource::Staged).await
1083 }
1084
1085 async fn extract_unstaged_diffs(
1087 &self,
1088 config: &DiffAnalysisConfig,
1089 ) -> Result<Vec<GitDiffEntry>> {
1090 let output = AsyncCommand::new("git")
1091 .arg("diff")
1092 .arg("--numstat")
1093 .current_dir(&self.repo_path)
1094 .output()
1095 .await
1096 .map_err(|e| ScribeError::git(format!("Failed to get unstaged diffs: {}", e)))?;
1097
1098 if !output.status.success() {
1099 return Ok(Vec::new());
1100 }
1101
1102 let stdout = String::from_utf8_lossy(&output.stdout);
1103 self.parse_numstat_output(&stdout, DiffSource::Unstaged)
1104 .await
1105 }
1106
1107 async fn extract_commit_diffs(
1109 &self,
1110 commit_hash: &str,
1111 config: &DiffAnalysisConfig,
1112 ) -> Result<Vec<GitDiffEntry>> {
1113 let output = AsyncCommand::new("git")
1114 .arg("show")
1115 .arg("--numstat")
1116 .arg("--name-status")
1117 .arg("--pretty=format:%H|%an|%at|%s")
1118 .arg(commit_hash)
1119 .current_dir(&self.repo_path)
1120 .output()
1121 .await
1122 .map_err(|e| ScribeError::git(format!("Failed to get commit diffs: {}", e)))?;
1123
1124 if !output.status.success() {
1125 return Ok(Vec::new());
1126 }
1127
1128 let stdout = String::from_utf8_lossy(&output.stdout);
1129 self.parse_commit_diff_output(&stdout, commit_hash).await
1130 }
1131
1132 async fn extract_range_diffs(
1134 &self,
1135 range: &str,
1136 config: &DiffAnalysisConfig,
1137 ) -> Result<Vec<GitDiffEntry>> {
1138 let output = AsyncCommand::new("git")
1139 .arg("log")
1140 .arg("--numstat")
1141 .arg("--pretty=format:%H|%an|%at|%s")
1142 .arg(format!("--max-count={}", config.max_commits))
1143 .arg(range)
1144 .current_dir(&self.repo_path)
1145 .output()
1146 .await
1147 .map_err(|e| ScribeError::git(format!("Failed to get range diffs: {}", e)))?;
1148
1149 if !output.status.success() {
1150 return Ok(Vec::new());
1151 }
1152
1153 let stdout = String::from_utf8_lossy(&output.stdout);
1154 log::debug!("Git log output for range {}: '{}'", range, stdout);
1155 self.parse_log_diff_output(&stdout).await
1156 }
1157
1158 async fn extract_branch_comparison_diffs(
1160 &self,
1161 branch_comp: &str,
1162 config: &DiffAnalysisConfig,
1163 ) -> Result<Vec<GitDiffEntry>> {
1164 let output = AsyncCommand::new("git")
1165 .arg("diff")
1166 .arg("--numstat")
1167 .arg(branch_comp)
1168 .current_dir(&self.repo_path)
1169 .output()
1170 .await
1171 .map_err(|e| {
1172 ScribeError::git(format!("Failed to get branch comparison diffs: {}", e))
1173 })?;
1174
1175 if !output.status.success() {
1176 return Ok(Vec::new());
1177 }
1178
1179 let stdout = String::from_utf8_lossy(&output.stdout);
1180 self.parse_numstat_output(&stdout, DiffSource::BranchComparison)
1181 .await
1182 }
1183
1184 async fn parse_numstat_output(
1186 &self,
1187 output: &str,
1188 source: DiffSource,
1189 ) -> Result<Vec<GitDiffEntry>> {
1190 let mut diffs = Vec::new();
1191
1192 for line in output.lines() {
1193 if line.trim().is_empty() {
1194 continue;
1195 }
1196
1197 let parts: Vec<&str> = line.split('\t').collect();
1199 if parts.len() >= 3 {
1200 let additions = if parts[0] == "-" {
1202 0
1203 } else {
1204 parts[0].parse::<usize>().unwrap_or(0)
1205 };
1206 let deletions = if parts[1] == "-" {
1207 0
1208 } else {
1209 parts[1].parse::<usize>().unwrap_or(0)
1210 };
1211 let file_path = PathBuf::from(parts[2]);
1212
1213 let diff_content = self.get_file_diff_content(&file_path, &source).await?;
1215
1216 let change_type = self.determine_change_type(&file_path, &source).await?;
1218
1219 diffs.push(GitDiffEntry {
1220 file_path,
1221 change_type,
1222 diff_content,
1223 line_additions: additions,
1224 line_deletions: deletions,
1225 commit_hash: None,
1226 commit_message: None,
1227 author: None,
1228 timestamp: None,
1229 old_file_path: None,
1230 });
1231 }
1232 }
1233
1234 Ok(diffs)
1235 }
1236
1237 async fn parse_commit_diff_output(
1239 &self,
1240 output: &str,
1241 commit_hash: &str,
1242 ) -> Result<Vec<GitDiffEntry>> {
1243 let lines: Vec<&str> = output.lines().collect();
1244 let mut diffs = Vec::new();
1245
1246 if lines.is_empty() {
1247 return Ok(diffs);
1248 }
1249
1250 let (commit_info, author, timestamp, message) = if let Some(first_line) = lines.first() {
1252 if first_line.contains('|') && first_line.split('|').count() >= 4 {
1253 let parts: Vec<&str> = first_line.split('|').collect();
1254 (
1255 Some(parts[0].to_string()),
1256 Some(parts[1].to_string()),
1257 parts[2].parse::<u64>().ok(),
1258 Some(parts[3].to_string()),
1259 )
1260 } else {
1261 (Some(commit_hash.to_string()), None, None, None)
1262 }
1263 } else {
1264 (Some(commit_hash.to_string()), None, None, None)
1265 };
1266
1267 for line in lines.iter().skip(1) {
1269 if line.trim().is_empty() {
1270 continue;
1271 }
1272
1273 let parts: Vec<&str> = line.split('\t').collect();
1275 if parts.len() >= 3 {
1276 let additions = if parts[0] == "-" {
1277 0
1278 } else {
1279 parts[0].parse::<usize>().unwrap_or(0)
1280 };
1281 let deletions = if parts[1] == "-" {
1282 0
1283 } else {
1284 parts[1].parse::<usize>().unwrap_or(0)
1285 };
1286 let file_path = PathBuf::from(parts[2]);
1287
1288 let diff_content = self
1290 .get_commit_file_diff_content(&file_path, commit_hash)
1291 .await?;
1292 let change_type = DiffChangeType::Modified; diffs.push(GitDiffEntry {
1295 file_path,
1296 change_type,
1297 diff_content,
1298 line_additions: additions,
1299 line_deletions: deletions,
1300 commit_hash: commit_info.clone(),
1301 commit_message: message.clone(),
1302 author: author.clone(),
1303 timestamp,
1304 old_file_path: None,
1305 });
1306 }
1307 }
1308
1309 Ok(diffs)
1310 }
1311
1312 async fn parse_log_diff_output(&self, output: &str) -> Result<Vec<GitDiffEntry>> {
1314 let mut diffs = Vec::new();
1315 let lines: Vec<&str> = output.lines().collect();
1316 log::debug!("Parsing log diff output with {} lines", lines.len());
1317 let mut i = 0;
1318
1319 while i < lines.len() {
1320 let line = lines[i];
1321
1322 if line.contains('|') && line.split('|').count() >= 4 {
1324 let parts: Vec<&str> = line.split('|').collect();
1325 let commit_hash = parts[0].to_string();
1326 let author = parts[1].to_string();
1327 let timestamp = parts[2].parse::<u64>().ok();
1328 let message = parts[3].to_string();
1329
1330 i += 1;
1331
1332 while i < lines.len() && !lines[i].contains('|') {
1334 let file_line = lines[i];
1335 if file_line.trim().is_empty() {
1336 i += 1;
1337 continue;
1338 }
1339
1340 let parts: Vec<&str> = file_line.split('\t').collect();
1342 if parts.len() >= 3 {
1343 let additions = if parts[0] == "-" {
1344 0
1345 } else {
1346 parts[0].parse::<usize>().unwrap_or(0)
1347 };
1348 let deletions = if parts[1] == "-" {
1349 0
1350 } else {
1351 parts[1].parse::<usize>().unwrap_or(0)
1352 };
1353 let file_path = PathBuf::from(parts[2]);
1354
1355 let diff_content = self
1356 .get_commit_file_diff_content(&file_path, &commit_hash)
1357 .await?;
1358
1359 diffs.push(GitDiffEntry {
1360 file_path,
1361 change_type: DiffChangeType::Modified,
1362 diff_content,
1363 line_additions: additions,
1364 line_deletions: deletions,
1365 commit_hash: Some(commit_hash.clone()),
1366 commit_message: Some(message.clone()),
1367 author: Some(author.clone()),
1368 timestamp,
1369 old_file_path: None,
1370 });
1371 }
1372 i += 1;
1373 }
1374 } else {
1375 i += 1;
1376 }
1377 }
1378
1379 Ok(diffs)
1380 }
1381
1382 async fn get_file_diff_content(&self, file_path: &Path, source: &DiffSource) -> Result<String> {
1384 let mut cmd = AsyncCommand::new("git");
1385 cmd.arg("diff");
1386
1387 match source {
1388 DiffSource::Staged => {
1389 cmd.arg("--cached");
1390 }
1391 DiffSource::Unstaged => {}
1392 DiffSource::BranchComparison => {}
1393 }
1394
1395 let output = cmd
1396 .arg("--")
1397 .arg(file_path)
1398 .current_dir(&self.repo_path)
1399 .output()
1400 .await
1401 .map_err(|e| ScribeError::git(format!("Failed to get file diff: {}", e)))?;
1402
1403 if output.status.success() {
1404 Ok(String::from_utf8_lossy(&output.stdout).to_string())
1405 } else {
1406 Ok(String::new())
1407 }
1408 }
1409
1410 async fn get_commit_file_diff_content(
1412 &self,
1413 file_path: &Path,
1414 commit_hash: &str,
1415 ) -> Result<String> {
1416 let output = AsyncCommand::new("git")
1417 .arg("show")
1418 .arg(format!("{}:{}", commit_hash, file_path.display()))
1419 .current_dir(&self.repo_path)
1420 .output()
1421 .await
1422 .map_err(|e| ScribeError::git(format!("Failed to get commit file diff: {}", e)))?;
1423
1424 if output.status.success() {
1425 Ok(String::from_utf8_lossy(&output.stdout).to_string())
1426 } else {
1427 Ok(String::new())
1428 }
1429 }
1430
1431 async fn determine_change_type(
1433 &self,
1434 file_path: &Path,
1435 source: &DiffSource,
1436 ) -> Result<DiffChangeType> {
1437 let mut cmd = AsyncCommand::new("git");
1438 cmd.arg("status").arg("--porcelain");
1439
1440 let output = cmd
1441 .arg("--")
1442 .arg(file_path)
1443 .current_dir(&self.repo_path)
1444 .output()
1445 .await
1446 .map_err(|e| ScribeError::git(format!("Failed to determine change type: {}", e)))?;
1447
1448 if output.status.success() {
1449 let stdout = String::from_utf8_lossy(&output.stdout);
1450 if let Some(first_line) = stdout.lines().next() {
1451 let status_code = first_line.chars().take(2).collect::<String>();
1452 return Ok(match status_code.as_str() {
1453 "A " | " A" => DiffChangeType::Added,
1454 "D " | " D" => DiffChangeType::Deleted,
1455 "R " | " R" => DiffChangeType::Renamed,
1456 "C " | " C" => DiffChangeType::Copied,
1457 _ => DiffChangeType::Modified,
1458 });
1459 }
1460 }
1461
1462 Ok(DiffChangeType::Modified)
1463 }
1464
1465 async fn filter_diffs(
1467 &self,
1468 mut diffs: Vec<GitDiffEntry>,
1469 config: &DiffAnalysisConfig,
1470 ) -> Result<Vec<GitDiffEntry>> {
1471 diffs.retain(|diff| {
1473 !config.ignore_patterns.iter().any(|pattern| {
1474 if pattern.ends_with("/*") {
1476 let prefix = &pattern[..pattern.len() - 2];
1477 diff.file_path.to_string_lossy().starts_with(prefix)
1478 } else if pattern.starts_with("*.") {
1479 let suffix = &pattern[1..];
1480 diff.file_path.to_string_lossy().ends_with(suffix)
1481 } else {
1482 diff.file_path.to_string_lossy().contains(pattern)
1483 }
1484 })
1485 });
1486
1487 diffs.retain(|diff| {
1489 let diff_size_kb = diff.diff_content.len() / 1024;
1490 diff_size_kb <= config.max_diff_size_kb
1491 });
1492
1493 diffs.retain(|diff| {
1495 let line_count = diff.line_additions + diff.line_deletions;
1496 line_count <= config.max_lines_per_diff
1497 });
1498
1499 if !config.include_binary_diffs {
1501 diffs.retain(|diff| !self.is_likely_binary_file(&diff.file_path));
1502 }
1503
1504 if !config.include_generated_files {
1506 diffs.retain(|diff| !self.is_likely_generated_file(&diff.file_path));
1507 }
1508
1509 Ok(diffs)
1510 }
1511
1512 fn is_likely_binary_file(&self, file_path: &Path) -> bool {
1514 if let Some(extension) = file_path.extension().and_then(|ext| ext.to_str()) {
1515 matches!(
1516 extension.to_lowercase().as_str(),
1517 "png"
1518 | "jpg"
1519 | "jpeg"
1520 | "gif"
1521 | "bmp"
1522 | "ico"
1523 | "svg"
1524 | "pdf"
1525 | "doc"
1526 | "docx"
1527 | "xls"
1528 | "xlsx"
1529 | "ppt"
1530 | "pptx"
1531 | "zip"
1532 | "tar"
1533 | "gz"
1534 | "7z"
1535 | "rar"
1536 | "exe"
1537 | "dll"
1538 | "so"
1539 | "dylib"
1540 | "mp3"
1541 | "mp4"
1542 | "avi"
1543 | "mov"
1544 | "wav"
1545 )
1546 } else {
1547 false
1548 }
1549 }
1550
1551 fn is_likely_generated_file(&self, file_path: &Path) -> bool {
1553 let path_str = file_path.to_string_lossy().to_lowercase();
1554
1555 path_str.contains("generated")
1557 || path_str.contains(".generated.")
1558 || path_str.contains("node_modules")
1559 || path_str.contains("__pycache__")
1560 || path_str.contains(".pyc")
1561 || path_str.contains("target/")
1562 || path_str.contains("build/")
1563 || path_str.contains("dist/")
1564 || path_str.ends_with(".min.js")
1565 || path_str.ends_with(".min.css")
1566 || path_str.contains("package-lock.json")
1567 || path_str.contains("yarn.lock")
1568 || path_str.contains("Cargo.lock")
1569 }
1570}
1571
/// Identifies which comparison a set of diff entries came from.
///
/// The enum is a plain tag with no payload, so it is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DiffSource {
    /// Changes recorded in the index; per-file diffs are requested with `--cached`.
    Staged,
    /// Changes in the working tree; per-file diffs are requested without extra flags.
    Unstaged,
    /// Changes found by comparing branches; per-file diffs currently add no extra flags.
    BranchComparison,
}
1579
1580impl Default for AgeDistribution {
1581 fn default() -> Self {
1582 Self {
1583 recent: 0,
1584 moderate: 0,
1585 old: 0,
1586 ancient: 0,
1587 }
1588 }
1589}
1590
#[cfg(test)]
mod tests {
    //! Integration-style tests that drive a real `git` binary inside a temporary repository,
    //! plus pure unit tests for the path heuristics and configuration defaults.
    //!
    //! Tests that need a repository pass vacuously when `git init` cannot be run.

    use super::*;
    use std::fs;
    use std::process::Command;
    use tempfile::TempDir;

    /// Runs `git <args>` inside `dir`.
    ///
    /// Panics with git's stderr when the command cannot be spawned or exits unsuccessfully, so
    /// a broken fixture fails at the setup step instead of surfacing later as a confusing
    /// assertion failure.
    fn run_git(dir: &Path, args: &[&str]) {
        let output = Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .unwrap_or_else(|e| panic!("failed to spawn `git {}`: {}", args.join(" "), e));

        assert!(
            output.status.success(),
            "`git {}` failed: {}",
            args.join(" "),
            String::from_utf8_lossy(&output.stderr)
        );
    }

    /// Builds an integrator that is not backed by a real repository, for tests that only
    /// exercise path heuristics.
    fn detached_integrator() -> GitIntegrator {
        GitIntegrator {
            repo_path: PathBuf::from("/tmp"),
            git_available: true,
            cache: GitCache::default(),
        }
    }

    /// Creates a temporary repository containing one committed file, `test.rs`.
    ///
    /// Returns `Err` only when `git init` cannot be run successfully, which callers treat as
    /// "git is unavailable, skip the test". Any later setup failure panics.
    async fn create_test_git_repo() -> Result<TempDir> {
        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        let git_usable = Command::new("git")
            .arg("init")
            .current_dir(repo_path)
            .output()
            .map(|output| output.status.success())
            .unwrap_or(false);

        if !git_usable {
            return Err(ScribeError::git(
                "Git not available for testing".to_string(),
            ));
        }

        run_git(repo_path, &["config", "user.name", "Test User"]);
        run_git(repo_path, &["config", "user.email", "test@example.com"]);
        // Keep commits working on machines whose global config enforces signing.
        run_git(repo_path, &["config", "commit.gpgsign", "false"]);

        let test_file = repo_path.join("test.rs");
        fs::write(&test_file, "fn main() { println!(\"Hello, world!\"); }").unwrap();

        run_git(repo_path, &["add", "test.rs"]);
        run_git(repo_path, &["commit", "-m", "Initial commit"]);

        Ok(temp_dir)
    }

    /// The integrator detects git and remembers the repository path it was given.
    #[tokio::test]
    async fn test_git_integrator_creation() {
        if let Ok(temp_dir) = create_test_git_repo().await {
            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();
            assert!(integrator.is_git_available());
            assert_eq!(integrator.repo_path(), temp_dir.path());
        }
    }

    /// Only the single committed file is reported as tracked.
    #[tokio::test]
    async fn test_list_tracked_files() {
        if let Ok(temp_dir) = create_test_git_repo().await {
            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();
            let files = integrator.list_tracked_files().await.unwrap();

            assert_eq!(files.len(), 1);
            assert_eq!(files[0].file_name().unwrap(), "test.rs");
            assert_eq!(integrator.files_discovered(), 1);
        }
    }

    /// A committed, untouched file is unmodified and has last-commit metadata.
    #[tokio::test]
    async fn test_get_file_info() {
        if let Ok(temp_dir) = create_test_git_repo().await {
            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();
            let test_file = temp_dir.path().join("test.rs");

            let file_info = integrator.get_file_info(&test_file).await.unwrap();

            assert_eq!(file_info.path, test_file);
            assert_eq!(file_info.status, GitFileStatus::Unmodified);
            assert!(file_info.last_commit.is_some());
        }
    }

    /// Repository statistics reflect the fixture's single commit, author and file type.
    #[tokio::test]
    async fn test_get_repository_stats() {
        if let Ok(temp_dir) = create_test_git_repo().await {
            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();
            let stats = integrator.get_repository_stats().await.unwrap();

            assert!(stats.total_commits >= 1);
            assert!(!stats.contributors.is_empty());
            assert!(stats.contributors[0].name == "Test User");
            assert!(stats.file_types.contains_key("rs"));
        }
    }

    /// File status moves from unmodified to modified after an edit, and a new file is
    /// untracked.
    #[tokio::test]
    async fn test_file_status_detection() {
        if let Ok(temp_dir) = create_test_git_repo().await {
            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();
            let test_file = temp_dir.path().join("test.rs");

            let status = integrator.get_file_status(&test_file).await.unwrap();
            assert_eq!(status, GitFileStatus::Unmodified);

            fs::write(&test_file, "fn main() { println!(\"Modified!\"); }").unwrap();

            let status = integrator.get_file_status(&test_file).await.unwrap();
            assert_eq!(status, GitFileStatus::Modified);

            let new_file = temp_dir.path().join("untracked.rs");
            fs::write(&new_file, "// untracked").unwrap();

            let status = integrator.get_file_status(&new_file).await.unwrap();
            assert_eq!(status, GitFileStatus::Untracked);
        }
    }

    /// Blame on the one-line fixture file yields one line attributed to the test user.
    #[tokio::test]
    async fn test_blame_info() {
        if let Ok(temp_dir) = create_test_git_repo().await {
            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();
            let test_file = temp_dir.path().join("test.rs");

            let blame_info = integrator.get_blame_info(&test_file).await.unwrap();

            assert_eq!(blame_info.lines.len(), 1);
            assert!(!blame_info.contributors.is_empty());
            assert!(blame_info.contributors.contains_key("Test User"));
            assert!(blame_info.last_modified > 0);
        }
    }

    /// Buckets four timestamps (15, 90, 300 and 400 days old) into the four age ranges.
    ///
    /// NOTE(review): this exercises a local copy of the bucketing rules rather than the
    /// production code path — confirm the thresholds here match the implementation.
    #[test]
    fn test_age_distribution_calculation() {
        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();

        let mut age_dist = AgeDistribution::default();

        let recent_timestamp = now - (15 * 24 * 3600);
        let moderate_timestamp = now - (90 * 24 * 3600);
        let old_timestamp = now - (300 * 24 * 3600);
        let ancient_timestamp = now - (400 * 24 * 3600);

        let timestamps = vec![
            recent_timestamp,
            moderate_timestamp,
            old_timestamp,
            ancient_timestamp,
        ];

        for timestamp in timestamps {
            let age_seconds = now.saturating_sub(timestamp);
            let age_days = age_seconds / 86400;

            match age_days {
                0..=30 => age_dist.recent += 1,
                31..=180 => age_dist.moderate += 1,
                181..=365 => age_dist.old += 1,
                _ => age_dist.ancient += 1,
            }
        }

        assert_eq!(age_dist.recent, 1);
        assert_eq!(age_dist.moderate, 1);
        assert_eq!(age_dist.old, 1);
        assert_eq!(age_dist.ancient, 1);
    }

    /// The cache is valid after a lookup and invalid after being cleared.
    #[tokio::test]
    async fn test_cache_functionality() {
        if let Ok(temp_dir) = create_test_git_repo().await {
            let mut integrator = GitIntegrator::new(temp_dir.path()).unwrap();
            let test_file = temp_dir.path().join("test.rs");

            let _ = integrator.get_file_info(&test_file).await.unwrap();
            assert!(integrator.is_cache_valid());

            integrator.clear_cache();
            assert!(!integrator.is_cache_valid());
        }
    }

    /// A staged modification is reported when only staged changes are requested.
    #[tokio::test]
    async fn test_diff_analysis_staged_changes() {
        if let Ok(temp_dir) = create_test_git_repo().await {
            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();

            let test_file = temp_dir.path().join("test.rs");
            fs::write(
                &test_file,
                "fn main() { println!(\"Modified and staged!\"); }",
            )
            .unwrap();

            run_git(temp_dir.path(), &["add", "test.rs"]);

            let config = DiffAnalysisConfig {
                include_staged: true,
                include_unstaged: false,
                ..Default::default()
            };

            let result = integrator.analyze_diffs(&config).await.unwrap();

            assert_eq!(result.total_files_changed, 1);
            assert!(result.total_additions > 0 || result.total_deletions > 0);
            assert!(!result.diffs.is_empty());
            assert_eq!(result.diffs[0].file_path.file_name().unwrap(), "test.rs");
        }
    }

    /// An unstaged modification is reported as `Modified` when only unstaged changes are
    /// requested.
    #[tokio::test]
    async fn test_diff_analysis_unstaged_changes() {
        if let Ok(temp_dir) = create_test_git_repo().await {
            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();

            let test_file = temp_dir.path().join("test.rs");
            fs::write(
                &test_file,
                "fn main() { println!(\"Modified but not staged!\"); }",
            )
            .unwrap();

            let config = DiffAnalysisConfig {
                include_staged: false,
                include_unstaged: true,
                ..Default::default()
            };

            let result = integrator.analyze_diffs(&config).await.unwrap();

            assert_eq!(result.total_files_changed, 1);
            assert!(!result.diffs.is_empty());
            assert_eq!(result.diffs[0].change_type, DiffChangeType::Modified);
        }
    }

    /// A commit range covering a second commit reports the file that commit touched.
    #[tokio::test]
    async fn test_diff_analysis_commit_range() {
        if let Ok(temp_dir) = create_test_git_repo().await {
            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();

            let test_file = temp_dir.path().join("test.rs");
            fs::write(
                &test_file,
                "fn main() { println!(\"Modified for second commit!\"); }",
            )
            .unwrap();

            run_git(temp_dir.path(), &["add", "test.rs"]);
            run_git(temp_dir.path(), &["commit", "-m", "Modify existing file"]);

            let config = DiffAnalysisConfig {
                include_staged: false,
                include_unstaged: false,
                commit_range: Some("HEAD~1..HEAD".to_string()),
                ..Default::default()
            };

            let result = integrator.analyze_diffs(&config).await.unwrap();

            assert!(
                !result.diffs.is_empty(),
                "Expected diffs but got: {:?}",
                result
            );
            let has_test_file = result
                .diffs
                .iter()
                .any(|d| d.file_path.file_name().unwrap() == "test.rs");
            assert!(
                has_test_file,
                "Expected test.rs in diffs but got: {:?}",
                result
                    .diffs
                    .iter()
                    .map(|d| &d.file_path)
                    .collect::<Vec<_>>()
            );
        }
    }

    /// The `*.lock` ignore pattern removes the lock file while keeping the JavaScript file.
    #[tokio::test]
    async fn test_diff_filtering() {
        if let Ok(temp_dir) = create_test_git_repo().await {
            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();

            let js_file = temp_dir.path().join("test.js");
            let lock_file = temp_dir.path().join("package.lock");

            fs::write(&js_file, "console.log('test');").unwrap();
            fs::write(&lock_file, "{ \"lockfileVersion\": 1 }").unwrap();

            run_git(temp_dir.path(), &["add", "."]);

            let config = DiffAnalysisConfig {
                include_staged: true,
                include_unstaged: false,
                ignore_patterns: vec!["*.lock".to_string()],
                ..Default::default()
            };

            let result = integrator.analyze_diffs(&config).await.unwrap();

            let has_js = result
                .diffs
                .iter()
                .any(|d| d.file_path.extension().unwrap() == "js");
            let has_lock = result
                .diffs
                .iter()
                .any(|d| d.file_path.extension().unwrap() == "lock");

            assert!(has_js);
            assert!(!has_lock);
        }
    }

    /// Known binary extensions are flagged; source and markdown files are not.
    #[test]
    fn test_binary_file_detection() {
        let integrator = detached_integrator();

        assert!(integrator.is_likely_binary_file(&PathBuf::from("image.png")));
        assert!(integrator.is_likely_binary_file(&PathBuf::from("document.pdf")));
        assert!(integrator.is_likely_binary_file(&PathBuf::from("archive.zip")));
        assert!(!integrator.is_likely_binary_file(&PathBuf::from("code.rs")));
        assert!(!integrator.is_likely_binary_file(&PathBuf::from("README.md")));
    }

    /// Minified assets, vendored and build directories and lock files are flagged; ordinary
    /// sources and manifests are not.
    #[test]
    fn test_generated_file_detection() {
        let integrator = detached_integrator();

        assert!(integrator.is_likely_generated_file(&PathBuf::from("bundle.min.js")));
        assert!(integrator.is_likely_generated_file(&PathBuf::from("styles.min.css")));
        assert!(
            integrator.is_likely_generated_file(&PathBuf::from("node_modules/package/index.js"))
        );
        assert!(integrator.is_likely_generated_file(&PathBuf::from("target/debug/scribe")));
        assert!(integrator.is_likely_generated_file(&PathBuf::from("package-lock.json")));
        assert!(!integrator.is_likely_generated_file(&PathBuf::from("src/main.rs")));
        assert!(!integrator.is_likely_generated_file(&PathBuf::from("package.json")));
    }

    /// The default configuration matches the documented defaults.
    #[test]
    fn test_diff_analysis_config_default() {
        let config = DiffAnalysisConfig::default();

        assert!(config.include_staged);
        assert!(config.include_unstaged);
        assert_eq!(config.max_commits, 50);
        assert_eq!(config.max_diff_size_kb, 100);
        assert!(!config.include_binary_diffs);
        assert!(!config.include_generated_files);
        assert!(config.ignore_patterns.contains(&"*.lock".to_string()));
        assert!(config
            .ignore_patterns
            .contains(&"node_modules/*".to_string()));
    }
}