git_insights/
stats.rs

1use crate::git::{count_pull_requests, run_command};
2use crate::output::{print_progress, print_table};
3use std::collections::{HashMap, HashSet};
4use std::sync::{Arc, Mutex};
5use std::thread;
6use std::time::Instant;
7use std::io::{self, Write};
8
/// Statistics accumulated for a single author.
#[derive(Default, Debug, Clone)]
pub struct AuthorStats {
    /// Number of lines attributed to the author.
    pub loc: usize,
    /// Number of commits attributed to the author.
    pub commits: usize,
    /// Paths of the files in which the author has at least one attributed line.
    pub files: HashSet<String>,
}

impl AuthorStats {
    /// Serializes the statistics as a JSON object of the form
    /// `{"loc": N, "commits": N, "files": ["a", "b"]}`.
    ///
    /// File paths are JSON-escaped and emitted in sorted order, so the output is
    /// always valid JSON and identical between runs for the same data.
    pub fn to_json(&self) -> String {
        /// Wraps `raw` in double quotes, escaping characters JSON does not allow verbatim.
        fn quote(raw: &str) -> String {
            let mut out = String::with_capacity(raw.len() + 2);
            out.push('"');
            for ch in raw.chars() {
                match ch {
                    '"' => out.push_str("\\\""),
                    '\\' => out.push_str("\\\\"),
                    '\n' => out.push_str("\\n"),
                    '\r' => out.push_str("\\r"),
                    '\t' => out.push_str("\\t"),
                    // Remaining control characters must use the \uXXXX form.
                    c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)),
                    c => out.push(c),
                }
            }
            out.push('"');
            out
        }

        // HashSet iteration order is arbitrary; sort for reproducible output.
        let mut files: Vec<&str> = self.files.iter().map(String::as_str).collect();
        files.sort_unstable();
        let files_json: Vec<String> = files.into_iter().map(quote).collect();

        format!(
            "{{\"loc\": {}, \"commits\": {}, \"files\": [{}]}}",
            self.loc,
            self.commits,
            files_json.join(", ")
        )
    }
}
28
/// Statistics collected for a single user.
#[derive(Default, Debug, Clone)]
pub struct UserStats {
    /// Names of the tags associated with the user.
    pub tags: HashSet<String>,
    /// Number of pull requests counted for the user.
    pub pull_requests: usize,
}

impl UserStats {
    /// Serializes the statistics as a JSON object of the form
    /// `{"tags": ["v1", "v2"], "pull_requests": N}`.
    ///
    /// Tag names are JSON-escaped and emitted in sorted order, so the output is
    /// always valid JSON and identical between runs for the same data.
    pub fn to_json(&self) -> String {
        /// Wraps `raw` in double quotes, escaping characters JSON does not allow verbatim.
        fn quote(raw: &str) -> String {
            let mut out = String::with_capacity(raw.len() + 2);
            out.push('"');
            for ch in raw.chars() {
                match ch {
                    '"' => out.push_str("\\\""),
                    '\\' => out.push_str("\\\\"),
                    '\n' => out.push_str("\\n"),
                    '\r' => out.push_str("\\r"),
                    '\t' => out.push_str("\\t"),
                    // Remaining control characters must use the \uXXXX form.
                    c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)),
                    c => out.push(c),
                }
            }
            out.push('"');
            out
        }

        // HashSet iteration order is arbitrary; sort for reproducible output.
        let mut tags: Vec<&str> = self.tags.iter().map(String::as_str).collect();
        tags.sort_unstable();
        let tags_json: Vec<String> = tags.into_iter().map(quote).collect();

        format!(
            "{{\"tags\": [{}], \"pull_requests\": {}}}",
            tags_json.join(", "),
            self.pull_requests
        )
    }
}
45
/// Map from an author key (an author name, or `Name <email>`) to that author's
/// accumulated [`AuthorStats`]; aliased for readability.
pub type StatsMap = HashMap<String, AuthorStats>;
48
49/// Gathers historical commit counts for each author from `git log`.
50pub fn gather_commit_stats() -> Result<StatsMap, String> {
51    let mut stats: StatsMap = HashMap::new();
52    let log_output = run_command(&["log", "--no-merges", "--pretty=format:--%aN--"])?;
53
54    for author in log_output.split("--").filter(|s| !s.is_empty()) {
55        // check if is empty, if not then increment commits
56        let trimmed_author = author.trim().to_string();
57
58        if !trimmed_author.is_empty() {
59            stats.entry(trimmed_author).or_default().commits += 1;
60        }
61    }
62    Ok(stats)
63}
64
65/// Gathers LOC and file stats by running `git blame` in parallel.
66pub fn gather_loc_and_file_stats() -> Result<StatsMap, String> {
67    let files_to_blame: Vec<String> = run_command(&["ls-files"])?
68        .lines()
69        .map(String::from)
70        .collect();
71
72    let stats = Arc::new(Mutex::new(StatsMap::new()));
73    let total_files = files_to_blame.len();
74    let processed_files = Arc::new(Mutex::new(0));
75    let start_time = Instant::now();
76
77    thread::scope(|s| {
78        for file in files_to_blame {
79            let stats_clone = Arc::clone(&stats);
80            let processed_clone = Arc::clone(&processed_files);
81
82            s.spawn(move || {
83                if let Ok(blame_output) =
84                    run_command(&["blame", "-w", "-C", "-C", "--line-porcelain", &file])
85                {
86                    let mut current_author = String::new();
87                    let mut author_loc_for_file = HashMap::new();
88
89                    for line in blame_output.lines() {
90                        if line.starts_with("author ") {
91                            current_author = line[7..].trim().to_string();
92                        } else if line.starts_with('\t') {
93                            if !current_author.is_empty() {
94                                *author_loc_for_file
95                                    .entry(current_author.clone())
96                                    .or_insert(0) += 1;
97                            }
98                        }
99                    }
100
101                    let mut stats_guard = stats_clone.lock().unwrap();
102                    for (author, loc) in author_loc_for_file {
103                        if !author.is_empty() {
104                            let author_stats = stats_guard.entry(author).or_default();
105                            author_stats.loc += loc;
106                            author_stats.files.insert(file.clone());
107                        }
108                    }
109                }
110
111                let mut processed_count = processed_clone.lock().unwrap();
112                *processed_count += 1;
113                print_progress(*processed_count, total_files, start_time);
114            });
115        }
116    });
117
118    println!(); // Newline after progress bar finishes.
119    let final_stats = Arc::try_unwrap(stats).unwrap().into_inner().unwrap();
120    Ok(final_stats)
121}
122
123pub fn gather_user_stats(username: &str) -> Result<UserStats, String> {
124    let mut user_stats = UserStats::default();
125
126    // handle tag listing errors as empty
127    let tags_output = match run_command(&["tag", "--list", "--format=%(refname:short)"]) {
128        Ok(s) => s,
129        Err(_) => String::new(),
130    };
131    for tag in tags_output.lines() {
132        // If git log fails for a tag, treat as no matches for that tag.
133        let log_output = run_command(&["log", tag, "--author", username, "--pretty=format:%an"])
134            .unwrap_or_default();
135        if !log_output.is_empty() {
136            user_stats.tags.insert(tag.to_string());
137        }
138    }
139
140    // If counting PR merges fails, default to 0 for resilience.
141    user_stats.pull_requests = count_pull_requests(username).unwrap_or(0);
142
143    Ok(user_stats)
144}
145
146fn tracked_text_files_head() -> Result<Vec<String>, String> {
147    // tracked files (preserve order)
148    let files = run_command(&["--no-pager", "ls-files"])?;
149    let files: Vec<String> = files
150        .lines()
151        .map(|s| s.trim().to_string())
152        .filter(|s| !s.is_empty())
153        .collect();
154
155    // text files at HEAD
156    let grep = run_command(&["--no-pager", "grep", "-I", "--name-only", ".", "HEAD"])?;
157    let mut text: HashSet<String> = HashSet::new();
158    for mut line in grep.lines().map(|s| s.trim()) {
159        if line.is_empty() {
160            continue;
161        }
162        if let Some(stripped) = line.strip_prefix("HEAD:") {
163            line = stripped;
164        }
165        text.insert(line.to_string());
166    }
167
168    // Intersect while preserving original order
169    let filtered: Vec<String> = files.into_iter().filter(|f| text.contains(f)).collect();
170    Ok(filtered)
171}
172
173/// Gather surviving LOC per author via blame --line-porcelain HEAD.
174/// by_name=false groups by "Name <email>", by_name=true groups by name only.
175pub fn gather_loc_and_file_statsx(by_name: bool) -> Result<StatsMap, String> {
176    let files = tracked_text_files_head()?;
177    let mut stats: StatsMap = HashMap::new();
178
179    let total = files.len();
180    let mut idx: usize = 0;
181    let spinner = ['|', '/', '-', '\\'];
182
183    for file in files {
184        idx += 1;
185        let ch = spinner[idx % spinner.len()];
186        print!("\rProcessing: {}/{} {}", idx, total, ch);
187        let _ = io::stdout().flush();
188
189        let blame = run_command(&["--no-pager", "blame", "--line-porcelain", "HEAD", "--", &file]);
190        if blame.is_err() {
191            continue;
192        }
193        let blame = blame.unwrap();
194
195        let mut current_name: Option<String> = None;
196        let mut current_mail: Option<String> = None;
197
198        for line in blame.lines() {
199            if let Some(rest) = line.strip_prefix("author ") {
200                current_name = Some(rest.trim().to_string());
201            } else if let Some(rest) = line.strip_prefix("author-mail ") {
202                current_mail = Some(rest.trim().to_string());
203            } else if line.starts_with('\t') {
204                if let (Some(name), Some(mail)) = (&current_name, &current_mail) {
205                    let key = if by_name {
206                        name.clone()
207                    } else {
208                        format!("{} {}", name, mail)
209                    };
210                    let entry = stats.entry(key).or_default();
211                    entry.loc += 1;
212                    entry.files.insert(file.clone());
213                }
214            }
215        }
216    }
217
218    println!();
219    Ok(stats)
220}
221
222/// Gather commit counts per author via `git shortlog -s -e HEAD`.
223/// by_name=false groups by "Name <email>", by_name=true groups by name only.
224pub fn gather_commit_statsx(by_name: bool) -> Result<StatsMap, String> {
225    let out = run_command(&["--no-pager", "shortlog", "-s", "-e", "HEAD"])?;
226    let mut stats: StatsMap = HashMap::new();
227
228    for line in out.lines() {
229        let l = line.trim();
230        if l.is_empty() {
231            continue;
232        }
233        // parse leading integer
234        let mut idx = 0;
235        while idx < l.len() && l.as_bytes()[idx].is_ascii_whitespace() {
236            idx += 1;
237        }
238        let start_num = idx;
239        while idx < l.len() && l.as_bytes()[idx].is_ascii_digit() {
240            idx += 1;
241        }
242        if start_num == idx {
243            continue;
244        }
245        let num_str = &l[start_num..idx];
246        let commits: usize = num_str.parse().unwrap_or(0);
247        let rest = l[idx..].trim();
248        if rest.is_empty() {
249            continue;
250        }
251        let key = if by_name {
252            let name_part = rest.rsplit_once(" <").map(|(n, _)| n).unwrap_or(rest);
253            name_part.to_string()
254        } else {
255            rest.to_string()
256        };
257        let entry = stats.entry(key).or_default();
258        entry.commits += commits;
259    }
260
261    Ok(stats)
262}
263
264/// Orchestrate stats and print totals + table.
265pub fn run_stats(by_name: bool) -> Result<(), String> {
266    let mut commit_stats = gather_commit_statsx(by_name)?;
267    let loc_stats = gather_loc_and_file_statsx(by_name)?;
268
269    let mut final_stats = loc_stats;
270    for (author, data) in commit_stats.drain() {
271        final_stats.entry(author).or_default().commits = data.commits;
272    }
273
274    let total_loc: usize = final_stats.values().map(|s| s.loc).sum();
275    let total_commits: usize = final_stats.values().map(|s| s.commits).sum();
276
277    let mut all_files = HashSet::new();
278    for stats in final_stats.values() {
279        all_files.extend(stats.files.iter().cloned());
280    }
281    let total_files = all_files.len();
282
283    let mut rows: Vec<(String, AuthorStats)> = final_stats.into_iter().collect();
284    rows.sort_by(|a, b| b.1.loc.cmp(&a.1.loc));
285
286    println!("Total commits: {}", total_commits);
287    println!("Total files: {}", total_files);
288    println!("Total loc: {}", total_loc);
289    print_table(rows, total_loc, total_commits, total_files);
290    Ok(())
291}
292
293/// Compute per-file ownership for a user.
294/// - username: match against blame author (by_name) or author-mail (by_email)
295/// - by_email: if true, compare normalized emails; otherwise compare author name
296/// - top: max rows to return (use usize::MAX to disable)
297/// - sort_pct: if true, sort by percentage desc; otherwise by user_loc desc
298pub fn get_user_file_ownership(
299    username: &str,
300    by_email: bool,
301    top: usize,
302    sort_pct: bool,
303) -> Result<Vec<(String, usize, usize, f32)>, String> {
304    let files = tracked_text_files_head()?;
305    let mut rows: Vec<(String, usize, usize, f32)> = Vec::new();
306
307    let uname_norm = username.trim().to_string();
308    // normalize email for comparison
309    let email_norm = uname_norm
310        .trim_matches(|c| c == '<' || c == '>')
311        .to_ascii_lowercase();
312
313    for file in files {
314        let blame = run_command(&["--no-pager", "blame", "--line-porcelain", "HEAD", "--", &file]);
315        if blame.is_err() {
316            continue;
317        }
318        let blame = blame.unwrap();
319
320        let mut current_name: Option<String> = None;
321        let mut current_mail: Option<String> = None;
322        let mut file_total: usize = 0;
323        let mut user_loc: usize = 0;
324
325        for line in blame.lines() {
326            if let Some(rest) = line.strip_prefix("author ") {
327                current_name = Some(rest.trim().to_string());
328            } else if let Some(rest) = line.strip_prefix("author-mail ") {
329                current_mail = Some(rest.trim().to_string());
330            } else if line.starts_with('\t') {
331                file_total += 1;
332                if let (Some(name), Some(mail)) = (&current_name, &current_mail) {
333                    let is_match = if by_email {
334                        let mail_norm = mail.trim_matches(|c| c == '<' || c == '>').to_ascii_lowercase();
335                        mail_norm == email_norm
336                    } else {
337                        name == &uname_norm
338                    };
339                    if is_match {
340                        user_loc += 1;
341                    }
342                }
343            }
344        }
345
346        if user_loc > 0 && file_total > 0 {
347            let pct = (user_loc as f32 / file_total as f32) * 100.0;
348            rows.push((file, user_loc, file_total, pct));
349        }
350    }
351
352    if sort_pct {
353        rows.sort_by(|a, b| {
354            b.3.partial_cmp(&a.3).unwrap_or(std::cmp::Ordering::Equal)
355                .then_with(|| b.1.cmp(&a.1))
356                .then_with(|| a.0.cmp(&b.0))
357        });
358    } else {
359        rows.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| b.3.partial_cmp(&a.3).unwrap_or(std::cmp::Ordering::Equal)).then_with(|| a.0.cmp(&b.0)));
360    }
361
362    if top < rows.len() {
363        rows.truncate(top);
364    }
365
366    Ok(rows)
367}
368
#[cfg(test)]
mod tests {
    use super::*;

    use std::env;
    use std::fs;
    use std::io::Write;
    use std::path::PathBuf;
    use std::process::{Command, Stdio};
    use std::sync::{Mutex, MutexGuard};
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Serializes every test that depends on the process-wide current directory:
    /// the temp-repo tests change it, and the live-repo tests read it.
    static TEST_DIR_LOCK: Mutex<()> = Mutex::new(());

    /// Acquires [`TEST_DIR_LOCK`], recovering it if an earlier test panicked while
    /// holding it so one failure does not cascade into unrelated tests.
    fn lock_cwd() -> MutexGuard<'static, ()> {
        TEST_DIR_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Runs `git` with `args`, discarding its output; returns whether it succeeded.
    fn git_quiet(args: &[&str]) -> bool {
        Command::new("git")
            .args(args)
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .status()
            .map(|s| s.success())
            .unwrap_or(false)
    }

    /// Stages everything in the current directory, retrying with `-A` on failure.
    fn stage_all() -> bool {
        git_quiet(&["add", "."]) || git_quiet(&["add", "-A", "."])
    }

    /// Stages all changes and commits them with the given author/committer identity.
    /// Signing and hooks are disabled so the host git configuration cannot interfere.
    fn commit_as(name: &str, email: &str, msg: &str) {
        assert!(stage_all(), "git add failed");
        let status = Command::new("git")
            .args(["-c", "commit.gpgsign=false"])
            .args(["-c", "core.hooksPath=/dev/null"])
            .args(["commit", "--no-verify", "-q", "-m", msg])
            .env("GIT_AUTHOR_NAME", name)
            .env("GIT_AUTHOR_EMAIL", email)
            .env("GIT_COMMITTER_NAME", name)
            .env("GIT_COMMITTER_EMAIL", email)
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .status()
            .expect("failed to run git commit");
        assert!(status.success(), "git commit failed: {}", msg);
    }

    /// Asserts that `rows` has an entry for `file` in which the user owns all
    /// `lines` lines (100%).
    fn assert_fully_owned(rows: &[(String, usize, usize, f32)], file: &str, lines: usize) {
        let row = rows
            .iter()
            .find(|row| row.0 == file)
            .unwrap_or_else(|| panic!("{} missing from ownership rows", file));
        assert_eq!(row.1, lines);
        assert_eq!(row.2, lines);
        assert!((row.3 - 100.0).abs() < 0.01);
    }

    /// A throwaway git repository under `./.tmp-git-insights-tests` that becomes the
    /// current directory for the lifetime of the value. Dropping it restores the
    /// previous directory and removes the temporary tree.
    struct TempRepo {
        old_dir: PathBuf,
        base: PathBuf,
        path: PathBuf,
        // Held until after `Drop::drop` has restored the directory.
        _guard: MutexGuard<'static, ()>,
    }

    impl TempRepo {
        fn new() -> Self {
            let guard = lock_cwd();

            let old_dir = env::current_dir().unwrap();
            let base = old_dir.join(".tmp-git-insights-tests");
            let ts = SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_nanos();
            let path = base.join(format!("git-insights-ownership-{}", ts));
            fs::create_dir_all(&path).unwrap();

            // Build the value before changing directory so that `Drop` restores
            // the cwd and cleans up even if one of the setup steps below panics.
            let repo = Self {
                old_dir,
                base,
                path,
                _guard: guard,
            };
            env::set_current_dir(&repo.path).unwrap();

            assert!(git_quiet(&["init", "-q"]), "git init failed");
            fs::write("INIT", "init\n").unwrap();
            commit_as("Init", "init@example.com", "chore: init");

            repo
        }
    }

    impl Drop for TempRepo {
        fn drop(&mut self) {
            let _ = env::set_current_dir(&self.old_dir);
            let _ = fs::remove_dir_all(&self.path);
            // Ensure the base test directory is also removed so tests leave no trace.
            let _ = fs::remove_dir_all(&self.base);
        }
    }

    #[test]
    fn test_author_stats_default() {
        let stats = AuthorStats::default();
        assert_eq!(stats.loc, 0);
        assert_eq!(stats.commits, 0);
        assert!(stats.files.is_empty());
    }

    #[test]
    fn test_user_stats_default() {
        let stats = UserStats::default();
        assert!(stats.tags.is_empty());
        assert_eq!(stats.pull_requests, 0);
    }

    #[test]
    fn test_gather_commit_stats_runs_ok() {
        // Runs against the live git repository, so the cwd must not be switched
        // to a temp repo by a concurrently running test.
        let _cwd = lock_cwd();
        let result = gather_commit_stats();
        assert!(result.is_ok());
        let stats = result.unwrap();
        // The project should have at least one commit/author.
        assert!(!stats.is_empty());
    }

    #[test]
    #[ignore] // This test is slow and prints to stdout.
    fn test_gather_loc_and_file_stats_runs_ok() {
        // Runs against the live git repository and can be slow.
        let _cwd = lock_cwd();
        let result = gather_loc_and_file_stats();
        assert!(result.is_ok());
        let stats = result.unwrap();
        // The project should have some stats.
        assert!(!stats.is_empty());
    }

    #[test]
    fn test_gather_user_stats_for_unknown_user() {
        let _cwd = lock_cwd();
        // Test with a user that almost certainly doesn't exist.
        let result = gather_user_stats("a-very-unlikely-user-name-to-exist");
        assert!(result.is_ok());
        let stats = result.unwrap();
        assert_eq!(stats.pull_requests, 0);
        assert!(stats.tags.is_empty());
    }

    #[test]
    fn test_author_stats_to_json() {
        let mut author_stats = AuthorStats::default();
        author_stats.loc = 100;
        author_stats.commits = 10;
        author_stats.files.insert("file1.rs".to_string());
        author_stats.files.insert("file2.rs".to_string());

        let json = author_stats.to_json();
        // Accept either element order so the test does not depend on set iteration order.
        let expected_json1 =
            "{\"loc\": 100, \"commits\": 10, \"files\": [\"file1.rs\", \"file2.rs\"]}";
        let expected_json2 =
            "{\"loc\": 100, \"commits\": 10, \"files\": [\"file2.rs\", \"file1.rs\"]}";

        assert!(
            json == expected_json1 || json == expected_json2,
            "Actual JSON: {}",
            json
        );
    }

    #[test]
    fn test_user_stats_to_json() {
        let mut user_stats = UserStats::default();
        user_stats.pull_requests = 5;
        user_stats.tags.insert("v1.0".to_string());
        user_stats.tags.insert("v1.1".to_string());

        let json = user_stats.to_json();
        // Accept either element order so the test does not depend on set iteration order.
        let expected_json1 = "{\"tags\": [\"v1.0\", \"v1.1\"], \"pull_requests\": 5}";
        let expected_json2 = "{\"tags\": [\"v1.1\", \"v1.0\"], \"pull_requests\": 5}";

        assert!(
            json == expected_json1 || json == expected_json2,
            "Actual JSON: {}",
            json
        );
    }

    #[test]
    fn test_get_user_file_ownership_by_name_and_email() {
        let _repo = TempRepo::new();

        // Alice owns README fully (4 lines total, written over two commits).
        fs::write("README.md", "a\nb\nc\n").unwrap();
        commit_as("Alice", "alice@example.com", "feat: add README");
        fs::OpenOptions::new()
            .append(true)
            .open("README.md")
            .unwrap()
            .write_all(b"d\n")
            .unwrap();
        commit_as("Alice", "alice@example.com", "feat: update README");

        // Bob owns src.txt fully (2 lines total).
        fs::write("src.txt", "x\ny\n").unwrap();
        commit_as("Bob", "bob@example.com", "feat: add src");

        // By name.
        let rows = super::get_user_file_ownership("Alice", false, usize::MAX, false)
            .expect("ownership by name failed");
        assert_fully_owned(&rows, "README.md", 4);

        // By email.
        let rows_email =
            super::get_user_file_ownership("alice@example.com", true, usize::MAX, false)
                .expect("ownership by email failed");
        assert_fully_owned(&rows_email, "README.md", 4);

        // Bob by name should show src.txt 2/2.
        let rows_bob = super::get_user_file_ownership("Bob", false, usize::MAX, false)
            .expect("ownership Bob failed");
        assert_fully_owned(&rows_bob, "src.txt", 2);
    }

    #[test]
    fn test_get_user_file_ownership_top_and_sort_pct() {
        let _repo = TempRepo::new();

        // Create 3 files fully owned by Alice with different line counts.
        fs::write("a.txt", "1\n2\n3\n").unwrap(); // 3 lines
        commit_as("Alice", "alice@example.com", "a");
        fs::write("b.txt", "1\n2\n").unwrap(); // 2 lines
        commit_as("Alice", "alice@example.com", "b");
        fs::write("c.txt", "1\n2\n3\n4\n").unwrap(); // 4 lines
        commit_as("Alice", "alice@example.com", "c");

        // Sort by pct and keep the top 2.
        let rows = super::get_user_file_ownership("Alice", false, 2, true)
            .expect("ownership sort pct failed");
        // All files are 100% owned; only the row limit is checked here.
        assert_eq!(rows.len(), 2);
    }
}
643        assert_eq!(rows.len(), 2);
644    }
645}