1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
//! Git repository analysis and history tracking.
//!
//! This module provides functionality for analyzing git commit history
//! to track code changes over time.
use crate::classifier::LineType;
use crate::stats::FileStats;
use chrono::{DateTime, NaiveDate, Utc};
use git2::Repository;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::Path;
/// Line statistics for one calendar bucket of git history.
///
/// `GitAnalyzer::analyze_history` produces one entry per day;
/// `HistoricalStats::aggregate_by_week` reuses the same type with one entry per ISO week.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DailyStats {
/// Day the commits fall on, taken from the commit timestamp interpreted as UTC.
/// For weekly aggregates this is the Monday of the ISO week.
pub date: NaiveDate,
/// Added lines, split into blank / comment / code counts.
pub additions: FileStats,
/// Deleted lines, split into blank / comment / code counts.
pub deletions: FileStats,
/// Added code lines minus deleted code lines (blank and comment lines are not included).
pub net_code: i64,
}
/// Historical statistics aggregated from git history.
///
/// Filled in by `GitAnalyzer::analyze_history`.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct HistoricalStats {
/// Per-day entries; `analyze_history` stores them sorted most recent first.
pub daily: Vec<DailyStats>,
/// Totals keyed by commit author name. `analyze_history` accumulates only
/// added lines here; deletions are not tracked per author.
pub by_author: HashMap<String, FileStats>,
/// Number of commits that passed the date filter and were analyzed.
pub total_commits: usize,
}
impl HistoricalStats {
/// Aggregate daily statistics by week (Monday-Sunday).
/// Returns a new vector of DailyStats where each entry represents a week.
pub fn aggregate_by_week(&self) -> Vec<DailyStats> {
use chrono::Datelike;
if self.daily.is_empty() {
return Vec::new();
}
let mut weekly: HashMap<(i32, u32), DailyStats> = HashMap::new();
for daily in &self.daily {
// Get the ISO week number and year
let iso_week = daily.date.iso_week();
let year = iso_week.year();
let week = iso_week.week();
let key = (year, week);
// Get the Monday of this week as the representative date
let week_start = chrono::NaiveDate::from_isoywd_opt(year, week, chrono::Weekday::Mon)
.unwrap_or(daily.date);
let week_stat = weekly.entry(key).or_insert_with(|| DailyStats {
date: week_start,
additions: FileStats::default(),
deletions: FileStats::default(),
net_code: 0,
});
week_stat.additions += daily.additions;
week_stat.deletions += daily.deletions;
week_stat.net_code += daily.net_code;
}
// Convert to sorted vec
let mut result: Vec<_> = weekly.into_values().collect();
result.sort_by(|a, b| b.date.cmp(&a.date)); // Most recent first
result
}
}
/// Git repository analyzer.
///
/// Wraps an open `git2::Repository` and derives line statistics from its commit history.
pub struct GitAnalyzer {
// Repository handle obtained through `Repository::discover` in `GitAnalyzer::new`.
repo: Repository,
}
impl GitAnalyzer {
/// Create a new `GitAnalyzer` for the repository that contains `path`.
///
/// The repository is located with [`Repository::discover`].
///
/// # Errors
///
/// Returns the [`git2::Error`] reported by `Repository::discover` when no
/// repository can be found for `path`.
pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, git2::Error> {
    Repository::discover(path).map(|repo| Self { repo })
}
/// Report whether `path` belongs to a git repository.
///
/// Returns `true` when [`Repository::discover`] succeeds for `path`; any
/// discovery error is reported as `false`.
pub fn is_git_repo<P: AsRef<Path>>(path: P) -> bool {
    let discovery = Repository::discover(path);
    discovery.is_ok()
}
/// Analyze commit history and return historical statistics.
///
/// If `verbose` is true, progress will be printed to stderr every 100 commits.
pub fn analyze_history(
&self,
since: Option<DateTime<Utc>>,
until: Option<DateTime<Utc>>,
verbose: bool,
) -> Result<HistoricalStats, git2::Error> {
let mut stats = HistoricalStats::default();
let mut daily_map: HashMap<NaiveDate, DailyStats> = HashMap::new();
let mut commits_processed = 0;
// Walk commits
let mut revwalk = self.repo.revwalk()?;
revwalk.push_head()?;
revwalk.set_sorting(git2::Sort::TIME)?;
for oid in revwalk {
let oid = oid?;
let commit = self.repo.find_commit(oid)?;
// Filter by date if specified
let commit_time = DateTime::from_timestamp(commit.time().seconds(), 0)
.unwrap_or(DateTime::UNIX_EPOCH);
// Check if commit is before 'since' date
if let Some(since_date) = since {
if commit_time < since_date {
break; // Stop processing older commits
}
}
// Check if commit is after 'until' date (inclusive - we want commits on the until date)
// We add one day to make the until date inclusive (commits until end of that day)
if let Some(until_date) = until {
let until_end_of_day = until_date + chrono::Duration::days(1);
if commit_time >= until_end_of_day {
continue; // Skip this commit but continue walking
}
}
stats.total_commits += 1;
commits_processed += 1;
// Show progress every 100 commits if verbose
if verbose && commits_processed % 100 == 0 {
eprintln!("Processed {} commits...", commits_processed);
}
// Get commit date
let date = commit_time.date_naive();
// Analyze commit diff
let (additions, deletions) = self.analyze_commit(&commit)?;
// Update daily stats
let daily_stat = daily_map.entry(date).or_insert_with(|| DailyStats {
date,
additions: FileStats::default(),
deletions: FileStats::default(),
net_code: 0,
});
daily_stat.additions += additions;
daily_stat.deletions += deletions;
daily_stat.net_code += (additions.code as i64) - (deletions.code as i64);
// Track by author - extract name to owned String to avoid lifetime issues
let author_name = commit.author().name().map(|s| s.to_string());
if let Some(author) = author_name {
let author_stats = stats
.by_author
.entry(author)
.or_insert_with(FileStats::default);
*author_stats += additions;
}
}
// Convert daily map to sorted vec
let mut daily: Vec<_> = daily_map.into_values().collect();
daily.sort_by(|a, b| b.date.cmp(&a.date)); // Most recent first
stats.daily = daily;
// Show completion message if verbose
if verbose {
eprintln!("Completed analyzing {} commits", commits_processed);
}
Ok(stats)
}
/// Diff `commit` against its parent and count added/deleted lines by type.
///
/// Returns `(additions, deletions)`.
///
/// * A root commit (no parent) is diffed against the empty tree, so all of
///   its content counts as additions.
/// * A merge commit (more than one parent) reports zero changes. Diffing a
///   merge against its first parent would repeat every change of the merged
///   branch, and those changes are already counted on the branch's own
///   commits when the whole history is walked.
///
/// # Errors
///
/// Propagates any [`git2::Error`] raised while loading trees or computing
/// and iterating the diff.
fn analyze_commit(&self, commit: &git2::Commit) -> Result<(FileStats, FileStats), git2::Error> {
    let mut additions = FileStats::default();
    let mut deletions = FileStats::default();

    let parent_tree = match commit.parent_count() {
        0 => None,
        1 => Some(commit.parent(0)?.tree()?),
        // Merge commit: skip to avoid double counting the merged changes.
        _ => return Ok((additions, deletions)),
    };

    let tree = commit.tree()?;
    let diff = self
        .repo
        .diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None)?;

    diff.foreach(
        // File callback: returning true keeps the iteration going for every file.
        &mut |_delta, _progress| true,
        None,
        None,
        Some(&mut |_delta, _hunk, line| {
            // Only added ('+') and deleted ('-') lines are counted; context
            // lines and other markers are ignored without being classified.
            let counters = match line.origin() {
                '+' => &mut additions,
                '-' => &mut deletions,
                _ => return true,
            };
            match Self::classify_diff_line(line.content()) {
                LineType::Blank => counters.blank += 1,
                LineType::Comment => counters.comment += 1,
                LineType::Code => counters.code += 1,
            }
            true
        }),
    )?;

    Ok((additions, deletions))
}
/// Heuristically label one diff line as blank, comment, or code.
///
/// The line is trimmed of surrounding whitespace first. An empty result is
/// `Blank`; a result that starts with one of the known comment markers is
/// `Comment`; everything else is `Code`. Content that is not valid UTF-8 is
/// counted as `Code`.
///
/// This looks at the line in isolation: multi-line comment state is not
/// tracked, and any line starting with `#`, `*` or `--` is treated as a
/// comment (so e.g. `#include <x>` or `*ptr = 1;` count as comments too).
fn classify_diff_line(content: &[u8]) -> LineType {
    // Markers that open or continue a comment in common languages. Some are
    // prefixes of others ("*" covers "*/", "--" covers "-->"); they are
    // listed separately to document intent.
    const COMMENT_PREFIXES: [&str; 8] = ["//", "#", "--", "/*", "*/", "*", "<!--", "-->"];

    match std::str::from_utf8(content).map(str::trim) {
        // Undecodable bytes are treated like code.
        Err(_) => LineType::Code,
        Ok("") => LineType::Blank,
        Ok(text) if COMMENT_PREFIXES.iter().any(|&prefix| text.starts_with(prefix)) => {
            LineType::Comment
        }
        Ok(_) => LineType::Code,
    }
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the classifier under test.
    fn classify(raw: &[u8]) -> LineType {
        GitAnalyzer::classify_diff_line(raw)
    }

    /// Empty and whitespace-only lines are blank.
    #[test]
    fn test_classify_diff_line_blank() {
        let blanks: [&[u8]; 3] = [b"", b" ", b"\t\t"];
        for raw in blanks.iter().copied() {
            assert_eq!(classify(raw), LineType::Blank, "input: {:?}", raw);
        }
    }

    /// Lines starting with a known comment marker are comments.
    #[test]
    fn test_classify_diff_line_comment() {
        let comments: [&[u8]; 4] = [b"// comment", b"# comment", b"/* comment", b"-- SQL comment"];
        for raw in comments.iter().copied() {
            assert_eq!(classify(raw), LineType::Comment, "input: {:?}", raw);
        }
    }

    /// Ordinary statements are code.
    #[test]
    fn test_classify_diff_line_code() {
        let code: [&[u8]; 2] = [b"let x = 5;", b"fn main() {"];
        for raw in code.iter().copied() {
            assert_eq!(classify(raw), LineType::Code, "input: {:?}", raw);
        }
    }

    /// Assumes the tests are run from inside the project's git checkout.
    #[test]
    fn test_is_git_repo() {
        let cwd_is_repo = GitAnalyzer::is_git_repo(".");
        assert!(cwd_is_repo);
    }
}