1use std::collections::HashMap;
18
19#[derive(Debug, Clone, Default, PartialEq, serde::Serialize, serde::Deserialize)]
21pub struct CommitFeatures {
22 pub lines_added: u32,
24 pub lines_deleted: u32,
26 pub files_changed: u32,
28 pub churn_ratio: f32,
30 pub has_test_changes: bool,
32 pub complexity_delta: f32,
34 pub author_experience: f32,
36 pub days_since_last_change: f32,
38}
39
40impl CommitFeatures {
41 #[must_use]
43 pub fn to_array(&self) -> [f32; 8] {
44 [
45 self.lines_added as f32,
46 self.lines_deleted as f32,
47 self.files_changed as f32,
48 self.churn_ratio,
49 if self.has_test_changes { 1.0 } else { 0.0 },
50 self.complexity_delta,
51 self.author_experience,
52 self.days_since_last_change,
53 ]
54 }
55
56 #[must_use]
58 #[allow(clippy::cast_sign_loss)]
59 pub fn from_array(arr: [f32; 8]) -> Self {
60 Self {
61 lines_added: arr[0].max(0.0) as u32,
62 lines_deleted: arr[1].max(0.0) as u32,
63 files_changed: arr[2].max(0.0) as u32,
64 churn_ratio: arr[3],
65 has_test_changes: arr[4] > 0.5,
66 complexity_delta: arr[5],
67 author_experience: arr[6],
68 days_since_last_change: arr[7],
69 }
70 }
71
72 #[must_use]
76 pub fn normalize(&self, stats: &FeatureStats) -> [f32; 8] {
77 let raw = self.to_array();
78 let mut normalized = [0.0f32; 8];
79
80 for (i, &val) in raw.iter().enumerate() {
81 if stats.std[i] > f32::EPSILON {
82 normalized[i] = (val - stats.mean[i]) / stats.std[i];
83 } else {
84 normalized[i] = 0.0;
85 }
86 }
87
88 normalized
89 }
90}
91
/// Per-slot statistics used by [`CommitFeatures::normalize`].
///
/// Both arrays are indexed in the same slot order as
/// [`CommitFeatures::to_array`].
#[derive(Debug, Clone, Default)]
pub struct FeatureStats {
    /// Mean of each feature slot.
    pub mean: [f32; 8],
    /// Standard deviation of each feature slot.
    pub std: [f32; 8],
}
100
101impl FeatureStats {
102 #[must_use]
104 pub fn from_features(features: &[CommitFeatures]) -> Self {
105 if features.is_empty() {
106 return Self::default();
107 }
108
109 let n = features.len() as f32;
110
111 let mut mean = [0.0f32; 8];
113 for f in features {
114 let arr = f.to_array();
115 for (i, &val) in arr.iter().enumerate() {
116 mean[i] += val;
117 }
118 }
119 for m in &mut mean {
120 *m /= n;
121 }
122
123 let mut std = [0.0f32; 8];
125 for f in features {
126 let arr = f.to_array();
127 for (i, &val) in arr.iter().enumerate() {
128 let diff = val - mean[i];
129 std[i] += diff * diff;
130 }
131 }
132 for s in &mut std {
133 *s = (*s / n).sqrt();
134 }
135
136 Self { mean, std }
137 }
138}
139
/// Stateful extractor that turns commit diffs into [`CommitFeatures`].
///
/// The state accumulates across calls to `extract` and is cleared by `reset`.
#[derive(Debug, Default)]
pub struct CommitFeatureExtractor {
    /// Number of commits seen so far, keyed by author name.
    author_commits: HashMap<String, u32>,
    /// Timestamp (seconds) recorded for each file path that has been seen.
    file_last_modified: HashMap<String, f64>,
    /// Total number of commits processed since creation or the last reset.
    total_commits: u32,
}
150
151impl CommitFeatureExtractor {
152 #[must_use]
154 pub fn new() -> Self {
155 Self::default()
156 }
157
158 #[must_use]
166 pub fn extract(&mut self, diff: &str, author: &str, timestamp: f64) -> CommitFeatures {
167 let mut features = CommitFeatures::default();
168
169 let (added, deleted, files) = self.parse_diff_stats(diff);
171 features.lines_added = added;
172 features.lines_deleted = deleted;
173 features.files_changed = files;
174
175 let total = added + deleted;
177 features.churn_ratio = if total > 0 {
178 added as f32 / (total as f32 + 1.0)
179 } else {
180 0.5
181 };
182
183 features.has_test_changes = self.detect_test_changes(diff);
185
186 features.complexity_delta = self.estimate_complexity_delta(diff);
188
189 let author_count = self.author_commits.entry(author.to_string()).or_insert(0);
191 *author_count += 1;
192 self.total_commits += 1;
193
194 features.author_experience = if self.total_commits > 0 {
196 ((*author_count as f32).ln() / (self.total_commits as f32).ln().max(1.0)).min(1.0)
197 } else {
198 0.0
199 };
200
201 let affected_files = self.extract_affected_files(diff);
203 let mut min_days = f64::MAX;
204 let seconds_per_day = 86400.0;
205
206 for file in &affected_files {
207 if let Some(&last_mod) = self.file_last_modified.get(file) {
208 let days = (timestamp - last_mod) / seconds_per_day;
209 if days < min_days && days >= 0.0 {
210 min_days = days;
211 }
212 }
213 self.file_last_modified.insert(file.clone(), timestamp);
214 }
215
216 features.days_since_last_change = if min_days >= f64::MAX - 1.0 {
217 365.0 } else {
219 (min_days as f32).min(365.0)
220 };
221
222 features
223 }
224
225 fn parse_diff_stats(&self, diff: &str) -> (u32, u32, u32) {
227 let mut added = 0u32;
228 let mut deleted = 0u32;
229 let mut files = 0u32;
230
231 for line in diff.lines() {
232 if line.starts_with("diff --git") || line.starts_with("--- ") {
233 if line.starts_with("diff --git") {
234 files += 1;
235 }
236 } else if line.starts_with('+') && !line.starts_with("+++") {
237 added += 1;
238 } else if line.starts_with('-') && !line.starts_with("---") {
239 deleted += 1;
240 }
241 }
242
243 (added, deleted, files.max(1))
244 }
245
246 fn detect_test_changes(&self, diff: &str) -> bool {
248 for line in diff.lines() {
249 if line.starts_with("diff --git")
250 || line.starts_with("--- ")
251 || line.starts_with("+++ ")
252 {
253 let lower = line.to_lowercase();
254 if lower.contains("test")
255 || lower.contains("spec")
256 || lower.contains("_test.")
257 || lower.contains(".test.")
258 {
259 return true;
260 }
261 }
262 }
263 false
264 }
265
266 fn estimate_complexity_delta(&self, diff: &str) -> f32 {
268 let mut delta = 0i32;
269
270 for line in diff.lines() {
271 let trimmed = line.trim();
272 let is_addition = line.starts_with('+') && !line.starts_with("+++");
273 let is_deletion = line.starts_with('-') && !line.starts_with("---");
274
275 let control_flow = ["if ", "elif ", "else:", "for ", "while ", "match ", "case "];
277 for kw in control_flow {
278 if trimmed.contains(kw) {
279 if is_addition {
280 delta += 1;
281 } else if is_deletion {
282 delta -= 1;
283 }
284 }
285 }
286 }
287
288 delta as f32
289 }
290
291 fn extract_affected_files(&self, diff: &str) -> Vec<String> {
293 let mut files = Vec::new();
294
295 for line in diff.lines() {
296 if line.starts_with("diff --git a/") {
297 if let Some(path) = line.strip_prefix("diff --git a/") {
299 if let Some(space_pos) = path.find(" b/") {
300 files.push(path[..space_pos].to_string());
301 }
302 }
303 } else if line.starts_with("+++ b/") {
304 if let Some(path) = line.strip_prefix("+++ b/") {
305 if !files.contains(&path.to_string()) {
306 files.push(path.to_string());
307 }
308 }
309 }
310 }
311
312 files
313 }
314
315 pub fn reset(&mut self) {
317 self.author_commits.clear();
318 self.file_last_modified.clear();
319 self.total_commits = 0;
320 }
321}
322
#[cfg(test)]
mod tests {
    use super::*;

    /// Commit time used wherever a test only needs "some" timestamp.
    const BASE_TS: f64 = 1_700_000_000.0;
    /// Seconds in one day, matching the conversion used by the extractor.
    const DAY: f64 = 86_400.0;

    /// `true` when `actual` is within 0.01 of `expected`.
    fn close(actual: f32, expected: f32) -> bool {
        (actual - expected).abs() < 0.01
    }

    #[test]
    fn test_commit_features_default() {
        let blank = CommitFeatures::default();
        assert_eq!(blank.lines_added, 0);
        assert_eq!(blank.lines_deleted, 0);
        assert_eq!(blank.files_changed, 0);
    }

    #[test]
    fn test_commit_features_to_array() {
        let sample = CommitFeatures {
            lines_added: 10,
            lines_deleted: 5,
            files_changed: 2,
            churn_ratio: 0.67,
            has_test_changes: true,
            complexity_delta: 3.0,
            author_experience: 0.5,
            days_since_last_change: 7.0,
        };

        let flat = sample.to_array();

        // Slots that must match exactly.
        let exact = [(0, 10.0), (1, 5.0), (2, 2.0), (4, 1.0), (5, 3.0), (6, 0.5), (7, 7.0)];
        for (slot, expected) in exact {
            assert_eq!(flat[slot], expected);
        }
        // The churn ratio is only compared with a tolerance.
        assert!(close(flat[3], 0.67));
    }

    #[test]
    fn test_commit_features_from_array() {
        let rebuilt = CommitFeatures::from_array([10.0, 5.0, 2.0, 0.67, 1.0, 3.0, 0.5, 7.0]);

        assert_eq!(rebuilt.lines_added, 10);
        assert_eq!(rebuilt.lines_deleted, 5);
        assert_eq!(rebuilt.files_changed, 2);
        assert!(rebuilt.has_test_changes);
    }

    #[test]
    fn test_feature_stats_from_features() {
        let small = CommitFeatures {
            lines_added: 10,
            lines_deleted: 5,
            ..Default::default()
        };
        let large = CommitFeatures {
            lines_added: 20,
            lines_deleted: 15,
            ..Default::default()
        };

        let stats = FeatureStats::from_features(&[small, large]);

        assert!(close(stats.mean[0], 15.0));
        assert!(close(stats.mean[1], 10.0));
    }

    #[test]
    fn test_feature_stats_empty() {
        let none: Vec<CommitFeatures> = Vec::new();
        let stats = FeatureStats::from_features(&none);
        assert_eq!(stats.mean[0], 0.0);
        assert_eq!(stats.std[0], 0.0);
    }

    #[test]
    fn test_normalize_features() {
        let sample = CommitFeatures {
            lines_added: 20,
            ..Default::default()
        };
        let stats = FeatureStats {
            mean: [10.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            std: [5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        };

        // (20 - 10) / 5 == 2
        let scaled = sample.normalize(&stats);
        assert!(close(scaled[0], 2.0));
    }

    #[test]
    fn test_extractor_parse_diff() {
        let patch = r#"diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,5 @@
 fn main() {
+ let x = 1;
+ let y = 2;
- println!("hello");
 }
"#;

        let mut extractor = CommitFeatureExtractor::new();
        let result = extractor.extract(patch, "alice", BASE_TS);

        assert_eq!(result.lines_added, 2);
        assert_eq!(result.lines_deleted, 1);
        assert_eq!(result.files_changed, 1);
        assert!(!result.has_test_changes);
    }

    #[test]
    fn test_extractor_test_changes() {
        let patch = r#"diff --git a/tests/test_main.rs b/tests/test_main.rs
+++ b/tests/test_main.rs
+#[test]
+fn test_foo() {}
"#;

        let mut extractor = CommitFeatureExtractor::new();
        let result = extractor.extract(patch, "bob", BASE_TS);
        assert!(result.has_test_changes);
    }

    #[test]
    fn test_extractor_complexity_delta() {
        let patch = r#"diff --git a/src/lib.rs b/src/lib.rs
+if x > 0 {
+ for i in 0..10 {
+ while running {
- println!("simple");
"#;

        let mut extractor = CommitFeatureExtractor::new();
        let result = extractor.extract(patch, "carol", BASE_TS);
        // Three added control-flow keywords, none removed.
        assert!(close(result.complexity_delta, 3.0));
    }

    #[test]
    fn test_extractor_author_experience() {
        let patch = "diff --git a/foo.rs b/foo.rs\n+line";
        let mut extractor = CommitFeatureExtractor::new();

        let first = extractor.extract(patch, "alice", BASE_TS);
        assert!(first.author_experience >= 0.0);

        // A second commit by the same author must not lower the score.
        let second = extractor.extract(patch, "alice", BASE_TS + 1000.0);
        assert!(second.author_experience >= first.author_experience);

        // A newcomer must not outrank the returning author.
        let newcomer = extractor.extract(patch, "bob", BASE_TS + 2000.0);
        assert!(newcomer.author_experience <= second.author_experience);
    }

    #[test]
    fn test_extractor_days_since_last_change() {
        let patch = "diff --git a/src/foo.rs b/src/foo.rs\n+++ b/src/foo.rs\n+line";
        let mut extractor = CommitFeatureExtractor::new();

        // Never seen before: reported as the 365-day cap.
        let first = extractor.extract(patch, "alice", BASE_TS);
        assert!(close(first.days_since_last_change, 365.0));

        // Seen exactly one week earlier.
        let second = extractor.extract(patch, "alice", BASE_TS + 7.0 * DAY);
        assert!(close(second.days_since_last_change, 7.0));
    }

    #[test]
    fn test_extractor_reset() {
        let patch = "diff --git a/foo.rs b/foo.rs\n+line";
        let mut extractor = CommitFeatureExtractor::new();
        let _ = extractor.extract(patch, "alice", BASE_TS);

        assert!(extractor.total_commits > 0);

        extractor.reset();

        assert_eq!(extractor.total_commits, 0);
        assert!(extractor.author_commits.is_empty());
    }

    #[test]
    fn test_extractor_churn_ratio() {
        let mut extractor = CommitFeatureExtractor::new();

        // Additions only: ratio above one half.
        let only_added = extractor.extract("diff --git a/f.rs b/f.rs\n+a\n+b\n+c", "alice", 1.0);
        assert!(only_added.churn_ratio > 0.5);

        extractor.reset();

        // Deletions only: ratio below one half.
        let only_deleted = extractor.extract("diff --git a/f.rs b/f.rs\n-a\n-b\n-c", "alice", 1.0);
        assert!(only_deleted.churn_ratio < 0.5);
    }

    #[test]
    fn test_extractor_empty_diff() {
        let mut extractor = CommitFeatureExtractor::new();
        let result = extractor.extract("", "alice", 1.0);

        assert_eq!(result.lines_added, 0);
        assert_eq!(result.lines_deleted, 0);
        // The file count is never reported below one.
        assert_eq!(result.files_changed, 1);
    }

    #[test]
    fn test_commit_features_debug() {
        let rendered = format!("{:?}", CommitFeatures::default());
        assert!(rendered.contains("CommitFeatures"));
    }

    #[test]
    fn test_feature_stats_debug() {
        let rendered = format!("{:?}", FeatureStats::default());
        assert!(rendered.contains("FeatureStats"));
    }

    #[test]
    fn test_extractor_debug() {
        let rendered = format!("{:?}", CommitFeatureExtractor::new());
        assert!(rendered.contains("CommitFeatureExtractor"));
    }

    #[test]
    fn test_commit_features_clone_eq() {
        let original = CommitFeatures {
            lines_added: 10,
            ..Default::default()
        };
        let copy = original.clone();
        assert_eq!(original, copy);
    }
}