1use std::{
2 collections::{HashMap, HashSet},
3 process::Command,
4};
5
6use crate::config::CommitConfig;
8use crate::{
9 error::{CommitGenError, Result},
10 types::{Mode, ScopeCandidate},
11};
12
/// Conventional top-level directory names that say little about which component changed.
///
/// `ScopeAnalyzer::extract_components_from_path` skips one of these when at least two more
/// path segments follow it, and `ScopeAnalyzer::build_scope_candidates` never reports one
/// of them as a single-segment scope.
const PLACEHOLDER_DIRS: &[&str] = &[
    "src",
    "lib",
    "include",
    "tests",
    "test",
    "benches",
    "examples",
    "docs",
];
16
/// Directory names that are never used as a scope component.
///
/// `ScopeAnalyzer::extract_components_from_path` drops any path segment that matches one
/// of these exactly, regardless of its position in the path.
const SKIP_DIRS: &[&str] = &[
    "test",
    "tests",
    "benches",
    "examples",
    "target",
    "build",
    "node_modules",
    ".github",
];
20
/// Accumulates changed-line counts per path component from numstat lines.
pub struct ScopeAnalyzer {
    /// Sum of added + deleted lines over every numstat line that was counted.
    total_lines: usize,
    /// Changed lines attributed to each component key (`"a"` or `"a/b"`).
    component_lines: HashMap<String, usize>,
}
25
26impl Default for ScopeAnalyzer {
27 fn default() -> Self {
28 Self::new()
29 }
30}
31
32impl ScopeAnalyzer {
33 pub fn new() -> Self {
34 Self { component_lines: HashMap::new(), total_lines: 0 }
35 }
36
37 pub fn process_numstat_line(&mut self, line: &str, config: &CommitConfig) {
39 let parts: Vec<&str> = line.split('\t').collect();
40 if parts.len() < 3 {
41 return;
42 }
43
44 let (added_str, deleted_str, path_part) = (parts[0], parts[1], parts[2]);
45
46 let added = added_str.parse::<usize>().unwrap_or(0);
48 let deleted = deleted_str.parse::<usize>().unwrap_or(0);
49 let lines_changed = added + deleted;
50
51 if lines_changed == 0 {
52 return;
53 }
54
55 let path = Self::extract_path_from_rename(path_part);
57
58 if config.excluded_files.iter().any(|ex| path.ends_with(ex)) {
60 return;
61 }
62
63 self.total_lines += lines_changed;
64
65 let component_candidates = Self::extract_components_from_path(&path);
67
68 for comp in component_candidates {
69 if comp.split('/').any(|s| s.contains('.')) {
71 continue;
72 }
73
74 *self.component_lines.entry(comp).or_insert(0) += lines_changed;
75 }
76 }
77
78 fn extract_path_from_rename(path_part: &str) -> String {
80 if let Some(brace_start) = path_part.find('{') {
82 if let Some(arrow_pos) = path_part[brace_start..].find(" => ") {
83 let arrow_abs = brace_start + arrow_pos;
84 if let Some(brace_end) = path_part[arrow_abs..].find('}') {
85 let brace_end_abs = arrow_abs + brace_end;
86 let prefix = &path_part[..brace_start];
87 let new_name = path_part[arrow_abs + 4..brace_end_abs].trim();
88 return format!("{prefix}{new_name}");
89 }
90 }
91 } else if path_part.contains(" => ") {
92 return path_part
94 .split(" => ")
95 .nth(1)
96 .unwrap_or(path_part)
97 .trim()
98 .to_string();
99 }
100
101 path_part.trim().to_string()
102 }
103
104 fn extract_components_from_path(path: &str) -> Vec<String> {
106 let segments: Vec<&str> = path.split('/').collect();
107 let mut component_candidates = Vec::new();
108 let mut meaningful_segments = Vec::new();
109
110 let strip_ext = |s: &str| -> String {
112 if let Some(pos) = s.rfind('.') {
113 s[..pos].to_string()
114 } else {
115 s.to_string()
116 }
117 };
118
119 let is_file = |s: &str| -> bool {
121 s.contains('.') && !s.starts_with('.') && s.rfind('.').is_some_and(|p| p > 0)
122 };
123
124 for (seg_idx, seg) in segments.iter().enumerate() {
126 if PLACEHOLDER_DIRS.contains(seg) {
128 if segments.len() > seg_idx + 2 {
131 continue;
133 }
134 }
135 if is_file(seg) {
137 continue;
138 }
139 if SKIP_DIRS.contains(seg) {
141 continue;
142 }
143
144 let stripped = strip_ext(seg);
145 if !stripped.is_empty() && !stripped.starts_with('.') {
147 meaningful_segments.push(stripped);
148 }
149 }
150
151 if !meaningful_segments.is_empty() {
153 component_candidates.push(meaningful_segments[0].clone());
154
155 if meaningful_segments.len() >= 2 {
156 component_candidates
157 .push(format!("{}/{}", meaningful_segments[0], meaningful_segments[1]));
158 }
159 }
160
161 component_candidates
162 }
163
164 pub fn build_scope_candidates(&self) -> Vec<ScopeCandidate> {
166 let mut candidates: Vec<ScopeCandidate> = self
167 .component_lines
168 .iter()
169 .filter(|(path, _)| {
170 if !path.contains('/') && PLACEHOLDER_DIRS.contains(&path.as_str()) {
172 return false;
173 }
174 if let Some(root) = path.split('/').next()
177 && PLACEHOLDER_DIRS.contains(&root)
178 && path.split('/').count() == 2
179 {
180 return true;
183 }
184 true
185 })
186 .map(|(path, &lines)| {
187 let percentage = (lines as f32 / self.total_lines as f32) * 100.0;
188 let is_two_segment = path.contains('/');
189
190 let confidence = if is_two_segment {
194 if percentage > 60.0 {
195 percentage * 1.2
196 } else {
197 percentage * 0.8
198 }
199 } else {
200 percentage
201 };
202
203 ScopeCandidate { percentage, path: path.clone(), confidence }
204 })
205 .collect();
206
207 candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
208 candidates
209 }
210
211 pub fn is_wide_change(candidates: &[ScopeCandidate], config: &CommitConfig) -> bool {
213 let is_wide = if let Some(top) = candidates.first() {
215 top.percentage / 100.0 < config.wide_change_threshold
216 } else {
217 false
218 };
219
220 let distinct_roots: HashSet<&str> = candidates
222 .iter()
223 .map(|c| c.path.split('/').next().unwrap_or(&c.path))
224 .collect();
225
226 is_wide || distinct_roots.len() >= 3
227 }
228
229 pub fn extract_scope(numstat: &str, config: &CommitConfig) -> (Vec<ScopeCandidate>, usize) {
231 let mut analyzer = Self::new();
232
233 for line in numstat.lines() {
234 analyzer.process_numstat_line(line, config);
235 }
236
237 let candidates = analyzer.build_scope_candidates();
238 (candidates, analyzer.total_lines)
239 }
240}
241
242pub fn extract_scope_candidates(
245 mode: &Mode,
246 target: Option<&str>,
247 dir: &str,
248 config: &CommitConfig,
249) -> Result<(String, bool)> {
250 let output = match mode {
252 Mode::Staged => Command::new("git")
253 .args(["diff", "--cached", "--numstat"])
254 .current_dir(dir)
255 .output()
256 .map_err(|e| {
257 CommitGenError::GitError(format!("Failed to run git diff --cached --numstat: {e}"))
258 })?,
259 Mode::Commit => {
260 let target = target.ok_or_else(|| {
261 CommitGenError::ValidationError("--target required for commit mode".to_string())
262 })?;
263 Command::new("git")
264 .args(["show", "--numstat", target])
265 .current_dir(dir)
266 .output()
267 .map_err(|e| {
268 CommitGenError::GitError(format!("Failed to run git show --numstat: {e}"))
269 })?
270 },
271 Mode::Unstaged => Command::new("git")
272 .args(["diff", "--numstat"])
273 .current_dir(dir)
274 .output()
275 .map_err(|e| CommitGenError::GitError(format!("Failed to run git diff --numstat: {e}")))?,
276 Mode::Compose => unreachable!("compose mode handled separately"),
277 };
278
279 if !output.status.success() {
280 return Err(CommitGenError::GitError("git diff --numstat failed".to_string()));
281 }
282
283 let numstat = String::from_utf8_lossy(&output.stdout);
284
285 let (candidates, total_lines) = ScopeAnalyzer::extract_scope(&numstat, config);
286
287 if total_lines == 0 {
288 return Ok(("(none - no measurable changes)".to_string(), false));
289 }
290
291 let is_wide = ScopeAnalyzer::is_wide_change(&candidates, config);
292
293 if is_wide {
294 return Ok(("(none - multi-component change)".to_string(), true));
295 }
296
297 let mut suggestion_parts = Vec::new();
300 for cand in candidates.iter().take(5) {
301 if cand.percentage >= 10.0 {
303 let confidence_label = if cand.path.contains('/') {
304 if cand.percentage > 60.0 {
305 "high confidence"
306 } else {
307 "moderate confidence"
308 }
309 } else {
310 "high confidence"
311 };
312
313 suggestion_parts
314 .push(format!("{} ({:.0}%, {})", cand.path, cand.percentage, confidence_label));
315 }
316 }
317
318 let scope_str = if suggestion_parts.is_empty() {
319 "(none - unclear component)".to_string()
320 } else {
321 format!("{}\nPrefer 2-segment scopes marked 'high confidence'", suggestion_parts.join(", "))
322 };
323
324 Ok((scope_str, is_wide))
325}
326
#[cfg(test)]
mod tests {
    use super::*;

    /// Config used by every test: three lock files excluded and a 50% wide-change
    /// threshold; all other fields come from `CommitConfig::default()`.
    fn default_config() -> CommitConfig {
        CommitConfig {
            excluded_files: vec![
                "Cargo.lock".to_string(),
                "package-lock.json".to_string(),
                "yarn.lock".to_string(),
            ],
            wide_change_threshold: 0.5,
            ..Default::default()
        }
    }

    // --- extract_path_from_rename ---

    #[test]
    fn test_extract_path_from_rename_brace() {
        // The text after the closing brace ("/file.rs") is not carried into the result.
        assert_eq!(ScopeAnalyzer::extract_path_from_rename("lib/{old => new}/file.rs"), "lib/new");
    }

    #[test]
    fn test_extract_path_from_rename_brace_complex() {
        assert_eq!(
            ScopeAnalyzer::extract_path_from_rename("src/api/{client.rs => http_client.rs}"),
            "src/api/http_client.rs"
        );
    }

    #[test]
    fn test_extract_path_from_rename_arrow() {
        assert_eq!(
            ScopeAnalyzer::extract_path_from_rename("old/file.rs => new/file.rs"),
            "new/file.rs"
        );
    }

    #[test]
    fn test_extract_path_from_rename_arrow_with_spaces() {
        // Surrounding whitespace is trimmed from the new path.
        assert_eq!(
            ScopeAnalyzer::extract_path_from_rename(" old/path.rs => new/path.rs "),
            "new/path.rs"
        );
    }

    #[test]
    fn test_extract_path_from_rename_no_rename() {
        assert_eq!(ScopeAnalyzer::extract_path_from_rename("lib/file.rs"), "lib/file.rs");
    }

    #[test]
    fn test_extract_path_from_rename_malformed_brace() {
        // No closing brace after the arrow: the input is returned unchanged.
        assert_eq!(
            ScopeAnalyzer::extract_path_from_rename("lib/{old => new/file.rs"),
            "lib/{old => new/file.rs"
        );
    }

    // --- extract_components_from_path ---

    #[test]
    fn test_extract_components_simple() {
        let comps = ScopeAnalyzer::extract_components_from_path("src/api/client.rs");
        // "src" is a placeholder with two segments after it; "client.rs" is a file name.
        assert_eq!(comps, vec!["api"]);
    }

    #[test]
    fn test_extract_components_with_placeholder() {
        let comps = ScopeAnalyzer::extract_components_from_path("lib/foo/bar/baz.tsx");
        // Two directories survive, so both the one- and two-segment keys are produced.
        assert_eq!(comps, vec!["foo", "foo/bar"]);
    }

    #[test]
    fn test_extract_components_skip_tests() {
        let comps = ScopeAnalyzer::extract_components_from_path("tests/api/client_test.rs");
        // "tests" is dropped; only "api" remains.
        assert_eq!(comps, vec!["api"]);
    }

    #[test]
    fn test_extract_components_skip_node_modules() {
        let comps = ScopeAnalyzer::extract_components_from_path("node_modules/foo/bar.js");
        // "node_modules" is in SKIP_DIRS.
        assert_eq!(comps, vec!["foo"]);
    }

    #[test]
    fn test_extract_components_single_segment() {
        let comps = ScopeAnalyzer::extract_components_from_path("src/main.rs");
        // Only one segment follows "src", so the placeholder is kept here.
        assert_eq!(comps, vec!["src"]);
    }

    #[test]
    fn test_extract_components_dotfile_skipped() {
        let comps = ScopeAnalyzer::extract_components_from_path("lib/.git/config");
        // ".git" is dropped; "config" has no dot, so it is not treated as a file name.
        assert_eq!(comps, vec!["config"]);
    }

    #[test]
    fn test_extract_components_strips_extension() {
        let comps = ScopeAnalyzer::extract_components_from_path("src/api/client.rs");
        assert!(comps.contains(&"api".to_string()));
    }

    // --- process_numstat_line ---

    #[test]
    fn test_process_numstat_line_normal() {
        let mut analyzer = ScopeAnalyzer::new();
        let config = default_config();
        analyzer.process_numstat_line("10\t5\tlib/foo/bar.rs", &config);

        assert_eq!(analyzer.total_lines, 15);
        assert_eq!(analyzer.component_lines.get("foo"), Some(&15));
    }

    #[test]
    fn test_process_numstat_line_excluded_file() {
        let mut analyzer = ScopeAnalyzer::new();
        let config = default_config();
        analyzer.process_numstat_line("10\t5\tCargo.lock", &config);

        // Excluded paths contribute neither to the total nor to any component.
        assert_eq!(analyzer.total_lines, 0);
        assert!(analyzer.component_lines.is_empty());
    }

    #[test]
    fn test_process_numstat_line_binary_file() {
        let mut analyzer = ScopeAnalyzer::new();
        let config = default_config();
        // "-" does not parse as a number, so both counts are 0 and the line is ignored.
        analyzer.process_numstat_line("-\t-\timage.png", &config);

        assert_eq!(analyzer.total_lines, 0);
    }

    #[test]
    fn test_process_numstat_line_invalid() {
        let mut analyzer = ScopeAnalyzer::new();
        let config = default_config();
        // Fewer than three tab-separated fields.
        analyzer.process_numstat_line("invalid line", &config);

        assert_eq!(analyzer.total_lines, 0);
    }

    #[test]
    fn test_process_numstat_line_rename_brace() {
        let mut analyzer = ScopeAnalyzer::new();
        let config = default_config();
        analyzer.process_numstat_line("20\t10\tlib/{old => new}/file.rs", &config);

        assert_eq!(analyzer.total_lines, 30);
        // Deliberately loose: accepts either component key for the renamed path.
        assert!(
            analyzer.component_lines.contains_key("lib")
                || analyzer.component_lines.contains_key("new"),
            "Expected either 'lib' or 'new' component, got: {:?}",
            analyzer.component_lines
        );
    }

    #[test]
    fn test_process_numstat_line_multiple_files() {
        let mut analyzer = ScopeAnalyzer::new();
        let config = default_config();
        analyzer.process_numstat_line("10\t5\tsrc/api/client.rs", &config);
        analyzer.process_numstat_line("20\t10\tsrc/api/server.rs", &config);

        // Counts from both files accumulate under the same component.
        assert_eq!(analyzer.total_lines, 45);
        assert_eq!(analyzer.component_lines.get("api"), Some(&45));
    }

    // --- is_wide_change ---

    #[test]
    fn test_is_wide_change_focused() {
        let config = default_config();
        let candidates = vec![
            ScopeCandidate { path: "api".to_string(), percentage: 80.0, confidence: 80.0 },
            ScopeCandidate { path: "db".to_string(), percentage: 20.0, confidence: 20.0 },
        ];

        // Top candidate is above the 50% threshold and there are only two roots.
        assert!(!ScopeAnalyzer::is_wide_change(&candidates, &config));
    }

    #[test]
    fn test_is_wide_change_dispersed() {
        let config = default_config();
        let candidates = vec![
            ScopeCandidate { path: "api".to_string(), percentage: 30.0, confidence: 30.0 },
            ScopeCandidate { path: "db".to_string(), percentage: 30.0, confidence: 30.0 },
            ScopeCandidate { path: "ui".to_string(), percentage: 40.0, confidence: 40.0 },
        ];

        // The first candidate (30%) is below the 50% threshold.
        assert!(ScopeAnalyzer::is_wide_change(&candidates, &config));
    }

    #[test]
    fn test_is_wide_change_three_roots() {
        let config = default_config();
        let candidates = vec![
            ScopeCandidate { path: "api".to_string(), percentage: 60.0, confidence: 60.0 },
            ScopeCandidate { path: "db".to_string(), percentage: 20.0, confidence: 20.0 },
            ScopeCandidate { path: "ui".to_string(), percentage: 20.0, confidence: 20.0 },
        ];

        // The top candidate clears the threshold, but three distinct roots make it wide.
        assert!(ScopeAnalyzer::is_wide_change(&candidates, &config));
    }

    #[test]
    fn test_is_wide_change_nested_same_root() {
        let config = default_config();
        let candidates = vec![
            ScopeCandidate {
                path: "api/client".to_string(),
                percentage: 60.0,
                confidence: 72.0,
            },
            ScopeCandidate {
                path: "api/server".to_string(),
                percentage: 40.0,
                confidence: 32.0,
            },
        ];

        // Both paths share the root "api", so only one distinct root is counted.
        assert!(!ScopeAnalyzer::is_wide_change(&candidates, &config));
    }

    #[test]
    fn test_is_wide_change_empty() {
        let config = default_config();
        let candidates = vec![];

        assert!(!ScopeAnalyzer::is_wide_change(&candidates, &config));
    }

    // --- extract_scope ---

    #[test]
    fn test_extract_scope_single_file() {
        let config = default_config();
        let numstat = "10\t5\tsrc/api/client.rs";
        let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);

        assert_eq!(total_lines, 15);
        assert_eq!(candidates.len(), 1);
        assert_eq!(candidates[0].path, "api");
        assert_eq!(candidates[0].percentage, 100.0);
    }

    #[test]
    fn test_extract_scope_placeholder_only() {
        let config = default_config();
        let numstat = "10\t5\tsrc/main.rs";
        let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);

        // Lines are counted, but the bare "src" component is filtered from the candidates.
        assert_eq!(total_lines, 15);
        assert_eq!(candidates.len(), 0);
    }

    #[test]
    fn test_extract_scope_multiple_files() {
        let config = default_config();
        let numstat = "10\t5\tsrc/api/client.rs\n20\t10\tsrc/db/models.rs";
        let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);

        assert_eq!(total_lines, 45);
        assert!(candidates.len() >= 2);

        let api_cand = candidates.iter().find(|c| c.path == "api");
        let db_cand = candidates.iter().find(|c| c.path == "db");

        assert!(api_cand.is_some());
        assert!(db_cand.is_some());

        // db changed 30 lines against api's 15.
        assert!(db_cand.unwrap().percentage > api_cand.unwrap().percentage);
    }

    #[test]
    fn test_extract_scope_excluded_files() {
        let config = default_config();
        let numstat = "100\t50\tCargo.lock\n10\t5\tsrc/api/client.rs";
        let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);

        // The 150 lock-file lines are not part of the total.
        assert_eq!(total_lines, 15);
        assert_eq!(candidates[0].path, "api");
    }

    #[test]
    fn test_extract_scope_no_changes() {
        let config = default_config();
        let numstat = "";
        let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);

        assert_eq!(total_lines, 0);
        assert!(candidates.is_empty());
    }

    #[test]
    fn test_extract_scope_sorted_by_percentage() {
        let config = default_config();
        let numstat = "5\t0\tsrc/api/client.rs\n50\t0\tsrc/db/models.rs\n10\t0\tsrc/ui/component.tsx";
        let (candidates, _) = ScopeAnalyzer::extract_scope(numstat, &config);

        // All candidates are single-segment here, so confidence equals percentage.
        assert!(candidates[0].percentage >= candidates[1].percentage);
        assert!(candidates[1].percentage >= candidates[2].percentage);
    }

    // --- build_scope_candidates ---

    #[test]
    fn test_build_scope_candidates_percentages() {
        let mut analyzer = ScopeAnalyzer::new();
        analyzer.component_lines.insert("api".to_string(), 30);
        analyzer.component_lines.insert("db".to_string(), 70);
        analyzer.total_lines = 100;

        let candidates = analyzer.build_scope_candidates();

        assert_eq!(candidates.len(), 2);
        assert_eq!(candidates[0].path, "db");
        assert!((candidates[0].percentage - 70.0).abs() < 0.001);
        assert_eq!(candidates[1].path, "api");
        assert!((candidates[1].percentage - 30.0).abs() < 0.001);
    }

    #[test]
    fn test_confidence_70_percent_in_two_segment_prefers_specific() {
        let mut analyzer = ScopeAnalyzer::new();
        analyzer.component_lines.insert("api".to_string(), 70);
        analyzer
            .component_lines
            .insert("api/client".to_string(), 70);
        analyzer.component_lines.insert("other".to_string(), 30);
        analyzer.total_lines = 100;

        let candidates = analyzer.build_scope_candidates();

        // Above 60% the two-segment scope gets 70 * 1.2 = 84 and outranks "api" (70).
        assert_eq!(candidates[0].path, "api/client");
        assert!((candidates[0].percentage - 70.0).abs() < 0.001);
        assert!((candidates[0].confidence - 84.0).abs() < 0.001);
    }

    #[test]
    fn test_confidence_45_percent_in_two_segment_prefers_single() {
        let mut analyzer = ScopeAnalyzer::new();
        analyzer.component_lines.insert("api".to_string(), 45);
        analyzer
            .component_lines
            .insert("api/client".to_string(), 45);
        analyzer.component_lines.insert("other".to_string(), 55);
        analyzer.total_lines = 100;

        let candidates = analyzer.build_scope_candidates();

        // At or below 60% the two-segment scope gets 45 * 0.8 = 36 and ranks last.
        assert_eq!(candidates[0].path, "other");
        assert_eq!(candidates[1].path, "api");
        assert_eq!(candidates[2].path, "api/client");
        assert!((candidates[2].confidence - 36.0).abs() < 0.001);
    }
}