llm_git/
analysis.rs

1use std::{
2   collections::{HashMap, HashSet},
3   process::Command,
4};
5
6/// Scope analysis functionality for git diff numstat parsing
7use crate::config::CommitConfig;
8use crate::{
9   error::{CommitGenError, Result},
10   types::{Mode, ScopeCandidate},
11};
12
/// Organizational directory names that rarely make a meaningful scope.
///
/// A path segment listed here is ignored whenever deeper segments follow it,
/// and a candidate whose root is one of these names is never suggested.
const PLACEHOLDER_DIRS: &[&str] = &[
   // Rust conventions
   "src",
   "lib",
   "bin",
   "crates",
   "benches",
   "examples",
   // Go conventions
   "internal",
   "pkg",
   // C/C++ conventions
   "include",
   // Testing
   "tests",
   "test",
   // Documentation
   "docs",
   // Generic organizational
   "packages",
   "modules",
];
24
/// Directory names that never contribute a scope segment, wherever they
/// appear in a path.
const SKIP_DIRS: &[&str] = &[
   "test",
   "tests",
   "benches",
   "examples",
   "target",
   "build",
   "node_modules",
   ".github",
];
28
/// Accumulates changed-line counts per candidate scope while numstat records
/// are processed one at a time.
#[derive(Debug, Clone)]
pub struct ScopeAnalyzer {
   /// Changed lines (added + deleted) credited to each candidate scope path.
   component_lines: HashMap<String, usize>,
   /// Changed lines across every record that was counted.
   total_lines:     usize,
}
33
34impl Default for ScopeAnalyzer {
35   fn default() -> Self {
36      Self::new()
37   }
38}
39
40impl ScopeAnalyzer {
41   pub fn new() -> Self {
42      Self { component_lines: HashMap::new(), total_lines: 0 }
43   }
44
45   /// Process single numstat line: "added\tdeleted\tpath"
46   pub fn process_numstat_line(&mut self, line: &str, config: &CommitConfig) {
47      let parts: Vec<&str> = line.split('\t').collect();
48      if parts.len() < 3 {
49         return;
50      }
51
52      let (added_str, deleted_str, path_part) = (parts[0], parts[1], parts[2]);
53
54      // Parse line counts (skip binary files marked with "-")
55      let added = added_str.parse::<usize>().unwrap_or(0);
56      let deleted = deleted_str.parse::<usize>().unwrap_or(0);
57      let lines_changed = added + deleted;
58
59      if lines_changed == 0 {
60         return;
61      }
62
63      // Extract actual path from rename syntax
64      let path = Self::extract_path_from_rename(path_part);
65
66      // Skip excluded files
67      if config.excluded_files.iter().any(|ex| path.ends_with(ex)) {
68         return;
69      }
70
71      self.total_lines += lines_changed;
72
73      // Extract component candidates from path
74      let component_candidates = Self::extract_components_from_path(&path);
75
76      for comp in component_candidates {
77         // Final sanity check: no segments should contain dots
78         if comp.split('/').any(|s| s.contains('.')) {
79            continue;
80         }
81
82         *self.component_lines.entry(comp).or_insert(0) += lines_changed;
83      }
84   }
85
86   /// Extract new path from rename syntax (handles both brace and arrow forms)
87   fn extract_path_from_rename(path_part: &str) -> String {
88      // Handle renames with brace syntax: "lib/wal/{io_worker.rs => io.rs}"
89      if let Some(brace_start) = path_part.find('{') {
90         if let Some(arrow_pos) = path_part[brace_start..].find(" => ") {
91            let arrow_abs = brace_start + arrow_pos;
92            if let Some(brace_end) = path_part[arrow_abs..].find('}') {
93               let brace_end_abs = arrow_abs + brace_end;
94               let prefix = &path_part[..brace_start];
95               let new_name = path_part[arrow_abs + 4..brace_end_abs].trim();
96               return format!("{prefix}{new_name}");
97            }
98         }
99      } else if path_part.contains(" => ") {
100         // Simple arrow syntax: "old/path => new/path"
101         return path_part
102            .split(" => ")
103            .nth(1)
104            .unwrap_or(path_part)
105            .trim()
106            .to_string();
107      }
108
109      path_part.trim().to_string()
110   }
111
112   /// Extract meaningful component paths from file path
113   fn extract_components_from_path(path: &str) -> Vec<String> {
114      let segments: Vec<&str> = path.split('/').collect();
115      let mut component_candidates = Vec::new();
116      let mut meaningful_segments = Vec::new();
117
118      // Helper: strip extension from segment
119      let strip_ext = |s: &str| -> String {
120         if let Some(pos) = s.rfind('.') {
121            s[..pos].to_string()
122         } else {
123            s.to_string()
124         }
125      };
126
127      // Helper: is this segment a file (contains extension)?
128      let is_file = |s: &str| -> bool {
129         s.contains('.') && !s.starts_with('.') && s.rfind('.').is_some_and(|p| p > 0)
130      };
131
132      // Build candidates by walking path and extracting meaningful directory segments
133      for (seg_idx, seg) in segments.iter().enumerate() {
134         // Skip placeholder dirs when any deeper segments exist
135         if PLACEHOLDER_DIRS.contains(seg) {
136            // If this is a placeholder and we have more segments after it, skip it
137            if segments.len() > seg_idx + 1 {
138               continue;
139            }
140         }
141         // Skip if it's a file (has extension)
142         if is_file(seg) {
143            continue;
144         }
145         // Skip common non-scope dirs
146         if SKIP_DIRS.contains(seg) {
147            continue;
148         }
149
150         let stripped = strip_ext(seg);
151         // Filter out empty segments or dotfiles
152         if !stripped.is_empty() && !stripped.starts_with('.') {
153            meaningful_segments.push(stripped);
154         }
155      }
156
157      // Generate candidates: single-level and two-level
158      if !meaningful_segments.is_empty() {
159         component_candidates.push(meaningful_segments[0].clone());
160
161         if meaningful_segments.len() >= 2 {
162            component_candidates
163               .push(format!("{}/{}", meaningful_segments[0], meaningful_segments[1]));
164         }
165      }
166
167      component_candidates
168   }
169
170   /// Build sorted `ScopeCandidate` list from accumulated data
171   pub fn build_scope_candidates(&self) -> Vec<ScopeCandidate> {
172      let mut candidates: Vec<ScopeCandidate> = self
173         .component_lines
174         .iter()
175         .filter(|(path, _)| {
176            // Filter out pure placeholder single-segment scopes
177            if !path.contains('/') && PLACEHOLDER_DIRS.contains(&path.as_str()) {
178               return false;
179            }
180            // Filter out scopes starting with placeholder dirs
181            if let Some(root) = path.split('/').next()
182               && PLACEHOLDER_DIRS.contains(&root)
183            {
184               return false;
185            }
186            true
187         })
188         .map(|(path, &lines)| {
189            let percentage = (lines as f32 / self.total_lines as f32) * 100.0;
190            let is_two_segment = path.contains('/');
191
192            // Confidence calculation:
193            // - Single-segment: percentage as-is
194            // - Two-segment: percentage * 1.2 if >60%, else * 0.8
195            let confidence = if is_two_segment {
196               if percentage > 60.0 {
197                  percentage * 1.2
198               } else {
199                  percentage * 0.8
200               }
201            } else {
202               percentage
203            };
204
205            ScopeCandidate { percentage, path: path.clone(), confidence }
206         })
207         .collect();
208
209      candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
210      candidates
211   }
212
213   /// Check if change spans multiple components (wide change)
214   pub fn is_wide_change(candidates: &[ScopeCandidate], config: &CommitConfig) -> bool {
215      // Check if top component is below threshold
216      let is_wide = if let Some(top) = candidates.first() {
217         top.percentage / 100.0 < config.wide_change_threshold
218      } else {
219         false
220      };
221
222      // Check if ≥3 distinct roots
223      let distinct_roots: HashSet<&str> = candidates
224         .iter()
225         .map(|c| c.path.split('/').next().unwrap_or(&c.path))
226         .collect();
227
228      is_wide || distinct_roots.len() >= 3
229   }
230
231   /// Public API: extract scope candidates from git numstat output
232   pub fn extract_scope(numstat: &str, config: &CommitConfig) -> (Vec<ScopeCandidate>, usize) {
233      let mut analyzer = Self::new();
234
235      for line in numstat.lines() {
236         analyzer.process_numstat_line(line, config);
237      }
238
239      let candidates = analyzer.build_scope_candidates();
240      (candidates, analyzer.total_lines)
241   }
242
243   /// Analyze wide changes to detect cross-cutting patterns
244   pub fn analyze_wide_change(numstat: &str) -> Option<String> {
245      let lines: Vec<&str> = numstat.lines().collect();
246      if lines.is_empty() {
247         return None;
248      }
249
250      // Extract file paths from numstat
251      let paths: Vec<&str> = lines
252         .iter()
253         .filter_map(|line| {
254            let parts: Vec<&str> = line.split('\t').collect();
255            if parts.len() >= 3 {
256               Some(parts[2])
257            } else {
258               None
259            }
260         })
261         .collect();
262
263      if paths.is_empty() {
264         return None;
265      }
266
267      // Count file types
268      let total = paths.len();
269      let mut md_count = 0;
270      let mut test_count = 0;
271      let mut config_count = 0;
272      let mut has_cargo_toml = false;
273      let mut has_package_json = false;
274
275      // Track patterns
276      let mut error_keywords = 0;
277      let mut type_keywords = 0;
278
279      for path in &paths {
280         // File extension analysis
281         if std::path::Path::new(path)
282            .extension()
283            .is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
284         {
285            md_count += 1;
286         }
287         if path.contains("/test") || path.contains("_test.") || path.ends_with("_test.rs") {
288            test_count += 1;
289         }
290         if std::path::Path::new(path).extension().is_some_and(|ext| {
291            ext.eq_ignore_ascii_case("toml")
292               || ext.eq_ignore_ascii_case("yaml")
293               || ext.eq_ignore_ascii_case("yml")
294               || ext.eq_ignore_ascii_case("json")
295         }) {
296            config_count += 1;
297         }
298
299         // Dependency files
300         if path.contains("Cargo.toml") {
301            has_cargo_toml = true;
302         }
303         if path.contains("package.json") {
304            has_package_json = true;
305         }
306
307         // Pattern keywords in paths
308         let lower_path = path.to_lowercase();
309         if lower_path.contains("error")
310            || lower_path.contains("result")
311            || lower_path.contains("err")
312         {
313            error_keywords += 1;
314         }
315         if lower_path.contains("type")
316            || lower_path.contains("struct")
317            || lower_path.contains("enum")
318         {
319            type_keywords += 1;
320         }
321      }
322
323      // Detection heuristics (ordered by specificity)
324
325      // 1. Dependency updates (high confidence)
326      if has_cargo_toml || has_package_json {
327         return Some("deps".to_string());
328      }
329
330      // 2. Documentation updates (>70% .md files)
331      if md_count * 100 / total > 70 {
332         return Some("docs".to_string());
333      }
334
335      // 3. Test updates (>60% test files)
336      if test_count * 100 / total > 60 {
337         return Some("tests".to_string());
338      }
339
340      // 4. Error handling migration (>40% files with error keywords)
341      if error_keywords * 100 / total > 40 {
342         return Some("error-handling".to_string());
343      }
344
345      // 5. Type migration (>40% files with type keywords)
346      if type_keywords * 100 / total > 40 {
347         return Some("type-refactor".to_string());
348      }
349
350      // 6. Config/tooling updates (>50% config files)
351      if config_count * 100 / total > 50 {
352         return Some("config".to_string());
353      }
354
355      // No clear pattern detected
356      None
357   }
358}
359
360/// Extract candidate scopes from git diff --numstat output
361/// Returns (`scope_string`, `is_wide_change`)
362pub fn extract_scope_candidates(
363   mode: &Mode,
364   target: Option<&str>,
365   dir: &str,
366   config: &CommitConfig,
367) -> Result<(String, bool)> {
368   // Get numstat output
369   let output = match mode {
370      Mode::Staged => Command::new("git")
371         .args(["diff", "--cached", "--numstat"])
372         .current_dir(dir)
373         .output()
374         .map_err(|e| {
375            CommitGenError::GitError(format!("Failed to run git diff --cached --numstat: {e}"))
376         })?,
377      Mode::Commit => {
378         let target = target.ok_or_else(|| {
379            CommitGenError::ValidationError("--target required for commit mode".to_string())
380         })?;
381         Command::new("git")
382            .args(["show", "--numstat", target])
383            .current_dir(dir)
384            .output()
385            .map_err(|e| {
386               CommitGenError::GitError(format!("Failed to run git show --numstat: {e}"))
387            })?
388      },
389      Mode::Unstaged => Command::new("git")
390         .args(["diff", "--numstat"])
391         .current_dir(dir)
392         .output()
393         .map_err(|e| CommitGenError::GitError(format!("Failed to run git diff --numstat: {e}")))?,
394      Mode::Compose => unreachable!("compose mode handled separately"),
395   };
396
397   if !output.status.success() {
398      return Err(CommitGenError::GitError("git diff --numstat failed".to_string()));
399   }
400
401   let numstat = String::from_utf8_lossy(&output.stdout);
402
403   let (candidates, total_lines) = ScopeAnalyzer::extract_scope(&numstat, config);
404
405   if total_lines == 0 {
406      return Ok(("(none - no measurable changes)".to_string(), false));
407   }
408
409   let is_wide = ScopeAnalyzer::is_wide_change(&candidates, config);
410
411   if is_wide {
412      // Try to detect a pattern if wide_change_abstract is enabled
413      let scope_str = if config.wide_change_abstract {
414         if let Some(pattern) = ScopeAnalyzer::analyze_wide_change(&numstat) {
415            format!("(cross-cutting: {pattern})")
416         } else {
417            "(none - multi-component change)".to_string()
418         }
419      } else {
420         "(none - multi-component change)".to_string()
421      };
422
423      return Ok((scope_str, true));
424   }
425
426   // Format suggested scopes with weights for prompt (keep top 5, prefer 2-segment
427   // when >60%)
428   let mut suggestion_parts = Vec::new();
429   for cand in candidates.iter().take(5) {
430      // Only suggest if ≥10% to avoid noise
431      if cand.percentage >= 10.0 {
432         let confidence_label = if cand.path.contains('/') {
433            if cand.percentage > 60.0 {
434               "high confidence"
435            } else {
436               "moderate confidence"
437            }
438         } else {
439            "high confidence"
440         };
441
442         suggestion_parts
443            .push(format!("{} ({:.0}%, {})", cand.path, cand.percentage, confidence_label));
444      }
445   }
446
447   let scope_str = if suggestion_parts.is_empty() {
448      "(none - unclear component)".to_string()
449   } else {
450      format!("{}\nPrefer 2-segment scopes marked 'high confidence'", suggestion_parts.join(", "))
451   };
452
453   Ok((scope_str, is_wide))
454}
455
#[cfg(test)]
mod tests {
   use super::*;

   // ------------------------------------------------------------------
   // Shared fixtures and shorthands
   // ------------------------------------------------------------------

   /// Config with common lock files excluded and a 50% wide-change threshold.
   fn default_config() -> CommitConfig {
      let excluded_files: Vec<String> = ["Cargo.lock", "package-lock.json", "yarn.lock"]
         .iter()
         .map(|name| (*name).to_string())
         .collect();

      CommitConfig { excluded_files, wide_change_threshold: 0.5, ..Default::default() }
   }

   fn renamed(raw: &str) -> String {
      ScopeAnalyzer::extract_path_from_rename(raw)
   }

   fn components(path: &str) -> Vec<String> {
      ScopeAnalyzer::extract_components_from_path(path)
   }

   /// Analyzer state after feeding `records` through `process_numstat_line`.
   fn analyzer_after(records: &[&str]) -> ScopeAnalyzer {
      let config = default_config();
      let mut analyzer = ScopeAnalyzer::new();
      for record in records {
         analyzer.process_numstat_line(record, &config);
      }
      analyzer
   }

   /// Analyzer with hand-written per-component counts and total.
   fn analyzer_with(counts: &[(&str, usize)], total_lines: usize) -> ScopeAnalyzer {
      let mut analyzer = ScopeAnalyzer::new();
      for (name, lines) in counts {
         analyzer.component_lines.insert((*name).to_string(), *lines);
      }
      analyzer.total_lines = total_lines;
      analyzer
   }

   fn cand(path: &str, percentage: f32, confidence: f32) -> ScopeCandidate {
      ScopeCandidate { path: path.to_string(), percentage, confidence }
   }

   fn is_wide(candidates: &[ScopeCandidate]) -> bool {
      ScopeAnalyzer::is_wide_change(candidates, &default_config())
   }

   fn scope_of(numstat: &str) -> (Vec<ScopeCandidate>, usize) {
      ScopeAnalyzer::extract_scope(numstat, &default_config())
   }

   fn close(actual: f32, expected: f32) -> bool {
      (actual - expected).abs() < 0.001
   }

   fn wide_pattern(numstat: &str) -> Option<String> {
      ScopeAnalyzer::analyze_wide_change(numstat)
   }

   // ------------------------------------------------------------------
   // extract_path_from_rename()
   // ------------------------------------------------------------------

   #[test]
   fn test_extract_path_from_rename_brace() {
      // Only the braced part is substituted; the text after the closing
      // brace is not carried over.
      assert_eq!(renamed("lib/{old => new}/file.rs"), "lib/new");
   }

   #[test]
   fn test_extract_path_from_rename_brace_complex() {
      assert_eq!(renamed("src/api/{client.rs => http_client.rs}"), "src/api/http_client.rs");
   }

   #[test]
   fn test_extract_path_from_rename_arrow() {
      assert_eq!(renamed("old/file.rs => new/file.rs"), "new/file.rs");
   }

   #[test]
   fn test_extract_path_from_rename_arrow_with_spaces() {
      assert_eq!(renamed("  old/path.rs => new/path.rs  "), "new/path.rs");
   }

   #[test]
   fn test_extract_path_from_rename_no_rename() {
      assert_eq!(renamed("lib/file.rs"), "lib/file.rs");
   }

   #[test]
   fn test_extract_path_from_rename_malformed_brace() {
      // No closing brace: the input comes back unchanged.
      let malformed = "lib/{old => new/file.rs";
      assert_eq!(renamed(malformed), malformed);
   }

   // ------------------------------------------------------------------
   // extract_components_from_path()
   // ------------------------------------------------------------------

   #[test]
   fn test_extract_components_simple() {
      // The "src" placeholder is skipped, leaving just "api".
      assert_eq!(components("src/api/client.rs"), ["api"]);
   }

   #[test]
   fn test_extract_components_with_placeholder() {
      // The "lib" placeholder is skipped; "foo" and "bar" survive.
      assert_eq!(components("lib/foo/bar/baz.tsx"), ["foo", "foo/bar"]);
   }

   #[test]
   fn test_extract_components_skip_tests() {
      // "tests" is never a scope segment, so only "api" is left.
      assert_eq!(components("tests/api/client_test.rs"), ["api"]);
   }

   #[test]
   fn test_extract_components_skip_node_modules() {
      // "node_modules" is in SKIP_DIRS, so only "foo" is left.
      assert_eq!(components("node_modules/foo/bar.js"), ["foo"]);
   }

   #[test]
   fn test_extract_components_single_segment() {
      // Placeholder directory plus a file name: nothing meaningful remains.
      assert!(components("src/main.rs").is_empty());
   }

   #[test]
   fn test_extract_components_dotfile_skipped() {
      // The ".git" segment is filtered out; the extension-less "config" stays.
      assert_eq!(components("lib/.git/config"), ["config"]);
   }

   #[test]
   fn test_extract_components_strips_extension() {
      // "client.rs" is a file and "src" a placeholder; "api" must be present.
      assert!(components("src/api/client.rs").iter().any(|comp| comp == "api"));
   }

   #[test]
   fn test_extract_components_go_internal() {
      // Go layout: "internal/" is a placeholder, the module below it counts.
      assert_eq!(components("internal/agent/worker.go"), ["agent"]);
   }

   #[test]
   fn test_extract_components_go_internal_nested() {
      // Go layout: internal/foo/bar/baz.go yields "foo" and "foo/bar".
      assert_eq!(components("internal/config/parser/json.go"), ["config", "config/parser"]);
   }

   #[test]
   fn test_extract_components_go_pkg() {
      // Go layout: "pkg/" is a placeholder as well.
      assert_eq!(components("pkg/util/strings.go"), ["util"]);
   }

   #[test]
   fn test_extract_components_monorepo_packages() {
      // Monorepo layout: "packages/" is a placeholder.
      assert_eq!(components("packages/core/index.ts"), ["core"]);
   }

   // ------------------------------------------------------------------
   // process_numstat_line()
   // ------------------------------------------------------------------

   #[test]
   fn test_process_numstat_line_normal() {
      let analyzer = analyzer_after(&["10\t5\tlib/foo/bar.rs"]);

      assert_eq!(analyzer.total_lines, 15);
      assert_eq!(analyzer.component_lines.get("foo"), Some(&15));
   }

   #[test]
   fn test_process_numstat_line_excluded_file() {
      let analyzer = analyzer_after(&["10\t5\tCargo.lock"]);

      assert_eq!(analyzer.total_lines, 0);
      assert!(analyzer.component_lines.is_empty());
   }

   #[test]
   fn test_process_numstat_line_binary_file() {
      let analyzer = analyzer_after(&["-\t-\timage.png"]);

      assert_eq!(analyzer.total_lines, 0);
   }

   #[test]
   fn test_process_numstat_line_invalid() {
      let analyzer = analyzer_after(&["invalid line"]);

      assert_eq!(analyzer.total_lines, 0);
   }

   #[test]
   fn test_process_numstat_line_rename_brace() {
      // The brace rename resolves to "lib/new"; "lib" is a placeholder, so
      // the lines are credited to "new".
      let analyzer = analyzer_after(&["20\t10\tlib/{old => new}/file.rs"]);

      assert_eq!(analyzer.total_lines, 30);
      assert_eq!(analyzer.component_lines.get("new"), Some(&30));
   }

   #[test]
   fn test_process_numstat_line_multiple_files() {
      let analyzer = analyzer_after(&["10\t5\tsrc/api/client.rs", "20\t10\tsrc/api/server.rs"]);

      assert_eq!(analyzer.total_lines, 45);
      assert_eq!(analyzer.component_lines.get("api"), Some(&45));
   }

   // ------------------------------------------------------------------
   // is_wide_change()
   // ------------------------------------------------------------------

   #[test]
   fn test_is_wide_change_focused() {
      let candidates = [cand("api", 80.0, 80.0), cand("db", 20.0, 20.0)];

      assert!(!is_wide(&candidates));
   }

   #[test]
   fn test_is_wide_change_dispersed() {
      let candidates =
         [cand("api", 30.0, 30.0), cand("db", 30.0, 30.0), cand("ui", 40.0, 40.0)];

      assert!(is_wide(&candidates));
   }

   #[test]
   fn test_is_wide_change_three_roots() {
      let candidates =
         [cand("api", 60.0, 60.0), cand("db", 20.0, 20.0), cand("ui", 20.0, 20.0)];

      assert!(is_wide(&candidates));
   }

   #[test]
   fn test_is_wide_change_nested_same_root() {
      let candidates = [cand("api/client", 60.0, 72.0), cand("api/server", 40.0, 32.0)];

      assert!(!is_wide(&candidates));
   }

   #[test]
   fn test_is_wide_change_empty() {
      assert!(!is_wide(&[]));
   }

   // ------------------------------------------------------------------
   // extract_scope() integration
   // ------------------------------------------------------------------

   #[test]
   fn test_extract_scope_single_file() {
      let (candidates, total_lines) = scope_of("10\t5\tsrc/api/client.rs");

      assert_eq!(total_lines, 15);
      // "src" is dropped, so "api" is the only candidate.
      assert_eq!(candidates.len(), 1);
      assert_eq!(candidates[0].path, "api");
      assert_eq!(candidates[0].percentage, 100.0);
   }

   #[test]
   fn test_extract_scope_placeholder_only() {
      let (candidates, total_lines) = scope_of("10\t5\tsrc/main.rs");

      assert_eq!(total_lines, 15);
      // Only a placeholder directory is involved: no candidates at all.
      assert!(candidates.is_empty());
   }

   #[test]
   fn test_extract_scope_multiple_files() {
      let (candidates, total_lines) =
         scope_of("10\t5\tsrc/api/client.rs\n20\t10\tsrc/db/models.rs");

      assert_eq!(total_lines, 45);
      assert!(candidates.len() >= 2);

      // Both components must be present.
      let percentage_of = |name: &str| {
         candidates
            .iter()
            .find(|c| c.path == name)
            .map(|c| c.percentage)
      };
      let api = percentage_of("api");
      let db = percentage_of("db");
      assert!(api.is_some());
      assert!(db.is_some());

      // db changed 30 lines against api's 15.
      assert!(db.unwrap() > api.unwrap());
   }

   #[test]
   fn test_extract_scope_excluded_files() {
      let (candidates, total_lines) = scope_of("100\t50\tCargo.lock\n10\t5\tsrc/api/client.rs");

      // Cargo.lock contributes nothing.
      assert_eq!(total_lines, 15);
      assert_eq!(candidates[0].path, "api");
   }

   #[test]
   fn test_extract_scope_no_changes() {
      let (candidates, total_lines) = scope_of("");

      assert_eq!(total_lines, 0);
      assert!(candidates.is_empty());
   }

   #[test]
   fn test_extract_scope_sorted_by_percentage() {
      let (candidates, _) =
         scope_of("5\t0\tsrc/api/client.rs\n50\t0\tsrc/db/models.rs\n10\t0\tsrc/ui/component.tsx");

      // The first three candidates must be in descending percentage order.
      for pair in candidates[..3].windows(2) {
         assert!(pair[0].percentage >= pair[1].percentage);
      }
   }

   #[test]
   fn test_build_scope_candidates_percentages() {
      let candidates = analyzer_with(&[("api", 30), ("db", 70)], 100).build_scope_candidates();

      assert_eq!(candidates.len(), 2);
      assert_eq!(candidates[0].path, "db");
      assert!(close(candidates[0].percentage, 70.0));
      assert_eq!(candidates[1].path, "api");
      assert!(close(candidates[1].percentage, 30.0));
   }

   // ------------------------------------------------------------------
   // Confidence heuristic
   // ------------------------------------------------------------------

   // A two-segment scope holding 70% should win over its parent.
   #[test]
   fn test_confidence_70_percent_in_two_segment_prefers_specific() {
      let candidates = analyzer_with(&[("api", 70), ("api/client", 70), ("other", 30)], 100)
         .build_scope_candidates();

      // Confidences: api/client = 70 * 1.2 = 84, api = 70, other = 30,
      // so api/client comes first.
      assert_eq!(candidates[0].path, "api/client");
      assert!(close(candidates[0].percentage, 70.0));
      assert!(close(candidates[0].confidence, 84.0));
   }

   // A two-segment scope holding 45% should lose to single-segment scopes.
   #[test]
   fn test_confidence_45_percent_in_two_segment_prefers_single() {
      let candidates = analyzer_with(&[("api", 45), ("api/client", 45), ("other", 55)], 100)
         .build_scope_candidates();

      // Confidences: other = 55, api = 45, api/client = 45 * 0.8 = 36,
      // giving the order other, api, api/client.
      let order: Vec<&str> = candidates.iter().map(|c| c.path.as_str()).collect();
      assert_eq!(order[0], "other");
      assert_eq!(order[1], "api");
      assert_eq!(order[2], "api/client");
      assert!(close(candidates[2].confidence, 36.0));
   }

   // ------------------------------------------------------------------
   // analyze_wide_change()
   // ------------------------------------------------------------------

   #[test]
   fn test_analyze_wide_change_dependency_updates() {
      let found = wide_pattern("10\t5\tCargo.toml\n20\t10\tsrc/lib.rs\n5\t3\tsrc/api.rs");
      assert_eq!(found.as_deref(), Some("deps"));
   }

   #[test]
   fn test_analyze_wide_change_documentation() {
      let found = wide_pattern(
         "50\t20\tREADME.md\n30\t10\tdocs/guide.md\n20\t5\tdocs/api.md\n5\t2\tsrc/lib.rs",
      );
      assert_eq!(found.as_deref(), Some("docs"));
   }

   #[test]
   fn test_analyze_wide_change_tests() {
      let records = [
         "10\t5\tsrc/api_test.rs",
         "15\t8\tsrc/client_test.rs",
         "20\t10\ttests/integration_test.rs",
         "5\t2\tsrc/lib.rs",
      ];
      let found = wide_pattern(&records.join("\n"));
      assert_eq!(found.as_deref(), Some("tests"));
   }

   #[test]
   fn test_analyze_wide_change_error_handling() {
      let found = wide_pattern(
         "10\t5\tsrc/error.rs\n15\t8\tsrc/result.rs\n20\t10\tsrc/error_types.rs\n5\t2\tsrc/lib.rs",
      );
      assert_eq!(found.as_deref(), Some("error-handling"));
   }

   #[test]
   fn test_analyze_wide_change_type_refactor() {
      let found = wide_pattern(
         "10\t5\tsrc/types.rs\n15\t8\tsrc/structs.rs\n20\t10\tsrc/enums.rs\n5\t2\tsrc/lib.rs",
      );
      assert_eq!(found.as_deref(), Some("type-refactor"));
   }

   #[test]
   fn test_analyze_wide_change_config() {
      let found = wide_pattern(
         "10\t5\tconfig.toml\n15\t8\tsettings.yaml\n20\t10\tconfig.json\n5\t2\tsrc/lib.rs",
      );
      assert_eq!(found.as_deref(), Some("config"));
   }

   #[test]
   fn test_analyze_wide_change_no_pattern() {
      let found = wide_pattern("10\t5\tsrc/foo.rs\n15\t8\tsrc/bar.rs\n20\t10\tsrc/baz.rs");
      assert_eq!(found, None);
   }

   #[test]
   fn test_analyze_wide_change_empty() {
      assert_eq!(wide_pattern(""), None);
   }

   #[test]
   fn test_analyze_wide_change_package_json() {
      let found = wide_pattern("10\t5\tpackage.json\n20\t10\tsrc/index.js\n5\t3\tsrc/utils.js");
      assert_eq!(found.as_deref(), Some("deps"));
   }
}