Skip to main content

llm_git/
analysis.rs

1use std::collections::{HashMap, HashSet};
2
3/// Scope analysis functionality for git diff numstat parsing
4use crate::config::CommitConfig;
5use crate::{
6   error::Result,
7   git::get_git_numstat,
8   types::{Mode, ScopeCandidate},
9};
10
/// Organizational directory names that rarely identify a meaningful scope
///
/// A path segment listed here is dropped while walking a path whenever deeper
/// segments follow it, and candidate scopes rooted at one of these names are
/// discarded when the final candidate list is built.
const PLACEHOLDER_DIRS: &[&str] = &[
   // Rust conventions
   "src",
   "lib",
   "bin",
   "crates",
   "benches",
   "examples",
   // Go conventions
   "internal",
   "pkg",
   // C/C++ conventions
   "include",
   // Testing
   "tests",
   "test",
   // Documentation
   "docs",
   // Generic organizational
   "packages",
   "modules",
];
22
/// Directory names that never contribute a segment to a scope, wherever they
/// appear in a path
const SKIP_DIRS: &[&str] = &[
   "test",
   "tests",
   "benches",
   "examples",
   "target",
   "build",
   "node_modules",
   ".github",
];
26
/// Accumulates changed-line counts per candidate scope while numstat lines
/// are processed
#[derive(Debug, Clone)]
pub struct ScopeAnalyzer {
   /// Changed lines (added + deleted) credited to each candidate scope path
   component_lines: HashMap<String, usize>,
   /// Changed lines summed over every counted file
   total_lines:     usize,
}
31
32impl Default for ScopeAnalyzer {
33   fn default() -> Self {
34      Self::new()
35   }
36}
37
38impl ScopeAnalyzer {
39   pub fn new() -> Self {
40      Self { component_lines: HashMap::new(), total_lines: 0 }
41   }
42
43   fn from_numstat(numstat: &str, config: &CommitConfig) -> Self {
44      let mut analyzer = Self::new();
45
46      for line in numstat.lines() {
47         analyzer.process_numstat_line(line, config);
48      }
49
50      analyzer
51   }
52
53   /// Process single numstat line: "added\tdeleted\tpath"
54   pub fn process_numstat_line(&mut self, line: &str, config: &CommitConfig) {
55      let parts: Vec<&str> = line.split('\t').collect();
56      if parts.len() < 3 {
57         return;
58      }
59
60      let (added_str, deleted_str, path_part) = (parts[0], parts[1], parts[2]);
61
62      // Parse line counts (skip binary files marked with "-")
63      let added = added_str.parse::<usize>().unwrap_or(0);
64      let deleted = deleted_str.parse::<usize>().unwrap_or(0);
65      let lines_changed = added + deleted;
66
67      if lines_changed == 0 {
68         return;
69      }
70
71      // Extract actual path from rename syntax
72      let path = Self::extract_path_from_rename(path_part);
73
74      // Skip excluded files
75      if config.excluded_files.iter().any(|ex| path.ends_with(ex)) {
76         return;
77      }
78
79      self.total_lines += lines_changed;
80
81      // Extract component candidates from path
82      let component_candidates = Self::extract_components_from_path(&path);
83
84      for comp in component_candidates {
85         // Final sanity check: no segments should contain dots
86         if comp.split('/').any(|s| s.contains('.')) {
87            continue;
88         }
89
90         *self.component_lines.entry(comp).or_insert(0) += lines_changed;
91      }
92   }
93
94   /// Extract new path from rename syntax (handles both brace and arrow forms)
95   fn extract_path_from_rename(path_part: &str) -> String {
96      // Handle renames with brace syntax: "lib/wal/{io_worker.rs => io.rs}"
97      if let Some(brace_start) = path_part.find('{') {
98         if let Some(arrow_pos) = path_part[brace_start..].find(" => ") {
99            let arrow_abs = brace_start + arrow_pos;
100            if let Some(brace_end) = path_part[arrow_abs..].find('}') {
101               let brace_end_abs = arrow_abs + brace_end;
102               let prefix = &path_part[..brace_start];
103               let new_name = path_part[arrow_abs + 4..brace_end_abs].trim();
104               return format!("{prefix}{new_name}");
105            }
106         }
107      } else if path_part.contains(" => ") {
108         // Simple arrow syntax: "old/path => new/path"
109         return path_part
110            .split(" => ")
111            .nth(1)
112            .unwrap_or(path_part)
113            .trim()
114            .to_string();
115      }
116
117      path_part.trim().to_string()
118   }
119
120   /// Extract meaningful component paths from file path
121   fn extract_components_from_path(path: &str) -> Vec<String> {
122      let segments: Vec<&str> = path.split('/').collect();
123      let mut component_candidates = Vec::new();
124      let mut meaningful_segments = Vec::new();
125
126      // Helper: strip extension from segment
127      let strip_ext = |s: &str| -> String {
128         if let Some(pos) = s.rfind('.') {
129            s[..pos].to_string()
130         } else {
131            s.to_string()
132         }
133      };
134
135      // Helper: is this segment a file (contains extension)?
136      let is_file = |s: &str| -> bool {
137         s.contains('.') && !s.starts_with('.') && s.rfind('.').is_some_and(|p| p > 0)
138      };
139
140      // Build candidates by walking path and extracting meaningful directory segments
141      for (seg_idx, seg) in segments.iter().enumerate() {
142         // Skip placeholder dirs when any deeper segments exist
143         if PLACEHOLDER_DIRS.contains(seg) {
144            // If this is a placeholder and we have more segments after it, skip it
145            if segments.len() > seg_idx + 1 {
146               continue;
147            }
148         }
149         // Skip if it's a file (has extension)
150         if is_file(seg) {
151            continue;
152         }
153         // Skip common non-scope dirs
154         if SKIP_DIRS.contains(seg) {
155            continue;
156         }
157
158         let stripped = strip_ext(seg);
159         // Filter out empty segments or dotfiles
160         if !stripped.is_empty() && !stripped.starts_with('.') {
161            meaningful_segments.push(stripped);
162         }
163      }
164
165      // Generate candidates: single-level and two-level
166      if !meaningful_segments.is_empty() {
167         component_candidates.push(meaningful_segments[0].clone());
168
169         if meaningful_segments.len() >= 2 {
170            component_candidates
171               .push(format!("{}/{}", meaningful_segments[0], meaningful_segments[1]));
172         }
173      }
174
175      component_candidates
176   }
177
178   /// Build sorted `ScopeCandidate` list from accumulated data
179   pub fn build_scope_candidates(&self) -> Vec<ScopeCandidate> {
180      let mut candidates: Vec<ScopeCandidate> = self
181         .component_lines
182         .iter()
183         .filter(|(path, _)| {
184            // Filter out pure placeholder single-segment scopes
185            if !path.contains('/') && PLACEHOLDER_DIRS.contains(&path.as_str()) {
186               return false;
187            }
188            // Filter out scopes starting with placeholder dirs
189            if let Some(root) = path.split('/').next()
190               && PLACEHOLDER_DIRS.contains(&root)
191            {
192               return false;
193            }
194            true
195         })
196         .map(|(path, &lines)| {
197            let percentage = (lines as f32 / self.total_lines as f32) * 100.0;
198            let is_two_segment = path.contains('/');
199
200            // Confidence calculation:
201            // - Single-segment: percentage as-is
202            // - Two-segment: percentage * 1.2 if >60%, else * 0.8
203            let confidence = if is_two_segment {
204               if percentage > 60.0 {
205                  percentage * 1.2
206               } else {
207                  percentage * 0.8
208               }
209            } else {
210               percentage
211            };
212
213            ScopeCandidate { percentage, path: path.clone(), confidence }
214         })
215         .collect();
216
217      candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
218      candidates
219   }
220
221   /// Check if change spans multiple components (wide change)
222   pub fn is_wide_change(candidates: &[ScopeCandidate], config: &CommitConfig) -> bool {
223      // Check if top component is below threshold
224      let is_wide = if let Some(top) = candidates.first() {
225         top.percentage / 100.0 < config.wide_change_threshold
226      } else {
227         false
228      };
229
230      // Check if ≥3 distinct roots
231      let distinct_roots: HashSet<&str> = candidates
232         .iter()
233         .map(|c| c.path.split('/').next().unwrap_or(&c.path))
234         .collect();
235
236      is_wide || distinct_roots.len() >= 3
237   }
238
239   /// Public API: extract scope candidates from git numstat output
240   pub fn extract_scope(numstat: &str, config: &CommitConfig) -> (Vec<ScopeCandidate>, usize) {
241      let analyzer = Self::from_numstat(numstat, config);
242      let candidates = analyzer.build_scope_candidates();
243      (candidates, analyzer.total_lines)
244   }
245
246   /// Count changed lines from git numstat output, ignoring excluded and binary
247   /// files.
248   pub fn count_changed_lines(numstat: &str, config: &CommitConfig) -> usize {
249      Self::from_numstat(numstat, config).total_lines
250   }
251
252   /// Analyze wide changes to detect cross-cutting patterns
253   pub fn analyze_wide_change(numstat: &str) -> Option<String> {
254      let lines: Vec<&str> = numstat.lines().collect();
255      if lines.is_empty() {
256         return None;
257      }
258
259      // Extract file paths from numstat
260      let paths: Vec<&str> = lines
261         .iter()
262         .filter_map(|line| {
263            let parts: Vec<&str> = line.split('\t').collect();
264            if parts.len() >= 3 {
265               Some(parts[2])
266            } else {
267               None
268            }
269         })
270         .collect();
271
272      if paths.is_empty() {
273         return None;
274      }
275
276      // Count file types
277      let total = paths.len();
278      let mut md_count = 0;
279      let mut test_count = 0;
280      let mut config_count = 0;
281      let mut has_cargo_toml = false;
282      let mut has_package_json = false;
283
284      // Track patterns
285      let mut error_keywords = 0;
286      let mut type_keywords = 0;
287
288      for path in &paths {
289         // File extension analysis
290         if std::path::Path::new(path)
291            .extension()
292            .is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
293         {
294            md_count += 1;
295         }
296         if path.contains("/test") || path.contains("_test.") || path.ends_with("_test.rs") {
297            test_count += 1;
298         }
299         if std::path::Path::new(path).extension().is_some_and(|ext| {
300            ext.eq_ignore_ascii_case("toml")
301               || ext.eq_ignore_ascii_case("yaml")
302               || ext.eq_ignore_ascii_case("yml")
303               || ext.eq_ignore_ascii_case("json")
304         }) {
305            config_count += 1;
306         }
307
308         // Dependency files
309         if path.contains("Cargo.toml") {
310            has_cargo_toml = true;
311         }
312         if path.contains("package.json") {
313            has_package_json = true;
314         }
315
316         // Pattern keywords in paths
317         let lower_path = path.to_lowercase();
318         if lower_path.contains("error")
319            || lower_path.contains("result")
320            || lower_path.contains("err")
321         {
322            error_keywords += 1;
323         }
324         if lower_path.contains("type")
325            || lower_path.contains("struct")
326            || lower_path.contains("enum")
327         {
328            type_keywords += 1;
329         }
330      }
331
332      // Detection heuristics (ordered by specificity)
333
334      // 1. Dependency updates (high confidence)
335      if has_cargo_toml || has_package_json {
336         return Some("deps".to_string());
337      }
338
339      // 2. Documentation updates (>70% .md files)
340      if md_count * 100 / total > 70 {
341         return Some("docs".to_string());
342      }
343
344      // 3. Test updates (>60% test files)
345      if test_count * 100 / total > 60 {
346         return Some("tests".to_string());
347      }
348
349      // 4. Error handling migration (>40% files with error keywords)
350      if error_keywords * 100 / total > 40 {
351         return Some("error-handling".to_string());
352      }
353
354      // 5. Type migration (>40% files with type keywords)
355      if type_keywords * 100 / total > 40 {
356         return Some("type-refactor".to_string());
357      }
358
359      // 6. Config/tooling updates (>50% config files)
360      if config_count * 100 / total > 50 {
361         return Some("config".to_string());
362      }
363
364      // No clear pattern detected
365      None
366   }
367}
368
369/// Extract candidate scopes from git diff --numstat output
370/// Returns (`scope_string`, `is_wide_change`)
371pub fn extract_scope_candidates(
372   mode: &Mode,
373   target: Option<&str>,
374   dir: &str,
375   config: &CommitConfig,
376) -> Result<(String, bool)> {
377   let numstat = get_git_numstat(mode, target, dir, config)?;
378
379   let (candidates, total_lines) = ScopeAnalyzer::extract_scope(&numstat, config);
380
381   if total_lines == 0 {
382      return Ok(("(none - no measurable changes)".to_string(), false));
383   }
384
385   let is_wide = ScopeAnalyzer::is_wide_change(&candidates, config);
386
387   if is_wide {
388      // Try to detect a pattern if wide_change_abstract is enabled
389      let scope_str = if config.wide_change_abstract {
390         if let Some(pattern) = ScopeAnalyzer::analyze_wide_change(&numstat) {
391            format!("(cross-cutting: {pattern})")
392         } else {
393            "(none - multi-component change)".to_string()
394         }
395      } else {
396         "(none - multi-component change)".to_string()
397      };
398
399      return Ok((scope_str, true));
400   }
401
402   // Format suggested scopes with weights for prompt (keep top 5, prefer 2-segment
403   // when >60%)
404   let mut suggestion_parts = Vec::new();
405   for cand in candidates.iter().take(5) {
406      // Only suggest if ≥10% to avoid noise
407      if cand.percentage >= 10.0 {
408         let confidence_label = if cand.path.contains('/') {
409            if cand.percentage > 60.0 {
410               "high confidence"
411            } else {
412               "moderate confidence"
413            }
414         } else {
415            "high confidence"
416         };
417
418         suggestion_parts
419            .push(format!("{} ({:.0}%, {})", cand.path, cand.percentage, confidence_label));
420      }
421   }
422
423   let scope_str = if suggestion_parts.is_empty() {
424      "(none - unclear component)".to_string()
425   } else {
426      format!("{}\nPrefer 2-segment scopes marked 'high confidence'", suggestion_parts.join(", "))
427   };
428
429   Ok((scope_str, is_wide))
430}
431
// Unit tests for `ScopeAnalyzer`: rename parsing, component extraction,
// numstat accumulation, wide-change detection, confidence ordering and
// cross-cutting pattern detection.
#[cfg(test)]
mod tests {
   use super::*;

   // Config shared by the tests: three lock files excluded and a 50%
   // wide-change threshold; every other field keeps its default.
   fn default_config() -> CommitConfig {
      CommitConfig {
         excluded_files: vec![
            "Cargo.lock".to_string(),
            "package-lock.json".to_string(),
            "yarn.lock".to_string(),
         ],
         wide_change_threshold: 0.5,
         ..Default::default()
      }
   }

   // Tests for extract_path_from_rename()
   #[test]
   fn test_extract_path_from_rename_brace() {
      // Brace syntax replaces only the content within braces (suffix is not
      // preserved)
      assert_eq!(ScopeAnalyzer::extract_path_from_rename("lib/{old => new}/file.rs"), "lib/new");
   }

   #[test]
   fn test_extract_path_from_rename_brace_complex() {
      assert_eq!(
         ScopeAnalyzer::extract_path_from_rename("src/api/{client.rs => http_client.rs}"),
         "src/api/http_client.rs"
      );
   }

   #[test]
   fn test_extract_path_from_rename_arrow() {
      assert_eq!(
         ScopeAnalyzer::extract_path_from_rename("old/file.rs => new/file.rs"),
         "new/file.rs"
      );
   }

   #[test]
   fn test_extract_path_from_rename_arrow_with_spaces() {
      assert_eq!(
         ScopeAnalyzer::extract_path_from_rename("  old/path.rs => new/path.rs  "),
         "new/path.rs"
      );
   }

   #[test]
   fn test_extract_path_from_rename_no_rename() {
      assert_eq!(ScopeAnalyzer::extract_path_from_rename("lib/file.rs"), "lib/file.rs");
   }

   #[test]
   fn test_extract_path_from_rename_malformed_brace() {
      // Missing closing brace - falls back to original
      assert_eq!(
         ScopeAnalyzer::extract_path_from_rename("lib/{old => new/file.rs"),
         "lib/{old => new/file.rs"
      );
   }

   // Tests for extract_components_from_path()
   #[test]
   fn test_extract_components_simple() {
      // "src" is placeholder and skipped, only "api" remains
      let comps = ScopeAnalyzer::extract_components_from_path("src/api/client.rs");
      assert_eq!(comps, vec!["api"]);
   }

   #[test]
   fn test_extract_components_with_placeholder() {
      // "lib" is placeholder and skipped, "foo" and "bar" remain
      let comps = ScopeAnalyzer::extract_components_from_path("lib/foo/bar/baz.tsx");
      assert_eq!(comps, vec!["foo", "foo/bar"]);
   }

   #[test]
   fn test_extract_components_skip_tests() {
      // "tests" is in SKIP_DIRS, so skipped, only "api" remains
      let comps = ScopeAnalyzer::extract_components_from_path("tests/api/client_test.rs");
      assert_eq!(comps, vec!["api"]);
   }

   #[test]
   fn test_extract_components_skip_node_modules() {
      // "node_modules" is in SKIP_DIRS, only "foo" remains
      let comps = ScopeAnalyzer::extract_components_from_path("node_modules/foo/bar.js");
      assert_eq!(comps, vec!["foo"]);
   }

   #[test]
   fn test_extract_components_single_segment() {
      let comps = ScopeAnalyzer::extract_components_from_path("src/main.rs");
      // "src" is a placeholder and is stripped, leaving no components
      assert_eq!(comps, Vec::<String>::new());
   }

   #[test]
   fn test_extract_components_dotfile_skipped() {
      // ".git" gets stripped to "" and filtered out, "config" is kept
      let comps = ScopeAnalyzer::extract_components_from_path("lib/.git/config");
      assert_eq!(comps, vec!["config"]);
   }

   #[test]
   fn test_extract_components_strips_extension() {
      let comps = ScopeAnalyzer::extract_components_from_path("src/api/client.rs");
      // "client.rs" is a file, so skipped; "src" is a placeholder, leaving "api"
      assert!(comps.contains(&"api".to_string()));
   }

   #[test]
   fn test_extract_components_go_internal() {
      // Go projects: internal/ is placeholder, extract actual module
      let comps = ScopeAnalyzer::extract_components_from_path("internal/agent/worker.go");
      assert_eq!(comps, vec!["agent"]);
   }

   #[test]
   fn test_extract_components_go_internal_nested() {
      // Go projects: internal/foo/bar/baz.go → extract "foo" and "foo/bar"
      let comps = ScopeAnalyzer::extract_components_from_path("internal/config/parser/json.go");
      assert_eq!(comps, vec!["config", "config/parser"]);
   }

   #[test]
   fn test_extract_components_go_pkg() {
      // Go projects: pkg/ is also a placeholder
      let comps = ScopeAnalyzer::extract_components_from_path("pkg/util/strings.go");
      assert_eq!(comps, vec!["util"]);
   }

   #[test]
   fn test_extract_components_monorepo_packages() {
      // Monorepos: packages/ is placeholder
      let comps = ScopeAnalyzer::extract_components_from_path("packages/core/index.ts");
      assert_eq!(comps, vec!["core"]);
   }

   // Tests for process_numstat_line()
   #[test]
   fn test_process_numstat_line_normal() {
      let mut analyzer = ScopeAnalyzer::new();
      let config = default_config();
      analyzer.process_numstat_line("10\t5\tlib/foo/bar.rs", &config);

      assert_eq!(analyzer.total_lines, 15);
      assert_eq!(analyzer.component_lines.get("foo"), Some(&15));
   }

   #[test]
   fn test_process_numstat_line_excluded_file() {
      let mut analyzer = ScopeAnalyzer::new();
      let config = default_config();
      analyzer.process_numstat_line("10\t5\tCargo.lock", &config);

      assert_eq!(analyzer.total_lines, 0);
      assert!(analyzer.component_lines.is_empty());
   }

   #[test]
   fn test_process_numstat_line_binary_file() {
      let mut analyzer = ScopeAnalyzer::new();
      let config = default_config();
      analyzer.process_numstat_line("-\t-\timage.png", &config);

      assert_eq!(analyzer.total_lines, 0);
   }

   #[test]
   fn test_process_numstat_line_invalid() {
      let mut analyzer = ScopeAnalyzer::new();
      let config = default_config();
      analyzer.process_numstat_line("invalid line", &config);

      assert_eq!(analyzer.total_lines, 0);
   }

   #[test]
   fn test_process_numstat_line_rename_brace() {
      let mut analyzer = ScopeAnalyzer::new();
      let config = default_config();
      // Brace syntax gives "lib/new" path
      analyzer.process_numstat_line("20\t10\tlib/{old => new}/file.rs", &config);

      assert_eq!(analyzer.total_lines, 30);
      // Path "lib/new" -> extracts "new" (lib is stripped as placeholder)
      assert_eq!(analyzer.component_lines.get("new"), Some(&30));
   }

   #[test]
   fn test_process_numstat_line_multiple_files() {
      let mut analyzer = ScopeAnalyzer::new();
      let config = default_config();
      analyzer.process_numstat_line("10\t5\tsrc/api/client.rs", &config);
      analyzer.process_numstat_line("20\t10\tsrc/api/server.rs", &config);

      assert_eq!(analyzer.total_lines, 45);
      assert_eq!(analyzer.component_lines.get("api"), Some(&45));
   }

   // Tests for is_wide_change()
   #[test]
   fn test_is_wide_change_focused() {
      let config = default_config();
      let candidates = vec![
         ScopeCandidate { path: "api".to_string(), percentage: 80.0, confidence: 80.0 },
         ScopeCandidate { path: "db".to_string(), percentage: 20.0, confidence: 20.0 },
      ];

      assert!(!ScopeAnalyzer::is_wide_change(&candidates, &config));
   }

   #[test]
   fn test_is_wide_change_dispersed() {
      let config = default_config();
      let candidates = vec![
         ScopeCandidate { path: "api".to_string(), percentage: 30.0, confidence: 30.0 },
         ScopeCandidate { path: "db".to_string(), percentage: 30.0, confidence: 30.0 },
         ScopeCandidate { path: "ui".to_string(), percentage: 40.0, confidence: 40.0 },
      ];

      assert!(ScopeAnalyzer::is_wide_change(&candidates, &config));
   }

   #[test]
   fn test_is_wide_change_three_roots() {
      let config = default_config();
      let candidates = vec![
         ScopeCandidate { path: "api".to_string(), percentage: 60.0, confidence: 60.0 },
         ScopeCandidate { path: "db".to_string(), percentage: 20.0, confidence: 20.0 },
         ScopeCandidate { path: "ui".to_string(), percentage: 20.0, confidence: 20.0 },
      ];

      assert!(ScopeAnalyzer::is_wide_change(&candidates, &config));
   }

   #[test]
   fn test_is_wide_change_nested_same_root() {
      let config = default_config();
      let candidates = vec![
         ScopeCandidate {
            path:       "api/client".to_string(),
            percentage: 60.0,
            confidence: 72.0,
         },
         ScopeCandidate {
            path:       "api/server".to_string(),
            percentage: 40.0,
            confidence: 32.0,
         },
      ];

      assert!(!ScopeAnalyzer::is_wide_change(&candidates, &config));
   }

   #[test]
   fn test_is_wide_change_empty() {
      let config = default_config();
      let candidates = vec![];

      assert!(!ScopeAnalyzer::is_wide_change(&candidates, &config));
   }

   // Integration tests for extract_scope()
   #[test]
   fn test_extract_scope_single_file() {
      let config = default_config();
      let numstat = "10\t5\tsrc/api/client.rs";
      let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);

      assert_eq!(total_lines, 15);
      // "src" is filtered out, only "api" remains
      assert_eq!(candidates.len(), 1);
      assert_eq!(candidates[0].path, "api");
      assert_eq!(candidates[0].percentage, 100.0);
   }

   #[test]
   fn test_extract_scope_placeholder_only() {
      let config = default_config();
      let numstat = "10\t5\tsrc/main.rs";
      let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);

      assert_eq!(total_lines, 15);
      // "src" is placeholder and filtered out, no candidates
      assert_eq!(candidates.len(), 0);
   }

   #[test]
   fn test_extract_scope_multiple_files() {
      let config = default_config();
      let numstat = "10\t5\tsrc/api/client.rs\n20\t10\tsrc/db/models.rs";
      let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);

      assert_eq!(total_lines, 45);
      assert!(candidates.len() >= 2);

      // Check that both components are present
      let api_cand = candidates.iter().find(|c| c.path == "api");
      let db_cand = candidates.iter().find(|c| c.path == "db");

      assert!(api_cand.is_some());
      assert!(db_cand.is_some());

      // DB should have higher percentage (30 lines vs 15)
      assert!(db_cand.unwrap().percentage > api_cand.unwrap().percentage);
   }

   #[test]
   fn test_extract_scope_excluded_files() {
      let config = default_config();
      let numstat = "100\t50\tCargo.lock\n10\t5\tsrc/api/client.rs";
      let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);

      // Cargo.lock should be excluded
      assert_eq!(total_lines, 15);
      assert_eq!(candidates[0].path, "api");
   }

   #[test]
   fn test_extract_scope_no_changes() {
      let config = default_config();
      let numstat = "";
      let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);

      assert_eq!(total_lines, 0);
      assert!(candidates.is_empty());
   }

   #[test]
   fn test_count_changed_lines_ignores_excluded_and_binary_files() {
      let config = default_config();
      let numstat = "100\t50\tCargo.lock\n-\t-\timage.png\n10\t5\tsrc/api/client.rs";

      assert_eq!(ScopeAnalyzer::count_changed_lines(numstat, &config), 15);
   }

   #[test]
   fn test_extract_scope_sorted_by_percentage() {
      let config = default_config();
      let numstat = "5\t0\tsrc/api/client.rs\n50\t0\tsrc/db/models.rs\n10\t0\tsrc/ui/component.tsx";
      let (candidates, _) = ScopeAnalyzer::extract_scope(numstat, &config);

      // Sorting is by confidence, which equals the percentage for these
      // single-segment scopes, so percentages must be descending
      assert!(candidates[0].percentage >= candidates[1].percentage);
      assert!(candidates[1].percentage >= candidates[2].percentage);
   }

   #[test]
   fn test_build_scope_candidates_percentages() {
      let mut analyzer = ScopeAnalyzer::new();
      analyzer.component_lines.insert("api".to_string(), 30);
      analyzer.component_lines.insert("db".to_string(), 70);
      analyzer.total_lines = 100;

      let candidates = analyzer.build_scope_candidates();

      assert_eq!(candidates.len(), 2);
      assert_eq!(candidates[0].path, "db");
      assert!((candidates[0].percentage - 70.0).abs() < 0.001);
      assert_eq!(candidates[1].path, "api");
      assert!((candidates[1].percentage - 30.0).abs() < 0.001);
   }

   // Confidence heuristic tests: 70% in two-segment should prefer specific scope
   #[test]
   fn test_confidence_70_percent_in_two_segment_prefers_specific() {
      let mut analyzer = ScopeAnalyzer::new();
      analyzer.component_lines.insert("api".to_string(), 70);
      analyzer
         .component_lines
         .insert("api/client".to_string(), 70);
      analyzer.component_lines.insert("other".to_string(), 30);
      analyzer.total_lines = 100;

      let candidates = analyzer.build_scope_candidates();

      // api/client at 70% gets confidence = 70 * 1.2 = 84
      // api at 70% gets confidence = 70
      // other at 30% gets confidence = 30
      // So api/client should be first
      assert_eq!(candidates[0].path, "api/client");
      assert!((candidates[0].percentage - 70.0).abs() < 0.001);
      assert!((candidates[0].confidence - 84.0).abs() < 0.001);
   }

   // Confidence heuristic tests: 45% in two-segment should prefer single-segment
   #[test]
   fn test_confidence_45_percent_in_two_segment_prefers_single() {
      let mut analyzer = ScopeAnalyzer::new();
      analyzer.component_lines.insert("api".to_string(), 45);
      analyzer
         .component_lines
         .insert("api/client".to_string(), 45);
      analyzer.component_lines.insert("other".to_string(), 55);
      analyzer.total_lines = 100;

      let candidates = analyzer.build_scope_candidates();

      // other at 55% gets confidence = 55
      // api at 45% gets confidence = 45
      // api/client at 45% gets confidence = 45 * 0.8 = 36
      // So order should be: other, api, api/client
      assert_eq!(candidates[0].path, "other");
      assert_eq!(candidates[1].path, "api");
      assert_eq!(candidates[2].path, "api/client");
      assert!((candidates[2].confidence - 36.0).abs() < 0.001);
   }

   // Tests for analyze_wide_change()
   #[test]
   fn test_analyze_wide_change_dependency_updates() {
      let numstat = "10\t5\tCargo.toml\n20\t10\tsrc/lib.rs\n5\t3\tsrc/api.rs";
      let result = ScopeAnalyzer::analyze_wide_change(numstat);
      assert_eq!(result, Some("deps".to_string()));
   }

   #[test]
   fn test_analyze_wide_change_documentation() {
      let numstat =
         "50\t20\tREADME.md\n30\t10\tdocs/guide.md\n20\t5\tdocs/api.md\n5\t2\tsrc/lib.rs";
      let result = ScopeAnalyzer::analyze_wide_change(numstat);
      assert_eq!(result, Some("docs".to_string()));
   }

   #[test]
   fn test_analyze_wide_change_tests() {
      let numstat = "10\t5\tsrc/api_test.rs\n15\t8\tsrc/client_test.rs\n20\t10\ttests/\
                     integration_test.rs\n5\t2\tsrc/lib.rs";
      let result = ScopeAnalyzer::analyze_wide_change(numstat);
      assert_eq!(result, Some("tests".to_string()));
   }

   #[test]
   fn test_analyze_wide_change_error_handling() {
      let numstat =
         "10\t5\tsrc/error.rs\n15\t8\tsrc/result.rs\n20\t10\tsrc/error_types.rs\n5\t2\tsrc/lib.rs";
      let result = ScopeAnalyzer::analyze_wide_change(numstat);
      assert_eq!(result, Some("error-handling".to_string()));
   }

   #[test]
   fn test_analyze_wide_change_type_refactor() {
      let numstat =
         "10\t5\tsrc/types.rs\n15\t8\tsrc/structs.rs\n20\t10\tsrc/enums.rs\n5\t2\tsrc/lib.rs";
      let result = ScopeAnalyzer::analyze_wide_change(numstat);
      assert_eq!(result, Some("type-refactor".to_string()));
   }

   #[test]
   fn test_analyze_wide_change_config() {
      let numstat =
         "10\t5\tconfig.toml\n15\t8\tsettings.yaml\n20\t10\tconfig.json\n5\t2\tsrc/lib.rs";
      let result = ScopeAnalyzer::analyze_wide_change(numstat);
      assert_eq!(result, Some("config".to_string()));
   }

   #[test]
   fn test_analyze_wide_change_no_pattern() {
      let numstat = "10\t5\tsrc/foo.rs\n15\t8\tsrc/bar.rs\n20\t10\tsrc/baz.rs";
      let result = ScopeAnalyzer::analyze_wide_change(numstat);
      assert_eq!(result, None);
   }

   #[test]
   fn test_analyze_wide_change_empty() {
      let numstat = "";
      let result = ScopeAnalyzer::analyze_wide_change(numstat);
      assert_eq!(result, None);
   }

   #[test]
   fn test_analyze_wide_change_package_json() {
      let numstat = "10\t5\tpackage.json\n20\t10\tsrc/index.js\n5\t3\tsrc/utils.js";
      let result = ScopeAnalyzer::analyze_wide_change(numstat);
      assert_eq!(result, Some("deps".to_string()));
   }
}