Skip to main content

llm_git/
analysis.rs

1use std::collections::{HashMap, HashSet};
2
3/// Scope analysis functionality for git diff numstat parsing
4use crate::config::CommitConfig;
5use crate::{
6   error::Result,
7   git::get_git_numstat,
8   types::{Mode, ScopeCandidate},
9};
10
/// Organizational directory names that rarely make a meaningful scope.
///
/// While a path is walked, a segment listed here is skipped whenever deeper
/// segments follow it, and a candidate whose root segment is listed here is
/// dropped when the final candidate list is built.
// Formatting is pinned so each group comment stays above the entries it labels.
#[rustfmt::skip]
const PLACEHOLDER_DIRS: &[&str] = &[
   // Rust conventions
   "src", "lib", "bin", "crates", "benches", "examples",
   // Go conventions
   "internal", "pkg",
   // C/C++ conventions
   "include",
   // Testing
   "tests", "test",
   // Documentation
   "docs",
   // Generic organizational
   "packages", "modules",
];
22
/// Directory names that never contribute a scope segment, wherever they
/// appear in a path (test/bench/example trees, build output, vendored
/// dependencies, CI metadata).
const SKIP_DIRS: &[&str] = &[
   "test",
   "tests",
   "benches",
   "examples",
   "target",
   "build",
   "node_modules",
   ".github",
];
26
/// Accumulates changed-line counts per candidate scope while numstat lines
/// are processed one at a time.
pub struct ScopeAnalyzer {
   /// Changed lines (added + deleted) attributed to each candidate scope,
   /// keyed by the scope path (`"api"` or `"api/client"`).
   component_lines: HashMap<String, usize>,
   /// Changed lines summed over every file that was counted.
   total_lines:     usize,
}
31
32impl Default for ScopeAnalyzer {
33   fn default() -> Self {
34      Self::new()
35   }
36}
37
38impl ScopeAnalyzer {
39   pub fn new() -> Self {
40      Self { component_lines: HashMap::new(), total_lines: 0 }
41   }
42
43   fn from_numstat(numstat: &str, config: &CommitConfig) -> Self {
44      let mut analyzer = Self::new();
45
46      for line in numstat.lines() {
47         analyzer.process_numstat_line(line, config);
48      }
49
50      analyzer
51   }
52
53   /// Process single numstat line: "added\tdeleted\tpath"
54   pub fn process_numstat_line(&mut self, line: &str, config: &CommitConfig) {
55      let parts: Vec<&str> = line.split('\t').collect();
56      if parts.len() < 3 {
57         return;
58      }
59
60      let (added_str, deleted_str, path_part) = (parts[0], parts[1], parts[2]);
61
62      // Parse line counts (skip binary files marked with "-")
63      let added = added_str.parse::<usize>().unwrap_or(0);
64      let deleted = deleted_str.parse::<usize>().unwrap_or(0);
65      let lines_changed = added + deleted;
66
67      if lines_changed == 0 {
68         return;
69      }
70
71      // Extract actual path from rename syntax
72      let path = Self::extract_path_from_rename(path_part);
73
74      // Skip excluded files
75      if config.excluded_files.iter().any(|ex| path.ends_with(ex)) {
76         return;
77      }
78
79      self.total_lines += lines_changed;
80
81      // Extract component candidates from path
82      let component_candidates = Self::extract_components_from_path(&path);
83
84      for comp in component_candidates {
85         // Final sanity check: no segments should contain dots
86         if comp.split('/').any(|s| s.contains('.')) {
87            continue;
88         }
89
90         *self.component_lines.entry(comp).or_insert(0) += lines_changed;
91      }
92   }
93
94   /// Extract new path from rename syntax (handles both brace and arrow forms)
95   fn extract_path_from_rename(path_part: &str) -> String {
96      // Handle renames with brace syntax: "lib/wal/{io_worker.rs => io.rs}"
97      if let Some(brace_start) = path_part.find('{') {
98         if let Some(arrow_pos) = path_part[brace_start..].find(" => ") {
99            let arrow_abs = brace_start + arrow_pos;
100            if let Some(brace_end) = path_part[arrow_abs..].find('}') {
101               let brace_end_abs = arrow_abs + brace_end;
102               let prefix = &path_part[..brace_start];
103               let new_name = path_part[arrow_abs + 4..brace_end_abs].trim();
104               return format!("{prefix}{new_name}");
105            }
106         }
107      } else if path_part.contains(" => ") {
108         // Simple arrow syntax: "old/path => new/path"
109         return path_part
110            .split(" => ")
111            .nth(1)
112            .unwrap_or(path_part)
113            .trim()
114            .to_string();
115      }
116
117      path_part.trim().to_string()
118   }
119
120   /// Extract meaningful component paths from file path
121   fn extract_components_from_path(path: &str) -> Vec<String> {
122      let segments: Vec<&str> = path.split('/').collect();
123      let mut component_candidates = Vec::new();
124      let mut meaningful_segments = Vec::new();
125
126      // Helper: strip extension from segment
127      let strip_ext = |s: &str| -> String {
128         if let Some(pos) = s.rfind('.') {
129            s[..pos].to_string()
130         } else {
131            s.to_string()
132         }
133      };
134
135      // Helper: is this segment a file (contains extension)?
136      let is_file = |s: &str| -> bool {
137         s.contains('.') && !s.starts_with('.') && s.rfind('.').is_some_and(|p| p > 0)
138      };
139
140      // Build candidates by walking path and extracting meaningful directory segments
141      for (seg_idx, seg) in segments.iter().enumerate() {
142         // Skip placeholder dirs when any deeper segments exist
143         if PLACEHOLDER_DIRS.contains(seg) {
144            // If this is a placeholder and we have more segments after it, skip it
145            if segments.len() > seg_idx + 1 {
146               continue;
147            }
148         }
149         // Skip if it's a file (has extension)
150         if is_file(seg) {
151            continue;
152         }
153         // Skip common non-scope dirs
154         if SKIP_DIRS.contains(seg) {
155            continue;
156         }
157
158         let stripped = strip_ext(seg);
159         // Filter out empty segments or dotfiles
160         if !stripped.is_empty() && !stripped.starts_with('.') {
161            meaningful_segments.push(stripped);
162         }
163      }
164
165      // Generate candidates: single-level and two-level
166      if !meaningful_segments.is_empty() {
167         component_candidates.push(meaningful_segments[0].clone());
168
169         if meaningful_segments.len() >= 2 {
170            component_candidates
171               .push(format!("{}/{}", meaningful_segments[0], meaningful_segments[1]));
172         }
173      }
174
175      component_candidates
176   }
177
178   /// Build sorted `ScopeCandidate` list from accumulated data
179   pub fn build_scope_candidates(&self) -> Vec<ScopeCandidate> {
180      let mut candidates: Vec<ScopeCandidate> = self
181         .component_lines
182         .iter()
183         .filter(|(path, _)| {
184            // Filter out pure placeholder single-segment scopes
185            if !path.contains('/') && PLACEHOLDER_DIRS.contains(&path.as_str()) {
186               return false;
187            }
188            // Filter out scopes starting with placeholder dirs
189            if let Some(root) = path.split('/').next()
190               && PLACEHOLDER_DIRS.contains(&root)
191            {
192               return false;
193            }
194            true
195         })
196         .map(|(path, &lines)| {
197            let percentage = (lines as f32 / self.total_lines as f32) * 100.0;
198            let is_two_segment = path.contains('/');
199
200            // Confidence calculation:
201            // - Single-segment: percentage as-is
202            // - Two-segment: percentage * 1.2 if >60%, else * 0.8
203            let confidence = if is_two_segment {
204               if percentage > 60.0 {
205                  percentage * 1.2
206               } else {
207                  percentage * 0.8
208               }
209            } else {
210               percentage
211            };
212
213            ScopeCandidate { percentage, path: path.clone(), confidence }
214         })
215         .collect();
216
217      candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
218      candidates
219   }
220
221   /// Check if change spans multiple components (wide change)
222   pub fn is_wide_change(candidates: &[ScopeCandidate], config: &CommitConfig) -> bool {
223      // Check if top component is below threshold
224      let is_wide = if let Some(top) = candidates.first() {
225         top.percentage / 100.0 < config.wide_change_threshold
226      } else {
227         false
228      };
229
230      // Check if ≥3 distinct roots
231      let distinct_roots: HashSet<&str> = candidates
232         .iter()
233         .map(|c| c.path.split('/').next().unwrap_or(&c.path))
234         .collect();
235
236      is_wide || distinct_roots.len() >= 3
237   }
238
239   /// Public API: extract scope candidates from git numstat output
240   pub fn extract_scope(numstat: &str, config: &CommitConfig) -> (Vec<ScopeCandidate>, usize) {
241      let analyzer = Self::from_numstat(numstat, config);
242      let candidates = analyzer.build_scope_candidates();
243      (candidates, analyzer.total_lines)
244   }
245
246   /// Count changed lines from git numstat output, ignoring excluded and binary
247   /// files.
248   pub fn count_changed_lines(numstat: &str, config: &CommitConfig) -> usize {
249      Self::from_numstat(numstat, config).total_lines
250   }
251
252   /// Analyze wide changes to detect cross-cutting patterns
253   pub fn analyze_wide_change(numstat: &str) -> Option<String> {
254      let lines: Vec<&str> = numstat.lines().collect();
255      if lines.is_empty() {
256         return None;
257      }
258
259      // Extract file paths from numstat
260      let paths: Vec<&str> = lines
261         .iter()
262         .filter_map(|line| {
263            let parts: Vec<&str> = line.split('\t').collect();
264            if parts.len() >= 3 {
265               Some(parts[2])
266            } else {
267               None
268            }
269         })
270         .collect();
271
272      if paths.is_empty() {
273         return None;
274      }
275
276      // Count file types
277      let total = paths.len();
278      let mut md_count = 0;
279      let mut test_count = 0;
280      let mut config_count = 0;
281      let mut has_cargo_toml = false;
282      let mut has_package_json = false;
283
284      // Track patterns
285      let mut error_keywords = 0;
286      let mut type_keywords = 0;
287
288      for path in &paths {
289         // File extension analysis
290         if std::path::Path::new(path)
291            .extension()
292            .is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
293         {
294            md_count += 1;
295         }
296         if path.contains("/test") || path.contains("_test.") || path.ends_with("_test.rs") {
297            test_count += 1;
298         }
299         if std::path::Path::new(path).extension().is_some_and(|ext| {
300            ext.eq_ignore_ascii_case("toml")
301               || ext.eq_ignore_ascii_case("yaml")
302               || ext.eq_ignore_ascii_case("yml")
303               || ext.eq_ignore_ascii_case("json")
304         }) {
305            config_count += 1;
306         }
307
308         // Dependency files
309         if path.contains("Cargo.toml") {
310            has_cargo_toml = true;
311         }
312         if path.contains("package.json") {
313            has_package_json = true;
314         }
315
316         // Pattern keywords in paths
317         let lower_path = path.to_lowercase();
318         if lower_path.contains("error")
319            || lower_path.contains("result")
320            || lower_path.contains("err")
321         {
322            error_keywords += 1;
323         }
324         if lower_path.contains("type")
325            || lower_path.contains("struct")
326            || lower_path.contains("enum")
327         {
328            type_keywords += 1;
329         }
330      }
331
332      // Detection heuristics (ordered by specificity)
333
334      // 1. Dependency updates (high confidence)
335      if has_cargo_toml || has_package_json {
336         return Some("deps".to_string());
337      }
338
339      // 2. Documentation updates (>70% .md files)
340      if md_count * 100 / total > 70 {
341         return Some("docs".to_string());
342      }
343
344      // 3. Test updates (>60% test files)
345      if test_count * 100 / total > 60 {
346         return Some("tests".to_string());
347      }
348
349      // 4. Error handling migration (>40% files with error keywords)
350      if error_keywords * 100 / total > 40 {
351         return Some("error-handling".to_string());
352      }
353
354      // 5. Type migration (>40% files with type keywords)
355      if type_keywords * 100 / total > 40 {
356         return Some("type-refactor".to_string());
357      }
358
359      // 6. Config/tooling updates (>50% config files)
360      if config_count * 100 / total > 50 {
361         return Some("config".to_string());
362      }
363
364      // No clear pattern detected
365      None
366   }
367}
368
369/// Extract candidate scopes from git diff --numstat output
370/// Returns (`scope_string`, `is_wide_change`)
371#[tracing::instrument(target = "lgit", name = "analysis.extract_scope_candidates", skip_all, fields(mode = ?mode, target = ?target, dir))]
372pub fn extract_scope_candidates(
373   mode: &Mode,
374   target: Option<&str>,
375   dir: &str,
376   config: &CommitConfig,
377) -> Result<(String, bool)> {
378   let numstat = get_git_numstat(mode, target, dir, config)?;
379
380   let (candidates, total_lines) = ScopeAnalyzer::extract_scope(&numstat, config);
381
382   if total_lines == 0 {
383      return Ok(("(none - no measurable changes)".to_string(), false));
384   }
385
386   let is_wide = ScopeAnalyzer::is_wide_change(&candidates, config);
387
388   if is_wide {
389      // Try to detect a pattern if wide_change_abstract is enabled
390      let scope_str = if config.wide_change_abstract {
391         if let Some(pattern) = ScopeAnalyzer::analyze_wide_change(&numstat) {
392            format!("(cross-cutting: {pattern})")
393         } else {
394            "(none - multi-component change)".to_string()
395         }
396      } else {
397         "(none - multi-component change)".to_string()
398      };
399
400      return Ok((scope_str, true));
401   }
402
403   // Format suggested scopes with weights for prompt (keep top 5, prefer 2-segment
404   // when >60%)
405   let mut suggestion_parts = Vec::new();
406   for cand in candidates.iter().take(5) {
407      // Only suggest if ≥10% to avoid noise
408      if cand.percentage >= 10.0 {
409         let confidence_label = if cand.path.contains('/') {
410            if cand.percentage > 60.0 {
411               "high confidence"
412            } else {
413               "moderate confidence"
414            }
415         } else {
416            "high confidence"
417         };
418
419         suggestion_parts
420            .push(format!("{} ({:.0}%, {})", cand.path, cand.percentage, confidence_label));
421      }
422   }
423
424   let scope_str = if suggestion_parts.is_empty() {
425      "(none - unclear component)".to_string()
426   } else {
427      format!("{}\nPrefer 2-segment scopes marked 'high confidence'", suggestion_parts.join(", "))
428   };
429
430   Ok((scope_str, is_wide))
431}
432
#[cfg(test)]
mod tests {
   use super::*;

   /// Config with the usual lockfiles excluded and a 50% wide-change threshold.
   fn default_config() -> CommitConfig {
      CommitConfig {
         excluded_files: vec![
            "Cargo.lock".to_string(),
            "package-lock.json".to_string(),
            "yarn.lock".to_string(),
         ],
         wide_change_threshold: 0.5,
         ..Default::default()
      }
   }

   /// Shorthand for `ScopeAnalyzer::extract_path_from_rename`.
   fn rename(path: &str) -> String {
      ScopeAnalyzer::extract_path_from_rename(path)
   }

   /// Shorthand for `ScopeAnalyzer::extract_components_from_path`.
   fn components(path: &str) -> Vec<String> {
      ScopeAnalyzer::extract_components_from_path(path)
   }

   /// Shorthand for `ScopeAnalyzer::analyze_wide_change`.
   fn wide_pattern(numstat: &str) -> Option<String> {
      ScopeAnalyzer::analyze_wide_change(numstat)
   }

   /// Builds a `ScopeCandidate` from its three fields.
   fn candidate(path: &str, percentage: f32, confidence: f32) -> ScopeCandidate {
      ScopeCandidate { path: path.to_string(), percentage, confidence }
   }

   /// Analyzer that has processed `lines` with the default config.
   fn analyzer_after(lines: &[&str]) -> ScopeAnalyzer {
      let config = default_config();
      let mut analyzer = ScopeAnalyzer::new();
      for line in lines {
         analyzer.process_numstat_line(line, &config);
      }
      analyzer
   }

   /// Analyzer with hand-set component counts and total.
   fn analyzer_with(component_lines: &[(&str, usize)], total_lines: usize) -> ScopeAnalyzer {
      ScopeAnalyzer {
         component_lines: component_lines
            .iter()
            .map(|&(path, lines)| (path.to_string(), lines))
            .collect(),
         total_lines,
      }
   }

   // Tests for extract_path_from_rename()
   #[test]
   fn test_extract_path_from_rename_brace() {
      // Only the brace content is substituted; the "/file.rs" suffix after the
      // closing brace is not preserved.
      assert_eq!(rename("lib/{old => new}/file.rs"), "lib/new");
   }

   #[test]
   fn test_extract_path_from_rename_brace_complex() {
      assert_eq!(rename("src/api/{client.rs => http_client.rs}"), "src/api/http_client.rs");
   }

   #[test]
   fn test_extract_path_from_rename_arrow() {
      assert_eq!(rename("old/file.rs => new/file.rs"), "new/file.rs");
   }

   #[test]
   fn test_extract_path_from_rename_arrow_with_spaces() {
      assert_eq!(rename("  old/path.rs => new/path.rs  "), "new/path.rs");
   }

   #[test]
   fn test_extract_path_from_rename_no_rename() {
      assert_eq!(rename("lib/file.rs"), "lib/file.rs");
   }

   #[test]
   fn test_extract_path_from_rename_malformed_brace() {
      // No closing brace: the input comes back unchanged.
      let malformed = "lib/{old => new/file.rs";
      assert_eq!(rename(malformed), malformed);
   }

   // Tests for extract_components_from_path()
   #[test]
   fn test_extract_components_simple() {
      // "src" is a placeholder and "client.rs" is a file; only "api" remains.
      assert_eq!(components("src/api/client.rs"), vec!["api"]);
   }

   #[test]
   fn test_extract_components_with_placeholder() {
      // "lib" is a placeholder; "foo" and "bar" remain.
      assert_eq!(components("lib/foo/bar/baz.tsx"), vec!["foo", "foo/bar"]);
   }

   #[test]
   fn test_extract_components_skip_tests() {
      // "tests" is skipped, leaving only "api".
      assert_eq!(components("tests/api/client_test.rs"), vec!["api"]);
   }

   #[test]
   fn test_extract_components_skip_node_modules() {
      // "node_modules" is in SKIP_DIRS; only "foo" remains.
      assert_eq!(components("node_modules/foo/bar.js"), vec!["foo"]);
   }

   #[test]
   fn test_extract_components_single_segment() {
      // "src" is a placeholder and "main.rs" is a file: nothing is left.
      assert_eq!(components("src/main.rs"), Vec::<String>::new());
   }

   #[test]
   fn test_extract_components_dotfile_skipped() {
      // ".git" is dropped as a dot-prefixed segment; "config" has no extension
      // and is therefore kept as a segment.
      assert_eq!(components("lib/.git/config"), vec!["config"]);
   }

   #[test]
   fn test_extract_components_strips_extension() {
      // "client.rs" is a file and never becomes a component; "api" does.
      let comps = components("src/api/client.rs");
      assert!(comps.contains(&"api".to_string()));
   }

   #[test]
   fn test_extract_components_go_internal() {
      // Go projects: internal/ is a placeholder, the module below it is kept.
      assert_eq!(components("internal/agent/worker.go"), vec!["agent"]);
   }

   #[test]
   fn test_extract_components_go_internal_nested() {
      // Go projects: internal/foo/bar/baz.go yields "foo" and "foo/bar".
      assert_eq!(components("internal/config/parser/json.go"), vec!["config", "config/parser"]);
   }

   #[test]
   fn test_extract_components_go_pkg() {
      // Go projects: pkg/ is also a placeholder.
      assert_eq!(components("pkg/util/strings.go"), vec!["util"]);
   }

   #[test]
   fn test_extract_components_monorepo_packages() {
      // Monorepos: packages/ is a placeholder.
      assert_eq!(components("packages/core/index.ts"), vec!["core"]);
   }

   // Tests for process_numstat_line()
   #[test]
   fn test_process_numstat_line_normal() {
      let analyzer = analyzer_after(&["10\t5\tlib/foo/bar.rs"]);

      assert_eq!(analyzer.total_lines, 15);
      assert_eq!(analyzer.component_lines.get("foo"), Some(&15));
   }

   #[test]
   fn test_process_numstat_line_excluded_file() {
      let analyzer = analyzer_after(&["10\t5\tCargo.lock"]);

      assert_eq!(analyzer.total_lines, 0);
      assert!(analyzer.component_lines.is_empty());
   }

   #[test]
   fn test_process_numstat_line_binary_file() {
      let analyzer = analyzer_after(&["-\t-\timage.png"]);

      assert_eq!(analyzer.total_lines, 0);
   }

   #[test]
   fn test_process_numstat_line_invalid() {
      let analyzer = analyzer_after(&["invalid line"]);

      assert_eq!(analyzer.total_lines, 0);
   }

   #[test]
   fn test_process_numstat_line_rename_brace() {
      // The rename resolves to "lib/new"; "lib" is a placeholder, so only
      // "new" is recorded.
      let analyzer = analyzer_after(&["20\t10\tlib/{old => new}/file.rs"]);

      assert_eq!(analyzer.total_lines, 30);
      assert_eq!(analyzer.component_lines.get("new"), Some(&30));
   }

   #[test]
   fn test_process_numstat_line_multiple_files() {
      let analyzer = analyzer_after(&["10\t5\tsrc/api/client.rs", "20\t10\tsrc/api/server.rs"]);

      assert_eq!(analyzer.total_lines, 45);
      assert_eq!(analyzer.component_lines.get("api"), Some(&45));
   }

   // Tests for is_wide_change()
   #[test]
   fn test_is_wide_change_focused() {
      let candidates = [candidate("api", 80.0, 80.0), candidate("db", 20.0, 20.0)];

      assert!(!ScopeAnalyzer::is_wide_change(&candidates, &default_config()));
   }

   #[test]
   fn test_is_wide_change_dispersed() {
      let candidates =
         [candidate("api", 30.0, 30.0), candidate("db", 30.0, 30.0), candidate("ui", 40.0, 40.0)];

      assert!(ScopeAnalyzer::is_wide_change(&candidates, &default_config()));
   }

   #[test]
   fn test_is_wide_change_three_roots() {
      // The top candidate is above the threshold, but three distinct roots
      // still make the change wide.
      let candidates =
         [candidate("api", 60.0, 60.0), candidate("db", 20.0, 20.0), candidate("ui", 20.0, 20.0)];

      assert!(ScopeAnalyzer::is_wide_change(&candidates, &default_config()));
   }

   #[test]
   fn test_is_wide_change_nested_same_root() {
      let candidates = [candidate("api/client", 60.0, 72.0), candidate("api/server", 40.0, 32.0)];

      assert!(!ScopeAnalyzer::is_wide_change(&candidates, &default_config()));
   }

   #[test]
   fn test_is_wide_change_empty() {
      assert!(!ScopeAnalyzer::is_wide_change(&[], &default_config()));
   }

   // Integration tests for extract_scope()
   #[test]
   fn test_extract_scope_single_file() {
      let (candidates, total_lines) =
         ScopeAnalyzer::extract_scope("10\t5\tsrc/api/client.rs", &default_config());

      assert_eq!(total_lines, 15);
      // "src" is filtered out, only "api" remains
      assert_eq!(candidates.len(), 1);
      assert_eq!(candidates[0].path, "api");
      assert_eq!(candidates[0].percentage, 100.0);
   }

   #[test]
   fn test_extract_scope_placeholder_only() {
      let (candidates, total_lines) =
         ScopeAnalyzer::extract_scope("10\t5\tsrc/main.rs", &default_config());

      // The lines are counted, but no candidate survives.
      assert_eq!(total_lines, 15);
      assert_eq!(candidates.len(), 0);
   }

   #[test]
   fn test_extract_scope_multiple_files() {
      let numstat = "10\t5\tsrc/api/client.rs\n20\t10\tsrc/db/models.rs";
      let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &default_config());

      assert_eq!(total_lines, 45);
      assert!(candidates.len() >= 2);

      // Both components must be present.
      let api_cand = candidates.iter().find(|c| c.path == "api");
      let db_cand = candidates.iter().find(|c| c.path == "db");

      assert!(api_cand.is_some());
      assert!(db_cand.is_some());

      // DB should have the higher percentage (30 lines vs 15).
      assert!(db_cand.unwrap().percentage > api_cand.unwrap().percentage);
   }

   #[test]
   fn test_extract_scope_excluded_files() {
      let numstat = "100\t50\tCargo.lock\n10\t5\tsrc/api/client.rs";
      let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &default_config());

      // Cargo.lock is excluded from both the total and the candidates.
      assert_eq!(total_lines, 15);
      assert_eq!(candidates[0].path, "api");
   }

   #[test]
   fn test_extract_scope_no_changes() {
      let (candidates, total_lines) = ScopeAnalyzer::extract_scope("", &default_config());

      assert_eq!(total_lines, 0);
      assert!(candidates.is_empty());
   }

   #[test]
   fn test_count_changed_lines_ignores_excluded_and_binary_files() {
      let numstat = "100\t50\tCargo.lock\n-\t-\timage.png\n10\t5\tsrc/api/client.rs";

      assert_eq!(ScopeAnalyzer::count_changed_lines(numstat, &default_config()), 15);
   }

   #[test]
   fn test_extract_scope_sorted_by_percentage() {
      let numstat = "5\t0\tsrc/api/client.rs\n50\t0\tsrc/db/models.rs\n10\t0\tsrc/ui/component.tsx";
      let (candidates, _) = ScopeAnalyzer::extract_scope(numstat, &default_config());

      // All candidates are single-segment, so confidence equals percentage and
      // the list is descending by percentage.
      assert!(candidates[0].percentage >= candidates[1].percentage);
      assert!(candidates[1].percentage >= candidates[2].percentage);
   }

   #[test]
   fn test_build_scope_candidates_percentages() {
      let analyzer = analyzer_with(&[("api", 30), ("db", 70)], 100);

      let candidates = analyzer.build_scope_candidates();

      assert_eq!(candidates.len(), 2);
      assert_eq!(candidates[0].path, "db");
      assert!((candidates[0].percentage - 70.0).abs() < 0.001);
      assert_eq!(candidates[1].path, "api");
      assert!((candidates[1].percentage - 30.0).abs() < 0.001);
   }

   // Confidence heuristic: 70% in a two-segment scope prefers the specific scope
   #[test]
   fn test_confidence_70_percent_in_two_segment_prefers_specific() {
      let analyzer = analyzer_with(&[("api", 70), ("api/client", 70), ("other", 30)], 100);

      let candidates = analyzer.build_scope_candidates();

      // api/client at 70% gets confidence = 70 * 1.2 = 84
      // api at 70% gets confidence = 70
      // other at 30% gets confidence = 30
      // So api/client comes first.
      assert_eq!(candidates[0].path, "api/client");
      assert!((candidates[0].percentage - 70.0).abs() < 0.001);
      assert!((candidates[0].confidence - 84.0).abs() < 0.001);
   }

   // Confidence heuristic: 45% in a two-segment scope prefers the single segment
   #[test]
   fn test_confidence_45_percent_in_two_segment_prefers_single() {
      let analyzer = analyzer_with(&[("api", 45), ("api/client", 45), ("other", 55)], 100);

      let candidates = analyzer.build_scope_candidates();

      // other at 55% gets confidence = 55
      // api at 45% gets confidence = 45
      // api/client at 45% gets confidence = 45 * 0.8 = 36
      // So the order is: other, api, api/client.
      assert_eq!(candidates[0].path, "other");
      assert_eq!(candidates[1].path, "api");
      assert_eq!(candidates[2].path, "api/client");
      assert!((candidates[2].confidence - 36.0).abs() < 0.001);
   }

   // Tests for analyze_wide_change()
   #[test]
   fn test_analyze_wide_change_dependency_updates() {
      let numstat = "10\t5\tCargo.toml\n20\t10\tsrc/lib.rs\n5\t3\tsrc/api.rs";
      assert_eq!(wide_pattern(numstat), Some("deps".to_string()));
   }

   #[test]
   fn test_analyze_wide_change_documentation() {
      let numstat =
         "50\t20\tREADME.md\n30\t10\tdocs/guide.md\n20\t5\tdocs/api.md\n5\t2\tsrc/lib.rs";
      assert_eq!(wide_pattern(numstat), Some("docs".to_string()));
   }

   #[test]
   fn test_analyze_wide_change_tests() {
      let numstat = [
         "10\t5\tsrc/api_test.rs",
         "15\t8\tsrc/client_test.rs",
         "20\t10\ttests/integration_test.rs",
         "5\t2\tsrc/lib.rs",
      ]
      .join("\n");
      assert_eq!(wide_pattern(&numstat), Some("tests".to_string()));
   }

   #[test]
   fn test_analyze_wide_change_error_handling() {
      let numstat =
         "10\t5\tsrc/error.rs\n15\t8\tsrc/result.rs\n20\t10\tsrc/error_types.rs\n5\t2\tsrc/lib.rs";
      assert_eq!(wide_pattern(numstat), Some("error-handling".to_string()));
   }

   #[test]
   fn test_analyze_wide_change_type_refactor() {
      let numstat =
         "10\t5\tsrc/types.rs\n15\t8\tsrc/structs.rs\n20\t10\tsrc/enums.rs\n5\t2\tsrc/lib.rs";
      assert_eq!(wide_pattern(numstat), Some("type-refactor".to_string()));
   }

   #[test]
   fn test_analyze_wide_change_config() {
      let numstat =
         "10\t5\tconfig.toml\n15\t8\tsettings.yaml\n20\t10\tconfig.json\n5\t2\tsrc/lib.rs";
      assert_eq!(wide_pattern(numstat), Some("config".to_string()));
   }

   #[test]
   fn test_analyze_wide_change_no_pattern() {
      let numstat = "10\t5\tsrc/foo.rs\n15\t8\tsrc/bar.rs\n20\t10\tsrc/baz.rs";
      assert_eq!(wide_pattern(numstat), None);
   }

   #[test]
   fn test_analyze_wide_change_empty() {
      assert_eq!(wide_pattern(""), None);
   }

   #[test]
   fn test_analyze_wide_change_package_json() {
      let numstat = "10\t5\tpackage.json\n20\t10\tsrc/index.js\n5\t3\tsrc/utils.js";
      assert_eq!(wide_pattern(numstat), Some("deps".to_string()));
   }
}