Skip to main content

omni_dev/git/
commit.rs

1//! Git commit operations and analysis.
2
3use std::fs;
4use std::sync::LazyLock;
5
6use anyhow::{Context, Result};
7use chrono::{DateTime, FixedOffset};
8use git2::{Commit, Repository};
9use globset::Glob;
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12
13use crate::data::context::ScopeDefinition;
14use crate::git::diff_split::split_by_file;
15
16/// Matches conventional commit scope patterns including breaking-change syntax.
17#[allow(clippy::unwrap_used)] // Compile-time constant regex pattern
18static SCOPE_RE: LazyLock<Regex> =
19    LazyLock::new(|| Regex::new(r"^[a-z]+!\(([^)]+)\):|^[a-z]+\(([^)]+)\):").unwrap());
20
/// Commit information structure, generic over analysis type.
///
/// The analysis payload `A` defaults to [`CommitAnalysis`]; [`CommitInfoForAI`]
/// uses [`CommitAnalysisForAI`] instead so the diff content travels inline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitInfo<A = CommitAnalysis> {
    /// Full hash (hex object id) of the commit.
    pub hash: String,
    /// Commit author formatted as `Name <email>`.
    pub author: String,
    /// Author date of the commit, carrying the UTC offset recorded in the author signature.
    pub date: DateTime<FixedOffset>,
    /// The original commit message as written by the author.
    pub original_message: String,
    /// Array of remote main branches that contain this commit.
    ///
    /// NOTE(review): `from_git_commit` currently always leaves this empty;
    /// main-branch detection is still a TODO there.
    pub in_main_branches: Vec<String>,
    /// Automated analysis of the commit including type detection and proposed message.
    pub analysis: A,
}
37
/// Commit analysis information.
///
/// Produced by `CommitAnalysis::analyze_commit` from the commit's message and
/// its diff against the first parent (or against an empty tree for a root commit).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitAnalysis {
    /// Automatically detected conventional commit type (feat, fix, docs, test, chore, etc.).
    pub detected_type: String,
    /// Automatically detected scope based on file paths (cli, git, data, etc.).
    ///
    /// Empty when no scope could be detected. `refine_scope` may overwrite it,
    /// joining several equally specific scopes with `", "`.
    pub detected_scope: String,
    /// Heuristically generated conventional commit message based on the
    /// original subject line and the file changes.
    pub proposed_message: String,
    /// Detailed statistics about file changes in this commit.
    pub file_changes: FileChanges,
    /// Per-file change summary, one line per file in the form
    /// ` <path> | <total> +<insertions> -<deletions>`.
    pub diff_summary: String,
    /// Path to diff file showing line-by-line changes.
    pub diff_file: String,
    /// Per-file diff references for individual file changes.
    ///
    /// Defaults to empty when absent on deserialization and is omitted from
    /// serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub file_diffs: Vec<FileDiffRef>,
}
57
/// Reference to a per-file diff stored on disk.
///
/// Tracks the repository-relative file path, the path to the diff file on
/// disk, and the byte length of that diff. Gives consumers per-file size
/// information without loading diff content into memory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileDiffRef {
    /// Repository-relative path of the changed file.
    pub path: String,
    /// Path to the per-file diff file on disk.
    ///
    /// NOTE(review): described as absolute, which depends on the AI scratch
    /// directory helper returning an absolute path — confirm.
    pub diff_file: String,
    /// Byte length of the per-file diff content, as reported by the diff splitter.
    pub byte_len: usize,
}
72
/// Enhanced commit analysis for AI processing with full diff content.
///
/// Serializes as the base analysis fields plus `diff_content` at the same
/// level, because `base` is flattened.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitAnalysisForAI {
    /// Base commit analysis fields (flattened into this struct when serialized).
    #[serde(flatten)]
    pub base: CommitAnalysis,
    /// Full diff content for AI analysis (a partial view may hold only selected files or hunks).
    pub diff_content: String,
}
82
/// Commit information with enhanced analysis for AI processing.
///
/// Wraps a [`CommitInfo`] whose analysis carries the diff content, plus the
/// list of checks that were already verified deterministically.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitInfoForAI {
    /// Base commit information with AI-enhanced analysis (flattened when serialized).
    #[serde(flatten)]
    pub base: CommitInfo<CommitAnalysisForAI>,
    /// Deterministic checks already performed; the LLM should treat these as authoritative.
    ///
    /// Starts empty; `run_pre_validation_checks` appends one entry per passing
    /// check. Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub pre_validated_checks: Vec<String>,
}
93
/// File changes statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileChanges {
    /// Total number of files modified in this commit (the length of `file_list`
    /// when built by the commit analysis).
    pub total_files: usize,
    /// Number of new files added in this commit.
    pub files_added: usize,
    /// Number of files deleted in this commit.
    pub files_deleted: usize,
    /// Array of files changed with their git status
    /// (A=added, D=deleted, M=modified, R=renamed, C=copied, T=type change, ?=other).
    pub file_list: Vec<FileChange>,
}
106
/// Individual file change.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileChange {
    /// Git status code (A=added, D=deleted, M=modified, R=renamed, C=copied,
    /// T=type change, ?=any other delta kind).
    pub status: String,
    /// Path to the file relative to repository root (the new-side path of the delta).
    pub file: String,
}
115
116impl CommitInfo {
117    /// Creates a `CommitInfo` from a `git2::Commit`.
118    pub fn from_git_commit(repo: &Repository, commit: &Commit) -> Result<Self> {
119        let hash = commit.id().to_string();
120
121        let author = format!(
122            "{} <{}>",
123            commit.author().name().unwrap_or("Unknown"),
124            commit.author().email().unwrap_or("unknown@example.com")
125        );
126
127        let timestamp = commit.author().when();
128        let date = DateTime::from_timestamp(timestamp.seconds(), 0)
129            .context("Invalid commit timestamp")?
130            .with_timezone(
131                #[allow(clippy::unwrap_used)] // Offset 0 is always valid
132                &FixedOffset::east_opt(timestamp.offset_minutes() * 60)
133                    .unwrap_or_else(|| FixedOffset::east_opt(0).unwrap()),
134            );
135
136        let original_message = commit.message().unwrap_or("").to_string();
137
138        // TODO: Implement main branch detection
139        let in_main_branches = Vec::new();
140
141        // TODO: Implement commit analysis
142        let analysis = CommitAnalysis::analyze_commit(repo, commit)?;
143
144        Ok(Self {
145            hash,
146            author,
147            date,
148            original_message,
149            in_main_branches,
150            analysis,
151        })
152    }
153}
154
155impl CommitAnalysis {
156    /// Analyzes a commit and generates analysis information.
157    pub fn analyze_commit(repo: &Repository, commit: &Commit) -> Result<Self> {
158        // Get file changes
159        let file_changes = Self::analyze_file_changes(repo, commit)?;
160
161        // Detect conventional commit type based on files and message
162        let detected_type = Self::detect_commit_type(commit, &file_changes);
163
164        // Detect scope based on file paths
165        let detected_scope = Self::detect_scope(&file_changes);
166
167        // Generate proposed conventional commit message
168        let proposed_message =
169            Self::generate_proposed_message(commit, &detected_type, &detected_scope, &file_changes);
170
171        // Get diff summary
172        let diff_summary = Self::get_diff_summary(repo, commit)?;
173
174        // Write diff to file and get path
175        let (diff_file, file_diffs) = Self::write_diff_to_file(repo, commit)?;
176
177        Ok(Self {
178            detected_type,
179            detected_scope,
180            proposed_message,
181            file_changes,
182            diff_summary,
183            diff_file,
184            file_diffs,
185        })
186    }
187
188    /// Analyzes file changes in the commit.
189    fn analyze_file_changes(repo: &Repository, commit: &Commit) -> Result<FileChanges> {
190        let mut file_list = Vec::new();
191        let mut files_added = 0;
192        let mut files_deleted = 0;
193
194        // Get the tree for this commit
195        let commit_tree = commit.tree().context("Failed to get commit tree")?;
196
197        // Get parent tree if available
198        let parent_tree = if commit.parent_count() > 0 {
199            Some(
200                commit
201                    .parent(0)
202                    .context("Failed to get parent commit")?
203                    .tree()
204                    .context("Failed to get parent tree")?,
205            )
206        } else {
207            None
208        };
209
210        // Create diff between parent and commit
211        let diff = if let Some(parent_tree) = parent_tree {
212            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
213                .context("Failed to create diff")?
214        } else {
215            // Initial commit - diff against empty tree
216            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
217                .context("Failed to create diff for initial commit")?
218        };
219
220        // Process each diff delta
221        diff.foreach(
222            &mut |delta, _progress| {
223                let status = match delta.status() {
224                    git2::Delta::Added => {
225                        files_added += 1;
226                        "A"
227                    }
228                    git2::Delta::Deleted => {
229                        files_deleted += 1;
230                        "D"
231                    }
232                    git2::Delta::Modified => "M",
233                    git2::Delta::Renamed => "R",
234                    git2::Delta::Copied => "C",
235                    git2::Delta::Typechange => "T",
236                    _ => "?",
237                };
238
239                if let Some(path) = delta.new_file().path() {
240                    if let Some(path_str) = path.to_str() {
241                        file_list.push(FileChange {
242                            status: status.to_string(),
243                            file: path_str.to_string(),
244                        });
245                    }
246                }
247
248                true
249            },
250            None,
251            None,
252            None,
253        )
254        .context("Failed to process diff")?;
255
256        let total_files = file_list.len();
257
258        Ok(FileChanges {
259            total_files,
260            files_added,
261            files_deleted,
262            file_list,
263        })
264    }
265
266    /// Detects conventional commit type based on files and existing message.
267    fn detect_commit_type(commit: &Commit, file_changes: &FileChanges) -> String {
268        let message = commit.message().unwrap_or("");
269
270        // Check if message already has conventional commit format
271        if let Some(existing_type) = Self::extract_conventional_type(message) {
272            return existing_type;
273        }
274
275        // Analyze file patterns
276        let files: Vec<&str> = file_changes
277            .file_list
278            .iter()
279            .map(|f| f.file.as_str())
280            .collect();
281
282        // Check for specific patterns
283        if files
284            .iter()
285            .any(|f| f.contains("test") || f.contains("spec"))
286        {
287            "test".to_string()
288        } else if files
289            .iter()
290            .any(|f| f.ends_with(".md") || f.contains("README") || f.contains("docs/"))
291        {
292            "docs".to_string()
293        } else if files
294            .iter()
295            .any(|f| f.contains("Cargo.toml") || f.contains("package.json") || f.contains("config"))
296        {
297            if file_changes.files_added > 0 {
298                "feat".to_string()
299            } else {
300                "chore".to_string()
301            }
302        } else if file_changes.files_added > 0
303            && files
304                .iter()
305                .any(|f| f.ends_with(".rs") || f.ends_with(".js") || f.ends_with(".py"))
306        {
307            "feat".to_string()
308        } else if message.to_lowercase().contains("fix") || message.to_lowercase().contains("bug") {
309            "fix".to_string()
310        } else if file_changes.files_deleted > file_changes.files_added {
311            "refactor".to_string()
312        } else {
313            "chore".to_string()
314        }
315    }
316
317    /// Extracts conventional commit type from an existing message.
318    fn extract_conventional_type(message: &str) -> Option<String> {
319        let first_line = message.lines().next().unwrap_or("");
320        if let Some(colon_pos) = first_line.find(':') {
321            let prefix = &first_line[..colon_pos];
322            if let Some(paren_pos) = prefix.find('(') {
323                let type_part = &prefix[..paren_pos];
324                if Self::is_valid_conventional_type(type_part) {
325                    return Some(type_part.to_string());
326                }
327            } else if Self::is_valid_conventional_type(prefix) {
328                return Some(prefix.to_string());
329            }
330        }
331        None
332    }
333
334    /// Checks if a string is a valid conventional commit type.
335    fn is_valid_conventional_type(s: &str) -> bool {
336        matches!(
337            s,
338            "feat"
339                | "fix"
340                | "docs"
341                | "style"
342                | "refactor"
343                | "test"
344                | "chore"
345                | "build"
346                | "ci"
347                | "perf"
348        )
349    }
350
351    /// Detects scope from file paths.
352    fn detect_scope(file_changes: &FileChanges) -> String {
353        let files: Vec<&str> = file_changes
354            .file_list
355            .iter()
356            .map(|f| f.file.as_str())
357            .collect();
358
359        // Analyze common path patterns
360        if files.iter().any(|f| f.starts_with("src/cli/")) {
361            "cli".to_string()
362        } else if files.iter().any(|f| f.starts_with("src/git/")) {
363            "git".to_string()
364        } else if files.iter().any(|f| f.starts_with("src/data/")) {
365            "data".to_string()
366        } else if files.iter().any(|f| f.starts_with("tests/")) {
367            "test".to_string()
368        } else if files.iter().any(|f| f.starts_with("docs/")) {
369            "docs".to_string()
370        } else if files
371            .iter()
372            .any(|f| f.contains("Cargo.toml") || f.contains("deny.toml"))
373        {
374            "deps".to_string()
375        } else {
376            String::new()
377        }
378    }
379
380    /// Re-detects scope using file_patterns from scope definitions.
381    ///
382    /// More specific patterns (more literal path components) win regardless of
383    /// definition order in scopes.yaml. Equally specific matches are joined
384    /// with ", ". If no scope definitions match, the existing detected_scope
385    /// is kept as a fallback.
386    pub fn refine_scope(&mut self, scope_defs: &[ScopeDefinition]) {
387        if scope_defs.is_empty() {
388            return;
389        }
390        let files: Vec<&str> = self
391            .file_changes
392            .file_list
393            .iter()
394            .map(|f| f.file.as_str())
395            .collect();
396        if files.is_empty() {
397            return;
398        }
399
400        let mut matches: Vec<(&str, usize)> = Vec::new();
401        for scope_def in scope_defs {
402            if let Some(specificity) = Self::scope_matches_files(&files, &scope_def.file_patterns) {
403                matches.push((&scope_def.name, specificity));
404            }
405        }
406
407        if matches.is_empty() {
408            return;
409        }
410
411        // SAFETY: matches is non-empty (guarded by early return above)
412        #[allow(clippy::expect_used)] // Guarded by is_empty() check above
413        let max_specificity = matches.iter().map(|(_, s)| *s).max().expect("non-empty");
414        let best: Vec<&str> = matches
415            .into_iter()
416            .filter(|(_, s)| *s == max_specificity)
417            .map(|(name, _)| name)
418            .collect();
419
420        self.detected_scope = best.join(", ");
421    }
422
423    /// Checks if a scope's file_patterns match any of the given files.
424    ///
425    /// Returns `Some(max_specificity)` if at least one file matches the scope
426    /// (after applying negation patterns), or `None` if no file matches.
427    fn scope_matches_files(files: &[&str], patterns: &[String]) -> Option<usize> {
428        let mut positive = Vec::new();
429        let mut negative = Vec::new();
430        for pat in patterns {
431            if let Some(stripped) = pat.strip_prefix('!') {
432                negative.push(stripped);
433            } else {
434                positive.push(pat.as_str());
435            }
436        }
437
438        // Build negative matchers
439        let neg_matchers: Vec<_> = negative
440            .iter()
441            .filter_map(|p| Glob::new(p).ok().map(|g| g.compile_matcher()))
442            .collect();
443
444        let mut max_specificity: Option<usize> = None;
445        for pat in &positive {
446            let Ok(glob) = Glob::new(pat) else {
447                continue;
448            };
449            let matcher = glob.compile_matcher();
450            for file in files {
451                if matcher.is_match(file) && !neg_matchers.iter().any(|neg| neg.is_match(file)) {
452                    let specificity = Self::count_specificity(pat);
453                    max_specificity =
454                        Some(max_specificity.map_or(specificity, |cur| cur.max(specificity)));
455                }
456            }
457        }
458        max_specificity
459    }
460
461    /// Counts the number of literal (non-wildcard) path segments in a glob pattern.
462    ///
463    /// - `docs/adrs/**` → 2 (`docs`, `adrs`)
464    /// - `docs/**` → 1 (`docs`)
465    /// - `*.md` → 0
466    /// - `src/main/scala/**` → 3
467    fn count_specificity(pattern: &str) -> usize {
468        pattern
469            .split('/')
470            .filter(|segment| !segment.contains('*') && !segment.contains('?'))
471            .count()
472    }
473
474    /// Generates a proposed conventional commit message.
475    fn generate_proposed_message(
476        commit: &Commit,
477        commit_type: &str,
478        scope: &str,
479        file_changes: &FileChanges,
480    ) -> String {
481        let current_message = commit.message().unwrap_or("").lines().next().unwrap_or("");
482
483        // If already properly formatted, return as-is
484        if Self::extract_conventional_type(current_message).is_some() {
485            return current_message.to_string();
486        }
487
488        // Generate description based on changes
489        let description =
490            if !current_message.is_empty() && !current_message.eq_ignore_ascii_case("stuff") {
491                current_message.to_string()
492            } else {
493                Self::generate_description(commit_type, file_changes)
494            };
495
496        // Format with scope if available
497        if scope.is_empty() {
498            format!("{commit_type}: {description}")
499        } else {
500            format!("{commit_type}({scope}): {description}")
501        }
502    }
503
504    /// Generates a description based on commit type and changes.
505    fn generate_description(commit_type: &str, file_changes: &FileChanges) -> String {
506        match commit_type {
507            "feat" => {
508                if file_changes.total_files == 1 {
509                    format!("add {}", file_changes.file_list[0].file)
510                } else {
511                    format!("add {} new features", file_changes.total_files)
512                }
513            }
514            "fix" => "resolve issues".to_string(),
515            "docs" => "update documentation".to_string(),
516            "test" => "add tests".to_string(),
517            "refactor" => "improve code structure".to_string(),
518            "chore" => "update project files".to_string(),
519            _ => "update project".to_string(),
520        }
521    }
522
523    /// Returns diff summary statistics.
524    fn get_diff_summary(repo: &Repository, commit: &Commit) -> Result<String> {
525        let commit_tree = commit.tree().context("Failed to get commit tree")?;
526
527        let parent_tree = if commit.parent_count() > 0 {
528            Some(
529                commit
530                    .parent(0)
531                    .context("Failed to get parent commit")?
532                    .tree()
533                    .context("Failed to get parent tree")?,
534            )
535        } else {
536            None
537        };
538
539        let diff = if let Some(parent_tree) = parent_tree {
540            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
541                .context("Failed to create diff")?
542        } else {
543            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
544                .context("Failed to create diff for initial commit")?
545        };
546
547        let stats = diff.stats().context("Failed to get diff stats")?;
548
549        let mut summary = String::new();
550        for i in 0..stats.files_changed() {
551            if let Some(path) = diff
552                .get_delta(i)
553                .and_then(|d| d.new_file().path())
554                .and_then(|p| p.to_str())
555            {
556                let insertions = stats.insertions();
557                let deletions = stats.deletions();
558                summary.push_str(&format!(
559                    " {} | {} +{} -{}\n",
560                    path,
561                    insertions + deletions,
562                    insertions,
563                    deletions
564                ));
565            }
566        }
567
568        Ok(summary)
569    }
570
571    /// Writes full diff content to a file and returns the path and per-file refs.
572    fn write_diff_to_file(
573        repo: &Repository,
574        commit: &Commit,
575    ) -> Result<(String, Vec<FileDiffRef>)> {
576        // Get AI scratch directory
577        let ai_scratch_path = crate::utils::ai_scratch::get_ai_scratch_dir()
578            .context("Failed to determine AI scratch directory")?;
579
580        // Create diffs subdirectory
581        let diffs_dir = ai_scratch_path.join("diffs");
582        fs::create_dir_all(&diffs_dir).context("Failed to create diffs directory")?;
583
584        // Create filename with commit hash
585        let commit_hash = commit.id().to_string();
586        let diff_filename = format!("{commit_hash}.diff");
587        let diff_path = diffs_dir.join(&diff_filename);
588
589        let commit_tree = commit.tree().context("Failed to get commit tree")?;
590
591        let parent_tree = if commit.parent_count() > 0 {
592            Some(
593                commit
594                    .parent(0)
595                    .context("Failed to get parent commit")?
596                    .tree()
597                    .context("Failed to get parent tree")?,
598            )
599        } else {
600            None
601        };
602
603        let diff = if let Some(parent_tree) = parent_tree {
604            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
605                .context("Failed to create diff")?
606        } else {
607            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
608                .context("Failed to create diff for initial commit")?
609        };
610
611        let mut diff_content = String::new();
612
613        diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
614            let content = std::str::from_utf8(line.content()).unwrap_or("<binary>");
615            let prefix = match line.origin() {
616                '+' => "+",
617                '-' => "-",
618                ' ' => " ",
619                '@' => "@",
620                _ => "", // Header, file header, and other origins
621            };
622            diff_content.push_str(&format!("{prefix}{content}"));
623            true
624        })
625        .context("Failed to format diff")?;
626
627        // Ensure the diff content ends with a newline to encourage literal block style
628        if !diff_content.ends_with('\n') {
629            diff_content.push('\n');
630        }
631
632        // Write flat diff content to file
633        fs::write(&diff_path, &diff_content).context("Failed to write diff file")?;
634
635        // Split into per-file diffs and write each to disk
636        let per_file_diffs = split_by_file(&diff_content);
637        let mut file_diffs = Vec::with_capacity(per_file_diffs.len());
638
639        if !per_file_diffs.is_empty() {
640            let per_file_dir = diffs_dir.join(&commit_hash);
641            fs::create_dir_all(&per_file_dir)
642                .context("Failed to create per-file diffs directory")?;
643
644            for (index, file_diff) in per_file_diffs.iter().enumerate() {
645                let per_file_name = format!("{index:04}.diff");
646                let per_file_path = per_file_dir.join(&per_file_name);
647                fs::write(&per_file_path, &file_diff.content).with_context(|| {
648                    format!("Failed to write per-file diff: {}", per_file_path.display())
649                })?;
650
651                file_diffs.push(FileDiffRef {
652                    path: file_diff.path.clone(),
653                    diff_file: per_file_path.to_string_lossy().to_string(),
654                    byte_len: file_diff.byte_len,
655                });
656            }
657        }
658
659        Ok((diff_path.to_string_lossy().to_string(), file_diffs))
660    }
661}
662
663impl CommitInfoForAI {
664    /// Converts from a basic `CommitInfo` by loading diff content.
665    pub fn from_commit_info(commit_info: CommitInfo) -> Result<Self> {
666        let analysis = CommitAnalysisForAI::from_commit_analysis(commit_info.analysis)?;
667
668        Ok(Self {
669            base: CommitInfo {
670                hash: commit_info.hash,
671                author: commit_info.author,
672                date: commit_info.date,
673                original_message: commit_info.original_message,
674                in_main_branches: commit_info.in_main_branches,
675                analysis,
676            },
677            pre_validated_checks: Vec::new(),
678        })
679    }
680
681    /// Creates a partial view of a commit containing only the specified file diffs.
682    ///
683    /// Convenience wrapper around [`Self::from_commit_info_partial_with_overrides`]
684    /// with all-`None` overrides (every file loaded from disk).
685    #[cfg(test)]
686    pub(crate) fn from_commit_info_partial(
687        commit_info: CommitInfo,
688        file_paths: &[String],
689    ) -> Result<Self> {
690        let overrides: Vec<Option<String>> = vec![None; file_paths.len()];
691        Self::from_commit_info_partial_with_overrides(commit_info, file_paths, &overrides)
692    }
693
694    /// Creates a partial view using pre-sliced diff content where available.
695    ///
696    /// `file_paths` and `diff_overrides` must be parallel slices. When
697    /// `diff_overrides[i]` is `Some(content)`, that content is used directly
698    /// instead of reading the full per-file diff from disk. This enables
699    /// per-hunk partial views where each chunk receives only its assigned
700    /// hunk slices rather than the entire file.
701    ///
702    /// Entries with `None` overrides fall back to loading from disk via
703    /// [`FileDiffRef::diff_file`], deduplicated by path.
704    pub(crate) fn from_commit_info_partial_with_overrides(
705        commit_info: CommitInfo,
706        file_paths: &[String],
707        diff_overrides: &[Option<String>],
708    ) -> Result<Self> {
709        let mut diff_parts = Vec::new();
710        let mut included_refs = Vec::new();
711        let mut loaded_disk_paths: std::collections::HashSet<String> =
712            std::collections::HashSet::new();
713
714        for (path, override_content) in file_paths.iter().zip(diff_overrides.iter()) {
715            if let Some(content) = override_content {
716                // Pre-sliced hunk content — use directly.
717                diff_parts.push(content.clone());
718                // Include the FileDiffRef for metadata (deduplicated).
719                if let Some(file_ref) = commit_info
720                    .analysis
721                    .file_diffs
722                    .iter()
723                    .find(|r| r.path == *path)
724                {
725                    if !included_refs.iter().any(|r: &FileDiffRef| r.path == *path) {
726                        included_refs.push(file_ref.clone());
727                    }
728                }
729            } else {
730                // Whole-file item — load from disk (deduplicated).
731                if loaded_disk_paths.insert(path.clone()) {
732                    if let Some(file_ref) = commit_info
733                        .analysis
734                        .file_diffs
735                        .iter()
736                        .find(|r| r.path == *path)
737                    {
738                        let content =
739                            fs::read_to_string(&file_ref.diff_file).with_context(|| {
740                                format!("Failed to read per-file diff: {}", file_ref.diff_file)
741                            })?;
742                        diff_parts.push(content);
743                        included_refs.push(file_ref.clone());
744                    }
745                }
746            }
747        }
748
749        let diff_content = diff_parts.join("\n");
750
751        let partial_analysis = CommitAnalysisForAI {
752            base: CommitAnalysis {
753                file_diffs: included_refs,
754                ..commit_info.analysis
755            },
756            diff_content,
757        };
758
759        Ok(Self {
760            base: CommitInfo {
761                hash: commit_info.hash,
762                author: commit_info.author,
763                date: commit_info.date,
764                original_message: commit_info.original_message,
765                in_main_branches: commit_info.in_main_branches,
766                analysis: partial_analysis,
767            },
768            pre_validated_checks: Vec::new(),
769        })
770    }
771
772    /// Runs deterministic pre-validation checks on the commit message.
773    /// Passing checks are recorded in pre_validated_checks so the LLM
774    /// can skip re-checking them. Failing checks are not recorded.
775    pub fn run_pre_validation_checks(&mut self, valid_scopes: &[ScopeDefinition]) {
776        if let Some(caps) = SCOPE_RE.captures(&self.base.original_message) {
777            let scope = caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str());
778            if let Some(scope) = scope {
779                if scope.contains(',') && !scope.contains(", ") {
780                    self.pre_validated_checks.push(format!(
781                        "Scope format verified: multi-scope '{scope}' correctly uses commas without spaces"
782                    ));
783                }
784
785                // Deterministic scope validity check
786                if !valid_scopes.is_empty() {
787                    let scope_parts: Vec<&str> = scope.split(',').collect();
788                    let all_valid = scope_parts
789                        .iter()
790                        .all(|part| valid_scopes.iter().any(|s| s.name == *part));
791                    if all_valid {
792                        self.pre_validated_checks.push(format!(
793                            "Scope validity verified: '{scope}' is in the valid scopes list"
794                        ));
795                    }
796                }
797            }
798        }
799    }
800}
801
802impl CommitAnalysisForAI {
803    /// Converts from a basic `CommitAnalysis` by loading diff content from file.
804    pub fn from_commit_analysis(analysis: CommitAnalysis) -> Result<Self> {
805        // Read the actual diff content from the file
806        let diff_content = fs::read_to_string(&analysis.diff_file)
807            .with_context(|| format!("Failed to read diff file: {}", analysis.diff_file))?;
808
809        Ok(Self {
810            base: analysis,
811            diff_content,
812        })
813    }
814}
815
816#[cfg(test)]
817#[allow(clippy::unwrap_used, clippy::expect_used)]
818mod tests {
819    use super::*;
820    use crate::data::context::ScopeDefinition;
821
822    // ── extract_conventional_type ────────────────────────────────────
823
824    #[test]
825    fn conventional_type_feat_with_scope() {
826        assert_eq!(
827            CommitAnalysis::extract_conventional_type("feat(cli): add flag"),
828            Some("feat".to_string())
829        );
830    }
831
832    #[test]
833    fn conventional_type_without_scope() {
834        assert_eq!(
835            CommitAnalysis::extract_conventional_type("fix: resolve bug"),
836            Some("fix".to_string())
837        );
838    }
839
840    #[test]
841    fn conventional_type_invalid_message() {
842        assert_eq!(
843            CommitAnalysis::extract_conventional_type("random message without colon"),
844            None
845        );
846    }
847
848    #[test]
849    fn conventional_type_unknown_type() {
850        assert_eq!(
851            CommitAnalysis::extract_conventional_type("yolo(scope): stuff"),
852            None
853        );
854    }
855
856    #[test]
857    fn conventional_type_all_valid_types() {
858        let types = [
859            "feat", "fix", "docs", "style", "refactor", "test", "chore", "build", "ci", "perf",
860        ];
861        for t in types {
862            let msg = format!("{t}: description");
863            assert_eq!(
864                CommitAnalysis::extract_conventional_type(&msg),
865                Some(t.to_string()),
866                "expected Some for type '{t}'"
867            );
868        }
869    }
870
871    // ── is_valid_conventional_type ───────────────────────────────────
872
873    #[test]
874    fn valid_conventional_types() {
875        for t in [
876            "feat", "fix", "docs", "style", "refactor", "test", "chore", "build", "ci", "perf",
877        ] {
878            assert!(
879                CommitAnalysis::is_valid_conventional_type(t),
880                "'{t}' should be valid"
881            );
882        }
883    }
884
885    #[test]
886    fn invalid_conventional_types() {
887        for t in ["yolo", "Feat", "", "FEAT", "feature", "bugfix"] {
888            assert!(
889                !CommitAnalysis::is_valid_conventional_type(t),
890                "'{t}' should be invalid"
891            );
892        }
893    }
894
895    // ── detect_scope ─────────────────────────────────────────────────
896
897    fn make_file_changes(files: &[(&str, &str)]) -> FileChanges {
898        FileChanges {
899            total_files: files.len(),
900            files_added: files.iter().filter(|(s, _)| *s == "A").count(),
901            files_deleted: files.iter().filter(|(s, _)| *s == "D").count(),
902            file_list: files
903                .iter()
904                .map(|(status, file)| FileChange {
905                    status: (*status).to_string(),
906                    file: (*file).to_string(),
907                })
908                .collect(),
909        }
910    }
911
912    #[test]
913    fn scope_from_cli_files() {
914        let changes = make_file_changes(&[("M", "src/cli/commands.rs")]);
915        assert_eq!(CommitAnalysis::detect_scope(&changes), "cli");
916    }
917
918    #[test]
919    fn scope_from_git_files() {
920        let changes = make_file_changes(&[("M", "src/git/remote.rs")]);
921        assert_eq!(CommitAnalysis::detect_scope(&changes), "git");
922    }
923
924    #[test]
925    fn scope_from_docs_files() {
926        let changes = make_file_changes(&[("M", "docs/README.md")]);
927        assert_eq!(CommitAnalysis::detect_scope(&changes), "docs");
928    }
929
930    #[test]
931    fn scope_from_data_files() {
932        let changes = make_file_changes(&[("M", "src/data/yaml.rs")]);
933        assert_eq!(CommitAnalysis::detect_scope(&changes), "data");
934    }
935
936    #[test]
937    fn scope_from_test_files() {
938        let changes = make_file_changes(&[("A", "tests/new_test.rs")]);
939        assert_eq!(CommitAnalysis::detect_scope(&changes), "test");
940    }
941
942    #[test]
943    fn scope_from_deps_files() {
944        let changes = make_file_changes(&[("M", "Cargo.toml")]);
945        assert_eq!(CommitAnalysis::detect_scope(&changes), "deps");
946    }
947
948    #[test]
949    fn scope_unknown_files() {
950        let changes = make_file_changes(&[("M", "random/path/file.txt")]);
951        assert_eq!(CommitAnalysis::detect_scope(&changes), "");
952    }
953
954    // ── count_specificity ────────────────────────────────────────────
955
956    #[test]
957    fn count_specificity_deep_path() {
958        assert_eq!(CommitAnalysis::count_specificity("src/main/scala/**"), 3);
959    }
960
961    #[test]
962    fn count_specificity_shallow() {
963        assert_eq!(CommitAnalysis::count_specificity("docs/**"), 1);
964    }
965
966    #[test]
967    fn count_specificity_wildcard_only() {
968        assert_eq!(CommitAnalysis::count_specificity("*.md"), 0);
969    }
970
971    #[test]
972    fn count_specificity_no_wildcards() {
973        assert_eq!(CommitAnalysis::count_specificity("src/lib.rs"), 2);
974    }
975
976    // ── scope_matches_files ──────────────────────────────────────────
977
978    #[test]
979    fn scope_matches_positive_patterns() {
980        let patterns = vec!["src/cli/**".to_string()];
981        let files = &["src/cli/commands.rs"];
982        assert!(CommitAnalysis::scope_matches_files(files, &patterns).is_some());
983    }
984
985    #[test]
986    fn scope_matches_no_match() {
987        let patterns = vec!["src/cli/**".to_string()];
988        let files = &["src/git/remote.rs"];
989        assert!(CommitAnalysis::scope_matches_files(files, &patterns).is_none());
990    }
991
992    #[test]
993    fn scope_matches_with_negation() {
994        let patterns = vec!["src/**".to_string(), "!src/test/**".to_string()];
995        // File in src/ but not in src/test/ should match
996        let files = &["src/lib.rs"];
997        assert!(CommitAnalysis::scope_matches_files(files, &patterns).is_some());
998
999        // File in src/test/ should be excluded
1000        let test_files = &["src/test/helper.rs"];
1001        assert!(CommitAnalysis::scope_matches_files(test_files, &patterns).is_none());
1002    }
1003
1004    // ── refine_scope ─────────────────────────────────────────────────
1005
1006    fn make_scope_def(name: &str, patterns: &[&str]) -> ScopeDefinition {
1007        ScopeDefinition {
1008            name: name.to_string(),
1009            description: String::new(),
1010            examples: vec![],
1011            file_patterns: patterns.iter().map(|p| (*p).to_string()).collect(),
1012        }
1013    }
1014
1015    #[test]
1016    fn refine_scope_empty_defs() {
1017        let mut analysis = CommitAnalysis {
1018            detected_type: "feat".to_string(),
1019            detected_scope: "original".to_string(),
1020            proposed_message: String::new(),
1021            file_changes: make_file_changes(&[("M", "src/cli/commands.rs")]),
1022            diff_summary: String::new(),
1023            diff_file: String::new(),
1024            file_diffs: Vec::new(),
1025        };
1026        analysis.refine_scope(&[]);
1027        assert_eq!(analysis.detected_scope, "original");
1028    }
1029
1030    #[test]
1031    fn refine_scope_most_specific_wins() {
1032        let scope_defs = vec![
1033            make_scope_def("lib", &["src/**"]),
1034            make_scope_def("cli", &["src/cli/**"]),
1035        ];
1036        let mut analysis = CommitAnalysis {
1037            detected_type: "feat".to_string(),
1038            detected_scope: String::new(),
1039            proposed_message: String::new(),
1040            file_changes: make_file_changes(&[("M", "src/cli/commands.rs")]),
1041            diff_summary: String::new(),
1042            diff_file: String::new(),
1043            file_diffs: Vec::new(),
1044        };
1045        analysis.refine_scope(&scope_defs);
1046        assert_eq!(analysis.detected_scope, "cli");
1047    }
1048
1049    #[test]
1050    fn refine_scope_no_matching_files() {
1051        let scope_defs = vec![make_scope_def("cli", &["src/cli/**"])];
1052        let mut analysis = CommitAnalysis {
1053            detected_type: "feat".to_string(),
1054            detected_scope: "original".to_string(),
1055            proposed_message: String::new(),
1056            file_changes: make_file_changes(&[("M", "README.md")]),
1057            diff_summary: String::new(),
1058            diff_file: String::new(),
1059            file_diffs: Vec::new(),
1060        };
1061        analysis.refine_scope(&scope_defs);
1062        // No match → keeps original
1063        assert_eq!(analysis.detected_scope, "original");
1064    }
1065
1066    #[test]
1067    fn refine_scope_equal_specificity_joins() {
1068        let scope_defs = vec![
1069            make_scope_def("cli", &["src/cli/**"]),
1070            make_scope_def("git", &["src/git/**"]),
1071        ];
1072        let mut analysis = CommitAnalysis {
1073            detected_type: "feat".to_string(),
1074            detected_scope: String::new(),
1075            proposed_message: String::new(),
1076            file_changes: make_file_changes(&[
1077                ("M", "src/cli/commands.rs"),
1078                ("M", "src/git/remote.rs"),
1079            ]),
1080            diff_summary: String::new(),
1081            diff_file: String::new(),
1082            file_diffs: Vec::new(),
1083        };
1084        analysis.refine_scope(&scope_defs);
1085        // Both have specificity 2 and both match → joined
1086        assert!(
1087            analysis.detected_scope == "cli, git" || analysis.detected_scope == "git, cli",
1088            "expected joined scopes, got: {}",
1089            analysis.detected_scope
1090        );
1091    }
1092
1093    // ── run_pre_validation_checks ────────────────────────────────────
1094
1095    fn make_commit_info_for_ai(message: &str) -> CommitInfoForAI {
1096        CommitInfoForAI {
1097            base: CommitInfo {
1098                hash: "a".repeat(40),
1099                author: "Test <test@example.com>".to_string(),
1100                date: chrono::DateTime::parse_from_rfc3339("2024-01-01T00:00:00+00:00").unwrap(),
1101                original_message: message.to_string(),
1102                in_main_branches: vec![],
1103                analysis: CommitAnalysisForAI {
1104                    base: CommitAnalysis {
1105                        detected_type: "feat".to_string(),
1106                        detected_scope: String::new(),
1107                        proposed_message: String::new(),
1108                        file_changes: make_file_changes(&[]),
1109                        diff_summary: String::new(),
1110                        diff_file: String::new(),
1111                        file_diffs: Vec::new(),
1112                    },
1113                    diff_content: String::new(),
1114                },
1115            },
1116            pre_validated_checks: vec![],
1117        }
1118    }
1119
1120    #[test]
1121    fn pre_validation_valid_single_scope() {
1122        let scopes = vec![make_scope_def("cli", &["src/cli/**"])];
1123        let mut info = make_commit_info_for_ai("feat(cli): add command");
1124        info.run_pre_validation_checks(&scopes);
1125        assert!(
1126            info.pre_validated_checks
1127                .iter()
1128                .any(|c| c.contains("Scope validity verified")),
1129            "expected scope validity check, got: {:?}",
1130            info.pre_validated_checks
1131        );
1132    }
1133
1134    #[test]
1135    fn pre_validation_multi_scope() {
1136        let scopes = vec![
1137            make_scope_def("cli", &["src/cli/**"]),
1138            make_scope_def("git", &["src/git/**"]),
1139        ];
1140        let mut info = make_commit_info_for_ai("feat(cli,git): cross-cutting change");
1141        info.run_pre_validation_checks(&scopes);
1142        assert!(info
1143            .pre_validated_checks
1144            .iter()
1145            .any(|c| c.contains("Scope validity verified")),);
1146        assert!(info
1147            .pre_validated_checks
1148            .iter()
1149            .any(|c| c.contains("multi-scope")),);
1150    }
1151
1152    #[test]
1153    fn pre_validation_invalid_scope_not_added() {
1154        let scopes = vec![make_scope_def("cli", &["src/cli/**"])];
1155        let mut info = make_commit_info_for_ai("feat(unknown): something");
1156        info.run_pre_validation_checks(&scopes);
1157        assert!(
1158            !info
1159                .pre_validated_checks
1160                .iter()
1161                .any(|c| c.contains("Scope validity verified")),
1162            "should not validate unknown scope"
1163        );
1164    }
1165
1166    #[test]
1167    fn pre_validation_no_scope_message() {
1168        let scopes = vec![make_scope_def("cli", &["src/cli/**"])];
1169        let mut info = make_commit_info_for_ai("feat: no scope here");
1170        info.run_pre_validation_checks(&scopes);
1171        assert!(info.pre_validated_checks.is_empty());
1172    }
1173
1174    // ── property tests ────────────────────────────────────────────
1175
    /// Property-based tests driven by `proptest`-generated inputs.
    mod prop {
        use super::*;
        use proptest::prelude::*;

        /// Strategy that picks one of the ten conventional commit type names
        /// listed below.
        fn arb_conventional_type() -> impl Strategy<Value = &'static str> {
            prop_oneof![
                Just("feat"),
                Just("fix"),
                Just("docs"),
                Just("style"),
                Just("refactor"),
                Just("test"),
                Just("chore"),
                Just("build"),
                Just("ci"),
                Just("perf"),
            ]
        }

        proptest! {
            // Any `type(scope): desc` message built from a listed type, a
            // lowercase scope and a letters-and-spaces description must
            // yield that type.
            #[test]
            fn valid_conventional_format_extracts_type(
                ctype in arb_conventional_type(),
                scope in "[a-z]{1,10}",
                desc in "[a-zA-Z ]{1,50}",
            ) {
                let message = format!("{ctype}({scope}): {desc}");
                let result = CommitAnalysis::extract_conventional_type(&message);
                prop_assert_eq!(result, Some(ctype.to_string()));
            }

            // Input containing no ':' character (up to 100 chars) never
            // yields a type.
            #[test]
            fn no_colon_returns_none(s in "[^:]{0,100}") {
                let result = CommitAnalysis::extract_conventional_type(&s);
                prop_assert!(result.is_none());
            }

            #[test]
            fn count_specificity_nonnegative(pattern in ".*") {
                // usize is always >= 0; this test catches panics on arbitrary input
                let _ = CommitAnalysis::count_specificity(&pattern);
            }

            // Joining 1 to 5 generated segments with '/' must never score
            // higher than the number of segments.
            #[test]
            fn count_specificity_bounded_by_segments(
                segments in proptest::collection::vec("[a-z*?]{1,10}", 1..6),
            ) {
                let pattern = segments.join("/");
                let result = CommitAnalysis::count_specificity(&pattern);
                prop_assert!(result <= segments.len());
            }
        }
    }
1229
1230    // ── conversion tests ────────────────────────────────────────────
1231
1232    #[test]
1233    fn from_commit_analysis_loads_diff_content() {
1234        let dir = tempfile::tempdir().unwrap();
1235        let diff_path = dir.path().join("test.diff");
1236        std::fs::write(&diff_path, "+added line\n-removed line\n").unwrap();
1237
1238        let analysis = CommitAnalysis {
1239            detected_type: "feat".to_string(),
1240            detected_scope: "cli".to_string(),
1241            proposed_message: "feat(cli): test".to_string(),
1242            file_changes: make_file_changes(&[]),
1243            diff_summary: "file.rs | 2 +-".to_string(),
1244            diff_file: diff_path.to_string_lossy().to_string(),
1245            file_diffs: Vec::new(),
1246        };
1247
1248        let ai = CommitAnalysisForAI::from_commit_analysis(analysis.clone()).unwrap();
1249        assert_eq!(ai.diff_content, "+added line\n-removed line\n");
1250        assert_eq!(ai.base.detected_type, analysis.detected_type);
1251        assert_eq!(ai.base.diff_file, analysis.diff_file);
1252    }
1253
1254    #[test]
1255    fn from_commit_info_wraps_and_loads_diff() {
1256        let dir = tempfile::tempdir().unwrap();
1257        let diff_path = dir.path().join("test.diff");
1258        std::fs::write(&diff_path, "diff content").unwrap();
1259
1260        let info = CommitInfo {
1261            hash: "a".repeat(40),
1262            author: "Test <test@example.com>".to_string(),
1263            date: chrono::DateTime::parse_from_rfc3339("2024-01-01T00:00:00+00:00").unwrap(),
1264            original_message: "feat(cli): add flag".to_string(),
1265            in_main_branches: vec!["origin/main".to_string()],
1266            analysis: CommitAnalysis {
1267                detected_type: "feat".to_string(),
1268                detected_scope: "cli".to_string(),
1269                proposed_message: "feat(cli): add flag".to_string(),
1270                file_changes: make_file_changes(&[("M", "src/cli.rs")]),
1271                diff_summary: "cli.rs | 1 +".to_string(),
1272                diff_file: diff_path.to_string_lossy().to_string(),
1273                file_diffs: Vec::new(),
1274            },
1275        };
1276
1277        let ai = CommitInfoForAI::from_commit_info(info).unwrap();
1278        assert_eq!(ai.base.analysis.diff_content, "diff content");
1279        assert_eq!(ai.base.hash, "a".repeat(40));
1280        assert_eq!(ai.base.original_message, "feat(cli): add flag");
1281        assert!(ai.pre_validated_checks.is_empty());
1282    }
1283
1284    #[test]
1285    fn file_diffs_default_empty_on_deserialize() {
1286        let yaml = r#"
1287detected_type: feat
1288detected_scope: cli
1289proposed_message: "feat(cli): test"
1290file_changes:
1291  total_files: 0
1292  files_added: 0
1293  files_deleted: 0
1294  file_list: []
1295diff_summary: ""
1296diff_file: "/tmp/test.diff"
1297"#;
1298        let analysis: CommitAnalysis = serde_yaml::from_str(yaml).unwrap();
1299        assert!(analysis.file_diffs.is_empty());
1300    }
1301
1302    #[test]
1303    fn file_diffs_omitted_when_empty_on_serialize() {
1304        let analysis = CommitAnalysis {
1305            detected_type: "feat".to_string(),
1306            detected_scope: "cli".to_string(),
1307            proposed_message: "feat(cli): test".to_string(),
1308            file_changes: make_file_changes(&[]),
1309            diff_summary: String::new(),
1310            diff_file: String::new(),
1311            file_diffs: Vec::new(),
1312        };
1313        let yaml = serde_yaml::to_string(&analysis).unwrap();
1314        assert!(!yaml.contains("file_diffs"));
1315    }
1316
1317    #[test]
1318    fn file_diffs_included_when_populated() {
1319        let analysis = CommitAnalysis {
1320            detected_type: "feat".to_string(),
1321            detected_scope: "cli".to_string(),
1322            proposed_message: "feat(cli): test".to_string(),
1323            file_changes: make_file_changes(&[]),
1324            diff_summary: String::new(),
1325            diff_file: String::new(),
1326            file_diffs: vec![FileDiffRef {
1327                path: "src/main.rs".to_string(),
1328                diff_file: "/tmp/diffs/abc/0000.diff".to_string(),
1329                byte_len: 42,
1330            }],
1331        };
1332        let yaml = serde_yaml::to_string(&analysis).unwrap();
1333        assert!(yaml.contains("file_diffs"));
1334        assert!(yaml.contains("src/main.rs"));
1335        assert!(yaml.contains("byte_len: 42"));
1336    }
1337
1338    // ── from_commit_info_partial ────────────────────────────────────
1339
1340    /// Helper: creates a `CommitInfo` with N file diffs backed by temp files.
1341    fn make_commit_with_file_diffs(
1342        dir: &tempfile::TempDir,
1343        files: &[(&str, &str)], // (path, diff_content)
1344    ) -> CommitInfo {
1345        let file_diffs: Vec<FileDiffRef> = files
1346            .iter()
1347            .enumerate()
1348            .map(|(i, (path, content))| {
1349                let diff_path = dir.path().join(format!("{i:04}.diff"));
1350                fs::write(&diff_path, content).unwrap();
1351                FileDiffRef {
1352                    path: (*path).to_string(),
1353                    diff_file: diff_path.to_string_lossy().to_string(),
1354                    byte_len: content.len(),
1355                }
1356            })
1357            .collect();
1358
1359        CommitInfo {
1360            hash: "abc123def456abc123def456abc123def456abc1".to_string(),
1361            author: "Test Author".to_string(),
1362            date: DateTime::parse_from_rfc3339("2025-01-01T00:00:00+00:00").unwrap(),
1363            original_message: "feat(cli): original message".to_string(),
1364            in_main_branches: vec!["main".to_string()],
1365            analysis: CommitAnalysis {
1366                detected_type: "feat".to_string(),
1367                detected_scope: "cli".to_string(),
1368                proposed_message: "feat(cli): proposed".to_string(),
1369                file_changes: make_file_changes(
1370                    &files.iter().map(|(p, _)| ("M", *p)).collect::<Vec<_>>(),
1371                ),
1372                diff_summary: " src/main.rs | 10 ++++\n src/lib.rs | 5 ++\n".to_string(),
1373                diff_file: dir.path().join("full.diff").to_string_lossy().to_string(),
1374                file_diffs,
1375            },
1376        }
1377    }
1378
1379    #[test]
1380    fn from_commit_info_partial_loads_subset() -> Result<()> {
1381        let dir = tempfile::tempdir()?;
1382        let commit = make_commit_with_file_diffs(
1383            &dir,
1384            &[
1385                ("src/main.rs", "diff --git a/src/main.rs\n+main\n"),
1386                ("src/lib.rs", "diff --git a/src/lib.rs\n+lib\n"),
1387                ("src/utils.rs", "diff --git a/src/utils.rs\n+utils\n"),
1388            ],
1389        );
1390
1391        let paths = vec!["src/main.rs".to_string(), "src/utils.rs".to_string()];
1392        let partial = CommitInfoForAI::from_commit_info_partial(commit, &paths)?;
1393
1394        // Only requested files in diff_content
1395        assert!(partial.base.analysis.diff_content.contains("+main"));
1396        assert!(partial.base.analysis.diff_content.contains("+utils"));
1397        assert!(!partial.base.analysis.diff_content.contains("+lib"));
1398
1399        // file_diffs filtered to requested paths
1400        let ref_paths: Vec<&str> = partial
1401            .base
1402            .analysis
1403            .base
1404            .file_diffs
1405            .iter()
1406            .map(|r| r.path.as_str())
1407            .collect();
1408        assert_eq!(ref_paths, &["src/main.rs", "src/utils.rs"]);
1409
1410        Ok(())
1411    }
1412
1413    #[test]
1414    fn from_commit_info_partial_deduplicates_paths() -> Result<()> {
1415        let dir = tempfile::tempdir()?;
1416        let commit = make_commit_with_file_diffs(
1417            &dir,
1418            &[("src/main.rs", "diff --git a/src/main.rs\n+main\n")],
1419        );
1420
1421        // Duplicate path (simulates hunk-split scenario)
1422        let paths = vec!["src/main.rs".to_string(), "src/main.rs".to_string()];
1423        let partial = CommitInfoForAI::from_commit_info_partial(commit, &paths)?;
1424
1425        // Content loaded only once (no duplicate)
1426        assert_eq!(
1427            partial.base.analysis.diff_content.matches("+main").count(),
1428            1
1429        );
1430
1431        Ok(())
1432    }
1433
1434    #[test]
1435    fn from_commit_info_partial_preserves_metadata() -> Result<()> {
1436        let dir = tempfile::tempdir()?;
1437        let commit = make_commit_with_file_diffs(
1438            &dir,
1439            &[("src/main.rs", "diff --git a/src/main.rs\n+main\n")],
1440        );
1441
1442        let original_hash = commit.hash.clone();
1443        let original_author = commit.author.clone();
1444        let original_date = commit.date;
1445        let original_message = commit.original_message.clone();
1446        let original_summary = commit.analysis.diff_summary.clone();
1447
1448        let paths = vec!["src/main.rs".to_string()];
1449        let partial = CommitInfoForAI::from_commit_info_partial(commit, &paths)?;
1450
1451        assert_eq!(partial.base.hash, original_hash);
1452        assert_eq!(partial.base.author, original_author);
1453        assert_eq!(partial.base.date, original_date);
1454        assert_eq!(partial.base.original_message, original_message);
1455        assert_eq!(partial.base.analysis.base.diff_summary, original_summary);
1456
1457        Ok(())
1458    }
1459
1460    // ── from_commit_info_partial_with_overrides ─────────────────────
1461
1462    #[test]
1463    fn with_overrides_uses_override_content() -> Result<()> {
1464        let dir = tempfile::tempdir()?;
1465        let commit = make_commit_with_file_diffs(
1466            &dir,
1467            &[(
1468                "src/big.rs",
1469                "diff --git a/src/big.rs\n+full-file-content\n",
1470            )],
1471        );
1472
1473        let paths = vec!["src/big.rs".to_string(), "src/big.rs".to_string()];
1474        let overrides = vec![
1475            Some("diff --git a/src/big.rs\n@@ -1,3 +1,4 @@\n+hunk1\n".to_string()),
1476            Some("diff --git a/src/big.rs\n@@ -10,3 +10,4 @@\n+hunk2\n".to_string()),
1477        ];
1478        let partial =
1479            CommitInfoForAI::from_commit_info_partial_with_overrides(commit, &paths, &overrides)?;
1480
1481        // Should contain hunk content, NOT full file content.
1482        assert!(partial.base.analysis.diff_content.contains("+hunk1"));
1483        assert!(partial.base.analysis.diff_content.contains("+hunk2"));
1484        assert!(
1485            !partial
1486                .base
1487                .analysis
1488                .diff_content
1489                .contains("+full-file-content"),
1490            "should not contain full file content"
1491        );
1492
1493        Ok(())
1494    }
1495
1496    #[test]
1497    fn with_overrides_mixed_override_and_disk() -> Result<()> {
1498        let dir = tempfile::tempdir()?;
1499        let commit = make_commit_with_file_diffs(
1500            &dir,
1501            &[
1502                ("src/big.rs", "diff --git a/src/big.rs\n+big-full\n"),
1503                ("src/small.rs", "diff --git a/src/small.rs\n+small-disk\n"),
1504            ],
1505        );
1506
1507        let paths = vec!["src/big.rs".to_string(), "src/small.rs".to_string()];
1508        let overrides = vec![
1509            Some("diff --git a/src/big.rs\n@@ -1,3 +1,4 @@\n+big-hunk\n".to_string()),
1510            None, // load from disk
1511        ];
1512        let partial =
1513            CommitInfoForAI::from_commit_info_partial_with_overrides(commit, &paths, &overrides)?;
1514
1515        // big.rs: override content
1516        assert!(partial.base.analysis.diff_content.contains("+big-hunk"));
1517        assert!(!partial.base.analysis.diff_content.contains("+big-full"));
1518        // small.rs: loaded from disk
1519        assert!(partial.base.analysis.diff_content.contains("+small-disk"));
1520
1521        // Both files should appear in file_diffs metadata.
1522        let ref_paths: Vec<&str> = partial
1523            .base
1524            .analysis
1525            .base
1526            .file_diffs
1527            .iter()
1528            .map(|r| r.path.as_str())
1529            .collect();
1530        assert!(ref_paths.contains(&"src/big.rs"));
1531        assert!(ref_paths.contains(&"src/small.rs"));
1532
1533        Ok(())
1534    }
1535
1536    #[test]
1537    fn with_overrides_deduplicates_disk_reads() -> Result<()> {
1538        let dir = tempfile::tempdir()?;
1539        let commit = make_commit_with_file_diffs(
1540            &dir,
1541            &[("src/main.rs", "diff --git a/src/main.rs\n+main\n")],
1542        );
1543
1544        // Two None entries for same path (simulates duplicate whole-file items).
1545        let paths = vec!["src/main.rs".to_string(), "src/main.rs".to_string()];
1546        let overrides = vec![None, None];
1547        let partial =
1548            CommitInfoForAI::from_commit_info_partial_with_overrides(commit, &paths, &overrides)?;
1549
1550        // Content loaded only once despite two None entries.
1551        assert_eq!(
1552            partial.base.analysis.diff_content.matches("+main").count(),
1553            1
1554        );
1555
1556        Ok(())
1557    }
1558
1559    #[test]
1560    fn with_overrides_preserves_metadata() -> Result<()> {
1561        let dir = tempfile::tempdir()?;
1562        let commit = make_commit_with_file_diffs(
1563            &dir,
1564            &[("src/main.rs", "diff --git a/src/main.rs\n+main\n")],
1565        );
1566
1567        let original_hash = commit.hash.clone();
1568        let original_author = commit.author.clone();
1569        let original_message = commit.original_message.clone();
1570
1571        let paths = vec!["src/main.rs".to_string()];
1572        let overrides = vec![Some("+override-content\n".to_string())];
1573        let partial =
1574            CommitInfoForAI::from_commit_info_partial_with_overrides(commit, &paths, &overrides)?;
1575
1576        assert_eq!(partial.base.hash, original_hash);
1577        assert_eq!(partial.base.author, original_author);
1578        assert_eq!(partial.base.original_message, original_message);
1579        assert!(partial.pre_validated_checks.is_empty());
1580
1581        Ok(())
1582    }
1583}