Skip to main content

omni_dev/git/
commit.rs

1//! Git commit operations and analysis.
2
3use std::fs;
4use std::sync::LazyLock;
5
6use anyhow::{Context, Result};
7use chrono::{DateTime, FixedOffset};
8use git2::{Commit, Repository};
9use globset::Glob;
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12
13use crate::data::context::ScopeDefinition;
14use crate::git::diff_split::split_by_file;
15
/// Matches the `type(scope):` prefix at the very start of a conventional commit message.
///
/// Two anchored alternatives are tried in order:
/// - `type!(scope):` — breaking-change marker before the scope; the scope is capture group 1.
/// - `type(scope):` — plain form; the scope is capture group 2.
///
/// The type must consist of lowercase ASCII letters and the scope is any non-empty run of
/// characters other than `)`. Exactly one of the two groups participates in a match, so
/// callers read group 1 and fall back to group 2.
///
/// NOTE(review): the `type(scope)!:` spelling (marker after the scope) is not matched by
/// either alternative — confirm whether that is intentional.
#[allow(clippy::unwrap_used)] // Compile-time constant regex pattern
static SCOPE_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^[a-z]+!\(([^)]+)\):|^[a-z]+\(([^)]+)\):").unwrap());
20
/// Commit information structure, generic over analysis type.
///
/// `A` is the type stored in the `analysis` field and defaults to [`CommitAnalysis`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitInfo<A = CommitAnalysis> {
    /// Full SHA-1 hash of the commit.
    pub hash: String,
    /// Commit author name and email address, formatted as `Name <email>`.
    pub author: String,
    /// Commit date in ISO format with timezone.
    ///
    /// When built by `from_git_commit` this is the author timestamp expressed in the
    /// author's recorded UTC offset.
    pub date: DateTime<FixedOffset>,
    /// The original commit message as written by the author.
    pub original_message: String,
    /// Array of remote main branches that contain this commit.
    ///
    /// `from_git_commit` currently always leaves this empty (detection is a TODO).
    pub in_main_branches: Vec<String>,
    /// Automated analysis of the commit including type detection and proposed message.
    pub analysis: A,
}
37
/// Commit analysis information.
///
/// Produced by `CommitAnalysis::analyze_commit`; every field is derived from the commit's
/// diff against its first parent and from the existing commit message.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitAnalysis {
    /// Automatically detected conventional commit type (feat, fix, docs, test, chore, etc.).
    pub detected_type: String,
    /// Automatically detected scope based on file paths (cli, git, data, etc.).
    ///
    /// Empty when no scope could be detected.
    pub detected_scope: String,
    /// Heuristically generated conventional commit message based on the detected type,
    /// the detected scope, and the existing subject line.
    pub proposed_message: String,
    /// Detailed statistics about file changes in this commit.
    pub file_changes: FileChanges,
    /// Per-file change summary, one ` path | total +insertions -deletions` line per file.
    pub diff_summary: String,
    /// Path to diff file showing line-by-line changes.
    pub diff_file: String,
    /// Per-file diff references for individual file changes.
    ///
    /// Defaults to empty when missing on deserialization and is omitted from serialized
    /// output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub file_diffs: Vec<FileDiffRef>,
}
57
/// Reference to a per-file diff stored on disk.
///
/// Tracks the repository-relative file path, the absolute path to the
/// diff file on disk, and the byte length of that diff. Gives consumers
/// per-file size information without loading diff content into memory.
///
/// Instances are created by `CommitAnalysis::write_diff_to_file`, one per entry
/// returned by `split_by_file`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileDiffRef {
    /// Repository-relative path of the changed file.
    pub path: String,
    /// Absolute path to the per-file diff file on disk.
    pub diff_file: String,
    /// Byte length of the per-file diff content.
    pub byte_len: usize,
}
72
/// Enhanced commit analysis for AI processing with full diff content.
///
/// Wraps a [`CommitAnalysis`] and carries the diff text itself in addition to the
/// on-disk path stored in the base analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitAnalysisForAI {
    /// Base commit analysis fields.
    ///
    /// Flattened, so these fields serialize at the same level as `diff_content`.
    #[serde(flatten)]
    pub base: CommitAnalysis,
    /// Full diff content for AI analysis.
    pub diff_content: String,
}
82
/// Commit information with enhanced analysis for AI processing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitInfoForAI {
    /// Base commit information with AI-enhanced analysis.
    ///
    /// Flattened, so these fields serialize at the same level as `pre_validated_checks`.
    #[serde(flatten)]
    pub base: CommitInfo<CommitAnalysisForAI>,
    /// Deterministic checks already performed; the LLM should treat these as authoritative.
    ///
    /// Starts empty and is filled by `run_pre_validation_checks`. Omitted from serialized
    /// output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub pre_validated_checks: Vec<String>,
}
93
/// File changes statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileChanges {
    /// Total number of files modified in this commit.
    ///
    /// When produced by the commit analysis this equals `file_list.len()`.
    pub total_files: usize,
    /// Number of new files added in this commit.
    pub files_added: usize,
    /// Number of files deleted in this commit.
    pub files_deleted: usize,
    /// Array of files changed with their git status
    /// (A=added, D=deleted, M=modified, R=renamed, C=copied, T=type change, ?=other).
    pub file_list: Vec<FileChange>,
}
106
/// Individual file change.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileChange {
    /// Git status code (A=added, M=modified, D=deleted, R=renamed, C=copied,
    /// T=type change, ?=any other delta status).
    pub status: String,
    /// Path to the file relative to repository root.
    pub file: String,
}
115
116impl CommitInfo {
117    /// Creates a `CommitInfo` from a `git2::Commit`.
118    pub fn from_git_commit(repo: &Repository, commit: &Commit) -> Result<Self> {
119        let hash = commit.id().to_string();
120
121        let author = format!(
122            "{} <{}>",
123            commit.author().name().unwrap_or("Unknown"),
124            commit.author().email().unwrap_or("unknown@example.com")
125        );
126
127        let timestamp = commit.author().when();
128        let date = DateTime::from_timestamp(timestamp.seconds(), 0)
129            .context("Invalid commit timestamp")?
130            .with_timezone(
131                #[allow(clippy::unwrap_used)] // Offset 0 is always valid
132                &FixedOffset::east_opt(timestamp.offset_minutes() * 60)
133                    .unwrap_or_else(|| FixedOffset::east_opt(0).unwrap()),
134            );
135
136        let original_message = commit.message().unwrap_or("").to_string();
137
138        // TODO: Implement main branch detection
139        let in_main_branches = Vec::new();
140
141        // TODO: Implement commit analysis
142        let analysis = CommitAnalysis::analyze_commit(repo, commit)?;
143
144        Ok(Self {
145            hash,
146            author,
147            date,
148            original_message,
149            in_main_branches,
150            analysis,
151        })
152    }
153}
154
155impl CommitAnalysis {
156    /// Analyzes a commit and generates analysis information.
157    pub fn analyze_commit(repo: &Repository, commit: &Commit) -> Result<Self> {
158        // Get file changes
159        let file_changes = Self::analyze_file_changes(repo, commit)?;
160
161        // Detect conventional commit type based on files and message
162        let detected_type = Self::detect_commit_type(commit, &file_changes);
163
164        // Detect scope based on file paths
165        let detected_scope = Self::detect_scope(&file_changes);
166
167        // Generate proposed conventional commit message
168        let proposed_message =
169            Self::generate_proposed_message(commit, &detected_type, &detected_scope, &file_changes);
170
171        // Get diff summary
172        let diff_summary = Self::get_diff_summary(repo, commit)?;
173
174        // Write diff to file and get path
175        let (diff_file, file_diffs) = Self::write_diff_to_file(repo, commit)?;
176
177        Ok(Self {
178            detected_type,
179            detected_scope,
180            proposed_message,
181            file_changes,
182            diff_summary,
183            diff_file,
184            file_diffs,
185        })
186    }
187
188    /// Analyzes file changes in the commit.
189    fn analyze_file_changes(repo: &Repository, commit: &Commit) -> Result<FileChanges> {
190        let mut file_list = Vec::new();
191        let mut files_added = 0;
192        let mut files_deleted = 0;
193
194        // Get the tree for this commit
195        let commit_tree = commit.tree().context("Failed to get commit tree")?;
196
197        // Get parent tree if available
198        let parent_tree = if commit.parent_count() > 0 {
199            Some(
200                commit
201                    .parent(0)
202                    .context("Failed to get parent commit")?
203                    .tree()
204                    .context("Failed to get parent tree")?,
205            )
206        } else {
207            None
208        };
209
210        // Create diff between parent and commit
211        let diff = if let Some(parent_tree) = parent_tree {
212            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
213                .context("Failed to create diff")?
214        } else {
215            // Initial commit - diff against empty tree
216            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
217                .context("Failed to create diff for initial commit")?
218        };
219
220        // Process each diff delta
221        diff.foreach(
222            &mut |delta, _progress| {
223                let status = match delta.status() {
224                    git2::Delta::Added => {
225                        files_added += 1;
226                        "A"
227                    }
228                    git2::Delta::Deleted => {
229                        files_deleted += 1;
230                        "D"
231                    }
232                    git2::Delta::Modified => "M",
233                    git2::Delta::Renamed => "R",
234                    git2::Delta::Copied => "C",
235                    git2::Delta::Typechange => "T",
236                    _ => "?",
237                };
238
239                if let Some(path) = delta.new_file().path() {
240                    if let Some(path_str) = path.to_str() {
241                        file_list.push(FileChange {
242                            status: status.to_string(),
243                            file: path_str.to_string(),
244                        });
245                    }
246                }
247
248                true
249            },
250            None,
251            None,
252            None,
253        )
254        .context("Failed to process diff")?;
255
256        let total_files = file_list.len();
257
258        Ok(FileChanges {
259            total_files,
260            files_added,
261            files_deleted,
262            file_list,
263        })
264    }
265
266    /// Detects conventional commit type based on files and existing message.
267    fn detect_commit_type(commit: &Commit, file_changes: &FileChanges) -> String {
268        let message = commit.message().unwrap_or("");
269
270        // Check if message already has conventional commit format
271        if let Some(existing_type) = Self::extract_conventional_type(message) {
272            return existing_type;
273        }
274
275        // Analyze file patterns
276        let files: Vec<&str> = file_changes
277            .file_list
278            .iter()
279            .map(|f| f.file.as_str())
280            .collect();
281
282        // Check for specific patterns
283        if files
284            .iter()
285            .any(|f| f.contains("test") || f.contains("spec"))
286        {
287            "test".to_string()
288        } else if files
289            .iter()
290            .any(|f| f.ends_with(".md") || f.contains("README") || f.contains("docs/"))
291        {
292            "docs".to_string()
293        } else if files
294            .iter()
295            .any(|f| f.contains("Cargo.toml") || f.contains("package.json") || f.contains("config"))
296        {
297            if file_changes.files_added > 0 {
298                "feat".to_string()
299            } else {
300                "chore".to_string()
301            }
302        } else if file_changes.files_added > 0
303            && files
304                .iter()
305                .any(|f| f.ends_with(".rs") || f.ends_with(".js") || f.ends_with(".py"))
306        {
307            "feat".to_string()
308        } else if message.to_lowercase().contains("fix") || message.to_lowercase().contains("bug") {
309            "fix".to_string()
310        } else if file_changes.files_deleted > file_changes.files_added {
311            "refactor".to_string()
312        } else {
313            "chore".to_string()
314        }
315    }
316
317    /// Extracts conventional commit type from an existing message.
318    fn extract_conventional_type(message: &str) -> Option<String> {
319        let first_line = message.lines().next().unwrap_or("");
320        if let Some(colon_pos) = first_line.find(':') {
321            let prefix = &first_line[..colon_pos];
322            if let Some(paren_pos) = prefix.find('(') {
323                let type_part = &prefix[..paren_pos];
324                if Self::is_valid_conventional_type(type_part) {
325                    return Some(type_part.to_string());
326                }
327            } else if Self::is_valid_conventional_type(prefix) {
328                return Some(prefix.to_string());
329            }
330        }
331        None
332    }
333
334    /// Checks if a string is a valid conventional commit type.
335    fn is_valid_conventional_type(s: &str) -> bool {
336        matches!(
337            s,
338            "feat"
339                | "fix"
340                | "docs"
341                | "style"
342                | "refactor"
343                | "test"
344                | "chore"
345                | "build"
346                | "ci"
347                | "perf"
348        )
349    }
350
351    /// Detects scope from file paths.
352    fn detect_scope(file_changes: &FileChanges) -> String {
353        let files: Vec<&str> = file_changes
354            .file_list
355            .iter()
356            .map(|f| f.file.as_str())
357            .collect();
358
359        // Analyze common path patterns
360        if files.iter().any(|f| f.starts_with("src/cli/")) {
361            "cli".to_string()
362        } else if files.iter().any(|f| f.starts_with("src/git/")) {
363            "git".to_string()
364        } else if files.iter().any(|f| f.starts_with("src/data/")) {
365            "data".to_string()
366        } else if files.iter().any(|f| f.starts_with("tests/")) {
367            "test".to_string()
368        } else if files.iter().any(|f| f.starts_with("docs/")) {
369            "docs".to_string()
370        } else if files
371            .iter()
372            .any(|f| f.contains("Cargo.toml") || f.contains("deny.toml"))
373        {
374            "deps".to_string()
375        } else {
376            String::new()
377        }
378    }
379
380    /// Re-detects scope using file_patterns from scope definitions.
381    ///
382    /// More specific patterns (more literal path components) win regardless of
383    /// definition order in scopes.yaml. Equally specific matches are joined
384    /// with ", ". If no scope definitions match, the existing detected_scope
385    /// is kept as a fallback.
386    pub fn refine_scope(&mut self, scope_defs: &[ScopeDefinition]) {
387        let files: Vec<&str> = self
388            .file_changes
389            .file_list
390            .iter()
391            .map(|f| f.file.as_str())
392            .collect();
393
394        if let Some(resolved) = resolve_scope(&files, scope_defs) {
395            self.detected_scope = resolved;
396        }
397    }
398
399    /// Generates a proposed conventional commit message.
400    fn generate_proposed_message(
401        commit: &Commit,
402        commit_type: &str,
403        scope: &str,
404        file_changes: &FileChanges,
405    ) -> String {
406        let current_message = commit.message().unwrap_or("").lines().next().unwrap_or("");
407
408        // If already properly formatted, return as-is
409        if Self::extract_conventional_type(current_message).is_some() {
410            return current_message.to_string();
411        }
412
413        // Generate description based on changes
414        let description =
415            if !current_message.is_empty() && !current_message.eq_ignore_ascii_case("stuff") {
416                current_message.to_string()
417            } else {
418                Self::generate_description(commit_type, file_changes)
419            };
420
421        // Format with scope if available
422        if scope.is_empty() {
423            format!("{commit_type}: {description}")
424        } else {
425            format!("{commit_type}({scope}): {description}")
426        }
427    }
428
429    /// Generates a description based on commit type and changes.
430    fn generate_description(commit_type: &str, file_changes: &FileChanges) -> String {
431        match commit_type {
432            "feat" => {
433                if file_changes.total_files == 1 {
434                    format!("add {}", file_changes.file_list[0].file)
435                } else {
436                    format!("add {} new features", file_changes.total_files)
437                }
438            }
439            "fix" => "resolve issues".to_string(),
440            "docs" => "update documentation".to_string(),
441            "test" => "add tests".to_string(),
442            "refactor" => "improve code structure".to_string(),
443            "chore" => "update project files".to_string(),
444            _ => "update project".to_string(),
445        }
446    }
447
448    /// Returns diff summary statistics.
449    fn get_diff_summary(repo: &Repository, commit: &Commit) -> Result<String> {
450        let commit_tree = commit.tree().context("Failed to get commit tree")?;
451
452        let parent_tree = if commit.parent_count() > 0 {
453            Some(
454                commit
455                    .parent(0)
456                    .context("Failed to get parent commit")?
457                    .tree()
458                    .context("Failed to get parent tree")?,
459            )
460        } else {
461            None
462        };
463
464        let diff = if let Some(parent_tree) = parent_tree {
465            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
466                .context("Failed to create diff")?
467        } else {
468            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
469                .context("Failed to create diff for initial commit")?
470        };
471
472        let stats = diff.stats().context("Failed to get diff stats")?;
473
474        let mut summary = String::new();
475        for i in 0..stats.files_changed() {
476            if let Some(path) = diff
477                .get_delta(i)
478                .and_then(|d| d.new_file().path())
479                .and_then(|p| p.to_str())
480            {
481                let insertions = stats.insertions();
482                let deletions = stats.deletions();
483                summary.push_str(&format!(
484                    " {} | {} +{} -{}\n",
485                    path,
486                    insertions + deletions,
487                    insertions,
488                    deletions
489                ));
490            }
491        }
492
493        Ok(summary)
494    }
495
496    /// Writes full diff content to a file and returns the path and per-file refs.
497    fn write_diff_to_file(
498        repo: &Repository,
499        commit: &Commit,
500    ) -> Result<(String, Vec<FileDiffRef>)> {
501        // Get AI scratch directory
502        let ai_scratch_path = crate::utils::ai_scratch::get_ai_scratch_dir()
503            .context("Failed to determine AI scratch directory")?;
504
505        // Create diffs subdirectory
506        let diffs_dir = ai_scratch_path.join("diffs");
507        fs::create_dir_all(&diffs_dir).context("Failed to create diffs directory")?;
508
509        // Create filename with commit hash
510        let commit_hash = commit.id().to_string();
511        let diff_filename = format!("{commit_hash}.diff");
512        let diff_path = diffs_dir.join(&diff_filename);
513
514        let commit_tree = commit.tree().context("Failed to get commit tree")?;
515
516        let parent_tree = if commit.parent_count() > 0 {
517            Some(
518                commit
519                    .parent(0)
520                    .context("Failed to get parent commit")?
521                    .tree()
522                    .context("Failed to get parent tree")?,
523            )
524        } else {
525            None
526        };
527
528        let diff = if let Some(parent_tree) = parent_tree {
529            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
530                .context("Failed to create diff")?
531        } else {
532            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
533                .context("Failed to create diff for initial commit")?
534        };
535
536        let mut diff_content = String::new();
537
538        diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
539            let content = std::str::from_utf8(line.content()).unwrap_or("<binary>");
540            let prefix = match line.origin() {
541                '+' => "+",
542                '-' => "-",
543                ' ' => " ",
544                '@' => "@",
545                _ => "", // Header, file header, and other origins
546            };
547            diff_content.push_str(&format!("{prefix}{content}"));
548            true
549        })
550        .context("Failed to format diff")?;
551
552        // Ensure the diff content ends with a newline to encourage literal block style
553        if !diff_content.ends_with('\n') {
554            diff_content.push('\n');
555        }
556
557        // Write flat diff content to file
558        fs::write(&diff_path, &diff_content).context("Failed to write diff file")?;
559
560        // Split into per-file diffs and write each to disk
561        let per_file_diffs = split_by_file(&diff_content);
562        let mut file_diffs = Vec::with_capacity(per_file_diffs.len());
563
564        if !per_file_diffs.is_empty() {
565            let per_file_dir = diffs_dir.join(&commit_hash);
566            fs::create_dir_all(&per_file_dir)
567                .context("Failed to create per-file diffs directory")?;
568
569            for (index, file_diff) in per_file_diffs.iter().enumerate() {
570                let per_file_name = format!("{index:04}.diff");
571                let per_file_path = per_file_dir.join(&per_file_name);
572                fs::write(&per_file_path, &file_diff.content).with_context(|| {
573                    format!("Failed to write per-file diff: {}", per_file_path.display())
574                })?;
575
576                file_diffs.push(FileDiffRef {
577                    path: file_diff.path.clone(),
578                    diff_file: per_file_path.to_string_lossy().to_string(),
579                    byte_len: file_diff.byte_len,
580                });
581            }
582        }
583
584        Ok((diff_path.to_string_lossy().to_string(), file_diffs))
585    }
586}
587
588impl CommitInfoForAI {
589    /// Converts from a basic `CommitInfo` by loading diff content.
590    pub fn from_commit_info(commit_info: CommitInfo) -> Result<Self> {
591        let analysis = CommitAnalysisForAI::from_commit_analysis(commit_info.analysis)?;
592
593        Ok(Self {
594            base: CommitInfo {
595                hash: commit_info.hash,
596                author: commit_info.author,
597                date: commit_info.date,
598                original_message: commit_info.original_message,
599                in_main_branches: commit_info.in_main_branches,
600                analysis,
601            },
602            pre_validated_checks: Vec::new(),
603        })
604    }
605
606    /// Creates a partial view of a commit containing only the specified file diffs.
607    ///
608    /// Convenience wrapper around [`Self::from_commit_info_partial_with_overrides`]
609    /// with all-`None` overrides (every file loaded from disk).
610    #[cfg(test)]
611    pub(crate) fn from_commit_info_partial(
612        commit_info: CommitInfo,
613        file_paths: &[String],
614    ) -> Result<Self> {
615        let overrides: Vec<Option<String>> = vec![None; file_paths.len()];
616        Self::from_commit_info_partial_with_overrides(commit_info, file_paths, &overrides)
617    }
618
619    /// Creates a partial view using pre-sliced diff content where available.
620    ///
621    /// `file_paths` and `diff_overrides` must be parallel slices. When
622    /// `diff_overrides[i]` is `Some(content)`, that content is used directly
623    /// instead of reading the full per-file diff from disk. This enables
624    /// per-hunk partial views where each chunk receives only its assigned
625    /// hunk slices rather than the entire file.
626    ///
627    /// Entries with `None` overrides fall back to loading from disk via
628    /// [`FileDiffRef::diff_file`], deduplicated by path.
629    pub(crate) fn from_commit_info_partial_with_overrides(
630        commit_info: CommitInfo,
631        file_paths: &[String],
632        diff_overrides: &[Option<String>],
633    ) -> Result<Self> {
634        let mut diff_parts = Vec::new();
635        let mut included_refs = Vec::new();
636        let mut loaded_disk_paths: std::collections::HashSet<String> =
637            std::collections::HashSet::new();
638
639        for (path, override_content) in file_paths.iter().zip(diff_overrides.iter()) {
640            if let Some(content) = override_content {
641                // Pre-sliced hunk content — use directly.
642                diff_parts.push(content.clone());
643                // Include the FileDiffRef for metadata (deduplicated).
644                if let Some(file_ref) = commit_info
645                    .analysis
646                    .file_diffs
647                    .iter()
648                    .find(|r| r.path == *path)
649                {
650                    if !included_refs.iter().any(|r: &FileDiffRef| r.path == *path) {
651                        included_refs.push(file_ref.clone());
652                    }
653                }
654            } else {
655                // Whole-file item — load from disk (deduplicated).
656                if loaded_disk_paths.insert(path.clone()) {
657                    if let Some(file_ref) = commit_info
658                        .analysis
659                        .file_diffs
660                        .iter()
661                        .find(|r| r.path == *path)
662                    {
663                        let content =
664                            fs::read_to_string(&file_ref.diff_file).with_context(|| {
665                                format!("Failed to read per-file diff: {}", file_ref.diff_file)
666                            })?;
667                        diff_parts.push(content);
668                        included_refs.push(file_ref.clone());
669                    }
670                }
671            }
672        }
673
674        let diff_content = diff_parts.join("\n");
675
676        let partial_analysis = CommitAnalysisForAI {
677            base: CommitAnalysis {
678                file_diffs: included_refs,
679                ..commit_info.analysis
680            },
681            diff_content,
682        };
683
684        Ok(Self {
685            base: CommitInfo {
686                hash: commit_info.hash,
687                author: commit_info.author,
688                date: commit_info.date,
689                original_message: commit_info.original_message,
690                in_main_branches: commit_info.in_main_branches,
691                analysis: partial_analysis,
692            },
693            pre_validated_checks: Vec::new(),
694        })
695    }
696
697    /// Runs deterministic pre-validation checks on the commit message.
698    /// Passing checks are recorded in pre_validated_checks so the LLM
699    /// can skip re-checking them. Failing checks are not recorded.
700    pub fn run_pre_validation_checks(&mut self, valid_scopes: &[ScopeDefinition]) {
701        if let Some(caps) = SCOPE_RE.captures(&self.base.original_message) {
702            let scope = caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str());
703            if let Some(scope) = scope {
704                if scope.contains(',') && !scope.contains(", ") {
705                    self.pre_validated_checks.push(format!(
706                        "Scope format verified: multi-scope '{scope}' correctly uses commas without spaces"
707                    ));
708                }
709
710                // Deterministic scope validity check
711                if !valid_scopes.is_empty() {
712                    let scope_parts: Vec<&str> = scope.split(',').map(str::trim).collect();
713                    let all_valid = scope_parts
714                        .iter()
715                        .all(|part| valid_scopes.iter().any(|s| s.name == *part));
716                    if all_valid {
717                        self.pre_validated_checks.push(format!(
718                            "Scope validity verified: '{scope}' is in the valid scopes list"
719                        ));
720                    }
721                }
722            }
723        }
724    }
725}
726
727/// Resolves the best scope for a set of files using scope definition file patterns.
728///
729/// More specific patterns (more literal path components) win regardless of
730/// definition order in `scopes.yaml`. Equally specific matches are joined
731/// with ", ". Returns `None` when `scope_defs` or `files` is empty, or no
732/// scope definition matches.
733pub fn resolve_scope(files: &[&str], scope_defs: &[ScopeDefinition]) -> Option<String> {
734    if scope_defs.is_empty() || files.is_empty() {
735        return None;
736    }
737
738    let mut matches: Vec<(&str, usize)> = Vec::new();
739    for scope_def in scope_defs {
740        if let Some(specificity) = scope_matches_files(files, &scope_def.file_patterns) {
741            matches.push((&scope_def.name, specificity));
742        }
743    }
744
745    if matches.is_empty() {
746        return None;
747    }
748
749    // SAFETY: matches is non-empty (guarded by early return above)
750    #[allow(clippy::expect_used)] // Guarded by is_empty() check above
751    let max_specificity = matches.iter().map(|(_, s)| *s).max().expect("non-empty");
752    let best: Vec<&str> = matches
753        .into_iter()
754        .filter(|(_, s)| *s == max_specificity)
755        .map(|(name, _)| name)
756        .collect();
757
758    Some(best.join(", "))
759}
760
761/// Replaces the scope in a conventional commit message with the deterministically
762/// resolved scope based on the given files and scope definitions.
763///
764/// If the message does not contain a conventional commit scope, or if no scope
765/// can be resolved from the files, the message is returned unchanged.
766pub fn refine_message_scope(
767    message: &str,
768    files: &[&str],
769    scope_defs: &[ScopeDefinition],
770) -> String {
771    let Some(resolved) = resolve_scope(files, scope_defs) else {
772        return message.to_string();
773    };
774
775    // Split into first line and rest
776    let (first_line, rest) = message
777        .split_once('\n')
778        .map_or((message, ""), |(f, r)| (f, r));
779
780    let Some(caps) = SCOPE_RE.captures(first_line) else {
781        return message.to_string();
782    };
783
784    // Determine which capture group matched (group 1 = breaking, group 2 = normal)
785    let existing_scope = caps
786        .get(1)
787        .or_else(|| caps.get(2))
788        .map_or("", |m| m.as_str());
789
790    if existing_scope == resolved {
791        return message.to_string();
792    }
793
794    let new_first_line =
795        first_line.replacen(&format!("({existing_scope})"), &format!("({resolved})"), 1);
796
797    if rest.is_empty() {
798        new_first_line
799    } else {
800        format!("{new_first_line}\n{rest}")
801    }
802}
803
804/// Checks if a scope's file patterns match any of the given files.
805///
806/// Returns `Some(max_specificity)` if at least one file matches the scope
807/// (after applying negation patterns), or `None` if no file matches.
808fn scope_matches_files(files: &[&str], patterns: &[String]) -> Option<usize> {
809    let mut positive = Vec::new();
810    let mut negative = Vec::new();
811    for pat in patterns {
812        if let Some(stripped) = pat.strip_prefix('!') {
813            negative.push(stripped);
814        } else {
815            positive.push(pat.as_str());
816        }
817    }
818
819    // Build negative matchers
820    let neg_matchers: Vec<_> = negative
821        .iter()
822        .filter_map(|p| Glob::new(p).ok().map(|g| g.compile_matcher()))
823        .collect();
824
825    let mut max_specificity: Option<usize> = None;
826    for pat in &positive {
827        let Ok(glob) = Glob::new(pat) else {
828            continue;
829        };
830        let matcher = glob.compile_matcher();
831        for file in files {
832            if matcher.is_match(file) && !neg_matchers.iter().any(|neg| neg.is_match(file)) {
833                let specificity = count_specificity(pat);
834                max_specificity =
835                    Some(max_specificity.map_or(specificity, |cur| cur.max(specificity)));
836            }
837        }
838    }
839    max_specificity
840}
841
/// Counts the number of literal (non-wildcard) path segments in a glob pattern.
///
/// A segment is literal when it is non-empty and contains neither `*` nor `?`.
/// Empty segments (from a leading, trailing, or doubled `/`, or from an empty
/// pattern) carry no path information and are not counted, so `docs/` is not
/// ranked above `docs/**`.
///
/// - `docs/adrs/**` → 2 (`docs`, `adrs`)
/// - `docs/**` → 1 (`docs`)
/// - `docs/` → 1 (`docs`)
/// - `*.md` → 0
/// - `` (empty) → 0
/// - `src/main/scala/**` → 3
fn count_specificity(pattern: &str) -> usize {
    pattern
        .split('/')
        .filter(|segment| !segment.is_empty())
        .filter(|segment| !segment.contains(|c: char| matches!(c, '*' | '?')))
        .count()
}
854
855impl CommitAnalysisForAI {
856    /// Converts from a basic `CommitAnalysis` by loading diff content from file.
857    pub fn from_commit_analysis(analysis: CommitAnalysis) -> Result<Self> {
858        // Read the actual diff content from the file
859        let diff_content = fs::read_to_string(&analysis.diff_file)
860            .with_context(|| format!("Failed to read diff file: {}", analysis.diff_file))?;
861
862        Ok(Self {
863            base: analysis,
864            diff_content,
865        })
866    }
867}
868
869#[cfg(test)]
870#[allow(clippy::unwrap_used, clippy::expect_used)]
871mod tests {
872    use super::*;
873    use crate::data::context::ScopeDefinition;
874
875    // ── extract_conventional_type ────────────────────────────────────
876
877    #[test]
878    fn conventional_type_feat_with_scope() {
879        assert_eq!(
880            CommitAnalysis::extract_conventional_type("feat(cli): add flag"),
881            Some("feat".to_string())
882        );
883    }
884
885    #[test]
886    fn conventional_type_without_scope() {
887        assert_eq!(
888            CommitAnalysis::extract_conventional_type("fix: resolve bug"),
889            Some("fix".to_string())
890        );
891    }
892
893    #[test]
894    fn conventional_type_invalid_message() {
895        assert_eq!(
896            CommitAnalysis::extract_conventional_type("random message without colon"),
897            None
898        );
899    }
900
901    #[test]
902    fn conventional_type_unknown_type() {
903        assert_eq!(
904            CommitAnalysis::extract_conventional_type("yolo(scope): stuff"),
905            None
906        );
907    }
908
909    #[test]
910    fn conventional_type_all_valid_types() {
911        let types = [
912            "feat", "fix", "docs", "style", "refactor", "test", "chore", "build", "ci", "perf",
913        ];
914        for t in types {
915            let msg = format!("{t}: description");
916            assert_eq!(
917                CommitAnalysis::extract_conventional_type(&msg),
918                Some(t.to_string()),
919                "expected Some for type '{t}'"
920            );
921        }
922    }
923
924    // ── is_valid_conventional_type ───────────────────────────────────
925
926    #[test]
927    fn valid_conventional_types() {
928        for t in [
929            "feat", "fix", "docs", "style", "refactor", "test", "chore", "build", "ci", "perf",
930        ] {
931            assert!(
932                CommitAnalysis::is_valid_conventional_type(t),
933                "'{t}' should be valid"
934            );
935        }
936    }
937
938    #[test]
939    fn invalid_conventional_types() {
940        for t in ["yolo", "Feat", "", "FEAT", "feature", "bugfix"] {
941            assert!(
942                !CommitAnalysis::is_valid_conventional_type(t),
943                "'{t}' should be invalid"
944            );
945        }
946    }
947
948    // ── detect_scope ─────────────────────────────────────────────────
949
950    fn make_file_changes(files: &[(&str, &str)]) -> FileChanges {
951        FileChanges {
952            total_files: files.len(),
953            files_added: files.iter().filter(|(s, _)| *s == "A").count(),
954            files_deleted: files.iter().filter(|(s, _)| *s == "D").count(),
955            file_list: files
956                .iter()
957                .map(|(status, file)| FileChange {
958                    status: (*status).to_string(),
959                    file: (*file).to_string(),
960                })
961                .collect(),
962        }
963    }
964
965    #[test]
966    fn scope_from_cli_files() {
967        let changes = make_file_changes(&[("M", "src/cli/commands.rs")]);
968        assert_eq!(CommitAnalysis::detect_scope(&changes), "cli");
969    }
970
971    #[test]
972    fn scope_from_git_files() {
973        let changes = make_file_changes(&[("M", "src/git/remote.rs")]);
974        assert_eq!(CommitAnalysis::detect_scope(&changes), "git");
975    }
976
977    #[test]
978    fn scope_from_docs_files() {
979        let changes = make_file_changes(&[("M", "docs/README.md")]);
980        assert_eq!(CommitAnalysis::detect_scope(&changes), "docs");
981    }
982
983    #[test]
984    fn scope_from_data_files() {
985        let changes = make_file_changes(&[("M", "src/data/yaml.rs")]);
986        assert_eq!(CommitAnalysis::detect_scope(&changes), "data");
987    }
988
989    #[test]
990    fn scope_from_test_files() {
991        let changes = make_file_changes(&[("A", "tests/new_test.rs")]);
992        assert_eq!(CommitAnalysis::detect_scope(&changes), "test");
993    }
994
995    #[test]
996    fn scope_from_deps_files() {
997        let changes = make_file_changes(&[("M", "Cargo.toml")]);
998        assert_eq!(CommitAnalysis::detect_scope(&changes), "deps");
999    }
1000
1001    #[test]
1002    fn scope_unknown_files() {
1003        let changes = make_file_changes(&[("M", "random/path/file.txt")]);
1004        assert_eq!(CommitAnalysis::detect_scope(&changes), "");
1005    }
1006
1007    // ── count_specificity ────────────────────────────────────────────
1008
1009    #[test]
1010    fn count_specificity_deep_path() {
1011        assert_eq!(super::count_specificity("src/main/scala/**"), 3);
1012    }
1013
1014    #[test]
1015    fn count_specificity_shallow() {
1016        assert_eq!(super::count_specificity("docs/**"), 1);
1017    }
1018
1019    #[test]
1020    fn count_specificity_wildcard_only() {
1021        assert_eq!(super::count_specificity("*.md"), 0);
1022    }
1023
1024    #[test]
1025    fn count_specificity_no_wildcards() {
1026        assert_eq!(super::count_specificity("src/lib.rs"), 2);
1027    }
1028
1029    // ── scope_matches_files ──────────────────────────────────────────
1030
1031    #[test]
1032    fn scope_matches_positive_patterns() {
1033        let patterns = vec!["src/cli/**".to_string()];
1034        let files = &["src/cli/commands.rs"];
1035        assert!(super::scope_matches_files(files, &patterns).is_some());
1036    }
1037
1038    #[test]
1039    fn scope_matches_no_match() {
1040        let patterns = vec!["src/cli/**".to_string()];
1041        let files = &["src/git/remote.rs"];
1042        assert!(super::scope_matches_files(files, &patterns).is_none());
1043    }
1044
1045    #[test]
1046    fn scope_matches_with_negation() {
1047        let patterns = vec!["src/**".to_string(), "!src/test/**".to_string()];
1048        // File in src/ but not in src/test/ should match
1049        let files = &["src/lib.rs"];
1050        assert!(super::scope_matches_files(files, &patterns).is_some());
1051
1052        // File in src/test/ should be excluded
1053        let test_files = &["src/test/helper.rs"];
1054        assert!(super::scope_matches_files(test_files, &patterns).is_none());
1055    }
1056
1057    // ── refine_scope ─────────────────────────────────────────────────
1058
1059    fn make_scope_def(name: &str, patterns: &[&str]) -> ScopeDefinition {
1060        ScopeDefinition {
1061            name: name.to_string(),
1062            description: String::new(),
1063            examples: vec![],
1064            file_patterns: patterns.iter().map(|p| (*p).to_string()).collect(),
1065        }
1066    }
1067
1068    #[test]
1069    fn refine_scope_empty_defs() {
1070        let mut analysis = CommitAnalysis {
1071            detected_type: "feat".to_string(),
1072            detected_scope: "original".to_string(),
1073            proposed_message: String::new(),
1074            file_changes: make_file_changes(&[("M", "src/cli/commands.rs")]),
1075            diff_summary: String::new(),
1076            diff_file: String::new(),
1077            file_diffs: Vec::new(),
1078        };
1079        analysis.refine_scope(&[]);
1080        assert_eq!(analysis.detected_scope, "original");
1081    }
1082
1083    #[test]
1084    fn refine_scope_most_specific_wins() {
1085        let scope_defs = vec![
1086            make_scope_def("lib", &["src/**"]),
1087            make_scope_def("cli", &["src/cli/**"]),
1088        ];
1089        let mut analysis = CommitAnalysis {
1090            detected_type: "feat".to_string(),
1091            detected_scope: String::new(),
1092            proposed_message: String::new(),
1093            file_changes: make_file_changes(&[("M", "src/cli/commands.rs")]),
1094            diff_summary: String::new(),
1095            diff_file: String::new(),
1096            file_diffs: Vec::new(),
1097        };
1098        analysis.refine_scope(&scope_defs);
1099        assert_eq!(analysis.detected_scope, "cli");
1100    }
1101
1102    #[test]
1103    fn refine_scope_no_matching_files() {
1104        let scope_defs = vec![make_scope_def("cli", &["src/cli/**"])];
1105        let mut analysis = CommitAnalysis {
1106            detected_type: "feat".to_string(),
1107            detected_scope: "original".to_string(),
1108            proposed_message: String::new(),
1109            file_changes: make_file_changes(&[("M", "README.md")]),
1110            diff_summary: String::new(),
1111            diff_file: String::new(),
1112            file_diffs: Vec::new(),
1113        };
1114        analysis.refine_scope(&scope_defs);
1115        // No match → keeps original
1116        assert_eq!(analysis.detected_scope, "original");
1117    }
1118
1119    #[test]
1120    fn refine_scope_equal_specificity_joins() {
1121        let scope_defs = vec![
1122            make_scope_def("cli", &["src/cli/**"]),
1123            make_scope_def("git", &["src/git/**"]),
1124        ];
1125        let mut analysis = CommitAnalysis {
1126            detected_type: "feat".to_string(),
1127            detected_scope: String::new(),
1128            proposed_message: String::new(),
1129            file_changes: make_file_changes(&[
1130                ("M", "src/cli/commands.rs"),
1131                ("M", "src/git/remote.rs"),
1132            ]),
1133            diff_summary: String::new(),
1134            diff_file: String::new(),
1135            file_diffs: Vec::new(),
1136        };
1137        analysis.refine_scope(&scope_defs);
1138        // Both have specificity 2 and both match → joined
1139        assert!(
1140            analysis.detected_scope == "cli, git" || analysis.detected_scope == "git, cli",
1141            "expected joined scopes, got: {}",
1142            analysis.detected_scope
1143        );
1144    }
1145
1146    // ── refine_message_scope ───────────────────────────────────────────
1147
1148    #[test]
1149    fn refine_message_scope_replaces_less_specific() {
1150        let scope_defs = vec![
1151            make_scope_def("ci", &[".github/**"]),
1152            make_scope_def("workflows", &[".github/workflows/**"]),
1153        ];
1154        let files = &[".github/workflows/ci.yml"];
1155        let result = super::refine_message_scope(
1156            "chore(ci): bump EmbarkStudios/cargo-deny-action from 2.0.15 to 2.0.17",
1157            files,
1158            &scope_defs,
1159        );
1160        assert_eq!(
1161            result,
1162            "chore(workflows): bump EmbarkStudios/cargo-deny-action from 2.0.15 to 2.0.17"
1163        );
1164    }
1165
1166    #[test]
1167    fn refine_message_scope_keeps_already_correct() {
1168        let scope_defs = vec![
1169            make_scope_def("ci", &[".github/**"]),
1170            make_scope_def("workflows", &[".github/workflows/**"]),
1171        ];
1172        let files = &[".github/workflows/ci.yml"];
1173        let msg = "chore(workflows): bump something";
1174        assert_eq!(super::refine_message_scope(msg, files, &scope_defs), msg);
1175    }
1176
1177    #[test]
1178    fn refine_message_scope_no_scope_in_message() {
1179        let scope_defs = vec![make_scope_def("cli", &["src/cli/**"])];
1180        let files = &["src/cli/commands.rs"];
1181        let msg = "chore: do something";
1182        assert_eq!(super::refine_message_scope(msg, files, &scope_defs), msg);
1183    }
1184
1185    #[test]
1186    fn refine_message_scope_preserves_body() {
1187        let scope_defs = vec![
1188            make_scope_def("ci", &[".github/**"]),
1189            make_scope_def("workflows", &[".github/workflows/**"]),
1190        ];
1191        let files = &[".github/workflows/ci.yml"];
1192        let msg = "chore(ci): bump dep\n\nSome body text\nMore details";
1193        let result = super::refine_message_scope(msg, files, &scope_defs);
1194        assert_eq!(
1195            result,
1196            "chore(workflows): bump dep\n\nSome body text\nMore details"
1197        );
1198    }
1199
1200    #[test]
1201    fn refine_message_scope_breaking_change() {
1202        let scope_defs = vec![
1203            make_scope_def("ci", &[".github/**"]),
1204            make_scope_def("workflows", &[".github/workflows/**"]),
1205        ];
1206        let files = &[".github/workflows/ci.yml"];
1207        let result = super::refine_message_scope("feat!(ci): breaking change", files, &scope_defs);
1208        assert_eq!(result, "feat!(workflows): breaking change");
1209    }
1210
1211    #[test]
1212    fn refine_message_scope_no_matching_scope_defs() {
1213        let scope_defs = vec![make_scope_def("cli", &["src/cli/**"])];
1214        let files = &["README.md"];
1215        let msg = "docs(docs): update readme";
1216        assert_eq!(super::refine_message_scope(msg, files, &scope_defs), msg);
1217    }
1218
1219    // ── run_pre_validation_checks ────────────────────────────────────
1220
1221    fn make_commit_info_for_ai(message: &str) -> CommitInfoForAI {
1222        CommitInfoForAI {
1223            base: CommitInfo {
1224                hash: "a".repeat(40),
1225                author: "Test <test@example.com>".to_string(),
1226                date: chrono::DateTime::parse_from_rfc3339("2024-01-01T00:00:00+00:00").unwrap(),
1227                original_message: message.to_string(),
1228                in_main_branches: vec![],
1229                analysis: CommitAnalysisForAI {
1230                    base: CommitAnalysis {
1231                        detected_type: "feat".to_string(),
1232                        detected_scope: String::new(),
1233                        proposed_message: String::new(),
1234                        file_changes: make_file_changes(&[]),
1235                        diff_summary: String::new(),
1236                        diff_file: String::new(),
1237                        file_diffs: Vec::new(),
1238                    },
1239                    diff_content: String::new(),
1240                },
1241            },
1242            pre_validated_checks: vec![],
1243        }
1244    }
1245
1246    #[test]
1247    fn pre_validation_valid_single_scope() {
1248        let scopes = vec![make_scope_def("cli", &["src/cli/**"])];
1249        let mut info = make_commit_info_for_ai("feat(cli): add command");
1250        info.run_pre_validation_checks(&scopes);
1251        assert!(
1252            info.pre_validated_checks
1253                .iter()
1254                .any(|c| c.contains("Scope validity verified")),
1255            "expected scope validity check, got: {:?}",
1256            info.pre_validated_checks
1257        );
1258    }
1259
1260    #[test]
1261    fn pre_validation_multi_scope() {
1262        let scopes = vec![
1263            make_scope_def("cli", &["src/cli/**"]),
1264            make_scope_def("git", &["src/git/**"]),
1265        ];
1266        let mut info = make_commit_info_for_ai("feat(cli,git): cross-cutting change");
1267        info.run_pre_validation_checks(&scopes);
1268        assert!(info
1269            .pre_validated_checks
1270            .iter()
1271            .any(|c| c.contains("Scope validity verified")),);
1272        assert!(info
1273            .pre_validated_checks
1274            .iter()
1275            .any(|c| c.contains("multi-scope")),);
1276    }
1277
1278    #[test]
1279    fn pre_validation_multi_scope_with_spaces() {
1280        let scopes = vec![
1281            make_scope_def("cli", &["src/cli/**"]),
1282            make_scope_def("lib", &["src/lib/**"]),
1283        ];
1284        let mut info = make_commit_info_for_ai("feat(cli, lib): add something");
1285        info.run_pre_validation_checks(&scopes);
1286        assert!(
1287            info.pre_validated_checks
1288                .iter()
1289                .any(|c| c.contains("Scope validity verified")),
1290            "expected scope validity check for spaced multi-scope, got: {:?}",
1291            info.pre_validated_checks
1292        );
1293    }
1294
1295    #[test]
1296    fn pre_validation_invalid_scope_not_added() {
1297        let scopes = vec![make_scope_def("cli", &["src/cli/**"])];
1298        let mut info = make_commit_info_for_ai("feat(unknown): something");
1299        info.run_pre_validation_checks(&scopes);
1300        assert!(
1301            !info
1302                .pre_validated_checks
1303                .iter()
1304                .any(|c| c.contains("Scope validity verified")),
1305            "should not validate unknown scope"
1306        );
1307    }
1308
1309    #[test]
1310    fn pre_validation_no_scope_message() {
1311        let scopes = vec![make_scope_def("cli", &["src/cli/**"])];
1312        let mut info = make_commit_info_for_ai("feat: no scope here");
1313        info.run_pre_validation_checks(&scopes);
1314        assert!(info.pre_validated_checks.is_empty());
1315    }
1316
1317    // ── property tests ────────────────────────────────────────────
1318
1319    mod prop {
1320        use super::*;
1321        use proptest::prelude::*;
1322
1323        fn arb_conventional_type() -> impl Strategy<Value = &'static str> {
1324            prop_oneof![
1325                Just("feat"),
1326                Just("fix"),
1327                Just("docs"),
1328                Just("style"),
1329                Just("refactor"),
1330                Just("test"),
1331                Just("chore"),
1332                Just("build"),
1333                Just("ci"),
1334                Just("perf"),
1335            ]
1336        }
1337
1338        proptest! {
1339            #[test]
1340            fn valid_conventional_format_extracts_type(
1341                ctype in arb_conventional_type(),
1342                scope in "[a-z]{1,10}",
1343                desc in "[a-zA-Z ]{1,50}",
1344            ) {
1345                let message = format!("{ctype}({scope}): {desc}");
1346                let result = CommitAnalysis::extract_conventional_type(&message);
1347                prop_assert_eq!(result, Some(ctype.to_string()));
1348            }
1349
1350            #[test]
1351            fn no_colon_returns_none(s in "[^:]{0,100}") {
1352                let result = CommitAnalysis::extract_conventional_type(&s);
1353                prop_assert!(result.is_none());
1354            }
1355
1356            #[test]
1357            fn count_specificity_nonnegative(pattern in ".*") {
1358                // usize is always >= 0; this test catches panics on arbitrary input
1359                let _ = super::count_specificity(&pattern);
1360            }
1361
1362            #[test]
1363            fn count_specificity_bounded_by_segments(
1364                segments in proptest::collection::vec("[a-z*?]{1,10}", 1..6),
1365            ) {
1366                let pattern = segments.join("/");
1367                let result = super::count_specificity(&pattern);
1368                prop_assert!(result <= segments.len());
1369            }
1370        }
1371    }
1372
1373    // ── conversion tests ────────────────────────────────────────────
1374
1375    #[test]
1376    fn from_commit_analysis_loads_diff_content() {
1377        let dir = tempfile::tempdir().unwrap();
1378        let diff_path = dir.path().join("test.diff");
1379        std::fs::write(&diff_path, "+added line\n-removed line\n").unwrap();
1380
1381        let analysis = CommitAnalysis {
1382            detected_type: "feat".to_string(),
1383            detected_scope: "cli".to_string(),
1384            proposed_message: "feat(cli): test".to_string(),
1385            file_changes: make_file_changes(&[]),
1386            diff_summary: "file.rs | 2 +-".to_string(),
1387            diff_file: diff_path.to_string_lossy().to_string(),
1388            file_diffs: Vec::new(),
1389        };
1390
1391        let ai = CommitAnalysisForAI::from_commit_analysis(analysis.clone()).unwrap();
1392        assert_eq!(ai.diff_content, "+added line\n-removed line\n");
1393        assert_eq!(ai.base.detected_type, analysis.detected_type);
1394        assert_eq!(ai.base.diff_file, analysis.diff_file);
1395    }
1396
1397    #[test]
1398    fn from_commit_info_wraps_and_loads_diff() {
1399        let dir = tempfile::tempdir().unwrap();
1400        let diff_path = dir.path().join("test.diff");
1401        std::fs::write(&diff_path, "diff content").unwrap();
1402
1403        let info = CommitInfo {
1404            hash: "a".repeat(40),
1405            author: "Test <test@example.com>".to_string(),
1406            date: chrono::DateTime::parse_from_rfc3339("2024-01-01T00:00:00+00:00").unwrap(),
1407            original_message: "feat(cli): add flag".to_string(),
1408            in_main_branches: vec!["origin/main".to_string()],
1409            analysis: CommitAnalysis {
1410                detected_type: "feat".to_string(),
1411                detected_scope: "cli".to_string(),
1412                proposed_message: "feat(cli): add flag".to_string(),
1413                file_changes: make_file_changes(&[("M", "src/cli.rs")]),
1414                diff_summary: "cli.rs | 1 +".to_string(),
1415                diff_file: diff_path.to_string_lossy().to_string(),
1416                file_diffs: Vec::new(),
1417            },
1418        };
1419
1420        let ai = CommitInfoForAI::from_commit_info(info).unwrap();
1421        assert_eq!(ai.base.analysis.diff_content, "diff content");
1422        assert_eq!(ai.base.hash, "a".repeat(40));
1423        assert_eq!(ai.base.original_message, "feat(cli): add flag");
1424        assert!(ai.pre_validated_checks.is_empty());
1425    }
1426
1427    #[test]
1428    fn file_diffs_default_empty_on_deserialize() {
1429        let yaml = r#"
1430detected_type: feat
1431detected_scope: cli
1432proposed_message: "feat(cli): test"
1433file_changes:
1434  total_files: 0
1435  files_added: 0
1436  files_deleted: 0
1437  file_list: []
1438diff_summary: ""
1439diff_file: "/tmp/test.diff"
1440"#;
1441        let analysis: CommitAnalysis = serde_yaml::from_str(yaml).unwrap();
1442        assert!(analysis.file_diffs.is_empty());
1443    }
1444
1445    #[test]
1446    fn file_diffs_omitted_when_empty_on_serialize() {
1447        let analysis = CommitAnalysis {
1448            detected_type: "feat".to_string(),
1449            detected_scope: "cli".to_string(),
1450            proposed_message: "feat(cli): test".to_string(),
1451            file_changes: make_file_changes(&[]),
1452            diff_summary: String::new(),
1453            diff_file: String::new(),
1454            file_diffs: Vec::new(),
1455        };
1456        let yaml = serde_yaml::to_string(&analysis).unwrap();
1457        assert!(!yaml.contains("file_diffs"));
1458    }
1459
1460    #[test]
1461    fn file_diffs_included_when_populated() {
1462        let analysis = CommitAnalysis {
1463            detected_type: "feat".to_string(),
1464            detected_scope: "cli".to_string(),
1465            proposed_message: "feat(cli): test".to_string(),
1466            file_changes: make_file_changes(&[]),
1467            diff_summary: String::new(),
1468            diff_file: String::new(),
1469            file_diffs: vec![FileDiffRef {
1470                path: "src/main.rs".to_string(),
1471                diff_file: "/tmp/diffs/abc/0000.diff".to_string(),
1472                byte_len: 42,
1473            }],
1474        };
1475        let yaml = serde_yaml::to_string(&analysis).unwrap();
1476        assert!(yaml.contains("file_diffs"));
1477        assert!(yaml.contains("src/main.rs"));
1478        assert!(yaml.contains("byte_len: 42"));
1479    }
1480
1481    // ── from_commit_info_partial ────────────────────────────────────
1482
1483    /// Helper: creates a `CommitInfo` with N file diffs backed by temp files.
1484    fn make_commit_with_file_diffs(
1485        dir: &tempfile::TempDir,
1486        files: &[(&str, &str)], // (path, diff_content)
1487    ) -> CommitInfo {
1488        let file_diffs: Vec<FileDiffRef> = files
1489            .iter()
1490            .enumerate()
1491            .map(|(i, (path, content))| {
1492                let diff_path = dir.path().join(format!("{i:04}.diff"));
1493                fs::write(&diff_path, content).unwrap();
1494                FileDiffRef {
1495                    path: (*path).to_string(),
1496                    diff_file: diff_path.to_string_lossy().to_string(),
1497                    byte_len: content.len(),
1498                }
1499            })
1500            .collect();
1501
1502        CommitInfo {
1503            hash: "abc123def456abc123def456abc123def456abc1".to_string(),
1504            author: "Test Author".to_string(),
1505            date: DateTime::parse_from_rfc3339("2025-01-01T00:00:00+00:00").unwrap(),
1506            original_message: "feat(cli): original message".to_string(),
1507            in_main_branches: vec!["main".to_string()],
1508            analysis: CommitAnalysis {
1509                detected_type: "feat".to_string(),
1510                detected_scope: "cli".to_string(),
1511                proposed_message: "feat(cli): proposed".to_string(),
1512                file_changes: make_file_changes(
1513                    &files.iter().map(|(p, _)| ("M", *p)).collect::<Vec<_>>(),
1514                ),
1515                diff_summary: " src/main.rs | 10 ++++\n src/lib.rs | 5 ++\n".to_string(),
1516                diff_file: dir.path().join("full.diff").to_string_lossy().to_string(),
1517                file_diffs,
1518            },
1519        }
1520    }
1521
1522    #[test]
1523    fn from_commit_info_partial_loads_subset() -> Result<()> {
1524        let dir = tempfile::tempdir()?;
1525        let commit = make_commit_with_file_diffs(
1526            &dir,
1527            &[
1528                ("src/main.rs", "diff --git a/src/main.rs\n+main\n"),
1529                ("src/lib.rs", "diff --git a/src/lib.rs\n+lib\n"),
1530                ("src/utils.rs", "diff --git a/src/utils.rs\n+utils\n"),
1531            ],
1532        );
1533
1534        let paths = vec!["src/main.rs".to_string(), "src/utils.rs".to_string()];
1535        let partial = CommitInfoForAI::from_commit_info_partial(commit, &paths)?;
1536
1537        // Only requested files in diff_content
1538        assert!(partial.base.analysis.diff_content.contains("+main"));
1539        assert!(partial.base.analysis.diff_content.contains("+utils"));
1540        assert!(!partial.base.analysis.diff_content.contains("+lib"));
1541
1542        // file_diffs filtered to requested paths
1543        let ref_paths: Vec<&str> = partial
1544            .base
1545            .analysis
1546            .base
1547            .file_diffs
1548            .iter()
1549            .map(|r| r.path.as_str())
1550            .collect();
1551        assert_eq!(ref_paths, &["src/main.rs", "src/utils.rs"]);
1552
1553        Ok(())
1554    }
1555
1556    #[test]
1557    fn from_commit_info_partial_deduplicates_paths() -> Result<()> {
1558        let dir = tempfile::tempdir()?;
1559        let commit = make_commit_with_file_diffs(
1560            &dir,
1561            &[("src/main.rs", "diff --git a/src/main.rs\n+main\n")],
1562        );
1563
1564        // Duplicate path (simulates hunk-split scenario)
1565        let paths = vec!["src/main.rs".to_string(), "src/main.rs".to_string()];
1566        let partial = CommitInfoForAI::from_commit_info_partial(commit, &paths)?;
1567
1568        // Content loaded only once (no duplicate)
1569        assert_eq!(
1570            partial.base.analysis.diff_content.matches("+main").count(),
1571            1
1572        );
1573
1574        Ok(())
1575    }
1576
1577    #[test]
1578    fn from_commit_info_partial_preserves_metadata() -> Result<()> {
1579        let dir = tempfile::tempdir()?;
1580        let commit = make_commit_with_file_diffs(
1581            &dir,
1582            &[("src/main.rs", "diff --git a/src/main.rs\n+main\n")],
1583        );
1584
1585        let original_hash = commit.hash.clone();
1586        let original_author = commit.author.clone();
1587        let original_date = commit.date;
1588        let original_message = commit.original_message.clone();
1589        let original_summary = commit.analysis.diff_summary.clone();
1590
1591        let paths = vec!["src/main.rs".to_string()];
1592        let partial = CommitInfoForAI::from_commit_info_partial(commit, &paths)?;
1593
1594        assert_eq!(partial.base.hash, original_hash);
1595        assert_eq!(partial.base.author, original_author);
1596        assert_eq!(partial.base.date, original_date);
1597        assert_eq!(partial.base.original_message, original_message);
1598        assert_eq!(partial.base.analysis.base.diff_summary, original_summary);
1599
1600        Ok(())
1601    }
1602
1603    // ── from_commit_info_partial_with_overrides ─────────────────────
1604
1605    #[test]
1606    fn with_overrides_uses_override_content() -> Result<()> {
1607        let dir = tempfile::tempdir()?;
1608        let commit = make_commit_with_file_diffs(
1609            &dir,
1610            &[(
1611                "src/big.rs",
1612                "diff --git a/src/big.rs\n+full-file-content\n",
1613            )],
1614        );
1615
1616        let paths = vec!["src/big.rs".to_string(), "src/big.rs".to_string()];
1617        let overrides = vec![
1618            Some("diff --git a/src/big.rs\n@@ -1,3 +1,4 @@\n+hunk1\n".to_string()),
1619            Some("diff --git a/src/big.rs\n@@ -10,3 +10,4 @@\n+hunk2\n".to_string()),
1620        ];
1621        let partial =
1622            CommitInfoForAI::from_commit_info_partial_with_overrides(commit, &paths, &overrides)?;
1623
1624        // Should contain hunk content, NOT full file content.
1625        assert!(partial.base.analysis.diff_content.contains("+hunk1"));
1626        assert!(partial.base.analysis.diff_content.contains("+hunk2"));
1627        assert!(
1628            !partial
1629                .base
1630                .analysis
1631                .diff_content
1632                .contains("+full-file-content"),
1633            "should not contain full file content"
1634        );
1635
1636        Ok(())
1637    }
1638
1639    #[test]
1640    fn with_overrides_mixed_override_and_disk() -> Result<()> {
1641        let dir = tempfile::tempdir()?;
1642        let commit = make_commit_with_file_diffs(
1643            &dir,
1644            &[
1645                ("src/big.rs", "diff --git a/src/big.rs\n+big-full\n"),
1646                ("src/small.rs", "diff --git a/src/small.rs\n+small-disk\n"),
1647            ],
1648        );
1649
1650        let paths = vec!["src/big.rs".to_string(), "src/small.rs".to_string()];
1651        let overrides = vec![
1652            Some("diff --git a/src/big.rs\n@@ -1,3 +1,4 @@\n+big-hunk\n".to_string()),
1653            None, // load from disk
1654        ];
1655        let partial =
1656            CommitInfoForAI::from_commit_info_partial_with_overrides(commit, &paths, &overrides)?;
1657
1658        // big.rs: override content
1659        assert!(partial.base.analysis.diff_content.contains("+big-hunk"));
1660        assert!(!partial.base.analysis.diff_content.contains("+big-full"));
1661        // small.rs: loaded from disk
1662        assert!(partial.base.analysis.diff_content.contains("+small-disk"));
1663
1664        // Both files should appear in file_diffs metadata.
1665        let ref_paths: Vec<&str> = partial
1666            .base
1667            .analysis
1668            .base
1669            .file_diffs
1670            .iter()
1671            .map(|r| r.path.as_str())
1672            .collect();
1673        assert!(ref_paths.contains(&"src/big.rs"));
1674        assert!(ref_paths.contains(&"src/small.rs"));
1675
1676        Ok(())
1677    }
1678
1679    #[test]
1680    fn with_overrides_deduplicates_disk_reads() -> Result<()> {
1681        let dir = tempfile::tempdir()?;
1682        let commit = make_commit_with_file_diffs(
1683            &dir,
1684            &[("src/main.rs", "diff --git a/src/main.rs\n+main\n")],
1685        );
1686
1687        // Two None entries for same path (simulates duplicate whole-file items).
1688        let paths = vec!["src/main.rs".to_string(), "src/main.rs".to_string()];
1689        let overrides = vec![None, None];
1690        let partial =
1691            CommitInfoForAI::from_commit_info_partial_with_overrides(commit, &paths, &overrides)?;
1692
1693        // Content loaded only once despite two None entries.
1694        assert_eq!(
1695            partial.base.analysis.diff_content.matches("+main").count(),
1696            1
1697        );
1698
1699        Ok(())
1700    }
1701
1702    #[test]
1703    fn with_overrides_preserves_metadata() -> Result<()> {
1704        let dir = tempfile::tempdir()?;
1705        let commit = make_commit_with_file_diffs(
1706            &dir,
1707            &[("src/main.rs", "diff --git a/src/main.rs\n+main\n")],
1708        );
1709
1710        let original_hash = commit.hash.clone();
1711        let original_author = commit.author.clone();
1712        let original_message = commit.original_message.clone();
1713
1714        let paths = vec!["src/main.rs".to_string()];
1715        let overrides = vec![Some("+override-content\n".to_string())];
1716        let partial =
1717            CommitInfoForAI::from_commit_info_partial_with_overrides(commit, &paths, &overrides)?;
1718
1719        assert_eq!(partial.base.hash, original_hash);
1720        assert_eq!(partial.base.author, original_author);
1721        assert_eq!(partial.base.original_message, original_message);
1722        assert!(partial.pre_validated_checks.is_empty());
1723
1724        Ok(())
1725    }
1726}