Skip to main content

omni_dev/git/
commit.rs

1//! Git commit operations and analysis
2
3use anyhow::{Context, Result};
4use chrono::{DateTime, FixedOffset};
5use git2::{Commit, Repository};
6use globset::Glob;
7use serde::{Deserialize, Serialize};
8use std::fs;
9
10use regex::Regex;
11
12use crate::data::context::ScopeDefinition;
13
14/// Commit information structure
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct CommitInfo {
17    /// Full SHA-1 hash of the commit
18    pub hash: String,
19    /// Commit author name and email address
20    pub author: String,
21    /// Commit date in ISO format with timezone
22    pub date: DateTime<FixedOffset>,
23    /// The original commit message as written by the author
24    pub original_message: String,
25    /// Array of remote main branches that contain this commit
26    pub in_main_branches: Vec<String>,
27    /// Automated analysis of the commit including type detection and proposed message
28    pub analysis: CommitAnalysis,
29}
30
31/// Commit analysis information
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct CommitAnalysis {
34    /// Automatically detected conventional commit type (feat, fix, docs, test, chore, etc.)
35    pub detected_type: String,
36    /// Automatically detected scope based on file paths (cli, git, data, etc.)
37    pub detected_scope: String,
38    /// AI-generated conventional commit message based on file changes
39    pub proposed_message: String,
40    /// Detailed statistics about file changes in this commit
41    pub file_changes: FileChanges,
42    /// Git diff --stat output showing lines changed per file
43    pub diff_summary: String,
44    /// Path to diff file showing line-by-line changes
45    pub diff_file: String,
46}
47
48/// Enhanced commit analysis for AI processing with full diff content
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct CommitAnalysisForAI {
51    /// Automatically detected conventional commit type (feat, fix, docs, test, chore, etc.)
52    pub detected_type: String,
53    /// Automatically detected scope based on file paths (cli, git, data, etc.)
54    pub detected_scope: String,
55    /// AI-generated conventional commit message based on file changes
56    pub proposed_message: String,
57    /// Detailed statistics about file changes in this commit
58    pub file_changes: FileChanges,
59    /// Git diff --stat output showing lines changed per file
60    pub diff_summary: String,
61    /// Path to diff file showing line-by-line changes
62    pub diff_file: String,
63    /// Full diff content for AI analysis
64    pub diff_content: String,
65}
66
67/// Commit information with enhanced analysis for AI processing
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct CommitInfoForAI {
70    /// Full SHA-1 hash of the commit
71    pub hash: String,
72    /// Commit author name and email address
73    pub author: String,
74    /// Commit date in ISO format with timezone
75    pub date: DateTime<FixedOffset>,
76    /// The original commit message as written by the author
77    pub original_message: String,
78    /// Array of remote main branches that contain this commit
79    pub in_main_branches: Vec<String>,
80    /// Enhanced automated analysis of the commit including diff content
81    pub analysis: CommitAnalysisForAI,
82    /// Deterministic checks already performed; the LLM should treat these as authoritative
83    #[serde(default, skip_serializing_if = "Vec::is_empty")]
84    pub pre_validated_checks: Vec<String>,
85}
86
87/// File changes statistics
88#[derive(Debug, Clone, Serialize, Deserialize)]
89pub struct FileChanges {
90    /// Total number of files modified in this commit
91    pub total_files: usize,
92    /// Number of new files added in this commit
93    pub files_added: usize,
94    /// Number of files deleted in this commit
95    pub files_deleted: usize,
96    /// Array of files changed with their git status (M=modified, A=added, D=deleted)
97    pub file_list: Vec<FileChange>,
98}
99
100/// Individual file change
101#[derive(Debug, Clone, Serialize, Deserialize)]
102pub struct FileChange {
103    /// Git status code (A=added, M=modified, D=deleted, R=renamed)
104    pub status: String,
105    /// Path to the file relative to repository root
106    pub file: String,
107}
108
109impl CommitInfo {
110    /// Create CommitInfo from git2::Commit
111    pub fn from_git_commit(repo: &Repository, commit: &Commit) -> Result<Self> {
112        let hash = commit.id().to_string();
113
114        let author = format!(
115            "{} <{}>",
116            commit.author().name().unwrap_or("Unknown"),
117            commit.author().email().unwrap_or("unknown@example.com")
118        );
119
120        let timestamp = commit.author().when();
121        let date = DateTime::from_timestamp(timestamp.seconds(), 0)
122            .context("Invalid commit timestamp")?
123            .with_timezone(
124                &FixedOffset::east_opt(timestamp.offset_minutes() * 60)
125                    .unwrap_or_else(|| FixedOffset::east_opt(0).unwrap()),
126            );
127
128        let original_message = commit.message().unwrap_or("").to_string();
129
130        // TODO: Implement main branch detection
131        let in_main_branches = Vec::new();
132
133        // TODO: Implement commit analysis
134        let analysis = CommitAnalysis::analyze_commit(repo, commit)?;
135
136        Ok(Self {
137            hash,
138            author,
139            date,
140            original_message,
141            in_main_branches,
142            analysis,
143        })
144    }
145}
146
147impl CommitAnalysis {
148    /// Analyze a commit and generate analysis information
149    pub fn analyze_commit(repo: &Repository, commit: &Commit) -> Result<Self> {
150        // Get file changes
151        let file_changes = Self::analyze_file_changes(repo, commit)?;
152
153        // Detect conventional commit type based on files and message
154        let detected_type = Self::detect_commit_type(commit, &file_changes);
155
156        // Detect scope based on file paths
157        let detected_scope = Self::detect_scope(&file_changes);
158
159        // Generate proposed conventional commit message
160        let proposed_message =
161            Self::generate_proposed_message(commit, &detected_type, &detected_scope, &file_changes);
162
163        // Get diff summary
164        let diff_summary = Self::get_diff_summary(repo, commit)?;
165
166        // Write diff to file and get path
167        let diff_file = Self::write_diff_to_file(repo, commit)?;
168
169        Ok(Self {
170            detected_type,
171            detected_scope,
172            proposed_message,
173            file_changes,
174            diff_summary,
175            diff_file,
176        })
177    }
178
179    /// Analyze file changes in the commit
180    fn analyze_file_changes(repo: &Repository, commit: &Commit) -> Result<FileChanges> {
181        let mut file_list = Vec::new();
182        let mut files_added = 0;
183        let mut files_deleted = 0;
184
185        // Get the tree for this commit
186        let commit_tree = commit.tree().context("Failed to get commit tree")?;
187
188        // Get parent tree if available
189        let parent_tree = if commit.parent_count() > 0 {
190            Some(
191                commit
192                    .parent(0)
193                    .context("Failed to get parent commit")?
194                    .tree()
195                    .context("Failed to get parent tree")?,
196            )
197        } else {
198            None
199        };
200
201        // Create diff between parent and commit
202        let diff = if let Some(parent_tree) = parent_tree {
203            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
204                .context("Failed to create diff")?
205        } else {
206            // Initial commit - diff against empty tree
207            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
208                .context("Failed to create diff for initial commit")?
209        };
210
211        // Process each diff delta
212        diff.foreach(
213            &mut |delta, _progress| {
214                let status = match delta.status() {
215                    git2::Delta::Added => {
216                        files_added += 1;
217                        "A"
218                    }
219                    git2::Delta::Deleted => {
220                        files_deleted += 1;
221                        "D"
222                    }
223                    git2::Delta::Modified => "M",
224                    git2::Delta::Renamed => "R",
225                    git2::Delta::Copied => "C",
226                    git2::Delta::Typechange => "T",
227                    _ => "?",
228                };
229
230                if let Some(path) = delta.new_file().path() {
231                    if let Some(path_str) = path.to_str() {
232                        file_list.push(FileChange {
233                            status: status.to_string(),
234                            file: path_str.to_string(),
235                        });
236                    }
237                }
238
239                true
240            },
241            None,
242            None,
243            None,
244        )
245        .context("Failed to process diff")?;
246
247        let total_files = file_list.len();
248
249        Ok(FileChanges {
250            total_files,
251            files_added,
252            files_deleted,
253            file_list,
254        })
255    }
256
257    /// Detect conventional commit type based on files and existing message
258    fn detect_commit_type(commit: &Commit, file_changes: &FileChanges) -> String {
259        let message = commit.message().unwrap_or("");
260
261        // Check if message already has conventional commit format
262        if let Some(existing_type) = Self::extract_conventional_type(message) {
263            return existing_type;
264        }
265
266        // Analyze file patterns
267        let files: Vec<&str> = file_changes
268            .file_list
269            .iter()
270            .map(|f| f.file.as_str())
271            .collect();
272
273        // Check for specific patterns
274        if files
275            .iter()
276            .any(|f| f.contains("test") || f.contains("spec"))
277        {
278            "test".to_string()
279        } else if files
280            .iter()
281            .any(|f| f.ends_with(".md") || f.contains("README") || f.contains("docs/"))
282        {
283            "docs".to_string()
284        } else if files
285            .iter()
286            .any(|f| f.contains("Cargo.toml") || f.contains("package.json") || f.contains("config"))
287        {
288            if file_changes.files_added > 0 {
289                "feat".to_string()
290            } else {
291                "chore".to_string()
292            }
293        } else if file_changes.files_added > 0
294            && files
295                .iter()
296                .any(|f| f.ends_with(".rs") || f.ends_with(".js") || f.ends_with(".py"))
297        {
298            "feat".to_string()
299        } else if message.to_lowercase().contains("fix") || message.to_lowercase().contains("bug") {
300            "fix".to_string()
301        } else if file_changes.files_deleted > file_changes.files_added {
302            "refactor".to_string()
303        } else {
304            "chore".to_string()
305        }
306    }
307
308    /// Extract conventional commit type from existing message
309    fn extract_conventional_type(message: &str) -> Option<String> {
310        let first_line = message.lines().next().unwrap_or("");
311        if let Some(colon_pos) = first_line.find(':') {
312            let prefix = &first_line[..colon_pos];
313            if let Some(paren_pos) = prefix.find('(') {
314                let type_part = &prefix[..paren_pos];
315                if Self::is_valid_conventional_type(type_part) {
316                    return Some(type_part.to_string());
317                }
318            } else if Self::is_valid_conventional_type(prefix) {
319                return Some(prefix.to_string());
320            }
321        }
322        None
323    }
324
325    /// Check if a string is a valid conventional commit type
326    fn is_valid_conventional_type(s: &str) -> bool {
327        matches!(
328            s,
329            "feat"
330                | "fix"
331                | "docs"
332                | "style"
333                | "refactor"
334                | "test"
335                | "chore"
336                | "build"
337                | "ci"
338                | "perf"
339        )
340    }
341
342    /// Detect scope from file paths
343    fn detect_scope(file_changes: &FileChanges) -> String {
344        let files: Vec<&str> = file_changes
345            .file_list
346            .iter()
347            .map(|f| f.file.as_str())
348            .collect();
349
350        // Analyze common path patterns
351        if files.iter().any(|f| f.starts_with("src/cli/")) {
352            "cli".to_string()
353        } else if files.iter().any(|f| f.starts_with("src/git/")) {
354            "git".to_string()
355        } else if files.iter().any(|f| f.starts_with("src/data/")) {
356            "data".to_string()
357        } else if files.iter().any(|f| f.starts_with("tests/")) {
358            "test".to_string()
359        } else if files.iter().any(|f| f.starts_with("docs/")) {
360            "docs".to_string()
361        } else if files
362            .iter()
363            .any(|f| f.contains("Cargo.toml") || f.contains("deny.toml"))
364        {
365            "deps".to_string()
366        } else {
367            "".to_string()
368        }
369    }
370
371    /// Re-detect scope using file_patterns from scope definitions.
372    ///
373    /// More specific patterns (more literal path components) win regardless of
374    /// definition order in scopes.yaml. Equally specific matches are joined
375    /// with ", ". If no scope definitions match, the existing detected_scope
376    /// is kept as a fallback.
377    pub fn refine_scope(&mut self, scope_defs: &[ScopeDefinition]) {
378        if scope_defs.is_empty() {
379            return;
380        }
381        let files: Vec<&str> = self
382            .file_changes
383            .file_list
384            .iter()
385            .map(|f| f.file.as_str())
386            .collect();
387        if files.is_empty() {
388            return;
389        }
390
391        let mut matches: Vec<(&str, usize)> = Vec::new();
392        for scope_def in scope_defs {
393            if let Some(specificity) = Self::scope_matches_files(&files, &scope_def.file_patterns) {
394                matches.push((&scope_def.name, specificity));
395            }
396        }
397
398        if matches.is_empty() {
399            return;
400        }
401
402        let max_specificity = matches.iter().map(|(_, s)| *s).max().unwrap();
403        let best: Vec<&str> = matches
404            .into_iter()
405            .filter(|(_, s)| *s == max_specificity)
406            .map(|(name, _)| name)
407            .collect();
408
409        self.detected_scope = best.join(", ");
410    }
411
412    /// Check if a scope's file_patterns match any of the given files.
413    ///
414    /// Returns `Some(max_specificity)` if at least one file matches the scope
415    /// (after applying negation patterns), or `None` if no file matches.
416    fn scope_matches_files(files: &[&str], patterns: &[String]) -> Option<usize> {
417        let mut positive = Vec::new();
418        let mut negative = Vec::new();
419        for pat in patterns {
420            if let Some(stripped) = pat.strip_prefix('!') {
421                negative.push(stripped);
422            } else {
423                positive.push(pat.as_str());
424            }
425        }
426
427        // Build negative matchers
428        let neg_matchers: Vec<_> = negative
429            .iter()
430            .filter_map(|p| Glob::new(p).ok().map(|g| g.compile_matcher()))
431            .collect();
432
433        let mut max_specificity: Option<usize> = None;
434        for pat in &positive {
435            let glob = match Glob::new(pat) {
436                Ok(g) => g,
437                Err(_) => continue,
438            };
439            let matcher = glob.compile_matcher();
440            for file in files {
441                if matcher.is_match(file) && !neg_matchers.iter().any(|neg| neg.is_match(file)) {
442                    let specificity = Self::count_specificity(pat);
443                    max_specificity =
444                        Some(max_specificity.map_or(specificity, |cur| cur.max(specificity)));
445                }
446            }
447        }
448        max_specificity
449    }
450
451    /// Count the number of literal (non-wildcard) path segments in a glob pattern.
452    ///
453    /// - `docs/adrs/**` → 2 (`docs`, `adrs`)
454    /// - `docs/**` → 1 (`docs`)
455    /// - `*.md` → 0
456    /// - `src/main/scala/**` → 3
457    fn count_specificity(pattern: &str) -> usize {
458        pattern
459            .split('/')
460            .filter(|segment| !segment.contains('*') && !segment.contains('?'))
461            .count()
462    }
463
464    /// Generate a proposed conventional commit message
465    fn generate_proposed_message(
466        commit: &Commit,
467        commit_type: &str,
468        scope: &str,
469        file_changes: &FileChanges,
470    ) -> String {
471        let current_message = commit.message().unwrap_or("").lines().next().unwrap_or("");
472
473        // If already properly formatted, return as-is
474        if Self::extract_conventional_type(current_message).is_some() {
475            return current_message.to_string();
476        }
477
478        // Generate description based on changes
479        let description =
480            if !current_message.is_empty() && !current_message.eq_ignore_ascii_case("stuff") {
481                current_message.to_string()
482            } else {
483                Self::generate_description(commit_type, file_changes)
484            };
485
486        // Format with scope if available
487        if scope.is_empty() {
488            format!("{}: {}", commit_type, description)
489        } else {
490            format!("{}({}): {}", commit_type, scope, description)
491        }
492    }
493
494    /// Generate description based on commit type and changes
495    fn generate_description(commit_type: &str, file_changes: &FileChanges) -> String {
496        match commit_type {
497            "feat" => {
498                if file_changes.total_files == 1 {
499                    format!("add {}", file_changes.file_list[0].file)
500                } else {
501                    format!("add {} new features", file_changes.total_files)
502                }
503            }
504            "fix" => "resolve issues".to_string(),
505            "docs" => "update documentation".to_string(),
506            "test" => "add tests".to_string(),
507            "refactor" => "improve code structure".to_string(),
508            "chore" => "update project files".to_string(),
509            _ => "update project".to_string(),
510        }
511    }
512
513    /// Get diff summary statistics
514    fn get_diff_summary(repo: &Repository, commit: &Commit) -> Result<String> {
515        let commit_tree = commit.tree().context("Failed to get commit tree")?;
516
517        let parent_tree = if commit.parent_count() > 0 {
518            Some(
519                commit
520                    .parent(0)
521                    .context("Failed to get parent commit")?
522                    .tree()
523                    .context("Failed to get parent tree")?,
524            )
525        } else {
526            None
527        };
528
529        let diff = if let Some(parent_tree) = parent_tree {
530            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
531                .context("Failed to create diff")?
532        } else {
533            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
534                .context("Failed to create diff for initial commit")?
535        };
536
537        let stats = diff.stats().context("Failed to get diff stats")?;
538
539        let mut summary = String::new();
540        for i in 0..stats.files_changed() {
541            if let Some(path) = diff
542                .get_delta(i)
543                .and_then(|d| d.new_file().path())
544                .and_then(|p| p.to_str())
545            {
546                let insertions = stats.insertions();
547                let deletions = stats.deletions();
548                summary.push_str(&format!(
549                    " {} | {} +{} -{}\n",
550                    path,
551                    insertions + deletions,
552                    insertions,
553                    deletions
554                ));
555            }
556        }
557
558        Ok(summary)
559    }
560
561    /// Write full diff content to a file and return the path
562    fn write_diff_to_file(repo: &Repository, commit: &Commit) -> Result<String> {
563        // Get AI scratch directory
564        let ai_scratch_path = crate::utils::ai_scratch::get_ai_scratch_dir()
565            .context("Failed to determine AI scratch directory")?;
566
567        // Create diffs subdirectory
568        let diffs_dir = ai_scratch_path.join("diffs");
569        fs::create_dir_all(&diffs_dir).context("Failed to create diffs directory")?;
570
571        // Create filename with commit hash
572        let commit_hash = commit.id().to_string();
573        let diff_filename = format!("{}.diff", commit_hash);
574        let diff_path = diffs_dir.join(&diff_filename);
575
576        let commit_tree = commit.tree().context("Failed to get commit tree")?;
577
578        let parent_tree = if commit.parent_count() > 0 {
579            Some(
580                commit
581                    .parent(0)
582                    .context("Failed to get parent commit")?
583                    .tree()
584                    .context("Failed to get parent tree")?,
585            )
586        } else {
587            None
588        };
589
590        let diff = if let Some(parent_tree) = parent_tree {
591            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
592                .context("Failed to create diff")?
593        } else {
594            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
595                .context("Failed to create diff for initial commit")?
596        };
597
598        let mut diff_content = String::new();
599
600        diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
601            let content = std::str::from_utf8(line.content()).unwrap_or("<binary>");
602            let prefix = match line.origin() {
603                '+' => "+",
604                '-' => "-",
605                ' ' => " ",
606                '@' => "@",
607                'H' => "", // Header
608                'F' => "", // File header
609                _ => "",
610            };
611            diff_content.push_str(&format!("{}{}", prefix, content));
612            true
613        })
614        .context("Failed to format diff")?;
615
616        // Ensure the diff content ends with a newline to encourage literal block style
617        if !diff_content.ends_with('\n') {
618            diff_content.push('\n');
619        }
620
621        // Write diff content to file
622        fs::write(&diff_path, diff_content).context("Failed to write diff file")?;
623
624        // Return the path as a string
625        Ok(diff_path.to_string_lossy().to_string())
626    }
627}
628
629impl CommitInfoForAI {
630    /// Convert from basic CommitInfo by loading diff content
631    pub fn from_commit_info(commit_info: CommitInfo) -> Result<Self> {
632        let analysis = CommitAnalysisForAI::from_commit_analysis(commit_info.analysis)?;
633
634        Ok(Self {
635            hash: commit_info.hash,
636            author: commit_info.author,
637            date: commit_info.date,
638            original_message: commit_info.original_message,
639            in_main_branches: commit_info.in_main_branches,
640            analysis,
641            pre_validated_checks: Vec::new(),
642        })
643    }
644
645    /// Run deterministic pre-validation checks on the commit message.
646    /// Passing checks are recorded in pre_validated_checks so the LLM
647    /// can skip re-checking them. Failing checks are not recorded.
648    pub fn run_pre_validation_checks(&mut self) {
649        let re = Regex::new(r"^[a-z]+!\(([^)]+)\):|^[a-z]+\(([^)]+)\):").unwrap();
650        if let Some(caps) = re.captures(&self.original_message) {
651            let scope = caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str());
652            if let Some(scope) = scope {
653                if scope.contains(',') && !scope.contains(", ") {
654                    self.pre_validated_checks.push(format!(
655                        "Scope format verified: multi-scope '{}' correctly uses commas without spaces",
656                        scope
657                    ));
658                }
659            }
660        }
661    }
662}
663
664impl CommitAnalysisForAI {
665    /// Convert from basic CommitAnalysis by loading diff content from file
666    pub fn from_commit_analysis(analysis: CommitAnalysis) -> Result<Self> {
667        // Read the actual diff content from the file
668        let diff_content = fs::read_to_string(&analysis.diff_file)
669            .with_context(|| format!("Failed to read diff file: {}", analysis.diff_file))?;
670
671        Ok(Self {
672            detected_type: analysis.detected_type,
673            detected_scope: analysis.detected_scope,
674            proposed_message: analysis.proposed_message,
675            file_changes: analysis.file_changes,
676            diff_summary: analysis.diff_summary,
677            diff_file: analysis.diff_file,
678            diff_content,
679        })
680    }
681}