Skip to main content

omni_dev/git/
commit.rs

1//! Git commit operations and analysis
2
3use anyhow::{Context, Result};
4use chrono::{DateTime, FixedOffset};
5use git2::{Commit, Repository};
6use globset::Glob;
7use serde::{Deserialize, Serialize};
8use std::fs;
9
10use crate::data::context::ScopeDefinition;
11
12/// Commit information structure
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct CommitInfo {
15    /// Full SHA-1 hash of the commit
16    pub hash: String,
17    /// Commit author name and email address
18    pub author: String,
19    /// Commit date in ISO format with timezone
20    pub date: DateTime<FixedOffset>,
21    /// The original commit message as written by the author
22    pub original_message: String,
23    /// Array of remote main branches that contain this commit
24    pub in_main_branches: Vec<String>,
25    /// Automated analysis of the commit including type detection and proposed message
26    pub analysis: CommitAnalysis,
27}
28
29/// Commit analysis information
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct CommitAnalysis {
32    /// Automatically detected conventional commit type (feat, fix, docs, test, chore, etc.)
33    pub detected_type: String,
34    /// Automatically detected scope based on file paths (cli, git, data, etc.)
35    pub detected_scope: String,
36    /// AI-generated conventional commit message based on file changes
37    pub proposed_message: String,
38    /// Detailed statistics about file changes in this commit
39    pub file_changes: FileChanges,
40    /// Git diff --stat output showing lines changed per file
41    pub diff_summary: String,
42    /// Path to diff file showing line-by-line changes
43    pub diff_file: String,
44}
45
46/// Enhanced commit analysis for AI processing with full diff content
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct CommitAnalysisForAI {
49    /// Automatically detected conventional commit type (feat, fix, docs, test, chore, etc.)
50    pub detected_type: String,
51    /// Automatically detected scope based on file paths (cli, git, data, etc.)
52    pub detected_scope: String,
53    /// AI-generated conventional commit message based on file changes
54    pub proposed_message: String,
55    /// Detailed statistics about file changes in this commit
56    pub file_changes: FileChanges,
57    /// Git diff --stat output showing lines changed per file
58    pub diff_summary: String,
59    /// Path to diff file showing line-by-line changes
60    pub diff_file: String,
61    /// Full diff content for AI analysis
62    pub diff_content: String,
63}
64
65/// Commit information with enhanced analysis for AI processing
66#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct CommitInfoForAI {
68    /// Full SHA-1 hash of the commit
69    pub hash: String,
70    /// Commit author name and email address
71    pub author: String,
72    /// Commit date in ISO format with timezone
73    pub date: DateTime<FixedOffset>,
74    /// The original commit message as written by the author
75    pub original_message: String,
76    /// Array of remote main branches that contain this commit
77    pub in_main_branches: Vec<String>,
78    /// Enhanced automated analysis of the commit including diff content
79    pub analysis: CommitAnalysisForAI,
80}
81
82/// File changes statistics
83#[derive(Debug, Clone, Serialize, Deserialize)]
84pub struct FileChanges {
85    /// Total number of files modified in this commit
86    pub total_files: usize,
87    /// Number of new files added in this commit
88    pub files_added: usize,
89    /// Number of files deleted in this commit
90    pub files_deleted: usize,
91    /// Array of files changed with their git status (M=modified, A=added, D=deleted)
92    pub file_list: Vec<FileChange>,
93}
94
95/// Individual file change
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub struct FileChange {
98    /// Git status code (A=added, M=modified, D=deleted, R=renamed)
99    pub status: String,
100    /// Path to the file relative to repository root
101    pub file: String,
102}
103
104impl CommitInfo {
105    /// Create CommitInfo from git2::Commit
106    pub fn from_git_commit(repo: &Repository, commit: &Commit) -> Result<Self> {
107        let hash = commit.id().to_string();
108
109        let author = format!(
110            "{} <{}>",
111            commit.author().name().unwrap_or("Unknown"),
112            commit.author().email().unwrap_or("unknown@example.com")
113        );
114
115        let timestamp = commit.author().when();
116        let date = DateTime::from_timestamp(timestamp.seconds(), 0)
117            .context("Invalid commit timestamp")?
118            .with_timezone(
119                &FixedOffset::east_opt(timestamp.offset_minutes() * 60)
120                    .unwrap_or_else(|| FixedOffset::east_opt(0).unwrap()),
121            );
122
123        let original_message = commit.message().unwrap_or("").to_string();
124
125        // TODO: Implement main branch detection
126        let in_main_branches = Vec::new();
127
128        // TODO: Implement commit analysis
129        let analysis = CommitAnalysis::analyze_commit(repo, commit)?;
130
131        Ok(Self {
132            hash,
133            author,
134            date,
135            original_message,
136            in_main_branches,
137            analysis,
138        })
139    }
140}
141
142impl CommitAnalysis {
143    /// Analyze a commit and generate analysis information
144    pub fn analyze_commit(repo: &Repository, commit: &Commit) -> Result<Self> {
145        // Get file changes
146        let file_changes = Self::analyze_file_changes(repo, commit)?;
147
148        // Detect conventional commit type based on files and message
149        let detected_type = Self::detect_commit_type(commit, &file_changes);
150
151        // Detect scope based on file paths
152        let detected_scope = Self::detect_scope(&file_changes);
153
154        // Generate proposed conventional commit message
155        let proposed_message =
156            Self::generate_proposed_message(commit, &detected_type, &detected_scope, &file_changes);
157
158        // Get diff summary
159        let diff_summary = Self::get_diff_summary(repo, commit)?;
160
161        // Write diff to file and get path
162        let diff_file = Self::write_diff_to_file(repo, commit)?;
163
164        Ok(Self {
165            detected_type,
166            detected_scope,
167            proposed_message,
168            file_changes,
169            diff_summary,
170            diff_file,
171        })
172    }
173
174    /// Analyze file changes in the commit
175    fn analyze_file_changes(repo: &Repository, commit: &Commit) -> Result<FileChanges> {
176        let mut file_list = Vec::new();
177        let mut files_added = 0;
178        let mut files_deleted = 0;
179
180        // Get the tree for this commit
181        let commit_tree = commit.tree().context("Failed to get commit tree")?;
182
183        // Get parent tree if available
184        let parent_tree = if commit.parent_count() > 0 {
185            Some(
186                commit
187                    .parent(0)
188                    .context("Failed to get parent commit")?
189                    .tree()
190                    .context("Failed to get parent tree")?,
191            )
192        } else {
193            None
194        };
195
196        // Create diff between parent and commit
197        let diff = if let Some(parent_tree) = parent_tree {
198            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
199                .context("Failed to create diff")?
200        } else {
201            // Initial commit - diff against empty tree
202            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
203                .context("Failed to create diff for initial commit")?
204        };
205
206        // Process each diff delta
207        diff.foreach(
208            &mut |delta, _progress| {
209                let status = match delta.status() {
210                    git2::Delta::Added => {
211                        files_added += 1;
212                        "A"
213                    }
214                    git2::Delta::Deleted => {
215                        files_deleted += 1;
216                        "D"
217                    }
218                    git2::Delta::Modified => "M",
219                    git2::Delta::Renamed => "R",
220                    git2::Delta::Copied => "C",
221                    git2::Delta::Typechange => "T",
222                    _ => "?",
223                };
224
225                if let Some(path) = delta.new_file().path() {
226                    if let Some(path_str) = path.to_str() {
227                        file_list.push(FileChange {
228                            status: status.to_string(),
229                            file: path_str.to_string(),
230                        });
231                    }
232                }
233
234                true
235            },
236            None,
237            None,
238            None,
239        )
240        .context("Failed to process diff")?;
241
242        let total_files = file_list.len();
243
244        Ok(FileChanges {
245            total_files,
246            files_added,
247            files_deleted,
248            file_list,
249        })
250    }
251
252    /// Detect conventional commit type based on files and existing message
253    fn detect_commit_type(commit: &Commit, file_changes: &FileChanges) -> String {
254        let message = commit.message().unwrap_or("");
255
256        // Check if message already has conventional commit format
257        if let Some(existing_type) = Self::extract_conventional_type(message) {
258            return existing_type;
259        }
260
261        // Analyze file patterns
262        let files: Vec<&str> = file_changes
263            .file_list
264            .iter()
265            .map(|f| f.file.as_str())
266            .collect();
267
268        // Check for specific patterns
269        if files
270            .iter()
271            .any(|f| f.contains("test") || f.contains("spec"))
272        {
273            "test".to_string()
274        } else if files
275            .iter()
276            .any(|f| f.ends_with(".md") || f.contains("README") || f.contains("docs/"))
277        {
278            "docs".to_string()
279        } else if files
280            .iter()
281            .any(|f| f.contains("Cargo.toml") || f.contains("package.json") || f.contains("config"))
282        {
283            if file_changes.files_added > 0 {
284                "feat".to_string()
285            } else {
286                "chore".to_string()
287            }
288        } else if file_changes.files_added > 0
289            && files
290                .iter()
291                .any(|f| f.ends_with(".rs") || f.ends_with(".js") || f.ends_with(".py"))
292        {
293            "feat".to_string()
294        } else if message.to_lowercase().contains("fix") || message.to_lowercase().contains("bug") {
295            "fix".to_string()
296        } else if file_changes.files_deleted > file_changes.files_added {
297            "refactor".to_string()
298        } else {
299            "chore".to_string()
300        }
301    }
302
303    /// Extract conventional commit type from existing message
304    fn extract_conventional_type(message: &str) -> Option<String> {
305        let first_line = message.lines().next().unwrap_or("");
306        if let Some(colon_pos) = first_line.find(':') {
307            let prefix = &first_line[..colon_pos];
308            if let Some(paren_pos) = prefix.find('(') {
309                let type_part = &prefix[..paren_pos];
310                if Self::is_valid_conventional_type(type_part) {
311                    return Some(type_part.to_string());
312                }
313            } else if Self::is_valid_conventional_type(prefix) {
314                return Some(prefix.to_string());
315            }
316        }
317        None
318    }
319
320    /// Check if a string is a valid conventional commit type
321    fn is_valid_conventional_type(s: &str) -> bool {
322        matches!(
323            s,
324            "feat"
325                | "fix"
326                | "docs"
327                | "style"
328                | "refactor"
329                | "test"
330                | "chore"
331                | "build"
332                | "ci"
333                | "perf"
334        )
335    }
336
337    /// Detect scope from file paths
338    fn detect_scope(file_changes: &FileChanges) -> String {
339        let files: Vec<&str> = file_changes
340            .file_list
341            .iter()
342            .map(|f| f.file.as_str())
343            .collect();
344
345        // Analyze common path patterns
346        if files.iter().any(|f| f.starts_with("src/cli/")) {
347            "cli".to_string()
348        } else if files.iter().any(|f| f.starts_with("src/git/")) {
349            "git".to_string()
350        } else if files.iter().any(|f| f.starts_with("src/data/")) {
351            "data".to_string()
352        } else if files.iter().any(|f| f.starts_with("tests/")) {
353            "test".to_string()
354        } else if files.iter().any(|f| f.starts_with("docs/")) {
355            "docs".to_string()
356        } else if files
357            .iter()
358            .any(|f| f.contains("Cargo.toml") || f.contains("deny.toml"))
359        {
360            "deps".to_string()
361        } else {
362            "".to_string()
363        }
364    }
365
366    /// Re-detect scope using file_patterns from scope definitions.
367    ///
368    /// More specific patterns (more literal path components) win regardless of
369    /// definition order in scopes.yaml. Equally specific matches are joined
370    /// with ", ". If no scope definitions match, the existing detected_scope
371    /// is kept as a fallback.
372    pub fn refine_scope(&mut self, scope_defs: &[ScopeDefinition]) {
373        if scope_defs.is_empty() {
374            return;
375        }
376        let files: Vec<&str> = self
377            .file_changes
378            .file_list
379            .iter()
380            .map(|f| f.file.as_str())
381            .collect();
382        if files.is_empty() {
383            return;
384        }
385
386        let mut matches: Vec<(&str, usize)> = Vec::new();
387        for scope_def in scope_defs {
388            if let Some(specificity) = Self::scope_matches_files(&files, &scope_def.file_patterns) {
389                matches.push((&scope_def.name, specificity));
390            }
391        }
392
393        if matches.is_empty() {
394            return;
395        }
396
397        let max_specificity = matches.iter().map(|(_, s)| *s).max().unwrap();
398        let best: Vec<&str> = matches
399            .into_iter()
400            .filter(|(_, s)| *s == max_specificity)
401            .map(|(name, _)| name)
402            .collect();
403
404        self.detected_scope = best.join(", ");
405    }
406
407    /// Check if a scope's file_patterns match any of the given files.
408    ///
409    /// Returns `Some(max_specificity)` if at least one file matches the scope
410    /// (after applying negation patterns), or `None` if no file matches.
411    fn scope_matches_files(files: &[&str], patterns: &[String]) -> Option<usize> {
412        let mut positive = Vec::new();
413        let mut negative = Vec::new();
414        for pat in patterns {
415            if let Some(stripped) = pat.strip_prefix('!') {
416                negative.push(stripped);
417            } else {
418                positive.push(pat.as_str());
419            }
420        }
421
422        // Build negative matchers
423        let neg_matchers: Vec<_> = negative
424            .iter()
425            .filter_map(|p| Glob::new(p).ok().map(|g| g.compile_matcher()))
426            .collect();
427
428        let mut max_specificity: Option<usize> = None;
429        for pat in &positive {
430            let glob = match Glob::new(pat) {
431                Ok(g) => g,
432                Err(_) => continue,
433            };
434            let matcher = glob.compile_matcher();
435            for file in files {
436                if matcher.is_match(file) && !neg_matchers.iter().any(|neg| neg.is_match(file)) {
437                    let specificity = Self::count_specificity(pat);
438                    max_specificity =
439                        Some(max_specificity.map_or(specificity, |cur| cur.max(specificity)));
440                }
441            }
442        }
443        max_specificity
444    }
445
446    /// Count the number of literal (non-wildcard) path segments in a glob pattern.
447    ///
448    /// - `docs/adrs/**` → 2 (`docs`, `adrs`)
449    /// - `docs/**` → 1 (`docs`)
450    /// - `*.md` → 0
451    /// - `src/main/scala/**` → 3
452    fn count_specificity(pattern: &str) -> usize {
453        pattern
454            .split('/')
455            .filter(|segment| !segment.contains('*') && !segment.contains('?'))
456            .count()
457    }
458
459    /// Generate a proposed conventional commit message
460    fn generate_proposed_message(
461        commit: &Commit,
462        commit_type: &str,
463        scope: &str,
464        file_changes: &FileChanges,
465    ) -> String {
466        let current_message = commit.message().unwrap_or("").lines().next().unwrap_or("");
467
468        // If already properly formatted, return as-is
469        if Self::extract_conventional_type(current_message).is_some() {
470            return current_message.to_string();
471        }
472
473        // Generate description based on changes
474        let description =
475            if !current_message.is_empty() && !current_message.eq_ignore_ascii_case("stuff") {
476                current_message.to_string()
477            } else {
478                Self::generate_description(commit_type, file_changes)
479            };
480
481        // Format with scope if available
482        if scope.is_empty() {
483            format!("{}: {}", commit_type, description)
484        } else {
485            format!("{}({}): {}", commit_type, scope, description)
486        }
487    }
488
489    /// Generate description based on commit type and changes
490    fn generate_description(commit_type: &str, file_changes: &FileChanges) -> String {
491        match commit_type {
492            "feat" => {
493                if file_changes.total_files == 1 {
494                    format!("add {}", file_changes.file_list[0].file)
495                } else {
496                    format!("add {} new features", file_changes.total_files)
497                }
498            }
499            "fix" => "resolve issues".to_string(),
500            "docs" => "update documentation".to_string(),
501            "test" => "add tests".to_string(),
502            "refactor" => "improve code structure".to_string(),
503            "chore" => "update project files".to_string(),
504            _ => "update project".to_string(),
505        }
506    }
507
508    /// Get diff summary statistics
509    fn get_diff_summary(repo: &Repository, commit: &Commit) -> Result<String> {
510        let commit_tree = commit.tree().context("Failed to get commit tree")?;
511
512        let parent_tree = if commit.parent_count() > 0 {
513            Some(
514                commit
515                    .parent(0)
516                    .context("Failed to get parent commit")?
517                    .tree()
518                    .context("Failed to get parent tree")?,
519            )
520        } else {
521            None
522        };
523
524        let diff = if let Some(parent_tree) = parent_tree {
525            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
526                .context("Failed to create diff")?
527        } else {
528            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
529                .context("Failed to create diff for initial commit")?
530        };
531
532        let stats = diff.stats().context("Failed to get diff stats")?;
533
534        let mut summary = String::new();
535        for i in 0..stats.files_changed() {
536            if let Some(path) = diff
537                .get_delta(i)
538                .and_then(|d| d.new_file().path())
539                .and_then(|p| p.to_str())
540            {
541                let insertions = stats.insertions();
542                let deletions = stats.deletions();
543                summary.push_str(&format!(
544                    " {} | {} +{} -{}\n",
545                    path,
546                    insertions + deletions,
547                    insertions,
548                    deletions
549                ));
550            }
551        }
552
553        Ok(summary)
554    }
555
556    /// Write full diff content to a file and return the path
557    fn write_diff_to_file(repo: &Repository, commit: &Commit) -> Result<String> {
558        // Get AI scratch directory
559        let ai_scratch_path = crate::utils::ai_scratch::get_ai_scratch_dir()
560            .context("Failed to determine AI scratch directory")?;
561
562        // Create diffs subdirectory
563        let diffs_dir = ai_scratch_path.join("diffs");
564        fs::create_dir_all(&diffs_dir).context("Failed to create diffs directory")?;
565
566        // Create filename with commit hash
567        let commit_hash = commit.id().to_string();
568        let diff_filename = format!("{}.diff", commit_hash);
569        let diff_path = diffs_dir.join(&diff_filename);
570
571        let commit_tree = commit.tree().context("Failed to get commit tree")?;
572
573        let parent_tree = if commit.parent_count() > 0 {
574            Some(
575                commit
576                    .parent(0)
577                    .context("Failed to get parent commit")?
578                    .tree()
579                    .context("Failed to get parent tree")?,
580            )
581        } else {
582            None
583        };
584
585        let diff = if let Some(parent_tree) = parent_tree {
586            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
587                .context("Failed to create diff")?
588        } else {
589            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
590                .context("Failed to create diff for initial commit")?
591        };
592
593        let mut diff_content = String::new();
594
595        diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
596            let content = std::str::from_utf8(line.content()).unwrap_or("<binary>");
597            let prefix = match line.origin() {
598                '+' => "+",
599                '-' => "-",
600                ' ' => " ",
601                '@' => "@",
602                'H' => "", // Header
603                'F' => "", // File header
604                _ => "",
605            };
606            diff_content.push_str(&format!("{}{}", prefix, content));
607            true
608        })
609        .context("Failed to format diff")?;
610
611        // Ensure the diff content ends with a newline to encourage literal block style
612        if !diff_content.ends_with('\n') {
613            diff_content.push('\n');
614        }
615
616        // Write diff content to file
617        fs::write(&diff_path, diff_content).context("Failed to write diff file")?;
618
619        // Return the path as a string
620        Ok(diff_path.to_string_lossy().to_string())
621    }
622}
623
624impl CommitInfoForAI {
625    /// Convert from basic CommitInfo by loading diff content
626    pub fn from_commit_info(commit_info: CommitInfo) -> Result<Self> {
627        let analysis = CommitAnalysisForAI::from_commit_analysis(commit_info.analysis)?;
628
629        Ok(Self {
630            hash: commit_info.hash,
631            author: commit_info.author,
632            date: commit_info.date,
633            original_message: commit_info.original_message,
634            in_main_branches: commit_info.in_main_branches,
635            analysis,
636        })
637    }
638}
639
640impl CommitAnalysisForAI {
641    /// Convert from basic CommitAnalysis by loading diff content from file
642    pub fn from_commit_analysis(analysis: CommitAnalysis) -> Result<Self> {
643        // Read the actual diff content from the file
644        let diff_content = fs::read_to_string(&analysis.diff_file)
645            .with_context(|| format!("Failed to read diff file: {}", analysis.diff_file))?;
646
647        Ok(Self {
648            detected_type: analysis.detected_type,
649            detected_scope: analysis.detected_scope,
650            proposed_message: analysis.proposed_message,
651            file_changes: analysis.file_changes,
652            diff_summary: analysis.diff_summary,
653            diff_file: analysis.diff_file,
654            diff_content,
655        })
656    }
657}