Skip to main content

omni_dev/git/
commit.rs

1//! Git commit operations and analysis.
2
3use std::fs;
4use std::sync::LazyLock;
5
6use anyhow::{Context, Result};
7use chrono::{DateTime, FixedOffset};
8use git2::{Commit, Repository};
9use globset::Glob;
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12
13use crate::data::context::ScopeDefinition;
14
15/// Matches conventional commit scope patterns including breaking-change syntax.
16static SCOPE_RE: LazyLock<Regex> =
17    LazyLock::new(|| Regex::new(r"^[a-z]+!\(([^)]+)\):|^[a-z]+\(([^)]+)\):").unwrap());
18
19/// Commit information structure.
20#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct CommitInfo {
22    /// Full SHA-1 hash of the commit.
23    pub hash: String,
24    /// Commit author name and email address.
25    pub author: String,
26    /// Commit date in ISO format with timezone.
27    pub date: DateTime<FixedOffset>,
28    /// The original commit message as written by the author.
29    pub original_message: String,
30    /// Array of remote main branches that contain this commit.
31    pub in_main_branches: Vec<String>,
32    /// Automated analysis of the commit including type detection and proposed message.
33    pub analysis: CommitAnalysis,
34}
35
36/// Commit analysis information.
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct CommitAnalysis {
39    /// Automatically detected conventional commit type (feat, fix, docs, test, chore, etc.).
40    pub detected_type: String,
41    /// Automatically detected scope based on file paths (cli, git, data, etc.).
42    pub detected_scope: String,
43    /// AI-generated conventional commit message based on file changes.
44    pub proposed_message: String,
45    /// Detailed statistics about file changes in this commit.
46    pub file_changes: FileChanges,
47    /// Git diff --stat output showing lines changed per file.
48    pub diff_summary: String,
49    /// Path to diff file showing line-by-line changes.
50    pub diff_file: String,
51}
52
53/// Enhanced commit analysis for AI processing with full diff content.
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct CommitAnalysisForAI {
56    /// Automatically detected conventional commit type (feat, fix, docs, test, chore, etc.).
57    pub detected_type: String,
58    /// Automatically detected scope based on file paths (cli, git, data, etc.).
59    pub detected_scope: String,
60    /// AI-generated conventional commit message based on file changes.
61    pub proposed_message: String,
62    /// Detailed statistics about file changes in this commit.
63    pub file_changes: FileChanges,
64    /// Git diff --stat output showing lines changed per file.
65    pub diff_summary: String,
66    /// Path to diff file showing line-by-line changes.
67    pub diff_file: String,
68    /// Full diff content for AI analysis.
69    pub diff_content: String,
70}
71
72/// Commit information with enhanced analysis for AI processing.
73#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct CommitInfoForAI {
75    /// Full SHA-1 hash of the commit.
76    pub hash: String,
77    /// Commit author name and email address.
78    pub author: String,
79    /// Commit date in ISO format with timezone.
80    pub date: DateTime<FixedOffset>,
81    /// The original commit message as written by the author.
82    pub original_message: String,
83    /// Array of remote main branches that contain this commit.
84    pub in_main_branches: Vec<String>,
85    /// Enhanced automated analysis of the commit including diff content.
86    pub analysis: CommitAnalysisForAI,
87    /// Deterministic checks already performed; the LLM should treat these as authoritative.
88    #[serde(default, skip_serializing_if = "Vec::is_empty")]
89    pub pre_validated_checks: Vec<String>,
90}
91
92/// File changes statistics.
93#[derive(Debug, Clone, Serialize, Deserialize)]
94pub struct FileChanges {
95    /// Total number of files modified in this commit.
96    pub total_files: usize,
97    /// Number of new files added in this commit.
98    pub files_added: usize,
99    /// Number of files deleted in this commit.
100    pub files_deleted: usize,
101    /// Array of files changed with their git status (M=modified, A=added, D=deleted).
102    pub file_list: Vec<FileChange>,
103}
104
105/// Individual file change.
106#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct FileChange {
108    /// Git status code (A=added, M=modified, D=deleted, R=renamed).
109    pub status: String,
110    /// Path to the file relative to repository root.
111    pub file: String,
112}
113
114impl CommitInfo {
115    /// Creates a `CommitInfo` from a `git2::Commit`.
116    pub fn from_git_commit(repo: &Repository, commit: &Commit) -> Result<Self> {
117        let hash = commit.id().to_string();
118
119        let author = format!(
120            "{} <{}>",
121            commit.author().name().unwrap_or("Unknown"),
122            commit.author().email().unwrap_or("unknown@example.com")
123        );
124
125        let timestamp = commit.author().when();
126        let date = DateTime::from_timestamp(timestamp.seconds(), 0)
127            .context("Invalid commit timestamp")?
128            .with_timezone(
129                &FixedOffset::east_opt(timestamp.offset_minutes() * 60)
130                    .unwrap_or_else(|| FixedOffset::east_opt(0).unwrap()),
131            );
132
133        let original_message = commit.message().unwrap_or("").to_string();
134
135        // TODO: Implement main branch detection
136        let in_main_branches = Vec::new();
137
138        // TODO: Implement commit analysis
139        let analysis = CommitAnalysis::analyze_commit(repo, commit)?;
140
141        Ok(Self {
142            hash,
143            author,
144            date,
145            original_message,
146            in_main_branches,
147            analysis,
148        })
149    }
150}
151
152impl CommitAnalysis {
153    /// Analyzes a commit and generates analysis information.
154    pub fn analyze_commit(repo: &Repository, commit: &Commit) -> Result<Self> {
155        // Get file changes
156        let file_changes = Self::analyze_file_changes(repo, commit)?;
157
158        // Detect conventional commit type based on files and message
159        let detected_type = Self::detect_commit_type(commit, &file_changes);
160
161        // Detect scope based on file paths
162        let detected_scope = Self::detect_scope(&file_changes);
163
164        // Generate proposed conventional commit message
165        let proposed_message =
166            Self::generate_proposed_message(commit, &detected_type, &detected_scope, &file_changes);
167
168        // Get diff summary
169        let diff_summary = Self::get_diff_summary(repo, commit)?;
170
171        // Write diff to file and get path
172        let diff_file = Self::write_diff_to_file(repo, commit)?;
173
174        Ok(Self {
175            detected_type,
176            detected_scope,
177            proposed_message,
178            file_changes,
179            diff_summary,
180            diff_file,
181        })
182    }
183
184    /// Analyzes file changes in the commit.
185    fn analyze_file_changes(repo: &Repository, commit: &Commit) -> Result<FileChanges> {
186        let mut file_list = Vec::new();
187        let mut files_added = 0;
188        let mut files_deleted = 0;
189
190        // Get the tree for this commit
191        let commit_tree = commit.tree().context("Failed to get commit tree")?;
192
193        // Get parent tree if available
194        let parent_tree = if commit.parent_count() > 0 {
195            Some(
196                commit
197                    .parent(0)
198                    .context("Failed to get parent commit")?
199                    .tree()
200                    .context("Failed to get parent tree")?,
201            )
202        } else {
203            None
204        };
205
206        // Create diff between parent and commit
207        let diff = if let Some(parent_tree) = parent_tree {
208            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
209                .context("Failed to create diff")?
210        } else {
211            // Initial commit - diff against empty tree
212            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
213                .context("Failed to create diff for initial commit")?
214        };
215
216        // Process each diff delta
217        diff.foreach(
218            &mut |delta, _progress| {
219                let status = match delta.status() {
220                    git2::Delta::Added => {
221                        files_added += 1;
222                        "A"
223                    }
224                    git2::Delta::Deleted => {
225                        files_deleted += 1;
226                        "D"
227                    }
228                    git2::Delta::Modified => "M",
229                    git2::Delta::Renamed => "R",
230                    git2::Delta::Copied => "C",
231                    git2::Delta::Typechange => "T",
232                    _ => "?",
233                };
234
235                if let Some(path) = delta.new_file().path() {
236                    if let Some(path_str) = path.to_str() {
237                        file_list.push(FileChange {
238                            status: status.to_string(),
239                            file: path_str.to_string(),
240                        });
241                    }
242                }
243
244                true
245            },
246            None,
247            None,
248            None,
249        )
250        .context("Failed to process diff")?;
251
252        let total_files = file_list.len();
253
254        Ok(FileChanges {
255            total_files,
256            files_added,
257            files_deleted,
258            file_list,
259        })
260    }
261
262    /// Detects conventional commit type based on files and existing message.
263    fn detect_commit_type(commit: &Commit, file_changes: &FileChanges) -> String {
264        let message = commit.message().unwrap_or("");
265
266        // Check if message already has conventional commit format
267        if let Some(existing_type) = Self::extract_conventional_type(message) {
268            return existing_type;
269        }
270
271        // Analyze file patterns
272        let files: Vec<&str> = file_changes
273            .file_list
274            .iter()
275            .map(|f| f.file.as_str())
276            .collect();
277
278        // Check for specific patterns
279        if files
280            .iter()
281            .any(|f| f.contains("test") || f.contains("spec"))
282        {
283            "test".to_string()
284        } else if files
285            .iter()
286            .any(|f| f.ends_with(".md") || f.contains("README") || f.contains("docs/"))
287        {
288            "docs".to_string()
289        } else if files
290            .iter()
291            .any(|f| f.contains("Cargo.toml") || f.contains("package.json") || f.contains("config"))
292        {
293            if file_changes.files_added > 0 {
294                "feat".to_string()
295            } else {
296                "chore".to_string()
297            }
298        } else if file_changes.files_added > 0
299            && files
300                .iter()
301                .any(|f| f.ends_with(".rs") || f.ends_with(".js") || f.ends_with(".py"))
302        {
303            "feat".to_string()
304        } else if message.to_lowercase().contains("fix") || message.to_lowercase().contains("bug") {
305            "fix".to_string()
306        } else if file_changes.files_deleted > file_changes.files_added {
307            "refactor".to_string()
308        } else {
309            "chore".to_string()
310        }
311    }
312
313    /// Extracts conventional commit type from an existing message.
314    fn extract_conventional_type(message: &str) -> Option<String> {
315        let first_line = message.lines().next().unwrap_or("");
316        if let Some(colon_pos) = first_line.find(':') {
317            let prefix = &first_line[..colon_pos];
318            if let Some(paren_pos) = prefix.find('(') {
319                let type_part = &prefix[..paren_pos];
320                if Self::is_valid_conventional_type(type_part) {
321                    return Some(type_part.to_string());
322                }
323            } else if Self::is_valid_conventional_type(prefix) {
324                return Some(prefix.to_string());
325            }
326        }
327        None
328    }
329
330    /// Checks if a string is a valid conventional commit type.
331    fn is_valid_conventional_type(s: &str) -> bool {
332        matches!(
333            s,
334            "feat"
335                | "fix"
336                | "docs"
337                | "style"
338                | "refactor"
339                | "test"
340                | "chore"
341                | "build"
342                | "ci"
343                | "perf"
344        )
345    }
346
347    /// Detects scope from file paths.
348    fn detect_scope(file_changes: &FileChanges) -> String {
349        let files: Vec<&str> = file_changes
350            .file_list
351            .iter()
352            .map(|f| f.file.as_str())
353            .collect();
354
355        // Analyze common path patterns
356        if files.iter().any(|f| f.starts_with("src/cli/")) {
357            "cli".to_string()
358        } else if files.iter().any(|f| f.starts_with("src/git/")) {
359            "git".to_string()
360        } else if files.iter().any(|f| f.starts_with("src/data/")) {
361            "data".to_string()
362        } else if files.iter().any(|f| f.starts_with("tests/")) {
363            "test".to_string()
364        } else if files.iter().any(|f| f.starts_with("docs/")) {
365            "docs".to_string()
366        } else if files
367            .iter()
368            .any(|f| f.contains("Cargo.toml") || f.contains("deny.toml"))
369        {
370            "deps".to_string()
371        } else {
372            "".to_string()
373        }
374    }
375
376    /// Re-detects scope using file_patterns from scope definitions.
377    ///
378    /// More specific patterns (more literal path components) win regardless of
379    /// definition order in scopes.yaml. Equally specific matches are joined
380    /// with ", ". If no scope definitions match, the existing detected_scope
381    /// is kept as a fallback.
382    pub fn refine_scope(&mut self, scope_defs: &[ScopeDefinition]) {
383        if scope_defs.is_empty() {
384            return;
385        }
386        let files: Vec<&str> = self
387            .file_changes
388            .file_list
389            .iter()
390            .map(|f| f.file.as_str())
391            .collect();
392        if files.is_empty() {
393            return;
394        }
395
396        let mut matches: Vec<(&str, usize)> = Vec::new();
397        for scope_def in scope_defs {
398            if let Some(specificity) = Self::scope_matches_files(&files, &scope_def.file_patterns) {
399                matches.push((&scope_def.name, specificity));
400            }
401        }
402
403        if matches.is_empty() {
404            return;
405        }
406
407        // SAFETY: matches is non-empty (guarded by early return above)
408        let max_specificity = matches.iter().map(|(_, s)| *s).max().expect("non-empty");
409        let best: Vec<&str> = matches
410            .into_iter()
411            .filter(|(_, s)| *s == max_specificity)
412            .map(|(name, _)| name)
413            .collect();
414
415        self.detected_scope = best.join(", ");
416    }
417
418    /// Checks if a scope's file_patterns match any of the given files.
419    ///
420    /// Returns `Some(max_specificity)` if at least one file matches the scope
421    /// (after applying negation patterns), or `None` if no file matches.
422    fn scope_matches_files(files: &[&str], patterns: &[String]) -> Option<usize> {
423        let mut positive = Vec::new();
424        let mut negative = Vec::new();
425        for pat in patterns {
426            if let Some(stripped) = pat.strip_prefix('!') {
427                negative.push(stripped);
428            } else {
429                positive.push(pat.as_str());
430            }
431        }
432
433        // Build negative matchers
434        let neg_matchers: Vec<_> = negative
435            .iter()
436            .filter_map(|p| Glob::new(p).ok().map(|g| g.compile_matcher()))
437            .collect();
438
439        let mut max_specificity: Option<usize> = None;
440        for pat in &positive {
441            let glob = match Glob::new(pat) {
442                Ok(g) => g,
443                Err(_) => continue,
444            };
445            let matcher = glob.compile_matcher();
446            for file in files {
447                if matcher.is_match(file) && !neg_matchers.iter().any(|neg| neg.is_match(file)) {
448                    let specificity = Self::count_specificity(pat);
449                    max_specificity =
450                        Some(max_specificity.map_or(specificity, |cur| cur.max(specificity)));
451                }
452            }
453        }
454        max_specificity
455    }
456
457    /// Counts the number of literal (non-wildcard) path segments in a glob pattern.
458    ///
459    /// - `docs/adrs/**` → 2 (`docs`, `adrs`)
460    /// - `docs/**` → 1 (`docs`)
461    /// - `*.md` → 0
462    /// - `src/main/scala/**` → 3
463    fn count_specificity(pattern: &str) -> usize {
464        pattern
465            .split('/')
466            .filter(|segment| !segment.contains('*') && !segment.contains('?'))
467            .count()
468    }
469
470    /// Generates a proposed conventional commit message.
471    fn generate_proposed_message(
472        commit: &Commit,
473        commit_type: &str,
474        scope: &str,
475        file_changes: &FileChanges,
476    ) -> String {
477        let current_message = commit.message().unwrap_or("").lines().next().unwrap_or("");
478
479        // If already properly formatted, return as-is
480        if Self::extract_conventional_type(current_message).is_some() {
481            return current_message.to_string();
482        }
483
484        // Generate description based on changes
485        let description =
486            if !current_message.is_empty() && !current_message.eq_ignore_ascii_case("stuff") {
487                current_message.to_string()
488            } else {
489                Self::generate_description(commit_type, file_changes)
490            };
491
492        // Format with scope if available
493        if scope.is_empty() {
494            format!("{}: {}", commit_type, description)
495        } else {
496            format!("{}({}): {}", commit_type, scope, description)
497        }
498    }
499
500    /// Generates a description based on commit type and changes.
501    fn generate_description(commit_type: &str, file_changes: &FileChanges) -> String {
502        match commit_type {
503            "feat" => {
504                if file_changes.total_files == 1 {
505                    format!("add {}", file_changes.file_list[0].file)
506                } else {
507                    format!("add {} new features", file_changes.total_files)
508                }
509            }
510            "fix" => "resolve issues".to_string(),
511            "docs" => "update documentation".to_string(),
512            "test" => "add tests".to_string(),
513            "refactor" => "improve code structure".to_string(),
514            "chore" => "update project files".to_string(),
515            _ => "update project".to_string(),
516        }
517    }
518
519    /// Returns diff summary statistics.
520    fn get_diff_summary(repo: &Repository, commit: &Commit) -> Result<String> {
521        let commit_tree = commit.tree().context("Failed to get commit tree")?;
522
523        let parent_tree = if commit.parent_count() > 0 {
524            Some(
525                commit
526                    .parent(0)
527                    .context("Failed to get parent commit")?
528                    .tree()
529                    .context("Failed to get parent tree")?,
530            )
531        } else {
532            None
533        };
534
535        let diff = if let Some(parent_tree) = parent_tree {
536            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
537                .context("Failed to create diff")?
538        } else {
539            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
540                .context("Failed to create diff for initial commit")?
541        };
542
543        let stats = diff.stats().context("Failed to get diff stats")?;
544
545        let mut summary = String::new();
546        for i in 0..stats.files_changed() {
547            if let Some(path) = diff
548                .get_delta(i)
549                .and_then(|d| d.new_file().path())
550                .and_then(|p| p.to_str())
551            {
552                let insertions = stats.insertions();
553                let deletions = stats.deletions();
554                summary.push_str(&format!(
555                    " {} | {} +{} -{}\n",
556                    path,
557                    insertions + deletions,
558                    insertions,
559                    deletions
560                ));
561            }
562        }
563
564        Ok(summary)
565    }
566
567    /// Writes full diff content to a file and returns the path.
568    fn write_diff_to_file(repo: &Repository, commit: &Commit) -> Result<String> {
569        // Get AI scratch directory
570        let ai_scratch_path = crate::utils::ai_scratch::get_ai_scratch_dir()
571            .context("Failed to determine AI scratch directory")?;
572
573        // Create diffs subdirectory
574        let diffs_dir = ai_scratch_path.join("diffs");
575        fs::create_dir_all(&diffs_dir).context("Failed to create diffs directory")?;
576
577        // Create filename with commit hash
578        let commit_hash = commit.id().to_string();
579        let diff_filename = format!("{}.diff", commit_hash);
580        let diff_path = diffs_dir.join(&diff_filename);
581
582        let commit_tree = commit.tree().context("Failed to get commit tree")?;
583
584        let parent_tree = if commit.parent_count() > 0 {
585            Some(
586                commit
587                    .parent(0)
588                    .context("Failed to get parent commit")?
589                    .tree()
590                    .context("Failed to get parent tree")?,
591            )
592        } else {
593            None
594        };
595
596        let diff = if let Some(parent_tree) = parent_tree {
597            repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
598                .context("Failed to create diff")?
599        } else {
600            repo.diff_tree_to_tree(None, Some(&commit_tree), None)
601                .context("Failed to create diff for initial commit")?
602        };
603
604        let mut diff_content = String::new();
605
606        diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
607            let content = std::str::from_utf8(line.content()).unwrap_or("<binary>");
608            let prefix = match line.origin() {
609                '+' => "+",
610                '-' => "-",
611                ' ' => " ",
612                '@' => "@",
613                'H' => "", // Header
614                'F' => "", // File header
615                _ => "",
616            };
617            diff_content.push_str(&format!("{}{}", prefix, content));
618            true
619        })
620        .context("Failed to format diff")?;
621
622        // Ensure the diff content ends with a newline to encourage literal block style
623        if !diff_content.ends_with('\n') {
624            diff_content.push('\n');
625        }
626
627        // Write diff content to file
628        fs::write(&diff_path, diff_content).context("Failed to write diff file")?;
629
630        // Return the path as a string
631        Ok(diff_path.to_string_lossy().to_string())
632    }
633}
634
635impl CommitInfoForAI {
636    /// Converts from a basic `CommitInfo` by loading diff content.
637    pub fn from_commit_info(commit_info: CommitInfo) -> Result<Self> {
638        let analysis = CommitAnalysisForAI::from_commit_analysis(commit_info.analysis)?;
639
640        Ok(Self {
641            hash: commit_info.hash,
642            author: commit_info.author,
643            date: commit_info.date,
644            original_message: commit_info.original_message,
645            in_main_branches: commit_info.in_main_branches,
646            analysis,
647            pre_validated_checks: Vec::new(),
648        })
649    }
650
651    /// Runs deterministic pre-validation checks on the commit message.
652    /// Passing checks are recorded in pre_validated_checks so the LLM
653    /// can skip re-checking them. Failing checks are not recorded.
654    pub fn run_pre_validation_checks(&mut self) {
655        if let Some(caps) = SCOPE_RE.captures(&self.original_message) {
656            let scope = caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str());
657            if let Some(scope) = scope {
658                if scope.contains(',') && !scope.contains(", ") {
659                    self.pre_validated_checks.push(format!(
660                        "Scope format verified: multi-scope '{}' correctly uses commas without spaces",
661                        scope
662                    ));
663                }
664            }
665        }
666    }
667}
668
669impl CommitAnalysisForAI {
670    /// Converts from a basic `CommitAnalysis` by loading diff content from file.
671    pub fn from_commit_analysis(analysis: CommitAnalysis) -> Result<Self> {
672        // Read the actual diff content from the file
673        let diff_content = fs::read_to_string(&analysis.diff_file)
674            .with_context(|| format!("Failed to read diff file: {}", analysis.diff_file))?;
675
676        Ok(Self {
677            detected_type: analysis.detected_type,
678            detected_scope: analysis.detected_scope,
679            proposed_message: analysis.proposed_message,
680            file_changes: analysis.file_changes,
681            diff_summary: analysis.diff_summary,
682            diff_file: analysis.diff_file,
683            diff_content,
684        })
685    }
686}