aigitcommit 1.9.5

A simple git commit message generator by OpenAI compaction model.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
/*!
 * Copyright (c) 2026 Ming Lyu, aka mingcheng
 *
 * This source code is licensed under the MIT License,
 * which is located in the LICENSE file in the source tree's root directory.
 *
 * File: repository.rs
 * Author: mingcheng <mingcheng@apache.org>
 * File Created: 2025-10-16 15:07:05
 *
 * Modified By: mingcheng <mingcheng@apache.org>
 * Last Modified: 2026-06-16 18:30:04
 */

use git2::{Oid, Repository as _Repo, RepositoryOpenFlags, Signature};
use regex::Regex;
use std::error::Error;
use std::fmt::{Display, Formatter};
use std::path::Path;
use std::sync::LazyLock;
use tracing::{info, trace, warn};

use crate::git::message::GitMessage;
use crate::utils::env;

/// File names that are dropped from the staged diff before it is returned.
///
/// These are dependency manifests and lock files that are commonly
/// auto-generated or noisy. `Repository::get_diff` compares each entry with
/// the final path component (the bare file name) of a changed file, so an
/// entry matches that file name in any directory of the repository.
const EXCLUDED_FILES: &[&str] = &[
    "go.mod",
    "go.sum",
    "Cargo.lock",
    "package-lock.json",
    "yarn.lock",
    "pnpm-lock.yaml",
];

/// Lazily compiled pattern used to sanity-check an email address.
///
/// Accepts `local@domain.tld` shapes: one or more characters that are neither
/// `@` nor whitespace, a single `@`, then a domain part that contains at least
/// one `.` with non-empty text on both sides. This is a plausibility check
/// only, not a full RFC 5322 validation. The regex is built on first use and
/// reused afterwards; the `expect` can only fire if the literal pattern itself
/// is malformed.
static EMAIL_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^[^@\s]+@[^@\s]+\.[^@\s]+$").expect("valid email regex"));

/// Author identity (name and email) used when creating a commit.
///
/// Derives `Debug` so values can be logged or shown in test failures, and
/// `Clone` / `PartialEq` / `Eq` so callers can copy and compare identities
/// without writing field-by-field code.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Author {
    /// Display name of the author.
    pub name: String,
    /// Email address of the author.
    pub email: String,
}

/// Git repository wrapper providing high-level operations.
///
/// Owns the underlying `git2` repository handle. `Repository::new` only
/// builds a value for repositories that have a working directory; bare
/// repositories are rejected there.
pub struct Repository {
    /// The wrapped `git2` repository handle all operations are issued against.
    repository: _Repo,
}

/// Human-readable description of the repository, naming the path reported by
/// the underlying `git2` handle.
impl Display for Repository {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let git_dir = self.repository.path().display();
        write!(f, "Git repository at {git_dir}")
    }
}

impl Repository {
    /// Open the git repository that contains `path`.
    ///
    /// # Arguments
    /// * `path` - Path to the git repository (can be a subdirectory within the repo)
    ///
    /// # Returns
    /// * `Ok(Repository)` - Successfully opened repository
    /// * `Err` - Repository could not be opened, or it has no working
    ///   directory (bare repositories are not supported)
    pub fn new(path: &str) -> Result<Repository, Box<dyn Error>> {
        trace!("opening repository at {path}");

        // No open flags and no ceiling directories: discovery may walk upward
        // from `path`, so callers can pass any subdirectory of a working tree.
        let ceiling_dirs = std::iter::empty::<&str>();
        let repository = _Repo::open_ext(path, RepositoryOpenFlags::empty(), ceiling_dirs)?;
        trace!("repository opened successfully");

        // Every later operation needs a working tree, so refuse bare repos up front.
        let Some(work_dir) = repository.workdir() else {
            return Err(
                "the repository has no workdir (bare repositories are not supported)".into(),
            );
        };
        trace!("the repository workdir is: {work_dir:?}");

        Ok(Repository { repository })
    }

    /// Get the path to the underlying `.git` directory.
    ///
    /// Useful for storing repository-scoped state (e.g. local caches).
    ///
    /// # Returns
    /// The path reported by the wrapped `git2` repository handle, borrowed
    /// for as long as `self` is borrowed.
    pub fn git_dir(&self) -> &Path {
        self.repository.path()
    }

    /// Commit the staged changes in the repository
    ///
    /// # Arguments
    /// * `message` - The commit message to use
    ///
    /// # Returns
    /// * `Ok(())` - Commit created successfully
    /// * `Err` - Failed to create commit (no staged changes, invalid author info, etc.)
    pub fn commit(&self, message: &GitMessage) -> Result<Oid, Box<dyn Error>> {
        let message = message.to_string();
        let mut index = self.repository.index()?;

        // Write the current index (staged changes) to a tree object
        let oid = index.write_tree()?;
        let tree = self.repository.find_tree(oid)?;

        // Get the parent commit(s) - handle both initial commit and subsequent commits
        let parents = match self.repository.head() {
            Ok(head_ref) => {
                // Repository has commits, use HEAD as parent
                let head_commit = head_ref.peel_to_commit()?;
                vec![head_commit]
            }
            Err(e) if e.code() == git2::ErrorCode::UnbornBranch => {
                // Initial commit - no parents
                trace!("creating initial commit (no parent commits)");
                vec![]
            }
            Err(e) => return Err(Box::new(e)),
        };

        // Get author information from git config
        let author = self.get_author()?;

        // Create a signature with current timestamp
        let signature = Signature::now(&author.name, &author.email)?;

        // Create the commit with parent references
        let parent_refs: Vec<&git2::Commit> = parents.iter().collect();
        let result = self.repository.commit(
            Some("HEAD"),
            &signature,
            &signature,
            &message,
            &tree,
            &parent_refs,
        )?;

        Ok(result)
    }

    /// Look up a git config value by running `git config --get` inside this
    /// repository's working directory.
    ///
    /// libgit2 does not evaluate the newer conditional include forms such as
    /// `includeIf "hasconfig:remote.*.url:..."`, so a repository relying on
    /// them would silently inherit the global identity instead of its own.
    /// Asking the `git` binary (via `-C <workdir>`) yields the value `git`
    /// itself resolves for this repository path.
    ///
    /// # Returns
    /// * `Some(String)` - Trimmed, non-empty config value
    /// * `None` - no working directory, `git` could not be run or exited
    ///   unsuccessfully, output was not valid UTF-8, or the value was empty
    fn git_cli_config(&self, key: &str) -> Option<String> {
        let workdir = self.repository.workdir()?;

        let mut command = std::process::Command::new("git");
        command
            .arg("-C")
            .arg(workdir)
            .arg("config")
            .arg("--get")
            .arg(key);

        // A spawn failure and a non-zero exit are both treated as "no value".
        let output = command.output().ok().filter(|out| out.status.success())?;
        let stdout = String::from_utf8(output.stdout).ok()?;

        match stdout.trim() {
            "" => None,
            value => {
                trace!("resolved {key} via git CLI: {value}");
                Some(value.to_string())
            }
        }
    }

    /// Resolve a non-empty config value through the shared fallback chain.
    ///
    /// Resolution order:
    /// 1. The `git` CLI scoped to this repository (honors conditional includes
    ///    so different repository paths get their own identity).
    /// 2. libgit2's merged config view (covers environments without `git`).
    /// 3. The given environment variable.
    ///
    /// Every candidate is trimmed and discarded if blank *before* moving on,
    /// so a whitespace-only value at one stage no longer prevents the later
    /// stages from being consulted.
    ///
    /// # Arguments
    /// * `config` - libgit2 config to query in step 2
    /// * `key` - git config key, e.g. `user.email`
    /// * `env_var` - name of the environment variable used in step 3
    ///
    /// # Returns
    /// * `Some(String)` - first trimmed, non-empty value found
    /// * `None` - every source was missing or blank, leaving validation and
    ///   defaults to the caller
    fn resolve_identity(&self, config: &git2::Config, key: &str, env_var: &str) -> Option<String> {
        /// Trim `raw`, mapping a blank result to `None`.
        fn non_blank(raw: String) -> Option<String> {
            let trimmed = raw.trim();
            if trimmed.is_empty() {
                None
            } else {
                Some(trimmed.to_string())
            }
        }

        self.git_cli_config(key)
            .and_then(non_blank)
            .or_else(|| config.get_string(key).ok().and_then(non_blank))
            .or_else(|| std::env::var(env_var).ok().and_then(non_blank))
    }

    /// Get the author email and name from the repository configuration
    ///
    /// Each value is resolved via [`resolve_identity`](Self::resolve_identity);
    /// the email is additionally validated against [`EMAIL_RE`]. When a value
    /// is missing or invalid, a fallback is taken from `GIT_FALLBACK_EMAIL` /
    /// `GIT_FALLBACK_NAME` via `env::get`, with a built-in default passed as
    /// the second argument. The warning reports the fallback actually used
    /// rather than the built-in default. The fallback email is not validated
    /// here.
    ///
    /// # Returns
    /// * `Ok(Author)` - Author information retrieved successfully
    /// * `Err` - Failed to read configuration
    pub fn get_author(&self) -> Result<Author, Box<dyn Error>> {
        const UNKNOWN_EMAIL: &str = "unknown@users.noreply.github.com";
        const UNKNOWN_AUTHOR: &str = "Unknown Author";

        let config = self.repository.config()?;

        let email = self
            .resolve_identity(&config, "user.email", "GIT_AUTHOR_EMAIL")
            .filter(|email| EMAIL_RE.is_match(email))
            .unwrap_or_else(|| {
                // Resolve the fallback first so the log shows the value in effect.
                let fallback = env::get("GIT_FALLBACK_EMAIL", UNKNOWN_EMAIL);
                warn!("user.email missing or invalid, using default: {fallback}");
                fallback
            });

        let name = self
            .resolve_identity(&config, "user.name", "GIT_AUTHOR_NAME")
            .unwrap_or_else(|| {
                let fallback = env::get("GIT_FALLBACK_NAME", UNKNOWN_AUTHOR);
                warn!("user.name missing or empty, using default: {fallback}");
                fallback
            });

        Ok(Author { name, email })
    }

    /// Get the diff of staged changes (index vs HEAD).
    ///
    /// Lock files and other generated noise listed in [`EXCLUDED_FILES`] are
    /// stripped so they don't dominate the prompt. On an unborn branch the
    /// index is diffed against an empty tree.
    ///
    /// Each emitted line is whitespace-trimmed and blank lines are dropped.
    /// Added, removed and context lines keep their patch marker (`+`, `-` or
    /// a space) as the first character, so additions can be told apart from
    /// deletions. File and hunk headers are emitted without a marker.
    ///
    /// # Returns
    /// * `Ok(Vec<String>)` - Lines of the diff in patch format
    /// * `Err` - Failed to generate diff
    pub fn get_diff(&self) -> Result<Vec<String>, Box<dyn Error>> {
        let index = self.repository.index()?;

        // Get the HEAD tree, or None for initial commit
        let head_tree = match self.repository.head() {
            Ok(head_ref) => Some(head_ref.peel_to_commit()?.tree()?),
            Err(e) if e.code() == git2::ErrorCode::UnbornBranch => {
                trace!("generating diff for initial commit");
                None
            }
            Err(e) => return Err(Box::new(e)),
        };

        // Configure diff options
        let mut diffopts = git2::DiffOptions::new();
        diffopts
            .show_binary(false)
            .force_binary(false)
            .ignore_submodules(true)
            .minimal(true)
            .context_lines(3);

        // Generate diff between HEAD and index (staged changes)
        let diff = self.repository.diff_tree_to_index(
            head_tree.as_ref(),
            Some(&index),
            Some(&mut diffopts),
        )?;

        let mut result = Vec::new();

        diff.print(git2::DiffFormat::Patch, |delta, _hunk, line| {
            // Skip excluded files entirely (matched by bare file name).
            if let Some(name) = delta
                .new_file()
                .path()
                .and_then(|p| p.file_name())
                .map(|f| f.to_string_lossy().into_owned())
                && EXCLUDED_FILES.contains(&name.as_str())
            {
                info!("skipping excluded file: {name}");
                return true;
            }

            let content = String::from_utf8_lossy(line.content());
            let trimmed = content.trim();
            if trimmed.is_empty() {
                return true;
            }

            // `content()` does not include the origin marker, so restore it
            // for real patch lines; headers are passed through unchanged.
            match line.origin() {
                origin @ ('+' | '-' | ' ') => result.push(format!("{origin}{trimmed}")),
                _ => result.push(trimmed.to_string()),
            }
            true
        })?;

        Ok(result)
    }

    /// Decide whether commits should be signed off.
    ///
    /// Returns `true` when the git config key `aigitcommit.signoff` reads as
    /// true, or otherwise when `env::get_bool("AIGITCOMMIT_SIGNOFF")` is true.
    ///
    /// A config that cannot be opened, or a key that is missing or not a
    /// boolean, counts as `false`. The environment variable is only consulted
    /// in that case, so users can opt in globally without touching git config
    /// in every repository.
    pub fn should_signoff(&self) -> bool {
        const SIGNOFF_KEY: &str = "aigitcommit.signoff";

        let from_config = match self.repository.config() {
            Ok(config) => config.get_bool(SIGNOFF_KEY).unwrap_or(false),
            Err(_) => false,
        };
        trace!("✍️ git config signoff: {}", from_config);

        if from_config {
            return true;
        }
        env::get_bool("AIGITCOMMIT_SIGNOFF")
    }

    /// Get up to `size` recent commit messages reachable from `HEAD`.
    ///
    /// The walk is sorted with `git2::Sort::TIME`. At most `size` revisions are
    /// visited; revisions that cannot be read, and commits whose message is
    /// unreadable or blank after trimming, are skipped without being replaced,
    /// so the result may be shorter than `size`.
    ///
    /// # Arguments
    /// * `size` - Maximum number of revisions to visit
    ///
    /// # Returns
    /// * `Ok(Vec<String>)` - Trimmed commit messages in walk order; empty when
    ///   `push_head` fails with `ErrorCode::UnbornBranch`
    /// * `Err` - The walk could not be created, started or sorted
    pub fn get_logs(&self, size: usize) -> Result<Vec<String>, Box<dyn Error>> {
        let mut revwalk = self.repository.revwalk()?;

        // An `UnbornBranch` failure is treated as "no history yet" so the tool
        // can still be used before the first commit; anything else is fatal.
        // NOTE(review): the unit test `get_logs_errors_on_unborn_branch` expects
        // an error for an empty repository — confirm which error code
        // `push_head` actually reports in that case.
        if let Err(err) = revwalk.push_head() {
            if err.code() == git2::ErrorCode::UnbornBranch {
                trace!("unborn branch: returning empty commit history");
                return Ok(Vec::new());
            }
            return Err(Box::new(err));
        }

        revwalk.set_sorting(git2::Sort::TIME)?;

        let mut commits: Vec<String> = Vec::new();
        for entry in revwalk.take(size) {
            let Ok(oid) = entry else { continue };
            let Ok(commit) = self.repository.find_commit(oid) else {
                continue;
            };
            let Ok(raw) = commit.message() else { continue };

            let text = raw.trim();
            if !text.is_empty() {
                commits.push(String::from(text));
            }
        }

        trace!("retrieved {} commit messages", commits.len());
        Ok(commits)
    }
}

// Unit tests for `Repository`. Each test builds its own throwaway repository
// on disk via `TempRepo`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use std::sync::atomic::{AtomicU32, Ordering};

    /// A throwaway git repository that removes itself on drop.
    ///
    /// Each instance lives in a uniquely named temp directory so tests can run
    /// in parallel without colliding, and is seeded with a *local* identity so
    /// assertions are independent of the developer's global git config.
    struct TempRepo {
        /// Directory holding the repository; deleted again in `Drop`.
        path: PathBuf,
        /// The wrapper under test, opened on `path`.
        repo: Repository,
    }

    impl TempRepo {
        /// Initialize an empty repo, optionally seeding `user.name` / `user.email`.
        ///
        /// Panics (via `expect` / `unwrap`) if the directory, repository or
        /// config cannot be set up, which fails the calling test.
        fn new(name: Option<&str>, email: Option<&str>) -> Self {
            // Process id + per-process counter keeps directory names unique
            // across test threads and concurrently running test binaries.
            static COUNTER: AtomicU32 = AtomicU32::new(0);
            let id = COUNTER.fetch_add(1, Ordering::Relaxed);
            let path =
                std::env::temp_dir().join(format!("aigitcommit-test-{}-{id}", std::process::id()));
            // Best-effort cleanup of leftovers from an earlier run.
            let _ = std::fs::remove_dir_all(&path);
            std::fs::create_dir_all(&path).expect("create temp dir");

            let raw = _Repo::init(&path).expect("init repo");
            let mut config = raw.config().expect("open config");
            if let Some(name) = name {
                config.set_str("user.name", name).expect("set name");
            }
            if let Some(email) = email {
                config.set_str("user.email", email).expect("set email");
            }
            // Release the raw handles before re-opening through the wrapper.
            drop(config);
            drop(raw);

            let repo = Repository::new(path.to_str().unwrap()).expect("open repo");
            Self { path, repo }
        }
    }

    impl Drop for TempRepo {
        /// Remove the temp directory; failures are ignored on purpose.
        fn drop(&mut self) {
            let _ = std::fs::remove_dir_all(&self.path);
        }
    }

    /// Opening a path that does not exist must fail rather than panic.
    #[test]
    fn new_rejects_nonexistent_path() {
        assert!(Repository::new("/nonexistent/path/should/not/exist").is_err());
    }

    /// The identity stored in the repository's own config is returned as-is.
    #[test]
    fn get_author_reads_local_identity() {
        // Core regression guard: a repository's *own* local config must win,
        // independent of any global identity or conditional includes.
        let tmp = TempRepo::new(Some("Local Dev"), Some("local.dev@example.com"));
        let author = tmp.repo.get_author().unwrap();
        assert_eq!(author.name, "Local Dev");
        assert_eq!(author.email, "local.dev@example.com");
    }

    /// An email that fails `EMAIL_RE` is replaced by a fallback that passes it.
    #[test]
    fn get_author_falls_back_when_email_invalid() {
        // A malformed email is rejected and replaced by a valid default.
        let tmp = TempRepo::new(Some("Local Dev"), Some("not-an-email"));
        let author = tmp.repo.get_author().unwrap();
        assert_ne!(author.email, "not-an-email");
        assert!(EMAIL_RE.is_match(&author.email));
    }

    /// A key that is set nowhere yields `None` from the CLI lookup.
    #[test]
    fn git_cli_config_returns_none_for_missing_key() {
        let tmp = TempRepo::new(None, None);
        assert!(tmp.repo.git_cli_config("aigitcommit.nonexistent").is_none());
    }

    /// Surrounding whitespace in a configured value is stripped.
    #[test]
    fn resolve_identity_trims_and_filters_empty() {
        let tmp = TempRepo::new(None, None);
        let mut config = tmp.repo.repository.config().unwrap();
        config.set_str("user.name", "  Padded Name  ").unwrap();

        let resolved = tmp
            .repo
            .resolve_identity(&config, "user.name", "GIT_AUTHOR_NAME");
        assert_eq!(resolved.as_deref(), Some("Padded Name"));
    }

    /// Expects `get_logs` to fail on a repository that has no commits yet.
    #[test]
    fn get_logs_errors_on_unborn_branch() {
        // A repository with no commits has no HEAD to walk from.
        // NOTE(review): `get_logs` maps an `UnbornBranch` failure from
        // `push_head` to `Ok(vec![])`, so this assertion only holds if the
        // failure on an empty repository carries a different error code —
        // confirm against the git2 version in use and align test and code.
        let tmp = TempRepo::new(Some("Local Dev"), Some("local.dev@example.com"));
        assert!(tmp.repo.get_logs(5).is_err());
    }
}