use anyhow::Result;
use clap::Args;
use std::io::{self, Write};
use std::process::Command;
use tokio::task::JoinSet;
use octocode::config::Config;
use octocode::indexer::git_utils::GitUtils;
use octocode::utils::diff_chunker;
/// Number of extra attempts made after the first failed LLM call, so an
/// operation is tried at most `MAX_RETRIES + 1` times.
const MAX_RETRIES: usize = 2;
/// Pause between two consecutive attempts, in milliseconds.
const RETRY_DELAY_MS: u64 = 1000;
/// Runs an LLM request and retries it when it fails.
///
/// `operation` is called once per attempt to build a fresh future. The first
/// successful response is returned immediately. After a failed attempt that is
/// not the last one, a warning is written to stderr and the task sleeps for
/// `RETRY_DELAY_MS` milliseconds before trying again.
///
/// `context` is a short human-readable label used in the warning and in the
/// final error message.
///
/// # Errors
/// Returns an error mentioning `context`, the number of attempts and the last
/// failure once all `MAX_RETRIES + 1` attempts have failed.
async fn call_llm_with_retry<F, Fut>(operation: F, context: &str) -> Result<String>
where
    F: Fn() -> Fut,
    Fut: std::future::Future<Output = Result<String>>,
{
    let total_attempts = MAX_RETRIES + 1;
    let mut attempt = 1;
    loop {
        let error = match operation().await {
            Ok(response) => return Ok(response),
            Err(error) => error,
        };

        // The loop always runs at least once, so the final failure is always
        // known here; no "unknown error" fallback is needed.
        if attempt >= total_attempts {
            return Err(anyhow::anyhow!(
                "{} failed after {} attempts: {}",
                context,
                total_attempts,
                error
            ));
        }

        eprintln!(
            "Warning: {} attempt {} failed, retrying in {}ms...",
            context, attempt, RETRY_DELAY_MS
        );
        tokio::time::sleep(tokio::time::Duration::from_millis(RETRY_DELAY_MS)).await;
        attempt += 1;
    }
}
/// Command-line options accepted by the commit command.
#[derive(Args, Debug)]
pub struct CommitArgs {
/// Stage everything with `git add .` (run from the repository root) before committing.
#[arg(short, long)]
pub all: bool,
/// Extra context describing the intent of the commit. It is handed to the AI
/// as guidance; it is not used verbatim as the commit message.
#[arg(short, long)]
pub message: Option<String>,
/// Skip the interactive confirmation prompt and commit straight away.
#[arg(short, long)]
pub yes: bool,
/// Skip the pre-commit hook run and pass `--no-verify` to `git commit`.
#[arg(short, long)]
pub no_verify: bool,
}
/// Stages (optionally), checks, describes and commits the current changes.
///
/// Steps, in order:
/// 1. Find the git repository root starting from the current directory; every
///    git command below runs from that root.
/// 2. With `args.all`, run `git add .`.
/// 3. List the staged files and stop with an error when there are none.
/// 4. Unless `args.no_verify` is set, run the pre-commit hooks, then read the
///    staged file list again because hooks may have changed it.
/// 5. Generate a commit message (using `args.message` as optional guidance),
///    print it and, unless `args.yes` is set, ask for confirmation on stdin.
/// 6. Run `git commit -m <message>` (adding `--no-verify` when requested) and
///    print the one-line summary of the new commit.
///
/// Returns `Ok(())` both after a successful commit and when the user declines
/// the confirmation prompt.
///
/// # Errors
/// Fails when the current directory is not inside a git repository, when a
/// git command cannot be started or exits unsuccessfully, when nothing is
/// staged, when git output is not valid UTF-8, or when message generation
/// fails.
pub async fn execute(config: &Config, args: &CommitArgs) -> Result<()> {
    let current_dir = std::env::current_dir()?;
    let git_root = GitUtils::find_git_root(&current_dir)
        .ok_or_else(|| anyhow::anyhow!("❌ Not in a git repository!"))?;
    // Shadow the working directory so that all git commands run from the root.
    let current_dir = git_root;

    if args.all {
        println!("📂 Adding all changes...");
        let output = Command::new("git")
            .args(["add", "."])
            .current_dir(&current_dir)
            .output()?;
        if !output.status.success() {
            return Err(anyhow::anyhow!(
                "Failed to add files: {}",
                String::from_utf8_lossy(&output.stderr)
            ));
        }
    }

    let output = Command::new("git")
        .args(["diff", "--cached", "--name-only"])
        .current_dir(&current_dir)
        .output()?;
    if !output.status.success() {
        return Err(anyhow::anyhow!(
            "Failed to check staged changes: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    let staged_files = String::from_utf8(output.stdout)?;
    if staged_files.trim().is_empty() {
        return Err(anyhow::anyhow!(
            "❌ No staged changes to commit. Use 'git add' or --all flag."
        ));
    }

    println!("📋 Staged files:");
    for file in staged_files.lines() {
        println!(" • {}", file);
    }

    if !args.no_verify {
        let originally_staged_files: Vec<String> =
            staged_files.lines().map(|s| s.to_string()).collect();
        run_precommit_hooks(&current_dir, args.all, &originally_staged_files).await?;
    }

    // Hooks may have re-staged or unstaged files, so read the list again.
    let output = Command::new("git")
        .args(["diff", "--cached", "--name-only"])
        .current_dir(&current_dir)
        .output()?;
    if !output.status.success() {
        return Err(anyhow::anyhow!(
            "Failed to check staged changes after pre-commit: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    let final_staged_files = String::from_utf8(output.stdout)?;
    if final_staged_files.trim().is_empty() {
        return Err(anyhow::anyhow!(
            "❌ No staged changes remaining after pre-commit hooks."
        ));
    }
    if final_staged_files != staged_files {
        println!("\n🔄 Updated staged files after pre-commit:");
        for file in final_staged_files.lines() {
            println!(" • {}", file);
        }
    }

    println!("\n🤖 Generating commit message...");
    let commit_message =
        generate_commit_message_chunked(&current_dir, config, args.message.as_deref()).await?;

    println!("\n📝 Generated commit message:");
    println!("═══════════════════════════════════");
    println!("{}", commit_message);
    println!("═══════════════════════════════════");

    if !args.yes {
        print!("\nProceed with this commit? [y/N] ");
        // `print!` does not flush; make sure the prompt is visible before reading.
        io::stdout().flush()?;
        let mut input = String::new();
        io::stdin().read_line(&mut input)?;
        // Anything that does not start with "y"/"Y" counts as "no".
        if !input.trim().to_lowercase().starts_with('y') {
            println!("❌ Commit cancelled.");
            return Ok(());
        }
    }

    println!("💾 Committing changes...");
    let mut git_args = vec!["commit", "-m", commit_message.as_str()];
    if args.no_verify {
        git_args.push("--no-verify");
    }
    let output = Command::new("git")
        .args(&git_args)
        .current_dir(&current_dir)
        .output()?;
    if !output.status.success() {
        return Err(anyhow::anyhow!(
            "Failed to commit: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    println!("✅ Successfully committed changes!");

    // Best effort: show the new commit, but do not fail if `git log` does.
    let output = Command::new("git")
        .args(["log", "--oneline", "-1"])
        .current_dir(&current_dir)
        .output()?;
    if output.status.success() {
        let commit_info = String::from_utf8_lossy(&output.stdout);
        println!("📄 Commit: {}", commit_info.trim());
    }
    Ok(())
}
/// Builds a commit message for the staged changes in `repo_path` using the LLM.
///
/// The staged diff (`git diff --cached`) is split with
/// `diff_chunker::chunk_diff`. A single chunk is sent as one request (with
/// retries). Several chunks are processed in parallel, their answers are
/// merged with `diff_chunker::combine_commit_messages`, and the merged text is
/// sent through one more refinement request; if that refinement fails the
/// merged text is returned as is.
///
/// `extra_context`, when present, is added to the prompt as user guidance.
///
/// # Errors
/// Fails when `git diff --cached` cannot be run or exits unsuccessfully, when
/// its output is not valid UTF-8, when the diff is empty, when the staged file
/// list cannot be read, or when the single-chunk request fails after retries.
/// When every parallel chunk fails, the fallback message
/// `"chore: update files"` is returned instead of an error.
async fn generate_commit_message_chunked(
    repo_path: &std::path::Path,
    config: &Config,
    extra_context: Option<&str>,
) -> Result<String> {
    let output = Command::new("git")
        .args(["diff", "--cached"])
        .current_dir(repo_path)
        .output()?;
    if !output.status.success() {
        return Err(anyhow::anyhow!(
            "Failed to get diff: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    let diff = String::from_utf8(output.stdout)?;
    if diff.trim().is_empty() {
        return Err(anyhow::anyhow!("No staged changes found"));
    }

    // Classify the staged files so the prompt can restrict use of the `docs` type.
    let staged_files = GitUtils::get_staged_files(repo_path)?;
    let changed_files = staged_files.join("\n");
    let is_doc_file = |file: &str| {
        file.ends_with(".md") || file.ends_with(".markdown") || file.ends_with(".rst")
    };
    let has_markdown_files = changed_files.lines().any(|file| is_doc_file(file));
    let has_non_markdown_files = changed_files
        .lines()
        .any(|file| !file.trim().is_empty() && !is_doc_file(file));

    // Rough statistics taken from the diff text: every line starting with
    // `+`/`-` counts, minus the `+++`/`---` file header lines.
    let file_count = diff.matches("diff --git").count();
    let additions = diff
        .matches("\n+")
        .count()
        .saturating_sub(diff.matches("\n+++").count());
    let deletions = diff
        .matches("\n-")
        .count()
        .saturating_sub(diff.matches("\n---").count());

    let guidance_section = extra_context
        .map(|context| format!("\n\nUser guidance for commit intent:\n{}", context))
        .unwrap_or_default();

    let docs_restriction = if has_non_markdown_files && !has_markdown_files {
        "\n\nCRITICAL - DOCS TYPE RESTRICTION:\n\
        - NEVER use 'docs(...)' when only non-markdown files are changed\n\
        - Current changes include ONLY non-markdown files (.rs, .js, .py, .toml, etc.)\n\
        - Use 'fix', 'feat', 'refactor', 'chore', etc. instead of 'docs'\n\
        - 'docs' is ONLY for .md, .markdown, .rst files or documentation-only changes"
    } else if has_non_markdown_files && has_markdown_files {
        "\n\nDOCS TYPE GUIDANCE:\n\
        - Use 'docs(...)' ONLY if the primary change is documentation\n\
        - If code changes are the main focus, use appropriate code type (fix, feat, refactor)\n\
        - Mixed changes: prioritize the most significant change type"
    } else {
        ""
    };

    let chunks = diff_chunker::chunk_diff(&diff);

    // Exactly one chunk: a single request, retried on failure.
    if let [only_chunk] = &chunks[..] {
        let prompt = create_commit_prompt(
            &only_chunk.content,
            file_count,
            additions,
            deletions,
            &guidance_section,
            docs_restriction,
        );
        return call_llm_with_retry(
            || call_llm_for_commit_message(&prompt, config),
            "Single chunk commit message",
        )
        .await;
    }

    println!(
        "📊 Processing large diff in {} chunks in parallel...",
        chunks.len()
    );
    let responses = process_commit_chunks_parallel(
        &chunks,
        file_count,
        additions,
        deletions,
        &guidance_section,
        docs_restriction,
        config,
    )
    .await;
    if responses.is_empty() {
        // No chunk produced an answer; fall back to a generic message.
        return Ok("chore: update files".to_string());
    }

    let combined = diff_chunker::combine_commit_messages(responses);
    println!("🎯 Refining commit message with AI...");
    match refine_commit_message_with_ai(&combined, config).await {
        Ok(refined) => Ok(refined),
        Err(e) => {
            eprintln!(
                "Warning: AI refinement failed ({}), using combined message",
                e
            );
            Ok(combined)
        }
    }
}
/// Builds the LLM prompt that asks for a conventional commit message.
///
/// * `diff_content` - the diff (or diff chunk) to describe; it is placed in a
///   fenced block at the end of the prompt.
/// * `file_count`, `additions`, `deletions` - statistics shown in the
///   `Changes: N files (+A -D lines)` line.
/// * `guidance_section` - optional user guidance, inserted verbatim right after
///   the formatting rules (pass `""` for none).
/// * `docs_restriction` - optional extra rules about the `docs` type, inserted
///   verbatim right after the commit type list (pass `""` for none).
///
/// The template uses real line breaks and plain double quotes, so the model
/// receives a multi-line prompt rather than literal backslash sequences.
fn create_commit_prompt(
    diff_content: &str,
    file_count: usize,
    additions: usize,
    deletions: usize,
    guidance_section: &str,
    docs_restriction: &str,
) -> String {
    // A trailing `\` joins the source lines; each `\n` is an actual newline in
    // the resulting prompt.
    format!(
        "Analyze this Git diff and create an appropriate commit message. Be specific and concise.\n\n\
        STRICT FORMATTING RULES:\n\
        - Format: type(scope): description (under 50 chars)\n\
        - Types: feat, fix, docs, style, refactor, test, chore, perf, ci, build\n\
        - Add '!' after type for breaking changes: feat!: or fix!:\n\
        - Be specific, avoid generic words like \"update\", \"change\", \"modify\", \"various\", \"several\"\n\
        - Use imperative mood: \"add\" not \"added\", \"fix\" not \"fixed\"\n\
        - Focus on WHAT functionality changed, not implementation details\n\
        - If user guidance provided, use it to understand the INTENT but create your own message{}\n\n\
        COMMIT TYPE SELECTION (READ CAREFULLY):\n\
        - feat: NEW functionality being added (new features, capabilities, commands)\n\
        - fix: CORRECTING bugs, errors, or broken functionality (including fixes to existing features)\n\
        - refactor: IMPROVING existing code without changing functionality (code restructuring)\n\
        - perf: OPTIMIZING performance without adding features\n\
        - docs: DOCUMENTATION changes ONLY (.md, .markdown, .rst files)\n\
        - test: ADDING or fixing tests\n\
        - style: CODE formatting, whitespace, missing semicolons (no logic changes)\n\
        - chore: MAINTENANCE tasks (dependencies, build, tooling, config)\n\
        - ci: CONTINUOUS integration changes (workflows, pipelines)\n\
        - build: BUILD system changes (Cargo.toml, package.json, Makefile){}\n\n\
        FEATURE vs FIX DECISION GUIDE:\n\
        - If code was working but had bugs/errors → use 'fix' (even for new features with bugs)\n\
        - If adding completely new functionality that didn't exist → use 'feat'\n\
        - If improving existing working code structure → use 'refactor' or 'perf'\n\
        - Examples: 'fix(auth): resolve token validation error', 'feat(auth): add OAuth2 support'\n\
        - When fixing issues in recently added features → use 'fix(scope): correct feature-name issue'\n\
        - When in doubt between feat/fix: choose 'fix' if addressing problems, 'feat' if adding completely new\n\n\
        BREAKING CHANGE DETECTION:\n\
        - Look for function signature changes, API modifications, removed public methods\n\
        - Check for interface/trait changes, configuration schema changes\n\
        - Identify database migrations, dependency version bumps with breaking changes\n\
        - If breaking changes detected, use type! format and add BREAKING CHANGE footer\n\n\
        BODY RULES (add body with bullet points if ANY of these apply):\n\
        - 4+ files changed OR 25+ lines changed\n\
        - Multiple different types of changes (feat+fix, refactor+feat, etc.)\n\
        - Complex refactoring or architectural changes\n\
        - Breaking changes or major feature additions\n\
        - Changes affect multiple modules/components\n\n\
        Body format when needed:\n\
        - Blank line after subject\n\
        - Start each point with \"- \"\n\
        - Focus on key changes and their purpose\n\
        - Explain WHY if not obvious from subject\n\
        - Keep each bullet concise (1 line max)\n\
        - For breaking changes, add footer: \"BREAKING CHANGE: description\"\n\n\
        Changes: {} files (+{} -{} lines)\n\n\
        Git diff:\n\
        ```\n{}\n```\n\n\
        Generate commit message:",
        guidance_section,
        docs_restriction,
        file_count,
        additions,
        deletions,
        diff_content
    )
}
/// Waits for every task in `join_set` and returns the successful responses
/// ordered by the chunk index each task reported.
///
/// `expected_count` is the number of result slots. Tasks that returned an
/// error, or that could not be joined, leave their slot empty and are left out
/// of the returned vector, so it may be shorter than `expected_count`.
async fn collect_ordered_responses(
    mut join_set: JoinSet<Result<(usize, String)>>,
    expected_count: usize,
) -> Vec<String> {
    let mut ordered_responses: Vec<Option<String>> = vec![None; expected_count];
    while let Some(result) = join_set.join_next().await {
        match result {
            // Tasks finish in any order; the index puts each answer in place.
            Ok(Ok((index, response))) => match ordered_responses.get_mut(index) {
                Some(slot) => *slot = Some(response),
                // An index outside the expected range is dropped with a
                // warning instead of panicking on an out-of-bounds write.
                None => eprintln!(
                    "Warning: Ignoring response for unexpected chunk index {} (only {} expected)",
                    index, expected_count
                ),
            },
            // Deliberately silent: the error value is dropped and the slot
            // stays empty, so the failed chunk is simply left out.
            Ok(Err(_)) => {}
            Err(e) => {
                eprintln!("Warning: Task join error: {}", e);
            }
        }
    }
    ordered_responses.into_iter().flatten().collect()
}
/// Requests a commit message for each diff chunk concurrently.
///
/// At most `diff_chunker::MAX_PARALLEL_CHUNKS` chunks (taken from the front of
/// `chunks`) are processed; each gets its own spawned task and its own prompt
/// built from the chunk content plus the shared statistics and prompt
/// sections. Failed chunks are reported on stderr and skipped.
///
/// Returns the successful responses in chunk order.
async fn process_commit_chunks_parallel(
    chunks: &[diff_chunker::DiffChunk],
    file_count: usize,
    additions: usize,
    deletions: usize,
    guidance_section: &str,
    docs_restriction: &str,
    config: &Config,
) -> Vec<String> {
    let total = chunks.len().min(diff_chunker::MAX_PARALLEL_CHUNKS);
    let mut tasks = JoinSet::new();

    for (index, chunk) in chunks[..total].iter().enumerate() {
        // Spawned tasks must own their data, so copy what each one needs.
        let content = chunk.content.clone();
        let summary = chunk.file_summary.clone();
        let task_config = config.clone();
        let guidance = guidance_section.to_string();
        let docs_rule = docs_restriction.to_string();

        tasks.spawn(async move {
            let position = index + 1;
            println!(" Processing chunk {}/{}: {}", position, total, summary);

            let prompt = create_commit_prompt(
                &content,
                file_count,
                additions,
                deletions,
                &guidance,
                &docs_rule,
            );

            call_llm_for_commit_message(&prompt, &task_config)
                .await
                .map(|response| (index, response))
                .map_err(|e| {
                    eprintln!("Warning: Chunk {} failed ({})", position, e);
                    e
                })
        });
    }

    collect_ordered_responses(tasks, total).await
}
/// Sends `prompt` as a single user message to the configured chat-completions
/// endpoint and returns the text of the first choice.
///
/// The API key is taken from `config.openrouter.api_key` when set, otherwise
/// from the `OPENROUTER_API_KEY` environment variable. Model, base URL and
/// request timeout (in seconds) also come from `config.openrouter`.
///
/// # Errors
/// Fails when no API key is available, when the request cannot be sent, when
/// the server answers with a non-success status (the response body is included
/// in the error), or when the JSON answer has no
/// `choices[0].message.content` string.
async fn call_llm_for_commit_message(prompt: &str, config: &Config) -> Result<String> {
    use reqwest::Client;
    use serde_json::{json, Value};

    let client = Client::new();
    let settings = &config.openrouter;

    // Configuration wins over the environment variable.
    let api_key = match &settings.api_key {
        Some(configured) => configured.clone(),
        None => match std::env::var("OPENROUTER_API_KEY") {
            Ok(from_env) => from_env,
            Err(_) => return Err(anyhow::anyhow!("No OpenRouter API key found")),
        },
    };

    let endpoint = format!(
        "{}/chat/completions",
        settings.base_url.trim_end_matches('/')
    );
    let payload = json!({
        "model": settings.model,
        "messages": [
            {
                "role": "user",
                "content": prompt
            }
        ],
        "temperature": 0.1,
        "max_tokens": 300
    });

    let request = client
        .post(endpoint)
        .header("Authorization", format!("Bearer {}", api_key))
        .header("HTTP-Referer", "https://github.com/muvon/octocode")
        .header("X-Title", "Octocode")
        .header("Content-Type", "application/json")
        .json(&payload)
        .timeout(std::time::Duration::from_secs(settings.timeout));
    let response = request.send().await?;

    if !response.status().is_success() {
        let error_text = response.text().await?;
        return Err(anyhow::anyhow!("LLM API error: {}", error_text));
    }

    let body: Value = response.json().await?;
    // Indexing a `Value` yields `Null` for any missing step, so one `as_str`
    // check covers every way the expected path can be absent.
    match body["choices"][0]["message"]["content"].as_str() {
        Some(text) => Ok(text.to_string()),
        None => Err(anyhow::anyhow!("Invalid response format from LLM")),
    }
}
/// Asks the LLM to condense `verbose_message` (the merged per-chunk messages)
/// into one concise conventional commit message.
///
/// The request goes through `call_llm_with_retry`, so it is retried on failure.
///
/// # Errors
/// Returns the retry helper's error when every attempt fails.
async fn refine_commit_message_with_ai(verbose_message: &str, config: &Config) -> Result<String> {
    let prompt = format!(
        r#"You are an expert at creating concise, professional commit messages.
I have a verbose commit message that was generated by combining multiple chunks of a large diff. Your task is to refine it into a clean, concise commit message that follows conventional commit format.
ORIGINAL VERBOSE MESSAGE:
{}
REFINEMENT REQUIREMENTS:
1. Keep the conventional commit format: type(scope): description
2. Choose the MOST APPROPRIATE single type (feat, fix, refactor, chore, docs, etc.)
3. Remove ALL duplication and redundancy
4. Create concise bullet points for the body (if needed)
5. Focus on WHAT changed, not implementation details
6. Maximum 50 characters for subject line
7. Maximum 72 characters per body line
8. Group related changes together
9. Remove verbose explanations - keep it factual and brief
EXAMPLE OUTPUT FORMAT:
refactor(diff_chunker): improve chunking with limits and robustness
- Add resource limits to prevent exhaustion
- Enhance filename extraction accuracy
- Improve error handling and logging
- Add comprehensive test coverage
Return ONLY the refined commit message, nothing else."#,
        verbose_message
    );

    // A new request future is built for each attempt.
    let send_request = || call_llm_for_commit_message(&prompt, config);
    call_llm_with_retry(send_request, "AI commit message refinement").await
}
/// Reports whether the `pre-commit` executable can be run.
///
/// Runs `pre-commit --version` and returns `true` only when the process starts
/// and exits successfully; a failure to start it counts as "not available".
fn is_precommit_available() -> bool {
    match Command::new("pre-commit").arg("--version").output() {
        Ok(probe) => probe.status.success(),
        Err(_) => false,
    }
}
/// Reports whether `repo_path` directly contains a pre-commit configuration
/// file, either `.pre-commit-config.yaml` or `.pre-commit-config.yml`.
fn has_precommit_config(repo_path: &std::path::Path) -> bool {
    [".pre-commit-config.yaml", ".pre-commit-config.yml"]
        .iter()
        .any(|file_name| repo_path.join(file_name).exists())
}
/// Runs the `pre-commit` tool in `repo_path` and re-stages files it modified.
///
/// Nothing happens (and `Ok(())` is returned) when the repository has no
/// pre-commit configuration file or the `pre-commit` executable is not
/// available.
///
/// * `run_all` - run `pre-commit run --all-files` instead of `pre-commit run`.
/// * `originally_staged_files` - paths that were staged before the hooks ran;
///   only these are re-staged when the hooks changed them.
///
/// Handling by exit code of `pre-commit`:
/// * `0` - report success.
/// * `1` - print the tool's stdout, re-stage originally staged files that now
///   show up in `git diff --name-only`, and print stderr when it contains
///   `FAILED`.
/// * `3` - report that no hooks were configured to run.
/// * anything else, or termination by signal - print a warning.
///
/// Hook problems are reported but never turned into an error; the caller
/// carries on with the commit.
///
/// # Errors
/// Fails only when `pre-commit` or `git` cannot be started.
async fn run_precommit_hooks(
    repo_path: &std::path::Path,
    run_all: bool,
    originally_staged_files: &[String],
) -> Result<()> {
    // Check for the config file first: it is a cheap filesystem lookup, while
    // the availability probe has to start a process.
    if !has_precommit_config(repo_path) || !is_precommit_available() {
        return Ok(());
    }

    println!("🔧 Running pre-commit hooks...");
    let pre_commit_args: &[&str] = if run_all {
        &["run", "--all-files"]
    } else {
        &["run"]
    };
    let output = Command::new("pre-commit")
        .args(pre_commit_args)
        .current_dir(repo_path)
        .output()?;

    match output.status.code() {
        Some(0) => {
            println!("✅ Pre-commit hooks passed successfully");
        }
        Some(1) => {
            let stderr = String::from_utf8_lossy(&output.stderr);
            let stdout = String::from_utf8_lossy(&output.stdout);
            if !stdout.is_empty() {
                println!("📝 Pre-commit output:\n{}", stdout);
            }

            // Files with unstaged modifications after the hook run.
            let modified_output = Command::new("git")
                .args(["diff", "--name-only"])
                .current_dir(repo_path)
                .output()?;
            if modified_output.status.success() {
                let all_modified_files = String::from_utf8_lossy(&modified_output.stdout);
                let all_modified_set: std::collections::HashSet<&str> =
                    all_modified_files.lines().collect();

                // Only touch files the user had staged; other modified files
                // are left alone.
                let staged_and_modified: Vec<&String> = originally_staged_files
                    .iter()
                    .filter(|file| all_modified_set.contains(file.as_str()))
                    .collect();

                if !staged_and_modified.is_empty() {
                    println!("🔄 Pre-commit hooks modified originally staged files:");
                    for file in &staged_and_modified {
                        println!(" • {}", file);
                    }

                    println!("🔄 Re-staging modified files...");
                    let mut all_restaged = true;
                    for file in &staged_and_modified {
                        // `--` keeps a path that starts with `-` from being
                        // read as an option.
                        let add_output = Command::new("git")
                            .args(["add", "--", file.trim()])
                            .current_dir(repo_path)
                            .output()?;
                        if !add_output.status.success() {
                            all_restaged = false;
                            eprintln!(
                                "⚠️ Warning: Failed to re-stage {}: {}",
                                file,
                                String::from_utf8_lossy(&add_output.stderr)
                            );
                        }
                    }
                    // Claim success only when every file was actually re-staged.
                    if all_restaged {
                        println!("✅ Modified files re-staged successfully");
                    }
                }
            }

            if !stderr.is_empty() && stderr.contains("FAILED") {
                println!("⚠️ Some pre-commit hooks failed:\n{}", stderr);
            }
        }
        Some(3) => {
            println!("ℹ️ No pre-commit hooks configured to run");
        }
        Some(code) => {
            let stderr = String::from_utf8_lossy(&output.stderr);
            println!("⚠️ Pre-commit exited with code {}: {}", code, stderr);
        }
        None => {
            println!("⚠️ Pre-commit was terminated by signal");
        }
    }
    Ok(())
}