use anyhow::Result;
use std::path::Path;
use crate::config::Config;
use crate::embedding;
use crate::indexer::git_utils::{CommitEntry, GitUtils};
use crate::llm::{LlmClient, Message};
use crate::state::SharedState;
use crate::store::{CommitBlock, Store};
const COMMIT_DESCRIPTION_SYSTEM_PROMPT: &str =
"You are a senior software engineer. For each commit, write a concise 2-3 sentence summary \
of WHAT changed and WHY. Focus on the intent and impact, not individual line changes.";
const MAX_DIFF_CHARS: usize = 8000;
pub async fn index_commits(
config: &Config,
store: &Store,
repo_path: &Path,
state: SharedState,
quiet: bool,
) -> Result<()> {
let branch = match GitUtils::get_default_branch(repo_path) {
Ok(b) => b,
Err(e) => {
tracing::warn!("Could not detect default branch: {}", e);
return Ok(());
}
};
let last_commit = store.get_commits_last_commit_hash().await?;
let commits = GitUtils::get_commit_log(repo_path, &branch, last_commit.as_deref())?;
if commits.is_empty() {
tracing::debug!("No new commits to index");
return Ok(());
}
if !quiet {
eprintln!(
"📝 Indexing {} commits from branch '{}'...",
commits.len(),
branch
);
}
{
let mut state_guard = state.write();
state_guard.status_message = format!("Indexing {} commits...", commits.len());
}
let mut commit_blocks: Vec<CommitBlock> = Vec::with_capacity(commits.len());
let descriptions = if config.commits.use_llm {
generate_descriptions(config, repo_path, &commits, quiet).await?
} else {
std::collections::HashMap::new()
};
for entry in &commits {
let files =
GitUtils::get_changed_files_for_commit(repo_path, &entry.hash).unwrap_or_default();
let files_json = serde_json::to_string(&files).unwrap_or_else(|_| "[]".to_string());
let description = descriptions.get(&entry.hash).cloned().unwrap_or_default();
let mut content = entry.message.clone();
if !files.is_empty() {
content.push_str("\n\nFiles: ");
content.push_str(&files.join(", "));
}
if !description.is_empty() {
content.push_str("\n\n");
content.push_str(&description);
}
commit_blocks.push(CommitBlock {
hash: entry.hash.clone(),
author: entry.author.clone(),
date: entry.date,
message: entry.message.clone(),
content,
files: files_json,
description,
distance: None,
});
}
let texts: Vec<String> = commit_blocks.iter().map(|b| b.content.clone()).collect();
let batch_size = config.index.embeddings_batch_size;
for chunk_start in (0..texts.len()).step_by(batch_size) {
let chunk_end = (chunk_start + batch_size).min(texts.len());
let text_chunk = texts[chunk_start..chunk_end].to_vec();
let block_chunk = &commit_blocks[chunk_start..chunk_end];
let embeddings = embedding::generate_embeddings_batch(
text_chunk,
false, config,
embedding::types::InputType::Document,
)
.await?;
store.store_commit_blocks(block_chunk, &embeddings).await?;
}
if let Some(latest) = commits.last() {
store.store_commits_last_commit_hash(&latest.hash).await?;
}
if !quiet {
eprintln!("✅ Indexed {} commits", commits.len());
}
{
let mut state_guard = state.write();
state_guard.status_message = String::new();
}
Ok(())
}
async fn generate_descriptions(
config: &Config,
repo_path: &Path,
commits: &[CommitEntry],
quiet: bool,
) -> Result<std::collections::HashMap<String, String>> {
let mut descriptions = std::collections::HashMap::new();
let client = match LlmClient::from_config(config) {
Ok(c) => c,
Err(e) => {
if !quiet {
eprintln!("⚠️ LLM not available for commit descriptions: {}", e);
}
return Ok(descriptions);
}
};
let batch_size = 5;
for chunk in commits.chunks(batch_size) {
let mut prompt = String::new();
for (i, entry) in chunk.iter().enumerate() {
let diff = GitUtils::get_commit_diff(repo_path, &entry.hash, MAX_DIFF_CHARS)
.unwrap_or_default();
prompt.push_str(&format!(
"=== COMMIT {} ===\nHash: {}\nMessage: {}\nDiff:\n{}\n\n",
i + 1,
&entry.hash[..8.min(entry.hash.len())],
entry.message,
diff
));
}
prompt.push_str(&format!(
"Provide a JSON object with commit short hashes as keys and description strings as values \
for each of the {} commits above.",
chunk.len()
));
let messages = vec![
Message::system(COMMIT_DESCRIPTION_SYSTEM_PROMPT),
Message::user(&prompt),
];
match client.chat_completion_json(messages, None).await {
Ok(json) => {
if let Some(obj) = json.as_object() {
for entry in chunk {
let short_hash = &entry.hash[..8.min(entry.hash.len())];
if let Some(desc) = obj.get(short_hash).and_then(|v| v.as_str()) {
let trimmed = if desc.len() > 300 {
format!("{}...", &desc[..297])
} else {
desc.to_string()
};
descriptions.insert(entry.hash.clone(), trimmed);
}
}
}
}
Err(e) => {
if !quiet {
eprintln!(
"⚠️ AI description batch failed for {} commits: {}",
chunk.len(),
e
);
}
}
}
}
Ok(descriptions)
}
#[cfg(test)]
mod tests {
use crate::indexer::git_utils::CommitEntry;
use crate::store::CommitBlock;
#[test]
fn test_commit_block_content_composition() {
let entry = CommitEntry {
hash: "abc12345".to_string(),
author: "Alice".to_string(),
date: 1700000000,
message: "feat: add retry logic".to_string(),
};
let files = vec!["src/client.rs".to_string(), "src/retry.rs".to_string()];
let files_json = serde_json::to_string(&files).unwrap();
let description = "Adds exponential backoff retry".to_string();
let mut content = entry.message.clone();
content.push_str("\n\nFiles: ");
content.push_str(&files.join(", "));
content.push_str("\n\n");
content.push_str(&description);
let block = CommitBlock {
hash: entry.hash.clone(),
author: entry.author.clone(),
date: entry.date,
message: entry.message.clone(),
content: content.clone(),
files: files_json,
description,
distance: None,
};
assert_eq!(block.hash, "abc12345");
assert!(block.content.contains("feat: add retry logic"));
assert!(block.content.contains("src/client.rs"));
assert!(block.content.contains("Adds exponential backoff retry"));
}
#[test]
fn test_commit_block_content_without_llm() {
let message = "fix: resolve null pointer".to_string();
let files = ["src/main.rs".to_string()];
let mut content = message.clone();
content.push_str("\n\nFiles: ");
content.push_str(&files.join(", "));
assert!(content.contains("fix: resolve null pointer"));
assert!(content.contains("src/main.rs"));
assert!(!content.contains("\n\n\n")); }
#[test]
fn test_commit_entry_parsing() {
let line = "abc12345deadbeef|Alice|1700000000|feat: add feature";
let parts: Vec<&str> = line.splitn(4, '|').collect();
assert_eq!(parts.len(), 4);
assert_eq!(parts[0], "abc12345deadbeef");
assert_eq!(parts[1], "Alice");
assert_eq!(parts[2], "1700000000");
assert_eq!(parts[3], "feat: add feature");
}
}