use crate::rag::git::walker::CommitInfo;
use crate::rag::indexer::CodeChunk;
use anyhow::Result;
use brainwires_core::ChunkMetadata;
/// Default content limit, in bytes, used by [`CommitChunker::new`] before a
/// chunk's text is truncated.
const DEFAULT_GIT_MAX_CONTENT_LENGTH: usize = 6_000;
/// Converts git commits into text chunks, capping how much text each chunk
/// may carry.
///
/// Derives `Debug` and `Clone` so the chunker can be logged and duplicated
/// like any other small configuration value.
#[derive(Debug, Clone)]
pub struct CommitChunker {
    /// Byte length at which generated content is cut off. A truncation notice
    /// is appended after the cut, so the final text can be slightly longer
    /// than this value.
    max_content_length: usize,
}
impl CommitChunker {
    /// Creates a chunker that uses `DEFAULT_GIT_MAX_CONTENT_LENGTH` as its
    /// content limit.
    pub fn new() -> Self {
        Self::with_max_length(DEFAULT_GIT_MAX_CONTENT_LENGTH)
    }

    /// Creates a chunker with a caller-chosen content limit, measured in bytes.
    pub fn with_max_length(max_content_length: usize) -> Self {
        Self { max_content_length }
    }

    /// Renders a single commit as a [`CodeChunk`].
    ///
    /// The chunk text contains, in order: the commit message, the author
    /// (with the e-mail in angle brackets when it is non-empty), the list of
    /// changed files (when any) and the diff (when non-empty). Text longer
    /// than the configured limit is cut at a UTF-8 character boundary and a
    /// truncation notice is appended.
    ///
    /// The metadata uses `git://{repo_path}/{hash}` as the file path, the
    /// commit hash as the file hash and the commit date as `indexed_at`.
    /// Line numbers are set to 0.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`.
    pub fn commit_to_chunk(
        &self,
        commit: &CommitInfo,
        repo_path: &str,
        project: Option<String>,
    ) -> Result<CodeChunk> {
        let mut content = Self::render_commit_text(commit);
        self.truncate_to_limit(&mut content);

        let file_path = format!("git://{}/{}", repo_path, commit.hash);
        let metadata = ChunkMetadata {
            file_path,
            root_path: None,
            project,
            start_line: 0,
            end_line: 0,
            language: Some("git-commit".to_string()),
            extension: Some("commit".to_string()),
            file_hash: commit.hash.clone(),
            indexed_at: commit.commit_date,
        };

        Ok(CodeChunk { content, metadata })
    }

    /// Converts every commit in `commits` with [`Self::commit_to_chunk`],
    /// preserving order and giving each chunk its own copy of `project`.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error produced by a conversion.
    pub fn commits_to_chunks(
        &self,
        commits: &[CommitInfo],
        repo_path: &str,
        project: Option<String>,
    ) -> Result<Vec<CodeChunk>> {
        let mut chunks = Vec::with_capacity(commits.len());
        for commit in commits {
            let chunk = self.commit_to_chunk(commit, repo_path, project.clone())?;
            chunks.push(chunk);
        }
        Ok(chunks)
    }

    /// Builds the untruncated text for a commit: message, author, changed
    /// files and diff, with empty sections left out.
    fn render_commit_text(commit: &CommitInfo) -> String {
        let mut text = format!(
            "Commit Message:\n{}\n\nAuthor: {}",
            commit.message, commit.author_name
        );

        if !commit.author_email.is_empty() {
            text.push_str(&format!(" <{}>", commit.author_email));
        }
        text.push_str("\n\n");

        if !commit.files_changed.is_empty() {
            let listing: String = commit
                .files_changed
                .iter()
                .map(|path| format!("- {}\n", path))
                .collect();
            text.push_str("Files Changed:\n");
            text.push_str(&listing);
            text.push('\n');
        }

        if !commit.diff_content.is_empty() {
            text.push_str("Diff:\n");
            text.push_str(&commit.diff_content);
        }

        text
    }

    /// Cuts `text` down to the configured limit when it is longer, then
    /// appends the truncation notice. Shorter text is left untouched.
    fn truncate_to_limit(&self, text: &mut String) {
        const TRUNCATION_NOTICE: &str = "\n\n[... content truncated for embedding ...]";

        let limit = self.max_content_length;
        if text.len() <= limit {
            return;
        }

        // Walk back from the limit to the nearest character boundary so a
        // multi-byte character is never split; index 0 is always a boundary.
        let cut = (0..=limit)
            .rev()
            .find(|&idx| text.is_char_boundary(idx))
            .unwrap_or(0);
        text.truncate(cut);
        text.push_str(TRUNCATION_NOTICE);
    }
}
impl Default for CommitChunker {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fully populated commit that the individual tests tweak.
    fn create_test_commit() -> CommitInfo {
        let message = String::from(
            "Fix authentication bug\n\nThis commit fixes a critical bug in the auth module.",
        );
        let files_changed = vec![
            String::from("src/auth.rs"),
            String::from("tests/auth_tests.rs"),
        ];

        CommitInfo {
            hash: String::from("abc123def456"),
            message,
            author_name: String::from("John Doe"),
            author_email: String::from("john@example.com"),
            commit_date: 1704067200,
            files_changed,
            diff_content: String::from("@@ -10,7 +10,7 @@\n- old_line\n+ new_line\n"),
            parent_hashes: vec![String::from("parent123")],
        }
    }

    /// A complete commit yields the expected metadata and mentions every section.
    #[test]
    fn test_commit_to_chunk() {
        let commit = create_test_commit();
        let chunk = CommitChunker::new()
            .commit_to_chunk(&commit, "/repo/path", None)
            .expect("Should convert commit to chunk");

        let meta = &chunk.metadata;
        assert_eq!(meta.file_path, "git:///repo/path/abc123def456");
        assert_eq!(meta.language.as_deref(), Some("git-commit"));
        assert_eq!(meta.file_hash, "abc123def456");

        let body = chunk.content.as_str();
        assert!(body.contains("Fix authentication bug"));
        assert!(body.contains("John Doe"));
        assert!(body.contains("src/auth.rs"));
        assert!(body.contains("new_line"));
    }

    /// Oversized content is cut down and carries the truncation notice.
    #[test]
    fn test_content_truncation() {
        let commit = CommitInfo {
            diff_content: "x".repeat(10000),
            ..create_test_commit()
        };
        let chunk = CommitChunker::with_max_length(100)
            .commit_to_chunk(&commit, "/repo/path", None)
            .expect("Should convert commit");

        // The limit is 100 bytes; the bound leaves room for the appended notice.
        assert!(chunk.content.len() <= 150);
        assert!(chunk.content.contains("[... content truncated"));
    }

    /// Batch conversion keeps input order and passes the project name through.
    #[test]
    fn test_commits_to_chunks_batch() {
        let first = create_test_commit();
        let second = CommitInfo {
            hash: "different_hash".to_string(),
            ..create_test_commit()
        };
        let commits = [first, second];

        let chunks = CommitChunker::new()
            .commits_to_chunks(&commits, "/repo/path", Some("my-project".to_string()))
            .expect("Should convert batch");

        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].metadata.file_hash, "abc123def456");
        assert_eq!(chunks[1].metadata.file_hash, "different_hash");
        assert_eq!(chunks[0].metadata.project.as_deref(), Some("my-project"));
    }

    /// An empty e-mail must not leave empty angle brackets behind.
    #[test]
    fn test_empty_author_email() {
        let commit = CommitInfo {
            author_email: String::new(),
            ..create_test_commit()
        };
        let chunk = CommitChunker::new()
            .commit_to_chunk(&commit, "/repo/path", None)
            .expect("Should handle empty email");

        assert!(chunk.content.contains("John Doe"));
        assert!(!chunk.content.contains("<>"));
    }

    /// Without changed files the "Files Changed:" section is omitted.
    #[test]
    fn test_no_files_changed() {
        let commit = CommitInfo {
            files_changed: Vec::new(),
            ..create_test_commit()
        };
        let chunk = CommitChunker::new()
            .commit_to_chunk(&commit, "/repo/path", None)
            .expect("Should handle no files");

        assert!(!chunk.content.contains("Files Changed:"));
    }

    /// Without a diff the "Diff:" section is omitted.
    #[test]
    fn test_no_diff_content() {
        let commit = CommitInfo {
            diff_content: String::new(),
            ..create_test_commit()
        };
        let chunk = CommitChunker::new()
            .commit_to_chunk(&commit, "/repo/path", None)
            .expect("Should handle no diff");

        assert!(!chunk.content.contains("Diff:"));
    }
}