project_rag/git/
chunker.rs1use crate::git::walker::CommitInfo;
2use crate::indexer::CodeChunk;
3use crate::types::ChunkMetadata;
4use anyhow::Result;
5
/// Converts git commits into text chunks, capping how much content each chunk holds.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommitChunker {
    /// Maximum length, in bytes, of the assembled chunk content before it is
    /// truncated (the limit is compared against `String::len`).
    max_content_length: usize,
}
11
12impl CommitChunker {
13 pub fn new() -> Self {
15 Self {
16 max_content_length: 6000, }
18 }
19
20 pub fn with_max_length(max_content_length: usize) -> Self {
22 Self { max_content_length }
23 }
24
25 pub fn commit_to_chunk(
27 &self,
28 commit: &CommitInfo,
29 repo_path: &str,
30 project: Option<String>,
31 ) -> Result<CodeChunk> {
32 let mut content = String::new();
34
35 content.push_str("Commit Message:\n");
37 content.push_str(&commit.message);
38 content.push_str("\n\n");
39
40 content.push_str("Author: ");
42 content.push_str(&commit.author_name);
43 if !commit.author_email.is_empty() {
44 content.push_str(" <");
45 content.push_str(&commit.author_email);
46 content.push('>');
47 }
48 content.push_str("\n\n");
49
50 if !commit.files_changed.is_empty() {
52 content.push_str("Files Changed:\n");
53 for file in &commit.files_changed {
54 content.push_str("- ");
55 content.push_str(file);
56 content.push('\n');
57 }
58 content.push('\n');
59 }
60
61 if !commit.diff_content.is_empty() {
63 content.push_str("Diff:\n");
64 content.push_str(&commit.diff_content);
65 }
66
67 if content.len() > self.max_content_length {
69 content.truncate(self.max_content_length);
70 content.push_str("\n\n[... content truncated for embedding ...]");
71 }
72
73 let metadata = ChunkMetadata {
76 file_path: format!("git://{}", repo_path),
77 root_path: None,
78 project,
79 start_line: 0,
80 end_line: 0,
81 language: Some("git-commit".to_string()),
82 extension: Some("commit".to_string()),
83 file_hash: commit.hash.clone(),
84 indexed_at: commit.commit_date,
85 };
86
87 Ok(CodeChunk { content, metadata })
88 }
89
90 pub fn commits_to_chunks(
92 &self,
93 commits: &[CommitInfo],
94 repo_path: &str,
95 project: Option<String>,
96 ) -> Result<Vec<CodeChunk>> {
97 commits
98 .iter()
99 .map(|commit| self.commit_to_chunk(commit, repo_path, project.clone()))
100 .collect()
101 }
102}
103
104impl Default for CommitChunker {
105 fn default() -> Self {
106 Self::new()
107 }
108}
109
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fully populated commit fixture that individual tests tweak.
    fn create_test_commit() -> CommitInfo {
        let changed: Vec<String> = ["src/auth.rs", "tests/auth_tests.rs"]
            .iter()
            .map(|path| path.to_string())
            .collect();

        CommitInfo {
            hash: String::from("abc123def456"),
            message: String::from(
                "Fix authentication bug\n\nThis commit fixes a critical bug in the auth module.",
            ),
            author_name: String::from("John Doe"),
            author_email: String::from("john@example.com"),
            commit_date: 1704067200,
            files_changed: changed,
            diff_content: String::from("@@ -10,7 +10,7 @@\n- old_line\n+ new_line\n"),
            parent_hashes: vec![String::from("parent123")],
        }
    }

    /// A regular commit yields the expected metadata and all content sections.
    #[test]
    fn test_commit_to_chunk() {
        let fixture = create_test_commit();
        let result = CommitChunker::new()
            .commit_to_chunk(&fixture, "/repo/path", None)
            .expect("Should convert commit to chunk");

        assert_eq!(result.metadata.file_path, "git:///repo/path");
        assert_eq!(result.metadata.language, Some("git-commit".to_string()));
        assert_eq!(result.metadata.file_hash, "abc123def456");

        let expected_fragments = ["Fix authentication bug", "John Doe", "src/auth.rs", "new_line"];
        for fragment in expected_fragments.iter() {
            assert!(result.content.contains(*fragment));
        }
    }

    /// Oversized content is cut to the limit and flagged with the marker.
    #[test]
    fn test_content_truncation() {
        let mut fixture = create_test_commit();
        fixture.diff_content = "x".repeat(10000);

        let result = CommitChunker::with_max_length(100)
            .commit_to_chunk(&fixture, "/repo/path", None)
            .expect("Should convert commit");

        // The limit is 100 bytes; the bound leaves room for the appended marker.
        assert!(result.content.len() <= 150);
        assert!(result.content.contains("[... content truncated"));
    }

    /// Batch conversion keeps input order and applies the project to chunks.
    #[test]
    fn test_commits_to_chunks_batch() {
        let second = CommitInfo {
            hash: "different_hash".to_string(),
            ..create_test_commit()
        };
        let batch = vec![create_test_commit(), second];

        let results = CommitChunker::new()
            .commits_to_chunks(&batch, "/repo/path", Some("my-project".to_string()))
            .expect("Should convert batch");

        assert_eq!(results.len(), 2);
        assert_eq!(results[0].metadata.file_hash, "abc123def456");
        assert_eq!(results[1].metadata.file_hash, "different_hash");
        assert_eq!(results[0].metadata.project, Some("my-project".to_string()));
    }

    /// An empty e-mail must not leave an empty pair of angle brackets behind.
    #[test]
    fn test_empty_author_email() {
        let mut fixture = create_test_commit();
        fixture.author_email = String::new();

        let result = CommitChunker::new()
            .commit_to_chunk(&fixture, "/repo/path", None)
            .expect("Should handle empty email");

        assert!(result.content.contains("John Doe"));
        assert!(!result.content.contains("<>"));
    }

    /// Without changed files the "Files Changed:" section is left out.
    #[test]
    fn test_no_files_changed() {
        let mut fixture = create_test_commit();
        fixture.files_changed = Vec::new();

        let result = CommitChunker::new()
            .commit_to_chunk(&fixture, "/repo/path", None)
            .expect("Should handle no files");

        assert!(!result.content.contains("Files Changed:"));
    }

    /// Without a diff the "Diff:" section is left out.
    #[test]
    fn test_no_diff_content() {
        let mut fixture = create_test_commit();
        fixture.diff_content = String::new();

        let result = CommitChunker::new()
            .commit_to_chunk(&fixture, "/repo/path", None)
            .expect("Should handle no diff");

        assert!(!result.content.contains("Diff:"));
    }
}