1use anyhow::{Context, Result};
8use sha2::{Digest, Sha256};
9use std::collections::{HashMap, HashSet};
10
/// Files whose `len()` is at or below this are returned whole.
/// NOTE(review): despite the CHARS name, `str::len()` counts bytes — for
/// multi-byte UTF-8 content this is a conservative (smaller) character bound.
const MAX_DIRECT_FILE_CHARS: usize = 50_000;
/// Target size (bytes) of each chunk carved out of a large file; chunks never
/// split mid-line, so a single long line can exceed this.
const LARGE_FILE_CHUNK_SIZE: usize = 10_000;
/// Upper bound on the number of chunks returned for any one large file.
const MAX_FILE_CHUNKS: usize = 5;
17
/// How a requested file's contents are delivered to the caller.
#[derive(Debug, Clone)]
pub enum FileContent {
    /// The complete file body, used when the file is small enough to inline.
    Full(String),
    /// A large file reduced to a bounded set of sections.
    Chunked {
        /// Path of the file the chunks were taken from.
        path: String,
        /// Size of the original file in bytes (`content.len()`).
        total_size: usize,
        /// Selected sections, each carrying 1-based inclusive line bounds.
        chunks: Vec<FileChunk>,
        /// True when the file holds content beyond the returned chunks.
        has_more: bool,
    },
    /// The file was already surfaced earlier in the conversation; carries the
    /// path only, so the body is not repeated.
    AlreadyInContext(String),
}
37
/// One contiguous section of a large file, with its location and a relevance
/// score assigned during query matching (1.0 when no query was applied).
#[derive(Debug, Clone)]
pub struct FileChunk {
    /// The chunk's text, lines joined with `'\n'` and no trailing newline.
    pub content: String,
    /// 1-based number of the first line in this chunk (inclusive).
    pub line_start: usize,
    /// 1-based number of the last line in this chunk (inclusive).
    pub line_end: usize,
    /// Fraction of query words found in this chunk; 1.0 by default.
    pub relevance_score: f32,
}
50
/// Tracks which files have already been shown in the conversation context and
/// caches the chunk decomposition of large files.
pub struct FileContextManager {
    /// Paths already surfaced; membership here short-circuits re-reads.
    context_files: HashSet<String>,
    /// Per-path chunk cache, filled when a large file is first read.
    /// NOTE(review): written in `get_relevant_chunks` but never read in this
    /// file — presumably for later paging through a file; confirm with callers.
    file_chunks: HashMap<String, Vec<FileChunk>>,
}
58
59impl Default for FileContextManager {
60 fn default() -> Self {
61 Self::new()
62 }
63}
64
65impl FileContextManager {
66 pub fn new() -> Self {
68 Self {
69 context_files: HashSet::new(),
70 file_chunks: HashMap::new(),
71 }
72 }
73
74 pub fn compute_hash(content: &str) -> String {
76 let mut hasher = Sha256::new();
77 hasher.update(content.as_bytes());
78 format!("{:x}", hasher.finalize())
79 }
80
81 pub fn is_in_context(&self, path: &str) -> bool {
83 self.context_files.contains(path)
84 }
85
86 pub fn mark_in_context(&mut self, path: &str) {
88 self.context_files.insert(path.to_string());
89 }
90
91 pub fn clear_context(&mut self) {
93 self.context_files.clear();
94 }
95
96 pub fn context_file_count(&self) -> usize {
98 self.context_files.len()
99 }
100
101 pub async fn get_file_content(
112 &mut self,
113 path: &str,
114 query_context: Option<&str>,
115 ) -> Result<FileContent> {
116 if self.is_in_context(path) {
118 return Ok(FileContent::AlreadyInContext(path.to_string()));
119 }
120
121 let content = tokio::fs::read_to_string(path)
123 .await
124 .with_context(|| format!("Failed to read file: {}", path))?;
125
126 if content.len() <= MAX_DIRECT_FILE_CHARS {
128 self.mark_in_context(path);
129 return Ok(FileContent::Full(content));
130 }
131
132 let chunks = self.get_relevant_chunks(path, &content, query_context)?;
134
135 self.mark_in_context(path);
136
137 Ok(FileContent::Chunked {
138 path: path.to_string(),
139 total_size: content.len(),
140 chunks,
141 has_more: content.len() > MAX_DIRECT_FILE_CHARS,
142 })
143 }
144
145 fn get_relevant_chunks(
147 &mut self,
148 path: &str,
149 content: &str,
150 query_context: Option<&str>,
151 ) -> Result<Vec<FileChunk>> {
152 let all_chunks = self.build_file_chunks(content);
154
155 self.file_chunks
157 .insert(path.to_string(), all_chunks.clone());
158
159 if let Some(query) = query_context {
161 let relevant = self.find_relevant_chunks(&all_chunks, query);
162 if !relevant.is_empty() {
163 return Ok(relevant);
164 }
165 }
166
167 Ok(all_chunks.into_iter().take(MAX_FILE_CHUNKS).collect())
169 }
170
171 fn build_file_chunks(&self, content: &str) -> Vec<FileChunk> {
173 let lines: Vec<&str> = content.lines().collect();
174 let mut chunks = Vec::new();
175 let mut current_line = 0;
176
177 while current_line < lines.len() {
178 let mut chunk_content = String::new();
179 let start_line = current_line + 1; while current_line < lines.len() && chunk_content.len() < LARGE_FILE_CHUNK_SIZE {
183 if !chunk_content.is_empty() {
184 chunk_content.push('\n');
185 }
186 chunk_content.push_str(lines[current_line]);
187 current_line += 1;
188 }
189
190 if !chunk_content.is_empty() {
191 chunks.push(FileChunk {
192 content: chunk_content,
193 line_start: start_line,
194 line_end: current_line,
195 relevance_score: 1.0,
196 });
197 }
198 }
199
200 chunks
201 }
202
203 fn find_relevant_chunks(&self, chunks: &[FileChunk], query: &str) -> Vec<FileChunk> {
205 let query_lower = query.to_lowercase();
206 let query_words: Vec<&str> = query_lower.split_whitespace().collect();
207
208 let mut scored_chunks: Vec<(FileChunk, f32)> = chunks
209 .iter()
210 .filter_map(|chunk| {
211 let content_lower = chunk.content.to_lowercase();
212
213 let matching_words = query_words
215 .iter()
216 .filter(|word| content_lower.contains(*word))
217 .count();
218
219 if matching_words > 0 {
220 let score = matching_words as f32 / query_words.len() as f32;
221 Some((
222 FileChunk {
223 content: chunk.content.clone(),
224 line_start: chunk.line_start,
225 line_end: chunk.line_end,
226 relevance_score: score,
227 },
228 score,
229 ))
230 } else {
231 None
232 }
233 })
234 .collect();
235
236 scored_chunks.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
238
239 scored_chunks
241 .into_iter()
242 .take(MAX_FILE_CHUNKS)
243 .map(|(chunk, _)| chunk)
244 .collect()
245 }
246
247 pub async fn get_file_lines(
249 &mut self,
250 path: &str,
251 start_line: usize,
252 end_line: usize,
253 ) -> Result<FileContent> {
254 let content = tokio::fs::read_to_string(path)
255 .await
256 .with_context(|| format!("Failed to read file: {}", path))?;
257
258 let lines: Vec<&str> = content.lines().collect();
259 let total_lines = lines.len();
260
261 let start = (start_line.saturating_sub(1)).min(total_lines);
262 let end = end_line.min(total_lines);
263
264 if start >= end {
265 return Ok(FileContent::Full(String::new()));
266 }
267
268 let selected_content: String = lines[start..end].join("\n");
269
270 self.mark_in_context(path);
271
272 if selected_content.len() <= MAX_DIRECT_FILE_CHARS {
273 Ok(FileContent::Full(selected_content))
274 } else {
275 Ok(FileContent::Chunked {
276 path: path.to_string(),
277 total_size: content.len(),
278 chunks: vec![FileChunk {
279 content: selected_content,
280 line_start: start + 1,
281 line_end: end,
282 relevance_score: 1.0,
283 }],
284 has_more: true,
285 })
286 }
287 }
288
289 pub fn format_content(file_content: &FileContent) -> String {
291 match file_content {
292 FileContent::Full(content) => content.clone(),
293 FileContent::AlreadyInContext(path) => {
294 format!("[File {} is already shown above]", path)
295 }
296 FileContent::Chunked {
297 path,
298 total_size,
299 chunks,
300 has_more,
301 } => {
302 let mut result = format!(
303 "[File: {} | Size: {} chars | Showing {} relevant sections]\n\n",
304 path,
305 total_size,
306 chunks.len()
307 );
308
309 for chunk in chunks {
310 result.push_str(&format!(
311 "--- Lines {}-{} (relevance: {:.2}) ---\n{}\n\n",
312 chunk.line_start, chunk.line_end, chunk.relevance_score, chunk.content
313 ));
314 }
315
316 if *has_more {
317 result.push_str(
318 "[... more content available, ask for specific sections or line numbers ...]\n",
319 );
320 }
321
322 result
323 }
324 }
325 }
326}
327
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand constructor shared by the tests below.
    fn make_chunk(content: &str, line_start: usize, line_end: usize, score: f32) -> FileChunk {
        FileChunk {
            content: content.to_string(),
            line_start,
            line_end,
            relevance_score: score,
        }
    }

    #[test]
    fn test_file_chunk_creation() {
        let chunk = make_chunk("fn main() {}", 1, 1, 0.95);

        assert_eq!(chunk.line_start, 1);
        assert_eq!(chunk.line_end, 1);
        assert!((chunk.relevance_score - 0.95).abs() < 0.01);
    }

    #[test]
    fn test_format_full_content() {
        let rendered =
            FileContextManager::format_content(&FileContent::Full("hello world".into()));
        assert_eq!(rendered, "hello world");
    }

    #[test]
    fn test_format_already_in_context() {
        let rendered = FileContextManager::format_content(&FileContent::AlreadyInContext(
            "/path/to/file.rs".into(),
        ));
        assert!(rendered.contains("already shown above"));
        assert!(rendered.contains("/path/to/file.rs"));
    }

    #[test]
    fn test_format_chunked_content() {
        let chunked = FileContent::Chunked {
            path: "/path/to/file.rs".into(),
            total_size: 50000,
            chunks: vec![
                make_chunk("fn main() {}", 1, 1, 0.95),
                make_chunk("fn helper() {}", 10, 10, 0.85),
            ],
            has_more: true,
        };

        let rendered = FileContextManager::format_content(&chunked);

        for needle in [
            "/path/to/file.rs",
            "50000 chars",
            "2 relevant sections",
            "fn main()",
            "fn helper()",
            "more content available",
        ] {
            assert!(rendered.contains(needle), "missing: {}", needle);
        }
    }

    #[test]
    fn test_compute_hash() {
        let first = FileContextManager::compute_hash("hello world");
        let second = FileContextManager::compute_hash("hello world");
        let other = FileContextManager::compute_hash("different content");

        assert_eq!(first, second);
        assert_ne!(first, other);
        // SHA-256 renders as 64 lowercase hex characters.
        assert_eq!(first.len(), 64);
    }

    #[test]
    fn test_context_tracking() {
        let mut manager = FileContextManager::new();
        let path = "/some/file.rs";

        assert!(!manager.is_in_context(path));
        assert_eq!(manager.context_file_count(), 0);

        manager.mark_in_context(path);
        assert!(manager.is_in_context(path));
        assert_eq!(manager.context_file_count(), 1);

        manager.clear_context();
        assert!(!manager.is_in_context(path));
        assert_eq!(manager.context_file_count(), 0);
    }

    #[test]
    fn test_build_file_chunks() {
        let chunks =
            FileContextManager::new().build_file_chunks("line 1\nline 2\nline 3\nline 4\nline 5");

        assert!(!chunks.is_empty());
        assert_eq!(chunks[0].line_start, 1);
    }

    #[test]
    fn test_find_relevant_chunks() {
        let manager = FileContextManager::new();
        let chunks = vec![
            make_chunk("This is about authentication and login", 1, 1, 1.0),
            make_chunk("This is about database queries", 2, 2, 1.0),
            make_chunk("This handles user login flow", 3, 3, 1.0),
        ];

        let relevant = manager.find_relevant_chunks(&chunks, "login authentication");

        assert!(!relevant.is_empty());
        let top = &relevant[0];
        assert!(top.content.contains("login") || top.content.contains("authentication"));
    }
}