use anyhow::{Context, Result};
use sha2::{Digest, Sha256};
use std::collections::{HashMap, HashSet};
// Size thresholds below are compared against `str::len()`, i.e. BYTES, not
// Unicode characters — NOTE(review): names say "CHARS"; confirm intent.
// Files at or below this size are returned whole (see `get_file_content`).
const MAX_DIRECT_FILE_CHARS: usize = 50_000;
// Approximate target size of each chunk when splitting a large file; a chunk
// may overshoot by up to one line (see `build_file_chunks`).
const LARGE_FILE_CHUNK_SIZE: usize = 10_000;
// Maximum number of chunks ever returned for one file.
const MAX_FILE_CHUNKS: usize = 5;
/// How a requested file is presented to the caller.
#[derive(Debug, Clone)]
pub enum FileContent {
/// The entire file text; used when the file is small enough.
Full(String),
/// A large file reduced to selected line-ranged sections.
Chunked {
/// Path the chunks were read from.
path: String,
/// Total size of the original file in bytes (`content.len()`).
total_size: usize,
/// The sections chosen for display.
chunks: Vec<FileChunk>,
/// Whether content beyond these chunks exists.
has_more: bool,
},
/// The file was already served earlier in this session; only the path is returned.
AlreadyInContext(String),
}
/// One contiguous run of lines extracted from a file.
#[derive(Debug, Clone)]
pub struct FileChunk {
/// The chunk text: source lines joined with '\n', no trailing newline.
pub content: String,
/// First line of the chunk, 1-based, inclusive.
pub line_start: usize,
/// Last line of the chunk, 1-based, inclusive.
pub line_end: usize,
/// Fraction of query words found in this chunk (0.0-1.0];
/// 1.0 when no query filtering was applied.
pub relevance_score: f32,
}
/// Tracks which files have already been shown in the current session and
/// caches the chunk lists built for large files.
pub struct FileContextManager {
/// Paths marked as already served (see `mark_in_context`).
context_files: HashSet<String>,
/// Per-path chunk cache filled by `get_relevant_chunks`.
/// NOTE(review): written but never read anywhere in this file — confirm
/// whether an external caller uses it or it is dead state.
file_chunks: HashMap<String, Vec<FileChunk>>,
}
// `Default` simply delegates to `new()` so the manager works with
// `..Default::default()` and derive-based containers.
impl Default for FileContextManager {
fn default() -> Self {
Self::new()
}
}
impl FileContextManager {
/// Creates an empty manager: no files in context, no cached chunks.
pub fn new() -> Self {
Self {
context_files: HashSet::new(),
file_chunks: HashMap::new(),
}
}
/// SHA-256 digest of `content`, rendered as 64 lowercase hex characters.
pub fn compute_hash(content: &str) -> String {
let mut hasher = Sha256::new();
hasher.update(content.as_bytes());
format!("{:x}", hasher.finalize())
}
/// Returns true if `path` was previously marked as shown.
pub fn is_in_context(&self, path: &str) -> bool {
self.context_files.contains(path)
}
/// Records that `path` has been shown in this session.
pub fn mark_in_context(&mut self, path: &str) {
self.context_files.insert(path.to_string());
}
/// Forgets every path previously marked as shown.
/// Note: does not clear the `file_chunks` cache.
pub fn clear_context(&mut self) {
self.context_files.clear();
}
/// Number of distinct paths currently marked as shown.
pub fn context_file_count(&self) -> usize {
self.context_files.len()
}
/// Reads `path` and decides how to present it.
///
/// Files already marked in context short-circuit to `AlreadyInContext`.
/// Files of at most `MAX_DIRECT_FILE_CHARS` bytes are returned whole;
/// larger files are split into chunks, optionally filtered by
/// `query_context` (see `get_relevant_chunks`).
///
/// # Errors
/// Returns an error when the file cannot be read as UTF-8 text.
pub async fn get_file_content(
    &mut self,
    path: &str,
    query_context: Option<&str>,
) -> Result<FileContent> {
    if self.is_in_context(path) {
        return Ok(FileContent::AlreadyInContext(path.to_string()));
    }
    let content = tokio::fs::read_to_string(path)
        .await
        .with_context(|| format!("Failed to read file: {}", path))?;
    if content.len() <= MAX_DIRECT_FILE_CHARS {
        self.mark_in_context(path);
        return Ok(FileContent::Full(content));
    }
    let total_lines = content.lines().count();
    let chunks = self.get_relevant_chunks(path, &content, query_context)?;
    self.mark_in_context(path);
    // BUG FIX: `has_more` used to be `content.len() > MAX_DIRECT_FILE_CHARS`,
    // which is always true on this path (the small-file case returned above),
    // so the formatted output claimed "more content available" even when the
    // chunks covered the whole file. Chunks are disjoint line ranges, so the
    // sum of their line counts tells us whether any line was left out.
    let covered_lines: usize = chunks
        .iter()
        .map(|c| c.line_end.saturating_sub(c.line_start) + 1)
        .sum();
    Ok(FileContent::Chunked {
        path: path.to_string(),
        total_size: content.len(),
        chunks,
        has_more: covered_lines < total_lines,
    })
}
/// Splits `content` into chunks, caches the full list under `path`, and
/// returns the subset worth displaying: chunks matching `query_context`
/// when a query is given and hits, otherwise the file's leading chunks.
fn get_relevant_chunks(
    &mut self,
    path: &str,
    content: &str,
    query_context: Option<&str>,
) -> Result<Vec<FileChunk>> {
    let all_chunks = self.build_file_chunks(content);
    self.file_chunks.insert(path.to_string(), all_chunks.clone());
    if let Some(query) = query_context {
        let matched = self.find_relevant_chunks(&all_chunks, query);
        if !matched.is_empty() {
            return Ok(matched);
        }
    }
    // No query, or nothing matched: fall back to the first few chunks.
    let leading: Vec<FileChunk> = all_chunks.into_iter().take(MAX_FILE_CHUNKS).collect();
    Ok(leading)
}
/// Partitions `content` into consecutive line-range chunks of roughly
/// `LARGE_FILE_CHUNK_SIZE` bytes each (a chunk may overshoot by one line,
/// since size is checked before appending). Line numbers are 1-based and
/// inclusive; every chunk gets a neutral relevance score of 1.0.
fn build_file_chunks(&self, content: &str) -> Vec<FileChunk> {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let mut chunks = Vec::new();
    let mut idx = 0;
    while idx < total {
        let first_line = idx + 1; // 1-based start of this chunk
        let mut buf = String::new();
        loop {
            if idx >= total || buf.len() >= LARGE_FILE_CHUNK_SIZE {
                break;
            }
            if !buf.is_empty() {
                buf.push('\n');
            }
            buf.push_str(lines[idx]);
            idx += 1;
        }
        if !buf.is_empty() {
            chunks.push(FileChunk {
                content: buf,
                line_start: first_line,
                line_end: idx,
                relevance_score: 1.0,
            });
        }
    }
    chunks
}
/// Scores each chunk by the fraction of whitespace-separated query words it
/// contains (case-insensitive substring match), drops zero-score chunks, and
/// returns the top `MAX_FILE_CHUNKS` ordered by descending score.
fn find_relevant_chunks(&self, chunks: &[FileChunk], query: &str) -> Vec<FileChunk> {
    let lowered_query = query.to_lowercase();
    let words: Vec<&str> = lowered_query.split_whitespace().collect();
    let mut scored: Vec<(FileChunk, f32)> = Vec::new();
    for chunk in chunks {
        let haystack = chunk.content.to_lowercase();
        let hits = words.iter().filter(|&&w| haystack.contains(w)).count();
        if hits == 0 {
            continue; // chunk shares no words with the query
        }
        let score = hits as f32 / words.len() as f32;
        let mut scored_chunk = chunk.clone();
        scored_chunk.relevance_score = score;
        scored.push((scored_chunk, score));
    }
    // Highest score first; ties keep original (file) order via stable sort.
    scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
    scored
        .into_iter()
        .take(MAX_FILE_CHUNKS)
        .map(|(chunk, _)| chunk)
        .collect()
}
/// Reads a 1-based, inclusive line range of `path`.
///
/// Bounds are clamped to the file; an empty or inverted range yields
/// `FileContent::Full("")` without marking the file in context. Selections
/// above `MAX_DIRECT_FILE_CHARS` bytes are wrapped in a single `Chunked`
/// section.
///
/// # Errors
/// Returns an error when the file cannot be read as UTF-8 text.
pub async fn get_file_lines(
    &mut self,
    path: &str,
    start_line: usize,
    end_line: usize,
) -> Result<FileContent> {
    let content = tokio::fs::read_to_string(path)
        .await
        .with_context(|| format!("Failed to read file: {}", path))?;
    let lines: Vec<&str> = content.lines().collect();
    let total_lines = lines.len();
    // Convert to a 0-based half-open range [start, end), clamped to the file.
    let start = (start_line.saturating_sub(1)).min(total_lines);
    let end = end_line.min(total_lines);
    if start >= end {
        return Ok(FileContent::Full(String::new()));
    }
    let selected_content: String = lines[start..end].join("\n");
    self.mark_in_context(path);
    if selected_content.len() <= MAX_DIRECT_FILE_CHARS {
        Ok(FileContent::Full(selected_content))
    } else {
        Ok(FileContent::Chunked {
            path: path.to_string(),
            total_size: content.len(),
            chunks: vec![FileChunk {
                content: selected_content,
                line_start: start + 1,
                line_end: end,
                relevance_score: 1.0,
            }],
            // BUG FIX: was hard-coded `true`, claiming more content even when
            // the clamped selection spanned the entire file. Report more only
            // when lines exist outside the selected range.
            has_more: start > 0 || end < total_lines,
        })
    }
}
/// Renders a `FileContent` as display text: raw text for `Full`, a short
/// placeholder for `AlreadyInContext`, and a header plus per-chunk sections
/// (with an optional "more content" trailer) for `Chunked`.
pub fn format_content(file_content: &FileContent) -> String {
    use std::fmt::Write as _;
    match file_content {
        FileContent::Full(text) => text.clone(),
        FileContent::AlreadyInContext(path) => {
            format!("[File {} is already shown above]", path)
        }
        FileContent::Chunked {
            path,
            total_size,
            chunks,
            has_more,
        } => {
            let mut out = format!(
                "[File: {} | Size: {} chars | Showing {} relevant sections]\n\n",
                path,
                total_size,
                chunks.len()
            );
            for chunk in chunks {
                // Writing into a String cannot fail; ignore the fmt::Result.
                let _ = write!(
                    out,
                    "--- Lines {}-{} (relevance: {:.2}) ---\n{}\n\n",
                    chunk.line_start, chunk.line_end, chunk.relevance_score, chunk.content
                );
            }
            if *has_more {
                out.push_str(
                    "[... more content available, ask for specific sections or line numbers ...]\n",
                );
            }
            out
        }
    }
}
}
// Unit tests. Pure data/formatting paths only — the async file-reading
// methods are not covered here (they require a filesystem).
#[cfg(test)]
mod tests {
use super::*;
// FileChunk is plain data: fields round-trip unchanged.
#[test]
fn test_file_chunk_creation() {
let chunk = FileChunk {
content: "fn main() {}".to_string(),
line_start: 1,
line_end: 1,
relevance_score: 0.95,
};
assert_eq!(chunk.line_start, 1);
assert_eq!(chunk.line_end, 1);
assert!((chunk.relevance_score - 0.95).abs() < 0.01);
}
// Full content formats as the raw text, unmodified.
#[test]
fn test_format_full_content() {
let content = FileContent::Full("hello world".to_string());
let formatted = FileContextManager::format_content(&content);
assert_eq!(formatted, "hello world");
}
// AlreadyInContext formats as a placeholder naming the path.
#[test]
fn test_format_already_in_context() {
let content = FileContent::AlreadyInContext("/path/to/file.rs".to_string());
let formatted = FileContextManager::format_content(&content);
assert!(formatted.contains("already shown above"));
assert!(formatted.contains("/path/to/file.rs"));
}
// Chunked output includes the header, every chunk, and the has_more trailer.
#[test]
fn test_format_chunked_content() {
let content = FileContent::Chunked {
path: "/path/to/file.rs".to_string(),
total_size: 50000,
chunks: vec![
FileChunk {
content: "fn main() {}".to_string(),
line_start: 1,
line_end: 1,
relevance_score: 0.95,
},
FileChunk {
content: "fn helper() {}".to_string(),
line_start: 10,
line_end: 10,
relevance_score: 0.85,
},
],
has_more: true,
};
let formatted = FileContextManager::format_content(&content);
assert!(formatted.contains("/path/to/file.rs"));
assert!(formatted.contains("50000 chars"));
assert!(formatted.contains("2 relevant sections"));
assert!(formatted.contains("fn main()"));
assert!(formatted.contains("fn helper()"));
assert!(formatted.contains("more content available"));
}
// Hashing is deterministic, content-sensitive, and 64 hex chars (SHA-256).
#[test]
fn test_compute_hash() {
let hash1 = FileContextManager::compute_hash("hello world");
let hash2 = FileContextManager::compute_hash("hello world");
let hash3 = FileContextManager::compute_hash("different content");
assert_eq!(hash1, hash2);
assert_ne!(hash1, hash3);
assert_eq!(hash1.len(), 64); }
// mark/clear round-trip for the in-context path set.
#[test]
fn test_context_tracking() {
let mut manager = FileContextManager::new();
assert!(!manager.is_in_context("/some/file.rs"));
assert_eq!(manager.context_file_count(), 0);
manager.mark_in_context("/some/file.rs");
assert!(manager.is_in_context("/some/file.rs"));
assert_eq!(manager.context_file_count(), 1);
manager.clear_context();
assert!(!manager.is_in_context("/some/file.rs"));
assert_eq!(manager.context_file_count(), 0);
}
// Small content yields at least one chunk starting at line 1.
#[test]
fn test_build_file_chunks() {
let manager = FileContextManager::new();
let content = "line 1\nline 2\nline 3\nline 4\nline 5";
let chunks = manager.build_file_chunks(content);
assert!(!chunks.is_empty());
assert_eq!(chunks[0].line_start, 1);
}
// Query matching ranks chunks containing the query words first.
#[test]
fn test_find_relevant_chunks() {
let manager = FileContextManager::new();
let chunks = vec![
FileChunk {
content: "This is about authentication and login".to_string(),
line_start: 1,
line_end: 1,
relevance_score: 1.0,
},
FileChunk {
content: "This is about database queries".to_string(),
line_start: 2,
line_end: 2,
relevance_score: 1.0,
},
FileChunk {
content: "This handles user login flow".to_string(),
line_start: 3,
line_end: 3,
relevance_score: 1.0,
},
];
let relevant = manager.find_relevant_chunks(&chunks, "login authentication");
assert!(!relevant.is_empty());
assert!(
relevant[0].content.contains("login") || relevant[0].content.contains("authentication")
);
}
}