use anyhow::Result;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use std::time::{SystemTime, UNIX_EPOCH};
use super::file_collector::{CollectorConfig, FileCollector};
use super::token_counter::TokenCounter;
use crate::models::{LazyProjectContext, ProjectContext};
use crate::utils::MutexExt;
/// File extensions (written without the leading dot) used as the default
/// value of `ContextConfig::priority_extensions`.
///
/// The order of the entries is part of the data and is kept as-is.
const DEFAULT_PRIORITY_EXTENSIONS: &[&str] = &[
    // Programming, scripting and query languages.
    "rs", "py", "js", "ts", "jsx", "tsx", "go", "java",
    "cpp", "c", "h", "hpp", "cs", "rb", "php",
    "swift", "kt", "scala", "r", "sql", "sh",
    // Structured data and configuration formats.
    "yaml", "yml", "toml", "json", "xml",
    // Web markup and stylesheets.
    "html", "css", "scss",
    // Plain-text documentation.
    "md", "txt",
];
/// Glob-style file name patterns used as the default value of
/// `ContextConfig::ignore_patterns`.
///
/// The order of the entries is kept exactly as it was.
const DEFAULT_IGNORE_PATTERNS: &[&str] = &[
    // Logs, temporary files and caches.
    "*.log", "*.tmp", "*.cache",
    // Python bytecode and extension modules.
    "*.pyc", "*.pyo", "*.pyd",
    // Native libraries, executables and object files.
    "*.so", "*.dylib", "*.dll", "*.exe", "*.o", "*.a", "*.lib",
    // Images.
    "*.png", "*.jpg", "*.jpeg", "*.gif", "*.bmp", "*.ico", "*.svg",
    // Documents.
    "*.pdf",
    // Archives.
    "*.zip", "*.tar", "*.gz", "*.rar", "*.7z",
];
/// Limits and filters that control which files are collected and how much of
/// them is loaded into a project context.
#[derive(Debug, Clone)]
pub struct ContextConfig {
/// Per-file size limit forwarded to the file collector
/// (presumably in bytes; the default is `1024 * 1024` — confirm against `CollectorConfig`).
pub max_file_size: usize,
/// Maximum number of files; forwarded to the file collector and also used as
/// the cap on files accepted while loading a full context.
pub max_files: usize,
/// Total token budget for the contents loaded into a full context.
pub max_context_tokens: usize,
/// File extensions (without the leading dot) forwarded to the file collector
/// as its priority list.
pub priority_extensions: Vec<&'static str>,
/// Glob-style patterns forwarded to the file collector as its ignore list.
pub ignore_patterns: Vec<&'static str>,
}
impl Default for ContextConfig {
    /// Builds the stock configuration: a `1024 * 1024` file-size limit, at most
    /// 100 files, a 50 000 token budget, and the module's default extension
    /// and ignore-pattern lists.
    fn default() -> Self {
        const DEFAULT_MAX_FILE_SIZE: usize = 1024 * 1024;
        const DEFAULT_MAX_FILES: usize = 100;
        const DEFAULT_MAX_CONTEXT_TOKENS: usize = 50_000;

        let priority_extensions = Vec::from(DEFAULT_PRIORITY_EXTENSIONS);
        let ignore_patterns = Vec::from(DEFAULT_IGNORE_PATTERNS);

        Self {
            max_file_size: DEFAULT_MAX_FILE_SIZE,
            max_files: DEFAULT_MAX_FILES,
            max_context_tokens: DEFAULT_MAX_CONTEXT_TOKENS,
            priority_extensions,
            ignore_patterns,
        }
    }
}
/// Running totals kept while files are being added to a context.
#[derive(Debug, Clone)]
struct LoadingState {
    /// Number of files accepted so far.
    files_loaded: usize,
    /// Sum of the token counts of all accepted files.
    tokens_used: usize,
}

impl LoadingState {
    /// Creates a state with no files loaded and no tokens used.
    fn new() -> Self {
        Self {
            files_loaded: 0,
            tokens_used: 0,
        }
    }

    /// Tries to account for one more file of `tokens` tokens.
    ///
    /// Returns `true` and updates both counters when the file fits, i.e. fewer
    /// than `max_files` files have been accepted and the new token total does
    /// not exceed `max_tokens`. Returns `false` and leaves the state untouched
    /// otherwise.
    fn try_add_file(&mut self, tokens: usize, max_files: usize, max_tokens: usize) -> bool {
        if self.files_loaded >= max_files {
            return false;
        }

        // `checked_add` instead of `+`: a huge `tokens` value must be rejected,
        // not panic (debug builds) or wrap around and slip under the limit
        // (release builds).
        let new_total = match self.tokens_used.checked_add(tokens) {
            Some(total) if total <= max_tokens => total,
            _ => return false,
        };

        // Cannot overflow: `files_loaded < max_files <= usize::MAX` here.
        self.files_loaded += 1;
        self.tokens_used = new_total;
        true
    }
}
/// Handle on a project directory: collects its files, loads their contents
/// into a project context, and tracks whether the set of files has changed.
#[derive(Clone)]
pub struct Context {
/// Directory all collected file paths are made relative to.
root_path: PathBuf,
/// Limits and filters applied when collecting and loading files.
config: ContextConfig,
/// Shared cache manager handed to the token counter; `None` when the cache
/// manager could not be created.
cache_manager: Option<Arc<crate::cache::CacheManager>>,
/// Hash over the relative file paths seen by the last `reload`, if any.
last_file_hash: Option<u64>,
/// Time of the last `reload`, in seconds since the Unix epoch, if any.
last_load_time: Option<u64>,
/// Files found by the last `reload`.
cached_files: Vec<PathBuf>,
}
impl std::fmt::Debug for Context {
    /// Writes a compact debug view of the context.
    ///
    /// `cache_manager` is left out, and `cached_files` is reported as the
    /// number of cached paths, not the paths themselves.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let cached_file_count = self.cached_files.len();

        let mut out = f.debug_struct("Context");
        out.field("root_path", &self.root_path);
        out.field("config", &self.config);
        out.field("last_file_hash", &self.last_file_hash);
        out.field("last_load_time", &self.last_load_time);
        out.field("cached_files", &cached_file_count);
        out.finish()
    }
}
impl Context {
    /// Creates a context rooted at `root_path` with [`ContextConfig::default`].
    ///
    /// See [`Context::with_config`] for details.
    pub fn new(root_path: impl AsRef<Path>) -> Result<Self> {
        Self::with_config(root_path, ContextConfig::default())
    }

    /// Creates a context rooted at `root_path` with an explicit `config`.
    ///
    /// The cache manager is best-effort: if `CacheManager::new()` fails, the
    /// context is still created, just without a cache. No files are scanned
    /// until one of the load/reload methods is called.
    pub fn with_config(root_path: impl AsRef<Path>, config: ContextConfig) -> Result<Self> {
        Ok(Self {
            root_path: root_path.as_ref().to_path_buf(),
            config,
            cache_manager: crate::cache::CacheManager::new().ok().map(Arc::new),
            last_file_hash: None,
            last_load_time: None,
            cached_files: Vec::new(),
        })
    }

    /// Convenience wrapper: builds a default context for `root_path` and loads
    /// its full contents.
    ///
    /// # Errors
    /// Propagates any error from [`Context::new`] or
    /// [`Context::load_full_context`].
    pub async fn load(root_path: impl AsRef<Path>) -> Result<ProjectContext> {
        let ctx = Self::new(&root_path)?;
        ctx.load_full_context().await
    }

    /// Collects the project's files and loads their contents, stopping once
    /// `max_files` files or `max_context_tokens` tokens have been accepted.
    ///
    /// Files are stored under their path relative to the root (or the full
    /// path when the file is not under the root), with `\` replaced by `/`.
    /// Files that fail to load, or that do not fit in the remaining budget,
    /// are skipped. The returned context's `token_count` is the sum of the
    /// token counts of the accepted files.
    ///
    /// # Errors
    /// Returns an error if the file collector fails.
    pub async fn load_full_context(&self) -> Result<ProjectContext> {
        let mut context = ProjectContext::new(self.root_path.to_string_lossy().to_string());
        let collector = self.create_collector();
        let files = collector.collect_files(&self.root_path).await?;
        let loading_state = Arc::new(Mutex::new(LoadingState::new()));
        let token_counter = TokenCounter::new(self.cache_manager.clone());
        let max_files = self.config.max_files;
        let max_tokens = self.config.max_context_tokens;

        let loaded_contents: Vec<(String, String, usize)> = files
            .iter()
            .filter_map(|file_path| {
                let remaining_budget = {
                    let state = loading_state.lock_mut_safe();
                    // Once the file cap is reached nothing else can be
                    // accepted, so skip before loading the file at all.
                    if state.files_loaded >= max_files {
                        return None;
                    }
                    max_tokens.saturating_sub(state.tokens_used)
                };
                if remaining_budget == 0 {
                    return None;
                }

                // Unreadable files are skipped, not treated as fatal.
                let (content, tokens) = token_counter
                    .load_file_cached(file_path, remaining_budget)
                    .ok()?;

                let mut state = loading_state.lock_mut_safe();
                if !state.try_add_file(tokens, max_files, max_tokens) {
                    return None;
                }

                let relative_path = file_path
                    .strip_prefix(&self.root_path)
                    .unwrap_or(file_path)
                    .to_string_lossy()
                    .replace('\\', "/");
                Some((relative_path, content, tokens))
            })
            .collect();

        let mut actual_total_tokens = 0;
        for (path, content, tokens) in loaded_contents {
            context.add_file(path, content);
            actual_total_tokens += tokens;
        }
        context.token_count = actual_total_tokens;
        Ok(context)
    }

    /// Collects the project's file list without loading any contents and wraps
    /// it in a [`LazyProjectContext`].
    ///
    /// # Errors
    /// Returns an error if the file collector fails.
    pub async fn load_structure(&self) -> Result<LazyProjectContext> {
        let collector = self.create_collector();
        let files = collector.collect_files(&self.root_path).await?;
        Ok(LazyProjectContext::new(
            self.root_path.to_string_lossy().to_string(),
            files,
        ))
    }

    /// Reports whether the set of collected files differs from the one seen by
    /// the last [`Context::reload`].
    ///
    /// Returns `true` when no reload has happened yet. If the current file set
    /// cannot be hashed (the collector failed), this returns `false`, i.e. the
    /// previously cached state is kept.
    pub async fn needs_reload(&self) -> bool {
        match (self.compute_file_hash().await, self.last_file_hash) {
            (Ok(current_hash), Some(last_hash)) => current_hash != last_hash,
            (Ok(_), None) => true,
            (Err(_), _) => false,
        }
    }

    /// Calls [`Context::reload`] if [`Context::needs_reload`] says so.
    ///
    /// Returns `Ok(true)` when a reload was performed and `Ok(false)` when it
    /// was not needed.
    ///
    /// # Errors
    /// Propagates any error from [`Context::reload`].
    pub async fn reload_if_needed(&mut self) -> Result<bool> {
        if self.needs_reload().await {
            self.reload().await?;
            Ok(true)
        } else {
            Ok(false)
        }
    }

    /// Re-collects the project's files and records them together with their
    /// path hash and the current time (seconds since the Unix epoch).
    ///
    /// # Errors
    /// Returns an error if the file collector fails; in that case the cached
    /// state is left unchanged.
    pub async fn reload(&mut self) -> Result<()> {
        let collector = self.create_collector();
        let files = collector.collect_files(&self.root_path).await?;
        let hash = self.compute_hash_from_files(&files)?;

        self.cached_files = files;
        self.last_file_hash = Some(hash);
        self.last_load_time = Some(
            SystemTime::now()
                .duration_since(UNIX_EPOCH)
                // A clock set before the epoch yields 0 instead of an error.
                .unwrap_or_default()
                .as_secs(),
        );
        Ok(())
    }

    /// Builds a [`ProjectContext`] that lists the cached files with empty
    /// contents.
    ///
    /// Cached paths that are not under the root, or are not valid UTF-8, are
    /// omitted.
    pub fn build_context(&self) -> ProjectContext {
        let mut context = ProjectContext::new(self.root_path.to_string_lossy().to_string());
        for file_path in &self.cached_files {
            if let Some(relative) = self.relative_utf8_path(file_path) {
                context.add_file(relative.to_string(), String::new());
            }
        }
        context
    }

    /// Returns the cached files as paths relative to the root.
    ///
    /// Cached paths that are not under the root, or are not valid UTF-8, are
    /// omitted.
    pub fn get_file_list(&self) -> Vec<String> {
        self.cached_files
            .iter()
            .filter_map(|path| self.relative_utf8_path(path))
            .map(str::to_string)
            .collect()
    }

    /// Returns the number of files found by the last reload.
    pub fn total_files(&self) -> usize {
        self.cached_files.len()
    }

    /// Builds a file collector configured from this context's settings.
    fn create_collector(&self) -> FileCollector {
        let collector_config = CollectorConfig {
            max_file_size: self.config.max_file_size,
            max_files: self.config.max_files,
            priority_extensions: self.config.priority_extensions.clone(),
            ignore_patterns: self.config.ignore_patterns.clone(),
        };
        FileCollector::new(collector_config)
    }

    /// Returns `path` relative to the root as a `&str`, or `None` when `path`
    /// is not under the root or is not valid UTF-8.
    fn relative_utf8_path<'p>(&self, path: &'p Path) -> Option<&'p str> {
        path.strip_prefix(&self.root_path)
            .ok()
            .and_then(|relative| relative.to_str())
    }

    /// Collects the current file set and hashes it.
    ///
    /// # Errors
    /// Returns an error if the file collector fails.
    async fn compute_file_hash(&self) -> Result<u64> {
        let collector = self.create_collector();
        let current_files = collector.collect_files(&self.root_path).await?;
        self.compute_hash_from_files(&current_files)
    }

    /// Hashes the sorted relative paths of `files`.
    ///
    /// Only the paths go into the hash, not the file contents, so the result
    /// changes when files are added, removed or renamed. Paths outside the
    /// root or not valid UTF-8 are ignored. Sorting makes the hash independent
    /// of the order in which the collector returned the files.
    fn compute_hash_from_files(&self, files: &[PathBuf]) -> Result<u64> {
        let mut file_paths: Vec<&str> = files
            .iter()
            .filter_map(|path| self.relative_utf8_path(path))
            .collect();
        file_paths.sort_unstable();

        let mut hasher = DefaultHasher::new();
        for path in file_paths {
            path.hash(&mut hasher);
        }
        Ok(hasher.finish())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::fs::File;
    use std::io::Write;
    use tempfile::TempDir;

    /// A freshly created context has no cached files and asks for a reload.
    #[tokio::test]
    async fn test_context_creation() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();

        let context = Context::new(root).unwrap();

        assert_eq!(context.root_path, root);
        assert_eq!(context.total_files(), 0);
        assert!(context.needs_reload().await);
    }

    /// Adding a file to the tree flips `needs_reload` and changes the hash.
    #[tokio::test]
    async fn test_file_tree_change_detection() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        let mut context = Context::new(root).unwrap();

        context.reload().await.unwrap();
        let hash_before = context.last_file_hash;
        assert!(!context.needs_reload().await);

        fs::write(root.join("test.py"), "print('test')").unwrap();
        assert!(context.needs_reload().await);

        context.reload().await.unwrap();
        assert_ne!(context.last_file_hash, hash_before);
    }

    /// `build_context` lists every file found by the preceding reload.
    #[tokio::test]
    async fn test_project_context_building() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        let fixtures: &[(&str, &str)] = &[
            ("main.py", "print('hello')"),
            ("lib.py", "def helper(): pass"),
            ("requirements.txt", "requests\n"),
        ];
        for &(name, contents) in fixtures {
            fs::write(root.join(name), contents).unwrap();
        }

        let mut loader = Context::new(root).unwrap();
        loader.reload().await.unwrap();
        let project = loader.build_context();

        let expected_root = root.to_string_lossy().into_owned();
        assert_eq!(project.root_path, expected_root);
        assert_eq!(project.files.len(), 3);
    }

    /// `Context::load` picks up nested files under `/`-separated keys and
    /// reports a non-zero token count.
    #[tokio::test]
    async fn test_load_full_context() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();

        let mut manifest = File::create(root.join("Cargo.toml")).unwrap();
        writeln!(manifest, "[package]\nname = \"test\"").unwrap();

        let source_dir = root.join("src");
        fs::create_dir(&source_dir).unwrap();
        let mut entry_point = File::create(source_dir.join("main.rs")).unwrap();
        writeln!(entry_point, "fn main() {{\n println!(\"Hello\");\n}}").unwrap();

        let project = Context::load(root).await.unwrap();

        for key in &["Cargo.toml", "src/main.rs"] {
            assert!(project.files.contains_key(*key));
        }
        assert!(project.token_count > 0);
    }

    /// A rejected file must leave both counters untouched.
    #[test]
    fn test_loading_state_atomicity() {
        let mut accepted = LoadingState::new();
        assert!(accepted.try_add_file(10, 100, 1000));
        assert_eq!(accepted.files_loaded, 1);
        assert_eq!(accepted.tokens_used, 10);

        // File cap reached: nothing more is accepted.
        accepted.files_loaded = 100;
        assert!(!accepted.try_add_file(5, 100, 1000));
        assert_eq!(accepted.files_loaded, 100);

        // Token budget would be exceeded: rejected, total unchanged.
        let mut nearly_full = LoadingState::new();
        nearly_full.tokens_used = 990;
        assert!(!nearly_full.try_add_file(100, 100, 1000));
        assert_eq!(nearly_full.tokens_used, 990);
    }

    /// Ten threads each try to add 100 tokens against a 500-token budget;
    /// exactly five must succeed.
    #[test]
    fn test_concurrent_file_loading_safety() {
        use std::thread;

        let shared = Arc::new(Mutex::new(LoadingState::new()));

        let workers: Vec<_> = (0..10)
            .map(|_| {
                let shared = Arc::clone(&shared);
                thread::spawn(move || shared.lock().unwrap().try_add_file(100, 100, 500))
            })
            .collect();

        let mut outcomes: Vec<bool> = Vec::new();
        for worker in workers {
            outcomes.push(worker.join().unwrap());
        }

        let accepted = outcomes.iter().filter(|&&ok| ok).count();
        let rejected = outcomes.iter().filter(|&&ok| !ok).count();
        assert_eq!(accepted, 5);
        assert_eq!(rejected, 5);

        let totals = shared.lock().unwrap();
        assert_eq!(totals.files_loaded, 5);
        assert_eq!(totals.tokens_used, 500);
    }
}