use anyhow::{Context, Result};
use rusqlite::{Connection, OptionalExtension};
use std::collections::HashMap;
use std::fs::File;
use std::path::{Path, PathBuf};
use crate::models::IndexedFile;
/// Name of the cache directory created under the workspace root.
pub const CACHE_DIR: &str = ".reflex";
/// SQLite database holding file, branch, dependency, and statistics metadata.
pub const META_DB: &str = "meta.db";
/// Binary token index artifact (written elsewhere; sized by `stats()`).
pub const TOKENS_BIN: &str = "tokens.bin";
/// Legacy hash file name (hashes now live in the `file_branches` table).
pub const HASHES_JSON: &str = "hashes.json";
/// User-editable configuration file seeded by `init_config_toml()`.
pub const CONFIG_TOML: &str = "config.toml";
/// Manages the on-disk `.reflex` cache for one workspace: creation,
/// validation, metadata queries, branch bookkeeping, and compaction.
/// Each method opens its own short-lived SQLite connection.
#[derive(Clone)]
pub struct CacheManager {
    /// Full path to the `.reflex` cache directory (workspace root + CACHE_DIR).
    cache_path: PathBuf,
}
impl CacheManager {
pub fn new(root: impl AsRef<Path>) -> Self {
let cache_path = root.as_ref().join(CACHE_DIR);
Self { cache_path }
}
pub fn init(&self) -> Result<()> {
log::info!("Initializing cache at {:?}", self.cache_path);
if !self.cache_path.exists() {
std::fs::create_dir_all(&self.cache_path)?;
}
self.init_meta_db()?;
self.init_config_toml()?;
log::info!("Cache initialized successfully");
Ok(())
}
    /// Create `meta.db` with the full schema on first run; returns early
    /// (no migration) when the file already exists — schema drift is caught
    /// later by the `schema_hash` check in `validate()`.
    fn init_meta_db(&self) -> Result<()> {
        let db_path = self.cache_path.join(META_DB);
        if db_path.exists() {
            return Ok(());
        }
        let conn = Connection::open(&db_path)
            .context("Failed to create meta.db")?;
        // Core table: one row per indexed file, keyed by unique path.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS files (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                path TEXT NOT NULL UNIQUE,
                last_indexed INTEGER NOT NULL,
                language TEXT NOT NULL,
                token_count INTEGER DEFAULT 0,
                line_count INTEGER DEFAULT 0
            )",
            [],
        )?;
        // NOTE(review): this index is redundant — the UNIQUE constraint on
        // files.path already creates an implicit index. Harmless, but could go.
        conn.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)", [])?;
        // Key/value statistics store (counts, timestamps, schema hash).
        conn.execute(
            "CREATE TABLE IF NOT EXISTS statistics (
                key TEXT PRIMARY KEY,
                value TEXT NOT NULL,
                updated_at INTEGER NOT NULL
            )",
            [],
        )?;
        let now = chrono::Utc::now().timestamp();
        // Seed the well-known statistics keys so later reads never miss.
        conn.execute(
            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
            ["total_files", "0", &now.to_string()],
        )?;
        conn.execute(
            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
            ["cache_version", "1", &now.to_string()],
        )?;
        // CACHE_SCHEMA_HASH is injected at compile time (build script env var);
        // validate() compares it against this stored value to force rebuilds.
        let schema_hash = env!("CACHE_SCHEMA_HASH");
        conn.execute(
            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
            ["schema_hash", schema_hash, &now.to_string()],
        )?;
        conn.execute(
            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
            ["last_compaction", "0", &now.to_string()],
        )?;
        conn.execute(
            "CREATE TABLE IF NOT EXISTS config (
                key TEXT PRIMARY KEY,
                value TEXT NOT NULL
            )",
            [],
        )?;
        // Per-branch file membership + content hash. Created before `branches`,
        // which is fine: SQLite does not resolve FOREIGN KEY targets at CREATE
        // time, only (optionally) at DML time when foreign_keys is enabled.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS file_branches (
                file_id INTEGER NOT NULL,
                branch_id INTEGER NOT NULL,
                hash TEXT NOT NULL,
                last_indexed INTEGER NOT NULL,
                PRIMARY KEY (file_id, branch_id),
                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
                FOREIGN KEY (branch_id) REFERENCES branches(id) ON DELETE CASCADE
            )",
            [],
        )?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_branch_lookup ON file_branches(branch_id, file_id)",
            [],
        )?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_hash_lookup ON file_branches(hash)",
            [],
        )?;
        conn.execute(
            "CREATE TABLE IF NOT EXISTS branches (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL UNIQUE,
                commit_sha TEXT NOT NULL,
                last_indexed INTEGER NOT NULL,
                file_count INTEGER DEFAULT 0,
                is_dirty INTEGER DEFAULT 0
            )",
            [],
        )?;
        // Import graph: which paths a file imports, optionally resolved to a
        // concrete indexed file.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS file_dependencies (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_id INTEGER NOT NULL,
                imported_path TEXT NOT NULL,
                resolved_file_id INTEGER,
                import_type TEXT NOT NULL,
                line_number INTEGER NOT NULL,
                imported_symbols TEXT,
                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
                FOREIGN KEY (resolved_file_id) REFERENCES files(id) ON DELETE SET NULL
            )",
            [],
        )?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_deps_file ON file_dependencies(file_id)",
            [],
        )?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_deps_resolved ON file_dependencies(resolved_file_id)",
            [],
        )?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_deps_type ON file_dependencies(import_type)",
            [],
        )?;
        // Export graph: symbols a file re-exports and where they came from.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS file_exports (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_id INTEGER NOT NULL,
                exported_symbol TEXT,
                source_path TEXT NOT NULL,
                resolved_source_id INTEGER,
                line_number INTEGER NOT NULL,
                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
                FOREIGN KEY (resolved_source_id) REFERENCES files(id) ON DELETE SET NULL
            )",
            [],
        )?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_exports_file ON file_exports(file_id)",
            [],
        )?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_exports_resolved ON file_exports(resolved_source_id)",
            [],
        )?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_exports_symbol ON file_exports(exported_symbol)",
            [],
        )?;
        log::debug!("Created meta.db with schema");
        Ok(())
    }
fn init_config_toml(&self) -> Result<()> {
let config_path = self.cache_path.join(CONFIG_TOML);
if config_path.exists() {
return Ok(());
}
let default_config = r#"[index]
languages = [] # Empty = all supported languages
max_file_size = 10485760 # 10 MB
follow_symlinks = false
[index.include]
patterns = []
[index.exclude]
patterns = []
[search]
default_limit = 100
fuzzy_threshold = 0.8
[performance]
parallel_threads = 0 # 0 = auto (80% of available cores), or set a specific number
compression_level = 3 # zstd level
[semantic]
# Semantic query generation using LLMs
# Translate natural language questions into rfx query commands
provider = "groq" # Options: openai, anthropic, groq
# model = "llama-3.3-70b-versatile" # Optional: override provider default model
# auto_execute = false # Optional: auto-execute queries without confirmation
"#;
std::fs::write(&config_path, default_config)?;
log::debug!("Created default config.toml");
Ok(())
}
pub fn exists(&self) -> bool {
self.cache_path.exists()
&& self.cache_path.join(META_DB).exists()
}
pub fn validate(&self) -> Result<()> {
let start = std::time::Instant::now();
if !self.cache_path.exists() {
anyhow::bail!("Cache directory does not exist: {}", self.cache_path.display());
}
let db_path = self.cache_path.join(META_DB);
if !db_path.exists() {
anyhow::bail!("Database file missing: {}", db_path.display());
}
let conn = Connection::open(&db_path)
.context("Failed to open meta.db - database may be corrupted")?;
let tables: Result<Vec<String>, _> = conn
.prepare("SELECT name FROM sqlite_master WHERE type='table'")
.and_then(|mut stmt| {
stmt.query_map([], |row| row.get(0))
.map(|rows| rows.collect())
})
.and_then(|result| result);
match tables {
Ok(table_list) => {
let required_tables = vec!["files", "statistics", "config", "file_branches", "branches", "file_dependencies", "file_exports"];
for table in &required_tables {
if !table_list.iter().any(|t| t == table) {
anyhow::bail!("Required table '{}' missing from database schema", table);
}
}
}
Err(e) => {
anyhow::bail!("Failed to read database schema: {}", e);
}
}
let integrity_result: String = conn
.query_row("PRAGMA quick_check", [], |row| row.get(0))?;
if integrity_result != "ok" {
log::warn!("Database integrity check failed: {}", integrity_result);
anyhow::bail!(
"Database integrity check failed: {}. Cache may be corrupted. \
Run 'rfx index' to rebuild cache.",
integrity_result
);
}
let trigrams_path = self.cache_path.join("trigrams.bin");
if trigrams_path.exists() {
use std::io::Read;
match File::open(&trigrams_path) {
Ok(mut file) => {
let mut header = [0u8; 4];
match file.read_exact(&mut header) {
Ok(_) => {
if &header != b"RFTG" {
log::warn!("trigrams.bin has invalid magic bytes - may be corrupted");
anyhow::bail!("trigrams.bin appears to be corrupted (invalid magic bytes)");
}
}
Err(_) => {
anyhow::bail!("trigrams.bin is too small - appears to be corrupted");
}
}
}
Err(e) => {
anyhow::bail!("Failed to open trigrams.bin: {}", e);
}
}
}
let content_path = self.cache_path.join("content.bin");
if content_path.exists() {
use std::io::Read;
match File::open(&content_path) {
Ok(mut file) => {
let mut header = [0u8; 4];
match file.read_exact(&mut header) {
Ok(_) => {
if &header != b"RFCT" {
log::warn!("content.bin has invalid magic bytes - may be corrupted");
anyhow::bail!("content.bin appears to be corrupted (invalid magic bytes)");
}
}
Err(_) => {
anyhow::bail!("content.bin is too small - appears to be corrupted");
}
}
}
Err(e) => {
anyhow::bail!("Failed to open content.bin: {}", e);
}
}
}
let current_schema_hash = env!("CACHE_SCHEMA_HASH");
let stored_schema_hash: Option<String> = conn
.query_row(
"SELECT value FROM statistics WHERE key = 'schema_hash'",
[],
|row| row.get(0),
)
.optional()?;
if let Some(stored_hash) = stored_schema_hash {
if stored_hash != current_schema_hash {
log::warn!(
"Cache schema hash mismatch! Stored: {}, Current: {}",
stored_hash,
current_schema_hash
);
anyhow::bail!(
"Cache schema version mismatch.\n\
\n\
- Cache was built with version {}\n\
- Current binary expects version {}\n\
\n\
The cache format may be incompatible with this version of Reflex.\n\
Please rebuild the index by running:\n\
\n\
rfx index\n\
\n\
This usually happens after upgrading Reflex or making code changes.",
stored_hash,
current_schema_hash
);
}
} else {
log::debug!("No schema_hash found in cache - this cache was created before automatic invalidation was implemented");
}
let elapsed = start.elapsed();
log::debug!("Cache validation passed (schema hash: {}, took {:?})", current_schema_hash, elapsed);
Ok(())
}
pub fn path(&self) -> &Path {
&self.cache_path
}
pub fn workspace_root(&self) -> PathBuf {
self.cache_path
.parent()
.expect(".reflex directory should have a parent")
.to_path_buf()
}
pub fn clear(&self) -> Result<()> {
log::warn!("Clearing cache at {:?}", self.cache_path);
if self.cache_path.exists() {
std::fs::remove_dir_all(&self.cache_path)?;
}
Ok(())
}
    /// Force a WAL checkpoint in TRUNCATE mode so pending WAL frames are
    /// folded back into meta.db and the -wal file is truncated to zero.
    /// No-op when the database does not exist yet.
    pub fn checkpoint_wal(&self) -> Result<()> {
        let db_path = self.cache_path.join(META_DB);
        if !db_path.exists() {
            return Ok(());
        }
        let conn = Connection::open(&db_path)
            .context("Failed to open meta.db for WAL checkpoint")?;
        // wal_checkpoint returns one row: (busy, log_pages, checkpointed_pages).
        conn.query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |row| {
            let busy: i64 = row.get(0)?;
            let log_pages: i64 = row.get(1)?;
            let checkpointed: i64 = row.get(2)?;
            log::debug!(
                "WAL checkpoint completed: busy={}, log_pages={}, checkpointed_pages={}",
                busy, log_pages, checkpointed
            );
            Ok(())
        }).context("Failed to execute WAL checkpoint")?;
        log::debug!("Executed WAL checkpoint (TRUNCATE) on meta.db");
        Ok(())
    }
pub fn load_all_hashes(&self) -> Result<HashMap<String, String>> {
let db_path = self.cache_path.join(META_DB);
if !db_path.exists() {
return Ok(HashMap::new());
}
let conn = Connection::open(&db_path)
.context("Failed to open meta.db")?;
let mut stmt = conn.prepare(
"SELECT f.path, fb.hash
FROM file_branches fb
JOIN files f ON fb.file_id = f.id"
)?;
let hashes: HashMap<String, String> = stmt.query_map([], |row| {
Ok((row.get(0)?, row.get(1)?))
})?
.collect::<Result<HashMap<_, _>, _>>()?;
log::debug!("Loaded {} file hashes across all branches from SQLite", hashes.len());
Ok(hashes)
}
    /// Map of file path → content hash restricted to one branch.
    /// An unknown branch simply yields an empty map (no error).
    pub fn load_hashes_for_branch(&self, branch: &str) -> Result<HashMap<String, String>> {
        let db_path = self.cache_path.join(META_DB);
        // Uninitialized cache: nothing indexed yet.
        if !db_path.exists() {
            return Ok(HashMap::new());
        }
        let conn = Connection::open(&db_path)
            .context("Failed to open meta.db")?;
        let mut stmt = conn.prepare(
            "SELECT f.path, fb.hash
             FROM file_branches fb
             JOIN files f ON fb.file_id = f.id
             JOIN branches b ON fb.branch_id = b.id
             WHERE b.name = ?"
        )?;
        // collect::<Result<...>> short-circuits on the first row error.
        let hashes: HashMap<String, String> = stmt.query_map([branch], |row| {
            Ok((row.get(0)?, row.get(1)?))
        })?
        .collect::<Result<HashMap<_, _>, _>>()?;
        log::debug!("Loaded {} file hashes for branch '{}' from SQLite", hashes.len(), branch);
        Ok(hashes)
    }
    /// No-op kept for API compatibility: hashes are persisted per-branch in
    /// the `file_branches` table (see `record_branch_file`), not as a blob.
    #[deprecated(note = "Hashes are now stored in file_branches table via record_branch_file()")]
    pub fn save_hashes(&self, _hashes: &HashMap<String, String>) -> Result<()> {
        Ok(())
    }
pub fn update_file(&self, path: &str, language: &str, line_count: usize) -> Result<()> {
let db_path = self.cache_path.join(META_DB);
let conn = Connection::open(&db_path)
.context("Failed to open meta.db for file update")?;
let now = chrono::Utc::now().timestamp();
conn.execute(
"INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
VALUES (?, ?, ?, ?)",
[path, &now.to_string(), language, &line_count.to_string()],
)?;
Ok(())
}
pub fn batch_update_files(&self, files: &[(String, String, usize)]) -> Result<()> {
let db_path = self.cache_path.join(META_DB);
let mut conn = Connection::open(&db_path)
.context("Failed to open meta.db for batch update")?;
let now = chrono::Utc::now().timestamp();
let now_str = now.to_string();
let tx = conn.transaction()?;
for (path, language, line_count) in files {
tx.execute(
"INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
VALUES (?, ?, ?, ?)",
[path.as_str(), &now_str, language.as_str(), &line_count.to_string()],
)?;
}
tx.commit()?;
Ok(())
}
pub fn batch_update_files_and_branch(
&self,
files: &[(String, String, usize)], branch_files: &[(String, String)], branch: &str,
commit_sha: Option<&str>,
) -> Result<()> {
log::info!("batch_update_files_and_branch: Processing {} files for branch '{}'", files.len(), branch);
let db_path = self.cache_path.join(META_DB);
let mut conn = Connection::open(&db_path)
.context("Failed to open meta.db for batch update and branch recording")?;
let now = chrono::Utc::now().timestamp();
let now_str = now.to_string();
let tx = conn.transaction()?;
for (path, language, line_count) in files {
tx.execute(
"INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
VALUES (?, ?, ?, ?)",
[path.as_str(), &now_str, language.as_str(), &line_count.to_string()],
)?;
}
log::info!("Inserted {} files into files table", files.len());
let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
let mut inserted = 0;
for (path, hash) in branch_files {
let file_id: i64 = tx.query_row(
"SELECT id FROM files WHERE path = ?",
[path.as_str()],
|row| row.get(0)
).context(format!("File not found in index after insert: {}", path))?;
tx.execute(
"INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
VALUES (?, ?, ?, ?)",
rusqlite::params![file_id, branch_id, hash.as_str(), now],
)?;
inserted += 1;
}
log::info!("Inserted {} file_branches entries", inserted);
tx.commit()?;
log::info!("Transaction committed successfully (files + file_branches)");
let verify_conn = Connection::open(&db_path)
.context("Failed to open meta.db for verification")?;
let actual_file_count: i64 = verify_conn.query_row(
"SELECT COUNT(*) FROM files WHERE path IN (SELECT path FROM files ORDER BY id DESC LIMIT ?)",
[files.len()],
|row| row.get(0)
).unwrap_or(0);
let actual_fb_count: i64 = verify_conn.query_row(
"SELECT COUNT(*) FROM file_branches fb
JOIN branches b ON fb.branch_id = b.id
WHERE b.name = ?",
[branch],
|row| row.get(0)
).unwrap_or(0);
log::info!(
"Post-commit verification: {} files in files table (expected {}), {} file_branches entries for '{}' (expected {})",
actual_file_count,
files.len(),
actual_fb_count,
branch,
inserted
);
if actual_file_count < files.len() as i64 {
log::warn!(
"MISMATCH: Expected {} files in database, but only found {}! Data may not have persisted.",
files.len(),
actual_file_count
);
}
if actual_fb_count < inserted as i64 {
log::warn!(
"MISMATCH: Expected {} file_branches entries for branch '{}', but only found {}! Data may not have persisted.",
inserted,
branch,
actual_fb_count
);
}
Ok(())
}
pub fn update_stats(&self, branch: &str) -> Result<()> {
let db_path = self.cache_path.join(META_DB);
let conn = Connection::open(&db_path)
.context("Failed to open meta.db for stats update")?;
let total_files: usize = conn.query_row(
"SELECT COUNT(DISTINCT fb.file_id)
FROM file_branches fb
JOIN branches b ON fb.branch_id = b.id
WHERE b.name = ?",
[branch],
|row| row.get(0),
).unwrap_or(0);
let now = chrono::Utc::now().timestamp();
conn.execute(
"INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
["total_files", &total_files.to_string(), &now.to_string()],
)?;
log::debug!("Updated statistics for branch '{}': {} files", branch, total_files);
Ok(())
}
pub fn update_schema_hash(&self) -> Result<()> {
let db_path = self.cache_path.join(META_DB);
let conn = Connection::open(&db_path)
.context("Failed to open meta.db for schema hash update")?;
let schema_hash = env!("CACHE_SCHEMA_HASH");
let now = chrono::Utc::now().timestamp();
conn.execute(
"INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
["schema_hash", schema_hash, &now.to_string()],
)?;
log::debug!("Updated schema hash to: {}", schema_hash);
Ok(())
}
    /// All indexed files (across every branch), sorted by path, with
    /// RFC 3339 last-indexed timestamps. Empty when no DB exists yet.
    pub fn list_files(&self) -> Result<Vec<IndexedFile>> {
        let db_path = self.cache_path.join(META_DB);
        if !db_path.exists() {
            return Ok(Vec::new());
        }
        let conn = Connection::open(&db_path)
            .context("Failed to open meta.db")?;
        let mut stmt = conn.prepare(
            "SELECT path, language, last_indexed FROM files ORDER BY path"
        )?;
        let files = stmt.query_map([], |row| {
            let path: String = row.get(0)?;
            let language: String = row.get(1)?;
            let last_indexed: i64 = row.get(2)?;
            Ok(IndexedFile {
                path,
                language,
                // An out-of-range stored timestamp falls back to "now" rather
                // than failing the whole listing.
                last_indexed: chrono::DateTime::from_timestamp(last_indexed, 0)
                    .unwrap_or_else(chrono::Utc::now)
                    .to_rfc3339(),
            })
        })?
        .collect::<Result<Vec<_>, _>>()?;
        Ok(files)
    }
    /// Aggregate index statistics, scoped to the current git branch when the
    /// workspace is a git repo (branch "_default" otherwise). Falls back to
    /// zero counts / "now" timestamps on any individual query failure so the
    /// stats command never hard-fails on a partially built cache.
    pub fn stats(&self) -> Result<crate::models::IndexStats> {
        let db_path = self.cache_path.join(META_DB);
        // No DB at all: report an empty index rather than erroring.
        if !db_path.exists() {
            return Ok(crate::models::IndexStats {
                total_files: 0,
                index_size_bytes: 0,
                last_updated: chrono::Utc::now().to_rfc3339(),
                files_by_language: std::collections::HashMap::new(),
                lines_by_language: std::collections::HashMap::new(),
            });
        }
        let conn = Connection::open(&db_path)
            .context("Failed to open meta.db")?;
        let workspace_root = self.workspace_root();
        // Branch scope: real git branch, "_default" for non-git workspaces,
        // or None when git state detection fails.
        let current_branch = if crate::git::is_git_repo(&workspace_root) {
            crate::git::get_git_state(&workspace_root)
                .ok()
                .map(|state| state.branch)
        } else {
            Some("_default".to_string())
        };
        log::debug!("stats(): current_branch = {:?}", current_branch);
        let total_files: usize = if let Some(ref branch) = current_branch {
            log::debug!("stats(): Counting files for branch '{}'", branch);
            // The two queries below are diagnostics only (debug logging);
            // the authoritative count is the COUNT(DISTINCT ...) afterwards.
            let branches: Vec<(i64, String, i64)> = conn.prepare(
                "SELECT id, name, file_count FROM branches"
            )
            .and_then(|mut stmt| {
                stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))
                    .map(|rows| rows.collect())
            })
            .and_then(|result| result)
            .unwrap_or_default();
            for (id, name, count) in &branches {
                log::debug!("stats(): Branch ID={}, Name='{}', FileCount={}", id, name, count);
            }
            let fb_counts: Vec<(String, i64)> = conn.prepare(
                "SELECT b.name, COUNT(*) FROM file_branches fb
                 JOIN branches b ON fb.branch_id = b.id
                 GROUP BY b.name"
            )
            .and_then(|mut stmt| {
                stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
                    .map(|rows| rows.collect())
            })
            .and_then(|result| result)
            .unwrap_or_default();
            for (name, count) in &fb_counts {
                log::debug!("stats(): file_branches count for branch '{}': {}", name, count);
            }
            let count: usize = conn.query_row(
                "SELECT COUNT(DISTINCT fb.file_id)
                 FROM file_branches fb
                 JOIN branches b ON fb.branch_id = b.id
                 WHERE b.name = ?",
                [branch],
                |row| row.get(0),
            ).unwrap_or(0);
            log::debug!("stats(): Query returned total_files = {}", count);
            count
        } else {
            log::warn!("stats(): No current_branch detected!");
            0
        };
        // Timestamp of the last total_files update; "now" when never recorded.
        let last_updated: String = conn.query_row(
            "SELECT updated_at FROM statistics WHERE key = 'total_files'",
            [],
            |row| {
                let timestamp: i64 = row.get(0)?;
                Ok(chrono::DateTime::from_timestamp(timestamp, 0)
                    .unwrap_or_else(chrono::Utc::now)
                    .to_rfc3339())
            },
        ).unwrap_or_else(|_| chrono::Utc::now().to_rfc3339());
        // Sum the sizes of every known cache artifact; missing files count 0.
        let mut index_size_bytes: u64 = 0;
        for file_name in [META_DB, TOKENS_BIN, CONFIG_TOML, "content.bin", "trigrams.bin"] {
            let file_path = self.cache_path.join(file_name);
            if let Ok(metadata) = std::fs::metadata(&file_path) {
                index_size_bytes += metadata.len();
            }
        }
        // Per-language file counts, branch-scoped when a branch is known.
        let mut files_by_language = std::collections::HashMap::new();
        if let Some(ref branch) = current_branch {
            let mut stmt = conn.prepare(
                "SELECT f.language, COUNT(DISTINCT f.id)
                 FROM files f
                 JOIN file_branches fb ON f.id = fb.file_id
                 JOIN branches b ON fb.branch_id = b.id
                 WHERE b.name = ?
                 GROUP BY f.language"
            )?;
            let lang_counts = stmt.query_map([branch], |row| {
                let language: String = row.get(0)?;
                let count: i64 = row.get(1)?;
                Ok((language, count as usize))
            })?;
            for result in lang_counts {
                let (language, count) = result?;
                files_by_language.insert(language, count);
            }
        } else {
            let mut stmt = conn.prepare("SELECT language, COUNT(*) FROM files GROUP BY language")?;
            let lang_counts = stmt.query_map([], |row| {
                let language: String = row.get(0)?;
                let count: i64 = row.get(1)?;
                Ok((language, count as usize))
            })?;
            for result in lang_counts {
                let (language, count) = result?;
                files_by_language.insert(language, count);
            }
        }
        // Per-language line totals, same scoping rules as above.
        let mut lines_by_language = std::collections::HashMap::new();
        if let Some(ref branch) = current_branch {
            let mut stmt = conn.prepare(
                "SELECT f.language, SUM(f.line_count)
                 FROM files f
                 JOIN file_branches fb ON f.id = fb.file_id
                 JOIN branches b ON fb.branch_id = b.id
                 WHERE b.name = ?
                 GROUP BY f.language"
            )?;
            let line_counts = stmt.query_map([branch], |row| {
                let language: String = row.get(0)?;
                let count: i64 = row.get(1)?;
                Ok((language, count as usize))
            })?;
            for result in line_counts {
                let (language, count) = result?;
                lines_by_language.insert(language, count);
            }
        } else {
            let mut stmt = conn.prepare("SELECT language, SUM(line_count) FROM files GROUP BY language")?;
            let line_counts = stmt.query_map([], |row| {
                let language: String = row.get(0)?;
                let count: i64 = row.get(1)?;
                Ok((language, count as usize))
            })?;
            for result in line_counts {
                let (language, count) = result?;
                lines_by_language.insert(language, count);
            }
        }
        Ok(crate::models::IndexStats {
            total_files,
            index_size_bytes,
            last_updated,
            files_by_language,
            lines_by_language,
        })
    }
fn get_or_create_branch_id(&self, conn: &Connection, branch_name: &str, commit_sha: Option<&str>) -> Result<i64> {
let existing_id: Option<i64> = conn
.query_row(
"SELECT id FROM branches WHERE name = ?",
[branch_name],
|row| row.get(0),
)
.optional()?;
if let Some(id) = existing_id {
return Ok(id);
}
let now = chrono::Utc::now().timestamp();
conn.execute(
"INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
VALUES (?, ?, ?, 0, 0)",
[branch_name, commit_sha.unwrap_or("unknown"), &now.to_string()],
)?;
let id: i64 = conn.last_insert_rowid();
Ok(id)
}
    /// Record that `path` (which must already exist in `files`) is present on
    /// `branch` with content `hash`; creates the branch row if needed.
    ///
    /// # Errors
    /// Fails when the DB cannot be opened or `path` has not been indexed yet.
    pub fn record_branch_file(
        &self,
        path: &str,
        branch: &str,
        hash: &str,
        commit_sha: Option<&str>,
    ) -> Result<()> {
        let db_path = self.cache_path.join(META_DB);
        let conn = Connection::open(&db_path)
            .context("Failed to open meta.db for branch file recording")?;
        // The file must have been written to `files` first (update_file / batch).
        let file_id: i64 = conn.query_row(
            "SELECT id FROM files WHERE path = ?",
            [path],
            |row| row.get(0)
        ).context(format!("File not found in index: {}", path))?;
        let branch_id = self.get_or_create_branch_id(&conn, branch, commit_sha)?;
        let now = chrono::Utc::now().timestamp();
        // (file_id, branch_id) is the primary key, so REPLACE just refreshes
        // the hash and timestamp for an existing pairing.
        conn.execute(
            "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
             VALUES (?, ?, ?, ?)",
            rusqlite::params![file_id, branch_id, hash, now],
        )?;
        Ok(())
    }
    /// Record many (path, hash) pairs for one branch inside a single
    /// transaction. Every path must already exist in `files`; a missing path
    /// aborts (and rolls back) the whole batch.
    pub fn batch_record_branch_files(
        &self,
        files: &[(String, String)], branch: &str,
        commit_sha: Option<&str>,
    ) -> Result<()> {
        log::info!("batch_record_branch_files: Processing {} files for branch '{}'", files.len(), branch);
        let db_path = self.cache_path.join(META_DB);
        let mut conn = Connection::open(&db_path)
            .context("Failed to open meta.db for batch branch recording")?;
        let now = chrono::Utc::now().timestamp();
        let tx = conn.transaction()?;
        // Branch row is created inside the same transaction as the file rows.
        let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
        log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
        let mut inserted = 0;
        for (path, hash) in files {
            log::trace!("Looking up file_id for path: {}", path);
            let file_id: i64 = tx.query_row(
                "SELECT id FROM files WHERE path = ?",
                [path.as_str()],
                |row| row.get(0)
            ).context(format!("File not found in index: {}", path))?;
            log::trace!("Found file_id={} for path: {}", file_id, path);
            // PK is (file_id, branch_id): REPLACE refreshes hash + timestamp.
            tx.execute(
                "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
                 VALUES (?, ?, ?, ?)",
                rusqlite::params![file_id, branch_id, hash.as_str(), now],
            )?;
            inserted += 1;
        }
        log::info!("Inserted {} file_branches entries", inserted);
        tx.commit()?;
        log::info!("Transaction committed successfully");
        Ok(())
    }
pub fn get_branch_files(&self, branch: &str) -> Result<HashMap<String, String>> {
let db_path = self.cache_path.join(META_DB);
if !db_path.exists() {
return Ok(HashMap::new());
}
let conn = Connection::open(&db_path)
.context("Failed to open meta.db")?;
let mut stmt = conn.prepare(
"SELECT f.path, fb.hash
FROM file_branches fb
JOIN files f ON fb.file_id = f.id
JOIN branches b ON fb.branch_id = b.id
WHERE b.name = ?"
)?;
let files: HashMap<String, String> = stmt
.query_map([branch], |row| Ok((row.get(0)?, row.get(1)?)))?
.collect::<Result<HashMap<_, _>, _>>()?;
log::debug!(
"Loaded {} files for branch '{}' from file_branches table",
files.len(),
branch
);
Ok(files)
}
pub fn branch_exists(&self, branch: &str) -> Result<bool> {
let db_path = self.cache_path.join(META_DB);
if !db_path.exists() {
return Ok(false);
}
let conn = Connection::open(&db_path)
.context("Failed to open meta.db")?;
let count: i64 = conn
.query_row(
"SELECT COUNT(*)
FROM file_branches fb
JOIN branches b ON fb.branch_id = b.id
WHERE b.name = ?
LIMIT 1",
[branch],
|row| row.get(0),
)
.unwrap_or(0);
Ok(count > 0)
}
    /// Metadata for one row of the `branches` table.
    ///
    /// # Errors
    /// Fails when the DB has not been initialized, or when `branch` has no
    /// row (the underlying query returns no rows and the error propagates).
    pub fn get_branch_info(&self, branch: &str) -> Result<BranchInfo> {
        let db_path = self.cache_path.join(META_DB);
        if !db_path.exists() {
            anyhow::bail!("Database not initialized");
        }
        let conn = Connection::open(&db_path)
            .context("Failed to open meta.db")?;
        let info = conn.query_row(
            "SELECT commit_sha, last_indexed, file_count, is_dirty FROM branches WHERE name = ?",
            [branch],
            |row| {
                Ok(BranchInfo {
                    branch: branch.to_string(),
                    commit_sha: row.get(0)?,
                    last_indexed: row.get(1)?,
                    file_count: row.get(2)?,
                    // is_dirty is stored as an INTEGER flag (0/1).
                    is_dirty: row.get::<_, i64>(3)? != 0,
                })
            },
        )?;
        Ok(info)
    }
pub fn update_branch_metadata(
&self,
branch: &str,
commit_sha: Option<&str>,
file_count: usize,
is_dirty: bool,
) -> Result<()> {
let db_path = self.cache_path.join(META_DB);
let conn = Connection::open(&db_path)
.context("Failed to open meta.db for branch metadata update")?;
let now = chrono::Utc::now().timestamp();
let is_dirty_int = if is_dirty { 1 } else { 0 };
let rows_updated = conn.execute(
"UPDATE branches
SET commit_sha = ?, last_indexed = ?, file_count = ?, is_dirty = ?
WHERE name = ?",
rusqlite::params![
commit_sha.unwrap_or("unknown"),
now,
file_count,
is_dirty_int,
branch
],
)?;
if rows_updated == 0 {
conn.execute(
"INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
VALUES (?, ?, ?, ?, ?)",
rusqlite::params![
branch,
commit_sha.unwrap_or("unknown"),
now,
file_count,
is_dirty_int
],
)?;
}
log::debug!(
"Updated branch metadata for '{}': commit={}, files={}, dirty={}",
branch,
commit_sha.unwrap_or("unknown"),
file_count,
is_dirty
);
Ok(())
}
pub fn find_file_with_hash(&self, hash: &str) -> Result<Option<(String, String)>> {
let db_path = self.cache_path.join(META_DB);
if !db_path.exists() {
return Ok(None);
}
let conn = Connection::open(&db_path)
.context("Failed to open meta.db")?;
let result = conn
.query_row(
"SELECT f.path, b.name
FROM file_branches fb
JOIN files f ON fb.file_id = f.id
JOIN branches b ON fb.branch_id = b.id
WHERE fb.hash = ?
LIMIT 1",
[hash],
|row| Ok((row.get(0)?, row.get(1)?)),
)
.optional()?;
Ok(result)
}
pub fn get_file_id(&self, path: &str) -> Result<Option<i64>> {
let db_path = self.cache_path.join(META_DB);
if !db_path.exists() {
return Ok(None);
}
let conn = Connection::open(&db_path)
.context("Failed to open meta.db")?;
let result = conn
.query_row(
"SELECT id FROM files WHERE path = ?",
[path],
|row| row.get(0),
)
.optional()?;
Ok(result)
}
    /// Resolve many paths to their `files.id` values. Queries are chunked at
    /// 900 bound parameters, safely below SQLite's default 999-variable
    /// limit. Paths absent from the index are simply missing from the
    /// returned map (not an error).
    pub fn batch_get_file_ids(&self, paths: &[String]) -> Result<HashMap<String, i64>> {
        let db_path = self.cache_path.join(META_DB);
        if !db_path.exists() {
            return Ok(HashMap::new());
        }
        let conn = Connection::open(&db_path)
            .context("Failed to open meta.db")?;
        // Below SQLITE_MAX_VARIABLE_NUMBER (default 999).
        const BATCH_SIZE: usize = 900;
        let mut results = HashMap::new();
        for chunk in paths.chunks(BATCH_SIZE) {
            // Build "?, ?, ..." with one placeholder per path in this chunk.
            let placeholders = chunk.iter()
                .map(|_| "?")
                .collect::<Vec<_>>()
                .join(", ");
            let query = format!("SELECT path, id FROM files WHERE path IN ({})", placeholders);
            let params: Vec<&str> = chunk.iter().map(|s| s.as_str()).collect();
            let mut stmt = conn.prepare(&query)?;
            let chunk_results = stmt.query_map(rusqlite::params_from_iter(params), |row| {
                Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
            })?
            .collect::<Result<HashMap<_, _>, _>>()?;
            results.extend(chunk_results);
        }
        log::debug!("Batch loaded {} file IDs (out of {} requested, {} chunks)",
            results.len(), paths.len(), paths.len().div_ceil(BATCH_SIZE));
        Ok(results)
    }
pub fn should_compact(&self) -> Result<bool> {
let db_path = self.cache_path.join(META_DB);
if !db_path.exists() {
return Ok(false);
}
let conn = Connection::open(&db_path)
.context("Failed to open meta.db for compaction check")?;
let last_compaction: i64 = conn
.query_row(
"SELECT value FROM statistics WHERE key = 'last_compaction'",
[],
|row| {
let value: String = row.get(0)?;
Ok(value.parse::<i64>().unwrap_or(0))
},
)
.unwrap_or(0);
let now = chrono::Utc::now().timestamp();
const COMPACTION_THRESHOLD_SECS: i64 = 86400;
let elapsed_secs = now - last_compaction;
let should_run = elapsed_secs >= COMPACTION_THRESHOLD_SECS;
log::debug!(
"Compaction check: last={}, now={}, elapsed={}s, should_compact={}",
last_compaction,
now,
elapsed_secs,
should_run
);
Ok(should_run)
}
pub fn update_compaction_timestamp(&self) -> Result<()> {
let db_path = self.cache_path.join(META_DB);
let conn = Connection::open(&db_path)
.context("Failed to open meta.db for compaction timestamp update")?;
let now = chrono::Utc::now().timestamp();
conn.execute(
"INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
["last_compaction", &now.to_string(), &now.to_string()],
)?;
log::debug!("Updated last_compaction timestamp to: {}", now);
Ok(())
}
    /// Compact the cache: drop DB rows for files that no longer exist on
    /// disk, VACUUM to reclaim the freed pages, and report files removed /
    /// bytes saved / duration. Always refreshes the compaction timestamp,
    /// even when there was nothing to remove.
    pub fn compact(&self) -> Result<crate::models::CompactionReport> {
        let start_time = std::time::Instant::now();
        log::info!("Starting cache compaction...");
        let size_before = self.calculate_cache_size()?;
        let deleted_files = self.identify_deleted_files()?;
        log::info!("Found {} deleted files to remove from cache", deleted_files.len());
        if deleted_files.is_empty() {
            log::info!("No deleted files to compact - cache is clean");
            // Stamp the timestamp anyway so should_compact() backs off.
            self.update_compaction_timestamp()?;
            return Ok(crate::models::CompactionReport {
                files_removed: 0,
                space_saved_bytes: 0,
                duration_ms: start_time.elapsed().as_millis() as u64,
            });
        }
        self.delete_files_from_db(&deleted_files)?;
        log::info!("Deleted {} files from database", deleted_files.len());
        self.vacuum_database()?;
        log::info!("Completed VACUUM operation");
        let size_after = self.calculate_cache_size()?;
        // saturating_sub: size can theoretically grow between measurements.
        let space_saved = size_before.saturating_sub(size_after);
        self.update_compaction_timestamp()?;
        let duration_ms = start_time.elapsed().as_millis() as u64;
        log::info!(
            "Cache compaction completed: {} files removed, {} bytes saved ({:.2} MB), took {}ms",
            deleted_files.len(),
            space_saved,
            space_saved as f64 / 1_048_576.0,
            duration_ms
        );
        Ok(crate::models::CompactionReport {
            files_removed: deleted_files.len(),
            space_saved_bytes: space_saved,
            duration_ms,
        })
    }
fn identify_deleted_files(&self) -> Result<Vec<i64>> {
let db_path = self.cache_path.join(META_DB);
let conn = Connection::open(&db_path)
.context("Failed to open meta.db for deleted file identification")?;
let workspace_root = self.workspace_root();
let mut stmt = conn.prepare("SELECT id, path FROM files")?;
let files = stmt.query_map([], |row| {
Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
})?
.collect::<Result<Vec<_>, _>>()?;
log::debug!("Checking {} files for deletion status", files.len());
let mut deleted_file_ids = Vec::new();
for (file_id, file_path) in files {
let full_path = workspace_root.join(&file_path);
if !full_path.exists() {
log::trace!("File no longer exists: {} (id={})", file_path, file_id);
deleted_file_ids.push(file_id);
}
}
Ok(deleted_file_ids)
}
/// Delete the given `files` rows from `meta.db` inside a single transaction.
///
/// Ids are deleted in chunks of at most 900 so each `IN (...)` list stays
/// below SQLite's default bound-parameter limit (historically 999).
/// The log message asserts related tables are cleaned up via CASCADE —
/// NOTE(review): confirm the schema actually declares ON DELETE CASCADE
/// foreign keys and that foreign_keys is enabled on this connection.
fn delete_files_from_db(&self, file_ids: &[i64]) -> Result<()> {
    if file_ids.is_empty() {
        return Ok(());
    }
    let db_path = self.cache_path.join(META_DB);
    let mut conn = Connection::open(&db_path)
        .context("Failed to open meta.db for file deletion")?;
    let tx = conn.transaction()?;
    // Stay under SQLITE_MAX_VARIABLE_NUMBER (default 999) per statement.
    const BATCH_SIZE: usize = 900;
    for chunk in file_ids.chunks(BATCH_SIZE) {
        let placeholders = vec!["?"; chunk.len()].join(", ");
        let delete_query = format!("DELETE FROM files WHERE id IN ({})", placeholders);
        // Bind directly from the slice — no need to clone the chunk into a Vec.
        tx.execute(&delete_query, rusqlite::params_from_iter(chunk.iter().copied()))?;
    }
    tx.commit()?;
    log::debug!("Deleted {} files from database (CASCADE handled related tables)", file_ids.len());
    Ok(())
}
/// Run SQLite's `VACUUM` on `meta.db` to reclaim space freed by deletions.
fn vacuum_database(&self) -> Result<()> {
    let conn = Connection::open(self.cache_path.join(META_DB))
        .context("Failed to open meta.db for VACUUM")?;
    conn.execute("VACUUM", [])?;
    log::debug!("VACUUM completed successfully");
    Ok(())
}
/// Sum the on-disk sizes (in bytes) of the known cache artifacts.
///
/// Missing files simply contribute zero bytes.
/// NOTE(review): `hashes.json` (HASHES_JSON) is not included in this list —
/// confirm whether that omission is intentional.
fn calculate_cache_size(&self) -> Result<u64> {
    let artifacts = [META_DB, TOKENS_BIN, CONFIG_TOML, "content.bin", "trigrams.bin"];
    let total: u64 = artifacts
        .iter()
        .filter_map(|name| std::fs::metadata(self.cache_path.join(name)).ok())
        .map(|meta| meta.len())
        .sum();
    Ok(total)
}
}
/// Metadata about one indexed branch, as stored in `meta.db`.
#[derive(Debug, Clone)]
pub struct BranchInfo {
    /// Branch name (e.g. "main", or "_default" when no VCS branch applies).
    pub branch: String,
    /// Commit SHA recorded when this branch was last indexed.
    pub commit_sha: String,
    /// Timestamp of the last index run for this branch (Unix seconds, per the
    /// `chrono::Utc::now().timestamp()` convention used elsewhere in this file).
    pub last_indexed: i64,
    /// Number of files recorded for this branch.
    pub file_count: usize,
    /// Whether the branch was indexed in a "dirty" state — presumably the
    /// working tree had uncommitted changes; confirm against the indexer.
    pub is_dirty: bool,
}
// Unit tests for CacheManager: init/clear lifecycle, file and branch
// bookkeeping, statistics, schema shape, concurrency, and corruption handling.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    #[test]
    fn test_cache_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        assert!(!cache.exists());
        cache.init().unwrap();
        assert!(cache.exists());
        assert!(cache.path().exists());
        assert!(cache.path().join(META_DB).exists());
        assert!(cache.path().join(CONFIG_TOML).exists());
    }

    #[test]
    fn test_cache_init_idempotent() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        // A second init must not fail or wipe the cache.
        cache.init().unwrap();
        cache.init().unwrap();
        assert!(cache.exists());
    }

    #[test]
    fn test_cache_clear() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        assert!(cache.exists());
        cache.clear().unwrap();
        assert!(!cache.exists());
    }

    #[test]
    fn test_cache_clear_nonexistent() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        assert!(!cache.exists());
        // Clearing a cache that was never created is a no-op, not an error.
        cache.clear().unwrap();
        assert!(!cache.exists());
    }

    #[test]
    fn test_load_all_hashes_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        let hashes = cache.load_all_hashes().unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_load_all_hashes_before_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        // Loading before init returns an empty map rather than erroring.
        let hashes = cache.load_all_hashes().unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_load_hashes_for_branch_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        let hashes = cache.load_hashes_for_branch("main").unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_update_file() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
        assert_eq!(files[0].language, "rust");
    }

    #[test]
    fn test_update_file_multiple() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();
        cache.update_file("README.md", "markdown", 50).unwrap();
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 3);
    }

    #[test]
    fn test_update_file_replace() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        // Updating the same path twice must upsert, not duplicate.
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/main.rs", "rust", 150).unwrap();
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
    }

    #[test]
    fn test_batch_update_files() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        let files = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("test.py".to_string(), "python".to_string(), 50),
        ];
        cache.batch_update_files(&files).unwrap();
        let stored_files = cache.list_files().unwrap();
        assert_eq!(stored_files.len(), 3);
    }

    #[test]
    fn test_update_stats() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();
        cache.record_branch_file("src/main.rs", "_default", "hash1", None).unwrap();
        cache.record_branch_file("src/lib.rs", "_default", "hash2", None).unwrap();
        cache.update_stats("_default").unwrap();
        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 2);
    }

    #[test]
    fn test_stats_empty_cache() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 0);
        assert_eq!(stats.files_by_language.len(), 0);
    }

    #[test]
    fn test_stats_before_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 0);
    }

    #[test]
    fn test_stats_by_language() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        cache.update_file("main.rs", "Rust", 100).unwrap();
        cache.update_file("lib.rs", "Rust", 200).unwrap();
        cache.update_file("script.py", "Python", 50).unwrap();
        cache.update_file("test.py", "Python", 80).unwrap();
        cache.record_branch_file("main.rs", "_default", "hash1", None).unwrap();
        cache.record_branch_file("lib.rs", "_default", "hash2", None).unwrap();
        cache.record_branch_file("script.py", "_default", "hash3", None).unwrap();
        cache.record_branch_file("test.py", "_default", "hash4", None).unwrap();
        cache.update_stats("_default").unwrap();
        let stats = cache.stats().unwrap();
        assert_eq!(stats.files_by_language.get("Rust"), Some(&2));
        assert_eq!(stats.files_by_language.get("Python"), Some(&2));
        // Line totals are aggregated per language: 100+200 and 50+80.
        assert_eq!(stats.lines_by_language.get("Rust"), Some(&300));
        assert_eq!(stats.lines_by_language.get("Python"), Some(&130));
    }

    #[test]
    fn test_list_files_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_list_files() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 2);
        // Listing is ordered by path: "src/lib.rs" sorts before "src/main.rs".
        assert_eq!(files[0].path, "src/lib.rs");
        assert_eq!(files[1].path, "src/main.rs");
        assert_eq!(files[0].language, "rust");
    }

    #[test]
    fn test_list_files_before_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_branch_exists() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        assert!(!cache.branch_exists("main").unwrap());
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "hash1", Some("commit123")).unwrap();
        assert!(cache.branch_exists("main").unwrap());
        assert!(!cache.branch_exists("feature-branch").unwrap());
    }

    #[test]
    fn test_record_branch_file() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "hash1", Some("commit123")).unwrap();
        let files = cache.get_branch_files("main").unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files.get("src/main.rs"), Some(&"hash1".to_string()));
    }

    #[test]
    fn test_get_branch_files_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        let files = cache.get_branch_files("nonexistent").unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_batch_record_branch_files() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        // Files must exist in `files` before branch rows can reference them.
        let file_metadata = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("README.md".to_string(), "markdown".to_string(), 50),
        ];
        cache.batch_update_files(&file_metadata).unwrap();
        let files = vec![
            ("src/main.rs".to_string(), "hash1".to_string()),
            ("src/lib.rs".to_string(), "hash2".to_string()),
            ("README.md".to_string(), "hash3".to_string()),
        ];
        cache.batch_record_branch_files(&files, "main", Some("commit123")).unwrap();
        let branch_files = cache.get_branch_files("main").unwrap();
        assert_eq!(branch_files.len(), 3);
        assert_eq!(branch_files.get("src/main.rs"), Some(&"hash1".to_string()));
        assert_eq!(branch_files.get("src/lib.rs"), Some(&"hash2".to_string()));
        assert_eq!(branch_files.get("README.md"), Some(&"hash3".to_string()));
    }

    #[test]
    fn test_update_branch_metadata() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        cache.update_branch_metadata("main", Some("commit123"), 10, false).unwrap();
        let info = cache.get_branch_info("main").unwrap();
        assert_eq!(info.branch, "main");
        assert_eq!(info.commit_sha, "commit123");
        assert_eq!(info.file_count, 10);
        assert!(!info.is_dirty);
    }

    #[test]
    fn test_update_branch_metadata_dirty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        cache.update_branch_metadata("feature", Some("commit456"), 5, true).unwrap();
        let info = cache.get_branch_info("feature").unwrap();
        assert!(info.is_dirty);
    }

    #[test]
    fn test_find_file_with_hash() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "unique_hash", Some("commit123")).unwrap();
        let result = cache.find_file_with_hash("unique_hash").unwrap();
        assert!(result.is_some());
        let (path, branch) = result.unwrap();
        assert_eq!(path, "src/main.rs");
        assert_eq!(branch, "main");
    }

    #[test]
    fn test_find_file_with_hash_not_found() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        let result = cache.find_file_with_hash("nonexistent_hash").unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn test_config_toml_created() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        let config_path = cache.path().join(CONFIG_TOML);
        let config_content = std::fs::read_to_string(&config_path).unwrap();
        assert!(config_content.contains("[index]"));
        assert!(config_content.contains("[search]"));
        assert!(config_content.contains("[performance]"));
        assert!(config_content.contains("max_file_size"));
    }

    #[test]
    fn test_meta_db_schema() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        let db_path = cache.path().join(META_DB);
        let conn = Connection::open(&db_path).unwrap();
        let tables: Vec<String> = conn
            .prepare("SELECT name FROM sqlite_master WHERE type='table'").unwrap()
            .query_map([], |row| row.get(0)).unwrap()
            .collect::<Result<Vec<_>, _>>().unwrap();
        assert!(tables.contains(&"files".to_string()));
        assert!(tables.contains(&"statistics".to_string()));
        assert!(tables.contains(&"config".to_string()));
        assert!(tables.contains(&"file_branches".to_string()));
        assert!(tables.contains(&"branches".to_string()));
        assert!(tables.contains(&"file_dependencies".to_string()));
        assert!(tables.contains(&"file_exports".to_string()));
    }

    #[test]
    fn test_concurrent_file_updates() {
        use std::thread;
        let temp = TempDir::new().unwrap();
        let cache_path = temp.path().to_path_buf();
        let cache = CacheManager::new(&cache_path);
        cache.init().unwrap();
        // Ten threads, each with its own CacheManager over the same path,
        // must all be able to write without losing updates.
        let handles: Vec<_> = (0..10)
            .map(|i| {
                let path = cache_path.clone();
                thread::spawn(move || {
                    let cache = CacheManager::new(&path);
                    cache
                        .update_file(
                            &format!("file_{}.rs", i),
                            "rust",
                            i * 10,
                        )
                        .unwrap();
                })
            })
            .collect();
        for handle in handles {
            handle.join().unwrap();
        }
        let cache = CacheManager::new(&cache_path);
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 10);
    }

    #[test]
    fn test_validate_corrupted_database() {
        use std::io::Write;
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        // Overwrite meta.db with garbage; validate() must report corruption.
        let db_path = cache.path().join(META_DB);
        let mut file = File::create(&db_path).unwrap();
        file.write_all(b"CORRUPTED DATA").unwrap();
        let result = cache.validate();
        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        eprintln!("Error message: {}", err_msg);
        assert!(err_msg.contains("corrupted") || err_msg.contains("not a database"));
    }

    #[test]
    fn test_validate_corrupted_trigrams() {
        use std::io::Write;
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        let trigrams_path = cache.path().join("trigrams.bin");
        let mut file = File::create(&trigrams_path).unwrap();
        file.write_all(b"BADM").unwrap();
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("trigrams.bin") && err.contains("corrupted"));
    }

    #[test]
    fn test_validate_corrupted_content() {
        use std::io::Write;
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        let content_path = cache.path().join("content.bin");
        let mut file = File::create(&content_path).unwrap();
        file.write_all(b"BADM").unwrap();
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("content.bin") && err.contains("corrupted"));
    }

    #[test]
    fn test_validate_missing_schema_table() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        cache.init().unwrap();
        let db_path = cache.path().join(META_DB);
        let conn = Connection::open(&db_path).unwrap();
        conn.execute("DROP TABLE files", []).unwrap();
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("files") && err.contains("missing"));
    }
}