use crate::db::index_state::{get_last_indexed_tree, UpdateStats};
use crate::db::SqliteStore;
use crate::git::{get_git_tree_sha, git_diff_tree, FileStatus};
use anyhow::{Context, Result};
use std::path::Path;
use tracing::{debug, info};
impl UpdateStats {
    /// Estimated USD cost per generated embedding.
    // NOTE(review): value taken from the previously hard-coded 0.00002 —
    // confirm against the embedding provider's current pricing.
    const COST_PER_EMBEDDING_USD: f64 = 0.00002;

    /// Creates a stats record with all counters zeroed.
    pub fn new() -> Self {
        Self {
            files_processed: 0,
            chunks_processed: 0,
            embeddings_generated: 0,
        }
    }

    /// Stats for an update that was skipped entirely (nothing processed).
    pub fn skipped() -> Self {
        Self::new()
    }

    /// Fraction of processed chunks whose embeddings did NOT need to be
    /// generated (i.e. were served from cache).
    ///
    /// Returns 1.0 when no chunks were processed (vacuously all cached).
    pub fn cache_hit_rate(&self) -> f64 {
        if self.chunks_processed == 0 {
            return 1.0;
        }
        1.0 - (self.embeddings_generated as f64 / self.chunks_processed as f64)
    }

    /// Estimated USD cost of the embeddings generated during this update.
    pub fn cost(&self) -> f64 {
        self.embeddings_generated as f64 * Self::COST_PER_EMBEDDING_USD
    }
}
impl Default for UpdateStats {
fn default() -> Self {
Self::new()
}
}
/// Detaches every chunk belonging to `relpath` from `worktree_id`, then
/// garbage-collects chunks no longer referenced by any worktree.
///
/// Returns the number of `chunk_worktrees` rows deleted (0 when the file had
/// no chunks attached to this worktree).
///
/// # Errors
/// Propagates any SQLite error from the underlying queries, including
/// row-decoding failures while collecting chunk ids.
pub async fn remove_worktree_from_chunks(
    store: &SqliteStore,
    worktree_id: i64,
    relpath: &str,
) -> Result<i64> {
    // Owned copy so the closure can be 'static for `store.run`.
    let relpath = relpath.to_string();
    store
        .run(move |conn| {
            // Collect the ids of this file's chunks that are attached to the
            // worktree. Row-decoding errors are propagated instead of being
            // silently dropped.
            let chunk_ids: Vec<i64> = {
                let mut stmt = conn.prepare(
                    "SELECT c.id FROM chunks c
                JOIN files f ON c.file_id = f.id
                JOIN chunk_worktrees cw ON cw.chunk_id = c.id
                WHERE f.relpath = ?1 AND cw.worktree_id = ?2",
                )?;
                let rows =
                    stmt.query_map(rusqlite::params![relpath, worktree_id], |row| row.get(0))?;
                rows.collect::<std::result::Result<Vec<i64>, _>>()?
            };
            if chunk_ids.is_empty() {
                return Ok(0);
            }
            // Build an `IN (?,?,...)` clause sized to the id list; the final
            // `?` binds the worktree id.
            let placeholders = vec!["?"; chunk_ids.len()].join(",");
            let sql = format!(
                "DELETE FROM chunk_worktrees WHERE chunk_id IN ({}) AND worktree_id = ?",
                placeholders
            );
            let affected = conn.execute(
                &sql,
                rusqlite::params_from_iter(chunk_ids.iter().chain(std::iter::once(&worktree_id))),
            )?;
            // Garbage-collect chunks that no worktree references anymore.
            conn.execute(
                "DELETE FROM chunks WHERE id NOT IN (SELECT DISTINCT chunk_id FROM chunk_worktrees)",
                []
            )?;
            Ok(affected as i64)
        })
        .await
}
/// Performs an incremental index update for `worktree_id` rooted at `repo_path`.
///
/// Compares the repository's current git tree SHA against the last indexed
/// one: on a match the update is skipped entirely. Otherwise, files deleted
/// since the last index have their chunks detached via
/// [`remove_worktree_from_chunks`]; added/modified files are only logged here
/// (their processing happens elsewhere). Finally the stored tree SHA is
/// advanced so the next call sees this tree as indexed.
///
/// The literal `"init"` is the sentinel returned by `get_last_indexed_tree`
/// meaning "never indexed"; in that case an empty diff is used because the
/// full index is handled separately.
///
/// # Errors
/// Fails if the git tree SHA or diff cannot be obtained, or if any database
/// operation fails.
pub async fn incremental_update(
    store: &SqliteStore,
    worktree_id: i64,
    repo_path: &Path,
) -> Result<UpdateStats> {
    let current_tree_sha = get_git_tree_sha(repo_path)
        .with_context(|| format!("Failed to get git tree SHA for {:?}", repo_path))?;
    debug!(
        worktree_id = worktree_id,
        tree_sha = %current_tree_sha,
        "Got current git tree SHA"
    );
    let last_indexed = get_last_indexed_tree(store, worktree_id)
        .await
        .with_context(|| {
            format!(
                "Failed to get last indexed tree for worktree {}",
                worktree_id
            )
        })?;
    // Fast path: tree unchanged since the last index — nothing to do.
    if last_indexed != "init" && last_indexed == current_tree_sha {
        info!(
            worktree_id = worktree_id,
            tree_sha = %current_tree_sha,
            "Tree SHA unchanged, skipping incremental update"
        );
        return Ok(UpdateStats::skipped());
    }
    if last_indexed != "init" {
        debug!(
            worktree_id = worktree_id,
            last_sha = %last_indexed,
            current_sha = %current_tree_sha,
            "Tree SHA changed, processing diff"
        );
    } else {
        debug!(
            worktree_id = worktree_id,
            "No previous tree SHA found, this is likely first index"
        );
    }
    // Diff the previously indexed tree against the current one; a first-time
    // index gets an empty diff (full indexing is handled separately).
    let changes = if last_indexed != "init" {
        git_diff_tree(&last_indexed, &current_tree_sha, repo_path).with_context(|| {
            format!(
                "Failed to get diff-tree between {} and {}",
                last_indexed, current_tree_sha
            )
        })?
    } else {
        debug!("No previous tree SHA, returning empty diff (full index handled separately)");
        Vec::new()
    };
    let mut stats = UpdateStats::new();
    stats.files_processed = changes.len() as i32;
    for change in &changes {
        let relpath = change.path.to_string_lossy();
        match change.status {
            FileStatus::Added | FileStatus::Modified => {
                // Only logged here; chunking/embedding is performed elsewhere.
                debug!(
                    file = %relpath,
                    status = ?change.status,
                    "File needs processing"
                );
            }
            FileStatus::Deleted => {
                debug!(
                    file = %relpath,
                    "File deleted, removing chunks"
                );
                let affected = remove_worktree_from_chunks(store, worktree_id, &relpath).await?;
                debug!(
                    file = %relpath,
                    chunks_affected = affected,
                    "Removed worktree from chunks"
                );
            }
        }
    }
    // Record the newly indexed tree so the next run can short-circuit.
    store
        .run({
            let tree_sha = current_tree_sha.clone();
            move |conn| {
                conn.execute(
                    "INSERT OR REPLACE INTO index_state (worktree_id, tree_sha, last_indexed)
             VALUES (?1, ?2, datetime('now'))",
                    rusqlite::params![worktree_id, tree_sha],
                )?;
                Ok(())
            }
        })
        .await
        .with_context(|| format!("Failed to update index state for worktree {}", worktree_id))?;
    info!(
        worktree_id = worktree_id,
        files_processed = stats.files_processed,
        tree_sha = %current_tree_sha,
        "Incremental update complete"
    );
    Ok(stats)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every counter in `stats` is zero.
    fn assert_zeroed(stats: &UpdateStats) {
        assert_eq!(stats.files_processed, 0);
        assert_eq!(stats.chunks_processed, 0);
        assert_eq!(stats.embeddings_generated, 0);
    }

    #[test]
    fn test_update_stats_new() {
        assert_zeroed(&UpdateStats::new());
    }

    #[test]
    fn test_update_stats_skipped() {
        assert_zeroed(&UpdateStats::skipped());
    }

    #[test]
    fn test_cache_hit_rate_no_chunks() {
        assert_eq!(UpdateStats::new().cache_hit_rate(), 1.0);
    }

    #[test]
    fn test_cache_hit_rate_all_cached() {
        let stats = UpdateStats {
            files_processed: 10,
            chunks_processed: 100,
            ..UpdateStats::new()
        };
        assert_eq!(stats.cache_hit_rate(), 1.0);
    }

    #[test]
    fn test_cache_hit_rate_partial() {
        let stats = UpdateStats {
            files_processed: 10,
            chunks_processed: 100,
            embeddings_generated: 50,
        };
        assert_eq!(stats.cache_hit_rate(), 0.5);
    }

    #[test]
    fn test_cost_calculation() {
        let stats = UpdateStats {
            files_processed: 10,
            chunks_processed: 100,
            embeddings_generated: 1000,
        };
        assert_eq!(stats.cost(), 0.02);
    }

    #[test]
    fn test_default() {
        assert_zeroed(&UpdateStats::default());
    }
}