use std::collections::{HashMap, HashSet};
use std::io::Write;
use std::path::Path;
use rusqlite::{params, Connection};
use sem_core::model::entity::SemanticEntity;
use sem_core::parser::graph::{EntityGraph, EntityInfo, EntityRef, RefType};
use sem_mcp::cache as shared_cache;
pub struct PartialCache {
pub stale_files: Vec<String>,
pub cached_entities: Vec<SemanticEntity>,
pub cached_edges: Vec<EntityRef>,
pub cached_importing_stale_files: Vec<String>,
pub stale_file_entities: Vec<SemanticEntity>,
}
pub struct DiskCache {
conn: Connection,
}
impl DiskCache {
pub fn open(repo_root: &Path) -> Result<Self, rusqlite::Error> {
let cache_dir = shared_cache::cache_dir_for_repo(repo_root)
.ok_or_else(|| rusqlite::Error::InvalidPath(repo_root.to_path_buf()))?;
shared_cache::create_cache_dir(&cache_dir)?;
let db_path = cache_dir.join("cache.db");
let conn = Connection::open(db_path)?;
shared_cache::initialize_schema(&conn)?;
Ok(Self { conn })
}
pub fn save(
&self,
root: &Path,
files: &[String],
graph: &EntityGraph,
entities: &[SemanticEntity],
) -> Result<(), rusqlite::Error> {
let tx = self.conn.unchecked_transaction()?;
tx.execute_batch(
"DELETE FROM files; DELETE FROM entities; DELETE FROM edges; DELETE FROM file_imports; DELETE FROM entity_flags;",
)?;
{
let mut stmt = tx.prepare(
"INSERT INTO files (path, mtime_secs, mtime_nanos, content_hash) VALUES (?1, ?2, ?3, ?4)",
)?;
for file in files {
if shared_cache::is_manifest_file_name(file) {
continue;
}
let full = root.join(file);
if let Some((secs, nanos, content_hash)) = shared_cache::file_fingerprint(&full) {
stmt.execute(params![file, secs, nanos, content_hash])?;
}
}
}
shared_cache::refresh_manifest_entries(&tx, root)?;
shared_cache::refresh_file_import_entries(&tx, root, files, files)?;
{
let mut stmt = tx.prepare(
"INSERT OR REPLACE INTO entities (id, name, entity_type, file_path, start_line, end_line, content, content_hash, structural_hash, parent_id, metadata_json) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
)?;
for e in entities {
let metadata_json = e
.metadata
.as_ref()
.and_then(|m| serde_json::to_string(m).ok());
stmt.execute(params![
e.id,
e.name,
e.entity_type,
e.file_path,
e.start_line as i64,
e.end_line as i64,
e.content,
e.content_hash,
e.structural_hash,
e.parent_id,
metadata_json,
])?;
}
}
{
let mut stmt = tx.prepare(
"INSERT INTO edges (from_entity, to_entity, ref_type) VALUES (?1, ?2, ?3)",
)?;
for edge in &graph.edges {
let rt = match edge.ref_type {
RefType::Calls => "calls",
RefType::TypeRef => "typeref",
RefType::Imports => "imports",
};
stmt.execute(params![edge.from_entity, edge.to_entity, rt])?;
}
}
shared_cache::set_cache_kind(&tx, shared_cache::CACHE_KIND_FULL)?;
tx.commit()?;
Ok(())
}
pub fn save_topology(
&self,
root: &Path,
files: &[String],
graph: &EntityGraph,
entities: &[SemanticEntity],
custom_test_dirs: &[String],
) -> Result<(), rusqlite::Error> {
let tx = self.conn.unchecked_transaction()?;
tx.execute_batch(
"DELETE FROM files; DELETE FROM entities; DELETE FROM edges; DELETE FROM file_imports; DELETE FROM entity_flags;",
)?;
{
let mut stmt = tx.prepare(
"INSERT INTO files (path, mtime_secs, mtime_nanos, content_hash) VALUES (?1, ?2, ?3, ?4)",
)?;
for file in files {
if shared_cache::is_manifest_file_name(file) {
continue;
}
let full = root.join(file);
if let Some((secs, nanos, content_hash)) = shared_cache::file_fingerprint(&full) {
stmt.execute(params![file, secs, nanos, content_hash])?;
}
}
}
shared_cache::refresh_manifest_entries(&tx, root)?;
{
let mut stmt = tx.prepare(
"INSERT OR REPLACE INTO entities (id, name, entity_type, file_path, start_line, end_line, content, content_hash, structural_hash, parent_id, metadata_json) VALUES (?1, ?2, ?3, ?4, ?5, ?6, '', '', NULL, ?7, NULL)",
)?;
for e in graph.entities.values() {
stmt.execute(params![
e.id,
e.name,
e.entity_type,
e.file_path,
e.start_line as i64,
e.end_line as i64,
e.parent_id,
])?;
}
}
{
let mut stmt = tx.prepare(
"INSERT INTO edges (from_entity, to_entity, ref_type) VALUES (?1, ?2, ?3)",
)?;
for edge in &graph.edges {
let rt = match edge.ref_type {
RefType::Calls => "calls",
RefType::TypeRef => "typeref",
RefType::Imports => "imports",
};
stmt.execute(params![edge.from_entity, edge.to_entity, rt])?;
}
}
let test_entity_ids = graph.filter_test_entities_with_custom_dirs(entities, custom_test_dirs);
{
let mut stmt =
tx.prepare("INSERT INTO entity_flags (entity_id, is_test) VALUES (?1, 1)")?;
for entity_id in &test_entity_ids {
stmt.execute(params![entity_id])?;
}
}
shared_cache::set_cache_kind(&tx, shared_cache::CACHE_KIND_TOPOLOGY)?;
tx.commit()?;
Ok(())
}
pub fn load(
&self,
root: &Path,
files: &[String],
) -> Option<(EntityGraph, Vec<SemanticEntity>)> {
if !self.has_fresh_complete_cache(root, files) {
return None;
}
let mut entity_stmt = self
.conn
.prepare("SELECT id, name, entity_type, file_path, start_line, end_line, content, content_hash, structural_hash, parent_id, metadata_json FROM entities")
.ok()?;
let entities: Vec<SemanticEntity> = entity_stmt
.query_map([], |row| {
let metadata_json: Option<String> = row.get(10)?;
let metadata = metadata_json.and_then(|j| serde_json::from_str(&j).ok());
Ok(SemanticEntity {
id: row.get(0)?,
name: row.get(1)?,
entity_type: row.get(2)?,
file_path: row.get(3)?,
start_line: row.get::<_, i64>(4)? as usize,
end_line: row.get::<_, i64>(5)? as usize,
content: row.get(6)?,
content_hash: row.get(7)?,
structural_hash: row.get(8)?,
parent_id: row.get(9)?,
metadata,
})
})
.ok()?
.filter_map(|r| r.ok())
.collect();
let mut edge_stmt = self
.conn
.prepare("SELECT from_entity, to_entity, ref_type FROM edges")
.ok()?;
let edges: Vec<EntityRef> = edge_stmt
.query_map([], |row| {
let rt: String = row.get(2)?;
let ref_type = match rt.as_str() {
"calls" => RefType::Calls,
"imports" => RefType::Imports,
_ => RefType::TypeRef,
};
Ok(EntityRef {
from_entity: row.get(0)?,
to_entity: row.get(1)?,
ref_type,
})
})
.ok()?
.filter_map(|r| r.ok())
.collect();
let entity_map: HashMap<String, EntityInfo> = entities
.iter()
.map(|e| {
(
e.id.clone(),
EntityInfo {
id: e.id.clone(),
name: e.name.clone(),
entity_type: e.entity_type.clone(),
file_path: e.file_path.clone(),
start_line: e.start_line,
end_line: e.end_line,
parent_id: e.parent_id.clone(),
},
)
})
.collect();
let graph = EntityGraph::from_parts(entity_map, edges);
Some((graph, entities))
}
pub fn load_graph_topology(&self, root: &Path, files: &[String]) -> Option<EntityGraph> {
if !self.has_fresh_topology_cache(root, files) {
return None;
}
self.load_graph_topology_rows()
}
pub fn load_graph_topology_with_test_ids(
&self,
root: &Path,
files: &[String],
) -> Option<(EntityGraph, HashSet<String>)> {
if !self.has_fresh_topology_only_cache(root, files) {
return None;
}
let graph = self.load_graph_topology_rows()?;
let test_entity_ids = self.load_test_entity_ids()?;
Some((graph, test_entity_ids))
}
fn load_graph_topology_rows(&self) -> Option<EntityGraph> {
let mut entity_stmt = self
.conn
.prepare(
"SELECT id, name, entity_type, file_path, start_line, end_line, parent_id FROM entities",
)
.ok()?;
let entity_map: HashMap<String, EntityInfo> = entity_stmt
.query_map([], |row| {
let id: String = row.get(0)?;
Ok((
id.clone(),
EntityInfo {
id,
name: row.get(1)?,
entity_type: row.get(2)?,
file_path: row.get(3)?,
start_line: row.get::<_, i64>(4)? as usize,
end_line: row.get::<_, i64>(5)? as usize,
parent_id: row.get(6)?,
},
))
})
.ok()?
.filter_map(|r| r.ok())
.collect();
let edges = self.load_edges()?;
Some(EntityGraph::from_parts(entity_map, edges))
}
fn load_test_entity_ids(&self) -> Option<HashSet<String>> {
let mut stmt = self
.conn
.prepare("SELECT entity_id FROM entity_flags WHERE is_test != 0")
.ok()?;
let ids = stmt
.query_map([], |row| row.get::<_, String>(0))
.ok()?
.filter_map(|r| r.ok())
.collect();
Some(ids)
}
pub fn write_graph_json_topology<W: Write>(
&self,
root: &Path,
files: &[String],
mut writer: W,
) -> std::io::Result<bool> {
if !self.has_fresh_topology_cache(root, files) {
return Ok(false);
}
let entity_count: i64 =
match self
.conn
.query_row("SELECT COUNT(*) FROM entities", [], |row| row.get(0))
{
Ok(count) => count,
Err(_) => return Ok(false),
};
let edge_count: i64 = match self
.conn
.query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))
{
Ok(count) => count,
Err(_) => return Ok(false),
};
let mut entity_stmt = match self.conn.prepare(
"SELECT id, name, entity_type, file_path, start_line, end_line, parent_id FROM entities ORDER BY id",
) {
Ok(stmt) => stmt,
Err(_) => return Ok(false),
};
let mut edge_stmt = match self.conn.prepare(
"SELECT from_entity, to_entity, ref_type FROM edges ORDER BY from_entity, to_entity, CASE ref_type WHEN 'calls' THEN 0 WHEN 'imports' THEN 1 ELSE 2 END",
) {
Ok(stmt) => stmt,
Err(_) => return Ok(false),
};
writer.write_all(b"{\"entities\":[")?;
let mut entity_rows = entity_stmt.query([]).map_err(sql_io_error)?;
let mut first = true;
while let Some(row) = entity_rows.next().map_err(sql_io_error)? {
if first {
first = false;
} else {
writer.write_all(b",")?;
}
let entity = EntityInfo {
id: row.get(0).map_err(sql_io_error)?,
name: row.get(1).map_err(sql_io_error)?,
entity_type: row.get(2).map_err(sql_io_error)?,
file_path: row.get(3).map_err(sql_io_error)?,
start_line: row.get::<_, i64>(4).map_err(sql_io_error)? as usize,
end_line: row.get::<_, i64>(5).map_err(sql_io_error)? as usize,
parent_id: row.get(6).map_err(sql_io_error)?,
};
serde_json::to_writer(&mut writer, &entity).map_err(json_io_error)?;
}
writer.write_all(b"],\"edges\":[")?;
let mut edge_rows = edge_stmt.query([]).map_err(sql_io_error)?;
let mut first = true;
while let Some(row) = edge_rows.next().map_err(sql_io_error)? {
if first {
first = false;
} else {
writer.write_all(b",")?;
}
let rt: String = row.get(2).map_err(sql_io_error)?;
let ref_type = match rt.as_str() {
"calls" => RefType::Calls,
"imports" => RefType::Imports,
_ => RefType::TypeRef,
};
let edge = EntityRef {
from_entity: row.get(0).map_err(sql_io_error)?,
to_entity: row.get(1).map_err(sql_io_error)?,
ref_type,
};
serde_json::to_writer(&mut writer, &edge).map_err(json_io_error)?;
}
write!(
writer,
"],\"stats\":{{\"entityCount\":{},\"edgeCount\":{}}}}}\n",
entity_count, edge_count
)?;
Ok(true)
}
fn has_fresh_complete_cache(&self, root: &Path, files: &[String]) -> bool {
if !shared_cache::cache_has_kind(&self.conn, &[shared_cache::CACHE_KIND_FULL]) {
return false;
}
self.has_fresh_cache(root, files)
}
fn has_fresh_topology_cache(&self, root: &Path, files: &[String]) -> bool {
if !shared_cache::cache_has_kind(
&self.conn,
&[
shared_cache::CACHE_KIND_FULL,
shared_cache::CACHE_KIND_TOPOLOGY,
],
) {
return false;
}
self.has_fresh_cache(root, files)
}
fn has_fresh_topology_only_cache(&self, root: &Path, files: &[String]) -> bool {
if !shared_cache::cache_has_kind(&self.conn, &[shared_cache::CACHE_KIND_TOPOLOGY]) {
return false;
}
self.has_fresh_cache(root, files)
}
fn has_fresh_cache(&self, root: &Path, files: &[String]) -> bool {
if shared_cache::is_manifest_stale(&self.conn, root) {
return false;
}
let cached_count: i64 = match self
.conn
.query_row("SELECT COUNT(*) FROM files", [], |row| row.get(0))
{
Ok(count) => count,
Err(_) => return false,
};
if (cached_count - shared_cache::manifest_entry_count(&self.conn)) as usize
!= shared_cache::source_file_count(files)
{
return false;
}
let cached_mtimes: HashMap<String, (i64, i64, Option<String>)> = {
let Ok(mut stmt) = self
.conn
.prepare("SELECT path, mtime_secs, mtime_nanos, content_hash FROM files")
else {
return false;
};
let cached_mtimes = match stmt.query_map([], |row| {
Ok((
row.get::<_, String>(0)?,
(
row.get::<_, i64>(1)?,
row.get::<_, i64>(2)?,
row.get::<_, Option<String>>(3)?,
),
))
}) {
Ok(rows) => rows.filter_map(|r| r.ok()).collect(),
Err(_) => return false,
};
cached_mtimes
};
let mut fingerprint_refreshes = Vec::new();
for file in files {
if shared_cache::is_manifest_file_name(file) {
continue;
}
let Some((secs, nanos, content_hash)) = cached_mtimes.get(file.as_str()) else {
return false;
};
let full = root.join(file);
match shared_cache::file_freshness(&full, *secs, *nanos, content_hash.as_deref()) {
Some(shared_cache::FileFreshness::Fresh) => {}
Some(shared_cache::FileFreshness::FreshWithUpdatedFingerprint {
secs,
nanos,
content_hash,
}) => {
fingerprint_refreshes.push(shared_cache::FileFingerprintRefresh {
path: file.clone(),
mtime_secs: secs,
mtime_nanos: nanos,
content_hash,
});
}
Some(shared_cache::FileFreshness::Stale) | None => return false,
}
}
shared_cache::refresh_file_fingerprints_best_effort(&self.conn, &fingerprint_refreshes);
true
}
fn load_edges(&self) -> Option<Vec<EntityRef>> {
let mut edge_stmt = self
.conn
.prepare("SELECT from_entity, to_entity, ref_type FROM edges")
.ok()?;
let edges: Vec<EntityRef> = edge_stmt
.query_map([], |row| {
let rt: String = row.get(2)?;
let ref_type = match rt.as_str() {
"calls" => RefType::Calls,
"imports" => RefType::Imports,
_ => RefType::TypeRef,
};
Ok(EntityRef {
from_entity: row.get(0)?,
to_entity: row.get(1)?,
ref_type,
})
})
.ok()?
.filter_map(|r| r.ok())
.collect();
Some(edges)
}
pub fn load_partial(&self, root: &Path, files: &[String]) -> Option<PartialCache> {
if !shared_cache::cache_has_kind(&self.conn, &[shared_cache::CACHE_KIND_FULL]) {
return None;
}
if shared_cache::is_manifest_stale(&self.conn, root) {
return None;
}
let mut stmt = self
.conn
.prepare("SELECT path, mtime_secs, mtime_nanos, content_hash FROM files")
.ok()?;
let cached_files: HashMap<String, (i64, i64, Option<String>)> = stmt
.query_map([], |row| {
Ok((
row.get::<_, String>(0)?,
(
row.get::<_, i64>(1)?,
row.get::<_, i64>(2)?,
row.get::<_, Option<String>>(3)?,
),
))
})
.ok()?
.filter_map(|r| r.ok())
.collect();
drop(stmt);
if cached_files.is_empty() {
return None;
}
let source_files: Vec<&String> = files
.iter()
.filter(|file| !shared_cache::is_manifest_file_name(file))
.collect();
let source_file_count = source_files.len();
let current_set: HashSet<&str> = source_files.iter().map(|file| file.as_str()).collect();
let mut stale_source_files: Vec<String> = Vec::new();
let mut stale_current_file_count = 0;
let mut fingerprint_refreshes = Vec::new();
for file in &source_files {
match cached_files.get(file.as_str()) {
Some((secs, nanos, content_hash)) => {
let full = root.join(file.as_str());
match shared_cache::file_freshness(
&full,
*secs,
*nanos,
content_hash.as_deref(),
) {
Some(shared_cache::FileFreshness::Fresh) => {}
Some(shared_cache::FileFreshness::FreshWithUpdatedFingerprint {
secs,
nanos,
content_hash,
}) => {
fingerprint_refreshes.push(shared_cache::FileFingerprintRefresh {
path: (*file).clone(),
mtime_secs: secs,
mtime_nanos: nanos,
content_hash,
});
}
Some(shared_cache::FileFreshness::Stale) | None => {
stale_current_file_count += 1;
stale_source_files.push((*file).clone());
}
}
}
None => {
stale_current_file_count += 1;
stale_source_files.push((*file).clone());
}
}
}
let mut deleted_cached_files: Vec<String> = Vec::new();
for cached_path in cached_files.keys() {
if !shared_cache::is_cache_manifest_key(cached_path)
&& !shared_cache::is_manifest_file_name(cached_path)
&& !current_set.contains(cached_path.as_str())
{
deleted_cached_files.push(cached_path.clone());
}
}
shared_cache::refresh_file_fingerprints_best_effort(&self.conn, &fingerprint_refreshes);
if stale_source_files.is_empty() && deleted_cached_files.is_empty() {
return None;
}
if stale_current_file_count >= source_file_count {
return None;
}
let stale_set: HashSet<&str> = stale_source_files
.iter()
.chain(deleted_cached_files.iter())
.map(|s| s.as_str())
.collect();
let mut import_stale_files = stale_source_files.clone();
import_stale_files.extend(deleted_cached_files.iter().cloned());
let cached_importing_stale_files = shared_cache::cached_importing_files_for_stale_files(
&self.conn,
&import_stale_files,
&source_files,
);
let mut entity_stmt = self
.conn
.prepare("SELECT id, name, entity_type, file_path, start_line, end_line, content, content_hash, structural_hash, parent_id, metadata_json FROM entities")
.ok()?;
let mut cached_entities = Vec::new();
let mut stale_file_entities = Vec::new();
let mut entity_rows = entity_stmt.query([]).ok()?;
while let Some(row) = entity_rows.next().ok()? {
let metadata_json: Option<String> = row.get(10).ok()?;
let entity = SemanticEntity {
id: row.get(0).ok()?,
name: row.get(1).ok()?,
entity_type: row.get(2).ok()?,
file_path: row.get(3).ok()?,
start_line: row.get::<_, i64>(4).ok()? as usize,
end_line: row.get::<_, i64>(5).ok()? as usize,
content: row.get(6).ok()?,
content_hash: row.get(7).ok()?,
structural_hash: row.get(8).ok()?,
parent_id: row.get(9).ok()?,
metadata: metadata_json.and_then(|j| serde_json::from_str(&j).ok()),
};
if stale_set.contains(entity.file_path.as_str()) {
stale_file_entities.push(entity);
} else {
cached_entities.push(entity);
}
}
let mut edge_stmt = self
.conn
.prepare("SELECT from_entity, to_entity, ref_type FROM edges")
.ok()?;
let cached_edges: Vec<EntityRef> = edge_stmt
.query_map([], |row| {
let rt: String = row.get(2)?;
let ref_type = match rt.as_str() {
"calls" => RefType::Calls,
"imports" => RefType::Imports,
_ => RefType::TypeRef,
};
Ok(EntityRef {
from_entity: row.get(0)?,
to_entity: row.get(1)?,
ref_type,
})
})
.ok()?
.filter_map(|r| r.ok())
.collect();
Some(PartialCache {
stale_files: stale_source_files,
cached_entities,
cached_edges,
cached_importing_stale_files,
stale_file_entities,
})
}
pub fn save_incremental_with_repair_metadata(
&self,
root: &Path,
all_files: &[String],
stale_files: &[String],
graph: &EntityGraph,
entities: &[SemanticEntity],
repair_changed_clean_entity_ids: bool,
recomputed_edge_source_ids: &[String],
deleted_entity_ids: &[String],
) -> Result<(), rusqlite::Error> {
let source_stale_files: Vec<&String> = stale_files
.iter()
.filter(|file| !shared_cache::is_manifest_file_name(file))
.collect();
let source_stale_set: HashSet<&str> = source_stale_files
.iter()
.map(|file| file.as_str())
.collect();
let tx = self.conn.unchecked_transaction()?;
{
let mut del_files = tx.prepare("DELETE FROM files WHERE path = ?1")?;
for f in &source_stale_files {
del_files.execute(params![f])?;
}
}
let current_set: HashSet<&str> = all_files
.iter()
.map(|s| s.as_str())
.filter(|path| !shared_cache::is_manifest_file_name(path))
.collect();
let cached_paths: Vec<String> = {
let mut cached_stmt = tx.prepare("SELECT path FROM files")?;
cached_stmt
.query_map([], |row| row.get(0))
.map(|rows| rows.filter_map(|r| r.ok()).collect())
.unwrap_or_default()
};
let deleted_cached_files: Vec<String> = cached_paths
.into_iter()
.filter(|path| {
!shared_cache::is_cache_manifest_key(path)
&& !shared_cache::is_manifest_file_name(path)
&& !current_set.contains(path.as_str())
})
.collect();
let use_legacy_edge_fallback = !repair_changed_clean_entity_ids
&& recomputed_edge_source_ids.is_empty()
&& deleted_entity_ids.is_empty();
let cached_rewritten_entity_ids: HashSet<String> = if use_legacy_edge_fallback {
let rewritten_file_paths: HashSet<&str> = source_stale_files
.iter()
.map(|file| file.as_str())
.chain(deleted_cached_files.iter().map(String::as_str))
.collect();
let mut stmt = tx.prepare("SELECT id, file_path FROM entities")?;
let rows = stmt.query_map([], |row| {
Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
})?;
rows.filter_map(|row| row.ok())
.filter_map(|(id, file_path)| {
rewritten_file_paths
.contains(file_path.as_str())
.then_some(id)
})
.collect()
} else {
HashSet::new()
};
{
let mut del_files = tx.prepare("DELETE FROM files WHERE path = ?1")?;
for path in &deleted_cached_files {
del_files.execute(params![path])?;
}
}
{
let mut ins = tx.prepare(
"INSERT OR REPLACE INTO files (path, mtime_secs, mtime_nanos, content_hash) VALUES (?1, ?2, ?3, ?4)",
)?;
for file in &source_stale_files {
let full = root.join(file);
if let Some((secs, nanos, content_hash)) = shared_cache::file_fingerprint(&full) {
ins.execute(params![file, secs, nanos, content_hash])?;
}
}
}
shared_cache::refresh_manifest_entries(&tx, root)?;
let mut import_files_to_refresh: Vec<String> = source_stale_files
.iter()
.map(|file| (*file).clone())
.collect();
import_files_to_refresh.extend(deleted_cached_files.iter().cloned());
shared_cache::refresh_file_import_entries(&tx, root, &import_files_to_refresh, all_files)?;
if repair_changed_clean_entity_ids {
tx.execute("DELETE FROM entities", [])?;
} else {
let mut del = tx.prepare("DELETE FROM entities WHERE file_path = ?1")?;
for f in &source_stale_files {
del.execute(params![f])?;
}
for f in &deleted_cached_files {
del.execute(params![f])?;
}
}
{
let mut ins = tx.prepare(
"INSERT OR REPLACE INTO entities (id, name, entity_type, file_path, start_line, end_line, content, content_hash, structural_hash, parent_id, metadata_json) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
)?;
for e in entities {
if !repair_changed_clean_entity_ids
&& !source_stale_set.contains(e.file_path.as_str())
{
continue;
}
let metadata_json = e
.metadata
.as_ref()
.and_then(|m| serde_json::to_string(m).ok());
ins.execute(params![
e.id,
e.name,
e.entity_type,
e.file_path,
e.start_line as i64,
e.end_line as i64,
e.content,
e.content_hash,
e.structural_hash,
e.parent_id,
metadata_json,
])?;
}
}
if repair_changed_clean_entity_ids {
tx.execute("DELETE FROM edges", [])?;
let mut ins = tx.prepare(
"INSERT INTO edges (from_entity, to_entity, ref_type) VALUES (?1, ?2, ?3)",
)?;
for edge in &graph.edges {
let rt = match edge.ref_type {
RefType::Calls => "calls",
RefType::TypeRef => "typeref",
RefType::Imports => "imports",
};
ins.execute(params![edge.from_entity, edge.to_entity, rt])?;
}
} else {
let mut affected_sources: HashSet<String> =
recomputed_edge_source_ids.iter().cloned().collect();
let mut deleted_ids: HashSet<String> = deleted_entity_ids.iter().cloned().collect();
if use_legacy_edge_fallback {
let current_rewritten_entity_ids: HashSet<&str> = entities
.iter()
.filter(|entity| source_stale_set.contains(entity.file_path.as_str()))
.map(|entity| entity.id.as_str())
.collect();
affected_sources.extend(cached_rewritten_entity_ids.iter().cloned());
affected_sources.extend(
current_rewritten_entity_ids
.iter()
.map(|entity_id| (*entity_id).to_string()),
);
deleted_ids.extend(
cached_rewritten_entity_ids
.iter()
.filter(|entity_id| {
!current_rewritten_entity_ids.contains(entity_id.as_str())
})
.cloned(),
);
}
affected_sources.extend(deleted_ids.iter().cloned());
{
let mut del_from = tx.prepare("DELETE FROM edges WHERE from_entity = ?1")?;
for entity_id in &affected_sources {
del_from.execute(params![entity_id])?;
}
}
{
let mut del_to = tx.prepare("DELETE FROM edges WHERE to_entity = ?1")?;
for entity_id in &deleted_ids {
del_to.execute(params![entity_id])?;
}
}
let mut ins = tx.prepare(
"INSERT INTO edges (from_entity, to_entity, ref_type) VALUES (?1, ?2, ?3)",
)?;
for edge in &graph.edges {
if !affected_sources.contains(&edge.from_entity)
|| deleted_ids.contains(&edge.from_entity)
|| deleted_ids.contains(&edge.to_entity)
{
continue;
}
let rt = match edge.ref_type {
RefType::Calls => "calls",
RefType::TypeRef => "typeref",
RefType::Imports => "imports",
};
ins.execute(params![edge.from_entity, edge.to_entity, rt])?;
}
}
shared_cache::set_cache_kind(&tx, shared_cache::CACHE_KIND_FULL)?;
tx.commit()?;
Ok(())
}
}
fn sql_io_error(error: rusqlite::Error) -> std::io::Error {
std::io::Error::new(std::io::ErrorKind::Other, error)
}
fn json_io_error(error: serde_json::Error) -> std::io::Error {
std::io::Error::new(std::io::ErrorKind::Other, error)
}
#[cfg(test)]
mod tests {
use super::*;
fn test_cache_root() -> &'static Path {
static CACHE_ROOT: std::sync::OnceLock<std::path::PathBuf> = std::sync::OnceLock::new();
CACHE_ROOT
.get_or_init(|| {
let nanos = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos();
let root = std::env::temp_dir()
.join(format!("sem-cli-test-cache-{}-{nanos}", std::process::id()));
std::fs::create_dir_all(&root).unwrap();
root
})
.as_path()
}
fn configure_test_cache_root() {
std::env::set_var("SEM_CACHE_DIR", test_cache_root());
}
fn temp_repo_root(test_name: &str) -> std::path::PathBuf {
configure_test_cache_root();
let nanos = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos();
let root = std::env::temp_dir().join(format!(
"sem-cli-cache-{test_name}-{}-{nanos}",
std::process::id()
));
std::fs::create_dir_all(&root).unwrap();
root
}
fn write_file(path: &Path, content: &str) {
std::fs::write(path, content).unwrap();
}
fn empty_graph() -> EntityGraph {
EntityGraph::from_parts(HashMap::new(), Vec::new())
}
fn entity(id: &str, file_path: &str, name: &str, content: &str) -> SemanticEntity {
SemanticEntity {
id: id.to_string(),
file_path: file_path.to_string(),
entity_type: "function".to_string(),
name: name.to_string(),
parent_id: None,
content: content.to_string(),
content_hash: format!("hash:{content}"),
structural_hash: None,
start_line: 1,
end_line: 1,
metadata: None,
}
}
fn entity_content(cache: &DiskCache, id: &str) -> Option<String> {
let mut stmt = cache
.conn
.prepare("SELECT content FROM entities WHERE id = ?1")
.unwrap();
let mut rows = stmt.query(rusqlite::params![id]).unwrap();
rows.next().unwrap().map(|row| row.get(0).unwrap())
}
fn entity_info(id: &str, file_path: &str, name: &str) -> EntityInfo {
EntityInfo {
id: id.to_string(),
file_path: file_path.to_string(),
entity_type: "function".to_string(),
name: name.to_string(),
parent_id: None,
start_line: 1,
end_line: 1,
}
}
fn graph_with_edges(entities: &[SemanticEntity], edges: Vec<EntityRef>) -> EntityGraph {
let entity_map = entities
.iter()
.map(|entity| {
(
entity.id.clone(),
entity_info(&entity.id, &entity.file_path, &entity.name),
)
})
.collect();
EntityGraph::from_parts(entity_map, edges)
}
fn edge(from_entity: &str, to_entity: &str) -> EntityRef {
EntityRef {
from_entity: from_entity.to_string(),
to_entity: to_entity.to_string(),
ref_type: RefType::Calls,
}
}
fn edge_rowid(cache: &DiskCache, from_entity: &str, to_entity: &str) -> Option<i64> {
cache
.conn
.query_row(
"SELECT rowid FROM edges WHERE from_entity = ?1 AND to_entity = ?2",
rusqlite::params![from_entity, to_entity],
|row| row.get(0),
)
.ok()
}
fn edge_count(cache: &DiskCache, from_entity: &str, to_entity: &str) -> i64 {
cache
.conn
.query_row(
"SELECT COUNT(*) FROM edges WHERE from_entity = ?1 AND to_entity = ?2",
rusqlite::params![from_entity, to_entity],
|row| row.get(0),
)
.unwrap()
}
fn file_import_count(cache: &DiskCache, importing_file: &str, imported_file: &str) -> i64 {
cache
.conn
.query_row(
"SELECT COUNT(*) FROM file_imports WHERE importing_file = ?1 AND imported_file = ?2",
rusqlite::params![importing_file, imported_file],
|row| row.get(0),
)
.unwrap()
}
fn cached_file_mtime(cache: &DiskCache, file: &str) -> (i64, i64) {
cache
.conn
.query_row(
"SELECT mtime_secs, mtime_nanos FROM files WHERE path = ?1",
rusqlite::params![file],
|row| Ok((row.get(0)?, row.get(1)?)),
)
.unwrap()
}
fn sample_files(root: &Path) -> Vec<String> {
write_file(&root.join("sample.foo"), "export const alpha = () => 1;\n");
vec!["sample.foo".to_string()]
}
fn cleanup(root: std::path::PathBuf) {
let _ = std::fs::remove_dir_all(&root);
if let Some(cache_dir) = shared_cache::cache_dir_for_repo(&root) {
let _ = std::fs::remove_dir_all(cache_dir);
}
}
fn save_empty_cache(root: &Path, files: &[String]) -> DiskCache {
let cache = DiskCache::open(root).unwrap();
cache.save(root, files, &empty_graph(), &[]).unwrap();
assert!(cache.load(root, files).is_some());
cache
}
#[test]
fn write_graph_json_topology_streams_fresh_cache() {
let root = temp_repo_root("graph-json-topology");
write_file(&root.join("a.rs"), "fn a() {}\n");
write_file(&root.join("b.rs"), "fn b() { a(); }\n");
let files = vec!["b.rs".to_string(), "a.rs".to_string()];
let entities = vec![
entity("b-id", "b.rs", "b", "fn b() { a(); }"),
entity("a-id", "a.rs", "a", "fn a() {}"),
];
let graph = graph_with_edges(&entities, vec![edge("b-id", "a-id")]);
let cache = DiskCache::open(&root).unwrap();
cache.save(&root, &files, &graph, &entities).unwrap();
let mut output = Vec::new();
assert!(cache
.write_graph_json_topology(&root, &files, &mut output)
.unwrap());
let value: serde_json::Value = serde_json::from_slice(&output).unwrap();
assert_eq!(
value["stats"],
serde_json::json!({"entityCount": 2, "edgeCount": 1})
);
assert_eq!(value["entities"][0]["id"], "a-id");
assert_eq!(value["entities"][1]["id"], "b-id");
assert_eq!(value["edges"][0]["fromEntity"], "b-id");
assert_eq!(value["edges"][0]["toEntity"], "a-id");
drop(cache);
cleanup(root);
}
#[test]
fn topology_cache_loads_only_topology_readers() {
let root = temp_repo_root("topology-only-cache");
write_file(&root.join("a.rs"), "fn a() {}\n");
write_file(&root.join("b.rs"), "fn b() { a(); }\n");
write_file(&root.join("a_test.rs"), "#[test]\nfn test_a() { a(); }\n");
let files = vec![
"b.rs".to_string(),
"a.rs".to_string(),
"a_test.rs".to_string(),
];
let entities = vec![
entity("b-id", "b.rs", "b", "fn b() { a(); }"),
entity("a-id", "a.rs", "a", "fn a() {}"),
entity(
"test-id",
"a_test.rs",
"test_a",
"#[test]\nfn test_a() { a(); }",
),
];
let graph = graph_with_edges(
&entities,
vec![edge("b-id", "a-id"), edge("test-id", "a-id")],
);
let cache = DiskCache::open(&root).unwrap();
cache
.save_topology(&root, &files, &graph, &entities, &[])
.unwrap();
assert!(cache.load(&root, &files).is_none());
let topology = cache.load_graph_topology(&root, &files).unwrap();
assert_eq!(topology.entities.len(), 3);
assert_eq!(topology.edges.len(), 2);
let (_, test_entity_ids) = cache
.load_graph_topology_with_test_ids(&root, &files)
.unwrap();
assert!(test_entity_ids.contains("test-id"));
assert!(!test_entity_ids.contains("a-id"));
let mut output = Vec::new();
assert!(cache
.write_graph_json_topology(&root, &files, &mut output)
.unwrap());
let value: serde_json::Value = serde_json::from_slice(&output).unwrap();
assert_eq!(
value["stats"],
serde_json::json!({"entityCount": 3, "edgeCount": 2})
);
rewrite_after_mtime_tick(&root.join("a.rs"), "fn a() { let _x = 1; }\n");
assert!(cache.load_partial(&root, &files).is_none());
drop(cache);
cleanup(root);
}
#[test]
fn load_refreshes_mtime_when_file_content_is_unchanged() {
let root = temp_repo_root("mtime-only-refresh");
let file_contents = [
("same_a.rs", "fn same_a() {}\n"),
("same_b.rs", "fn same_b() {}\n"),
("same_c.rs", "fn same_c() {}\n"),
];
for (file, content) in &file_contents {
write_file(&root.join(*file), content);
}
let files: Vec<String> = file_contents
.iter()
.map(|(file, _)| (*file).to_string())
.collect();
let cache = save_empty_cache(&root, &files);
let before: Vec<(i64, i64)> = files
.iter()
.map(|file| cached_file_mtime(&cache, file))
.collect();
let rewrite_all = || -> Vec<(i64, i64)> {
for (file, content) in &file_contents {
rewrite_after_mtime_tick(&root.join(*file), content);
}
file_contents
.iter()
.map(|(file, _)| shared_cache::file_mtime_parts(&root.join(*file)).unwrap())
.collect()
};
let assert_cached_mtimes = |expected: &[(i64, i64)]| {
for (file, expected) in files.iter().zip(expected) {
assert_eq!(cached_file_mtime(&cache, file), *expected);
}
};
let full_current = rewrite_all();
assert!(before
.iter()
.zip(&full_current)
.all(|(before, current)| before != current));
assert!(cache.load(&root, &files).is_some());
assert_cached_mtimes(&full_current);
let topology_current = rewrite_all();
assert!(cache.load_graph_topology(&root, &files).is_some());
assert_cached_mtimes(&topology_current);
let partial_current = rewrite_all();
assert!(cache.load_partial(&root, &files).is_none());
assert_cached_mtimes(&partial_current);
drop(cache);
cleanup(root);
}
#[test]
fn cache_loads_ignore_fingerprint_refresh_failure() {
let root = temp_repo_root("refresh-failure-cache-hit");
write_file(&root.join("same.rs"), "fn same() {}\n");
write_file(&root.join("stale.rs"), "fn stale() {}\n");
let files = vec!["same.rs".to_string(), "stale.rs".to_string()];
let cache = save_empty_cache(&root, &files);
let before_same = cached_file_mtime(&cache, "same.rs");
cache
.conn
.execute_batch(
"CREATE TRIGGER fail_fingerprint_refresh
BEFORE UPDATE ON files
BEGIN
SELECT RAISE(FAIL, 'stop refresh');
END;",
)
.unwrap();
rewrite_after_mtime_tick(&root.join("same.rs"), "fn same() {}\n");
assert!(cache.load(&root, &files).is_some());
assert!(cache.load_graph_topology(&root, &files).is_some());
assert_eq!(cached_file_mtime(&cache, "same.rs"), before_same);
rewrite_after_mtime_tick(&root.join("stale.rs"), "fn stale() { 1; }\n");
let partial = cache.load_partial(&root, &files).unwrap();
assert_eq!(partial.stale_files, vec!["stale.rs"]);
assert_eq!(cached_file_mtime(&cache, "same.rs"), before_same);
drop(cache);
cleanup(root);
}
#[test]
fn partial_cache_reports_clean_files_that_import_stale_js_ts_files() {
let root = temp_repo_root("incremental-import-metadata");
write_file(
&root.join("a.ts"),
"import { target } from './b';\nexport function useIt() { return target(); }\n",
);
write_file(
&root.join("b.ts"),
"export function target() { return 1; }\n",
);
write_file(
&root.join("c.ts"),
"export function other() { return 2; }\n",
);
let files = vec!["a.ts".to_string(), "b.ts".to_string(), "c.ts".to_string()];
let cache = DiskCache::open(&root).unwrap();
cache.save(&root, &files, &empty_graph(), &[]).unwrap();
assert_eq!(file_import_count(&cache, "a.ts", "b.ts"), 1);
rewrite_after_mtime_tick(
&root.join("b.ts"),
"export function target() { return 3; }\n",
);
rewrite_after_mtime_tick(
&root.join("c.ts"),
"export function other() { return 2; }\n",
);
let current_c = shared_cache::file_mtime_parts(&root.join("c.ts")).unwrap();
let partial = cache.load_partial(&root, &files).unwrap();
assert_eq!(partial.stale_files, vec!["b.ts"]);
assert_eq!(partial.cached_importing_stale_files, vec!["a.ts"]);
assert_eq!(cached_file_mtime(&cache, "c.ts"), current_c);
rewrite_after_mtime_tick(
&root.join("a.ts"),
"import { other } from './c';\nexport function useIt() { return other(); }\n",
);
cache
.save_incremental_with_repair_metadata(
&root,
&files,
&["a.ts".to_string()],
&empty_graph(),
&[],
false,
&[],
&[],
)
.unwrap();
assert_eq!(file_import_count(&cache, "a.ts", "b.ts"), 0);
assert_eq!(file_import_count(&cache, "a.ts", "c.ts"), 1);
drop(cache);
cleanup(root);
}
fn write_gitattributes(root: &Path) {
write_file(
&root.join(".gitattributes"),
"*.foo linguist-language=javascript\n",
);
}
fn rewrite_after_mtime_tick(path: &Path, content: &str) {
let before = shared_cache::file_mtime_parts(path).unwrap();
for _ in 0..200 {
std::thread::sleep(std::time::Duration::from_millis(10));
write_file(path, content);
if shared_cache::file_mtime_parts(path).unwrap() != before {
return;
}
}
panic!("mtime did not change for {}", path.display());
}
fn read_user_version(cache: &DiskCache) -> i32 {
cache
.conn
.query_row("PRAGMA user_version", [], |row| row.get(0))
.unwrap()
}
fn assert_lookup_indexes(cache: &DiskCache) {
let mut stmt = cache
.conn
.prepare(
"SELECT name FROM sqlite_master
WHERE type = 'index' AND name NOT LIKE 'sqlite_autoindex%'
ORDER BY name",
)
.unwrap();
let indexes: HashSet<String> = stmt
.query_map([], |row| row.get::<_, String>(0))
.unwrap()
.map(|result| result.unwrap())
.collect();
for (expected, _, _) in shared_cache::CACHE_INDEXES {
assert!(indexes.contains(*expected), "missing index {expected}");
}
}
fn assert_table_empty(cache: &DiskCache, table: &str) {
let sql = format!("SELECT COUNT(*) FROM {table}");
let count: i64 = cache.conn.query_row(&sql, [], |row| row.get(0)).unwrap();
assert_eq!(count, 0, "{table} should be empty after schema rebuild");
}
fn seed_unsupported_cache(root: &Path, version: i32) {
let cache_dir = shared_cache::cache_dir_for_repo(root).unwrap();
std::fs::create_dir_all(&cache_dir).unwrap();
let db_path = cache_dir.join("cache.db");
let conn = Connection::open(&db_path).unwrap();
conn.execute_batch(&format!(
"PRAGMA user_version = {version};
CREATE TABLE files (
path TEXT PRIMARY KEY,
mtime_secs INTEGER NOT NULL,
mtime_nanos INTEGER NOT NULL
);
CREATE TABLE entities (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
entity_type TEXT NOT NULL,
file_path TEXT NOT NULL,
start_line INTEGER NOT NULL,
end_line INTEGER NOT NULL,
content TEXT NOT NULL,
content_hash TEXT NOT NULL,
structural_hash TEXT,
parent_id TEXT,
metadata_json TEXT
);
CREATE TABLE edges (
from_entity TEXT NOT NULL,
to_entity TEXT NOT NULL,
ref_type TEXT NOT NULL
);
INSERT INTO files (path, mtime_secs, mtime_nanos)
VALUES ('stale.rs', 1, 2);
INSERT INTO entities (
id, name, entity_type, file_path, start_line, end_line,
content, content_hash, structural_hash, parent_id, metadata_json
)
VALUES (
'stale-id', 'stale', 'function', 'stale.rs', 1, 1,
'fn stale() {{}}', 'old-content', NULL, NULL, NULL
);
INSERT INTO edges (from_entity, to_entity, ref_type)
VALUES ('stale-id', 'other-id', 'calls');"
))
.unwrap();
}
#[test]
fn load_invalidates_when_gitattributes_is_added() {
let root = temp_repo_root("gitattributes-added");
let files = sample_files(&root);
let cache = save_empty_cache(&root, &files);
write_file(
&root.join(".gitattributes"),
"*.foo linguist-language=javascript\n",
);
assert!(cache.load(&root, &files).is_none());
assert!(cache.load_partial(&root, &files).is_none());
drop(cache);
cleanup(root);
}
#[test]
fn load_invalidates_when_gitattributes_is_modified() {
let root = temp_repo_root("gitattributes-modified");
let files = sample_files(&root);
let gitattributes = root.join(".gitattributes");
write_file(&gitattributes, "*.foo linguist-language=javascript\n");
let cache = save_empty_cache(&root, &files);
rewrite_after_mtime_tick(&gitattributes, "*.foo linguist-language=typescript\n");
assert!(cache.load(&root, &files).is_none());
assert!(cache.load_partial(&root, &files).is_none());
drop(cache);
cleanup(root);
}
#[test]
fn load_invalidates_when_gitattributes_is_removed() {
let root = temp_repo_root("gitattributes-removed");
let files = sample_files(&root);
let gitattributes = root.join(".gitattributes");
write_file(&gitattributes, "*.foo linguist-language=javascript\n");
let cache = save_empty_cache(&root, &files);
std::fs::remove_file(&gitattributes).unwrap();
assert!(cache.load(&root, &files).is_none());
assert!(cache.load_partial(&root, &files).is_none());
drop(cache);
cleanup(root);
}
#[test]
fn save_incremental_keeps_clean_entity_rows_without_clean_id_repair() {
let root = temp_repo_root("incremental-entities");
write_file(&root.join("stale.rs"), "fn stale() {}\n");
write_file(&root.join("clean.rs"), "fn clean() {}\n");
let files = vec!["stale.rs".to_string(), "clean.rs".to_string()];
let cache = DiskCache::open(&root).unwrap();
cache
.save(
&root,
&files,
&empty_graph(),
&[
entity("stale-id", "stale.rs", "stale", "stale old"),
entity("clean-id", "clean.rs", "clean", "clean old"),
],
)
.unwrap();
let entities = vec![
entity("stale-id", "stale.rs", "stale", "stale new"),
entity("clean-id", "clean.rs", "clean", "clean should stay cached"),
];
cache
.save_incremental_with_repair_metadata(
&root,
&files,
&["stale.rs".to_string()],
&empty_graph(),
&entities,
false,
&["stale-id".to_string()],
&[],
)
.unwrap();
assert_eq!(
entity_content(&cache, "stale-id"),
Some("stale new".to_string())
);
assert_eq!(
entity_content(&cache, "clean-id"),
Some("clean old".to_string())
);
drop(cache);
cleanup(root);
}
#[test]
fn save_incremental_rewrites_entities_after_clean_id_repair() {
let root = temp_repo_root("incremental-clean-repair");
write_file(&root.join("stale.rs"), "fn stale() {}\n");
write_file(&root.join("clean.rs"), "fn clean() {}\n");
let files = vec!["stale.rs".to_string(), "clean.rs".to_string()];
let cache = DiskCache::open(&root).unwrap();
cache
.save(
&root,
&files,
&empty_graph(),
&[
entity("stale-id", "stale.rs", "stale", "stale old"),
entity("clean-old-id", "clean.rs", "clean", "clean old"),
],
)
.unwrap();
let entities = vec![
entity("stale-id", "stale.rs", "stale", "stale new"),
entity("clean-new-id", "clean.rs", "clean", "clean repaired"),
];
cache
.save_incremental_with_repair_metadata(
&root,
&files,
&["stale.rs".to_string()],
&empty_graph(),
&entities,
true,
&[],
&[],
)
.unwrap();
assert_eq!(entity_content(&cache, "clean-old-id"), None);
assert_eq!(
entity_content(&cache, "clean-new-id"),
Some("clean repaired".to_string())
);
assert_eq!(
entity_content(&cache, "stale-id"),
Some("stale new".to_string())
);
drop(cache);
cleanup(root);
}
#[test]
fn save_incremental_rewrites_only_recomputed_edge_sources() {
let root = temp_repo_root("incremental-edge-sources");
write_file(&root.join("stale.rs"), "fn stale() {}\n");
write_file(&root.join("clean.rs"), "fn clean() {}\n");
write_file(&root.join("other.rs"), "fn other() {}\n");
write_file(&root.join("target.rs"), "fn target() {}\n");
let files = vec![
"stale.rs".to_string(),
"clean.rs".to_string(),
"other.rs".to_string(),
"target.rs".to_string(),
];
let cache = DiskCache::open(&root).unwrap();
let entities = vec![
entity("stale-id", "stale.rs", "stale", "stale old"),
entity("clean-id", "clean.rs", "clean", "clean old"),
entity("other-id", "other.rs", "other", "other"),
entity("old-target-id", "target.rs", "oldTarget", "old target"),
entity("new-target-id", "target.rs", "newTarget", "new target"),
];
let initial_graph = graph_with_edges(
&entities,
vec![
edge("stale-id", "old-target-id"),
edge("clean-id", "other-id"),
],
);
cache
.save(&root, &files, &initial_graph, &entities)
.unwrap();
let clean_edge_rowid = edge_rowid(&cache, "clean-id", "other-id").unwrap();
let updated_graph = graph_with_edges(
&entities,
vec![
edge("stale-id", "new-target-id"),
edge("clean-id", "other-id"),
],
);
cache
.save_incremental_with_repair_metadata(
&root,
&files,
&["stale.rs".to_string()],
&updated_graph,
&entities,
false,
&["stale-id".to_string()],
&["old-target-id".to_string()],
)
.unwrap();
assert_eq!(edge_count(&cache, "stale-id", "old-target-id"), 0);
assert_eq!(edge_count(&cache, "stale-id", "new-target-id"), 1);
assert_eq!(
edge_rowid(&cache, "clean-id", "other-id"),
Some(clean_edge_rowid)
);
drop(cache);
cleanup(root);
}
#[test]
fn cli_and_mcp_caches_share_manifest_entries() {
let cli_to_mcp = temp_repo_root("cli-to-mcp");
let cli_to_mcp_files = sample_files(&cli_to_mcp);
write_gitattributes(&cli_to_mcp);
let cli_cache = DiskCache::open(&cli_to_mcp).unwrap();
cli_cache
.save(&cli_to_mcp, &cli_to_mcp_files, &empty_graph(), &[])
.unwrap();
let mcp_cache = shared_cache::DiskCache::open(&cli_to_mcp).unwrap();
assert!(mcp_cache.load(&cli_to_mcp, &cli_to_mcp_files).is_some());
drop(mcp_cache);
drop(cli_cache);
cleanup(cli_to_mcp);
let mcp_to_cli = temp_repo_root("mcp-to-cli");
let mcp_to_cli_files = sample_files(&mcp_to_cli);
write_gitattributes(&mcp_to_cli);
let mcp_cache = shared_cache::DiskCache::open(&mcp_to_cli).unwrap();
mcp_cache
.save(&mcp_to_cli, &mcp_to_cli_files, &empty_graph(), &[])
.unwrap();
let cli_cache = DiskCache::open(&mcp_to_cli).unwrap();
assert!(cli_cache.load(&mcp_to_cli, &mcp_to_cli_files).is_some());
drop(cli_cache);
drop(mcp_cache);
cleanup(mcp_to_cli);
let cli_topology_to_mcp = temp_repo_root("cli-topology-to-mcp");
let cli_topology_to_mcp_files = sample_files(&cli_topology_to_mcp);
let cli_cache = DiskCache::open(&cli_topology_to_mcp).unwrap();
cli_cache
.save_topology(
&cli_topology_to_mcp,
&cli_topology_to_mcp_files,
&empty_graph(),
&[],
&[],
)
.unwrap();
let mcp_cache = shared_cache::DiskCache::open(&cli_topology_to_mcp).unwrap();
assert!(mcp_cache
.load(&cli_topology_to_mcp, &cli_topology_to_mcp_files)
.is_none());
assert!(mcp_cache
.load_graph_topology(&cli_topology_to_mcp, &cli_topology_to_mcp_files)
.is_some());
drop(mcp_cache);
drop(cli_cache);
cleanup(cli_topology_to_mcp);
}
#[test]
fn open_creates_schema_version_and_lookup_indexes() {
let root = temp_repo_root("schema");
let cache = DiskCache::open(&root).unwrap();
assert_eq!(
read_user_version(&cache),
shared_cache::CACHE_SCHEMA_VERSION
);
assert_lookup_indexes(&cache);
assert!(shared_cache::cache_db_path(&root).unwrap().exists());
assert!(!root.join(".sem").exists());
drop(cache);
cleanup(root);
}
#[test]
fn open_uses_shared_external_cache_path() {
let root = temp_repo_root("external-path");
let cache = DiskCache::open(&root).unwrap();
assert!(shared_cache::cache_db_path(&root).unwrap().exists());
assert!(!root.join(".sem").exists());
drop(cache);
cleanup(root);
}
#[test]
fn open_rebuilds_cache_when_schema_version_is_unsupported() {
for version in [
0,
shared_cache::CACHE_SCHEMA_VERSION - 1,
shared_cache::CACHE_SCHEMA_VERSION + 1,
] {
let root = temp_repo_root(&format!("unsupported-{version}"));
seed_unsupported_cache(&root, version);
let cache = DiskCache::open(&root).unwrap();
assert_eq!(
read_user_version(&cache),
shared_cache::CACHE_SCHEMA_VERSION
);
assert_lookup_indexes(&cache);
for table in ["files", "entities", "edges"] {
assert_table_empty(&cache, table);
}
drop(cache);
cleanup(root);
}
}
}