use crate::path_resolver::WorkspaceResolver;
use anyhow::{Context, Result, bail};
use chrono::{DateTime, Utc};
use parking_lot::RwLock;
use sqry_core::graph::unified::concurrent::CodeGraph;
use sqry_core::graph::unified::persistence::{
GraphStorage, Manifest, load_from_bytes, verify_snapshot_bytes,
};
use sqry_core::graph::{
AcquisitionOperation, FilesystemGraphProvider, GraphAcquirer, GraphAcquisitionError,
GraphAcquisitionRequest, MissingGraphPolicy, PathPolicy, PluginSelectionPolicy, StalePolicy,
};
use sqry_core::plugin::PluginManager;
use sqry_core::query::QueryExecutor;
use sqry_plugin_registry::create_plugin_manager;
use std::path::{Path, PathBuf};
use std::sync::{Arc, OnceLock};
use std::time::{Duration, SystemTime};
/// Identity of a built graph snapshot, derived from `manifest.json`.
/// Two cached engines are interchangeable iff their identities are equal.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct GraphIdentity {
    /// SHA-256 of the snapshot file, as recorded in the manifest.
    pub snapshot_sha256: String,
    /// Manifest `built_at` timestamp (parsed from RFC 3339, normalized to UTC).
    pub built_at: DateTime<Utc>,
    pub schema_version: u32,
    pub snapshot_format_version: u32,
    /// Canonicalized workspace root the snapshot was built for.
    pub workspace_root: PathBuf,
}
/// Filesystem metadata of `manifest.json` captured at read time; used to
/// cheaply detect manifest changes without re-parsing the file.
#[derive(Clone, Debug)]
pub struct ManifestMetadata {
    pub mtime: SystemTime,
    pub size: u64,
    /// Platform file identifier (inode number on Unix); `None` where
    /// unavailable (see `extract_file_id`).
    pub file_id: Option<u64>,
}
/// An engine together with the graph identity and manifest metadata it was
/// cached under; stored in `ENGINE_CACHE` keyed by workspace root.
pub struct CachedEngine {
    pub engine: Arc<Engine>,
    pub identity: GraphIdentity,
    pub metadata: ManifestMetadata,
}
/// Process-wide engine singleton backing [`engine()`]; lazily populated
/// under a read/write lock (double-checked in `engine()`).
#[allow(dead_code)]
static ENGINE: RwLock<Option<Arc<Engine>>> = RwLock::new(None);
/// Query engine bound to a single workspace root.
pub struct Engine {
    /// Workspace root this engine serves (expected absolute).
    workspace_root: PathBuf,
    /// Query executor configured with the registry's plugin manager.
    executor: Arc<QueryExecutor>,
    /// Lazily-populated cached unified graph for this workspace.
    graph_cache: RwLock<Option<Arc<CodeGraph>>>,
}
impl Engine {
    /// Builds an engine rooted at the workspace resolved from
    /// `SQRY_MCP_WORKSPACE_ROOT` or the current directory
    /// (see [`resolve_workspace_root`]).
    ///
    /// # Errors
    /// Fails when the workspace root cannot be resolved or canonicalized.
    #[allow(dead_code)]
    fn initialize() -> Result<Self> {
        let workspace_root = resolve_workspace_root()?;
        tracing::info!(
            workspace_root = %workspace_root.display(),
            "Engine initializing with workspace root"
        );
        let plugin_manager = build_plugin_manager();
        let executor = Arc::new(QueryExecutor::with_plugin_manager(plugin_manager));
        Ok(Self {
            workspace_root,
            executor,
            graph_cache: RwLock::new(None),
        })
    }

    /// Builds an engine bound to an explicit workspace root. The root is
    /// used as-is; callers are expected to pass an absolute/canonical path.
    #[allow(clippy::unnecessary_wraps)]
    pub fn for_workspace(workspace_root: PathBuf) -> Result<Self> {
        tracing::info!(
            workspace_root = %workspace_root.display(),
            "Creating Engine for specific workspace"
        );
        let plugin_manager = build_plugin_manager();
        let executor = Arc::new(QueryExecutor::with_plugin_manager(plugin_manager));
        Ok(Self {
            workspace_root,
            executor,
            graph_cache: RwLock::new(None),
        })
    }

    /// Workspace root this engine serves.
    pub fn workspace_root(&self) -> &Path {
        &self.workspace_root
    }

    /// Borrows the query executor.
    pub fn executor(&self) -> &QueryExecutor {
        &self.executor
    }

    /// Clones the executor handle (cheap `Arc` refcount bump).
    #[must_use]
    pub fn executor_arc(&self) -> Arc<QueryExecutor> {
        Arc::clone(&self.executor)
    }

    /// Builds a fresh plugin manager from the registry; not tied to any
    /// engine instance.
    #[must_use]
    pub fn plugin_manager() -> PluginManager {
        build_plugin_manager()
    }

    /// Returns the already-loaded graph for this engine, if any.
    #[must_use]
    pub fn cached_graph(&self) -> Option<Arc<CodeGraph>> {
        let cache = self.graph_cache.read();
        cache.as_ref().map(Arc::clone)
    }

    /// Loads the unified graph snapshot from disk (caching the result), or
    /// returns `None` when it is absent, unreadable, fails integrity
    /// verification, or fails to deserialize. Never auto-builds; see
    /// [`Self::ensure_graph`] for the building variant.
    #[allow(dead_code)]
    pub fn graph(&self) -> Option<Arc<CodeGraph>> {
        if let Some(graph) = self.cached_graph() {
            tracing::debug!("Returning cached graph");
            return Some(graph);
        }
        let storage = GraphStorage::new(&self.workspace_root);
        let snapshot_path = storage.snapshot_path();
        tracing::info!(
            snapshot_path = %snapshot_path.display(),
            exists = snapshot_path.exists(),
            "Checking for unified graph snapshot"
        );
        if !storage.exists() {
            tracing::warn!(
                workspace_root = %self.workspace_root.display(),
                "No unified graph snapshot found"
            );
            return None;
        }
        // Expected checksum comes from the manifest when present; a manifest
        // that exists but cannot be parsed is treated as fatal for loading.
        let expected_sha256 = if storage.manifest_path().exists() {
            match std::fs::File::open(storage.manifest_path()).and_then(|f| {
                serde_json::from_reader::<_, Manifest>(f)
                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
            }) {
                Ok(manifest) => manifest.snapshot_sha256,
                Err(e) => {
                    tracing::warn!(
                        error = %e,
                        manifest_path = %storage.manifest_path().display(),
                        "Manifest exists but cannot be read/parsed — refusing to load snapshot"
                    );
                    return None;
                }
            }
        } else {
            // No manifest: verification proceeds with an empty expected hash.
            String::new()
        };
        let snapshot_bytes = match std::fs::read(storage.snapshot_path()) {
            Ok(bytes) => bytes,
            Err(e) => {
                tracing::warn!(
                    error = %e,
                    snapshot_path = %storage.snapshot_path().display(),
                    "Failed to read snapshot"
                );
                return None;
            }
        };
        if let Err(e) = verify_snapshot_bytes(&snapshot_bytes, &expected_sha256) {
            tracing::warn!(
                error = %e,
                snapshot_path = %storage.snapshot_path().display(),
                "Snapshot integrity verification failed"
            );
            return None;
        }
        match load_from_bytes(&snapshot_bytes, Some(self.executor.plugin_manager())) {
            Ok(graph) => {
                let arc = Arc::new(graph);
                let mut cache = self.graph_cache.write();
                *cache = Some(arc.clone());
                tracing::info!("Successfully loaded unified graph");
                Some(arc)
            }
            Err(e) => {
                tracing::error!(
                    error = %e,
                    snapshot_path = %storage.snapshot_path().display(),
                    "Failed to load unified graph snapshot"
                );
                None
            }
        }
    }

    /// Returns the graph for this workspace, acquiring (and possibly
    /// auto-building) it through [`FilesystemGraphProvider`] on a cache miss.
    ///
    /// The auto-build hook refuses to run when `SQRY_AUTO_INDEX` disables it,
    /// or when a sqryd daemon is already managing this workspace.
    ///
    /// # Errors
    /// Acquisition/build failures, mapped to user-facing messages via
    /// [`map_acquisition_error_for_engine`].
    pub fn ensure_graph(&self) -> Result<Arc<CodeGraph>> {
        if let Some(graph) = self.cached_graph() {
            return Ok(graph);
        }
        let provider_plugins = build_plugin_manager();
        let workspace_root_for_hook = self.workspace_root.clone();
        let auto_build_hook: sqry_core::graph::AutoBuildHook = Arc::new(move |_req_path| {
            if !is_auto_index_enabled() {
                return Err(GraphAcquisitionError::BuildFailed {
                    workspace_root: workspace_root_for_hook.clone(),
                    reason: "auto-indexing disabled (SQRY_AUTO_INDEX=false). Run `sqry index` to create the graph.".to_string(),
                });
            }
            // Refuse to become a second writer behind the daemon's back.
            check_daemon_workspace_conflict(&workspace_root_for_hook).map_err(|e| {
                GraphAcquisitionError::BuildFailed {
                    workspace_root: workspace_root_for_hook.clone(),
                    reason: format!("{e}"),
                }
            })?;
            tracing::info!(
                workspace = %workspace_root_for_hook.display(),
                "Auto-building graph index (no existing snapshot found)"
            );
            let plugins = create_plugin_manager();
            let config = sqry_core::graph::unified::build::BuildConfig::default();
            let (graph, _build_result) = sqry_core::graph::unified::build::build_and_persist_graph(
                &workspace_root_for_hook,
                &plugins,
                &config,
                "mcp:auto_index",
            )
            .map_err(|e| GraphAcquisitionError::BuildFailed {
                workspace_root: workspace_root_for_hook.clone(),
                reason: format!("{e}"),
            })?;
            Ok(Arc::new(graph))
        });
        let provider = FilesystemGraphProvider::new(Arc::new(provider_plugins))
            .with_auto_build_hook(auto_build_hook);
        let request = GraphAcquisitionRequest {
            requested_path: self.workspace_root.clone(),
            operation: AcquisitionOperation::ReadOnlyQuery,
            path_policy: PathPolicy::default(),
            missing_graph_policy: MissingGraphPolicy::AutoBuildIfEnabled,
            stale_policy: StalePolicy::default(),
            plugin_selection_policy: PluginSelectionPolicy::default(),
            tool_name: Some("mcp_ensure_graph"),
        };
        let acquisition = provider
            .acquire(request)
            .map_err(map_acquisition_error_for_engine)?;
        let arc = acquisition.graph;
        {
            let mut cache = self.graph_cache.write();
            *cache = Some(Arc::clone(&arc));
        }
        Ok(arc)
    }

    /// Drops the cached graph so the next access reloads from disk.
    #[allow(dead_code)]
    pub fn clear_graph_cache(&self) {
        let mut cache = self.graph_cache.write();
        *cache = None;
    }
}
fn map_acquisition_error_for_engine(err: GraphAcquisitionError) -> anyhow::Error {
match err {
GraphAcquisitionError::InvalidPath { path, reason } => {
anyhow::anyhow!("invalid path {}: {}", path.display(), reason)
}
GraphAcquisitionError::NoGraph { workspace_root } => anyhow::anyhow!(
"No unified graph found at {}. Run `sqry index` to create the graph.",
workspace_root.display()
),
GraphAcquisitionError::IncompatibleGraph {
source_root,
status,
} => anyhow::anyhow!(
"Incompatible graph at {}: {:?}. Rebuild the index with `sqry index --force` after upgrading sqry.",
source_root.display(),
status
),
GraphAcquisitionError::LoadFailed {
source_root,
reason,
} => anyhow::anyhow!(
"Failed to load graph at {}: {}",
source_root.display(),
reason
),
GraphAcquisitionError::BuildFailed {
workspace_root,
reason,
} => anyhow::anyhow!(
"Graph build failed for {}: {}",
workspace_root.display(),
reason
),
other => anyhow::anyhow!("graph acquisition failed: {other}"),
}
}
/// Returns the process-wide singleton engine, creating it on first use.
///
/// Double-checked locking: an optimistic read lock first, then a re-check
/// under the write lock so only one thread runs `Engine::initialize`.
#[allow(dead_code)]
pub fn engine() -> Result<Arc<Engine>> {
    {
        let guard = ENGINE.read();
        if let Some(ref engine) = *guard {
            return Ok(engine.clone());
        }
    }
    let mut guard = ENGINE.write();
    // Re-check: another thread may have initialized between the two locks.
    if let Some(ref engine) = *guard {
        return Ok(engine.clone());
    }
    let new_engine = Arc::new(Engine::initialize()?);
    *guard = Some(new_engine.clone());
    Ok(new_engine)
}
/// Resolves the workspace root and returns the engine for it.
///
/// Precedence: session-level workspace override, then the explicit path
/// argument (resolved via `path_resolver`), then the ambient
/// [`WorkspaceResolver`].
pub fn engine_for_workspace(explicit_path: Option<&PathBuf>) -> Result<Arc<Engine>> {
    // A session override beats the explicit argument.
    if let Some(workspace_root) = crate::workspace_session::current_workspace_override() {
        return engine_for_workspace_root(&workspace_root);
    }
    let workspace_root = if let Some(path) = explicit_path {
        crate::path_resolver::resolve_workspace_path(&path.to_string_lossy())?
    } else {
        let resolver = WorkspaceResolver::new(None);
        resolver.resolve()?
    };
    engine_for_workspace_root(&workspace_root)
}
/// Returns a (possibly cached) engine for the given canonical workspace root.
///
/// On a cache miss a fresh engine is created; if the manifest is readable,
/// its identity/metadata are stored in `ENGINE_CACHE`. A missing manifest is
/// not fatal — the engine is returned uncached and auto-index is expected to
/// create the manifest on the first tool call.
fn engine_for_workspace_root(workspace_root: &Path) -> Result<Arc<Engine>> {
    // Callers must pre-canonicalize; a relative path here is a caller bug.
    if !workspace_root.is_absolute() {
        bail!(
            "BUG: engine_for_workspace requires canonical path, got: {}",
            workspace_root.display()
        );
    }
    if let Some(engine) = get_cached_engine(workspace_root)? {
        tracing::debug!(
            workspace = %workspace_root.display(),
            "Using cached engine"
        );
        return Ok(engine);
    }
    tracing::info!(
        workspace = %workspace_root.display(),
        "Loading fresh engine (cache miss or stale)"
    );
    let engine = Arc::new(Engine::for_workspace(workspace_root.to_path_buf())?);
    match read_graph_identity_with_metadata(workspace_root) {
        Ok((identity, metadata)) => {
            let mut cache = ENGINE_CACHE.lock();
            let lru = cache
                .as_mut()
                .context("Engine cache not initialized - call init_engine_cache() first")?;
            lru.put(
                workspace_root.to_path_buf(),
                CachedEngine {
                    engine: Arc::clone(&engine),
                    identity,
                    metadata,
                },
            );
            tracing::debug!(
                workspace = %workspace_root.display(),
                cache_size = lru.len(),
                "Engine cached"
            );
        }
        Err(e) => {
            // Engine still works without a manifest; it just isn't cached.
            tracing::info!(
                workspace = %workspace_root.display(),
                error = %e,
                "No manifest found — engine created without cache identity \
                 (auto-index will create it on first tool call)"
            );
        }
    }
    Ok(engine)
}
/// Resolves the workspace root for the process-wide engine: the
/// `SQRY_MCP_WORKSPACE_ROOT` environment variable when set (and valid
/// unicode), otherwise the current working directory. The result is always
/// canonicalized.
///
/// # Errors
/// Fails when the current directory is unavailable or canonicalization fails.
#[allow(dead_code)]
pub fn resolve_workspace_root() -> Result<PathBuf> {
    let root_path = std::env::var("SQRY_MCP_WORKSPACE_ROOT")
        .map(PathBuf::from)
        .or_else(|_| std::env::current_dir().context("Failed to get current directory"))?;
    std::fs::canonicalize(&root_path).with_context(|| {
        format!(
            "Failed to canonicalize workspace root: {}",
            root_path.display()
        )
    })
}
/// Errors produced when validating/canonicalizing a user-supplied path
/// against a workspace boundary.
#[derive(Debug, thiserror::Error)]
pub enum WorkspacePathError {
    /// The (normalized or canonicalized) path escapes `workspace_root`.
    #[error("Path '{path}' is outside of the workspace root '{workspace_root}'")]
    OutsideWorkspace {
        path: PathBuf,
        workspace_root: PathBuf,
    },
    /// The path matches a logical-workspace exclusion.
    #[error("Path '{path}' is excluded by the logical workspace policy")]
    Excluded {
        path: PathBuf,
    },
    /// `std::fs::canonicalize` failed (e.g. the path does not exist).
    #[error("Failed to canonicalize path: {path} ({source})")]
    Canonicalize {
        path: PathBuf,
        source: std::io::Error,
    },
}
/// Canonicalizes `path_str` relative to `workspace_root`, rejecting paths
/// that escape the workspace. Logical-workspace exclusions are not applied
/// here (the inner helper is called with `None`).
///
/// # Errors
/// Any [`WorkspacePathError`], converted into `anyhow::Error`.
pub fn canonicalize_in_workspace(path_str: &str, workspace_root: &Path) -> Result<PathBuf> {
    let canonical = canonicalize_in_workspace_inner(path_str, workspace_root, None)?;
    Ok(canonical)
}
/// Like [`canonicalize_in_workspace`], but additionally rejects paths that
/// fall under the exclusions of the given logical workspace, and returns the
/// typed [`WorkspacePathError`] instead of `anyhow::Error`.
#[allow(dead_code)]
pub fn canonicalize_in_workspace_with_logical(
    path_str: &str,
    workspace_root: &Path,
    workspace: Option<&sqry_core::workspace::LogicalWorkspace>,
) -> Result<PathBuf, WorkspacePathError> {
    canonicalize_in_workspace_inner(path_str, workspace_root, workspace)
}
/// Core path validation: joins `path_str` onto `workspace_root` (absolute
/// inputs are taken as-is), then enforces workspace containment and optional
/// logical-workspace exclusions — first lexically (before touching the
/// filesystem), then again on the canonicalized result.
fn canonicalize_in_workspace_inner(
    path_str: &str,
    workspace_root: &Path,
    workspace: Option<&sqry_core::workspace::LogicalWorkspace>,
) -> Result<PathBuf, WorkspacePathError> {
    let input_path = Path::new(path_str);
    let joined = if input_path.is_absolute() {
        input_path.to_path_buf()
    } else {
        workspace_root.join(input_path)
    };
    // Lexical pre-checks on the normalized path catch traversal attempts
    // even when the target does not exist yet.
    let normalized = normalize_path(&joined);
    if let Some(ws) = workspace
        && exclusion_matches(&normalized, ws)
    {
        return Err(WorkspacePathError::Excluded {
            path: normalized.clone(),
        });
    }
    if !normalized.starts_with(workspace_root) {
        return Err(WorkspacePathError::OutsideWorkspace {
            path: normalized,
            workspace_root: workspace_root.to_path_buf(),
        });
    }
    // Canonicalize (resolves symlinks; fails if the path does not exist).
    let canon =
        std::fs::canonicalize(&joined).map_err(|source| WorkspacePathError::Canonicalize {
            path: joined.clone(),
            source,
        })?;
    // Re-check exclusions and containment post-canonicalization so symlinks
    // cannot escape the workspace or re-enter excluded subtrees.
    if let Some(ws) = workspace
        && exclusion_matches(&canon, ws)
    {
        return Err(WorkspacePathError::Excluded { path: canon });
    }
    let canon_root =
        std::fs::canonicalize(workspace_root).unwrap_or_else(|_| workspace_root.to_path_buf());
    if !canon.starts_with(&canon_root) {
        return Err(WorkspacePathError::OutsideWorkspace {
            path: canon,
            workspace_root: canon_root,
        });
    }
    Ok(canon)
}
/// True when `path` equals, or is nested under, any exclusion of the
/// logical workspace.
fn exclusion_matches(path: &Path, workspace: &sqry_core::workspace::LogicalWorkspace) -> bool {
    for excl in workspace.exclusions().iter() {
        // `starts_with` also covers exact equality; the explicit equality
        // check is kept for clarity.
        if path == excl.as_path() || path.starts_with(excl) {
            return true;
        }
    }
    false
}
/// Lexically normalizes a path by resolving `.` and `..` components without
/// touching the filesystem (no symlink resolution).
///
/// `..` pops the most recent normal component. The root (and, on Windows,
/// the drive prefix) is never popped, so `/a/../../b` normalizes to `/b`
/// rather than the relative `"b"` the previous implementation produced by
/// popping `RootDir` off the stack. Leading `..` components of a relative
/// path are dropped, matching the previous behavior.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;
    let mut components: Vec<Component> = Vec::new();
    for component in path.components() {
        match component {
            Component::ParentDir => {
                // Only pop a real directory name; never RootDir/Prefix.
                if matches!(components.last(), Some(Component::Normal(_))) {
                    components.pop();
                }
            }
            // `.` is a no-op.
            Component::CurDir => {}
            comp => components.push(comp),
        }
    }
    components.iter().collect()
}
/// Constructs a plugin manager from the static plugin registry.
fn build_plugin_manager() -> PluginManager {
    create_plugin_manager()
}
/// Auto-indexing is on unless `SQRY_AUTO_INDEX` is set to `"false"` or
/// `"0"`. An unset (or non-unicode) variable defaults to enabled; any other
/// value also enables.
fn is_auto_index_enabled() -> bool {
    match std::env::var("SQRY_AUTO_INDEX") {
        Ok(value) => !matches!(value.as_str(), "false" | "0"),
        Err(_) => true,
    }
}
/// Guards standalone auto-indexing against a concurrent sqryd daemon writer.
///
/// Best-effort by design: the check is skipped entirely when
/// `SQRY_FORCE_STANDALONE` is set, when no daemon socket exists, or when no
/// tokio runtime is available. Only a daemon confirmed to be managing this
/// same workspace produces an error (see the async probe).
fn check_daemon_workspace_conflict(workspace_root: &Path) -> Result<()> {
    if is_force_standalone() {
        tracing::warn!(
            workspace = %workspace_root.display(),
            "SQRY_FORCE_STANDALONE=1: skipping daemon workspace conflict check. \
             Concurrent writer risk accepted by caller."
        );
        return Ok(());
    }
    let socket_path = crate::daemon_shim::resolve_daemon_socket(None);
    if !socket_path.exists() {
        tracing::debug!(
            socket = %socket_path.display(),
            "Daemon socket not present — no conflict check needed"
        );
        return Ok(());
    }
    match tokio::runtime::Handle::try_current() {
        Ok(handle) => {
            let workspace_owned = workspace_root.to_path_buf();
            let socket_owned = socket_path.clone();
            // block_in_place lets us run the async probe from sync code
            // without starving the current runtime worker thread.
            tokio::task::block_in_place(move || {
                handle.block_on(async move {
                    check_daemon_workspace_conflict_async(&workspace_owned, &socket_owned).await
                })
            })
        }
        Err(_) => {
            tracing::debug!(
                socket = %socket_path.display(),
                "No async runtime available — skipping daemon conflict check"
            );
            Ok(())
        }
    }
}
/// Async probe of the sqryd daemon: errors only when the daemon reports it
/// is managing this exact (canonicalized) workspace. All probe failures and
/// timeouts degrade to `Ok(())` — proceed standalone.
async fn check_daemon_workspace_conflict_async(
    workspace_root: &Path,
    socket_path: &Path,
) -> Result<()> {
    // Tight budgets: never let a sick daemon block auto-indexing for long.
    let connect_budget = Duration::from_secs(1);
    let hello_budget = Duration::from_secs(1);
    let status_budget = Duration::from_secs(2);
    let mut client = match sqry_daemon_client::DaemonClient::connect_with_timeouts(
        socket_path,
        connect_budget,
        hello_budget,
    )
    .await
    {
        Ok(c) => c,
        Err(e) => {
            tracing::debug!(
                socket = %socket_path.display(),
                error = %e,
                "Daemon socket present but connect/handshake failed — proceeding standalone"
            );
            return Ok(());
        }
    };
    let status_value = match tokio::time::timeout(status_budget, client.status()).await {
        Ok(Ok(v)) => v,
        Ok(Err(e)) => {
            tracing::debug!(
                socket = %socket_path.display(),
                error = %e,
                "daemon/status RPC failed — proceeding standalone"
            );
            return Ok(());
        }
        Err(_elapsed) => {
            tracing::warn!(
                socket = %socket_path.display(),
                timeout_secs = status_budget.as_secs(),
                "daemon/status timed out — proceeding standalone \
                 (do not block auto-index on unresponsive daemon)"
            );
            return Ok(());
        }
    };
    let canonical_workspace = match std::fs::canonicalize(workspace_root) {
        Ok(p) => p,
        Err(e) => {
            tracing::debug!(
                workspace = %workspace_root.display(),
                error = %e,
                "Could not canonicalize workspace root — skipping conflict check"
            );
            return Ok(());
        }
    };
    // PID is best-effort and used only in the error message below.
    let daemon_pid = read_daemon_pid_from_socket_dir(socket_path)
        .map_or_else(|| "unknown".to_owned(), |pid| pid.to_string());
    let Some(workspaces) = status_value
        .get("result")
        .and_then(|r| r.get("workspaces"))
        .and_then(serde_json::Value::as_array)
    else {
        tracing::debug!(
            socket = %socket_path.display(),
            "daemon/status returned no 'result.workspaces' array — no conflict detected"
        );
        return Ok(());
    };
    for ws in workspaces {
        let Some(root_str) = ws.get("index_root").and_then(serde_json::Value::as_str) else {
            continue;
        };
        let daemon_ws_root = PathBuf::from(root_str);
        // Compare canonical forms so symlinked roots still collide.
        let canonical_daemon_root =
            std::fs::canonicalize(&daemon_ws_root).unwrap_or(daemon_ws_root);
        if canonical_daemon_root == canonical_workspace {
            bail!(
                "sqryd daemon (PID {daemon_pid}) is managing workspace '{}'. \
                 Use --daemon mode or stop the daemon before standalone auto-indexing.",
                canonical_workspace.display()
            );
        }
    }
    tracing::debug!(
        workspace = %workspace_root.display(),
        socket = %socket_path.display(),
        "No daemon workspace conflict detected — proceeding with standalone auto-index"
    );
    Ok(())
}
/// True when `SQRY_FORCE_STANDALONE` is set to exactly `"1"` or `"true"`;
/// anything else (including unset) leaves the daemon conflict check active.
fn is_force_standalone() -> bool {
    std::env::var("SQRY_FORCE_STANDALONE")
        .is_ok_and(|value| value == "1" || value == "true")
}
/// Best-effort read of the daemon PID from a `sqryd.pid` file next to the
/// socket.
///
/// Returns `None` when the socket path has no parent directory, the pid
/// file is missing/unreadable, or its trimmed contents do not parse as
/// a `u32`.
fn read_daemon_pid_from_socket_dir(socket_path: &Path) -> Option<u32> {
    let pid_file = socket_path.parent()?.join("sqryd.pid");
    std::fs::read_to_string(&pid_file)
        .ok()
        .and_then(|contents| contents.trim().parse::<u32>().ok())
}
/// Lazily-initialized global workspace lock (see [`workspace_lock`]).
#[allow(dead_code)]
pub static WORKSPACE_LOCK: OnceLock<RwLock<()>> = OnceLock::new();
/// Returns the global workspace lock, initializing it on first use.
#[allow(dead_code)]
pub fn workspace_lock() -> &'static RwLock<()> {
    WORKSPACE_LOCK.get_or_init(|| RwLock::new(()))
}
/// Reads and validates `.sqry/graph/manifest.json`, returning the graph's
/// identity. Rejects manifests whose `root_path` does not canonicalize to
/// `workspace` (guards against a symlinked `.sqry/graph` from another repo).
///
/// # Errors
/// Manifest missing or unparseable, canonicalization failure, root-path
/// mismatch, or an invalid RFC 3339 `built_at` timestamp.
pub fn read_graph_identity(workspace: &Path) -> Result<GraphIdentity> {
    let manifest_path = workspace.join(".sqry/graph/manifest.json");
    let file = std::fs::File::open(&manifest_path).with_context(|| {
        format!(
            "Manifest missing - run `sqry index` in workspace: {}",
            workspace.display()
        )
    })?;
    let manifest: sqry_core::graph::unified::persistence::Manifest = serde_json::from_reader(file)
        .context("Failed to parse manifest.json - index may be corrupt")?;
    let canonical_workspace = std::fs::canonicalize(workspace)?;
    // `root_path` may be stored relative to the workspace.
    let manifest_root_path = PathBuf::from(&manifest.root_path);
    let manifest_root = if manifest_root_path.is_absolute() {
        std::fs::canonicalize(&manifest_root_path)?
    } else {
        std::fs::canonicalize(workspace.join(&manifest_root_path))?
    };
    if canonical_workspace != manifest_root {
        bail!(
            "Manifest root_path mismatch: expected {}, got {}. \
             Possible symlinked .sqry/graph from different repo.",
            canonical_workspace.display(),
            manifest_root.display()
        );
    }
    let built_at = DateTime::parse_from_rfc3339(&manifest.built_at)
        .with_context(|| {
            format!(
                "Invalid built_at timestamp in manifest: {}",
                manifest.built_at
            )
        })?
        .with_timezone(&Utc);
    Ok(GraphIdentity {
        snapshot_sha256: manifest.snapshot_sha256,
        built_at,
        schema_version: manifest.schema_version,
        snapshot_format_version: manifest.snapshot_format_version,
        workspace_root: canonical_workspace,
    })
}
#[allow(dead_code)]
pub fn read_manifest_metadata(workspace: &Path) -> Result<ManifestMetadata> {
let manifest_path = workspace.join(".sqry/graph/manifest.json");
let metadata = std::fs::metadata(&manifest_path).context("Failed to stat manifest.json")?;
let file_id = extract_file_id(&metadata);
Ok(ManifestMetadata {
mtime: metadata.modified()?,
size: metadata.len(),
file_id,
})
}
/// Like [`read_graph_identity`], but also captures the manifest file's
/// (mtime, size, file id). The metadata is taken from the already-open file
/// handle so it matches exactly the bytes that were parsed (no open/stat
/// race).
///
/// # Errors
/// Same failure modes as [`read_graph_identity`], plus stat failures on the
/// open handle.
pub fn read_graph_identity_with_metadata(
    workspace: &Path,
) -> Result<(GraphIdentity, ManifestMetadata)> {
    let manifest_path = workspace.join(".sqry/graph/manifest.json");
    let file = std::fs::File::open(&manifest_path).with_context(|| {
        format!(
            "Manifest missing - run `sqry index` in workspace: {}",
            workspace.display()
        )
    })?;
    let file_metadata = file
        .metadata()
        .context("Failed to stat manifest.json from open file handle")?;
    let manifest: sqry_core::graph::unified::persistence::Manifest = serde_json::from_reader(file)
        .context("Failed to parse manifest.json - index may be corrupt")?;
    let canonical_workspace = std::fs::canonicalize(workspace)?;
    // `root_path` may be stored relative to the workspace.
    let manifest_root_path = PathBuf::from(&manifest.root_path);
    let manifest_root = if manifest_root_path.is_absolute() {
        std::fs::canonicalize(&manifest_root_path)?
    } else {
        std::fs::canonicalize(workspace.join(&manifest_root_path))?
    };
    if canonical_workspace != manifest_root {
        bail!(
            "Manifest root_path mismatch: expected {}, got {}. \
             Possible symlinked .sqry/graph from different repo.",
            canonical_workspace.display(),
            manifest_root.display()
        );
    }
    let built_at = DateTime::parse_from_rfc3339(&manifest.built_at)
        .with_context(|| {
            format!(
                "Invalid built_at timestamp in manifest: {}",
                manifest.built_at
            )
        })?
        .with_timezone(&Utc);
    let identity = GraphIdentity {
        snapshot_sha256: manifest.snapshot_sha256,
        built_at,
        schema_version: manifest.schema_version,
        snapshot_format_version: manifest.snapshot_format_version,
        workspace_root: canonical_workspace,
    };
    let file_id = extract_file_id(&file_metadata);
    let metadata = ManifestMetadata {
        mtime: file_metadata.modified()?,
        size: file_metadata.len(),
        file_id,
    };
    Ok((identity, metadata))
}
/// Stable file identifier for freshness checks: the inode number on Unix.
#[cfg(unix)]
#[allow(clippy::unnecessary_wraps)]
fn extract_file_id(metadata: &std::fs::Metadata) -> Option<u64> {
    use std::os::unix::fs::MetadataExt;
    Some(metadata.ino())
}
/// Windows: no u64 file id is extracted here, so freshness checks fall back
/// to mtime + size only.
#[cfg(windows)]
#[allow(clippy::unnecessary_wraps)]
fn extract_file_id(_metadata: &std::fs::Metadata) -> Option<u64> {
    None
}
/// Other platforms: no file id available.
#[cfg(not(any(unix, windows)))]
fn extract_file_id(_metadata: &std::fs::Metadata) -> Option<u64> {
    None
}
/// LRU cache of per-workspace engines keyed by canonical workspace root.
/// Stays `None` until [`init_engine_cache`] is called.
static ENGINE_CACHE: parking_lot::Mutex<Option<lru::LruCache<PathBuf, CachedEngine>>> =
    parking_lot::Mutex::new(None);
/// Initializes the engine cache with the given capacity.
/// Idempotent: subsequent calls leave an existing cache untouched.
pub fn init_engine_cache(capacity: std::num::NonZeroUsize) {
    let mut cache = ENGINE_CACHE.lock();
    if cache.is_none() {
        tracing::info!(capacity = capacity.get(), "Initializing engine cache");
        *cache = Some(lru::LruCache::new(capacity));
    }
}
/// Looks up a cached engine for `workspace`, validating that the on-disk
/// manifest still matches the cached identity/metadata.
///
/// Returns `Ok(Some(engine))` on a fresh hit (or when the manifest changed
/// but the re-read `GraphIdentity` is unchanged), `Ok(None)` on a miss or
/// after evicting a stale/removed entry, and `Err(..)` when the cache is
/// uninitialized or the manifest stat fails for reasons other than
/// `NotFound`.
///
/// Fix: restores `&current.engine` / mis-encoded `¤t` (HTML-entity
/// mojibake of `&current`) in the concurrent-update branch.
fn get_cached_engine(workspace: &Path) -> Result<Option<Arc<Engine>>> {
    // Snapshot the cached entry under the lock, then release the lock
    // before doing any filesystem work.
    let (cached_engine, cached_identity, cached_metadata) = {
        let mut cache = ENGINE_CACHE.lock();
        let lru = cache
            .as_mut()
            .context("Engine cache not initialized - call init_engine_cache() first")?;
        if let Some(cached) = lru.get(workspace) {
            (
                Arc::clone(&cached.engine),
                cached.identity.clone(),
                cached.metadata.clone(),
            )
        } else {
            return Ok(None);
        }
    };
    if is_manifest_fresh(&cached_metadata, workspace)? {
        tracing::debug!(
            workspace = %workspace.display(),
            "Engine cache hit (fresh)"
        );
        return Ok(Some(cached_engine));
    }
    // Stale metadata: distinguish "manifest gone" (evict, report miss) from
    // a transient stat failure (error) before re-reading identity.
    let manifest_path = workspace.join(".sqry/graph/manifest.json");
    match std::fs::metadata(&manifest_path) {
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            tracing::debug!(
                workspace = %workspace.display(),
                "Manifest absent during cache reload — evicting cache entry"
            );
            let mut cache = ENGINE_CACHE.lock();
            if let Some(lru) = cache.as_mut() {
                lru.pop(workspace);
            }
            return Ok(None);
        }
        Err(e) => {
            return Err(e).context("Failed to stat manifest.json during cache reload");
        }
        Ok(_) => {}
    }
    tracing::debug!(
        workspace = %workspace.display(),
        "Manifest changed, reloading identity"
    );
    let (new_identity, new_metadata) = match read_graph_identity_with_metadata(workspace) {
        Ok(pair) => pair,
        Err(e) => {
            // The manifest may have been removed between the stat above and
            // the open inside the reader (TOCTOU): treat as a clean miss.
            let is_not_found = e.chain().any(|c| {
                c.downcast_ref::<std::io::Error>()
                    .is_some_and(|ioe| ioe.kind() == std::io::ErrorKind::NotFound)
            });
            if is_not_found {
                tracing::debug!(
                    workspace = %workspace.display(),
                    "Manifest removed between stat and open (TOCTOU) — evicting cache entry"
                );
                let mut cache = ENGINE_CACHE.lock();
                if let Some(lru) = cache.as_mut() {
                    lru.pop(workspace);
                }
                return Ok(None);
            }
            return Err(e);
        }
    };
    // Re-acquire the lock and reconcile with any concurrent updates.
    let mut cache = ENGINE_CACHE.lock();
    let Some(lru) = cache.as_mut() else {
        return Ok(None);
    };
    if let Some(current) = lru.get(workspace) {
        if current.identity != cached_identity {
            tracing::debug!(
                workspace = %workspace.display(),
                "Another thread updated cache, using their engine"
            );
            return Ok(Some(Arc::clone(&current.engine)));
        }
    } else {
        tracing::debug!(
            workspace = %workspace.display(),
            "Cache entry evicted during reload"
        );
        return Ok(None);
    }
    if new_identity == cached_identity {
        tracing::debug!(
            workspace = %workspace.display(),
            "GraphIdentity unchanged, updating metadata only"
        );
        if let Some(cached) = lru.get_mut(workspace) {
            cached.metadata = new_metadata;
        }
        Ok(Some(cached_engine))
    } else {
        tracing::info!(
            workspace = %workspace.display(),
            old_sha = %cached_identity.snapshot_sha256,
            new_sha = %new_identity.snapshot_sha256,
            "GraphIdentity changed, invalidating cache"
        );
        lru.pop(workspace);
        Ok(None)
    }
}
/// Returns `true` when the on-disk manifest still matches the cached
/// (mtime, size, file id) triple; a missing manifest counts as stale
/// (`Ok(false)`), while other stat errors propagate.
///
/// Fix: restores `extract_file_id(&current)` from the mis-encoded
/// `¤t` (HTML-entity mojibake of `&current`).
fn is_manifest_fresh(cached: &ManifestMetadata, workspace: &Path) -> Result<bool> {
    let manifest_path = workspace.join(".sqry/graph/manifest.json");
    let current = match std::fs::metadata(&manifest_path) {
        Ok(meta) => meta,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            tracing::debug!(
                workspace = %workspace.display(),
                "Manifest missing during freshness check — treating as stale"
            );
            return Ok(false);
        }
        Err(e) => {
            return Err(e).context("Failed to stat manifest.json for freshness check");
        }
    };
    Ok(current.modified()? == cached.mtime
        && current.len() == cached.size
        && extract_file_id(&current) == cached.file_id)
}
/// Returns the `GraphIdentity` for `workspace`, preferring a fresh cached
/// copy (`peek` — does not bump LRU recency) and falling back to reading the
/// manifest from disk.
///
/// # Errors
/// Fails when the engine cache is uninitialized, or when the fallback
/// manifest read fails.
pub fn get_graph_identity(workspace: &Path) -> Result<GraphIdentity> {
    {
        let cache = ENGINE_CACHE.lock();
        let lru = cache
            .as_ref()
            .context("Engine cache not initialized - call init_engine_cache() first")?;
        if let Some(cached) = lru.peek(workspace) {
            // Freshness-check errors are deliberately treated as "stale".
            if is_manifest_fresh(&cached.metadata, workspace).unwrap_or(false) {
                tracing::debug!(
                    workspace = %workspace.display(),
                    "Returning cached GraphIdentity"
                );
                return Ok(cached.identity.clone());
            }
        }
    }
    tracing::debug!(
        workspace = %workspace.display(),
        "Reading GraphIdentity from manifest"
    );
    read_graph_identity(workspace)
}
#[cfg(test)]
mod engine_cache_tests {
use super::*;
use std::io::Write;
use tempfile::TempDir;
/// Resets the global `ENGINE_CACHE` to uninitialized; tests touching it are
/// serialized via `serial_test::serial(engine_cache)`.
fn reset_engine_cache() {
    let mut cache = ENGINE_CACHE.lock();
    *cache = None;
}
/// Builds a temp workspace containing a minimal valid
/// `.sqry/graph/manifest.json` whose `root_path` points at the temp dir.
fn create_test_workspace() -> Result<TempDir> {
    let temp_dir = TempDir::new()?;
    let graph_dir = temp_dir.path().join(".sqry/graph");
    std::fs::create_dir_all(&graph_dir)?;
    let manifest = serde_json::json!({
        "schema_version": 1,
        "snapshot_format_version": 1,
        "built_at": "2026-01-01T00:00:00Z",
        "root_path": temp_dir.path().to_string_lossy(),
        "node_count": 0,
        "edge_count": 0,
        "snapshot_sha256": "aaaa",
        "build_provenance": {
            "sqry_version": "4.10.0",
            "build_timestamp": "2026-01-01T00:00:00Z",
            "build_command": "test"
        }
    });
    let manifest_path = graph_dir.join("manifest.json");
    let mut file = std::fs::File::create(&manifest_path)?;
    file.write_all(serde_json::to_string_pretty(&manifest)?.as_bytes())?;
    // Flush so subsequent metadata reads observe the final contents.
    file.sync_all()?;
    Ok(temp_dir)
}
/// Freshness must flip after the manifest is rewritten. NOTE(review): the
/// old and new manifests here are the same byte length, so detection relies
/// on mtime alone (hence the sleep) — confirm this is stable on filesystems
/// with coarse (e.g. 1s) timestamp granularity.
#[test]
fn test_manifest_freshness_detection() -> Result<()> {
    let workspace = create_test_workspace()?;
    let workspace_path = workspace.path();
    let metadata1 = read_manifest_metadata(workspace_path)?;
    assert!(is_manifest_fresh(&metadata1, workspace_path)?);
    let manifest_path = workspace_path.join(".sqry/graph/manifest.json");
    // Ensure the rewrite lands at a later mtime.
    std::thread::sleep(std::time::Duration::from_millis(10));
    let mut file = std::fs::OpenOptions::new()
        .write(true)
        .truncate(true)
        .open(&manifest_path)?;
    let new_manifest = serde_json::json!({
        "schema_version": 1,
        "snapshot_format_version": 1,
        "built_at": "2026-01-02T00:00:00Z",
        "root_path": workspace_path.to_string_lossy(),
        "node_count": 0,
        "edge_count": 0,
        "snapshot_sha256": "bbbb",
        "build_provenance": {
            "sqry_version": "4.10.0",
            "build_timestamp": "2026-01-02T00:00:00Z",
            "build_command": "test"
        }
    });
    file.write_all(serde_json::to_string_pretty(&new_manifest)?.as_bytes())?;
    file.sync_all()?;
    assert!(!is_manifest_fresh(&metadata1, workspace_path)?);
    Ok(())
}
/// Using the cache before `init_engine_cache` must be a hard error.
#[test]
#[serial_test::serial(engine_cache)]
fn test_cache_requires_initialization() {
    reset_engine_cache();
    let temp_dir = TempDir::new().unwrap();
    let result = get_cached_engine(temp_dir.path());
    match result {
        Err(e) => assert!(e.to_string().contains("not initialized")),
        Ok(_) => panic!("Expected error, got success"),
    }
}
/// An initialized but empty cache reports a miss, not an error.
#[test]
#[serial_test::serial(engine_cache)]
fn test_cache_miss_returns_none() -> Result<()> {
    init_engine_cache(std::num::NonZeroUsize::new(5).unwrap());
    let temp_dir = TempDir::new()?;
    let workspace_path = temp_dir.path();
    let result = get_cached_engine(workspace_path)?;
    assert!(result.is_none());
    Ok(())
}
/// `..` removes the preceding path component.
#[test]
fn test_normalize_path_resolves_parent_dir() {
    let path = std::path::Path::new("/workspace/src/../lib");
    let normalized = normalize_path(path);
    assert_eq!(normalized, std::path::PathBuf::from("/workspace/lib"));
}
/// `.` components are dropped.
#[test]
fn test_normalize_path_resolves_current_dir() {
    let path = std::path::Path::new("/workspace/./src");
    let normalized = normalize_path(path);
    assert_eq!(normalized, std::path::PathBuf::from("/workspace/src"));
}
/// Consecutive `..` components pop multiple directories.
#[test]
fn test_normalize_path_handles_multiple_traversals() {
    let path = std::path::Path::new("/a/b/c/../../d");
    let normalized = normalize_path(path);
    assert_eq!(normalized, std::path::PathBuf::from("/a/d"));
}
/// NOTE(review): despite the name, this input contains no `..` component —
/// it only checks that a bare root-level path passes through unchanged.
#[test]
fn test_normalize_path_at_root_ignores_parent() {
    let path = std::path::Path::new("/workspace");
    let normalized = normalize_path(path);
    assert_eq!(normalized, std::path::PathBuf::from("/workspace"));
}
/// Paths without `.`/`..` are returned as-is.
#[test]
fn test_normalize_path_simple_path_unchanged() {
    let path = std::path::Path::new("/workspace/src/lib.rs");
    let normalized = normalize_path(path);
    assert_eq!(
        normalized,
        std::path::PathBuf::from("/workspace/src/lib.rs")
    );
}
/// "." resolves to the canonical workspace root itself.
#[test]
fn test_canonicalize_in_workspace_dot_returns_workspace_root() -> Result<()> {
    let temp = TempDir::new()?;
    let workspace = temp.path();
    let result = canonicalize_in_workspace(".", workspace)?;
    assert_eq!(result, workspace.canonicalize()?);
    Ok(())
}
/// Relative traversal out of the workspace is rejected — either by the
/// lexical containment check or by canonicalization failing.
#[test]
fn test_canonicalize_in_workspace_outside_path_rejected() {
    let temp = TempDir::new().unwrap();
    let workspace = temp.path();
    let result = canonicalize_in_workspace("../../etc/passwd", workspace);
    assert!(result.is_err());
    let err = result.unwrap_err().to_string();
    assert!(err.contains("outside") || err.contains("Failed to canonicalize"));
}
/// Absolute paths outside the workspace are rejected.
#[test]
fn test_canonicalize_in_workspace_absolute_outside_rejected() {
    let temp = TempDir::new().unwrap();
    let workspace = temp.path();
    let result = canonicalize_in_workspace("/etc/passwd", workspace);
    assert!(result.is_err());
}
/// An existing subdirectory canonicalizes successfully.
#[test]
fn test_canonicalize_in_workspace_valid_subdir() -> Result<()> {
    let temp = TempDir::new()?;
    let workspace = temp.path();
    let subdir = workspace.join("src");
    std::fs::create_dir(&subdir)?;
    let result = canonicalize_in_workspace("src", workspace)?;
    assert_eq!(result, subdir.canonicalize()?);
    Ok(())
}
/// A valid manifest yields a populated identity.
#[test]
fn test_read_graph_identity_valid_manifest() -> Result<()> {
    let workspace = create_test_workspace()?;
    let workspace_path = workspace.path();
    let identity = read_graph_identity(workspace_path)?;
    assert!(!identity.snapshot_sha256.is_empty());
    assert_eq!(identity.schema_version, 1);
    assert_eq!(identity.snapshot_format_version, 1);
    Ok(())
}
/// A missing manifest is an error carrying a remediation hint.
#[test]
fn test_read_graph_identity_missing_manifest_errors() {
    let temp = TempDir::new().unwrap();
    let result = read_graph_identity(temp.path());
    assert!(result.is_err());
    let msg = result.unwrap_err().to_string();
    assert!(msg.contains("Manifest missing") || msg.contains("run `sqry index`"));
}
/// The combined reader returns both identity and file metadata.
#[test]
fn test_read_graph_identity_with_metadata_returns_both() -> Result<()> {
    let workspace = create_test_workspace()?;
    let workspace_path = workspace.path();
    let (identity, metadata) = read_graph_identity_with_metadata(workspace_path)?;
    assert!(!identity.snapshot_sha256.is_empty());
    assert!(metadata.size > 0);
    Ok(())
}
/// Double initialization must neither clobber nor error; the cache stays
/// usable afterwards.
#[test]
#[serial_test::serial(engine_cache)]
fn test_init_engine_cache_idempotent() {
    reset_engine_cache();
    let cap = std::num::NonZeroUsize::new(4).unwrap();
    init_engine_cache(cap);
    init_engine_cache(cap);
    let temp = TempDir::new().unwrap();
    let result = get_cached_engine(temp.path());
    assert!(result.is_ok());
    assert!(result.unwrap().is_none());
}
/// Unset `SQRY_AUTO_INDEX` defaults to enabled.
#[test]
#[serial_test::serial(sqry_auto_index_env)]
fn test_is_auto_index_enabled_defaults_true() {
    unsafe { std::env::remove_var("SQRY_AUTO_INDEX") };
    assert!(is_auto_index_enabled());
}
/// "false" disables auto-indexing.
#[test]
#[serial_test::serial(sqry_auto_index_env)]
fn test_is_auto_index_enabled_false_when_set_to_false() {
    unsafe { std::env::set_var("SQRY_AUTO_INDEX", "false") };
    assert!(!is_auto_index_enabled());
    unsafe { std::env::remove_var("SQRY_AUTO_INDEX") };
}
/// "0" disables auto-indexing.
#[test]
#[serial_test::serial(sqry_auto_index_env)]
fn test_is_auto_index_enabled_false_when_set_to_zero() {
    unsafe { std::env::set_var("SQRY_AUTO_INDEX", "0") };
    assert!(!is_auto_index_enabled());
    unsafe { std::env::remove_var("SQRY_AUTO_INDEX") };
}
/// Any other value (here "true") keeps auto-indexing enabled.
#[test]
#[serial_test::serial(sqry_auto_index_env)]
fn test_is_auto_index_enabled_true_when_set_to_true() {
    unsafe { std::env::set_var("SQRY_AUTO_INDEX", "true") };
    assert!(is_auto_index_enabled());
    unsafe { std::env::remove_var("SQRY_AUTO_INDEX") };
}
#[test]
fn test_engine_for_workspace_sets_root() -> Result<()> {
    // Engine::for_workspace must record exactly the root it was handed.
    let temp = TempDir::new()?;
    let root = temp.path().to_path_buf();
    let engine = Engine::for_workspace(root.clone())?;
    assert_eq!(engine.workspace_root(), root.as_path());
    Ok(())
}
#[test]
fn test_engine_graph_returns_none_without_snapshot() -> Result<()> {
    // No snapshot has ever been built in this workspace, so graph() has
    // nothing to serve.
    // NOTE(review): overlaps with
    // test_engine_graph_returns_none_when_manifest_missing; the two could be
    // consolidated if they are not intended to diverge later.
    let workspace = TempDir::new()?;
    let engine = Engine::for_workspace(workspace.path().to_path_buf())?;
    assert!(engine.graph().is_none());
    Ok(())
}
#[test]
fn test_engine_graph_returns_none_when_manifest_missing() -> Result<()> {
    // A workspace with no manifest must yield no graph rather than an error
    // or a panic.
    let workspace = TempDir::new()?;
    let engine = Engine::for_workspace(workspace.path().to_path_buf())?;
    let maybe_graph = engine.graph();
    assert!(
        maybe_graph.is_none(),
        "engine.graph() must return None when manifest is absent"
    );
    Ok(())
}
#[test]
#[serial_test::serial(engine_cache)]
// Deleting the manifest out from under a hot cache entry must read as "stale":
// get_cached_engine returns Ok(None) (never Err), and the stale entry is
// evicted so subsequent lookups also miss.
fn test_get_cached_engine_returns_none_when_manifest_removed() -> Result<()> {
// Start from a known-empty cache; this test is serialized on engine_cache.
reset_engine_cache();
let cap = std::num::NonZeroUsize::new(4).unwrap();
init_engine_cache(cap);
let workspace = create_test_workspace()?;
// Canonicalize so the key we seed matches the key get_cached_engine resolves.
let workspace_path = workspace.path().canonicalize()?;
let manifest_path = workspace_path.join(".sqry/graph/manifest.json");
let (identity, metadata) = read_graph_identity_with_metadata(&workspace_path)?;
// Seed the LRU directly. The lock is confined to this block so it is released
// before the get_cached_engine calls below re-acquire it.
{
let mut cache = ENGINE_CACHE.lock();
let lru = cache.as_mut().expect("cache initialized");
lru.put(
workspace_path.clone(),
CachedEngine {
engine: Arc::new(Engine::for_workspace(workspace_path.clone())?),
identity,
metadata,
},
);
}
// Sanity check: while the manifest exists, the seeded entry is served.
assert!(
get_cached_engine(&workspace_path)?.is_some(),
"cache should be hot before manifest removal"
);
std::fs::remove_file(&manifest_path)?;
// A missing manifest is a staleness condition, not a failure condition.
let result = get_cached_engine(&workspace_path);
assert!(
result.is_ok(),
"get_cached_engine must return Ok when manifest is absent, got Err"
);
assert!(
result.unwrap().is_none(),
"get_cached_engine must return None (stale) when manifest is absent"
);
// Second lookup proves the stale entry was evicted, not merely bypassed once.
let result2 = get_cached_engine(&workspace_path);
assert!(result2.is_ok());
assert!(
result2.unwrap().is_none(),
"cache entry must be evicted after manifest-absent eviction"
);
Ok(())
}
#[test]
fn test_graph_identity_equality() -> Result<()> {
    // Reading the identity twice from an unchanged workspace must be stable
    // (GraphIdentity derives Eq on all fields).
    let workspace = create_test_workspace()?;
    let first = read_graph_identity(workspace.path())?;
    let second = read_graph_identity(workspace.path())?;
    assert_eq!(first, second);
    Ok(())
}
#[test]
fn test_read_manifest_metadata_valid() -> Result<()> {
    // An indexed workspace has a manifest with a positive on-disk size.
    let workspace = create_test_workspace()?;
    assert!(read_manifest_metadata(workspace.path())?.size > 0);
    Ok(())
}
#[test]
fn test_read_manifest_metadata_missing_errors() {
    // With no manifest on disk, the metadata lookup must fail.
    let workspace = TempDir::new().unwrap();
    assert!(read_manifest_metadata(workspace.path()).is_err());
}
#[test]
#[serial_test::serial(engine_cache)]
fn test_get_graph_identity_not_initialized_errors() {
    // After a cache reset, identity lookup must surface "not initialized".
    reset_engine_cache();
    let workspace = TempDir::new().unwrap();
    let err = get_graph_identity(workspace.path()).unwrap_err();
    assert!(err.to_string().contains("not initialized"));
}
#[test]
#[serial_test::serial(engine_cache)]
fn test_get_graph_identity_falls_back_to_manifest() -> Result<()> {
    // With the cache initialized but cold for this workspace, the identity
    // must come from the manifest on disk.
    //
    // Reset first so state left behind by other serial(engine_cache) tests
    // cannot influence this one — every sibling test in this serial group
    // resets before initializing; the original omitted that step.
    reset_engine_cache();
    init_engine_cache(std::num::NonZeroUsize::new(4).unwrap());
    let workspace = create_test_workspace()?;
    let identity = get_graph_identity(workspace.path())?;
    assert!(!identity.snapshot_sha256.is_empty());
    Ok(())
}
#[test]
fn test_workspace_lock_returns_same_instance() {
    // workspace_lock() must hand back the same shared static every time,
    // not freshly constructed locks.
    let first = workspace_lock();
    let second = workspace_lock();
    assert!(std::ptr::eq(first, second));
}
#[test]
fn test_is_manifest_fresh_missing_file_returns_stale() {
    // Freshness checks against a workspace with no manifest must fail closed:
    // report stale via Ok(false) instead of propagating an I/O error.
    let workspace = TempDir::new().unwrap();
    let cached = ManifestMetadata {
        mtime: std::time::SystemTime::now(),
        size: 100,
        file_id: None,
    };
    let result = is_manifest_fresh(&cached, workspace.path());
    assert!(
        result.is_ok(),
        "Missing manifest should return Ok, not Err: {result:?}"
    );
    assert!(
        !result.unwrap(),
        "Missing manifest should be treated as stale (Ok(false))"
    );
}
#[test]
fn test_engine_graph_rejects_corrupted_snapshot() {
    // A snapshot whose bytes cannot hash to the manifest's snapshot_sha256
    // must be rejected: graph() fails closed and returns None.
    let temp = tempfile::tempdir().unwrap();
    let graph_dir = temp.path().join(".sqry/graph");
    std::fs::create_dir_all(&graph_dir).unwrap();
    std::fs::write(graph_dir.join("snapshot.sqry"), b"fake snapshot data").unwrap();
    // The all-zero digest cannot match the snapshot bytes written above.
    let manifest = serde_json::json!({
        "root_path": temp.path().to_string_lossy(),
        "node_count": 0,
        "edge_count": 0,
        "snapshot_sha256": "0000000000000000000000000000000000000000000000000000000000000000",
        "built_at": "2026-01-01T00:00:00+00:00",
        "schema_version": 5,
        "snapshot_format_version": 5,
        "build_provenance": { "sqry_version": "test", "rustc_version": "test" }
    });
    let manifest_json = serde_json::to_string_pretty(&manifest).unwrap();
    std::fs::write(graph_dir.join("manifest.json"), manifest_json).unwrap();
    let engine =
        Engine::for_workspace(temp.path().to_path_buf()).expect("engine should create");
    assert!(
        engine.graph().is_none(),
        "Engine::graph() should return None when snapshot hash is wrong"
    );
}
#[test]
fn test_engine_graph_rejects_corrupt_manifest() {
    // An unparseable manifest must not panic or bubble an error out of
    // graph(); the engine fails closed and reports no graph.
    let temp = tempfile::tempdir().unwrap();
    let graph_dir = temp.path().join(".sqry/graph");
    std::fs::create_dir_all(&graph_dir).unwrap();
    std::fs::write(graph_dir.join("manifest.json"), b"not valid json!!!").unwrap();
    std::fs::write(graph_dir.join("snapshot.sqry"), b"some data").unwrap();
    let engine =
        Engine::for_workspace(temp.path().to_path_buf()).expect("engine should create");
    assert!(
        engine.graph().is_none(),
        "Engine::graph() must return None when manifest is corrupt (fail closed)"
    );
}
#[test]
fn test_engine_graph_accepts_empty_hash() {
    // "accepts" refers to the integrity check: an empty snapshot_sha256 is
    // tolerated (no hash-mismatch rejection). graph() still returns None here
    // only because the snapshot bytes are not a valid serialized graph.
    let temp = tempfile::tempdir().unwrap();
    let graph_dir = temp.path().join(".sqry/graph");
    std::fs::create_dir_all(&graph_dir).unwrap();
    std::fs::write(graph_dir.join("snapshot.sqry"), b"not a real snapshot").unwrap();
    let manifest = serde_json::json!({
        "root_path": temp.path().to_string_lossy(),
        "node_count": 0,
        "edge_count": 0,
        "snapshot_sha256": "",
        "built_at": "2026-01-01T00:00:00+00:00",
        "schema_version": 5,
        "snapshot_format_version": 5,
        "build_provenance": { "sqry_version": "test", "rustc_version": "test" }
    });
    let manifest_json = serde_json::to_string_pretty(&manifest).unwrap();
    std::fs::write(graph_dir.join("manifest.json"), manifest_json).unwrap();
    let engine =
        Engine::for_workspace(temp.path().to_path_buf()).expect("engine should create");
    assert!(
        engine.graph().is_none(),
        "Should return None (deserialization fails on garbage data, but integrity check skipped)"
    );
}
}