use super::leindex::{ProjectFileScan, DEPENDENCY_MANIFEST_NAMES};
use crate::cli::memory::CacheEntry;
use crate::cli::skip_dirs::SKIP_DIRS;
use crate::storage::schema::Storage;
use anyhow::Result;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
/// Borrowed inputs shared by the freshness / staleness checks in this module.
///
/// Every field is a borrow, so the context owns nothing.
pub(crate) struct FreshnessContext<'a> {
/// Project root. Indexed file paths, relative source directories and the
/// well-known manifest file names are joined onto this path.
pub project_path: &'a Path,
/// Directory in which `leindex.db` is looked up; that file's modification
/// time is the baseline the staleness checks compare against.
pub storage_path: &'a Path,
/// Project identifier used to fetch the indexed files from storage and to
/// build the project-scan cache key.
pub project_id: &'a str,
/// Storage handle passed to `pdg_store::get_indexed_files`.
pub storage: &'a Storage,
/// A project scan the caller already has, if any. `check_manifest_stale`
/// and `is_stale_fast` use it in preference to the cache or a fresh scan.
pub project_scan: Option<&'a ProjectFileScan>,
/// Cache spiller whose store `is_stale_fast` queries (in memory first, then
/// on disk) for a serialized `ProjectFileScan`.
pub cache_spiller: &'a crate::cli::memory::CacheSpiller,
}
/// Determines which source files changed and which indexed files vanished
/// since the project was last indexed.
///
/// The indexed files (path string -> content hash) are read from storage for
/// `ctx.project_id`; if that read fails the index is treated as empty. The
/// project is then scanned with `scan_fn` and each source path is hashed with
/// `hash_fn`, in scan order.
///
/// # Returns
/// `(changed, deleted)` where
/// * `changed` holds every scanned path whose hash is absent from, or differs
///   from, the stored index (scan order is preserved);
/// * `deleted` holds every indexed path key that the scan no longer contains.
///
/// # Errors
/// Returns the first error produced by `scan_fn` or `hash_fn`.
pub(crate) fn check_freshness(
    ctx: &FreshnessContext<'_>,
    scan_fn: impl Fn() -> Result<ProjectFileScan>,
    hash_fn: impl Fn(&Path) -> Result<String>,
) -> Result<(Vec<PathBuf>, Vec<String>)> {
    let indexed_files = crate::storage::pdg_store::get_indexed_files(ctx.storage, ctx.project_id)
        .unwrap_or_default();
    let scan = scan_fn()?;

    let mut on_disk: HashMap<String, String> = HashMap::with_capacity(scan.source_paths.len());
    let mut changed: Vec<PathBuf> = Vec::new();
    for source in &scan.source_paths {
        let digest = hash_fn(source)?;
        // Index keys are the display form of the path, so compare on that.
        let key = source.display().to_string();
        if indexed_files.get(&key) != Some(&digest) {
            changed.push(source.clone());
        }
        on_disk.insert(key, digest);
    }

    // Anything the index knows about that the scan did not report is gone.
    let deleted: Vec<String> = indexed_files
        .keys()
        .filter(|key| !on_disk.contains_key(*key))
        .cloned()
        .collect();

    Ok((changed, deleted))
}
pub(crate) fn check_manifest_stale(
ctx: &FreshnessContext<'_>,
scan_fn: impl Fn() -> Result<ProjectFileScan>,
) -> bool {
let db_time = match ctx
.storage_path
.join("leindex.db")
.metadata()
.and_then(|m| m.modified())
{
Ok(t) => t,
Err(_) => return true,
};
let scan = ctx.project_scan;
let paths_to_check: Vec<PathBuf> = if let Some(scan) = scan {
scan.manifest_paths.clone()
} else {
match scan_fn() {
Ok(scan) => scan.manifest_paths,
Err(_) => return true,
}
};
let original_scan_paths: std::collections::HashSet<PathBuf> =
paths_to_check.iter().cloned().collect();
let mut all_paths: std::collections::HashSet<PathBuf> = paths_to_check.into_iter().collect();
for name in DEPENDENCY_MANIFEST_NAMES {
all_paths.insert(ctx.project_path.join(name));
}
for manifest_path in &all_paths {
match std::fs::metadata(manifest_path) {
Ok(metadata) => {
if let Ok(modified) = metadata.modified() {
if modified > db_time {
return true;
}
}
}
Err(_) => {
if original_scan_paths.contains(manifest_path) {
return true;
}
}
}
}
false
}
pub(crate) fn is_stale_fast(
ctx: &FreshnessContext<'_>,
scan_fn: impl Fn() -> Result<ProjectFileScan>,
) -> bool {
let indexed_files = crate::storage::pdg_store::get_indexed_files(ctx.storage, ctx.project_id)
.unwrap_or_default();
if indexed_files.is_empty() {
return true;
}
let db_time = match ctx
.storage_path
.join("leindex.db")
.metadata()
.and_then(|m| m.modified())
{
Ok(t) => t,
Err(_) => return true,
};
let mut cold_manifest_paths: Option<Vec<PathBuf>> = None;
let mut source_count: Option<usize> = None;
let mut cached_manifest_paths: Option<Vec<PathBuf>> = None;
let mut cached_scan: Option<ProjectFileScan> = None;
match ctx.project_scan {
Some(cache) => {
source_count = Some(cache.source_paths.len());
}
None => {
let cache_key = crate::cli::memory::project_scan_cache_key(ctx.project_id);
if let Some(entry) = ctx.cache_spiller.store().peek(&cache_key) {
if let CacheEntry::Binary {
serialized_data, ..
} = entry
{
if let Ok(scan) = bincode::deserialize::<ProjectFileScan>(serialized_data) {
cached_manifest_paths = Some(scan.manifest_paths.clone());
cached_scan = Some(scan.clone());
source_count = Some(scan.source_paths.len());
}
}
} else if let Ok(CacheEntry::Binary {
serialized_data, ..
}) = ctx.cache_spiller.store().load_from_disk(&cache_key)
{
if let Ok(scan) = bincode::deserialize::<ProjectFileScan>(&serialized_data) {
cached_manifest_paths = Some(scan.manifest_paths.clone());
cached_scan = Some(scan.clone());
source_count = Some(scan.source_paths.len());
}
}
if source_count.is_none() {
match scan_fn() {
Ok(scan) => {
cold_manifest_paths = Some(scan.manifest_paths.clone());
source_count = Some(scan.source_paths.len());
}
Err(_) => return true,
}
}
}
};
let source_count = source_count.unwrap_or(indexed_files.len());
if source_count != indexed_files.len() {
return true;
}
let source_dirs: Vec<PathBuf> = if let Some(scan) = ctx.project_scan {
scan.source_directories.clone()
} else if let Some(scan) = cached_scan.as_ref() {
scan.source_directories.clone()
} else {
let mut dirs: Vec<PathBuf> = indexed_files
.keys()
.filter_map(|p| PathBuf::from(p).parent().map(|d| d.to_path_buf()))
.collect();
dirs.sort();
dirs.dedup();
dirs
};
for dir in &source_dirs {
let full_path = if dir.is_absolute() {
dir.clone()
} else {
ctx.project_path.join(dir)
};
match std::fs::metadata(&full_path) {
Ok(metadata) => {
if let Ok(modified) = metadata.modified() {
if modified > db_time {
return true;
}
}
}
Err(_) => {
return true;
}
}
}
let sample_size = (indexed_files.len() / 20).clamp(50, 500);
for (checked, indexed_path) in indexed_files.keys().enumerate() {
if checked >= sample_size {
break;
}
let full_path = ctx.project_path.join(indexed_path);
if !full_path.exists() {
return true;
}
if let Ok(metadata) = std::fs::metadata(&full_path) {
if let Ok(modified) = metadata.modified() {
if modified > db_time {
return true;
}
}
}
}
let manifest_paths: Vec<PathBuf> = if let Some(scan) = ctx.project_scan {
scan.manifest_paths.clone()
} else if let Some(ref paths) = cached_manifest_paths {
paths.clone()
} else if let Some(ref paths) = cold_manifest_paths {
paths.clone()
} else {
Vec::new()
};
let already_listed: std::collections::HashSet<PathBuf> =
if let Some(scan) = ctx.project_scan {
if !scan.manifest_paths_canonical.is_empty() {
scan.manifest_paths_canonical.iter().cloned().collect()
} else {
build_already_listed(ctx.project_path, &scan.manifest_paths)
}
} else {
build_already_listed(ctx.project_path, &manifest_paths)
};
let new_root_manifest = find_new_root_manifest(ctx.project_path, &already_listed);
if new_root_manifest {
return true;
}
if find_new_nested_manifest(ctx.project_path, &already_listed) {
return true;
}
for manifest_path in &manifest_paths {
match std::fs::metadata(manifest_path) {
Ok(metadata) => {
if let Ok(modified) = metadata.modified() {
if modified > db_time {
return true;
}
}
}
Err(_) => {
return true;
}
}
}
false
}
/// Reports whether the project root contains a dependency manifest that is not
/// yet known.
///
/// Each name in `DEPENDENCY_MANIFEST_NAMES` is joined onto `project_path`.
/// Candidates that do not exist are ignored; existing ones are canonicalized
/// (falling back to the joined path if canonicalization fails) and looked up
/// in `already_listed`.
///
/// # Returns
/// `true` as soon as an existing root manifest is found that `already_listed`
/// does not contain, `false` if there is none.
pub(crate) fn find_new_root_manifest(
    project_path: &Path,
    already_listed: &std::collections::HashSet<PathBuf>,
) -> bool {
    DEPENDENCY_MANIFEST_NAMES
        .iter()
        .map(|name| project_path.join(name))
        .filter(|candidate| candidate.exists())
        .any(|candidate| {
            let resolved = match candidate.canonicalize() {
                Ok(real) => real,
                Err(_) => candidate,
            };
            !already_listed.contains(&resolved)
        })
}
/// Builds the set of manifest locations that are already known.
///
/// Every entry of `manifest_paths` is joined onto `project_path` (an absolute
/// entry is returned unchanged by `Path::join`) and then canonicalized. When
/// canonicalization fails — for example because the file no longer exists —
/// the joined, non-canonical path is stored instead.
///
/// # Returns
/// The resulting paths as a set; duplicate entries collapse into one.
pub(crate) fn build_already_listed(
    project_path: &Path,
    manifest_paths: &[PathBuf],
) -> std::collections::HashSet<PathBuf> {
    manifest_paths
        .iter()
        .map(|listed| {
            let joined = project_path.join(listed);
            match joined.canonicalize() {
                Ok(real) => real,
                Err(_) => joined,
            }
        })
        .collect()
}
/// Reports whether a dependency manifest that is not yet known exists below
/// `project_path`.
///
/// The tree is walked from depth 1 to depth 5 relative to `project_path`.
/// Directories whose name starts with `.` or appears in `SKIP_DIRS` are pruned
/// together with everything beneath them; directories whose name is not valid
/// UTF-8 are still descended into. Entries the walker fails to read are
/// skipped.
///
/// A regular file counts as a manifest when its name is listed in
/// `DEPENDENCY_MANIFEST_NAMES`. Its path is canonicalized (falling back to the
/// walked path if that fails) before the lookup in `already_listed`.
///
/// # Returns
/// `true` as soon as a manifest outside `already_listed` is found, `false` if
/// the walk finishes without one.
pub(crate) fn find_new_nested_manifest(
    project_path: &Path,
    already_listed: &std::collections::HashSet<PathBuf>,
) -> bool {
    WalkDir::new(project_path)
        .min_depth(1)
        .max_depth(5)
        .into_iter()
        .filter_entry(|entry| {
            // Only directories are ever pruned; the root and plain files pass.
            if entry.depth() == 0 || !entry.file_type().is_dir() {
                return true;
            }
            match entry.file_name().to_str() {
                Some(dir_name) => !(dir_name.starts_with('.') || SKIP_DIRS.contains(&dir_name)),
                None => true,
            }
        })
        .filter_map(|walked| walked.ok())
        .filter(|entry| entry.file_type().is_file())
        .filter(|entry| {
            entry
                .path()
                .file_name()
                .and_then(|n| n.to_str())
                .map_or(false, |n| DEPENDENCY_MANIFEST_NAMES.contains(&n))
        })
        .any(|entry| {
            let walked_path = entry.path();
            let resolved = match walked_path.canonicalize() {
                Ok(real) => real,
                Err(_) => walked_path.to_path_buf(),
            };
            !already_listed.contains(&resolved)
        })
}
/// Returns the distinct parent directories of `paths`, sorted ascending.
///
/// Paths without a parent (such as a filesystem root or the empty path)
/// contribute nothing. A bare file name contributes the empty path, because
/// that is what `Path::parent` yields for it.
pub(crate) fn extract_unique_dirs(paths: &[PathBuf]) -> Vec<PathBuf> {
    let mut parents: Vec<PathBuf> = paths
        .iter()
        .filter_map(|file| file.parent())
        .map(|dir| dir.to_path_buf())
        .collect();
    // Sorting first makes equal directories adjacent so `dedup` removes them all.
    parents.sort();
    parents.dedup();
    parents
}
// Unit tests for the manifest-discovery helpers (`find_new_nested_manifest`,
// `find_new_root_manifest`, `build_already_listed`). Each test builds a small
// directory tree inside a fresh temporary directory.
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use std::path::PathBuf;
// Creates an empty temporary directory and an empty "already listed" set.
// The `TempDir` handle is returned so the caller keeps the directory alive.
fn make_fixture() -> (tempfile::TempDir, std::collections::HashSet<PathBuf>) {
let tmp = tempfile::tempdir().unwrap();
(tmp, std::collections::HashSet::new())
}
// A `package.json` inside `packages/api` is unknown to the empty set and must
// be reported.
#[test]
fn find_new_nested_manifest_detects_monorepo_package_json() {
let (tmp, listed) = make_fixture();
let root = tmp.path();
fs::create_dir_all(root.join("packages/api/src")).unwrap();
fs::write(root.join("packages/api/package.json"), "{}").unwrap();
fs::write(root.join("packages/api/src/main.rs"), "fn main() {}").unwrap();
assert!(
find_new_nested_manifest(root, &listed),
"monorepo package.json at depth 2 must be flagged"
);
}
// A `Cargo.toml` three directories below the root is still within the walk
// limit and must be reported.
#[test]
fn find_new_nested_manifest_detects_cargo_toml_at_depth_3() {
let (tmp, listed) = make_fixture();
let root = tmp.path();
fs::create_dir_all(root.join("services/auth/config/src")).unwrap();
fs::write(
root.join("services/auth/config/Cargo.toml"),
"[package]\nname = \"auth\"\n",
)
.unwrap();
assert!(
find_new_nested_manifest(root, &listed),
"depth-3 Cargo.toml must be flagged"
);
}
// Manifests below `node_modules` must not be reported.
// NOTE(review): relies on `node_modules` being listed in `SKIP_DIRS`, which is
// defined outside this file — confirm.
#[test]
fn find_new_nested_manifest_skips_node_modules() {
let (tmp, listed) = make_fixture();
let root = tmp.path();
fs::create_dir_all(root.join("node_modules/foo/src")).unwrap();
fs::write(root.join("node_modules/foo/package.json"), "{}").unwrap();
assert!(
!find_new_nested_manifest(root, &listed),
"node_modules/package.json must be skipped"
);
}
// Manifests below `target` must not be reported.
// NOTE(review): relies on `target` being listed in `SKIP_DIRS`, which is
// defined outside this file — confirm.
#[test]
fn find_new_nested_manifest_skips_target() {
let (tmp, listed) = make_fixture();
let root = tmp.path();
fs::create_dir_all(root.join("target/some-build/src")).unwrap();
fs::write(root.join("target/some-build/Cargo.toml"), "[package]\nname=\"x\"\n").unwrap();
assert!(
!find_new_nested_manifest(root, &listed),
"target/Cargo.toml must be skipped"
);
}
// A manifest whose canonical path is already in the set is not "new".
#[test]
fn find_new_nested_manifest_respects_already_listed() {
let (tmp, mut listed) = make_fixture();
let root = tmp.path();
fs::create_dir_all(root.join("packages/api/src")).unwrap();
let manifest = root.join("packages/api/package.json");
fs::write(&manifest, "{}").unwrap();
let canon = manifest.canonicalize().unwrap();
listed.insert(canon);
assert!(
!find_new_nested_manifest(root, &listed),
"manifest already in listed set must not be flagged"
);
}
// A tree that contains only source files yields no report.
#[test]
fn find_new_nested_manifest_returns_false_when_no_manifests() {
let (tmp, listed) = make_fixture();
let root = tmp.path();
fs::create_dir_all(root.join("src")).unwrap();
fs::write(root.join("src/main.rs"), "fn main() {}").unwrap();
fs::write(root.join("src/lib.rs"), "// lib\n").unwrap();
assert!(
!find_new_nested_manifest(root, &listed),
"no manifests present, must return false"
);
}
// A manifest deeper than the walker's `max_depth(5)` is never visited.
// NOTE(review): the file is written to `a/b/c/d/e/f/Cargo.toml`, which is seven
// path components below the root, while the assertion message speaks of
// depth 6. The test therefore does not exercise the exact boundary (a manifest
// at depth 5 vs. depth 6).
#[test]
fn find_new_nested_manifest_respects_max_depth_5() {
let (tmp, listed) = make_fixture();
let root = tmp.path();
let deep = root.join("a/b/c/d/e/f");
fs::create_dir_all(&deep).unwrap();
fs::write(deep.join("Cargo.toml"), "[package]\nname=\"deep\"\n").unwrap();
assert!(
!find_new_nested_manifest(root, &listed),
"depth-6 manifest must not be flagged (max_depth=5)"
);
}
// Files inside a dot-directory must not be reported.
// NOTE(review): presumably `config.toml` is not in `DEPENDENCY_MANIFEST_NAMES`,
// in which case this passes even without the dot-directory filter — confirm,
// and consider placing a real manifest name inside `.cargo`.
#[test]
fn find_new_nested_manifest_ignores_dotfile_dirs() {
let (tmp, listed) = make_fixture();
let root = tmp.path();
fs::create_dir_all(root.join(".cargo")).unwrap();
fs::write(root.join(".cargo/config.toml"), "[net]\n").unwrap();
assert!(
!find_new_nested_manifest(root, &listed),
".cargo/config.toml must not be flagged"
);
}
// With `pyproject.toml` already listed, a sibling `setup.py` must not make the
// root look like it gained a manifest.
// NOTE(review): this can only pass if `setup.py` is absent from
// `DEPENDENCY_MANIFEST_NAMES` — confirm against that constant.
#[test]
fn find_new_root_manifest_ignores_setup_py_when_cached() {
let (tmp, _) = make_fixture();
let root = tmp.path();
fs::write(root.join("setup.py"), "from setuptools import setup\n").unwrap();
let manifest = root.join("pyproject.toml");
fs::write(&manifest, "[tool.poetry]\nname = \"x\"\n").unwrap();
let mut listed: std::collections::HashSet<std::path::PathBuf> =
std::collections::HashSet::new();
listed.insert(manifest.canonicalize().unwrap());
assert!(
!find_new_root_manifest(root, &listed),
"setup.py must not be flagged when pyproject.toml is already listed"
);
}
// A root `Cargo.toml` that the (empty) set does not know must be reported.
#[test]
fn find_new_root_manifest_detects_new_cargo_toml() {
let (tmp, listed) = make_fixture();
let root = tmp.path();
fs::write(root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
assert!(
find_new_root_manifest(root, &listed),
"new Cargo.toml at root must be flagged"
);
}
// A root manifest whose canonical path is in the set is not "new".
#[test]
fn find_new_root_manifest_does_not_flag_listed_manifest() {
let (tmp, _) = make_fixture();
let root = tmp.path();
fs::write(root.join("package.json"), "{}").unwrap();
let mut listed: std::collections::HashSet<std::path::PathBuf> =
std::collections::HashSet::new();
listed.insert(root.join("package.json").canonicalize().unwrap());
assert!(
!find_new_root_manifest(root, &listed),
"listed package.json must not be flagged"
);
}
// A relative manifest path is joined onto the project path before it is
// canonicalized.
#[test]
fn build_already_listed_resolves_relative_paths_against_project_path() {
let (tmp, _) = make_fixture();
let root = tmp.path();
let manifest = root.join("Cargo.toml");
fs::write(&manifest, "[package]\nname = \"x\"\n").unwrap();
let relative = std::path::PathBuf::from("Cargo.toml");
let listed = build_already_listed(root, &[relative]);
let canon = manifest.canonicalize().unwrap();
assert!(
listed.contains(&canon),
"already_listed must contain the canonical absolute path of the joined manifest: {:?}",
canon
);
}
// An already-absolute, canonical path survives the join + canonicalize step
// unchanged.
#[test]
fn build_already_listed_passes_through_absolute_paths() {
let (tmp, _) = make_fixture();
let root = tmp.path();
let manifest = root.join("package.json");
fs::write(&manifest, "{}").unwrap();
let abs = manifest.canonicalize().unwrap();
let listed = build_already_listed(root, &[abs.clone()]);
assert!(
listed.contains(&abs),
"absolute scanner output must round-trip through join+canonicalize"
);
}
// Canonicalizing the manifest paths by hand (join when relative, then
// canonicalize) must give the same set as `build_already_listed`.
#[test]
fn pre_canonicalized_manifest_paths_match_build_already_listed() {
let (tmp, _) = make_fixture();
let root = tmp.path();
let root_manifest = root.join("Cargo.toml");
fs::write(&root_manifest, "[package]\nname = \"root\"\n").unwrap();
let nested_dir = root.join("packages/api");
fs::create_dir_all(&nested_dir).unwrap();
let nested_manifest = nested_dir.join("package.json");
fs::write(&nested_manifest, "{}").unwrap();
let pyproject_manifest = root.join("pyproject.toml");
fs::write(&pyproject_manifest, "[project]\nname = \"x\"\n").unwrap();
let relative_paths = vec![
std::path::PathBuf::from("Cargo.toml"),
std::path::PathBuf::from("packages/api/package.json"),
std::path::PathBuf::from("pyproject.toml"),
];
let slow_set = build_already_listed(root, &relative_paths);
// Hand-rolled equivalent of a pre-canonicalized manifest list.
let manifest_paths_canonical: Vec<PathBuf> = relative_paths
.iter()
.map(|p| {
let full = if p.is_relative() {
root.join(p)
} else {
p.clone()
};
full.canonicalize().unwrap_or(full)
})
.collect();
let fast_set: std::collections::HashSet<PathBuf> =
manifest_paths_canonical.iter().cloned().collect();
assert_eq!(
slow_set, fast_set,
"pre-canonicalized set must match build_already_listed output"
);
assert_eq!(fast_set.len(), 3);
}
// A scan with an empty `manifest_paths_canonical` can still be resolved through
// `build_already_listed` using its plain `manifest_paths`.
#[test]
fn legacy_scan_falls_back_to_build_already_listed() {
let (tmp, _) = make_fixture();
let root = tmp.path();
let manifest = root.join("Cargo.toml");
fs::write(&manifest, "[package]\nname = \"x\"\n").unwrap();
let scan = ProjectFileScan {
source_paths: vec![],
manifest_paths: vec![std::path::PathBuf::from("Cargo.toml")],
manifest_paths_canonical: Vec::new(),
source_directories: vec![],
manifest_hashes: std::collections::HashMap::new(),
};
assert!(
scan.manifest_paths_canonical.is_empty(),
"fixture must mirror legacy serialized form"
);
let fallback_set = build_already_listed(root, &scan.manifest_paths);
let canon = manifest.canonicalize().unwrap();
assert!(
fallback_set.contains(&canon),
"fallback must still resolve the manifest to its canonical form"
);
}
}