use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use globset::{Glob, GlobSetBuilder};
use ignore::WalkBuilder;
use seshat_core::{BranchId, Edge, EdgeId, NodeId, ProjectFile, ScanConfig};
use seshat_storage::{
BranchRepository, Database, EdgeRepository, FileIRRepository, NodeRepository,
RepoMetadataRepository, SqliteBranchRepository, SqliteEdgeRepository, SqliteFileIRRepository,
SqliteNodeRepository, SqliteRepoMetadataRepository,
};
use crate::discovery::discover_files;
use crate::documentation::parse_documentation;
use crate::error::ScanError;
use crate::git_dates::collect_git_file_dates;
use crate::manifest::{ManifestAnalysis, ManifestType, analyze_manifests};
use crate::module_structure::build_module_graph;
use crate::parser::{content_hash, parse_file};
/// Progress events handed to the `on_progress` callback while a scan runs.
///
/// The phase variants come in start/finish pairs so a caller can render
/// one status line per phase.
#[derive(Debug, Clone)]
pub enum ScanProgress {
    /// File discovery progress; `count` is the number of files found.
    Discovering { count: usize },
    /// File discovery finished with `total` files.
    DiscoveryDone { total: usize },
    /// Git file-date collection is starting.
    CollectingGitHistory,
    /// Git file-date collection finished.
    GitHistoryDone,
    /// `done` of `total` discovered files have been handled (read, skipped
    /// as unchanged, or parsed).
    Scanning { done: usize, total: usize },
    /// Every discovered file has been handled.
    ScanningDone,
    /// IR persistence and the module-graph rebuild are starting.
    BuildingModuleGraph,
    /// Module-graph nodes and edges have been persisted.
    ModuleGraphDone,
    /// Manifest analysis and documentation ingestion are starting.
    AnalyzingProjectFiles,
    /// Manifest analysis and documentation ingestion finished.
    ProjectFilesDone,
    // NOTE(review): the submodule variants below are not emitted by the scan
    // functions in this file; presumably a submodule-aware caller reports
    // them — confirm against that caller.
    /// A submodule was found at `path`.
    SubmoduleDetected { path: String },
    /// The submodule `name` at `path` is being scanned.
    ScanningSubmodule { path: String, name: String },
    /// The submodule at `path` has been scanned.
    ScanningSubmoduleDone { path: String },
    /// The submodule at `path` needs no rescan; `hash` identifies its state.
    SubmoduleUpToDate { path: String, hash: String },
    /// The submodule at `path` was skipped for the given `reason`.
    SubmoduleSkipped { path: String, reason: String },
}
/// Progress callback that discards every event; used when the caller does
/// not want progress reporting.
fn noop_progress(_event: &ScanProgress) {}
/// Summary of one completed project scan.
#[derive(Debug, Clone)]
pub struct ScanResult {
    /// Number of source files returned by discovery.
    pub files_discovered: usize,
    /// Number of files parsed during this run (unchanged files skipped by an
    /// incremental scan are not counted).
    pub files_parsed: usize,
    /// Nodes inserted during this run: module-graph nodes plus
    /// documentation nodes.
    pub nodes_persisted: usize,
    /// Module-graph edges inserted during this run.
    pub edges_persisted: usize,
    /// Number of manifest files found in the project root.
    pub manifests_analyzed: usize,
    /// Number of documentation files found (counted even when parsing a
    /// given file later fails).
    pub docs_ingested: usize,
    /// Per-manifest analysis results; empty when no manifest was found.
    pub manifest_analyses: Vec<ManifestAnalysis>,
    /// `Some` only when the branch already had stored file hashes, i.e. the
    /// scan ran incrementally.
    pub incremental: Option<IncrementalStats>,
    /// Per-file dates collected from git.
    // NOTE(review): the `i64` is presumably a Unix timestamp — confirm
    // against `collect_git_file_dates`.
    pub file_dates: HashMap<PathBuf, i64>,
    /// Submodules that discovery reported as excluded.
    pub excluded_submodules: Vec<String>,
    /// Source text of every discovered file that could be read, keyed by the
    /// path discovery reported; includes unchanged files.
    pub source_map: HashMap<PathBuf, String>,
    /// Paths of the files that were parsed during this run.
    pub changed_paths: HashSet<PathBuf>,
}
/// Per-category file counts gathered by an incremental scan.
///
/// All counters start at zero via `Default`.
#[derive(Debug, Clone, Default)]
pub struct IncrementalStats {
    /// Files whose content hash matched the stored hash (not re-parsed).
    pub files_unchanged: usize,
    /// Files with a stored hash that no longer matches (re-parsed).
    pub files_changed: usize,
    /// Discovered files that had no stored hash (parsed for the first time).
    pub files_new: usize,
    /// Stored files that were no longer discovered.
    pub files_deleted: usize,
}
/// Scans the project at `root` for `branch_id` without progress reporting.
///
/// Delegates to [`scan_project_with_progress`] with a callback that ignores
/// every event; see that function for the phases and the error contract.
///
/// # Errors
///
/// Returns whatever error `scan_project_with_progress` returns.
pub fn scan_project(
    root: &Path,
    config: &ScanConfig,
    db: &Database,
    branch_id: BranchId,
) -> Result<ScanResult, ScanError> {
    scan_project_with_progress(
        root,
        config,
        db,
        // No observer: progress events are dropped.
        noop_progress,
        branch_id,
    )
}
/// Scans the project at `root`, persists the results for `branch_id`, and
/// reports each phase through `on_progress`.
///
/// Phases, in order:
/// 1. Discover source files.
/// 2. Collect git file dates.
/// 3. Read every discovered file and parse it. When the branch already has
///    stored file hashes the scan is incremental: files whose content hash is
///    unchanged are recorded in `source_map` but not re-parsed.
/// 4. Remove IR for stored files that were not discovered again, upsert IR
///    for the parsed files, then rebuild and persist the module graph.
/// 5. Analyse dependency manifests found in `root` and persist the
///    `workspace_crates` metadata key (manifest internal names plus
///    `config.local_packages`, with `-` replaced by `_`).
/// 6. Ingest documentation files as nodes.
/// 7. Record the HEAD commit that was observed before discovery started.
///
/// # Errors
///
/// Fails when ensuring the branch exists, discovery, git date collection,
/// loading stored hashes, IR upserts, module-graph persistence, manifest
/// discovery/analysis, documentation discovery, or node insertion fails.
///
/// The following problems are logged and do not abort the scan: an unreadable
/// source file, a failure to delete stale IR, a failure to persist
/// `workspace_crates`, a documentation file that cannot be parsed, and a
/// failure to record the last scanned commit.
pub fn scan_project_with_progress(
    root: &Path,
    config: &ScanConfig,
    db: &Database,
    on_progress: impl Fn(&ScanProgress),
    branch_id: BranchId,
) -> Result<ScanResult, ScanError> {
    let conn = db.connection().clone();
    let file_ir_repo = SqliteFileIRRepository::new(conn.clone());
    let node_repo = SqliteNodeRepository::new(conn.clone());
    let edge_repo = SqliteEdgeRepository::new(conn.clone());
    let branch_repo = SqliteBranchRepository::new(conn);
    let branch = branch_id;
    branch_repo.ensure_branch_exists(&branch)?;

    // Capture HEAD before any work so the commit recorded at the end is the
    // one observed when the scan started.
    let head_at_scan_start: Option<String> = crate::git_utils::get_head_commit(root);

    // Phase 1: discovery.
    let discovery_result = discover_files(root, config)?;
    let discovered = discovery_result.files;
    let excluded_submodules = discovery_result.excluded_submodules;
    let files_discovered = discovered.len();
    on_progress(&ScanProgress::Discovering {
        count: files_discovered,
    });
    on_progress(&ScanProgress::DiscoveryDone {
        total: files_discovered,
    });
    tracing::info!(count = files_discovered, "Discovered source files");

    // Phase 2: git history.
    on_progress(&ScanProgress::CollectingGitHistory);
    let git_file_dates = collect_git_file_dates(root)?;
    on_progress(&ScanProgress::GitHistoryDone);
    if !git_file_dates.is_empty() {
        tracing::info!(
            files_with_dates = git_file_dates.len(),
            "Collected git file dates"
        );
    }

    // Stored hashes for this branch turn the scan into an incremental one.
    let stored_hashes = file_ir_repo.get_file_hashes_by_branch(&branch)?;
    let is_incremental = !stored_hashes.is_empty();
    let discovered_paths: HashSet<String> = discovered
        .iter()
        .map(|df| df.path.to_string_lossy().to_string())
        .collect();

    // Phase 3: read and parse.
    let mut parsed_files: Vec<ProjectFile> = Vec::with_capacity(files_discovered);
    let mut source_map: HashMap<PathBuf, String> = HashMap::new();
    let mut changed_paths: HashSet<PathBuf> = HashSet::new();
    let mut incremental_stats = IncrementalStats::default();
    let mut scan_done: usize = 0;
    // Every exit from a loop iteration reports exactly one finished file.
    let mut report_scanned = || {
        scan_done += 1;
        on_progress(&ScanProgress::Scanning {
            done: scan_done,
            total: files_discovered,
        });
    };
    for df in &discovered {
        let file_path_str = df.path.to_string_lossy().to_string();
        let abs_path = root.join(&df.path);
        let source = match std::fs::read_to_string(&abs_path) {
            Ok(s) => s,
            Err(e) => {
                tracing::warn!(path = %abs_path.display(), error = %e, "Failed to read file, skipping");
                report_scanned();
                continue;
            }
        };
        if is_incremental {
            let new_hash = content_hash(&source);
            if let Some(stored_hash) = stored_hashes.get(&file_path_str) {
                if *stored_hash == new_hash {
                    incremental_stats.files_unchanged += 1;
                    tracing::debug!(path = %df.path.display(), "File unchanged, skipping re-parse");
                    // Unchanged files still expose their source to callers.
                    source_map.insert(df.path.clone(), source);
                    report_scanned();
                    continue;
                }
                incremental_stats.files_changed += 1;
                tracing::debug!(path = %df.path.display(), "File changed, re-parsing");
            } else {
                incremental_stats.files_new += 1;
                tracing::debug!(path = %df.path.display(), "New file, parsing");
            }
        }
        let mut project_file = parse_file(&df.path, &source, df.language);
        if !config.local_packages.is_empty() {
            // Packages the configuration declares as local are not reported
            // as dependencies.
            project_file
                .dependencies_used
                .retain(|dep| !config.local_packages.contains(&dep.package));
        }
        parsed_files.push(project_file);
        changed_paths.insert(df.path.clone());
        source_map.insert(df.path.clone(), source);
        report_scanned();
    }
    on_progress(&ScanProgress::ScanningDone);
    let files_parsed = parsed_files.len();
    tracing::info!(count = files_parsed, "Parsed source files");

    // Phase 4: persist IR and rebuild the module graph.
    on_progress(&ScanProgress::BuildingModuleGraph);
    if is_incremental {
        for stored_path in stored_hashes.keys() {
            if !discovered_paths.contains(stored_path) {
                tracing::info!(path = %stored_path, "File deleted, removing IR from DB");
                // Best-effort: a failed delete must not abort the scan, but it
                // leaves stale IR behind, so make the failure visible.
                if let Err(e) = file_ir_repo.delete_with_symbol_index(&branch, stored_path) {
                    tracing::warn!(
                        path = %stored_path,
                        error = %e,
                        "Failed to remove IR for deleted file"
                    );
                }
                // Counts files detected as deleted, whether or not the IR
                // removal succeeded.
                incremental_stats.files_deleted += 1;
            }
        }
    }
    for pf in &parsed_files {
        let rel = pf.path.strip_prefix(root).unwrap_or(&pf.path);
        let commit_date = git_file_dates.get(rel).copied();
        file_ir_repo.upsert_with_symbol_index(&branch, pf, commit_date)?;
    }
    tracing::info!(count = files_parsed, "Stored file IR records");

    // The graph needs every file on the branch. When some files were skipped
    // as unchanged, reload the full set from storage; otherwise the freshly
    // parsed files are reused (moved, not cloned — they are not needed again).
    let all_parsed_files = if is_incremental && incremental_stats.files_unchanged > 0 {
        file_ir_repo.get_by_branch(&branch)?
    } else {
        parsed_files
    };
    if is_incremental {
        let deleted_edges = edge_repo.delete_by_branch(&branch)?;
        let deleted_nodes = node_repo.delete_facts_by_branch(&branch)?;
        tracing::debug!(
            nodes = deleted_nodes,
            edges = deleted_edges,
            "Cleared old module structure for rebuild"
        );
    }
    let module_graph = build_module_graph(root, &all_parsed_files, &branch);
    // Maps the ids used inside the in-memory graph to the ids of the inserted
    // rows so edges can be rewritten before they are stored.
    let mut id_remap: HashMap<NodeId, NodeId> = HashMap::new();
    let mut nodes_persisted: usize = 0;
    for node in &module_graph.nodes {
        let inserted = node_repo.insert(node)?;
        id_remap.insert(node.id, inserted.id);
        nodes_persisted += 1;
    }
    let mut edges_persisted: usize = 0;
    for edge in &module_graph.edges {
        let remapped_edge = remap_edge(edge, &id_remap);
        edge_repo.insert(&remapped_edge)?;
        edges_persisted += 1;
    }
    tracing::info!(
        nodes = nodes_persisted,
        edges = edges_persisted,
        "Persisted module structure"
    );
    on_progress(&ScanProgress::ModuleGraphDone);

    // Phase 5: manifests and workspace metadata.
    on_progress(&ScanProgress::AnalyzingProjectFiles);
    let manifests = discover_manifests(root)?;
    let manifests_analyzed = manifests.len();
    let manifest_analyses = if !manifests.is_empty() {
        let analysis = analyze_manifests(&manifests, &all_parsed_files)?;
        tracing::info!(count = analysis.len(), "Analyzed dependency manifests");
        analysis
    } else {
        Vec::new()
    };
    {
        let mut internal_names: Vec<String> = manifest_analyses
            .iter()
            .flat_map(|a| a.internal_names.iter().cloned())
            .filter(|n| !n.trim().is_empty())
            .collect();
        // `seen` keeps the configured packages from duplicating names the
        // manifests already produced.
        let mut seen: HashSet<String> = internal_names.iter().cloned().collect();
        for pkg in &config.local_packages {
            let normalised = pkg.trim().replace('-', "_");
            if !normalised.is_empty() && seen.insert(normalised.clone()) {
                internal_names.push(normalised);
            }
        }
        if internal_names.is_empty() {
            tracing::debug!("No internal names to persist — skipping workspace_crates write");
        } else {
            let json = serde_json::to_string(&internal_names).unwrap_or_else(|e| {
                tracing::warn!(error = %e, "Failed to serialise workspace_crates, storing []");
                "[]".to_owned()
            });
            let meta_repo = SqliteRepoMetadataRepository::new(db.connection().clone());
            if let Err(e) = meta_repo.set("workspace_crates", &json) {
                tracing::warn!(error = %e, "Failed to persist workspace_crates to repo_metadata");
            } else {
                tracing::info!(
                    count = internal_names.len(),
                    "Persisted workspace_crates to repo_metadata"
                );
            }
        }
    }

    // Phase 6: documentation.
    let doc_files = discover_documentation(root, config)?;
    let docs_ingested = doc_files.len();
    for (doc_path, doc_content) in &doc_files {
        match parse_documentation(doc_path, doc_content, &branch) {
            Ok(doc_result) => {
                for node in &doc_result.nodes {
                    node_repo.insert(node)?;
                    nodes_persisted += 1;
                }
            }
            Err(e) => {
                tracing::warn!(
                    path = %doc_path.display(),
                    error = %e,
                    "Failed to parse documentation, skipping"
                );
            }
        }
    }
    tracing::info!(
        count = docs_ingested,
        nodes = nodes_persisted,
        "Ingested documentation"
    );
    on_progress(&ScanProgress::ProjectFilesDone);

    // Phase 7: remember which commit this scan corresponds to (best-effort).
    if let Some(head) = head_at_scan_start.as_deref()
        && let Err(e) = branch_repo.set_last_scanned_commit(&branch, head)
    {
        tracing::warn!(
            error = %e,
            branch = %branch.0,
            "scan_project: failed to record last_scanned_commit; \
freshness gate may re-trigger sync next startup"
        );
    }
    Ok(ScanResult {
        files_discovered,
        files_parsed,
        nodes_persisted,
        edges_persisted,
        manifests_analyzed,
        docs_ingested,
        manifest_analyses,
        incremental: is_incremental.then_some(incremental_stats),
        file_dates: git_file_dates,
        excluded_submodules,
        source_map,
        changed_paths,
    })
}
/// Builds a copy of `edge` whose endpoints are translated through `id_remap`.
///
/// An endpoint without an entry in `id_remap` keeps its original id. The
/// returned edge always carries `EdgeId(0)`; all other fields are copied.
// NOTE(review): `EdgeId(0)` is presumably a placeholder that storage replaces
// on insert — confirm against `EdgeRepository::insert`.
fn remap_edge(edge: &Edge, id_remap: &HashMap<NodeId, NodeId>) -> Edge {
    let resolve = |original: NodeId| id_remap.get(&original).copied().unwrap_or(original);
    let source_id = resolve(edge.source_id);
    let target_id = resolve(edge.target_id);
    Edge {
        id: EdgeId(0),
        source_id,
        target_id,
        edge_type: edge.edge_type,
        branch_id: edge.branch_id.clone(),
        weight: edge.weight,
        metadata: edge.metadata.clone(),
    }
}
/// Finds the dependency manifests present in `root` and reads them.
///
/// Each name from `ManifestType::all_filenames()` is joined onto `root`;
/// names that do not resolve to a regular file are skipped. The result holds
/// `(path, file content, manifest type)` for every file whose name
/// `ManifestType::from_filename` recognises.
///
/// # Errors
///
/// Returns `ScanError::ManifestError` when an existing manifest file cannot
/// be read as text.
fn discover_manifests(root: &Path) -> Result<Vec<(PathBuf, String, ManifestType)>, ScanError> {
    let mut found = Vec::new();
    for filename in ManifestType::all_filenames() {
        let candidate = root.join(filename);
        if !candidate.is_file() {
            continue;
        }
        let content = match std::fs::read_to_string(&candidate) {
            Ok(text) => text,
            Err(e) => {
                return Err(ScanError::ManifestError {
                    path: candidate,
                    reason: format!("Failed to read manifest: {e}"),
                });
            }
        };
        let Some(manifest_type) = ManifestType::from_filename(filename) else {
            continue;
        };
        found.push((candidate, content, manifest_type));
    }
    Ok(found)
}
/// Collects documentation files under `root` together with their contents.
///
/// The tree is walked with `ignore::WalkBuilder`, with the hidden-file filter
/// and the gitignore, global gitignore and git-exclude filters switched on.
/// A regular file is kept when it:
/// - has a `md`, `json`, `yaml` or `yml` extension (compared case-sensitively),
/// - matches no glob in `config.exclude_paths` (tested on the path relative
///   to `root`),
/// - can be read as text, and
/// - for JSON/YAML files, is accepted by [`is_documentation_content`].
///
/// Returned paths are relative to `root` whenever the prefix can be stripped.
/// Walk errors and unreadable files are logged and skipped.
///
/// # Errors
///
/// Returns `ScanError::DiscoveryError` when an exclude pattern is not a valid
/// glob or the combined glob set cannot be built.
fn discover_documentation(
    root: &Path,
    config: &ScanConfig,
) -> Result<Vec<(PathBuf, String)>, ScanError> {
    const DOC_EXTENSIONS: [&str; 4] = ["md", "json", "yaml", "yml"];

    // Compile the configured exclude patterns once, up front.
    let mut glob_builder = GlobSetBuilder::new();
    for pattern in &config.exclude_paths {
        match Glob::new(pattern) {
            Ok(glob) => {
                glob_builder.add(glob);
            }
            Err(e) => {
                return Err(ScanError::DiscoveryError {
                    path: root.to_path_buf(),
                    reason: format!("Invalid exclude_paths pattern '{pattern}': {e}"),
                });
            }
        }
    }
    let excludes = match glob_builder.build() {
        Ok(set) => set,
        Err(e) => {
            return Err(ScanError::DiscoveryError {
                path: root.to_path_buf(),
                reason: format!("Failed to build exclude globset: {e}"),
            });
        }
    };
    let has_excludes = !excludes.is_empty();

    let walker = WalkBuilder::new(root)
        .hidden(true)
        .git_ignore(true)
        .git_global(true)
        .git_exclude(true)
        .build();

    let mut collected = Vec::new();
    for visited in walker {
        let entry = match visited {
            Ok(entry) => entry,
            Err(err) => {
                tracing::warn!("Doc walk error: {err}");
                continue;
            }
        };
        // Entries without a known file type, and anything that is not a
        // regular file, are ignored.
        if !entry.file_type().is_some_and(|kind| kind.is_file()) {
            continue;
        }
        let path = entry.path();
        let Some(ext) = path.extension().and_then(|e| e.to_str()) else {
            continue;
        };
        if !DOC_EXTENSIONS.contains(&ext) {
            continue;
        }
        let relative = match path.strip_prefix(root) {
            Ok(stripped) => stripped.to_path_buf(),
            Err(_) => path.to_path_buf(),
        };
        if has_excludes && excludes.is_match(&relative) {
            tracing::debug!(
                path = %relative.display(),
                "Skipping doc file (matched exclude_paths)"
            );
            continue;
        }
        let content = match std::fs::read_to_string(path) {
            Ok(text) => text,
            Err(e) => {
                tracing::warn!(path = %path.display(), error = %e, "Cannot read doc file");
                continue;
            }
        };
        // Markdown is always documentation; structured formats must look like
        // a schema or API description to qualify.
        let needs_content_check = matches!(ext, "json" | "yaml" | "yml");
        if needs_content_check && !is_documentation_content(ext, &content) {
            continue;
        }
        collected.push((relative, content));
    }
    Ok(collected)
}
/// Decides whether a JSON or YAML file's content should be treated as
/// documentation.
///
/// - `"json"`: the content must parse to a JSON object that has a `$schema`
///   key, a `properties` key, or both a `type` and a `title` key.
/// - `"yaml"` / `"yml"`: the content must parse to a YAML mapping that has an
///   `openapi` or a `swagger` key.
/// - any other `ext`: always `false`.
///
/// Content that fails to parse, or whose top level is not an object/mapping,
/// yields `false`.
fn is_documentation_content(ext: &str, content: &str) -> bool {
    match ext {
        "json" => {
            let parsed: Result<serde_json::Value, _> = serde_json::from_str(content);
            let Ok(value) = parsed else {
                return false;
            };
            value.as_object().is_some_and(|obj| {
                let has = |key: &str| obj.contains_key(key);
                has("$schema") || has("properties") || (has("type") && has("title"))
            })
        }
        "yaml" | "yml" => {
            let parsed: Result<serde_yml::Value, _> = serde_yml::from_str(content);
            let Ok(value) = parsed else {
                return false;
            };
            let Some(mapping) = value.as_mapping() else {
                return false;
            };
            let has_key =
                |name: &str| mapping.contains_key(serde_yml::Value::String(name.to_string()));
            has_key("openapi") || has_key("swagger")
        }
        _ => false,
    }
}
// Unit tests for the scan orchestration: full and incremental scans against a
// temporary project with an in-memory database, plus the private helpers.
#[cfg(test)]
mod tests {
use super::*;
use seshat_core::ScanConfig;
use seshat_storage::Database;
use std::fs;
use tempfile::tempdir;
/// Builds a temporary project containing an empty `.git` directory, three
/// Rust sources (`src/main.rs`, `src/config.rs`, `src/utils/format.rs`) and a
/// `README.md`. The returned guard keeps the directory alive.
fn create_test_project() -> tempfile::TempDir {
let dir = tempdir().expect("create tempdir");
let root = dir.path();
fs::create_dir_all(root.join(".git")).unwrap();
let src = root.join("src");
fs::create_dir_all(&src).unwrap();
fs::write(
src.join("main.rs"),
r#"
use std::io;
use crate::config::Config;
pub fn main() {
println!("hello");
}
fn helper() -> bool {
true
}
"#,
)
.unwrap();
fs::write(
src.join("config.rs"),
r#"
pub struct Config {
pub name: String,
pub debug: bool,
}
impl Config {
pub fn new() -> Self {
Config {
name: String::new(),
debug: false,
}
}
}
"#,
)
.unwrap();
let utils = src.join("utils");
fs::create_dir_all(&utils).unwrap();
fs::write(
utils.join("format.rs"),
r#"
use crate::config::Config;
pub fn format_name(config: &Config) -> String {
config.name.clone()
}
"#,
)
.unwrap();
fs::write(
root.join("README.md"),
r#"# Test Project
## Overview
A simple test project.
## Features
- Feature one
- Feature two
"#,
)
.unwrap();
dir
}
/// A first scan discovers and parses all three Rust files of the fixture.
#[test]
fn scan_project_discovers_and_parses_files() {
let dir = create_test_project();
let root = dir.path();
let db = Database::open(":memory:").expect("open DB");
let config = ScanConfig::default();
let result =
scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
assert_eq!(result.files_discovered, 3, "should discover 3 .rs files");
assert_eq!(result.files_parsed, 3, "should parse all 3 files");
}
/// After a scan, the file IR repository holds one record per source file.
#[test]
fn scan_project_stores_ir_in_database() {
let dir = create_test_project();
let root = dir.path();
let db = Database::open(":memory:").expect("open DB");
let config = ScanConfig::default();
scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
let conn = db.connection().clone();
let file_ir_repo = SqliteFileIRRepository::new(conn);
let branch_id = BranchId::from("main");
let all_files = file_ir_repo.get_by_branch(&branch_id).expect("get files");
assert_eq!(all_files.len(), 3, "should have 3 file IR records");
}
/// Every stored file IR record carries a non-empty content hash.
#[test]
fn scan_project_stores_content_hash() {
let dir = create_test_project();
let root = dir.path();
let db = Database::open(":memory:").expect("open DB");
let config = ScanConfig::default();
scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
let conn = db.connection().clone();
let file_ir_repo = SqliteFileIRRepository::new(conn);
let branch_id = BranchId::from("main");
let all_files = file_ir_repo.get_by_branch(&branch_id).expect("get files");
for pf in &all_files {
assert!(
!pf.content_hash.is_empty(),
"content hash should be non-empty for {}",
pf.path.display()
);
}
}
/// The scan reports at least two persisted nodes and the node repository
/// returns at least two nodes for the branch.
#[test]
fn scan_project_persists_module_nodes() {
let dir = create_test_project();
let root = dir.path();
let db = Database::open(":memory:").expect("open DB");
let config = ScanConfig::default();
let result =
scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
assert!(
result.nodes_persisted >= 2,
"should persist at least 2 module nodes, got {}",
result.nodes_persisted
);
let conn = db.connection().clone();
let node_repo = SqliteNodeRepository::new(conn);
let branch_id = BranchId::from("main");
let nodes = node_repo.find_by_branch(&branch_id).expect("find nodes");
assert!(
nodes.len() >= 2,
"should have at least 2 nodes in DB, got {}",
nodes.len()
);
}
/// The scan persists at least one edge, and at least one `PartOf` edge can
/// be read back from the edge repository.
#[test]
fn scan_project_persists_edges() {
let dir = create_test_project();
let root = dir.path();
let db = Database::open(":memory:").expect("open DB");
let config = ScanConfig::default();
let result =
scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
assert!(
result.edges_persisted >= 1,
"should persist at least 1 edge, got {}",
result.edges_persisted
);
let conn = db.connection().clone();
let edge_repo = SqliteEdgeRepository::new(conn);
let part_of_edges = edge_repo
.find_by_type(seshat_core::EdgeType::PartOf)
.expect("find PartOf edges");
assert!(
!part_of_edges.is_empty(),
"should have at least 1 PartOf edge"
);
}
/// The fixture's `README.md` is counted as an ingested documentation file.
#[test]
fn scan_project_ingests_documentation() {
let dir = create_test_project();
let root = dir.path();
let db = Database::open(":memory:").expect("open DB");
let config = ScanConfig::default();
let result =
scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
assert!(
result.docs_ingested >= 1,
"should ingest at least 1 documentation file (README.md), got {}",
result.docs_ingested
);
}
/// Scanning a project that contains only `.git` succeeds with all counters
/// at zero.
#[test]
fn scan_project_empty_directory() {
let dir = tempdir().expect("create tempdir");
let root = dir.path();
fs::create_dir_all(root.join(".git")).unwrap();
let db = Database::open(":memory:").expect("open DB");
let config = ScanConfig::default();
let result = scan_project(root, &config, &db, BranchId::from("main"))
.expect("scan should succeed on empty project");
assert_eq!(result.files_discovered, 0);
assert_eq!(result.files_parsed, 0);
assert_eq!(result.nodes_persisted, 0);
assert_eq!(result.edges_persisted, 0);
}
/// An `exclude_paths` glob covering `utils` removes `format.rs` from
/// discovery, leaving two files.
#[test]
fn scan_project_respects_config_exclude_paths() {
let dir = create_test_project();
let root = dir.path();
let config = ScanConfig {
exclude_paths: vec!["**/utils/**".to_string()],
..ScanConfig::default()
};
let db = Database::open(":memory:").expect("open DB");
let result =
scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
assert_eq!(
result.files_discovered, 2,
"should discover 2 files (utils excluded)"
);
}
/// A `Cargo.toml` in the root is found and typed as `ManifestType::CargoToml`.
#[test]
fn discover_manifests_finds_cargo_toml() {
let dir = tempdir().expect("create tempdir");
let root = dir.path();
fs::write(
root.join("Cargo.toml"),
r#"[package]
name = "test"
version = "0.1.0"
edition = "2021"
"#,
)
.unwrap();
let manifests = discover_manifests(root).expect("discover manifests");
assert_eq!(manifests.len(), 1);
assert_eq!(manifests[0].2, ManifestType::CargoToml);
}
/// An empty directory yields no manifests.
#[test]
fn discover_manifests_finds_nothing_without_manifests() {
let dir = tempdir().expect("create tempdir");
let manifests = discover_manifests(dir.path()).expect("discover manifests");
assert!(manifests.is_empty());
}
/// JSON with a `$schema` key counts as documentation; a plain data object
/// does not.
#[test]
fn is_documentation_content_json_schema() {
let content = r#"{"$schema": "http://json-schema.org/draft-07/schema#", "type": "object"}"#;
assert!(is_documentation_content("json", content));
let content = r#"{"name": "foo", "value": 42}"#;
assert!(!is_documentation_content("json", content));
}
/// YAML with a top-level `openapi` key counts as documentation; a plain
/// mapping does not.
#[test]
fn is_documentation_content_openapi() {
let content = "openapi: '3.0.0'\ninfo:\n title: Test\n version: '1.0'\npaths: {}";
assert!(is_documentation_content("yaml", content));
let content = "name: test\nvalue: 42";
assert!(!is_documentation_content("yaml", content));
}
/// Both endpoints of an edge are translated through the id map.
#[test]
fn remap_edge_applies_id_mapping() {
let mut remap = HashMap::new();
remap.insert(NodeId(1), NodeId(100));
remap.insert(NodeId(2), NodeId(200));
let edge = Edge {
id: EdgeId(0),
source_id: NodeId(1),
target_id: NodeId(2),
edge_type: seshat_core::EdgeType::DependsOn,
branch_id: BranchId::from("main"),
weight: 1.0,
metadata: None,
};
let remapped = remap_edge(&edge, &remap);
assert_eq!(remapped.source_id, NodeId(100));
assert_eq!(remapped.target_id, NodeId(200));
}
/// A second scan of an untouched project is incremental, reports all three
/// files as unchanged and parses nothing.
#[test]
fn scan_project_incremental_skips_unchanged() {
let dir = create_test_project();
let root = dir.path();
let db = Database::open(":memory:").expect("open DB");
let config = ScanConfig::default();
let r1 = scan_project(root, &config, &db, BranchId::from("main")).expect("first scan");
assert!(r1.incremental.is_none(), "first scan is not incremental");
assert_eq!(r1.files_parsed, 3);
let r2 = scan_project(root, &config, &db, BranchId::from("main")).expect("second scan");
assert!(r2.incremental.is_some(), "second scan is incremental");
let stats = r2.incremental.unwrap();
assert_eq!(stats.files_unchanged, 3);
assert_eq!(stats.files_changed, 0);
assert_eq!(stats.files_new, 0);
assert_eq!(stats.files_deleted, 0);
assert_eq!(r2.files_parsed, 0, "no files re-parsed");
}
/// Rewriting `src/config.rs` between scans makes the second scan re-parse
/// exactly that one file.
#[test]
fn scan_project_incremental_detects_modification() {
let dir = create_test_project();
let root = dir.path();
let db = Database::open(":memory:").expect("open DB");
let config = ScanConfig::default();
scan_project(root, &config, &db, BranchId::from("main")).expect("first scan");
fs::write(
root.join("src/config.rs"),
"pub struct Config { pub name: String, pub extra: bool }\n",
)
.unwrap();
let r2 = scan_project(root, &config, &db, BranchId::from("main")).expect("second scan");
let stats = r2.incremental.unwrap();
assert_eq!(stats.files_changed, 1, "config.rs changed");
assert_eq!(stats.files_unchanged, 2, "main.rs + format.rs unchanged");
assert_eq!(r2.files_parsed, 1, "only changed file parsed");
}
/// A file added between scans is counted as new while the original three
/// stay unchanged.
#[test]
fn scan_project_incremental_detects_addition() {
let dir = create_test_project();
let root = dir.path();
let db = Database::open(":memory:").expect("open DB");
let config = ScanConfig::default();
scan_project(root, &config, &db, BranchId::from("main")).expect("first scan");
fs::write(root.join("src/extra.rs"), "pub fn extra() {}").unwrap();
let r2 = scan_project(root, &config, &db, BranchId::from("main")).expect("second scan");
let stats = r2.incremental.unwrap();
assert_eq!(stats.files_new, 1);
assert_eq!(stats.files_unchanged, 3);
assert_eq!(r2.files_discovered, 4);
}
/// A file removed between scans is counted as deleted and its IR record is
/// gone from the database afterwards.
#[test]
fn scan_project_incremental_detects_deletion() {
let dir = create_test_project();
let root = dir.path();
let db = Database::open(":memory:").expect("open DB");
let config = ScanConfig::default();
scan_project(root, &config, &db, BranchId::from("main")).expect("first scan");
fs::remove_file(root.join("src/utils/format.rs")).unwrap();
let r2 = scan_project(root, &config, &db, BranchId::from("main")).expect("second scan");
let stats = r2.incremental.unwrap();
assert_eq!(stats.files_deleted, 1);
assert_eq!(stats.files_unchanged, 2);
assert_eq!(r2.files_discovered, 2);
let conn = db.connection().clone();
let file_ir_repo = SqliteFileIRRepository::new(conn);
let branch = BranchId::from("main");
let files = file_ir_repo.get_by_branch(&branch).unwrap();
assert_eq!(files.len(), 2);
}
/// On a full scan, `source_map` and `changed_paths` both cover every
/// discovered file and no stored source is empty.
#[test]
fn full_scan_source_map_contains_all_files() {
let dir = create_test_project();
let root = dir.path();
let db = Database::open(":memory:").expect("open DB");
let config = ScanConfig::default();
let result =
scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
assert_eq!(
result.source_map.len(),
result.files_discovered,
"source_map must contain all {} discovered files on full scan, got {}",
result.files_discovered,
result.source_map.len()
);
assert_eq!(
result.changed_paths.len(),
result.files_discovered,
"changed_paths must equal files_discovered on full scan"
);
for (path, src) in &result.source_map {
assert!(!src.is_empty(), "source for {:?} must not be empty", path);
}
}
/// On an incremental scan with no changes, `source_map` still covers every
/// file while `changed_paths` is empty.
#[test]
fn incremental_scan_source_map_contains_all_files() {
let dir = create_test_project();
let root = dir.path();
let db = Database::open(":memory:").expect("open DB");
let config = ScanConfig::default();
scan_project(root, &config, &db, BranchId::from("main")).expect("first scan");
let r2 = scan_project(root, &config, &db, BranchId::from("main")).expect("second scan");
let stats = r2.incremental.as_ref().unwrap();
assert_eq!(stats.files_unchanged, 3, "all 3 files should be unchanged");
assert_eq!(r2.files_parsed, 0, "no files should be re-parsed");
assert_eq!(
r2.source_map.len(),
r2.files_discovered,
"source_map must contain all {} files on incremental scan (no changes), got {} — \
this would cause empty snippets in convention evidence",
r2.files_discovered,
r2.source_map.len()
);
assert!(
r2.changed_paths.is_empty(),
"changed_paths must be empty when no files changed, got {} paths",
r2.changed_paths.len()
);
for (path, src) in &r2.source_map {
assert!(
!src.is_empty(),
"source for {:?} must not be empty on incremental scan",
path
);
}
}
/// The persisted `workspace_crates` value is the de-duplicated union of the
/// crate name taken from `Cargo.toml` and the normalised `local_packages`.
#[test]
fn scan_persists_workspace_crates_with_local_packages_union() {
let dir = tempdir().expect("create tempdir");
let root = dir.path();
fs::create_dir_all(root.join(".git")).unwrap();
fs::write(
root.join("Cargo.toml"),
r#"[package]
name = "auto-detected-crate"
version = "0.1.0"
edition = "2021"
"#,
)
.unwrap();
let src = root.join("src");
fs::create_dir_all(&src).unwrap();
fs::write(src.join("lib.rs"), "pub fn hello() {}\n").unwrap();
// One package that needs `-` -> `_` normalisation and one that duplicates
// the crate name detected from the manifest.
let config = ScanConfig {
local_packages: vec![
"extra-package".to_owned(),
"auto_detected_crate".to_owned(),
],
..ScanConfig::default()
};
let db = Database::open(":memory:").expect("open DB");
scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
let meta_repo = SqliteRepoMetadataRepository::new(db.connection().clone());
let json = meta_repo
.get("workspace_crates")
.expect("repo_metadata query must succeed")
.expect("workspace_crates key must be present after scan");
let names: Vec<String> =
serde_json::from_str(&json).expect("workspace_crates must be valid JSON array");
assert!(
names.contains(&"auto_detected_crate".to_owned()),
"auto-detected crate must be present; got {:?}",
names
);
assert!(
names.contains(&"extra_package".to_owned()),
"extra_package (normalised) from local_packages must be present; got {:?}",
names
);
let unique: std::collections::HashSet<_> = names.iter().collect();
assert_eq!(
unique.len(),
names.len(),
"workspace_crates must not contain duplicates; got {:?}",
names
);
}
/// After modifying one file, `changed_paths` holds exactly that file while
/// `source_map` still covers every discovered file.
#[test]
fn incremental_scan_changed_paths_contains_only_modified_files() {
let dir = create_test_project();
let root = dir.path();
let db = Database::open(":memory:").expect("open DB");
let config = ScanConfig::default();
scan_project(root, &config, &db, BranchId::from("main")).expect("first scan");
let changed_file_abs = root.join("src/config.rs");
fs::write(&changed_file_abs, "pub struct Config { pub extra: bool }\n").unwrap();
// `changed_paths` is compared against the root-relative form of the path.
let changed_file = std::path::PathBuf::from("src/config.rs");
let r2 = scan_project(root, &config, &db, BranchId::from("main")).expect("second scan");
assert_eq!(
r2.source_map.len(),
r2.files_discovered,
"source_map must contain all files even on incremental scan"
);
assert_eq!(
r2.changed_paths.len(),
1,
"changed_paths must contain exactly 1 file (the modified one), got: {:?}",
r2.changed_paths
);
assert!(
r2.changed_paths.contains(&changed_file),
"changed_paths must contain the modified file {:?}, got: {:?}",
changed_file,
r2.changed_paths
);
for path in r2.source_map.keys() {
if path != &changed_file {
assert!(
!r2.changed_paths.contains(path),
"unchanged file {:?} must not be in changed_paths",
path
);
}
}
}
}