use std::path::Path;
use std::time::Instant;
use indicatif::{ProgressBar, ProgressStyle};
use seshat_core::{BranchId, DetectionConfig};
use seshat_detectors::{aggregate_findings, run_all_detectors};
use seshat_scanner::{
ScanProgress, ScanResult, detect_submodule_paths, scan_project_with_progress,
};
use seshat_storage::{
Database, EmbeddingInput, EmbeddingRepository, RepoMetadataRepository,
SqliteEmbeddingRepository, SqliteRepoMetadataRepository, SqliteSubmoduleRepository,
StaleIrWipeReport, SubmoduleInput, SubmoduleRepository, wipe_stale_ir_cache,
};
use crate::config::AppConfig;
use crate::db::unix_now;
use crate::error::CliError;
use crate::format::{self, Verbosity};
/// Scans the project at `path`, persists the results, and prints a report.
///
/// Stages, in order:
/// 1. validate `path` and resolve the project root, name and database path;
/// 2. open the root database and wipe IR rows cached under a stale schema;
/// 3. unless submodules are excluded, scan every initialized git submodule
///    into its own database (one scoped thread per submodule), skipping those
///    whose HEAD commit matches the stored record and whose IR schema is
///    current;
/// 4. scan the root project, run all detectors, aggregate and persist the
///    findings, and generate embeddings when an embedding config is present;
/// 5. upsert submodule rows, drop rows for submodules no longer listed, write
///    repo metadata, and print the final report.
///
/// `verbose` / `quiet` select the verbosity; `exclude_submodules` forces
/// submodule exclusion regardless of the loaded configuration.
///
/// # Errors
/// Returns [`CliError::InvalidPath`] when `path` does not exist or is not a
/// directory, and propagates any error from project resolution, config
/// loading, database access, scanning, detection or metadata writes.
///
/// # Panics
/// Panics if a submodule scan thread panics.
pub fn run_scan(
    path: &Path,
    verbose: bool,
    quiet: bool,
    exclude_submodules: bool,
) -> Result<(), CliError> {
    let verbosity = Verbosity::from_flags(verbose, quiet);
    let color = format::color_enabled();

    // --- Stage 1: validate input and resolve project locations -------------
    if !path.exists() {
        return Err(CliError::InvalidPath {
            path: path.display().to_string(),
            reason: "path does not exist".to_owned(),
        });
    }
    if !path.is_dir() {
        return Err(CliError::InvalidPath {
            path: path.display().to_string(),
            reason: "path is not a directory".to_owned(),
        });
    }
    let resolved = crate::db::resolve_project(Some(path), "scan")?;
    let root = resolved.project_root.clone();
    let db_path = resolved.db_path.clone();
    let project_name = resolved.project_name.clone();
    if verbosity.show_warnings() {
        eprintln!("seshat v{}", env!("CARGO_PKG_VERSION"));
    }
    let mut config =
        AppConfig::load().map_err(|e| CliError::scan(format!("failed to load config: {e}")))?;
    // The CLI flag can only turn exclusion on; it never overrides a config
    // that already excludes submodules.
    if exclude_submodules {
        config.scan.exclude_submodules = true;
    }

    // --- Stage 2: open the root database ------------------------------------
    if let Some(parent) = db_path.parent() {
        std::fs::create_dir_all(parent)
            .map_err(|e| CliError::scan(format!("failed to create database directory: {e}")))?;
    }
    let db = Database::open(&db_path)
        .map_err(|e| CliError::scan(format!("failed to open database: {e}")))?;
    let wipe = wipe_stale_ir_cache(&db)
        .map_err(|e| CliError::scan(format!("failed to clear stale IR cache: {e}")))?;
    report_ir_cache_wipe(&wipe, "root", verbosity.show_warnings());
    let submodule_paths = detect_submodule_paths(&root);
    let scan_branch = crate::db::get_current_branch(&root)
        .map(seshat_core::BranchId::from)
        .unwrap_or_else(|| {
            tracing::debug!(root = %root.display(), "Could not detect git branch for scan root, defaulting to 'main'");
            seshat_core::BranchId::from("main")
        });
    let start = Instant::now();
    let show = verbosity.show_warnings();

    // --- Stage 3: submodules ------------------------------------------------
    /// A submodule whose row must be upserted into the root database.
    struct ScannedSubmodule {
        mount_path: String,
        name: String,
        db_path: String,
        commit_hash: Option<String>,
    }
    let root_sub_repo_for_detect = SqliteSubmoduleRepository::new(db.connection().clone());
    let scanned_submodules: Vec<ScannedSubmodule> = if !config.scan.exclude_submodules
        && !submodule_paths.is_empty()
    {
        /// Decision taken for one submodule during the sequential pre-pass.
        enum SubmoduleAction {
            /// Up to date: reuse the stored record without scanning.
            Skip(ScannedSubmodule),
            /// Needs a (re)scan.
            Scan {
                mount_path: String,
                name: String,
                submodule_abs: std::path::PathBuf,
                commit_hash: Option<String>,
            },
        }
        let mut actions: Vec<SubmoduleAction> = Vec::new();
        for mount_path in &submodule_paths {
            let submodule_abs = root.join(mount_path);
            // Display name is the last path component of the mount path.
            let name = mount_path
                .rsplit('/')
                .next()
                .unwrap_or(mount_path)
                .to_string();
            if show {
                eprintln!(" \u{2139} Submodule detected: {mount_path}");
            }
            // An initialized submodule has a `.git` entry (file or directory);
            // `exists()` covers both.
            if !submodule_abs.is_dir() || !submodule_abs.join(".git").exists() {
                if show {
                    let reason = "not initialized (no .git)";
                    eprintln!(" \u{2298} Submodule {name} skipped: {reason}");
                }
                continue;
            }
            let commit_hash = seshat_scanner::get_head_commit(&submodule_abs);
            let stored_record = root_sub_repo_for_detect
                .find_by_path(mount_path)
                .map_err(|e| {
                    CliError::scan(format!("failed to look up submodule '{mount_path}': {e}"))
                })?;
            if let Some(ref stored) = stored_record {
                if let (Some(current_hash), Some(stored_hash)) = (&commit_hash, &stored.commit_hash)
                {
                    if current_hash == stored_hash {
                        let sub_branch_for_check = crate::db::get_current_branch(&submodule_abs)
                            .unwrap_or_else(|| {
                                tracing::debug!(submodule = %submodule_abs.display(), "Could not detect branch for submodule, defaulting to 'main'");
                                "main".to_owned()
                            });
                        // Same commit is only skippable when the stored IR was
                        // written with the current schema; an unopenable
                        // database counts as "not current".
                        let schema_ok =
                            seshat_storage::Database::open(std::path::Path::new(&stored.db_path))
                                .ok()
                                .map(|sub_db| {
                                    crate::db::submodule_ir_schema_is_current(
                                        &sub_db,
                                        &sub_branch_for_check,
                                    )
                                })
                                .unwrap_or(false);
                        if schema_ok {
                            if show {
                                // Checked slicing: falls back to the full hash
                                // when it is shorter than 7 bytes and can
                                // never panic on a char boundary.
                                let short =
                                    current_hash.get(..7).unwrap_or(current_hash.as_str());
                                eprintln!(" \u{2713} Submodule {name} up-to-date ({short})");
                            }
                            actions.push(SubmoduleAction::Skip(ScannedSubmodule {
                                mount_path: mount_path.clone(),
                                name,
                                db_path: stored.db_path.clone(),
                                commit_hash,
                            }));
                            continue;
                        }
                        if show {
                            eprintln!(
                                " \u{21bb} Submodule {name} IR schema outdated, re-scanning..."
                            );
                        }
                    }
                }
            }
            actions.push(SubmoduleAction::Scan {
                mount_path: mount_path.clone(),
                name,
                submodule_abs,
                commit_hash,
            });
        }

        // Split the decisions: skipped submodules are final, the rest are
        // queued for the parallel scan.
        let mut results: Vec<ScannedSubmodule> = Vec::new();
        let mut to_scan: Vec<(String, String, std::path::PathBuf, Option<String>)> = Vec::new();
        for action in actions {
            match action {
                SubmoduleAction::Skip(sub) => results.push(sub),
                SubmoduleAction::Scan {
                    mount_path,
                    name,
                    submodule_abs,
                    commit_hash,
                } => to_scan.push((mount_path, name, submodule_abs, commit_hash)),
            }
        }

        if !to_scan.is_empty() {
            let scan_config = &config.scan;
            let detection_config = &config.detection;
            let project_name_ref = &project_name;
            // Scoped threads let each worker borrow the config and the queue
            // entries without `Arc` or `'static` bounds.
            let parallel_results: Vec<Result<ScannedSubmodule, CliError>> =
                std::thread::scope(|scope| {
                    let handles: Vec<_> = to_scan
                        .iter()
                        .map(|(mount_path, name, submodule_abs, commit_hash)| {
                            let sp =
                                make_manual_spinner(&format!("{name}: discovering files..."), show);
                            scope.spawn(move || -> Result<ScannedSubmodule, CliError> {
                                let sub_db_path = crate::db::resolve_submodule_db_path(
                                    project_name_ref,
                                    mount_path,
                                )?;
                                let sub_db = Database::open(&sub_db_path).map_err(|e| {
                                    CliError::scan(format!(
                                        "failed to open submodule database for '{mount_path}': {e}"
                                    ))
                                })?;
                                let sub_wipe = wipe_stale_ir_cache(&sub_db).map_err(|e| {
                                    CliError::scan(format!(
                                        "failed to clear stale IR cache for submodule '{mount_path}': {e}"
                                    ))
                                })?;
                                report_ir_cache_wipe(&sub_wipe, name, show);
                                let sub_branch = crate::db::get_current_branch(submodule_abs)
                                    .map(seshat_core::BranchId::from)
                                    .unwrap_or_else(|| {
                                        tracing::debug!(submodule = %submodule_abs.display(), "Could not detect branch for submodule scan, defaulting to 'main'");
                                        seshat_core::BranchId::from("main")
                                    });
                                let scan_result = scan_project_with_progress(
                                    submodule_abs,
                                    scan_config,
                                    &sub_db,
                                    |event| {
                                        match event {
                                            ScanProgress::Discovering { count } => {
                                                sp.set_message(format!(
                                                    "{name}: discovering files... {count} found"
                                                ));
                                            }
                                            ScanProgress::DiscoveryDone { total } => {
                                                sp.set_message(format!(
                                                    "{name}: discovering files... {total} found"
                                                ));
                                            }
                                            ScanProgress::CollectingGitHistory => {
                                                sp.set_message(format!(
                                                    "{name}: collecting git history..."
                                                ));
                                            }
                                            ScanProgress::Scanning { done, total } => {
                                                sp.set_message(format!(
                                                    "{name}: scanning files... {done}/{total}"
                                                ));
                                            }
                                            ScanProgress::BuildingModuleGraph => {
                                                sp.set_message(format!(
                                                    "{name}: building module graph..."
                                                ));
                                            }
                                            ScanProgress::AnalyzingProjectFiles => {
                                                sp.set_message(format!(
                                                    "{name}: analyzing manifests & docs..."
                                                ));
                                            }
                                            _ => {}
                                        }
                                        // Manual spinner: advance it on every
                                        // progress event.
                                        sp.tick();
                                    },
                                    sub_branch.clone(),
                                )
                                .map_err(|e| {
                                    CliError::scan(format!(
                                        "submodule scan failed for '{mount_path}': {e}"
                                    ))
                                })?;
                                sp.set_message(format!("{name}: analyzing conventions..."));
                                sp.tick();
                                let report = detect_and_persist(
                                    &sub_db,
                                    &sub_branch,
                                    detection_config,
                                    &scan_result,
                                )?;
                                let meta =
                                    SqliteRepoMetadataRepository::new(sub_db.connection().clone());
                                write_metadata(
                                    &meta,
                                    &[
                                        ("parent_project", project_name_ref),
                                        ("mount_path", mount_path),
                                        ("file_count", &report.file_count.to_string()),
                                        ("convention_count", &report.convention_count.to_string()),
                                        ("last_scan_time", &unix_now().to_string()),
                                    ],
                                )?;
                                sp.finish_with_message(format!(
                                    "{name}: done ({} files, {} conventions)",
                                    report.file_count, report.convention_count,
                                ));
                                Ok(ScannedSubmodule {
                                    mount_path: mount_path.clone(),
                                    name: name.clone(),
                                    db_path: sub_db_path.to_string_lossy().to_string(),
                                    commit_hash: commit_hash.clone(),
                                })
                            })
                        })
                        .collect();
                    handles
                        .into_iter()
                        .map(|h| h.join().expect("submodule scan thread panicked"))
                        .collect()
                });
            // The first failed submodule aborts the whole scan.
            for result in parallel_results {
                results.push(result?);
            }
        }
        results
    } else {
        Vec::new()
    };

    // --- Stage 4: root scan, detection, persistence -------------------------
    // One spinner per phase; later spinners are created lazily from inside the
    // progress callback, hence the `RefCell`s.
    let discovery_sp = make_spinner("Discovering files...", show);
    let git_sp: std::cell::RefCell<Option<ProgressBar>> = std::cell::RefCell::new(None);
    let scan_sp: std::cell::RefCell<Option<ProgressBar>> = std::cell::RefCell::new(None);
    let graph_sp: std::cell::RefCell<Option<ProgressBar>> = std::cell::RefCell::new(None);
    let project_sp: std::cell::RefCell<Option<ProgressBar>> = std::cell::RefCell::new(None);
    let scan_result = scan_project_with_progress(
        &root,
        &config.scan,
        &db,
        |event| match event {
            ScanProgress::Discovering { count } => {
                discovery_sp.set_message(format!("Discovering files... {count} found"));
            }
            ScanProgress::DiscoveryDone { total } => {
                discovery_sp.finish_with_message(format!("Discovering files... {total} found"));
            }
            ScanProgress::CollectingGitHistory => {
                *git_sp.borrow_mut() = Some(make_spinner("Collecting git history...", show));
            }
            ScanProgress::GitHistoryDone => {
                if let Some(ref sp) = *git_sp.borrow() {
                    sp.finish_with_message("Collecting git history... done");
                }
            }
            ScanProgress::Scanning { done, total } => {
                let mut sp_opt = scan_sp.borrow_mut();
                if sp_opt.is_none() {
                    *sp_opt = Some(make_spinner(&format!("Scanning files... 0/{total}"), show));
                }
                if let Some(ref sp) = *sp_opt {
                    sp.set_message(format!("Scanning files... {done}/{total}"));
                }
            }
            ScanProgress::ScanningDone => {
                if let Some(ref sp) = *scan_sp.borrow() {
                    // Freeze the spinner on its last progress message.
                    sp.finish_with_message(sp.message().to_string());
                }
            }
            ScanProgress::BuildingModuleGraph => {
                *graph_sp.borrow_mut() = Some(make_spinner("Building module graph...", show));
            }
            ScanProgress::ModuleGraphDone => {
                if let Some(ref sp) = *graph_sp.borrow() {
                    sp.finish_with_message("Building module graph... done");
                }
            }
            ScanProgress::AnalyzingProjectFiles => {
                *project_sp.borrow_mut() =
                    Some(make_spinner("Analyzing manifests & docs...", show));
            }
            ScanProgress::ProjectFilesDone => {
                if let Some(ref sp) = *project_sp.borrow() {
                    sp.finish_with_message("Analyzing manifests & docs... done");
                }
            }
            _ => {}
        },
        scan_branch.clone(),
    )
    .map_err(CliError::scan)?;

    let detection_config = config.detection.clone();
    let detect_sp = make_spinner("Analyzing conventions...", show);
    let all_files = {
        use seshat_storage::{FileIRRepository, SqliteFileIRRepository};
        SqliteFileIRRepository::new(db.connection().clone())
            .get_by_branch(&scan_branch)
            .map_err(|e| CliError::scan(format!("failed to load files for detection: {e}")))?
    };
    let file_count = all_files.len();
    detect_sp.set_message(format!("Analyzing conventions... 0/{file_count}"));
    let progress_cb = |done: usize, _total: usize| {
        detect_sp.set_message(format!("Analyzing conventions... {done}/{file_count}"));
    };
    let project_context = seshat_detectors::ProjectContext::from_files(&all_files);
    let detector_results = run_all_detectors(
        &all_files,
        &scan_result.source_map,
        &detection_config,
        &project_context,
        Some(&progress_cb),
    );
    detect_sp.finish_with_message(format!(
        "Analyzing conventions... {file_count}/{file_count}"
    ));
    let all_findings: Vec<seshat_core::ConventionFinding> = detector_results
        .into_iter()
        .flat_map(|dr| dr.findings)
        .collect();
    // Every scanned file gets an entry; the date is `None` when the scan
    // recorded no date for it.
    let file_dates_map: std::collections::HashMap<String, Option<i64>> = all_files
        .iter()
        .map(|f| {
            let date = scan_result.file_dates.get(f.path.as_path()).copied();
            (f.path.to_string_lossy().to_string(), date)
        })
        .collect();
    let aggregated = aggregate_findings(
        &all_findings,
        &detection_config,
        &file_dates_map,
        unix_now(),
    );
    seshat_graph::persist_and_index(db.connection(), &scan_branch, &aggregated, &all_findings)
        .map_err(|e| CliError::scan(format!("persist conventions: {e}")))?;
    if let Some(ref embedding_config) = config.embedding {
        generate_embeddings(
            &db,
            embedding_config,
            &all_files,
            &scan_result.source_map,
            &scan_result.changed_paths,
            &scan_branch.0,
            show,
        )?;
    }

    // --- Stage 5: bookkeeping and report -------------------------------------
    let root_sub_repo = SqliteSubmoduleRepository::new(db.connection().clone());
    for sub in &scanned_submodules {
        root_sub_repo
            .upsert(&SubmoduleInput {
                relative_path: sub.mount_path.clone(),
                name: sub.name.clone(),
                db_path: sub.db_path.clone(),
                commit_hash: sub.commit_hash.clone(),
            })
            .map_err(|e| {
                CliError::scan(format!(
                    "failed to upsert submodule '{}' in root DB: {e}",
                    sub.mount_path
                ))
            })?;
    }
    // Best-effort cleanup of rows for submodules that are no longer listed.
    // Failures never abort the scan, but they are logged instead of being
    // dropped silently.
    match root_sub_repo.list() {
        Ok(stored_submodules) => {
            let active_paths: std::collections::HashSet<&str> =
                submodule_paths.iter().map(|s| s.as_str()).collect();
            for stored in &stored_submodules {
                if active_paths.contains(stored.relative_path.as_str()) {
                    continue;
                }
                if let Err(e) = root_sub_repo.delete(&stored.relative_path) {
                    tracing::warn!(
                        "Failed to delete stale submodule row '{}': {e}",
                        stored.relative_path
                    );
                }
            }
        }
        Err(e) => {
            tracing::warn!("Failed to list stored submodules for stale cleanup: {e}");
        }
    }
    let root_meta = SqliteRepoMetadataRepository::new(db.connection().clone());
    write_metadata(
        &root_meta,
        &[
            ("project_name", &project_name),
            ("project_root", path.to_string_lossy().as_ref()),
            ("file_count", &file_count.to_string()),
            ("convention_count", &aggregated.len().to_string()),
            ("last_scan_time", &unix_now().to_string()),
        ],
    )?;
    let elapsed = start.elapsed();
    let report_data = crate::report::build_report_data(
        &scan_result,
        &all_files,
        aggregated,
        &db_path,
        elapsed,
        config.scan.exclude_submodules,
    );
    crate::report::print_report(&report_data, verbosity, color);
    Ok(())
}
/// Builds the shared spinner style: a cyan spinner glyph followed by the
/// message. The last tick string is the glyph shown once a spinner finishes.
fn spinner_style() -> ProgressStyle {
    const TICK_FRAMES: [&str; 11] = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏", "✓"];
    let base = ProgressStyle::with_template(" {spinner:.cyan} {msg}").expect("valid template");
    base.tick_strings(&TICK_FRAMES)
}
/// Creates a spinner that animates on its own every 80 ms.
///
/// When `visible` is false the spinner is given a hidden draw target and is
/// returned without style, message or steady tick.
fn make_spinner(msg: &str, visible: bool) -> ProgressBar {
    let spinner = ProgressBar::new_spinner();
    if !visible {
        spinner.set_draw_target(indicatif::ProgressDrawTarget::hidden());
        return spinner;
    }
    spinner.set_style(spinner_style());
    spinner.set_message(msg.to_owned());
    let tick_interval = std::time::Duration::from_millis(80);
    spinner.enable_steady_tick(tick_interval);
    spinner
}
/// Creates a spinner that only advances when the caller invokes `tick()`.
///
/// A visible spinner is styled, given `msg`, and ticked once; an invisible
/// one only gets a hidden draw target.
fn make_manual_spinner(msg: &str, visible: bool) -> ProgressBar {
    let spinner = ProgressBar::new_spinner();
    if !visible {
        spinner.set_draw_target(indicatif::ProgressDrawTarget::hidden());
        return spinner;
    }
    spinner.set_style(spinner_style());
    spinner.set_message(msg.to_owned());
    spinner.tick();
    spinner
}
/// Reports the outcome of a stale-IR-cache wipe.
///
/// Does nothing for an empty report. Otherwise emits a `tracing` warning with
/// the wipe counters and, when `visible`, a one-line notice on stderr.
/// `scope` labels which database the wipe applied to.
fn report_ir_cache_wipe(report: &StaleIrWipeReport, scope: &str, visible: bool) {
    if report.is_empty() {
        return;
    }

    // Comma-separated list of the schema versions found in the cache.
    let version_labels: Vec<String> = report
        .cached_versions
        .iter()
        .map(|version| version.to_string())
        .collect();
    let versions = version_labels.join(", ");
    let current = seshat_storage::IR_SCHEMA_VERSION;

    tracing::warn!(
        scope = scope,
        stale_count = report.stale_count,
        cached_versions = versions,
        current_version = current,
        symbol_definitions_cleared = report.symbol_definitions_cleared,
        symbol_imports_cleared = report.symbol_imports_cleared,
        "IR cache schema mismatch — wiped stale rows, scan will re-parse from source",
    );

    if !visible {
        return;
    }
    eprintln!(
        " \u{21bb} IR cache schema mismatch ({scope}): cached v[{versions}] != current v{current}, \
         cleared {n} stale IR rows — re-parsing from scratch",
        n = report.stale_count,
    );
}
/// Summary counters returned by [`detect_and_persist`].
#[derive(Debug)]
struct DetectionReport {
    /// File count reported by the detection cycle.
    file_count: usize,
    /// Convention count reported by the detection cycle.
    convention_count: usize,
}
/// Runs one detection cycle against `db` for `scan_branch` and returns the
/// resulting file and convention counts.
///
/// File dates from `scan_result` are re-keyed by their lossy string path
/// before being handed to the detection cycle.
///
/// # Errors
/// Returns a scan error when the detection cycle fails.
fn detect_and_persist(
    db: &Database,
    scan_branch: &BranchId,
    detection_config: &DetectionConfig,
    scan_result: &ScanResult,
) -> Result<DetectionReport, CliError> {
    let mut dates_by_path: std::collections::HashMap<String, Option<i64>> =
        std::collections::HashMap::new();
    for (file, timestamp) in scan_result.file_dates.iter() {
        dates_by_path.insert(file.to_string_lossy().to_string(), Some(*timestamp));
    }

    let cycle = seshat_graph::run_detection_cycle(
        db.connection(),
        scan_branch,
        detection_config,
        &dates_by_path,
        &scan_result.source_map,
    );
    match cycle {
        Ok(outcome) => Ok(DetectionReport {
            file_count: outcome.file_count,
            convention_count: outcome.convention_count,
        }),
        Err(e) => Err(CliError::scan(format!("detection pipeline failed: {e}"))),
    }
}
/// Writes each `(key, value)` pair to the metadata repository, in order.
///
/// # Errors
/// Stops at the first failing write and returns a scan error naming its key.
fn write_metadata(
    repo: &SqliteRepoMetadataRepository,
    pairs: &[(&str, &str)],
) -> Result<(), CliError> {
    pairs.iter().try_for_each(|(key, value)| {
        repo.set(key, value)
            .map_err(|e| CliError::scan(format!("failed to write metadata '{key}': {e}")))
    })
}
fn generate_embeddings(
db: &Database,
embedding_config: &seshat_embedding::EmbeddingConfig,
all_files: &[seshat_core::ProjectFile],
source_map: &std::collections::HashMap<std::path::PathBuf, String>,
changed_paths: &std::collections::HashSet<std::path::PathBuf>,
branch_id: &str,
show: bool,
) -> Result<(), CliError> {
let provider = match seshat_embedding::create_provider(embedding_config) {
Ok(p) => p,
Err(e) => {
tracing::warn!("Failed to create embedding provider: {e}");
if show {
eprintln!(" \u{26a0} Embedding provider unavailable: {e}");
}
return Ok(());
}
};
let mut items: Vec<(String, String, String, String)> = Vec::new();
for file in all_files {
if !changed_paths.contains(&file.path) {
continue;
}
let source = match source_map.get(&file.path) {
Some(s) => s,
None => continue,
};
let file_path = file.path.to_string_lossy().to_string();
let source_lines: Option<Vec<String>> = Some(source.lines().map(str::to_owned).collect());
let import_context = {
let modules: Vec<&str> = file
.imports
.iter()
.map(|i| i.module.as_str())
.filter(|m| !m.is_empty())
.take(20)
.collect();
if modules.is_empty() {
String::new()
} else {
format!("\nuses: {}", modules.join(", "))
}
};
for func in &file.functions {
let vis = if func.is_public { "pub " } else { "" };
let asyncness = if func.is_async { "async " } else { "" };
let params = func.parameters.join(", ");
let body_snippet =
extract_body_snippet(source_lines.as_deref(), func.line, func.end_line);
let text = format!(
"{vis}{asyncness}fn {}({params}) in {file_path}{body_snippet}{import_context}",
func.name
);
items.push((
file_path.clone(),
func.name.clone(),
"function".to_string(),
text,
));
}
for ty in &file.types {
let vis = if ty.is_public { "pub " } else { "" };
let kind = match ty.kind {
seshat_core::TypeDefKind::Struct => "struct",
seshat_core::TypeDefKind::Enum => "enum",
seshat_core::TypeDefKind::Trait => "trait",
seshat_core::TypeDefKind::Interface => "interface",
seshat_core::TypeDefKind::Class => "class",
seshat_core::TypeDefKind::TypeAlias => "type_alias",
};
let text = format!("{vis}{kind} {} in {file_path}{import_context}", ty.name);
items.push((file_path.clone(), ty.name.clone(), "type".to_string(), text));
}
for exp in &file.exports {
let default = if exp.is_default { "default " } else { "" };
let text = format!(
"export {default}{} in {file_path}{import_context}",
exp.name
);
items.push((
file_path.clone(),
exp.name.clone(),
"export".to_string(),
text,
));
}
}
if items.is_empty() {
tracing::info!("No code items to embed");
return Ok(());
}
let total = items.len();
let batch_size = embedding_config.batch_size.max(1);
let embed_sp = make_spinner(&format!("Generating embeddings... 0/{total}"), show);
let conn = db.connection().clone();
let embedding_repo = SqliteEmbeddingRepository::new(conn);
let mut current_keys: std::collections::HashSet<(String, String, String)> =
std::collections::HashSet::new();
for file in all_files {
let file_path = file.path.to_string_lossy().to_string();
for func in &file.functions {
current_keys.insert((file_path.clone(), func.name.clone(), "function".to_string()));
}
for ty in &file.types {
current_keys.insert((file_path.clone(), ty.name.clone(), "type".to_string()));
}
for exp in &file.exports {
current_keys.insert((file_path.clone(), exp.name.clone(), "export".to_string()));
}
}
let mut embedded_count: usize = 0;
let _embedding_outcome: Result<(), ()> = 'embed: {
for chunk in items.chunks(batch_size) {
let texts: Vec<String> = chunk.iter().map(|(_, _, _, text)| text.clone()).collect();
match provider.embed(&texts) {
Ok(embeddings) => {
let inputs: Vec<EmbeddingInput> = chunk
.iter()
.zip(embeddings)
.map(
|((file_path, item_name, item_kind, _), emb)| EmbeddingInput {
file_path: file_path.clone(),
item_name: item_name.clone(),
item_kind: item_kind.clone(),
embedding: emb,
},
)
.collect();
if let Err(e) = embedding_repo.upsert_batch(branch_id, &inputs) {
tracing::warn!("Failed to store embedding batch: {e}");
embed_sp.finish_with_message(
"Generating embeddings... failed (storage error)".to_string(),
);
break 'embed Err(());
}
embedded_count += chunk.len();
embed_sp
.set_message(format!("Generating embeddings... {embedded_count}/{total}"));
}
Err(e) => {
tracing::warn!(
embedded = embedded_count,
total = total,
remaining = total - embedded_count,
"Embedding provider error mid-batch; {embedded_count}/{total} items stored, \
{} items skipped. Database contains partial embeddings: {e}",
total - embedded_count,
);
embed_sp.finish_with_message(format!(
"Generating embeddings... failed ({embedded_count}/{total})"
));
if show {
eprintln!(
" \u{26a0} Embedding generation failed after {embedded_count}/{total} items \
({} skipped, partial state): {e}",
total - embedded_count,
);
}
break 'embed Err(());
}
}
}
embed_sp.finish_with_message(format!("Generating embeddings... {embedded_count}/{total}"));
tracing::info!(
count = embedded_count,
total = total,
"Generated code embeddings"
);
Ok(())
};
match embedding_repo.get_stored_keys(branch_id) {
Ok(stored_keys) => {
let stored_set: std::collections::HashSet<_> = stored_keys.into_iter().collect();
let stale: Vec<_> = stored_set.difference(¤t_keys).cloned().collect();
if !stale.is_empty() {
match embedding_repo.delete_stale(branch_id, &stale) {
Ok(pruned) => {
tracing::info!(pruned = pruned, "Pruned {} stale embedding rows", pruned);
}
Err(e) => {
tracing::warn!(
"Failed to prune stale embedding rows: {e} (will retry next scan)"
);
}
}
}
}
Err(e) => {
tracing::warn!(
"Failed to query stored embedding keys for stale cleanup: {e} (will retry next scan)"
);
}
}
Ok(())
}
/// Returns a short excerpt of the source lines `start_line..=end_line`
/// (1-based, inclusive), prefixed with a newline.
///
/// Bodies of up to 8 lines are returned whole; longer ones are reduced to the
/// first 5 and last 3 lines joined by a `...` line. The excerpt is trimmed of
/// leading and trailing whitespace. An empty string is returned when there is
/// no source, `start_line` is 0, or the clamped range is empty.
fn extract_body_snippet(
    source_lines: Option<&[String]>,
    start_line: usize,
    end_line: usize,
) -> String {
    const HEAD_LINES: usize = 5;
    const TAIL_LINES: usize = 3;

    let Some(lines) = source_lines else {
        return String::new();
    };
    if lines.is_empty() || start_line == 0 {
        return String::new();
    }

    // Convert to a half-open, zero-based range clamped to the source length.
    let first = (start_line - 1).min(lines.len());
    let last = end_line.min(lines.len());
    if first >= last {
        return String::new();
    }

    let body = &lines[first..last];
    let excerpt = if body.len() > HEAD_LINES + TAIL_LINES {
        let head = &body[..HEAD_LINES];
        let tail = &body[body.len() - TAIL_LINES..];
        format!("{}\n...\n{}", head.join("\n"), tail.join("\n"))
    } else {
        body.join("\n")
    };
    format!("\n{}", excerpt.trim())
}
#[cfg(test)]
mod tests {
use super::*;
use seshat_scanner::scan_project;
use seshat_storage::{
Database, FileIRRepository, RepoMetadataRepository, SqliteFileIRRepository,
SqliteRepoMetadataRepository, SqliteSubmoduleRepository, SubmoduleInput,
SubmoduleRepository,
};
use std::fs;
use tempfile::tempdir;
/// Builds a temporary project tree: a root repo with one Rust file, a
/// `.gitmodules` declaring the `frontend` submodule, and a `frontend`
/// directory with its own `.git` and one TypeScript file.
fn create_project_with_submodule() -> tempfile::TempDir {
    let dir = tempdir().expect("create tempdir");
    {
        let root = dir.path();
        for sub_dir in [".git", "src", "frontend/.git", "frontend/src"] {
            fs::create_dir_all(root.join(sub_dir)).unwrap();
        }
        let files = [
            ("src/main.rs", "pub fn main() { println!(\"hello\"); }\n"),
            (
                ".gitmodules",
                "[submodule \"frontend\"]\n\tpath = frontend\n\turl = https://example.com/fe.git\n",
            ),
            (
                "frontend/src/app.ts",
                "export function app(): string { return 'hello'; }\n",
            ),
        ];
        for (relative, contents) in files {
            fs::write(root.join(relative), contents).unwrap();
        }
    }
    dir
}
#[test]
fn submodule_scan_creates_separate_dbs_with_correct_structure() {
    // Scan root and submodule into separate in-memory databases.
    let project = create_project_with_submodule();
    let project_root = project.path();
    let scan_config = seshat_core::ScanConfig::default();
    let root_db = Database::open(":memory:").expect("open root DB");
    let sub_db = Database::open(":memory:").expect("open submodule DB");

    let root_result = scan_project(project_root, &scan_config, &root_db, BranchId::from("main"))
        .expect("root scan should succeed");
    assert!(
        !root_result.excluded_submodules.is_empty(),
        "should detect submodule in .gitmodules"
    );
    assert_eq!(root_result.excluded_submodules, vec!["frontend"]);
    assert_eq!(
        root_result.files_discovered, 1,
        "root should discover 1 file (main.rs)"
    );

    let frontend_root = project_root.join("frontend");
    let sub_result = scan_project(&frontend_root, &scan_config, &sub_db, BranchId::from("main"))
        .expect("submodule scan should succeed");
    assert_eq!(
        sub_result.files_discovered, 1,
        "submodule should discover 1 file (app.ts)"
    );

    // Each database holds exactly its own file IR.
    let branch = BranchId::from("main");
    let ir_count = |db: &Database| {
        SqliteFileIRRepository::new(db.connection().clone())
            .get_by_branch(&branch)
            .unwrap()
            .len()
    };
    assert_eq!(ir_count(&root_db), 1, "root DB should have 1 file IR");
    assert_eq!(ir_count(&sub_db), 1, "submodule DB should have 1 file IR");

    // Submodule metadata round-trips.
    let sub_meta = SqliteRepoMetadataRepository::new(sub_db.connection().clone());
    let sub_file_count = sub_result.files_discovered.to_string();
    for (key, value) in [
        ("parent_project", "my-project"),
        ("mount_path", "frontend"),
        ("file_count", sub_file_count.as_str()),
        ("convention_count", "0"),
        ("last_scan_time", "1700000000"),
    ] {
        sub_meta.set(key, value).unwrap();
    }
    assert_eq!(
        sub_meta.get("parent_project").unwrap().unwrap(),
        "my-project"
    );
    assert_eq!(sub_meta.get("mount_path").unwrap().unwrap(), "frontend");
    assert_eq!(sub_meta.get("file_count").unwrap().unwrap(), "1");

    // The root database records the submodule row.
    let root_sub_repo = SqliteSubmoduleRepository::new(root_db.connection().clone());
    let frontend_row = SubmoduleInput {
        relative_path: "frontend".to_string(),
        name: "frontend".to_string(),
        db_path: "/data/seshat/repos/my-project/frontend.db".to_string(),
        commit_hash: None,
    };
    root_sub_repo.insert(&frontend_row).unwrap();
    let stored = root_sub_repo.list().unwrap();
    assert_eq!(stored.len(), 1);
    assert_eq!(stored[0].relative_path, "frontend");
    assert_eq!(stored[0].name, "frontend");

    // Root metadata round-trips.
    let root_meta = SqliteRepoMetadataRepository::new(root_db.connection().clone());
    let root_file_count = root_result.files_discovered.to_string();
    for (key, value) in [
        ("project_name", "my-project"),
        ("file_count", root_file_count.as_str()),
        ("convention_count", "0"),
        ("last_scan_time", "1700000000"),
    ] {
        root_meta.set(key, value).unwrap();
    }
    assert_eq!(
        root_meta.get("project_name").unwrap().unwrap(),
        "my-project"
    );
    assert_eq!(root_meta.get("file_count").unwrap().unwrap(), "1");
}
#[test]
fn uninitialised_submodule_is_skipped() {
    // `libs/shared` is declared in .gitmodules but has no `.git` entry.
    let project = tempdir().expect("create tempdir");
    let project_root = project.path();
    for sub_dir in [".git", "src"] {
        fs::create_dir_all(project_root.join(sub_dir)).unwrap();
    }
    fs::write(project_root.join("src/main.rs"), "pub fn main() {}\n").unwrap();
    let gitmodules =
        "[submodule \"libs/shared\"]\n\tpath = libs/shared\n\turl = https://example.com\n";
    fs::write(project_root.join(".gitmodules"), gitmodules).unwrap();
    fs::create_dir_all(project_root.join("libs/shared")).unwrap();

    let scan_config = seshat_core::ScanConfig::default();
    let db = Database::open(":memory:").expect("open DB");
    let outcome = scan_project(project_root, &scan_config, &db, BranchId::from("main"))
        .expect("scan should succeed");

    assert_eq!(outcome.excluded_submodules, vec!["libs/shared"]);
    assert_eq!(outcome.files_discovered, 1);
}
#[test]
fn submodule_removed_from_gitmodules_gets_deleted_from_table() {
    let root_db = Database::open(":memory:").expect("open DB");
    let sub_repo = SqliteSubmoduleRepository::new(root_db.connection().clone());
    let obsolete = SubmoduleInput {
        relative_path: "old-module".to_string(),
        name: "old-module".to_string(),
        db_path: "/data/repos/project/old-module.db".to_string(),
        commit_hash: Some("abc123".to_string()),
    };
    sub_repo.insert(&obsolete).unwrap();

    // Same cleanup rule the scan applies: drop rows not in the active set.
    let active_paths: std::collections::HashSet<&str> =
        std::collections::HashSet::from(["frontend"]);
    sub_repo
        .list()
        .unwrap()
        .iter()
        .filter(|row| !active_paths.contains(row.relative_path.as_str()))
        .for_each(|row| {
            let _ = sub_repo.delete(&row.relative_path);
        });

    let remaining = sub_repo.list().unwrap();
    assert!(
        remaining.is_empty(),
        "old-module should have been removed from submodules table"
    );
}
/// Returns `true` only when both hashes are present and equal.
fn should_skip_submodule(stored_hash: Option<&str>, current_hash: Option<&str>) -> bool {
    matches!((current_hash, stored_hash), (Some(current), Some(stored)) if current == stored)
}
#[test]
fn change_detection_skip_when_hashes_match() {
    // Identical stored and current hashes mean the submodule is skipped.
    let hash = "abc123def456abc123def456abc123def456abc123";
    let skip = should_skip_submodule(Some(hash), Some(hash));
    assert!(skip);
}
#[test]
fn change_detection_rescan_when_hashes_differ() {
    // Differing hashes force a rescan.
    let stored = "abc123def456abc123def456abc123def456abc123";
    let current = "000000def456abc123def456abc123def456abc123";
    let skip = should_skip_submodule(Some(stored), Some(current));
    assert!(!skip);
}
#[test]
fn change_detection_rescan_when_no_stored_hash() {
    // Without a stored hash there is nothing to compare against.
    let current = "abc123def456abc123def456abc123def456abc123";
    let skip = should_skip_submodule(None, Some(current));
    assert!(!skip);
}
#[test]
fn change_detection_rescan_when_no_current_hash() {
    // Without a current hash the submodule is rescanned.
    let stored = "abc123def456abc123def456abc123def456abc123";
    let skip = should_skip_submodule(Some(stored), None);
    assert!(!skip);
}
#[test]
fn change_detection_rescan_when_both_hashes_none() {
    // Two missing hashes never count as a match.
    let skip = should_skip_submodule(None, None);
    assert!(!skip);
}
#[test]
fn change_detection_new_submodule_triggers_full_scan() {
    // A fresh database has no record for the submodule path.
    let root_db = Database::open(":memory:").expect("open DB");
    let sub_repo = SqliteSubmoduleRepository::new(root_db.connection().clone());
    let lookup = sub_repo.find_by_path("frontend").unwrap();
    assert!(lookup.is_none(), "new submodule should not be in table");
}
#[test]
fn change_detection_updated_hash_stored_after_rescan() {
    let root_db = Database::open(":memory:").expect("open DB");
    let sub_repo = SqliteSubmoduleRepository::new(root_db.connection().clone());
    // Builds the `frontend` row with the given commit hash.
    let frontend_at = |hash: &str| SubmoduleInput {
        relative_path: "frontend".to_string(),
        name: "frontend".to_string(),
        db_path: "/data/repos/project/frontend.db".to_string(),
        commit_hash: Some(hash.to_owned()),
    };

    let old_hash = "aaaa".repeat(10);
    sub_repo.insert(&frontend_at(&old_hash)).unwrap();
    let new_hash = "bbbb".repeat(10);
    sub_repo.update(&frontend_at(&new_hash)).unwrap();

    let stored = sub_repo.find_by_path("frontend").unwrap().unwrap();
    assert_eq!(
        stored.commit_hash.as_deref(),
        Some(new_hash.as_str()),
        "stored hash should be updated after rescan"
    );
    // With the stored hash refreshed, the submodule is skippable again.
    assert!(should_skip_submodule(
        stored.commit_hash.as_deref(),
        Some(new_hash.as_str()),
    ));
}
#[test]
fn change_detection_skipped_submodule_not_deleted_from_table() {
    let root_db = Database::open(":memory:").expect("open DB");
    let sub_repo = SqliteSubmoduleRepository::new(root_db.connection().clone());
    let hash = "abcd".repeat(10);
    let frontend_row = SubmoduleInput {
        relative_path: "frontend".to_string(),
        name: "frontend".to_string(),
        db_path: "/data/repos/project/frontend.db".to_string(),
        commit_hash: Some(hash.clone()),
    };
    sub_repo.insert(&frontend_row).unwrap();

    // `frontend` is still active, so the cleanup rule must leave it alone.
    let active_paths: std::collections::HashSet<&str> =
        std::collections::HashSet::from(["frontend"]);
    sub_repo
        .list()
        .unwrap()
        .iter()
        .filter(|row| !active_paths.contains(row.relative_path.as_str()))
        .for_each(|row| {
            let _ = sub_repo.delete(&row.relative_path);
        });

    let remaining = sub_repo.list().unwrap();
    assert_eq!(
        remaining.len(),
        1,
        "skipped submodule should remain in table"
    );
    assert_eq!(remaining[0].relative_path, "frontend");
}
/// Returns `n` lines named `line_1` through `line_n`.
fn make_lines(n: usize) -> Vec<String> {
    let mut lines = Vec::with_capacity(n);
    for number in 1..=n {
        lines.push(format!("line_{number}"));
    }
    lines
}
#[test]
fn body_snippet_none_source_returns_empty() {
    // No source lines at all yields an empty snippet.
    let snippet = extract_body_snippet(None, 1, 5);
    assert_eq!(snippet, "");
}
#[test]
fn body_snippet_start_zero_returns_empty() {
    // Line numbers are 1-based; 0 is rejected.
    let source = make_lines(10);
    let snippet = extract_body_snippet(Some(&source), 0, 5);
    assert_eq!(snippet, "");
}
#[test]
fn body_snippet_single_line_function() {
    // start == end selects exactly one line.
    let source = make_lines(20);
    let snippet = extract_body_snippet(Some(&source), 5, 5);
    assert!(!snippet.is_empty());
    assert!(snippet.contains("line_5"));
}
#[test]
fn body_snippet_short_function_returns_all_lines() {
    // A 5-line body is below the truncation threshold.
    let source = make_lines(20);
    let snippet = extract_body_snippet(Some(&source), 3, 7);
    for expected in ["line_3", "line_7"] {
        assert!(snippet.contains(expected));
    }
    assert!(!snippet.contains("..."));
}
#[test]
fn body_snippet_long_function_has_head_and_tail() {
    // A 50-line body keeps its head and tail and drops the middle.
    let source = make_lines(50);
    let snippet = extract_body_snippet(Some(&source), 1, 50);
    for expected in ["line_1", "line_5", "...", "line_50", "line_48"] {
        assert!(snippet.contains(expected));
    }
    assert!(!snippet.contains("line_25"));
}
/// An eight-line range is returned in full: no elision marker, and both the
/// first and the last line of the range are present.
#[test]
fn body_snippet_exactly_boundary_no_overlap() {
    let source = make_lines(20);
    let snippet = extract_body_snippet(Some(&source), 1, 8);

    let truncated = snippet.contains("...");
    assert!(!truncated, "8-line function should not be truncated");

    for expected in ["line_1", "line_8"] {
        assert!(snippet.contains(expected));
    }
}
/// The snippet begins with a newline that is not followed by a space, i.e.
/// the leading whitespace of the first source line does not appear directly
/// after that newline.
#[test]
fn body_snippet_trim_applied() {
    let source: Vec<String> = [" fn foo() {", " let x = 1;", " }"]
        .iter()
        .map(|raw| (*raw).to_owned())
        .collect();

    let snippet = extract_body_snippet(Some(&source), 1, 3);

    assert!(snippet.starts_with('\n'));
    let keeps_leading_space = snippet.starts_with("\n ");
    assert!(!keeps_leading_space);
}
/// An empty list of source lines yields the empty string.
#[test]
fn body_snippet_empty_lines_returns_empty() {
    let source = Vec::<String>::new();
    let snippet = extract_body_snippet(Some(&source), 1, 5);
    assert_eq!(snippet, "");
}
/// An inverted range (start line after end line) yields the empty string.
#[test]
fn body_snippet_start_after_end_returns_empty() {
    let source = make_lines(20);
    let snippet = extract_body_snippet(Some(&source), 10, 5);
    assert_eq!(snippet, "");
}
/// An end line far beyond the available source (999 for a five-line input)
/// still produces a snippet containing the first and last available lines.
#[test]
fn body_snippet_end_line_clamped_to_available() {
    let source = make_lines(5);
    let snippet = extract_body_snippet(Some(&source), 1, 999);

    for expected in ["line_1", "line_5"] {
        assert!(snippet.contains(expected));
    }
}
/// Starting and ending on the final source line returns that line and not
/// the line before it.
#[test]
fn body_snippet_start_at_last_line_returns_single_line() {
    let source = make_lines(5);
    let snippet = extract_body_snippet(Some(&source), 5, 5);

    assert!(snippet.contains("line_5"));
    let includes_previous = snippet.contains("line_4");
    assert!(!includes_previous);
}
/// A start line beyond the last available source line yields the empty
/// string.
#[test]
fn body_snippet_start_past_lines_returns_empty() {
    let source = make_lines(3);
    let snippet = extract_body_snippet(Some(&source), 4, 4);
    assert_eq!(snippet, "");
}
/// A 15-line body keeps lines 1 and 5 at the head and lines 13 and 15 at the
/// tail, drops lines 6 and 10, and contains an elision marker.
///
/// Presence checks match a whole label rather than a bare substring, because
/// `contains("line_1")` is also satisfied by `line_13` or `line_15` in the
/// tail and would pass even if line 1 were missing from the head.
#[test]
fn body_snippet_long_body_skips_middle_lines() {
    let lines = make_lines(20);
    let result = extract_body_snippet(Some(&lines), 1, 15);
    let text: &str = &result;

    // True when `line_<n>` occurs and is not immediately followed by another
    // digit, so `line_1` cannot be satisfied by `line_13`.
    let has_line = |n: usize| {
        let label = format!("line_{n}");
        text.match_indices(label.as_str())
            .any(|(at, hit)| !text[at + hit.len()..].starts_with(|c: char| c.is_ascii_digit()))
    };

    // Head section ends at line 5.
    assert!(has_line(1));
    assert!(has_line(5));
    // Lines from the middle are dropped.
    assert!(!text.contains("line_6"));
    assert!(!text.contains("line_10"));
    // Tail section.
    assert!(has_line(13));
    assert!(has_line(15));
    // Elision marker between head and tail.
    assert!(text.contains("..."));
}
/// When detection runs on the same branch a file was upserted under, the
/// returned report counts that file.
#[test]
fn detect_and_persist_uses_branch_id_for_loading_files() {
    use seshat_core::test_helpers::make_project_file;
    use seshat_storage::{FileIRRepository, SqliteFileIRRepository};

    let db = Database::open(":memory:").expect("open DB");
    let feature_branch = BranchId::from("feat/my-feature");

    // Seed a single Rust file under the feature branch.
    let seeded_file = make_project_file(seshat_core::Language::Rust);
    SqliteFileIRRepository::new(db.connection().clone())
        .upsert(&feature_branch, &seeded_file, None)
        .expect("upsert file under feature branch");

    // Scan summary reporting one discovered and parsed file; every other
    // counter and collection is empty.
    let scan = seshat_scanner::ScanResult {
        files_discovered: 1,
        files_parsed: 1,
        nodes_persisted: 0,
        edges_persisted: 0,
        manifests_analyzed: 0,
        docs_ingested: 0,
        manifest_analyses: Vec::new(),
        incremental: None,
        file_dates: Default::default(),
        excluded_submodules: Vec::new(),
        source_map: Default::default(),
        changed_paths: Default::default(),
    };
    let detection = DetectionConfig::default();

    let result = detect_and_persist(&db, &feature_branch, &detection, &scan);
    assert!(
        result.is_ok(),
        "detect_and_persist should succeed: {result:?}"
    );

    let report = result.unwrap();
    assert_eq!(
        report.file_count, 1,
        "should find the file stored under feature branch"
    );
}
/// When the only stored file belongs to a feature branch, running detection
/// on `main` succeeds and reports zero files.
#[test]
fn detect_and_persist_returns_zero_for_wrong_branch() {
    use seshat_core::test_helpers::make_project_file;
    use seshat_storage::{FileIRRepository, SqliteFileIRRepository};

    let db = Database::open(":memory:").expect("open DB");
    let main_branch = BranchId::from("main");
    let feature_branch = BranchId::from("feat/my-feature");

    // The file is stored under the feature branch only.
    let seeded_file = make_project_file(seshat_core::Language::Rust);
    SqliteFileIRRepository::new(db.connection().clone())
        .upsert(&feature_branch, &seeded_file, None)
        .expect("upsert file under feature branch");

    // Scan summary reporting one discovered and parsed file; every other
    // counter and collection is empty.
    let scan = seshat_scanner::ScanResult {
        files_discovered: 1,
        files_parsed: 1,
        nodes_persisted: 0,
        edges_persisted: 0,
        manifests_analyzed: 0,
        docs_ingested: 0,
        manifest_analyses: Vec::new(),
        incremental: None,
        file_dates: Default::default(),
        excluded_submodules: Vec::new(),
        source_map: Default::default(),
        changed_paths: Default::default(),
    };
    let detection = DetectionConfig::default();

    // Detection is run against `main`, not the branch that owns the file.
    let outcome = detect_and_persist(&db, &main_branch, &detection, &scan);
    assert!(outcome.is_ok());

    let report = outcome.unwrap();
    assert_eq!(report.file_count, 0, "main branch should have no files");
}
/// After detection runs on a feature branch, nodes can be found under that
/// branch and none are found under `main`.
#[test]
fn detect_and_persist_persists_conventions_under_correct_branch() {
    use seshat_core::test_helpers::make_project_file;
    use seshat_storage::{
        FileIRRepository, NodeRepository, SqliteFileIRRepository, SqliteNodeRepository,
    };

    let db = Database::open(":memory:").expect("open DB");
    let feature_branch = BranchId::from("feat/snippets");

    // Seed a single Rust file under the feature branch.
    let seeded_file = make_project_file(seshat_core::Language::Rust);
    SqliteFileIRRepository::new(db.connection().clone())
        .upsert(&feature_branch, &seeded_file, None)
        .expect("upsert file under feature branch");

    // Scan summary reporting one discovered and parsed file; every other
    // counter and collection is empty.
    let scan = seshat_scanner::ScanResult {
        files_discovered: 1,
        files_parsed: 1,
        nodes_persisted: 0,
        edges_persisted: 0,
        manifests_analyzed: 0,
        docs_ingested: 0,
        manifest_analyses: Vec::new(),
        incremental: None,
        file_dates: Default::default(),
        excluded_submodules: Vec::new(),
        source_map: Default::default(),
        changed_paths: Default::default(),
    };
    let detection = DetectionConfig::default();

    let outcome = detect_and_persist(&db, &feature_branch, &detection, &scan);
    assert!(outcome.is_ok());

    let node_repo = SqliteNodeRepository::new(db.connection().clone());

    // Nodes must exist under the branch detection ran on ...
    let feature_nodes = node_repo
        .find_by_branch(&feature_branch)
        .expect("find nodes");
    let feature_has_nodes = !feature_nodes.is_empty();
    assert!(
        feature_has_nodes,
        "conventions should be persisted under feature branch"
    );

    // ... and must not appear under `main`.
    let main_branch = BranchId::from("main");
    let main_nodes = node_repo.find_by_branch(&main_branch).expect("find nodes");
    assert!(
        main_nodes.is_empty(),
        "no conventions should be under main branch"
    );
}
/// Scanning a minimal on-disk project under a non-`main` branch fills the
/// result's `source_map` and stores file IR under the scanned branch.
#[test]
fn scan_project_with_source_map_produces_snippets() {
    // Lay out a tiny project: a `.git` directory plus one Rust source file.
    let workspace = tempdir().expect("create tempdir");
    let root = workspace.path();
    for sub_dir in [".git", "src"] {
        fs::create_dir_all(root.join(sub_dir)).unwrap();
    }
    let main_rs = root.join("src/main.rs");
    fs::write(main_rs, "use std::error::Error;\n\npub fn main() {}\n").unwrap();

    let scan_config = seshat_core::ScanConfig::default();
    let db = Database::open(":memory:").expect("open DB");
    let branch = BranchId::from("test-branch");

    let outcome =
        scan_project(root, &scan_config, &db, branch.clone()).expect("scan should succeed");
    let source_map_populated = !outcome.source_map.is_empty();
    assert!(source_map_populated, "source_map should contain files");

    let file_ir_repo = SqliteFileIRRepository::new(db.connection().clone());

    let branch_files = file_ir_repo.get_by_branch(&branch).expect("get files");
    let branch_has_files = !branch_files.is_empty();
    assert!(
        branch_has_files,
        "files should be stored under the scan branch"
    );

    let main_files = file_ir_repo
        .get_by_branch(&BranchId::from("main"))
        .expect("get files");
    // NOTE(review): this only fails when `main` holds a non-empty set with the
    // same number of files as the scanned branch; a smaller non-empty set
    // still passes. Confirm whether a strict `is_empty()` check was intended.
    let main_mirrors_branch = !main_files.is_empty() && main_files.len() == branch_files.len();
    assert!(
        !main_mirrors_branch,
        "files should NOT be stored under main branch when scanning a different branch"
    );
}
}