1use std::path::Path;
7use std::time::Instant;
8
9use indicatif::{ProgressBar, ProgressStyle};
10use seshat_core::{BranchId, DetectionConfig};
11use seshat_detectors::{aggregate_findings, run_all_detectors};
12use seshat_scanner::{
13 ScanProgress, ScanResult, detect_submodule_paths, scan_project_with_progress,
14};
15use seshat_storage::{
16 Database, EmbeddingInput, EmbeddingRepository, RepoMetadataRepository,
17 SqliteEmbeddingRepository, SqliteRepoMetadataRepository, SqliteSubmoduleRepository,
18 StaleIrWipeReport, SubmoduleInput, SubmoduleRepository, wipe_stale_ir_cache,
19};
20
21use crate::config::AppConfig;
22use crate::db::unix_now;
23use crate::error::CliError;
24use crate::format::{self, Verbosity};
25
26pub fn run_scan(
38 path: &Path,
39 verbose: bool,
40 quiet: bool,
41 exclude_submodules: bool,
42) -> Result<(), CliError> {
43 let verbosity = Verbosity::from_flags(verbose, quiet);
44 let color = format::color_enabled();
45
46 if !path.exists() {
48 return Err(CliError::InvalidPath {
49 path: path.display().to_string(),
50 reason: "path does not exist".to_owned(),
51 });
52 }
53 if !path.is_dir() {
54 return Err(CliError::InvalidPath {
55 path: path.display().to_string(),
56 reason: "path is not a directory".to_owned(),
57 });
58 }
59
60 let resolved = crate::db::resolve_project(Some(path), "scan")?;
65 let root = resolved.project_root.clone();
66 let db_path = resolved.db_path.clone();
67 let project_name = resolved.project_name.clone();
68
69 if verbosity.show_warnings() {
71 eprintln!("seshat v{}", env!("CARGO_PKG_VERSION"));
72 }
73
74 let mut config =
76 AppConfig::load().map_err(|e| CliError::scan(format!("failed to load config: {e}")))?;
77
78 if exclude_submodules {
80 config.scan.exclude_submodules = true;
81 }
82
83 if let Some(parent) = db_path.parent() {
85 std::fs::create_dir_all(parent)
86 .map_err(|e| CliError::scan(format!("failed to create database directory: {e}")))?;
87 }
88 let db = Database::open(&db_path)
89 .map_err(|e| CliError::scan(format!("failed to open database: {e}")))?;
90
91 let wipe = wipe_stale_ir_cache(&db)
99 .map_err(|e| CliError::scan(format!("failed to clear stale IR cache: {e}")))?;
100 report_ir_cache_wipe(&wipe, "root", verbosity.show_warnings());
101
102 let submodule_paths = detect_submodule_paths(&root);
104
105 let scan_branch = crate::db::get_current_branch(&root)
107 .map(seshat_core::BranchId::from)
108 .unwrap_or_else(|| {
109 tracing::debug!(root = %root.display(), "Could not detect git branch for scan root, defaulting to 'main'");
110 seshat_core::BranchId::from("main")
111 });
112
113 let start = Instant::now();
115
116 let show = verbosity.show_warnings();
117
118 struct ScannedSubmodule {
121 mount_path: String,
122 name: String,
123 db_path: String,
124 commit_hash: Option<String>,
125 }
126
127 let root_sub_repo_for_detect = SqliteSubmoduleRepository::new(db.connection().clone());
129
130 let scanned_submodules: Vec<ScannedSubmodule> = if !config.scan.exclude_submodules
134 && !submodule_paths.is_empty()
135 {
136 enum SubmoduleAction {
139 Skip(ScannedSubmodule),
140 Scan {
141 mount_path: String,
142 name: String,
143 submodule_abs: std::path::PathBuf,
144 commit_hash: Option<String>,
145 },
146 }
147
148 let mut actions: Vec<SubmoduleAction> = Vec::new();
149
150 for mount_path in &submodule_paths {
151 let submodule_abs = root.join(mount_path);
152 let name = mount_path
153 .rsplit('/')
154 .next()
155 .unwrap_or(mount_path)
156 .to_string();
157
158 if show {
160 eprintln!(" \u{2139} Submodule detected: {mount_path}");
161 }
162
163 if !submodule_abs.is_dir()
165 || (!submodule_abs.join(".git").exists() && !submodule_abs.join(".git").is_file())
166 {
167 if show {
168 let reason = "not initialized (no .git)";
169 eprintln!(" \u{2298} Submodule {name} skipped: {reason}");
170 }
171 continue;
172 }
173
174 let commit_hash = seshat_scanner::get_head_commit(&submodule_abs);
176
177 let stored_record = root_sub_repo_for_detect
179 .find_by_path(mount_path)
180 .map_err(|e| {
181 CliError::scan(format!("failed to look up submodule '{mount_path}': {e}"))
182 })?;
183
184 if let Some(ref stored) = stored_record {
185 if let (Some(current_hash), Some(stored_hash)) = (&commit_hash, &stored.commit_hash)
187 {
188 if current_hash == stored_hash {
189 let sub_branch_for_check = crate::db::get_current_branch(&submodule_abs)
199 .unwrap_or_else(|| {
200 tracing::debug!(submodule = %submodule_abs.display(), "Could not detect branch for submodule, defaulting to 'main'");
201 "main".to_owned()
202 });
203 let schema_ok =
204 seshat_storage::Database::open(std::path::Path::new(&stored.db_path))
205 .ok()
206 .map(|sub_db| {
207 crate::db::submodule_ir_schema_is_current(
208 &sub_db,
209 &sub_branch_for_check,
210 )
211 })
212 .unwrap_or(false); if schema_ok {
215 if show {
217 let short = if current_hash.len() >= 7 {
218 ¤t_hash[..7]
219 } else {
220 current_hash
221 };
222 eprintln!(" \u{2713} Submodule {name} up-to-date ({short})");
223 }
224
225 actions.push(SubmoduleAction::Skip(ScannedSubmodule {
226 mount_path: mount_path.clone(),
227 name,
228 db_path: stored.db_path.clone(),
229 commit_hash,
230 }));
231 continue;
232 }
233
234 if show {
236 eprintln!(
237 " \u{21bb} Submodule {name} IR schema outdated, re-scanning..."
238 );
239 }
240 }
241 }
242 }
243
244 actions.push(SubmoduleAction::Scan {
246 mount_path: mount_path.clone(),
247 name,
248 submodule_abs,
249 commit_hash,
250 });
251 }
252
253 let mut results: Vec<ScannedSubmodule> = Vec::new();
255 let mut to_scan: Vec<(String, String, std::path::PathBuf, Option<String>)> = Vec::new();
256
257 for action in actions {
258 match action {
259 SubmoduleAction::Skip(sub) => results.push(sub),
260 SubmoduleAction::Scan {
261 mount_path,
262 name,
263 submodule_abs,
264 commit_hash,
265 } => to_scan.push((mount_path, name, submodule_abs, commit_hash)),
266 }
267 }
268
269 if !to_scan.is_empty() {
270 let scan_config = &config.scan;
272 let detection_config = &config.detection;
273 let project_name_ref = &project_name;
274
275 let parallel_results: Vec<Result<ScannedSubmodule, CliError>> = std::thread::scope(
277 |scope| {
278 let handles: Vec<_> = to_scan
279 .iter()
280 .map(|(mount_path, name, submodule_abs, commit_hash)| {
281 let sp =
282 make_manual_spinner(&format!("{name}: discovering files..."), show);
283
284 scope.spawn(move || -> Result<ScannedSubmodule, CliError> {
285 let sub_db_path = crate::db::resolve_submodule_db_path(
287 project_name_ref,
288 mount_path,
289 )?;
290 let sub_db = Database::open(&sub_db_path).map_err(|e| {
291 CliError::scan(format!(
292 "failed to open submodule database for '{mount_path}': {e}"
293 ))
294 })?;
295
296 let sub_wipe = wipe_stale_ir_cache(&sub_db).map_err(|e| {
300 CliError::scan(format!(
301 "failed to clear stale IR cache for submodule '{mount_path}': {e}"
302 ))
303 })?;
304 report_ir_cache_wipe(&sub_wipe, name, show);
305
306 let sub_branch = crate::db::get_current_branch(submodule_abs)
308 .map(seshat_core::BranchId::from)
309 .unwrap_or_else(|| {
310 tracing::debug!(submodule = %submodule_abs.display(), "Could not detect branch for submodule scan, defaulting to 'main'");
311 seshat_core::BranchId::from("main")
312 });
313
314 let scan_result = scan_project_with_progress(
317 submodule_abs,
318 scan_config,
319 &sub_db,
320 |event| {
321 match event {
322 ScanProgress::Discovering { count } => {
323 sp.set_message(format!(
324 "{name}: discovering files... {count} found"
325 ));
326 }
327 ScanProgress::DiscoveryDone { total } => {
328 sp.set_message(format!(
329 "{name}: discovering files... {total} found"
330 ));
331 }
332 ScanProgress::CollectingGitHistory => {
333 sp.set_message(format!(
334 "{name}: collecting git history..."
335 ));
336 }
337 ScanProgress::Scanning { done, total } => {
338 sp.set_message(format!(
339 "{name}: scanning files... {done}/{total}"
340 ));
341 }
342 ScanProgress::BuildingModuleGraph => {
343 sp.set_message(format!(
344 "{name}: building module graph..."
345 ));
346 }
347 ScanProgress::AnalyzingProjectFiles => {
348 sp.set_message(format!(
349 "{name}: analyzing manifests & docs..."
350 ));
351 }
352 _ => {}
353 }
354 sp.tick();
355 },
356 sub_branch.clone(),
357 )
358 .map_err(|e| {
359 CliError::scan(format!(
360 "submodule scan failed for '{mount_path}': {e}"
361 ))
362 })?;
363
364 sp.set_message(format!("{name}: analyzing conventions..."));
365 sp.tick();
366
367 let report = detect_and_persist(
368 &sub_db,
369 &sub_branch,
370 &detection_config.clone(),
371 &scan_result,
372 )?;
373
374 let meta =
376 SqliteRepoMetadataRepository::new(sub_db.connection().clone());
377 write_metadata(
378 &meta,
379 &[
380 ("parent_project", project_name_ref),
381 ("mount_path", mount_path),
382 ("file_count", &report.file_count.to_string()),
383 ("convention_count", &report.convention_count.to_string()),
384 ("last_scan_time", &unix_now().to_string()),
385 ],
386 )?;
387
388 sp.finish_with_message(format!(
394 "{name}: done ({} files, {} conventions)",
395 report.file_count, report.convention_count,
396 ));
397
398 Ok(ScannedSubmodule {
399 mount_path: mount_path.clone(),
400 name: name.clone(),
401 db_path: sub_db_path.to_string_lossy().to_string(),
402 commit_hash: commit_hash.clone(),
403 })
404 })
405 })
406 .collect();
407
408 handles
410 .into_iter()
411 .map(|h| h.join().expect("submodule scan thread panicked"))
412 .collect()
413 },
414 );
415
416 for result in parallel_results {
418 results.push(result?);
419 }
420 }
421
422 results
423 } else {
424 Vec::new()
425 };
426
427 let discovery_sp = make_spinner("Discovering files...", show);
431
432 let git_sp: std::cell::RefCell<Option<ProgressBar>> = std::cell::RefCell::new(None);
433 let scan_sp: std::cell::RefCell<Option<ProgressBar>> = std::cell::RefCell::new(None);
434 let graph_sp: std::cell::RefCell<Option<ProgressBar>> = std::cell::RefCell::new(None);
435 let project_sp: std::cell::RefCell<Option<ProgressBar>> = std::cell::RefCell::new(None);
436
437 let scan_result = scan_project_with_progress(
438 &root,
439 &config.scan,
440 &db,
441 |event| match event {
442 ScanProgress::Discovering { count } => {
443 discovery_sp.set_message(format!("Discovering files... {count} found"));
444 }
445 ScanProgress::DiscoveryDone { total } => {
446 discovery_sp.finish_with_message(format!("Discovering files... {total} found"));
447 }
448 ScanProgress::CollectingGitHistory => {
449 *git_sp.borrow_mut() = Some(make_spinner("Collecting git history...", show));
450 }
451 ScanProgress::GitHistoryDone => {
452 if let Some(ref sp) = *git_sp.borrow() {
453 sp.finish_with_message("Collecting git history... done");
454 }
455 }
456 ScanProgress::Scanning { done, total } => {
457 let mut sp_opt = scan_sp.borrow_mut();
458 if sp_opt.is_none() {
459 *sp_opt = Some(make_spinner(&format!("Scanning files... 0/{total}"), show));
460 }
461 if let Some(ref sp) = *sp_opt {
462 sp.set_message(format!("Scanning files... {done}/{total}"));
463 }
464 }
465 ScanProgress::ScanningDone => {
466 if let Some(ref sp) = *scan_sp.borrow() {
467 sp.finish_with_message(sp.message().to_string());
468 }
469 }
470 ScanProgress::BuildingModuleGraph => {
471 *graph_sp.borrow_mut() = Some(make_spinner("Building module graph...", show));
472 }
473 ScanProgress::ModuleGraphDone => {
474 if let Some(ref sp) = *graph_sp.borrow() {
475 sp.finish_with_message("Building module graph... done");
476 }
477 }
478 ScanProgress::AnalyzingProjectFiles => {
479 *project_sp.borrow_mut() =
480 Some(make_spinner("Analyzing manifests & docs...", show));
481 }
482 ScanProgress::ProjectFilesDone => {
483 if let Some(ref sp) = *project_sp.borrow() {
484 sp.finish_with_message("Analyzing manifests & docs... done");
485 }
486 }
487
488 _ => {}
492 },
493 scan_branch.clone(),
494 )
495 .map_err(CliError::scan)?;
496
497 let detection_config = config.detection.clone();
499
500 let detect_sp = make_spinner("Analyzing conventions...", show);
501 let all_files = {
502 use seshat_storage::{FileIRRepository, SqliteFileIRRepository};
503 SqliteFileIRRepository::new(db.connection().clone())
504 .get_by_branch(&scan_branch)
505 .map_err(|e| CliError::scan(format!("failed to load files for detection: {e}")))?
506 };
507
508 let file_count = all_files.len();
514 detect_sp.set_message(format!("Analyzing conventions... 0/{file_count}"));
515 let progress_cb = |done: usize, _total: usize| {
516 detect_sp.set_message(format!("Analyzing conventions... {done}/{file_count}"));
517 };
518 let project_context = seshat_detectors::ProjectContext::from_files(&all_files);
519 let detector_results = run_all_detectors(
520 &all_files,
521 &scan_result.source_map,
522 &detection_config,
523 &project_context,
524 Some(&progress_cb),
525 );
526 detect_sp.finish_with_message(format!(
527 "Analyzing conventions... {file_count}/{file_count}"
528 ));
529
530 let all_findings: Vec<seshat_core::ConventionFinding> = detector_results
531 .into_iter()
532 .flat_map(|dr| dr.findings)
533 .collect();
534
535 let file_dates_map: std::collections::HashMap<String, Option<i64>> = all_files
536 .iter()
537 .map(|f| {
538 let date = scan_result.file_dates.get(f.path.as_path()).copied();
539 (f.path.to_string_lossy().to_string(), date)
540 })
541 .collect();
542
543 let aggregated = aggregate_findings(
544 &all_findings,
545 &detection_config,
546 &file_dates_map,
547 unix_now(),
548 );
549
550 seshat_graph::persist_and_index(db.connection(), &scan_branch, &aggregated, &all_findings)
551 .map_err(|e| CliError::scan(format!("persist conventions: {e}")))?;
552
553 if let Some(ref embedding_config) = config.embedding {
558 generate_embeddings(
559 &db,
560 embedding_config,
561 &all_files,
562 &scan_result.source_map,
563 &scan_result.changed_paths,
564 &scan_branch.0,
565 show,
566 )?;
567 }
568
569 let root_sub_repo = SqliteSubmoduleRepository::new(db.connection().clone());
571
572 for sub in &scanned_submodules {
573 root_sub_repo
574 .upsert(&SubmoduleInput {
575 relative_path: sub.mount_path.clone(),
576 name: sub.name.clone(),
577 db_path: sub.db_path.clone(),
578 commit_hash: sub.commit_hash.clone(),
579 })
580 .map_err(|e| {
581 CliError::scan(format!(
582 "failed to upsert submodule '{}' in root DB: {e}",
583 sub.mount_path
584 ))
585 })?;
586 }
587
588 if let Ok(stored_submodules) = root_sub_repo.list() {
590 let active_paths: std::collections::HashSet<&str> =
591 submodule_paths.iter().map(|s| s.as_str()).collect();
592 for stored in &stored_submodules {
593 if !active_paths.contains(stored.relative_path.as_str()) {
594 let _ = root_sub_repo.delete(&stored.relative_path);
595 }
596 }
597 }
598
599 let root_meta = SqliteRepoMetadataRepository::new(db.connection().clone());
601 write_metadata(
602 &root_meta,
603 &[
604 ("project_name", &project_name),
605 ("project_root", path.to_string_lossy().as_ref()),
606 ("file_count", &file_count.to_string()),
607 ("convention_count", &aggregated.len().to_string()),
608 ("last_scan_time", &unix_now().to_string()),
609 ],
610 )?;
611
612 let elapsed = start.elapsed();
616
617 let report_data = crate::report::build_report_data(
619 &scan_result,
620 &all_files,
621 aggregated,
622 &db_path,
623 elapsed,
624 config.scan.exclude_submodules,
625 );
626 crate::report::print_report(&report_data, verbosity, color);
627
628 Ok(())
629}
630
631fn spinner_style() -> ProgressStyle {
633 ProgressStyle::with_template(" {spinner:.cyan} {msg}")
634 .expect("valid template")
635 .tick_strings(&["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏", "✓"])
636}
637
638fn make_spinner(msg: &str, visible: bool) -> ProgressBar {
644 let sp = ProgressBar::new_spinner();
645 if visible {
646 sp.set_style(spinner_style());
647 sp.set_message(msg.to_owned());
648 sp.enable_steady_tick(std::time::Duration::from_millis(80));
649 } else {
650 sp.set_draw_target(indicatif::ProgressDrawTarget::hidden());
651 }
652 sp
653}
654
655fn make_manual_spinner(msg: &str, visible: bool) -> ProgressBar {
661 let sp = ProgressBar::new_spinner();
662 if visible {
663 sp.set_style(spinner_style());
664 sp.set_message(msg.to_owned());
665 sp.tick(); } else {
667 sp.set_draw_target(indicatif::ProgressDrawTarget::hidden());
668 }
669 sp
670}
671
672fn report_ir_cache_wipe(report: &StaleIrWipeReport, scope: &str, visible: bool) {
680 if report.is_empty() {
681 return;
682 }
683
684 let versions = report
687 .cached_versions
688 .iter()
689 .map(u8::to_string)
690 .collect::<Vec<_>>()
691 .join(", ");
692 let current = seshat_storage::IR_SCHEMA_VERSION;
693
694 tracing::warn!(
695 scope = scope,
696 stale_count = report.stale_count,
697 cached_versions = versions,
698 current_version = current,
699 symbol_definitions_cleared = report.symbol_definitions_cleared,
700 symbol_imports_cleared = report.symbol_imports_cleared,
701 "IR cache schema mismatch — wiped stale rows, scan will re-parse from source",
702 );
703
704 if visible {
705 eprintln!(
706 " \u{21bb} IR cache schema mismatch ({scope}): cached v[{versions}] != current v{current}, \
707 cleared {n} stale IR rows — re-parsing from scratch",
708 n = report.stale_count,
709 );
710 }
711}
712
/// Summary counts returned by `detect_and_persist`.
#[derive(Debug)]
struct DetectionReport {
    /// File count reported by the detection cycle.
    file_count: usize,
    /// Convention count reported by the detection cycle.
    convention_count: usize,
}
721
722fn detect_and_persist(
727 db: &Database,
728 scan_branch: &BranchId,
729 detection_config: &DetectionConfig,
730 scan_result: &ScanResult,
731) -> Result<DetectionReport, CliError> {
732 let file_dates_map: std::collections::HashMap<String, Option<i64>> = scan_result
734 .file_dates
735 .iter()
736 .map(|(p, &ts)| (p.to_string_lossy().to_string(), Some(ts)))
737 .collect();
738
739 let report = seshat_graph::run_detection_cycle(
740 db.connection(),
741 scan_branch,
742 detection_config,
743 &file_dates_map,
744 &scan_result.source_map,
745 )
746 .map_err(|e| CliError::scan(format!("detection pipeline failed: {e}")))?;
747
748 Ok(DetectionReport {
749 file_count: report.file_count,
750 convention_count: report.convention_count,
751 })
752}
753
754fn write_metadata(
756 repo: &SqliteRepoMetadataRepository,
757 pairs: &[(&str, &str)],
758) -> Result<(), CliError> {
759 for (key, value) in pairs {
760 repo.set(key, value)
761 .map_err(|e| CliError::scan(format!("failed to write metadata '{key}': {e}")))?;
762 }
763 Ok(())
764}
765
766fn generate_embeddings(
777 db: &Database,
778 embedding_config: &seshat_embedding::EmbeddingConfig,
779 all_files: &[seshat_core::ProjectFile],
780 source_map: &std::collections::HashMap<std::path::PathBuf, String>,
781 changed_paths: &std::collections::HashSet<std::path::PathBuf>,
782 branch_id: &str,
783 show: bool,
784) -> Result<(), CliError> {
785 let provider = match seshat_embedding::create_provider(embedding_config) {
786 Ok(p) => p,
787 Err(e) => {
788 tracing::warn!("Failed to create embedding provider: {e}");
789 if show {
790 eprintln!(" \u{26a0} Embedding provider unavailable: {e}");
791 }
792 return Ok(());
793 }
794 };
795
796 let mut items: Vec<(String, String, String, String)> = Vec::new();
798 for file in all_files {
799 if !changed_paths.contains(&file.path) {
803 continue;
804 }
805 let source = match source_map.get(&file.path) {
807 Some(s) => s,
808 None => continue,
809 };
810
811 let file_path = file.path.to_string_lossy().to_string();
812
813 let source_lines: Option<Vec<String>> = Some(source.lines().map(str::to_owned).collect());
815
816 let import_context = {
820 let modules: Vec<&str> = file
821 .imports
822 .iter()
823 .map(|i| i.module.as_str())
824 .filter(|m| !m.is_empty())
825 .take(20)
826 .collect();
827 if modules.is_empty() {
828 String::new()
829 } else {
830 format!("\nuses: {}", modules.join(", "))
831 }
832 };
833
834 for func in &file.functions {
835 let vis = if func.is_public { "pub " } else { "" };
836 let asyncness = if func.is_async { "async " } else { "" };
837 let params = func.parameters.join(", ");
838 let body_snippet =
839 extract_body_snippet(source_lines.as_deref(), func.line, func.end_line);
840 let text = format!(
841 "{vis}{asyncness}fn {}({params}) in {file_path}{body_snippet}{import_context}",
842 func.name
843 );
844 items.push((
845 file_path.clone(),
846 func.name.clone(),
847 "function".to_string(),
848 text,
849 ));
850 }
851 for ty in &file.types {
852 let vis = if ty.is_public { "pub " } else { "" };
853 let kind = match ty.kind {
856 seshat_core::TypeDefKind::Struct => "struct",
857 seshat_core::TypeDefKind::Enum => "enum",
858 seshat_core::TypeDefKind::Trait => "trait",
859 seshat_core::TypeDefKind::Interface => "interface",
860 seshat_core::TypeDefKind::Class => "class",
861 seshat_core::TypeDefKind::TypeAlias => "type_alias",
862 };
863 let text = format!("{vis}{kind} {} in {file_path}{import_context}", ty.name);
864 items.push((file_path.clone(), ty.name.clone(), "type".to_string(), text));
865 }
866 for exp in &file.exports {
867 let default = if exp.is_default { "default " } else { "" };
868 let text = format!(
869 "export {default}{} in {file_path}{import_context}",
870 exp.name
871 );
872 items.push((
873 file_path.clone(),
874 exp.name.clone(),
875 "export".to_string(),
876 text,
877 ));
878 }
879 }
880
881 if items.is_empty() {
882 tracing::info!("No code items to embed");
883 return Ok(());
884 }
885
886 let total = items.len();
887 let batch_size = embedding_config.batch_size.max(1);
888 let embed_sp = make_spinner(&format!("Generating embeddings... 0/{total}"), show);
889
890 let conn = db.connection().clone();
891 let embedding_repo = SqliteEmbeddingRepository::new(conn);
892
893 let mut current_keys: std::collections::HashSet<(String, String, String)> =
897 std::collections::HashSet::new();
898 for file in all_files {
899 let file_path = file.path.to_string_lossy().to_string();
900 for func in &file.functions {
901 current_keys.insert((file_path.clone(), func.name.clone(), "function".to_string()));
902 }
903 for ty in &file.types {
904 current_keys.insert((file_path.clone(), ty.name.clone(), "type".to_string()));
905 }
906 for exp in &file.exports {
907 current_keys.insert((file_path.clone(), exp.name.clone(), "export".to_string()));
908 }
909 }
910
911 let mut embedded_count: usize = 0;
920
921 let _embedding_outcome: Result<(), ()> = 'embed: {
922 for chunk in items.chunks(batch_size) {
923 let texts: Vec<String> = chunk.iter().map(|(_, _, _, text)| text.clone()).collect();
924
925 match provider.embed(&texts) {
926 Ok(embeddings) => {
927 let inputs: Vec<EmbeddingInput> = chunk
928 .iter()
929 .zip(embeddings)
930 .map(
931 |((file_path, item_name, item_kind, _), emb)| EmbeddingInput {
932 file_path: file_path.clone(),
933 item_name: item_name.clone(),
934 item_kind: item_kind.clone(),
935 embedding: emb,
936 },
937 )
938 .collect();
939
940 if let Err(e) = embedding_repo.upsert_batch(branch_id, &inputs) {
941 tracing::warn!("Failed to store embedding batch: {e}");
942 embed_sp.finish_with_message(
943 "Generating embeddings... failed (storage error)".to_string(),
944 );
945 break 'embed Err(());
946 }
947
948 embedded_count += chunk.len();
949 embed_sp
950 .set_message(format!("Generating embeddings... {embedded_count}/{total}"));
951 }
952 Err(e) => {
953 tracing::warn!(
954 embedded = embedded_count,
955 total = total,
956 remaining = total - embedded_count,
957 "Embedding provider error mid-batch; {embedded_count}/{total} items stored, \
958 {} items skipped. Database contains partial embeddings: {e}",
959 total - embedded_count,
960 );
961 embed_sp.finish_with_message(format!(
962 "Generating embeddings... failed ({embedded_count}/{total})"
963 ));
964 if show {
965 eprintln!(
966 " \u{26a0} Embedding generation failed after {embedded_count}/{total} items \
967 ({} skipped, partial state): {e}",
968 total - embedded_count,
969 );
970 }
971 break 'embed Err(());
972 }
973 }
974 }
975
976 embed_sp.finish_with_message(format!("Generating embeddings... {embedded_count}/{total}"));
977
978 tracing::info!(
979 count = embedded_count,
980 total = total,
981 "Generated code embeddings"
982 );
983
984 Ok(())
985 };
986
987 match embedding_repo.get_stored_keys(branch_id) {
989 Ok(stored_keys) => {
990 let stored_set: std::collections::HashSet<_> = stored_keys.into_iter().collect();
991 let stale: Vec<_> = stored_set.difference(¤t_keys).cloned().collect();
992
993 if !stale.is_empty() {
994 match embedding_repo.delete_stale(branch_id, &stale) {
995 Ok(pruned) => {
996 tracing::info!(pruned = pruned, "Pruned {} stale embedding rows", pruned);
997 }
998 Err(e) => {
999 tracing::warn!(
1000 "Failed to prune stale embedding rows: {e} (will retry next scan)"
1001 );
1002 }
1003 }
1004 }
1005 }
1006 Err(e) => {
1007 tracing::warn!(
1008 "Failed to query stored embedding keys for stale cleanup: {e} (will retry next scan)"
1009 );
1010 }
1011 }
1012
1013 Ok(())
1014}
1015
/// Returns a short excerpt of the lines `start_line..=end_line` (1-based,
/// inclusive) from `source_lines`, prefixed with a newline.
///
/// Ranges of up to eight lines are returned whole. Longer ranges keep the
/// first five and last three lines with a `...` line in between. The
/// excerpt is trimmed of surrounding whitespace before the newline prefix
/// is added.
///
/// Returns an empty string when there are no source lines, `start_line` is
/// zero, or the range is empty after clamping to the available lines.
fn extract_body_snippet(
    source_lines: Option<&[String]>,
    start_line: usize,
    end_line: usize,
) -> String {
    const HEAD_LINES: usize = 5;
    const TAIL_LINES: usize = 3;

    let all_lines = source_lines.unwrap_or(&[]);
    if all_lines.is_empty() || start_line == 0 {
        return String::new();
    }

    // Convert to a half-open, zero-based range clamped to the file length.
    let first = std::cmp::min(start_line - 1, all_lines.len());
    let last = std::cmp::min(end_line, all_lines.len());
    if first >= last {
        return String::new();
    }
    let body = &all_lines[first..last];

    let mut kept: Vec<&str> = Vec::new();
    if body.len() > HEAD_LINES + TAIL_LINES {
        kept.extend(body[..HEAD_LINES].iter().map(String::as_str));
        kept.push("...");
        kept.extend(body[body.len() - TAIL_LINES..].iter().map(String::as_str));
    } else {
        kept.extend(body.iter().map(String::as_str));
    }

    let joined = kept.join("\n");
    let mut snippet = String::from("\n");
    snippet.push_str(joined.trim());
    snippet
}
1070
1071#[cfg(test)]
1072mod tests {
1073 use super::*;
1074 use seshat_scanner::scan_project;
1075 use seshat_storage::{
1076 Database, FileIRRepository, RepoMetadataRepository, SqliteFileIRRepository,
1077 SqliteRepoMetadataRepository, SqliteSubmoduleRepository, SubmoduleInput,
1078 SubmoduleRepository,
1079 };
1080 use std::fs;
1081 use tempfile::tempdir;
1082
1083 fn create_project_with_submodule() -> tempfile::TempDir {
1096 let dir = tempdir().expect("create tempdir");
1097 let root = dir.path();
1098
1099 fs::create_dir_all(root.join(".git")).unwrap();
1101 fs::create_dir_all(root.join("src")).unwrap();
1102 fs::write(
1103 root.join("src/main.rs"),
1104 "pub fn main() { println!(\"hello\"); }\n",
1105 )
1106 .unwrap();
1107
1108 fs::write(
1110 root.join(".gitmodules"),
1111 "[submodule \"frontend\"]\n\tpath = frontend\n\turl = https://example.com/fe.git\n",
1112 )
1113 .unwrap();
1114
1115 fs::create_dir_all(root.join("frontend/.git")).unwrap();
1117 fs::create_dir_all(root.join("frontend/src")).unwrap();
1118 fs::write(
1119 root.join("frontend/src/app.ts"),
1120 "export function app(): string { return 'hello'; }\n",
1121 )
1122 .unwrap();
1123
1124 dir
1125 }
1126
    /// Scans a root project and its `frontend` submodule into two separate
    /// in-memory databases and checks the file IR, metadata and submodule
    /// rows each one ends up holding.
    #[test]
    fn submodule_scan_creates_separate_dbs_with_correct_structure() {
        let dir = create_project_with_submodule();
        let root = dir.path();
        let config = seshat_core::ScanConfig::default();

        // One database for the root project, another for the submodule.
        let root_db = Database::open(":memory:").expect("open root DB");
        let sub_db = Database::open(":memory:").expect("open submodule DB");

        // Root scan: the submodule is reported as excluded and its file is
        // not counted among the root's discovered files.
        let root_result = scan_project(root, &config, &root_db, BranchId::from("main"))
            .expect("root scan should succeed");
        assert!(
            !root_result.excluded_submodules.is_empty(),
            "should detect submodule in .gitmodules"
        );
        assert_eq!(root_result.excluded_submodules, vec!["frontend"]);

        assert_eq!(
            root_result.files_discovered, 1,
            "root should discover 1 file (main.rs)"
        );

        // Submodule scan, rooted at the submodule directory.
        let sub_root = root.join("frontend");
        let sub_result = scan_project(&sub_root, &config, &sub_db, BranchId::from("main"))
            .expect("submodule scan should succeed");
        assert_eq!(
            sub_result.files_discovered, 1,
            "submodule should discover 1 file (app.ts)"
        );

        // Each database holds exactly its own file IR.
        use seshat_storage::{FileIRRepository, SqliteFileIRRepository};
        let branch = BranchId::from("main");

        let root_files = SqliteFileIRRepository::new(root_db.connection().clone())
            .get_by_branch(&branch)
            .unwrap();
        assert_eq!(root_files.len(), 1, "root DB should have 1 file IR");

        let sub_files = SqliteFileIRRepository::new(sub_db.connection().clone())
            .get_by_branch(&branch)
            .unwrap();
        assert_eq!(sub_files.len(), 1, "submodule DB should have 1 file IR");

        // Write and read back submodule metadata using the same keys that
        // `run_scan` writes for a submodule database.
        let sub_meta = SqliteRepoMetadataRepository::new(sub_db.connection().clone());
        sub_meta.set("parent_project", "my-project").unwrap();
        sub_meta.set("mount_path", "frontend").unwrap();
        sub_meta
            .set("file_count", &sub_result.files_discovered.to_string())
            .unwrap();
        sub_meta.set("convention_count", "0").unwrap();
        sub_meta.set("last_scan_time", "1700000000").unwrap();

        assert_eq!(
            sub_meta.get("parent_project").unwrap().unwrap(),
            "my-project"
        );
        assert_eq!(sub_meta.get("mount_path").unwrap().unwrap(), "frontend");
        assert_eq!(sub_meta.get("file_count").unwrap().unwrap(), "1");

        // Register the submodule in the root database's submodule table.
        let root_sub_repo = SqliteSubmoduleRepository::new(root_db.connection().clone());
        root_sub_repo
            .insert(&SubmoduleInput {
                relative_path: "frontend".to_string(),
                name: "frontend".to_string(),
                db_path: "/data/seshat/repos/my-project/frontend.db".to_string(),
                commit_hash: None,
            })
            .unwrap();

        let stored = root_sub_repo.list().unwrap();
        assert_eq!(stored.len(), 1);
        assert_eq!(stored[0].relative_path, "frontend");
        assert_eq!(stored[0].name, "frontend");

        // Root metadata round-trip.
        let root_meta = SqliteRepoMetadataRepository::new(root_db.connection().clone());
        root_meta.set("project_name", "my-project").unwrap();
        root_meta
            .set("file_count", &root_result.files_discovered.to_string())
            .unwrap();
        root_meta.set("convention_count", "0").unwrap();
        root_meta.set("last_scan_time", "1700000000").unwrap();

        assert_eq!(
            root_meta.get("project_name").unwrap().unwrap(),
            "my-project"
        );
        assert_eq!(root_meta.get("file_count").unwrap().unwrap(), "1");
    }
1223
1224 #[test]
1225 fn uninitialised_submodule_is_skipped() {
1226 let dir = tempdir().expect("create tempdir");
1227 let root = dir.path();
1228
1229 fs::create_dir_all(root.join(".git")).unwrap();
1230 fs::create_dir_all(root.join("src")).unwrap();
1231 fs::write(root.join("src/main.rs"), "pub fn main() {}\n").unwrap();
1232
1233 fs::write(
1235 root.join(".gitmodules"),
1236 "[submodule \"libs/shared\"]\n\tpath = libs/shared\n\turl = https://example.com\n",
1237 )
1238 .unwrap();
1239 fs::create_dir_all(root.join("libs/shared")).unwrap();
1240 let config = seshat_core::ScanConfig::default();
1243 let db = Database::open(":memory:").expect("open DB");
1244
1245 let result =
1246 scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
1247
1248 assert_eq!(result.excluded_submodules, vec!["libs/shared"]);
1250 assert_eq!(result.files_discovered, 1);
1252 }
1253
/// A stored submodule row whose path is not in the active set is removed by
/// the inline cleanup pass, leaving the submodules table empty.
#[test]
fn submodule_removed_from_gitmodules_gets_deleted_from_table() {
    let database = Database::open(":memory:").expect("open DB");
    let submodules = SqliteSubmoduleRepository::new(database.connection().clone());

    // Seed a row for a submodule that is absent from the active set below.
    let stale = SubmoduleInput {
        relative_path: "old-module".to_string(),
        name: "old-module".to_string(),
        db_path: "/data/repos/project/old-module.db".to_string(),
        commit_hash: Some("abc123".to_string()),
    };
    submodules.insert(&stale).unwrap();

    let active: std::collections::HashSet<&str> = std::iter::once("frontend").collect();

    // Cleanup pass: delete every stored row that is not active. The delete
    // result is deliberately ignored; the assertion below observes the outcome.
    submodules
        .list()
        .unwrap()
        .iter()
        .filter(|row| !active.contains(row.relative_path.as_str()))
        .for_each(|row| {
            let _ = submodules.delete(&row.relative_path);
        });

    assert!(
        submodules.list().unwrap().is_empty(),
        "old-module should have been removed from submodules table"
    );
}
1285
/// Returns `true` only when both a stored and a current commit hash are
/// present and they are equal; a missing hash on either side yields `false`.
fn should_skip_submodule(stored_hash: Option<&str>, current_hash: Option<&str>) -> bool {
    matches!(
        (stored_hash, current_hash),
        (Some(stored), Some(current)) if stored == current
    )
}
1296
/// Identical stored and current hashes allow the submodule to be skipped.
#[test]
fn change_detection_skip_when_hashes_match() {
    let hash = "abc123def456abc123def456abc123def456abc123";
    assert!(should_skip_submodule(Some(hash), Some(hash)));
}
1305
/// Differing stored and current hashes must not be skipped.
#[test]
fn change_detection_rescan_when_hashes_differ() {
    let stored = "abc123def456abc123def456abc123def456abc123";
    let current = "000000def456abc123def456abc123def456abc123";
    assert!(!should_skip_submodule(Some(stored), Some(current)));
}
1314
/// Without a stored hash there is nothing to compare against, so no skip.
#[test]
fn change_detection_rescan_when_no_stored_hash() {
    let current = Some("abc123def456abc123def456abc123def456abc123");
    assert!(!should_skip_submodule(None, current));
}
1323
/// Without a current hash the submodule must not be skipped.
#[test]
fn change_detection_rescan_when_no_current_hash() {
    let stored = Some("abc123def456abc123def456abc123def456abc123");
    assert!(!should_skip_submodule(stored, None));
}
1332
/// Two missing hashes are not treated as a match.
#[test]
fn change_detection_rescan_when_both_hashes_none() {
    let (stored, current): (Option<&str>, Option<&str>) = (None, None);
    assert!(!should_skip_submodule(stored, current));
}
1338
/// A path that was never inserted is not found in a fresh submodules table.
///
/// NOTE(review): the name mentions a full scan, but only the empty lookup is
/// asserted here; the skip decision itself is not exercised.
#[test]
fn change_detection_new_submodule_triggers_full_scan() {
    let database = Database::open(":memory:").expect("open DB");
    let submodules = SqliteSubmoduleRepository::new(database.connection().clone());

    // Nothing has been inserted, so the lookup must come back empty.
    let lookup = submodules.find_by_path("frontend").unwrap();
    assert!(lookup.is_none(), "new submodule should not be in table");
}
1352
/// After `update` replaces the commit hash of an existing row, the stored
/// hash equals the new one and a comparison against it permits a skip.
#[test]
fn change_detection_updated_hash_stored_after_rescan() {
    let database = Database::open(":memory:").expect("open DB");
    let submodules = SqliteSubmoduleRepository::new(database.connection().clone());

    // Builds the "frontend" row with the given commit hash.
    let frontend_with = |hash: &str| SubmoduleInput {
        relative_path: "frontend".to_string(),
        name: "frontend".to_string(),
        db_path: "/data/repos/project/frontend.db".to_string(),
        commit_hash: Some(hash.to_owned()),
    };

    let old_hash = "aaaa".repeat(10);
    submodules.insert(&frontend_with(&old_hash)).unwrap();

    let new_hash = "bbbb".repeat(10);
    submodules.update(&frontend_with(&new_hash)).unwrap();

    let row = submodules.find_by_path("frontend").unwrap().unwrap();
    let stored_hash = row.commit_hash.as_deref();
    assert_eq!(
        stored_hash,
        Some(new_hash.as_str()),
        "stored hash should be updated after rescan"
    );

    // The freshly stored hash now matches the current one.
    assert!(should_skip_submodule(stored_hash, Some(new_hash.as_str())));
}
1393
/// A stored submodule whose path is still in the active set survives the
/// inline cleanup pass.
#[test]
fn change_detection_skipped_submodule_not_deleted_from_table() {
    let database = Database::open(":memory:").expect("open DB");
    let submodules = SqliteSubmoduleRepository::new(database.connection().clone());

    let commit = "abcd".repeat(10);
    let frontend = SubmoduleInput {
        relative_path: "frontend".to_string(),
        name: "frontend".to_string(),
        db_path: "/data/repos/project/frontend.db".to_string(),
        commit_hash: Some(commit.clone()),
    };
    submodules.insert(&frontend).unwrap();

    let active: std::collections::HashSet<&str> = std::iter::once("frontend").collect();

    // Cleanup pass: only rows outside the active set are deleted.
    let rows = submodules.list().unwrap();
    for row in rows
        .iter()
        .filter(|row| !active.contains(row.relative_path.as_str()))
    {
        let _ = submodules.delete(&row.relative_path);
    }

    let survivors = submodules.list().unwrap();
    assert_eq!(
        survivors.len(),
        1,
        "skipped submodule should remain in table"
    );
    assert_eq!(survivors[0].relative_path, "frontend");
}
1428
/// Builds `n` synthetic source lines named `line_1` through `line_n`.
fn make_lines(n: usize) -> Vec<String> {
    let mut lines = Vec::with_capacity(n);
    for number in 1..=n {
        lines.push(format!("line_{number}"));
    }
    lines
}
1434
/// With no source lines supplied, the snippet is the empty string.
#[test]
fn body_snippet_none_source_returns_empty() {
    let snippet = extract_body_snippet(None, 1, 5);
    assert_eq!(snippet, "");
}
1439
/// A start line of zero yields the empty string.
#[test]
fn body_snippet_start_zero_returns_empty() {
    let source = make_lines(10);
    let snippet = extract_body_snippet(Some(&source), 0, 5);
    assert_eq!(snippet, "");
}
1446
/// A range covering exactly one line produces a non-empty snippet that
/// contains that line.
#[test]
fn body_snippet_single_line_function() {
    let source = make_lines(20);
    let snippet = extract_body_snippet(Some(&source), 5, 5);

    assert!(!snippet.is_empty());
    assert!(snippet.contains("line_5"));
}
1455
/// A five-line range keeps its first and last lines and has no elision marker.
#[test]
fn body_snippet_short_function_returns_all_lines() {
    let source = make_lines(20);
    let snippet = extract_body_snippet(Some(&source), 3, 7);

    for expected in ["line_3", "line_7"] {
        assert!(snippet.contains(expected));
    }
    assert!(!snippet.contains("..."));
}
1465
/// A 50-line range is shortened to a head and a tail joined by `...`, with
/// the middle of the body left out.
#[test]
fn body_snippet_long_function_has_head_and_tail() {
    let lines = make_lines(50);
    let result = extract_body_snippet(Some(&lines), 1, 50);

    // `line_5` is a prefix of `line_50`, so a plain substring check would pass
    // even if the fifth line were missing. Only count occurrences that are not
    // immediately followed by another digit.
    let has_line = |label: &str| {
        result.match_indices(label).any(|(at, hit)| {
            !result[at + hit.len()..].starts_with(|c: char| c.is_ascii_digit())
        })
    };

    // Head of the body.
    assert!(has_line("line_1"));
    assert!(has_line("line_5"));
    // Elision marker between head and tail.
    assert!(result.contains("..."));
    // Tail of the body.
    assert!(has_line("line_50"));
    assert!(has_line("line_48"));
    // The middle is dropped.
    assert!(!result.contains("line_25"));
}
1479
/// An eight-line range is returned without an elision marker and includes
/// both its first and last lines.
#[test]
fn body_snippet_exactly_boundary_no_overlap() {
    let source = make_lines(20);
    let snippet = extract_body_snippet(Some(&source), 1, 8);

    let truncated = snippet.contains("...");
    assert!(!truncated, "8-line function should not be truncated");

    for expected in ["line_1", "line_8"] {
        assert!(snippet.contains(expected));
    }
}
1493
/// The snippet starts with a newline, and that newline is not followed by a
/// space even though the input lines begin with one.
#[test]
fn body_snippet_trim_applied() {
    let source: Vec<String> = [" fn foo() {", " let x = 1;", " }"]
        .iter()
        .map(|text| (*text).to_owned())
        .collect();

    let snippet = extract_body_snippet(Some(&source), 1, 3);

    assert!(snippet.starts_with('\n'));
    assert!(!snippet.starts_with("\n "));
}
1506
/// An empty line list yields the empty string.
#[test]
fn body_snippet_empty_lines_returns_empty() {
    let source = Vec::<String>::new();
    let snippet = extract_body_snippet(Some(&source), 1, 5);
    assert_eq!(snippet, "");
}
1512
/// A start line greater than the end line yields the empty string.
#[test]
fn body_snippet_start_after_end_returns_empty() {
    let source = make_lines(20);
    let snippet = extract_body_snippet(Some(&source), 10, 5);
    assert_eq!(snippet, "");
}
1519
/// An end line far past the available lines still produces a snippet that
/// contains the first and the last available line.
#[test]
fn body_snippet_end_line_clamped_to_available() {
    let source = make_lines(5);
    let snippet = extract_body_snippet(Some(&source), 1, 999);

    for expected in ["line_1", "line_5"] {
        assert!(snippet.contains(expected));
    }
}
1528
/// Starting and ending on the last available line returns that line and not
/// the one before it.
#[test]
fn body_snippet_start_at_last_line_returns_single_line() {
    let source = make_lines(5);
    let snippet = extract_body_snippet(Some(&source), 5, 5);

    let has_last = snippet.contains("line_5");
    let has_previous = snippet.contains("line_4");
    assert!(has_last);
    assert!(!has_previous);
}
1537
/// A start line beyond the last available line yields the empty string.
#[test]
fn body_snippet_start_past_lines_returns_empty() {
    let source = make_lines(3);
    let snippet = extract_body_snippet(Some(&source), 4, 4);
    assert_eq!(snippet, "");
}
1544
/// A 15-line range keeps lines 1 and 5 from the head and lines 13 and 15 from
/// the tail, inserts `...`, and omits lines 6 and 10 from the middle.
#[test]
fn body_snippet_long_body_skips_middle_lines() {
    let lines = make_lines(20);
    let result = extract_body_snippet(Some(&lines), 1, 15);

    // `line_1` is a prefix of `line_13` and `line_15`, so a plain substring
    // check would pass even if the first line were missing. Only count
    // occurrences that are not immediately followed by another digit.
    let has_line = |label: &str| {
        result.match_indices(label).any(|(at, hit)| {
            !result[at + hit.len()..].starts_with(|c: char| c.is_ascii_digit())
        })
    };

    // Head of the body.
    assert!(has_line("line_1"));
    assert!(has_line("line_5"));
    // Middle lines are skipped.
    assert!(!result.contains("line_6"));
    assert!(!result.contains("line_10"));
    // Tail of the body.
    assert!(has_line("line_13"));
    assert!(has_line("line_15"));
    // Elision marker is present.
    assert!(result.contains("..."));
}
1558
/// A file stored under a feature branch is counted when `detect_and_persist`
/// is called with that same branch.
#[test]
fn detect_and_persist_uses_branch_id_for_loading_files() {
    use seshat_core::test_helpers::make_project_file;
    use seshat_storage::{FileIRRepository, SqliteFileIRRepository};

    let database = Database::open(":memory:").expect("open DB");
    let branch = BranchId::from("feat/my-feature");

    // Store a single file under the feature branch.
    let project_file = make_project_file(seshat_core::Language::Rust);
    {
        let file_repo = SqliteFileIRRepository::new(database.connection().clone());
        file_repo
            .upsert(&branch, &project_file, None)
            .expect("upsert file under feature branch");
    }

    let scan_result = seshat_scanner::ScanResult {
        files_discovered: 1,
        files_parsed: 1,
        nodes_persisted: 0,
        edges_persisted: 0,
        manifests_analyzed: 0,
        docs_ingested: 0,
        manifest_analyses: Vec::new(),
        incremental: None,
        file_dates: Default::default(),
        excluded_submodules: Vec::new(),
        source_map: Default::default(),
        changed_paths: Default::default(),
    };

    let detection = DetectionConfig::default();
    let outcome = detect_and_persist(&database, &branch, &detection, &scan_result);
    assert!(
        outcome.is_ok(),
        "detect_and_persist should succeed: {outcome:?}"
    );

    let report = outcome.unwrap();
    assert_eq!(
        report.file_count, 1,
        "should find the file stored under feature branch"
    );
}
1601
/// A file stored under a feature branch is not counted when
/// `detect_and_persist` is called with a different branch.
#[test]
fn detect_and_persist_returns_zero_for_wrong_branch() {
    use seshat_core::test_helpers::make_project_file;
    use seshat_storage::{FileIRRepository, SqliteFileIRRepository};

    let database = Database::open(":memory:").expect("open DB");
    let stored_under = BranchId::from("feat/my-feature");
    let queried = BranchId::from("main");

    // The only file is stored under the feature branch.
    let project_file = make_project_file(seshat_core::Language::Rust);
    {
        let file_repo = SqliteFileIRRepository::new(database.connection().clone());
        file_repo
            .upsert(&stored_under, &project_file, None)
            .expect("upsert file under feature branch");
    }

    let scan_result = seshat_scanner::ScanResult {
        files_discovered: 1,
        files_parsed: 1,
        nodes_persisted: 0,
        edges_persisted: 0,
        manifests_analyzed: 0,
        docs_ingested: 0,
        manifest_analyses: Vec::new(),
        incremental: None,
        file_dates: Default::default(),
        excluded_submodules: Vec::new(),
        source_map: Default::default(),
        changed_paths: Default::default(),
    };

    let detection = DetectionConfig::default();
    let outcome = detect_and_persist(&database, &queried, &detection, &scan_result);
    assert!(outcome.is_ok());

    let report = outcome.unwrap();
    assert_eq!(report.file_count, 0, "main branch should have no files");
}
1637
/// After `detect_and_persist` runs for a feature branch, nodes are found
/// under that branch and none under `main`.
#[test]
fn detect_and_persist_persists_conventions_under_correct_branch() {
    use seshat_core::test_helpers::make_project_file;
    use seshat_storage::{
        FileIRRepository, NodeRepository, SqliteFileIRRepository, SqliteNodeRepository,
    };

    let database = Database::open(":memory:").expect("open DB");
    let branch = BranchId::from("feat/snippets");

    // Store a single file under the feature branch.
    let project_file = make_project_file(seshat_core::Language::Rust);
    {
        let file_repo = SqliteFileIRRepository::new(database.connection().clone());
        file_repo
            .upsert(&branch, &project_file, None)
            .expect("upsert file under feature branch");
    }

    let scan_result = seshat_scanner::ScanResult {
        files_discovered: 1,
        files_parsed: 1,
        nodes_persisted: 0,
        edges_persisted: 0,
        manifests_analyzed: 0,
        docs_ingested: 0,
        manifest_analyses: Vec::new(),
        incremental: None,
        file_dates: Default::default(),
        excluded_submodules: Vec::new(),
        source_map: Default::default(),
        changed_paths: Default::default(),
    };

    let detection = DetectionConfig::default();
    let outcome = detect_and_persist(&database, &branch, &detection, &scan_result);
    assert!(outcome.is_ok());

    // Look up persisted nodes per branch.
    let node_repo = SqliteNodeRepository::new(database.connection().clone());
    let nodes_on = |which: &BranchId| node_repo.find_by_branch(which).expect("find nodes");

    let feature_nodes = nodes_on(&branch);
    assert!(
        !feature_nodes.is_empty(),
        "conventions should be persisted under feature branch"
    );

    let main_nodes = nodes_on(&BranchId::from("main"));
    assert!(
        main_nodes.is_empty(),
        "no conventions should be under main branch"
    );
}
1689
/// Scanning under a non-`main` branch fills `source_map` and stores the file
/// rows under the scanned branch.
///
/// NOTE(review): the final assertion also passes when `main` holds a
/// different, non-zero number of files than the scanned branch.
#[test]
fn scan_project_with_source_map_produces_snippets() {
    let workspace = tempdir().expect("create tempdir");
    let project = workspace.path();

    // Minimal repository containing one Rust file with an import.
    for sub_dir in [".git", "src"] {
        fs::create_dir_all(project.join(sub_dir)).unwrap();
    }
    let main_rs = "use std::error::Error;\n\npub fn main() {}\n";
    fs::write(project.join("src/main.rs"), main_rs).unwrap();

    let scan_config = seshat_core::ScanConfig::default();
    let database = Database::open(":memory:").expect("open DB");
    let scan_branch = BranchId::from("test-branch");

    let outcome = scan_project(project, &scan_config, &database, scan_branch.clone())
        .expect("scan should succeed");
    assert!(
        !outcome.source_map.is_empty(),
        "source_map should contain files"
    );

    // Look up stored file rows per branch.
    let file_ir_repo = SqliteFileIRRepository::new(database.connection().clone());
    let files_on = |which: &BranchId| file_ir_repo.get_by_branch(which).expect("get files");

    let scanned_files = files_on(&scan_branch);
    assert!(
        !scanned_files.is_empty(),
        "files should be stored under the scan branch"
    );

    let main_files = files_on(&BranchId::from("main"));
    assert!(
        main_files.is_empty() || main_files.len() != scanned_files.len(),
        "files should NOT be stored under main branch when scanning a different branch"
    );
}
1728}