1use std::collections::{HashMap, HashSet};
14use std::path::{Path, PathBuf};
15
16use globset::{Glob, GlobSetBuilder};
17use ignore::WalkBuilder;
18use seshat_core::{BranchId, Edge, EdgeId, NodeId, ProjectFile, ScanConfig};
19use seshat_storage::{
20 BranchMetadataRepository, BranchRepository, Database, EdgeRepository, FileIRRepository,
21 NodeRepository, SqliteBranchMetadataRepository, SqliteBranchRepository, SqliteEdgeRepository,
22 SqliteFileIRRepository, SqliteNodeRepository,
23};
24
25use crate::discovery::discover_files;
26use crate::documentation::parse_documentation;
27use crate::error::ScanError;
28use crate::git_dates::collect_git_file_dates;
29use crate::manifest::{ManifestAnalysis, ManifestType, analyze_manifests};
30use crate::module_structure::build_module_graph;
31use crate::parser::{content_hash, parse_file};
32
/// Progress events passed to the callback of [`scan_project_with_progress`].
///
/// Events compare by value (`PartialEq`/`Eq`), so callers and tests can
/// assert on the exact sequence of events they received.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ScanProgress {
    /// Emitted once file discovery has returned; `count` is the number of
    /// discovered source files.
    Discovering { count: usize },
    /// Emitted immediately after [`ScanProgress::Discovering`] with the same
    /// number as `total`.
    DiscoveryDone { total: usize },
    /// Emitted before git file dates are collected.
    CollectingGitHistory,
    /// Emitted after git file dates have been collected.
    GitHistoryDone,
    /// Emitted after each discovered file has been handled (parsed, skipped
    /// as unchanged, or skipped because it could not be read). `done` counts
    /// handled files, `total` is the number of discovered files.
    Scanning { done: usize, total: usize },
    /// Emitted once every discovered file has been handled.
    ScanningDone,
    /// Emitted before file IR is stored and the module graph is rebuilt.
    BuildingModuleGraph,
    /// Emitted after module-graph nodes and edges have been persisted.
    ModuleGraphDone,
    /// Emitted before manifests and documentation files are analysed.
    AnalyzingProjectFiles,
    /// Emitted after documentation has been ingested.
    ProjectFilesDone,

    // NOTE(review): the submodule events below are not emitted by the scan
    // function in this file; their descriptions are inferred from the variant
    // and field names only — confirm against the submodule scanning code.
    /// Presumably reported when a git submodule is found at `path`.
    SubmoduleDetected { path: String },
    /// Presumably reported when scanning of the submodule at `path` starts.
    ScanningSubmodule { path: String, name: String },
    /// Presumably reported when scanning of the submodule at `path` finishes.
    ScanningSubmoduleDone { path: String },
    /// Presumably reported when the submodule at `path` needs no rescan;
    /// `hash` is assumed to identify the already-scanned state.
    SubmoduleUpToDate { path: String, hash: String },
    /// Presumably reported when the submodule at `path` is skipped, with a
    /// human-readable `reason`.
    SubmoduleSkipped { path: String, reason: String },
}
77
78fn noop_progress(_: &ScanProgress) {}
80
81#[derive(Debug, Clone)]
83pub struct ScanResult {
84 pub files_discovered: usize,
86 pub files_parsed: usize,
88 pub nodes_persisted: usize,
90 pub edges_persisted: usize,
92 pub manifests_analyzed: usize,
94 pub docs_ingested: usize,
96 pub manifest_analyses: Vec<ManifestAnalysis>,
98 pub incremental: Option<IncrementalStats>,
100 pub file_dates: HashMap<PathBuf, i64>,
104 pub excluded_submodules: Vec<String>,
107 pub source_map: HashMap<PathBuf, String>,
122
123 pub changed_paths: HashSet<PathBuf>,
133}
134
/// Counters describing what an incremental scan found relative to the file
/// hashes already stored for the branch.
///
/// Compares by value (`PartialEq`/`Eq`) so a whole set of counters can be
/// asserted in one expression.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct IncrementalStats {
    /// Files whose content hash matches the stored hash; these are not re-parsed.
    pub files_unchanged: usize,
    /// Files with a stored hash that differs from the current content hash.
    pub files_changed: usize,
    /// Discovered files that have no stored hash.
    pub files_new: usize,
    /// Stored paths that were no longer present among the discovered files.
    pub files_deleted: usize,
}
147
148pub fn scan_project(
153 root: &Path,
154 config: &ScanConfig,
155 db: &Database,
156 branch_id: BranchId,
157) -> Result<ScanResult, ScanError> {
158 scan_project_with_progress(root, config, db, noop_progress, branch_id)
159}
160
161pub fn scan_project_with_progress(
187 root: &Path,
188 config: &ScanConfig,
189 db: &Database,
190 on_progress: impl Fn(&ScanProgress),
191 branch_id: BranchId,
192) -> Result<ScanResult, ScanError> {
193 let conn = db.connection().clone();
194 let file_ir_repo = SqliteFileIRRepository::new(conn.clone());
195 let node_repo = SqliteNodeRepository::new(conn.clone());
196 let edge_repo = SqliteEdgeRepository::new(conn.clone());
197 let branch_repo = SqliteBranchRepository::new(conn);
198
199 let branch = branch_id;
200
201 branch_repo.ensure_branch_exists(&branch)?;
205
206 let head_at_scan_start: Option<String> = crate::git_utils::get_head_commit(root);
214
215 let discovery_result = discover_files(root, config)?;
219 let discovered = discovery_result.files;
220 let excluded_submodules = discovery_result.excluded_submodules;
221 let files_discovered = discovered.len();
222 on_progress(&ScanProgress::Discovering {
223 count: files_discovered,
224 });
225 on_progress(&ScanProgress::DiscoveryDone {
226 total: files_discovered,
227 });
228 tracing::info!(count = files_discovered, "Discovered source files");
229
230 on_progress(&ScanProgress::CollectingGitHistory);
234 let git_file_dates = collect_git_file_dates(root)?;
235 on_progress(&ScanProgress::GitHistoryDone);
236 if !git_file_dates.is_empty() {
237 tracing::info!(
238 files_with_dates = git_file_dates.len(),
239 "Collected git file dates"
240 );
241 }
242
243 let stored_hashes = file_ir_repo.get_file_hashes_by_branch(&branch)?;
247 let is_incremental = !stored_hashes.is_empty();
248
249 let discovered_paths: HashSet<String> = discovered
251 .iter()
252 .map(|df| df.path.to_string_lossy().to_string())
253 .collect();
254
255 let mut parsed_files: Vec<ProjectFile> = Vec::with_capacity(files_discovered);
259 let mut source_map: HashMap<PathBuf, String> = HashMap::new();
265 let mut changed_paths: HashSet<PathBuf> = HashSet::new();
268 let mut incremental_stats = IncrementalStats::default();
269
270 let mut scan_done: usize = 0;
271 for df in &discovered {
272 let file_path_str = df.path.to_string_lossy().to_string();
276 let abs_path = root.join(&df.path);
277
278 let source = match std::fs::read_to_string(&abs_path) {
279 Ok(s) => s,
280 Err(e) => {
281 tracing::warn!(path = %abs_path.display(), error = %e, "Failed to read file, skipping");
282 scan_done += 1;
283 on_progress(&ScanProgress::Scanning {
284 done: scan_done,
285 total: files_discovered,
286 });
287 continue;
288 }
289 };
290
291 if is_incremental {
292 let new_hash = content_hash(&source);
294
295 if let Some(stored_hash) = stored_hashes.get(&file_path_str) {
296 if *stored_hash == new_hash {
297 incremental_stats.files_unchanged += 1;
301 tracing::debug!(path = %df.path.display(), "File unchanged, skipping re-parse");
302 source_map.insert(df.path.clone(), source);
303 scan_done += 1;
304 on_progress(&ScanProgress::Scanning {
305 done: scan_done,
306 total: files_discovered,
307 });
308 continue;
309 }
310 incremental_stats.files_changed += 1;
312 tracing::debug!(path = %df.path.display(), "File changed, re-parsing");
313 } else {
314 incremental_stats.files_new += 1;
316 tracing::debug!(path = %df.path.display(), "New file, parsing");
317 }
318 }
319
320 let mut project_file = parse_file(&df.path, &source, df.language);
323
324 if !config.local_packages.is_empty() {
329 project_file
330 .dependencies_used
331 .retain(|dep| !config.local_packages.contains(&dep.package));
332 }
333
334 parsed_files.push(project_file);
335 changed_paths.insert(df.path.clone()); source_map.insert(df.path.clone(), source); scan_done += 1;
338 on_progress(&ScanProgress::Scanning {
339 done: scan_done,
340 total: files_discovered,
341 });
342 }
343 on_progress(&ScanProgress::ScanningDone);
344
345 let files_parsed = parsed_files.len();
346 tracing::info!(count = files_parsed, "Parsed source files");
347
348 on_progress(&ScanProgress::BuildingModuleGraph);
349
350 if is_incremental {
360 for stored_path in stored_hashes.keys() {
361 if !discovered_paths.contains(stored_path) {
362 tracing::info!(path = %stored_path, "File deleted, removing IR from DB");
363 let _ = file_ir_repo.delete_with_symbol_index(&branch, stored_path);
364 incremental_stats.files_deleted += 1;
365 }
366 }
367 }
368
369 for pf in &parsed_files {
377 let rel = pf.path.strip_prefix(root).unwrap_or(&pf.path);
381 let commit_date = git_file_dates.get(rel).copied();
382 file_ir_repo.upsert_with_symbol_index(&branch, pf, commit_date)?;
383 }
384 tracing::info!(count = files_parsed, "Stored file IR records");
385
386 let all_parsed_files = if is_incremental && incremental_stats.files_unchanged > 0 {
393 file_ir_repo.get_by_branch(&branch)?
395 } else {
396 parsed_files.clone()
398 };
399
400 if is_incremental {
408 let deleted_edges = edge_repo.delete_by_branch(&branch)?;
409 let deleted_nodes = node_repo.delete_facts_by_branch(&branch)?;
412 tracing::debug!(
413 nodes = deleted_nodes,
414 edges = deleted_edges,
415 "Cleared old module structure for rebuild"
416 );
417 }
418
419 let module_graph = build_module_graph(root, &all_parsed_files, &branch);
420
421 let mut id_remap: HashMap<NodeId, NodeId> = HashMap::new();
423 let mut nodes_persisted: usize = 0;
424
425 for node in &module_graph.nodes {
426 let inserted = node_repo.insert(node)?;
427 id_remap.insert(node.id, inserted.id);
428 nodes_persisted += 1;
429 }
430
431 let mut edges_persisted: usize = 0;
433
434 for edge in &module_graph.edges {
435 let remapped_edge = remap_edge(edge, &id_remap);
436 edge_repo.insert(&remapped_edge)?;
437 edges_persisted += 1;
438 }
439
440 tracing::info!(
441 nodes = nodes_persisted,
442 edges = edges_persisted,
443 "Persisted module structure"
444 );
445
446 on_progress(&ScanProgress::ModuleGraphDone);
447 on_progress(&ScanProgress::AnalyzingProjectFiles);
448
449 let manifests = discover_manifests(root)?;
453 let manifests_analyzed = manifests.len();
454
455 let manifest_analyses = if !manifests.is_empty() {
456 let analysis = analyze_manifests(&manifests, &all_parsed_files)?;
457 tracing::info!(count = analysis.len(), "Analyzed dependency manifests");
458 analysis
459 } else {
460 Vec::new()
461 };
462
463 {
476 let mut internal_names: Vec<String> = manifest_analyses
477 .iter()
478 .flat_map(|a| a.internal_names.iter().cloned())
479 .filter(|n| !n.trim().is_empty())
480 .collect();
481
482 let mut seen: HashSet<String> = internal_names.iter().cloned().collect();
486 for pkg in &config.local_packages {
487 let normalised = pkg.trim().replace('-', "_");
488 if !normalised.is_empty() && seen.insert(normalised.clone()) {
489 internal_names.push(normalised);
490 }
491 }
492
493 if internal_names.is_empty() {
494 tracing::debug!("No internal names to persist — skipping workspace_crates write");
495 } else {
496 let json = serde_json::to_string(&internal_names).unwrap_or_else(|e| {
497 tracing::warn!(error = %e, "Failed to serialise workspace_crates, storing []");
498 "[]".to_owned()
499 });
500
501 let branch_meta = SqliteBranchMetadataRepository::new(db.connection().clone());
502 if let Err(e) = branch_meta.set(&branch.0, "workspace_crates", &json) {
503 tracing::warn!(error = %e, "Failed to persist workspace_crates to branch_metadata");
504 } else {
505 tracing::info!(
506 count = internal_names.len(),
507 branch_id = %branch.0,
508 "Persisted workspace_crates to branch_metadata"
509 );
510 }
511 }
512 }
513
514 {
523 let path_aliases: Vec<_> = manifest_analyses
524 .iter()
525 .flat_map(|a| a.path_aliases.iter().cloned())
526 .collect();
527
528 if path_aliases.is_empty() {
529 tracing::debug!("No path aliases to persist — skipping tsconfig_path_aliases write");
530 } else {
531 let json = serde_json::to_string(&path_aliases).unwrap_or_else(|e| {
532 tracing::warn!(error = %e, "Failed to serialise tsconfig_path_aliases, storing []");
533 "[]".to_owned()
534 });
535
536 let branch_meta = SqliteBranchMetadataRepository::new(db.connection().clone());
537 if let Err(e) = branch_meta.set(&branch.0, "tsconfig_path_aliases", &json) {
538 tracing::warn!(error = %e, "Failed to persist tsconfig_path_aliases to branch_metadata");
539 } else {
540 tracing::info!(
541 count = path_aliases.len(),
542 branch_id = %branch.0,
543 "Persisted tsconfig_path_aliases to branch_metadata"
544 );
545 }
546 }
547 }
548
549 let doc_files = discover_documentation(root, config)?;
553 let docs_ingested = doc_files.len();
554
555 for (doc_path, doc_content) in &doc_files {
556 match parse_documentation(doc_path, doc_content, &branch) {
557 Ok(doc_result) => {
558 for node in &doc_result.nodes {
559 node_repo.insert(node)?;
560 nodes_persisted += 1;
561 }
562 }
563 Err(e) => {
564 tracing::warn!(
565 path = %doc_path.display(),
566 error = %e,
567 "Failed to parse documentation, skipping"
568 );
569 }
570 }
571 }
572
573 tracing::info!(
574 count = docs_ingested,
575 nodes = nodes_persisted,
576 "Ingested documentation"
577 );
578
579 on_progress(&ScanProgress::ProjectFilesDone);
580
581 if let Some(head) = head_at_scan_start.as_deref()
586 && let Err(e) = branch_repo.set_last_scanned_commit(&branch, head)
587 {
588 tracing::warn!(
589 error = %e,
590 branch = %branch.0,
591 "scan_project: failed to record last_scanned_commit; \
592 freshness gate may re-trigger sync next startup"
593 );
594 }
595
596 Ok(ScanResult {
597 files_discovered,
598 files_parsed,
599 nodes_persisted,
600 edges_persisted,
601 manifests_analyzed,
602 docs_ingested,
603 manifest_analyses,
604 incremental: if is_incremental {
605 Some(incremental_stats)
606 } else {
607 None
608 },
609 file_dates: git_file_dates,
610 excluded_submodules,
611 source_map,
612 changed_paths,
613 })
614}
615
616fn remap_edge(edge: &Edge, id_remap: &HashMap<NodeId, NodeId>) -> Edge {
621 Edge {
622 id: EdgeId(0), source_id: id_remap
624 .get(&edge.source_id)
625 .copied()
626 .unwrap_or(edge.source_id),
627 target_id: id_remap
628 .get(&edge.target_id)
629 .copied()
630 .unwrap_or(edge.target_id),
631 edge_type: edge.edge_type,
632 branch_id: edge.branch_id.clone(),
633 weight: edge.weight,
634 metadata: edge.metadata.clone(),
635 }
636}
637
638fn discover_manifests(root: &Path) -> Result<Vec<(PathBuf, String, ManifestType)>, ScanError> {
643 let mut manifests = Vec::new();
644
645 for filename in ManifestType::all_filenames() {
646 let path = root.join(filename);
647 if path.is_file() {
648 let content = std::fs::read_to_string(&path).map_err(|e| ScanError::ManifestError {
649 path: path.clone(),
650 reason: format!("Failed to read manifest: {e}"),
651 })?;
652
653 if let Some(manifest_type) = ManifestType::from_filename(filename) {
654 manifests.push((path, content, manifest_type));
655 }
656 }
657 }
658
659 Ok(manifests)
660}
661
662fn discover_documentation(
671 root: &Path,
672 config: &ScanConfig,
673) -> Result<Vec<(PathBuf, String)>, ScanError> {
674 let doc_extensions = ["md", "json", "yaml", "yml"];
675
676 let exclude_globset = {
679 let mut builder = GlobSetBuilder::new();
680 for pattern in &config.exclude_paths {
681 let glob = Glob::new(pattern).map_err(|e| ScanError::DiscoveryError {
682 path: root.to_path_buf(),
683 reason: format!("Invalid exclude_paths pattern '{pattern}': {e}"),
684 })?;
685 builder.add(glob);
686 }
687 builder.build().map_err(|e| ScanError::DiscoveryError {
688 path: root.to_path_buf(),
689 reason: format!("Failed to build exclude globset: {e}"),
690 })?
691 };
692
693 let mut doc_files = Vec::new();
694
695 let walker = WalkBuilder::new(root)
696 .hidden(true) .git_ignore(true) .git_global(true) .git_exclude(true) .build();
701
702 for entry_result in walker {
703 let entry = match entry_result {
704 Ok(e) => e,
705 Err(err) => {
706 tracing::warn!("Doc walk error: {err}");
707 continue;
708 }
709 };
710
711 let Some(file_type) = entry.file_type() else {
713 continue;
714 };
715 if !file_type.is_file() {
716 continue;
717 }
718
719 let path = entry.path();
720
721 let ext = match path.extension().and_then(|e| e.to_str()) {
723 Some(e) => e,
724 None => continue,
725 };
726 if !doc_extensions.contains(&ext) {
727 continue;
728 }
729
730 let relative = path.strip_prefix(root).unwrap_or(path).to_path_buf();
732 if !exclude_globset.is_empty() && exclude_globset.is_match(&relative) {
733 tracing::debug!(
734 path = %relative.display(),
735 "Skipping doc file (matched exclude_paths)"
736 );
737 continue;
738 }
739
740 let content = match std::fs::read_to_string(path) {
742 Ok(c) => c,
743 Err(e) => {
744 tracing::warn!(path = %path.display(), error = %e, "Cannot read doc file");
745 continue;
746 }
747 };
748
749 if (ext == "json" || ext == "yaml" || ext == "yml")
751 && !is_documentation_content(ext, &content)
752 {
753 continue;
754 }
755
756 doc_files.push((relative, content));
757 }
758
759 Ok(doc_files)
760}
761
762fn is_documentation_content(ext: &str, content: &str) -> bool {
767 match ext {
768 "json" => {
769 let Ok(value) = serde_json::from_str::<serde_json::Value>(content) else {
771 return false;
772 };
773 let obj = match value.as_object() {
774 Some(o) => o,
775 None => return false,
776 };
777 obj.contains_key("$schema")
778 || obj.contains_key("properties")
779 || (obj.contains_key("type") && obj.contains_key("title"))
780 }
781 "yaml" | "yml" => {
782 let Ok(value) = serde_norway::from_str::<serde_norway::Value>(content) else {
784 return false;
785 };
786 let mapping = match value.as_mapping() {
787 Some(m) => m,
788 None => return false,
789 };
790 let has_openapi =
791 mapping.contains_key(serde_norway::Value::String("openapi".to_string()));
792 let has_swagger =
793 mapping.contains_key(serde_norway::Value::String("swagger".to_string()));
794 has_openapi || has_swagger
795 }
796 _ => false,
797 }
798}
799
800#[cfg(test)]
801mod tests {
802 use super::*;
803 use seshat_core::ScanConfig;
804 use seshat_storage::{Database, RepoMetadataRepository};
805 use std::fs;
806 use tempfile::tempdir;
807
808 fn create_test_project() -> tempfile::TempDir {
810 let dir = tempdir().expect("create tempdir");
811 let root = dir.path();
812
813 fs::create_dir_all(root.join(".git")).unwrap();
815
816 let src = root.join("src");
818 fs::create_dir_all(&src).unwrap();
819
820 fs::write(
821 src.join("main.rs"),
822 r#"
823use std::io;
824use crate::config::Config;
825
826pub fn main() {
827 println!("hello");
828}
829
830fn helper() -> bool {
831 true
832}
833"#,
834 )
835 .unwrap();
836
837 fs::write(
838 src.join("config.rs"),
839 r#"
840pub struct Config {
841 pub name: String,
842 pub debug: bool,
843}
844
845impl Config {
846 pub fn new() -> Self {
847 Config {
848 name: String::new(),
849 debug: false,
850 }
851 }
852}
853"#,
854 )
855 .unwrap();
856
857 let utils = src.join("utils");
859 fs::create_dir_all(&utils).unwrap();
860
861 fs::write(
862 utils.join("format.rs"),
863 r#"
864use crate::config::Config;
865
866pub fn format_name(config: &Config) -> String {
867 config.name.clone()
868}
869"#,
870 )
871 .unwrap();
872
873 fs::write(
875 root.join("README.md"),
876 r#"# Test Project
877
878## Overview
879A simple test project.
880
881## Features
882- Feature one
883- Feature two
884"#,
885 )
886 .unwrap();
887
888 dir
889 }
890
891 #[test]
892 fn scan_project_discovers_and_parses_files() {
893 let dir = create_test_project();
894 let root = dir.path();
895 let db = Database::open(":memory:").expect("open DB");
896 let config = ScanConfig::default();
897
898 let result =
899 scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
900
901 assert_eq!(result.files_discovered, 3, "should discover 3 .rs files");
902 assert_eq!(result.files_parsed, 3, "should parse all 3 files");
903 }
904
905 #[test]
906 fn scan_project_stores_ir_in_database() {
907 let dir = create_test_project();
908 let root = dir.path();
909 let db = Database::open(":memory:").expect("open DB");
910 let config = ScanConfig::default();
911
912 scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
913
914 let conn = db.connection().clone();
916 let file_ir_repo = SqliteFileIRRepository::new(conn);
917 let branch_id = BranchId::from("main");
918
919 let all_files = file_ir_repo.get_by_branch(&branch_id).expect("get files");
920 assert_eq!(all_files.len(), 3, "should have 3 file IR records");
921 }
922
923 #[test]
924 fn scan_project_stores_content_hash() {
925 let dir = create_test_project();
926 let root = dir.path();
927 let db = Database::open(":memory:").expect("open DB");
928 let config = ScanConfig::default();
929
930 scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
931
932 let conn = db.connection().clone();
934 let file_ir_repo = SqliteFileIRRepository::new(conn);
935 let branch_id = BranchId::from("main");
936
937 let all_files = file_ir_repo.get_by_branch(&branch_id).expect("get files");
938 for pf in &all_files {
939 assert!(
940 !pf.content_hash.is_empty(),
941 "content hash should be non-empty for {}",
942 pf.path.display()
943 );
944 }
945 }
946
947 #[test]
948 fn scan_project_persists_module_nodes() {
949 let dir = create_test_project();
950 let root = dir.path();
951 let db = Database::open(":memory:").expect("open DB");
952 let config = ScanConfig::default();
953
954 let result =
955 scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
956
957 assert!(
959 result.nodes_persisted >= 2,
960 "should persist at least 2 module nodes, got {}",
961 result.nodes_persisted
962 );
963
964 let conn = db.connection().clone();
966 let node_repo = SqliteNodeRepository::new(conn);
967 let branch_id = BranchId::from("main");
968
969 let nodes = node_repo.find_by_branch(&branch_id).expect("find nodes");
970 assert!(
971 nodes.len() >= 2,
972 "should have at least 2 nodes in DB, got {}",
973 nodes.len()
974 );
975 }
976
977 #[test]
978 fn scan_project_persists_edges() {
979 let dir = create_test_project();
980 let root = dir.path();
981 let db = Database::open(":memory:").expect("open DB");
982 let config = ScanConfig::default();
983
984 let result =
985 scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
986
987 assert!(
989 result.edges_persisted >= 1,
990 "should persist at least 1 edge, got {}",
991 result.edges_persisted
992 );
993
994 let conn = db.connection().clone();
996 let edge_repo = SqliteEdgeRepository::new(conn);
997
998 let part_of_edges = edge_repo
999 .find_by_type(seshat_core::EdgeType::PartOf)
1000 .expect("find PartOf edges");
1001 assert!(
1002 !part_of_edges.is_empty(),
1003 "should have at least 1 PartOf edge"
1004 );
1005 }
1006
1007 #[test]
1008 fn scan_project_ingests_documentation() {
1009 let dir = create_test_project();
1010 let root = dir.path();
1011 let db = Database::open(":memory:").expect("open DB");
1012 let config = ScanConfig::default();
1013
1014 let result =
1015 scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
1016
1017 assert!(
1018 result.docs_ingested >= 1,
1019 "should ingest at least 1 documentation file (README.md), got {}",
1020 result.docs_ingested
1021 );
1022 }
1023
1024 #[test]
1025 fn scan_project_empty_directory() {
1026 let dir = tempdir().expect("create tempdir");
1027 let root = dir.path();
1028
1029 fs::create_dir_all(root.join(".git")).unwrap();
1031
1032 let db = Database::open(":memory:").expect("open DB");
1033 let config = ScanConfig::default();
1034
1035 let result = scan_project(root, &config, &db, BranchId::from("main"))
1036 .expect("scan should succeed on empty project");
1037
1038 assert_eq!(result.files_discovered, 0);
1039 assert_eq!(result.files_parsed, 0);
1040 assert_eq!(result.nodes_persisted, 0);
1041 assert_eq!(result.edges_persisted, 0);
1042 }
1043
1044 #[test]
1045 fn scan_project_respects_config_exclude_paths() {
1046 let dir = create_test_project();
1047 let root = dir.path();
1048
1049 let config = ScanConfig {
1051 exclude_paths: vec!["**/utils/**".to_string()],
1052 ..ScanConfig::default()
1053 };
1054
1055 let db = Database::open(":memory:").expect("open DB");
1056
1057 let result =
1058 scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
1059
1060 assert_eq!(
1062 result.files_discovered, 2,
1063 "should discover 2 files (utils excluded)"
1064 );
1065 }
1066
1067 #[test]
1068 fn discover_manifests_finds_cargo_toml() {
1069 let dir = tempdir().expect("create tempdir");
1070 let root = dir.path();
1071
1072 fs::write(
1073 root.join("Cargo.toml"),
1074 r#"[package]
1075name = "test"
1076version = "0.1.0"
1077edition = "2021"
1078"#,
1079 )
1080 .unwrap();
1081
1082 let manifests = discover_manifests(root).expect("discover manifests");
1083 assert_eq!(manifests.len(), 1);
1084 assert_eq!(manifests[0].2, ManifestType::CargoToml);
1085 }
1086
1087 #[test]
1088 fn discover_manifests_finds_nothing_without_manifests() {
1089 let dir = tempdir().expect("create tempdir");
1090 let manifests = discover_manifests(dir.path()).expect("discover manifests");
1091 assert!(manifests.is_empty());
1092 }
1093
1094 #[test]
1095 fn is_documentation_content_json_schema() {
1096 let content = r#"{"$schema": "http://json-schema.org/draft-07/schema#", "type": "object"}"#;
1097 assert!(is_documentation_content("json", content));
1098
1099 let content = r#"{"name": "foo", "value": 42}"#;
1100 assert!(!is_documentation_content("json", content));
1101 }
1102
1103 #[test]
1104 fn is_documentation_content_openapi() {
1105 let content = "openapi: '3.0.0'\ninfo:\n title: Test\n version: '1.0'\npaths: {}";
1106 assert!(is_documentation_content("yaml", content));
1107
1108 let content = "name: test\nvalue: 42";
1109 assert!(!is_documentation_content("yaml", content));
1110 }
1111
1112 #[test]
1113 fn remap_edge_applies_id_mapping() {
1114 let mut remap = HashMap::new();
1115 remap.insert(NodeId(1), NodeId(100));
1116 remap.insert(NodeId(2), NodeId(200));
1117
1118 let edge = Edge {
1119 id: EdgeId(0),
1120 source_id: NodeId(1),
1121 target_id: NodeId(2),
1122 edge_type: seshat_core::EdgeType::DependsOn,
1123 branch_id: BranchId::from("main"),
1124 weight: 1.0,
1125 metadata: None,
1126 };
1127
1128 let remapped = remap_edge(&edge, &remap);
1129 assert_eq!(remapped.source_id, NodeId(100));
1130 assert_eq!(remapped.target_id, NodeId(200));
1131 }
1132
1133 #[test]
1134 fn scan_project_incremental_skips_unchanged() {
1135 let dir = create_test_project();
1136 let root = dir.path();
1137 let db = Database::open(":memory:").expect("open DB");
1138 let config = ScanConfig::default();
1139
1140 let r1 = scan_project(root, &config, &db, BranchId::from("main")).expect("first scan");
1142 assert!(r1.incremental.is_none(), "first scan is not incremental");
1143 assert_eq!(r1.files_parsed, 3);
1144
1145 let r2 = scan_project(root, &config, &db, BranchId::from("main")).expect("second scan");
1147 assert!(r2.incremental.is_some(), "second scan is incremental");
1148 let stats = r2.incremental.unwrap();
1149 assert_eq!(stats.files_unchanged, 3);
1150 assert_eq!(stats.files_changed, 0);
1151 assert_eq!(stats.files_new, 0);
1152 assert_eq!(stats.files_deleted, 0);
1153 assert_eq!(r2.files_parsed, 0, "no files re-parsed");
1154 }
1155
1156 #[test]
1157 fn scan_project_incremental_detects_modification() {
1158 let dir = create_test_project();
1159 let root = dir.path();
1160 let db = Database::open(":memory:").expect("open DB");
1161 let config = ScanConfig::default();
1162
1163 scan_project(root, &config, &db, BranchId::from("main")).expect("first scan");
1165
1166 fs::write(
1168 root.join("src/config.rs"),
1169 "pub struct Config { pub name: String, pub extra: bool }\n",
1170 )
1171 .unwrap();
1172
1173 let r2 = scan_project(root, &config, &db, BranchId::from("main")).expect("second scan");
1175 let stats = r2.incremental.unwrap();
1176 assert_eq!(stats.files_changed, 1, "config.rs changed");
1177 assert_eq!(stats.files_unchanged, 2, "main.rs + format.rs unchanged");
1178 assert_eq!(r2.files_parsed, 1, "only changed file parsed");
1179 }
1180
1181 #[test]
1182 fn scan_project_incremental_detects_addition() {
1183 let dir = create_test_project();
1184 let root = dir.path();
1185 let db = Database::open(":memory:").expect("open DB");
1186 let config = ScanConfig::default();
1187
1188 scan_project(root, &config, &db, BranchId::from("main")).expect("first scan");
1189
1190 fs::write(root.join("src/extra.rs"), "pub fn extra() {}").unwrap();
1192
1193 let r2 = scan_project(root, &config, &db, BranchId::from("main")).expect("second scan");
1194 let stats = r2.incremental.unwrap();
1195 assert_eq!(stats.files_new, 1);
1196 assert_eq!(stats.files_unchanged, 3);
1197 assert_eq!(r2.files_discovered, 4);
1198 }
1199
1200 #[test]
1201 fn scan_project_incremental_detects_deletion() {
1202 let dir = create_test_project();
1203 let root = dir.path();
1204 let db = Database::open(":memory:").expect("open DB");
1205 let config = ScanConfig::default();
1206
1207 scan_project(root, &config, &db, BranchId::from("main")).expect("first scan");
1208
1209 fs::remove_file(root.join("src/utils/format.rs")).unwrap();
1211
1212 let r2 = scan_project(root, &config, &db, BranchId::from("main")).expect("second scan");
1213 let stats = r2.incremental.unwrap();
1214 assert_eq!(stats.files_deleted, 1);
1215 assert_eq!(stats.files_unchanged, 2);
1216 assert_eq!(r2.files_discovered, 2);
1217
1218 let conn = db.connection().clone();
1220 let file_ir_repo = SqliteFileIRRepository::new(conn);
1221 let branch = BranchId::from("main");
1222 let files = file_ir_repo.get_by_branch(&branch).unwrap();
1223 assert_eq!(files.len(), 2);
1224 }
1225
1226 #[test]
1234 fn full_scan_source_map_contains_all_files() {
1235 let dir = create_test_project();
1236 let root = dir.path();
1237 let db = Database::open(":memory:").expect("open DB");
1238 let config = ScanConfig::default();
1239
1240 let result =
1241 scan_project(root, &config, &db, BranchId::from("main")).expect("scan should succeed");
1242
1243 assert_eq!(
1245 result.source_map.len(),
1246 result.files_discovered,
1247 "source_map must contain all {} discovered files on full scan, got {}",
1248 result.files_discovered,
1249 result.source_map.len()
1250 );
1251 assert_eq!(
1253 result.changed_paths.len(),
1254 result.files_discovered,
1255 "changed_paths must equal files_discovered on full scan"
1256 );
1257 for (path, src) in &result.source_map {
1259 assert!(!src.is_empty(), "source for {:?} must not be empty", path);
1260 }
1261 }
1262
1263 #[test]
1264 fn incremental_scan_source_map_contains_all_files() {
1265 let dir = create_test_project();
1270 let root = dir.path();
1271 let db = Database::open(":memory:").expect("open DB");
1272 let config = ScanConfig::default();
1273
1274 scan_project(root, &config, &db, BranchId::from("main")).expect("first scan");
1276
1277 let r2 = scan_project(root, &config, &db, BranchId::from("main")).expect("second scan");
1279 let stats = r2.incremental.as_ref().unwrap();
1280
1281 assert_eq!(stats.files_unchanged, 3, "all 3 files should be unchanged");
1282 assert_eq!(r2.files_parsed, 0, "no files should be re-parsed");
1283
1284 assert_eq!(
1286 r2.source_map.len(),
1287 r2.files_discovered,
1288 "source_map must contain all {} files on incremental scan (no changes), got {} — \
1289 this would cause empty snippets in convention evidence",
1290 r2.files_discovered,
1291 r2.source_map.len()
1292 );
1293
1294 assert!(
1296 r2.changed_paths.is_empty(),
1297 "changed_paths must be empty when no files changed, got {} paths",
1298 r2.changed_paths.len()
1299 );
1300
1301 for (path, src) in &r2.source_map {
1303 assert!(
1304 !src.is_empty(),
1305 "source for {:?} must not be empty on incremental scan",
1306 path
1307 );
1308 }
1309 }
1310
1311 #[test]
1312 fn scan_persists_workspace_crates_with_local_packages_union() {
1313 let dir = tempdir().expect("create tempdir");
1319 let root = dir.path();
1320
1321 fs::create_dir_all(root.join(".git")).unwrap();
1323
1324 fs::write(
1326 root.join("Cargo.toml"),
1327 r#"[package]
1328name = "auto-detected-crate"
1329version = "0.1.0"
1330edition = "2021"
1331"#,
1332 )
1333 .unwrap();
1334
1335 let src = root.join("src");
1337 fs::create_dir_all(&src).unwrap();
1338 fs::write(src.join("lib.rs"), "pub fn hello() {}\n").unwrap();
1339
1340 let config = ScanConfig {
1341 local_packages: vec![
1342 "extra-package".to_owned(),
1344 "auto_detected_crate".to_owned(),
1346 ],
1347 ..ScanConfig::default()
1348 };
1349
1350 let db = Database::open(":memory:").expect("open DB");
1351 let branch = BranchId::from("main");
1352 scan_project(root, &config, &db, branch.clone()).expect("scan should succeed");
1353
1354 let branch_meta = SqliteBranchMetadataRepository::new(db.connection().clone());
1356 let json = branch_meta
1357 .get(&branch.0, "workspace_crates")
1358 .expect("branch_metadata query must succeed")
1359 .expect("workspace_crates key must be present for the scanned branch");
1360
1361 let names: Vec<String> =
1362 serde_json::from_str(&json).expect("workspace_crates must be valid JSON array");
1363
1364 assert!(
1366 names.contains(&"auto_detected_crate".to_owned()),
1367 "auto-detected crate must be present; got {:?}",
1368 names
1369 );
1370 assert!(
1372 names.contains(&"extra_package".to_owned()),
1373 "extra_package (normalised) from local_packages must be present; got {:?}",
1374 names
1375 );
1376 let unique: std::collections::HashSet<_> = names.iter().collect();
1378 assert_eq!(
1379 unique.len(),
1380 names.len(),
1381 "workspace_crates must not contain duplicates; got {:?}",
1382 names
1383 );
1384
1385 let repo_meta = seshat_storage::SqliteRepoMetadataRepository::new(db.connection().clone());
1389 assert!(
1390 repo_meta
1391 .get("workspace_crates")
1392 .expect("repo_metadata query must succeed")
1393 .is_none(),
1394 "repo_metadata['workspace_crates'] must not be written by the scanner anymore",
1395 );
1396
1397 let all = branch_meta
1399 .list(&branch.0)
1400 .expect("list branch_metadata must succeed");
1401 assert_eq!(
1402 all.len(),
1403 1,
1404 "exactly one branch_metadata row expected after a single scan; got {:?}",
1405 all
1406 );
1407 }
1408
1409 #[test]
1410 fn scan_two_branches_isolates_workspace_crates() {
1411 let db = Database::open(":memory:").expect("open DB");
1421
1422 let main_dir = tempdir().expect("create main tempdir");
1424 let main_root = main_dir.path();
1425 fs::create_dir_all(main_root.join(".git")).unwrap();
1426 fs::write(
1427 main_root.join("Cargo.toml"),
1428 r#"[package]
1429name = "main-only-crate"
1430version = "0.1.0"
1431edition = "2021"
1432"#,
1433 )
1434 .unwrap();
1435 fs::create_dir_all(main_root.join("src")).unwrap();
1436 fs::write(main_root.join("src/lib.rs"), "pub fn m() {}\n").unwrap();
1437
1438 let feature_dir = tempdir().expect("create feature tempdir");
1440 let feature_root = feature_dir.path();
1441 fs::create_dir_all(feature_root.join(".git")).unwrap();
1442 fs::write(
1443 feature_root.join("Cargo.toml"),
1444 r#"[package]
1445name = "feature-only-crate"
1446version = "0.1.0"
1447edition = "2021"
1448"#,
1449 )
1450 .unwrap();
1451 fs::create_dir_all(feature_root.join("src")).unwrap();
1452 fs::write(feature_root.join("src/lib.rs"), "pub fn f() {}\n").unwrap();
1453
1454 let config = ScanConfig::default();
1455 let main_branch = BranchId::from("main");
1456 let feature_branch = BranchId::from("feature");
1457
1458 scan_project(main_root, &config, &db, main_branch.clone()).expect("scan main");
1459 scan_project(feature_root, &config, &db, feature_branch.clone()).expect("scan feature");
1460
1461 let branch_meta = SqliteBranchMetadataRepository::new(db.connection().clone());
1462
1463 let main_json = branch_meta
1464 .get(&main_branch.0, "workspace_crates")
1465 .unwrap()
1466 .expect("workspace_crates must exist for main");
1467 let feature_json = branch_meta
1468 .get(&feature_branch.0, "workspace_crates")
1469 .unwrap()
1470 .expect("workspace_crates must exist for feature");
1471
1472 let main_names: Vec<String> = serde_json::from_str(&main_json).unwrap();
1473 let feature_names: Vec<String> = serde_json::from_str(&feature_json).unwrap();
1474
1475 assert!(
1477 main_names.contains(&"main_only_crate".to_owned()),
1478 "main branch must see its own crate; got {:?}",
1479 main_names
1480 );
1481 assert!(
1482 !main_names.contains(&"feature_only_crate".to_owned()),
1483 "main branch must not see feature's crate; got {:?}",
1484 main_names
1485 );
1486 assert!(
1487 feature_names.contains(&"feature_only_crate".to_owned()),
1488 "feature branch must see its own crate; got {:?}",
1489 feature_names
1490 );
1491 assert!(
1492 !feature_names.contains(&"main_only_crate".to_owned()),
1493 "feature branch must not see main's crate; got {:?}",
1494 feature_names
1495 );
1496
1497 scan_project(main_root, &config, &db, main_branch.clone()).expect("re-scan main");
1500 let main_rows = branch_meta.list(&main_branch.0).unwrap();
1501 assert_eq!(main_rows.len(), 1, "main must UPSERT, not duplicate");
1502 let feature_after = branch_meta
1503 .get(&feature_branch.0, "workspace_crates")
1504 .unwrap()
1505 .expect("feature row must survive a re-scan on main");
1506 assert_eq!(
1507 feature_after, feature_json,
1508 "re-scanning main must not mutate feature's workspace_crates",
1509 );
1510 }
1511
1512 #[test]
1513 fn incremental_scan_changed_paths_contains_only_modified_files() {
1514 let dir = create_test_project();
1515 let root = dir.path();
1516 let db = Database::open(":memory:").expect("open DB");
1517 let config = ScanConfig::default();
1518
1519 scan_project(root, &config, &db, BranchId::from("main")).expect("first scan");
1520
1521 let changed_file_abs = root.join("src/config.rs");
1523 fs::write(&changed_file_abs, "pub struct Config { pub extra: bool }\n").unwrap();
1524 let changed_file = std::path::PathBuf::from("src/config.rs");
1527
1528 let r2 = scan_project(root, &config, &db, BranchId::from("main")).expect("second scan");
1529
1530 assert_eq!(
1532 r2.source_map.len(),
1533 r2.files_discovered,
1534 "source_map must contain all files even on incremental scan"
1535 );
1536
1537 assert_eq!(
1539 r2.changed_paths.len(),
1540 1,
1541 "changed_paths must contain exactly 1 file (the modified one), got: {:?}",
1542 r2.changed_paths
1543 );
1544 assert!(
1545 r2.changed_paths.contains(&changed_file),
1546 "changed_paths must contain the modified file {:?}, got: {:?}",
1547 changed_file,
1548 r2.changed_paths
1549 );
1550
1551 for path in r2.source_map.keys() {
1553 if path != &changed_file {
1554 assert!(
1555 !r2.changed_paths.contains(path),
1556 "unchanged file {:?} must not be in changed_paths",
1557 path
1558 );
1559 }
1560 }
1561 }
1562}