Skip to main content

seshat_scanner/
module_structure.rs

1//! Module structure detection and dependency graph construction.
2//!
3//! Analyzes parsed [`ProjectFile`]s to detect module boundaries (directories
4//! containing source files) and build a dependency graph from import/export
5//! relationships. Produces [`KnowledgeNode`]s (Fact nature) for each module
6//! and [`Edge`]s for `DependsOn` (import relationships) and `PartOf`
7//! (submodule hierarchy) relationships.
8
9use std::collections::{BTreeMap, BTreeSet, HashMap};
10use std::path::{Path, PathBuf};
11
12use seshat_core::{
13    BranchId, Edge, EdgeId, EdgeType, KnowledgeNature, KnowledgeNode, KnowledgeWeight, Language,
14    NodeId, ProjectFile,
15};
16
/// One module discovered in the project: a directory that directly holds
/// at least one parsed source file.
#[derive(Clone, Debug)]
pub struct ModuleInfo {
    /// Module directory, expressed relative to the project root
    /// (the root itself is the empty path).
    pub path: PathBuf,
    /// Source files that live directly inside this directory.
    pub files: Vec<PathBuf>,
    /// Names of the languages found among this module's files.
    pub languages: BTreeSet<String>,
}
27
28/// The complete module structure analysis result.
29#[derive(Debug)]
30pub struct ModuleGraph {
31    /// Knowledge nodes representing each module (Fact nature).
32    pub nodes: Vec<KnowledgeNode>,
33    /// Edges: DependsOn (import relationships) and PartOf (hierarchy).
34    pub edges: Vec<Edge>,
35    /// Module info indexed by module path (for querying).
36    pub modules: HashMap<PathBuf, ModuleInfo>,
37    /// Mapping from module path to assigned node ID (for query lookups).
38    path_to_node_id: HashMap<PathBuf, NodeId>,
39    /// Reverse mapping from node ID to module path.
40    node_id_to_path: HashMap<NodeId, PathBuf>,
41}
42
43impl ModuleGraph {
44    /// Find all modules that the given module depends on (outgoing DependsOn edges).
45    pub fn dependencies_of(&self, module_path: &Path) -> Vec<&PathBuf> {
46        let Some(&source_node_id) = self.path_to_node_id.get(module_path) else {
47            return Vec::new();
48        };
49
50        self.edges
51            .iter()
52            .filter(|e| e.edge_type == EdgeType::DependsOn && e.source_id == source_node_id)
53            .filter_map(|e| self.node_id_to_path.get(&e.target_id))
54            .collect()
55    }
56
57    /// Find all modules that depend on the given module (incoming DependsOn edges).
58    pub fn dependents_of(&self, module_path: &Path) -> Vec<&PathBuf> {
59        let Some(&target_node_id) = self.path_to_node_id.get(module_path) else {
60            return Vec::new();
61        };
62
63        self.edges
64            .iter()
65            .filter(|e| e.edge_type == EdgeType::DependsOn && e.target_id == target_node_id)
66            .filter_map(|e| self.node_id_to_path.get(&e.source_id))
67            .collect()
68    }
69}
70
71/// Analyze parsed files to detect module structure and build a dependency graph.
72///
73/// # Arguments
74///
75/// * `project_root` - The root directory of the project (used to compute relative paths).
76/// * `parsed_files` - All parsed [`ProjectFile`]s from the scanning pipeline.
77/// * `branch_id` - The branch identifier for the knowledge graph nodes and edges.
78///
79/// # Returns
80///
81/// A [`ModuleGraph`] containing:
82/// - Knowledge nodes (Fact nature, Info weight) for each detected module.
83/// - DependsOn edges between modules based on import relationships.
84/// - PartOf edges from submodules to their parent modules.
85pub fn build_module_graph(
86    project_root: &Path,
87    parsed_files: &[ProjectFile],
88    branch_id: &BranchId,
89) -> ModuleGraph {
90    // Step 1: Detect modules — group files by their parent directory.
91    let mut dir_files: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
92    let mut dir_languages: BTreeMap<PathBuf, BTreeSet<String>> = BTreeMap::new();
93
94    for pf in parsed_files {
95        let rel_path = make_relative(&pf.path, project_root);
96        let dir = rel_path
97            .parent()
98            .map(|p| p.to_path_buf())
99            .unwrap_or_default();
100
101        dir_files
102            .entry(dir.clone())
103            .or_default()
104            .push(rel_path.clone());
105        dir_languages
106            .entry(dir)
107            .or_default()
108            .insert(pf.language.as_str().to_owned());
109    }
110
111    // Build a map from relative path → parsed file for quick lookup.
112    let file_map: HashMap<PathBuf, &ProjectFile> = parsed_files
113        .iter()
114        .map(|pf| (make_relative(&pf.path, project_root), pf))
115        .collect();
116
117    // Build ordered list of module paths for stable node ID assignment.
118    let module_paths: Vec<PathBuf> = dir_files.keys().cloned().collect();
119    let path_to_node_id: HashMap<&PathBuf, NodeId> = module_paths
120        .iter()
121        .enumerate()
122        .map(|(i, p)| (p, NodeId((i + 1) as i64)))
123        .collect();
124
125    // Build ModuleInfo map.
126    let mut modules: HashMap<PathBuf, ModuleInfo> = HashMap::new();
127    for (dir, files) in &dir_files {
128        modules.insert(
129            dir.clone(),
130            ModuleInfo {
131                path: dir.clone(),
132                files: files.clone(),
133                languages: dir_languages.get(dir).cloned().unwrap_or_default(),
134            },
135        );
136    }
137
138    // Step 2: Create KnowledgeNode for each module.
139    let nodes: Vec<KnowledgeNode> = module_paths
140        .iter()
141        .map(|dir| {
142            let info = &modules[dir];
143            let node_id = path_to_node_id[dir];
144
145            // Compute human-readable purpose from doc-comments / symbols.
146            let purpose = derive_module_purpose(&info.files, &file_map);
147
148            let description = format!(
149                "Module '{}' containing {} file(s) [{}]",
150                if dir.as_os_str().is_empty() {
151                    "(root)"
152                } else {
153                    dir.to_str().unwrap_or("(non-utf8)")
154                },
155                info.files.len(),
156                info.languages
157                    .iter()
158                    .cloned()
159                    .collect::<Vec<_>>()
160                    .join(", ")
161            );
162
163            let mut ext = serde_json::json!({
164                "source": "module_structure",
165                "module_path": dir.to_str().unwrap_or(""),
166                "file_count": info.files.len(),
167                "languages": info.languages.iter().cloned().collect::<Vec<_>>(),
168                "files": info.files.iter().map(|f| f.to_str().unwrap_or("").to_owned()).collect::<Vec<_>>(),
169            });
170            if let Some(ref p) = purpose {
171                ext["purpose"] = serde_json::Value::String(p.clone());
172            }
173            let ext_data = ext;
174
175            KnowledgeNode {
176                id: node_id,
177                branch_id: branch_id.clone(),
178                nature: KnowledgeNature::Fact,
179                weight: KnowledgeWeight::Info,
180                confidence: 1.0,
181                adoption_count: 1,
182                total_count: 1,
183                description,
184                ext_data: Some(ext_data),
185            }
186        })
187        .collect();
188
189    // Step 3: Resolve import module paths to target module directories.
190    // Build a map from potential import targets to their module directories.
191    let import_target_map = build_import_target_map(project_root, parsed_files);
192
193    // Step 5: Build DependsOn edges from imports.
194    let mut edge_id_counter: i64 = 1;
195    let mut depends_on_set: BTreeSet<(PathBuf, PathBuf)> = BTreeSet::new();
196
197    for pf in parsed_files {
198        let rel = make_relative(&pf.path, project_root);
199        let source_dir = rel.parent().map(|p| p.to_path_buf()).unwrap_or_default();
200
201        for import in &pf.imports {
202            if let Some(target_dir) = resolve_import_to_module(
203                &import.module,
204                &source_dir,
205                &import_target_map,
206                &pf.language,
207            ) {
208                // Skip self-imports (same module).
209                if target_dir != source_dir {
210                    depends_on_set.insert((source_dir.clone(), target_dir));
211                }
212            }
213        }
214    }
215
216    let mut edges: Vec<Edge> = Vec::new();
217
218    for (source_dir, target_dir) in &depends_on_set {
219        if let (Some(&source_id), Some(&target_id)) = (
220            path_to_node_id.get(source_dir),
221            path_to_node_id.get(target_dir),
222        ) {
223            edges.push(Edge {
224                id: EdgeId(edge_id_counter),
225                source_id,
226                target_id,
227                edge_type: EdgeType::DependsOn,
228                branch_id: branch_id.clone(),
229                weight: 1.0,
230                metadata: Some(serde_json::json!({
231                    "source_module": source_dir.to_str().unwrap_or(""),
232                    "target_module": target_dir.to_str().unwrap_or(""),
233                })),
234            });
235            edge_id_counter += 1;
236        }
237    }
238
239    // Step 6: Build PartOf edges for module hierarchy.
240    for dir in &module_paths {
241        if dir.as_os_str().is_empty() {
242            continue; // Root has no parent.
243        }
244        if let Some(parent) = dir.parent() {
245            let parent_path = parent.to_path_buf();
246            // Only add PartOf if the parent is itself a detected module.
247            if let (Some(&child_id), Some(&parent_id)) =
248                (path_to_node_id.get(dir), path_to_node_id.get(&parent_path))
249            {
250                edges.push(Edge {
251                    id: EdgeId(edge_id_counter),
252                    source_id: child_id,
253                    target_id: parent_id,
254                    edge_type: EdgeType::PartOf,
255                    branch_id: branch_id.clone(),
256                    weight: 1.0,
257                    metadata: Some(serde_json::json!({
258                        "child_module": dir.to_str().unwrap_or(""),
259                        "parent_module": parent_path.to_str().unwrap_or(""),
260                    })),
261                });
262                edge_id_counter += 1;
263            }
264        }
265    }
266
267    // Build lookup maps for queries.
268    let path_to_node_id_owned: HashMap<PathBuf, NodeId> = path_to_node_id
269        .iter()
270        .map(|(p, &id)| ((*p).clone(), id))
271        .collect();
272    let node_id_to_path: HashMap<NodeId, PathBuf> = path_to_node_id_owned
273        .iter()
274        .map(|(p, &id)| (id, p.clone()))
275        .collect();
276
277    ModuleGraph {
278        nodes,
279        edges,
280        modules,
281        path_to_node_id: path_to_node_id_owned,
282        node_id_to_path,
283    }
284}
285
286/// Build a map from import target strings to the module directory containing
287/// the target file. This handles:
288/// - Relative file paths (e.g., "./utils", "../models/user")
289/// - Module/package paths for each language
290/// - Directory-level module paths (e.g., "src/models" → src/models directory)
291fn build_import_target_map(
292    project_root: &Path,
293    parsed_files: &[ProjectFile],
294) -> HashMap<String, PathBuf> {
295    let mut map: HashMap<String, PathBuf> = HashMap::new();
296
297    // First, collect all module directories so we can register them.
298    let mut module_dirs: BTreeSet<PathBuf> = BTreeSet::new();
299
300    for pf in parsed_files {
301        let rel = make_relative(&pf.path, project_root);
302        let dir = rel.parent().map(|p| p.to_path_buf()).unwrap_or_default();
303        module_dirs.insert(dir.clone());
304
305        // Register by full relative path (without extension).
306        if let Some(stem) = rel.file_stem().and_then(|s| s.to_str()) {
307            let no_ext = if dir.as_os_str().is_empty() {
308                stem.to_owned()
309            } else {
310                format!("{}/{stem}", dir.display())
311            };
312            map.entry(no_ext).or_insert_with(|| dir.clone());
313        }
314
315        // Register by full relative path (with extension).
316        map.entry(rel.to_string_lossy().to_string())
317            .or_insert_with(|| dir.clone());
318
319        // For Python, register by dotted module path.
320        if pf.language == Language::Python {
321            let dotted = rel
322                .with_extension("")
323                .components()
324                .map(|c| c.as_os_str().to_string_lossy().to_string())
325                .collect::<Vec<_>>()
326                .join(".");
327            map.entry(dotted.clone()).or_insert_with(|| dir.clone());
328            // Also register the directory itself as a package.
329            if !dir.as_os_str().is_empty() {
330                let dir_dotted = dir
331                    .components()
332                    .map(|c| c.as_os_str().to_string_lossy().to_string())
333                    .collect::<Vec<_>>()
334                    .join(".");
335                map.entry(dir_dotted).or_insert_with(|| dir.clone());
336            }
337        }
338
339        // For Rust, register by crate-style path (:: separator).
340        if pf.language == Language::Rust {
341            let rust_path = rel
342                .with_extension("")
343                .components()
344                .map(|c| c.as_os_str().to_string_lossy().to_string())
345                .collect::<Vec<_>>()
346                .join("::");
347            map.entry(rust_path).or_insert_with(|| dir.clone());
348        }
349
350        // Register each export name in this file, mapped to this module.
351        for export in &pf.exports {
352            if !export.name.is_empty() {
353                // Qualified: "dir/export_name" or just "export_name" at root.
354                let qualified = if dir.as_os_str().is_empty() {
355                    export.name.clone()
356                } else {
357                    format!("{}/{}", dir.display(), export.name)
358                };
359                map.entry(qualified).or_insert_with(|| dir.clone());
360            }
361        }
362    }
363
364    // Register directory-level module paths.
365    // This handles imports like `crate::models` (Rust) or `models` (Python package).
366    for dir in &module_dirs {
367        if dir.as_os_str().is_empty() {
368            continue;
369        }
370        // Register by filesystem path (e.g., "src/models").
371        let dir_str = dir.to_string_lossy().to_string();
372        map.entry(dir_str).or_insert_with(|| dir.clone());
373
374        // Register by Rust-style path (e.g., "src::models").
375        let rust_dir_path = dir
376            .components()
377            .map(|c| c.as_os_str().to_string_lossy().to_string())
378            .collect::<Vec<_>>()
379            .join("::");
380        map.entry(rust_dir_path).or_insert_with(|| dir.clone());
381
382        // Register by dotted path for Python (e.g., "models").
383        let dotted_dir = dir
384            .components()
385            .map(|c| c.as_os_str().to_string_lossy().to_string())
386            .collect::<Vec<_>>()
387            .join(".");
388        map.entry(dotted_dir).or_insert_with(|| dir.clone());
389
390        // Register by last component (directory name) for simple imports.
391        if let Some(name) = dir.file_name().and_then(|n| n.to_str()) {
392            map.entry(name.to_owned()).or_insert_with(|| dir.clone());
393        }
394    }
395
396    map
397}
398
399/// Resolve an import module path to a target module directory.
400fn resolve_import_to_module(
401    import_module: &str,
402    source_dir: &Path,
403    target_map: &HashMap<String, PathBuf>,
404    language: &Language,
405) -> Option<PathBuf> {
406    // 1. Direct lookup in the target map.
407    if let Some(dir) = target_map.get(import_module) {
408        return Some(dir.clone());
409    }
410
411    // 2. For relative imports (starting with . or ..), resolve relative to source dir.
412    if import_module.starts_with('.') {
413        let cleaned = import_module
414            .trim_start_matches("./")
415            .trim_start_matches("../");
416
417        // Try resolving relative to source directory.
418        let resolved = if import_module.starts_with("../") {
419            source_dir
420                .parent()
421                .map(|p| p.join(cleaned))
422                .unwrap_or_else(|| PathBuf::from(cleaned))
423        } else if import_module.starts_with("./") {
424            source_dir.join(cleaned)
425        } else {
426            // Just "." — refers to current directory (Python relative import).
427            return Some(source_dir.to_path_buf());
428        };
429
430        let resolved_str = resolved.to_string_lossy().to_string();
431        if let Some(dir) = target_map.get(&resolved_str) {
432            return Some(dir.clone());
433        }
434
435        // Try the resolved path as a directory itself if it's a module.
436        if target_map.values().any(|d| *d == resolved) {
437            return Some(resolved);
438        }
439    }
440
441    // 3. For Python dotted imports like "mypackage.models", try lookup.
442    if *language == Language::Python && import_module.contains('.') {
443        if let Some(dir) = target_map.get(import_module) {
444            return Some(dir.clone());
445        }
446        // Try the base package.
447        let base = import_module.split('.').next().unwrap_or(import_module);
448        if let Some(dir) = target_map.get(base) {
449            return Some(dir.clone());
450        }
451    }
452
453    // 4. For Rust :: imports like "crate::config" or "super::models".
454    if *language == Language::Rust {
455        // Handle crate:: prefix.
456        if let Some(rest) = import_module.strip_prefix("crate::") {
457            // Map crate::X::Y to path X/Y or src/X/Y.
458            let as_path = rest.replace("::", "/");
459            if let Some(dir) = target_map.get(&as_path) {
460                return Some(dir.clone());
461            }
462            let src_path = format!("src/{as_path}");
463            if let Some(dir) = target_map.get(&src_path) {
464                return Some(dir.clone());
465            }
466        }
467        // Handle super:: prefix.
468        if let Some(rest) = import_module.strip_prefix("super::") {
469            if let Some(parent) = source_dir.parent() {
470                let as_path = rest.replace("::", "/");
471                let resolved = parent.join(&as_path);
472                let resolved_str = resolved.to_string_lossy().to_string();
473                if let Some(dir) = target_map.get(&resolved_str) {
474                    return Some(dir.clone());
475                }
476            }
477        }
478        // Handle self:: prefix.
479        if let Some(rest) = import_module.strip_prefix("self::") {
480            let as_path = rest.replace("::", "/");
481            let resolved = source_dir.join(&as_path);
482            let resolved_str = resolved.to_string_lossy().to_string();
483            if let Some(dir) = target_map.get(&resolved_str) {
484                return Some(dir.clone());
485            }
486        }
487    }
488
489    // 5. For JS/TS, try with common file extensions.
490    if matches!(language, Language::JavaScript | Language::TypeScript) {
491        // "./foo" might resolve to "./foo.ts", "./foo.js", "./foo/index.ts", etc.
492        if import_module.starts_with('.') {
493            let cleaned = import_module
494                .trim_start_matches("./")
495                .trim_start_matches("../");
496            let base = if import_module.starts_with("../") {
497                source_dir
498                    .parent()
499                    .map(|p| p.join(cleaned))
500                    .unwrap_or_else(|| PathBuf::from(cleaned))
501            } else {
502                source_dir.join(cleaned)
503            };
504
505            let base_str = base.to_string_lossy().to_string();
506
507            // Try: base/index
508            let index_path = format!("{base_str}/index");
509            if let Some(dir) = target_map.get(&index_path) {
510                return Some(dir.clone());
511            }
512        }
513    }
514
515    None
516}
517
/// Express `path` relative to `root`.
///
/// When `path` does not start with `root` (for example because it is already
/// relative), it is returned unchanged.
fn make_relative(path: &Path, root: &Path) -> PathBuf {
    path.strip_prefix(root).unwrap_or(path).to_path_buf()
}
524
/// Minimum byte length for a file-doc string to be considered meaningful.
const MIN_DOC_LEN: usize = 8;

/// Returns `true` if the file-doc string is a technical directive rather
/// than a human-readable description and should be excluded from `purpose`.
///
/// The string is trimmed first. It is then rejected when it is shorter than
/// [`MIN_DOC_LEN`] bytes, starts with a known directive prefix
/// (TypeScript/JavaScript annotations such as `@ts-nocheck` or `@type`,
/// `eslint-disable`, Python `noqa`, a `#!` shebang), or contains a
/// `type: ignore` marker anywhere.
fn is_noise_file_doc(s: &str) -> bool {
    /// Directives are only recognised at the very start of the text, so a
    /// doc comment that merely *mentions* one (e.g. explains why
    /// `eslint-disable` is used) is not discarded.
    const DIRECTIVE_PREFIXES: &[&str] = &[
        "@ts-", // @ts-nocheck, @ts-ignore
        "@type", // @type {import('...')} JSDoc annotations
        "@jest-",
        "@flow",
        "@noinspection",
        "eslint-disable",
        "// eslint-disable",
        "/* eslint-disable",
        "noqa",
        "#!", // shebang
    ];
    /// Markers that count as noise wherever they occur in the text.
    const DIRECTIVE_FRAGMENTS: &[&str] = &["type: ignore", "type:ignore"];

    let s = s.trim();
    s.len() < MIN_DOC_LEN
        || DIRECTIVE_PREFIXES.iter().any(|prefix| s.starts_with(*prefix))
        || DIRECTIVE_FRAGMENTS.iter().any(|fragment| s.contains(*fragment))
}
552
/// Strip Markdown heading markers (`# `, `## `, …) from each line and return
/// at most `max_lines` non-empty lines joined with `\n`.
///
/// Each line is trimmed *before* the leading `#` characters are removed, so
/// a heading that is indented (e.g. ` # Title`) is cleaned the same way as
/// one that starts at column zero. Lines that end up empty are skipped and
/// do not count towards `max_lines`.
fn clean_doc_text(s: &str, max_lines: usize) -> String {
    s.lines()
        .map(|line| line.trim().trim_start_matches('#').trim())
        .filter(|line| !line.is_empty())
        .take(max_lines)
        .collect::<Vec<_>>()
        .join("\n")
}
563
564/// Derive a human-readable purpose string for a module.
565///
566/// Priority:
567/// 1. `file_doc` from the canonical entry-point file (`lib.rs`, `mod.rs`,
568///    `__init__.py`, `index.ts`, `index.js`, `main.rs`) — up to 5 lines.
569/// 2. Up to `MAX_DOCS` `file_doc` values from other files in the module,
570///    each truncated to `MAX_LINES_PER_DOC` lines, joined with ` | `.
571/// 3. Deduplicated public symbol names (functions + types) — up to
572///    `MAX_SYMBOLS`, with `+N more` for the remainder.
573/// 4. `None` if nothing useful is found.
574fn derive_module_purpose(
575    files: &[PathBuf],
576    file_map: &HashMap<PathBuf, &ProjectFile>,
577) -> Option<String> {
578    const ENTRY_POINT_NAMES: &[&str] = &[
579        "lib.rs",
580        "mod.rs",
581        "main.rs",
582        "__init__.py",
583        "index.ts",
584        "index.js",
585        "index.mjs",
586    ];
587    /// Lines taken from the entry-point doc.
588    const ENTRY_POINT_MAX_LINES: usize = 5;
589    /// Maximum number of file_docs collected for Priority 2.
590    const MAX_DOCS: usize = 10;
591    /// Lines taken per file_doc in Priority 2.
592    const MAX_LINES_PER_DOC: usize = 3;
593    /// Maximum distinct public symbol names shown in the fallback.
594    const MAX_SYMBOLS: usize = 8;
595
596    // Priority 1: file_doc from entry-point file.
597    for file_path in files {
598        let file_name = file_path.file_name().and_then(|f| f.to_str()).unwrap_or("");
599        if ENTRY_POINT_NAMES.contains(&file_name) {
600            if let Some(pf) = file_map.get(file_path) {
601                if let Some(ref doc) = pf.file_doc {
602                    let raw = doc.trim();
603                    if !raw.is_empty() && !is_noise_file_doc(raw) {
604                        let cleaned = clean_doc_text(raw, ENTRY_POINT_MAX_LINES);
605                        if !cleaned.is_empty() {
606                            return Some(cleaned);
607                        }
608                    }
609                }
610            }
611        }
612    }
613
614    // Priority 2: collect file_doc from any file in the module.
615    // Each doc is truncated to MAX_LINES_PER_DOC lines; noise is filtered out.
616    let file_docs: Vec<String> = files
617        .iter()
618        .filter_map(|fp| {
619            let pf = file_map.get(fp)?;
620            let raw = pf.file_doc.as_deref()?.trim();
621            if raw.is_empty() || is_noise_file_doc(raw) {
622                return None;
623            }
624            let cleaned = clean_doc_text(raw, MAX_LINES_PER_DOC);
625            if cleaned.is_empty() {
626                None
627            } else {
628                Some(cleaned)
629            }
630        })
631        .take(MAX_DOCS)
632        .collect();
633
634    if !file_docs.is_empty() {
635        return Some(file_docs.join(" | "));
636    }
637
638    // Priority 3: deduplicated public symbol names.
639    let mut seen = std::collections::HashSet::new();
640    let mut symbols: Vec<String> = Vec::new();
641    for file_path in files {
642        if let Some(pf) = file_map.get(file_path) {
643            for f in &pf.functions {
644                if f.is_public && seen.insert(f.name.clone()) {
645                    symbols.push(f.name.clone());
646                }
647            }
648            for t in &pf.types {
649                if t.is_public && seen.insert(t.name.clone()) {
650                    symbols.push(t.name.clone());
651                }
652            }
653        }
654    }
655
656    if symbols.is_empty() {
657        return None;
658    }
659
660    let total = symbols.len();
661    let shown = symbols.into_iter().take(MAX_SYMBOLS).collect::<Vec<_>>();
662    let mut result = shown.join(", ");
663    if total > MAX_SYMBOLS {
664        result.push_str(&format!(" +{} more", total - MAX_SYMBOLS));
665    }
666    Some(result)
667}
668
669#[cfg(test)]
670mod tests {
671    use super::*;
672    use seshat_core::{
673        Export, Import, JavaScriptIR, Language, LanguageIR, PythonIR, RustIR, TypeScriptIR,
674    };
675    use std::path::PathBuf;
676
677    /// Helper: create a minimal ProjectFile with the given path, language, imports, and exports.
678    fn make_project_file(
679        path: &str,
680        language: Language,
681        imports: Vec<Import>,
682        exports: Vec<Export>,
683    ) -> ProjectFile {
684        ProjectFile {
685            path: PathBuf::from(path),
686            language,
687            content_hash: "test_hash".to_owned(),
688            imports,
689            exports,
690            functions: Vec::new(),
691            types: Vec::new(),
692            dependencies_used: Vec::new(),
693            language_ir: match language {
694                Language::Rust => LanguageIR::Rust(RustIR::default()),
695                Language::TypeScript => LanguageIR::TypeScript(TypeScriptIR::default()),
696                Language::JavaScript => LanguageIR::JavaScript(JavaScriptIR::default()),
697                Language::Python => LanguageIR::Python(PythonIR::default()),
698            },
699            file_doc: None,
700        }
701    }
702
703    fn import(module: &str) -> Import {
704        Import {
705            module: module.to_owned(),
706            names: Vec::new(),
707            is_type_only: false,
708            line: 1,
709        }
710    }
711
712    fn import_with_names(module: &str, names: &[&str]) -> Import {
713        Import {
714            module: module.to_owned(),
715            names: names.iter().map(|n| n.to_string()).collect(),
716            is_type_only: false,
717            line: 1,
718        }
719    }
720
721    fn export(name: &str) -> Export {
722        Export {
723            name: name.to_owned(),
724            is_default: false,
725            is_type_only: false,
726            line: 1,
727            end_line: 1,
728        }
729    }
730
731    // -----------------------------------------------------------------------
732    // Module detection tests
733    // -----------------------------------------------------------------------
734
735    #[test]
736    fn detects_modules_from_directories() {
737        let root = Path::new("/project");
738        let files = vec![
739            make_project_file("/project/src/main.rs", Language::Rust, vec![], vec![]),
740            make_project_file("/project/src/lib.rs", Language::Rust, vec![], vec![]),
741            make_project_file(
742                "/project/tests/test_main.rs",
743                Language::Rust,
744                vec![],
745                vec![],
746            ),
747        ];
748
749        let graph = build_module_graph(root, &files, &BranchId::from("main"));
750
751        assert_eq!(graph.modules.len(), 2);
752        assert!(graph.modules.contains_key(&PathBuf::from("src")));
753        assert!(graph.modules.contains_key(&PathBuf::from("tests")));
754    }
755
756    #[test]
757    fn root_directory_detected_as_module() {
758        let root = Path::new("/project");
759        let files = vec![make_project_file(
760            "/project/main.py",
761            Language::Python,
762            vec![],
763            vec![],
764        )];
765
766        let graph = build_module_graph(root, &files, &BranchId::from("main"));
767
768        // Root directory is PathBuf::from("")
769        assert_eq!(graph.modules.len(), 1);
770        assert!(graph.modules.contains_key(&PathBuf::from("")));
771    }
772
773    #[test]
774    fn nested_modules_detected() {
775        let root = Path::new("/project");
776        let files = vec![
777            make_project_file("/project/src/main.rs", Language::Rust, vec![], vec![]),
778            make_project_file(
779                "/project/src/handlers/api.rs",
780                Language::Rust,
781                vec![],
782                vec![],
783            ),
784            make_project_file(
785                "/project/src/handlers/web.rs",
786                Language::Rust,
787                vec![],
788                vec![],
789            ),
790        ];
791
792        let graph = build_module_graph(root, &files, &BranchId::from("main"));
793
794        assert_eq!(graph.modules.len(), 2);
795        assert!(graph.modules.contains_key(&PathBuf::from("src")));
796        assert!(graph.modules.contains_key(&PathBuf::from("src/handlers")));
797    }
798
799    #[test]
800    fn module_tracks_languages() {
801        let root = Path::new("/project");
802        let files = vec![
803            make_project_file(
804                "/project/src/index.ts",
805                Language::TypeScript,
806                vec![],
807                vec![],
808            ),
809            make_project_file(
810                "/project/src/utils.js",
811                Language::JavaScript,
812                vec![],
813                vec![],
814            ),
815        ];
816
817        let graph = build_module_graph(root, &files, &BranchId::from("main"));
818
819        let src = &graph.modules[&PathBuf::from("src")];
820        assert!(src.languages.contains("typescript"));
821        assert!(src.languages.contains("javascript"));
822    }
823
824    #[test]
825    fn module_tracks_files() {
826        let root = Path::new("/project");
827        let files = vec![
828            make_project_file("/project/src/main.rs", Language::Rust, vec![], vec![]),
829            make_project_file("/project/src/config.rs", Language::Rust, vec![], vec![]),
830        ];
831
832        let graph = build_module_graph(root, &files, &BranchId::from("main"));
833
834        let src = &graph.modules[&PathBuf::from("src")];
835        assert_eq!(src.files.len(), 2);
836    }
837
838    // -----------------------------------------------------------------------
839    // Knowledge node tests
840    // -----------------------------------------------------------------------
841
842    #[test]
843    fn creates_fact_nodes_for_modules() {
844        let root = Path::new("/project");
845        let files = vec![
846            make_project_file("/project/src/main.rs", Language::Rust, vec![], vec![]),
847            make_project_file("/project/tests/test.rs", Language::Rust, vec![], vec![]),
848        ];
849
850        let graph = build_module_graph(root, &files, &BranchId::from("main"));
851
852        assert_eq!(graph.nodes.len(), 2);
853        for node in &graph.nodes {
854            assert_eq!(node.nature, KnowledgeNature::Fact);
855            assert_eq!(node.weight, KnowledgeWeight::Info);
856            assert_eq!(node.confidence, 1.0);
857            assert_eq!(node.branch_id, BranchId::from("main"));
858            assert!(node.ext_data.is_some());
859        }
860    }
861
862    #[test]
863    fn node_ext_data_contains_module_info() {
864        let root = Path::new("/project");
865        let files = vec![
866            make_project_file("/project/src/main.rs", Language::Rust, vec![], vec![]),
867            make_project_file("/project/src/lib.rs", Language::Rust, vec![], vec![]),
868        ];
869
870        let graph = build_module_graph(root, &files, &BranchId::from("main"));
871
872        let node = graph
873            .nodes
874            .iter()
875            .find(|n| n.description.contains("'src'"))
876            .expect("should have src module node");
877
878        let ext = node.ext_data.as_ref().unwrap();
879        assert_eq!(ext["source"], "module_structure");
880        assert_eq!(ext["module_path"], "src");
881        assert_eq!(ext["file_count"], 2);
882    }
883
884    // -----------------------------------------------------------------------
885    // DependsOn edge tests
886    // -----------------------------------------------------------------------
887
888    #[test]
889    fn creates_depends_on_edges_for_relative_imports_ts() {
890        let root = Path::new("/project");
891        let files = vec![
892            make_project_file(
893                "/project/src/index.ts",
894                Language::TypeScript,
895                vec![import("./utils")],
896                vec![],
897            ),
898            make_project_file(
899                "/project/src/utils.ts",
900                Language::TypeScript,
901                vec![],
902                vec![export("formatDate")],
903            ),
904        ];
905
906        // Both in "src" — same module, so no DependsOn edge.
907        let graph = build_module_graph(root, &files, &BranchId::from("main"));
908        let depends_on: Vec<_> = graph
909            .edges
910            .iter()
911            .filter(|e| e.edge_type == EdgeType::DependsOn)
912            .collect();
913        assert_eq!(
914            depends_on.len(),
915            0,
916            "Same-module imports should not produce DependsOn edges"
917        );
918    }
919
920    #[test]
921    fn creates_depends_on_edges_cross_directory_ts() {
922        let root = Path::new("/project");
923        let files = vec![
924            make_project_file(
925                "/project/src/pages/home.ts",
926                Language::TypeScript,
927                vec![import("../utils/format")],
928                vec![],
929            ),
930            make_project_file(
931                "/project/src/utils/format.ts",
932                Language::TypeScript,
933                vec![],
934                vec![export("formatDate")],
935            ),
936        ];
937
938        let graph = build_module_graph(root, &files, &BranchId::from("main"));
939        let depends_on: Vec<_> = graph
940            .edges
941            .iter()
942            .filter(|e| e.edge_type == EdgeType::DependsOn)
943            .collect();
944
945        assert_eq!(depends_on.len(), 1);
946        assert_eq!(depends_on[0].edge_type, EdgeType::DependsOn);
947    }
948
949    #[test]
950    fn creates_depends_on_edges_rust_crate_imports() {
951        let root = Path::new("/project");
952        let files = vec![
953            make_project_file(
954                "/project/src/main.rs",
955                Language::Rust,
956                vec![import_with_names("crate::config", &["Config"])],
957                vec![],
958            ),
959            make_project_file(
960                "/project/src/config.rs",
961                Language::Rust,
962                vec![],
963                vec![export("Config")],
964            ),
965        ];
966
967        // Both in "src" — same module, no DependsOn edge.
968        let graph = build_module_graph(root, &files, &BranchId::from("main"));
969        let depends_on: Vec<_> = graph
970            .edges
971            .iter()
972            .filter(|e| e.edge_type == EdgeType::DependsOn)
973            .collect();
974        assert_eq!(
975            depends_on.len(),
976            0,
977            "Same-module crate:: imports should not produce edges"
978        );
979    }
980
981    #[test]
982    fn creates_depends_on_edges_rust_cross_module() {
983        let root = Path::new("/project");
984        let files = vec![
985            make_project_file(
986                "/project/src/handlers/api.rs",
987                Language::Rust,
988                vec![import_with_names("crate::models", &["User"])],
989                vec![],
990            ),
991            make_project_file(
992                "/project/src/models/user.rs",
993                Language::Rust,
994                vec![],
995                vec![export("User")],
996            ),
997        ];
998
999        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1000        let depends_on: Vec<_> = graph
1001            .edges
1002            .iter()
1003            .filter(|e| e.edge_type == EdgeType::DependsOn)
1004            .collect();
1005
1006        assert_eq!(depends_on.len(), 1);
1007    }
1008
1009    #[test]
1010    fn creates_depends_on_edges_python_dotted_imports() {
1011        let root = Path::new("/project");
1012        let files = vec![
1013            make_project_file(
1014                "/project/mypackage/services.py",
1015                Language::Python,
1016                vec![import("mypackage.models")],
1017                vec![],
1018            ),
1019            make_project_file(
1020                "/project/mypackage/models.py",
1021                Language::Python,
1022                vec![],
1023                vec![export("User")],
1024            ),
1025        ];
1026
1027        // Both in "mypackage" — same module, no DependsOn edge.
1028        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1029        let depends_on: Vec<_> = graph
1030            .edges
1031            .iter()
1032            .filter(|e| e.edge_type == EdgeType::DependsOn)
1033            .collect();
1034        assert_eq!(depends_on.len(), 0);
1035    }
1036
1037    #[test]
1038    fn creates_depends_on_edges_python_cross_directory() {
1039        let root = Path::new("/project");
1040        let files = vec![
1041            make_project_file(
1042                "/project/app/views.py",
1043                Language::Python,
1044                vec![import("models.user")],
1045                vec![],
1046            ),
1047            make_project_file(
1048                "/project/models/user.py",
1049                Language::Python,
1050                vec![],
1051                vec![export("User")],
1052            ),
1053        ];
1054
1055        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1056        let depends_on: Vec<_> = graph
1057            .edges
1058            .iter()
1059            .filter(|e| e.edge_type == EdgeType::DependsOn)
1060            .collect();
1061
1062        assert_eq!(depends_on.len(), 1);
1063    }
1064
1065    #[test]
1066    fn no_duplicate_depends_on_edges() {
1067        let root = Path::new("/project");
1068        let files = vec![
1069            make_project_file(
1070                "/project/src/pages/home.ts",
1071                Language::TypeScript,
1072                vec![
1073                    import("../utils/format"),
1074                    import("../utils/validate"), // Two imports to same target module.
1075                ],
1076                vec![],
1077            ),
1078            make_project_file(
1079                "/project/src/utils/format.ts",
1080                Language::TypeScript,
1081                vec![],
1082                vec![export("formatDate")],
1083            ),
1084            make_project_file(
1085                "/project/src/utils/validate.ts",
1086                Language::TypeScript,
1087                vec![],
1088                vec![export("isValid")],
1089            ),
1090        ];
1091
1092        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1093        let depends_on: Vec<_> = graph
1094            .edges
1095            .iter()
1096            .filter(|e| e.edge_type == EdgeType::DependsOn)
1097            .collect();
1098
1099        // pages -> utils (only one edge, even though two imports resolve there).
1100        assert_eq!(depends_on.len(), 1);
1101    }
1102
1103    #[test]
1104    fn self_imports_not_edges() {
1105        let root = Path::new("/project");
1106        let files = vec![
1107            make_project_file(
1108                "/project/src/a.ts",
1109                Language::TypeScript,
1110                vec![import("./b")],
1111                vec![],
1112            ),
1113            make_project_file(
1114                "/project/src/b.ts",
1115                Language::TypeScript,
1116                vec![],
1117                vec![export("B")],
1118            ),
1119        ];
1120
1121        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1122        let depends_on: Vec<_> = graph
1123            .edges
1124            .iter()
1125            .filter(|e| e.edge_type == EdgeType::DependsOn)
1126            .collect();
1127
1128        assert_eq!(
1129            depends_on.len(),
1130            0,
1131            "Same-directory imports should not produce edges"
1132        );
1133    }
1134
1135    // -----------------------------------------------------------------------
1136    // PartOf edge tests
1137    // -----------------------------------------------------------------------
1138
1139    #[test]
1140    fn creates_part_of_edges_for_nested_modules() {
1141        let root = Path::new("/project");
1142        let files = vec![
1143            make_project_file("/project/src/main.rs", Language::Rust, vec![], vec![]),
1144            make_project_file(
1145                "/project/src/handlers/api.rs",
1146                Language::Rust,
1147                vec![],
1148                vec![],
1149            ),
1150        ];
1151
1152        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1153        let part_of: Vec<_> = graph
1154            .edges
1155            .iter()
1156            .filter(|e| e.edge_type == EdgeType::PartOf)
1157            .collect();
1158
1159        // src/handlers PartOf src
1160        assert_eq!(part_of.len(), 1);
1161    }
1162
1163    #[test]
1164    fn no_part_of_for_root_module() {
1165        let root = Path::new("/project");
1166        let files = vec![make_project_file(
1167            "/project/main.py",
1168            Language::Python,
1169            vec![],
1170            vec![],
1171        )];
1172
1173        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1174        let part_of: Vec<_> = graph
1175            .edges
1176            .iter()
1177            .filter(|e| e.edge_type == EdgeType::PartOf)
1178            .collect();
1179
1180        assert_eq!(part_of.len(), 0);
1181    }
1182
1183    #[test]
1184    fn part_of_only_when_parent_is_module() {
1185        let root = Path::new("/project");
1186        // "src/deep/nested/" exists but "src/deep/" has no files — not a module.
1187        let files = vec![make_project_file(
1188            "/project/src/deep/nested/file.rs",
1189            Language::Rust,
1190            vec![],
1191            vec![],
1192        )];
1193
1194        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1195        let part_of: Vec<_> = graph
1196            .edges
1197            .iter()
1198            .filter(|e| e.edge_type == EdgeType::PartOf)
1199            .collect();
1200
1201        // "src/deep/nested" has no parent module (src/deep has no files), so no PartOf.
1202        assert_eq!(part_of.len(), 0);
1203    }
1204
1205    #[test]
1206    fn deep_hierarchy_part_of_chain() {
1207        let root = Path::new("/project");
1208        let files = vec![
1209            make_project_file("/project/src/main.rs", Language::Rust, vec![], vec![]),
1210            make_project_file(
1211                "/project/src/api/handler.rs",
1212                Language::Rust,
1213                vec![],
1214                vec![],
1215            ),
1216            make_project_file(
1217                "/project/src/api/v2/handler.rs",
1218                Language::Rust,
1219                vec![],
1220                vec![],
1221            ),
1222        ];
1223
1224        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1225        let part_of: Vec<_> = graph
1226            .edges
1227            .iter()
1228            .filter(|e| e.edge_type == EdgeType::PartOf)
1229            .collect();
1230
1231        // src/api PartOf src
1232        // src/api/v2 PartOf src/api
1233        assert_eq!(part_of.len(), 2);
1234    }
1235
1236    // -----------------------------------------------------------------------
1237    // Queryability tests
1238    // -----------------------------------------------------------------------
1239
1240    #[test]
1241    fn query_dependencies_of() {
1242        let root = Path::new("/project");
1243        let files = vec![
1244            make_project_file(
1245                "/project/src/pages/home.ts",
1246                Language::TypeScript,
1247                vec![import("../utils/format")],
1248                vec![],
1249            ),
1250            make_project_file(
1251                "/project/src/utils/format.ts",
1252                Language::TypeScript,
1253                vec![],
1254                vec![export("formatDate")],
1255            ),
1256        ];
1257
1258        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1259
1260        let deps = graph.dependencies_of(Path::new("src/pages"));
1261        assert_eq!(deps.len(), 1);
1262        assert_eq!(*deps[0], PathBuf::from("src/utils"));
1263    }
1264
1265    #[test]
1266    fn query_dependents_of() {
1267        let root = Path::new("/project");
1268        let files = vec![
1269            make_project_file(
1270                "/project/src/pages/home.ts",
1271                Language::TypeScript,
1272                vec![import("../utils/format")],
1273                vec![],
1274            ),
1275            make_project_file(
1276                "/project/src/utils/format.ts",
1277                Language::TypeScript,
1278                vec![],
1279                vec![export("formatDate")],
1280            ),
1281        ];
1282
1283        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1284
1285        let dependents = graph.dependents_of(Path::new("src/utils"));
1286        assert_eq!(dependents.len(), 1);
1287        assert_eq!(*dependents[0], PathBuf::from("src/pages"));
1288    }
1289
1290    #[test]
1291    fn query_nonexistent_module_returns_empty() {
1292        let root = Path::new("/project");
1293        let files = vec![make_project_file(
1294            "/project/src/main.rs",
1295            Language::Rust,
1296            vec![],
1297            vec![],
1298        )];
1299
1300        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1301
1302        assert!(graph.dependencies_of(Path::new("nonexistent")).is_empty());
1303        assert!(graph.dependents_of(Path::new("nonexistent")).is_empty());
1304    }
1305
1306    // -----------------------------------------------------------------------
1307    // Empty input tests
1308    // -----------------------------------------------------------------------
1309
1310    #[test]
1311    fn empty_files_produces_empty_graph() {
1312        let root = Path::new("/project");
1313        let graph = build_module_graph(root, &[], &BranchId::from("main"));
1314
1315        assert!(graph.nodes.is_empty());
1316        assert!(graph.edges.is_empty());
1317        assert!(graph.modules.is_empty());
1318    }
1319
1320    // -----------------------------------------------------------------------
1321    // Mixed language tests
1322    // -----------------------------------------------------------------------
1323
1324    #[test]
1325    fn mixed_language_project() {
1326        let root = Path::new("/project");
1327        let files = vec![
1328            make_project_file(
1329                "/project/frontend/src/App.tsx",
1330                Language::TypeScript,
1331                vec![import("../shared/types")],
1332                vec![],
1333            ),
1334            make_project_file(
1335                "/project/frontend/shared/types.ts",
1336                Language::TypeScript,
1337                vec![],
1338                vec![export("AppConfig")],
1339            ),
1340            make_project_file(
1341                "/project/backend/src/main.rs",
1342                Language::Rust,
1343                vec![],
1344                vec![],
1345            ),
1346        ];
1347
1348        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1349
1350        // 3 modules: frontend/src, frontend/shared, backend/src
1351        assert_eq!(graph.modules.len(), 3);
1352        assert_eq!(graph.nodes.len(), 3);
1353
1354        // frontend/src depends on frontend/shared
1355        let depends_on: Vec<_> = graph
1356            .edges
1357            .iter()
1358            .filter(|e| e.edge_type == EdgeType::DependsOn)
1359            .collect();
1360        assert_eq!(depends_on.len(), 1);
1361    }
1362
1363    // -----------------------------------------------------------------------
1364    // JS index barrel imports
1365    // -----------------------------------------------------------------------
1366
1367    #[test]
1368    fn js_index_barrel_import() {
1369        let root = Path::new("/project");
1370        let files = vec![
1371            make_project_file(
1372                "/project/src/app.ts",
1373                Language::TypeScript,
1374                vec![import("./components")], // barrel import to directory
1375                vec![],
1376            ),
1377            make_project_file(
1378                "/project/src/components/index.ts",
1379                Language::TypeScript,
1380                vec![],
1381                vec![export("Button"), export("Input")],
1382            ),
1383        ];
1384
1385        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1386
1387        let depends_on: Vec<_> = graph
1388            .edges
1389            .iter()
1390            .filter(|e| e.edge_type == EdgeType::DependsOn)
1391            .collect();
1392
1393        // src -> src/components
1394        assert_eq!(depends_on.len(), 1);
1395    }
1396
1397    // -----------------------------------------------------------------------
1398    // Edge metadata tests
1399    // -----------------------------------------------------------------------
1400
1401    #[test]
1402    fn depends_on_edge_has_metadata() {
1403        let root = Path::new("/project");
1404        let files = vec![
1405            make_project_file(
1406                "/project/src/pages/home.ts",
1407                Language::TypeScript,
1408                vec![import("../utils/format")],
1409                vec![],
1410            ),
1411            make_project_file(
1412                "/project/src/utils/format.ts",
1413                Language::TypeScript,
1414                vec![],
1415                vec![export("formatDate")],
1416            ),
1417        ];
1418
1419        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1420
1421        let depends_on = graph
1422            .edges
1423            .iter()
1424            .find(|e| e.edge_type == EdgeType::DependsOn)
1425            .expect("should have DependsOn edge");
1426
1427        let metadata = depends_on.metadata.as_ref().expect("should have metadata");
1428        assert!(metadata.get("source_module").is_some());
1429        assert!(metadata.get("target_module").is_some());
1430    }
1431
1432    #[test]
1433    fn part_of_edge_has_metadata() {
1434        let root = Path::new("/project");
1435        let files = vec![
1436            make_project_file("/project/src/main.rs", Language::Rust, vec![], vec![]),
1437            make_project_file(
1438                "/project/src/handlers/api.rs",
1439                Language::Rust,
1440                vec![],
1441                vec![],
1442            ),
1443        ];
1444
1445        let graph = build_module_graph(root, &files, &BranchId::from("main"));
1446
1447        let part_of = graph
1448            .edges
1449            .iter()
1450            .find(|e| e.edge_type == EdgeType::PartOf)
1451            .expect("should have PartOf edge");
1452
1453        let metadata = part_of.metadata.as_ref().expect("should have metadata");
1454        assert!(metadata.get("child_module").is_some());
1455        assert!(metadata.get("parent_module").is_some());
1456    }
1457
1458    // -----------------------------------------------------------------------
1459    // derive_module_purpose tests
1460    // -----------------------------------------------------------------------
1461
1462    fn make_file_with_doc(path: &str, file_doc: Option<&str>) -> ProjectFile {
1463        let mut pf = make_project_file(path, Language::Rust, vec![], vec![]);
1464        pf.file_doc = file_doc.map(str::to_owned);
1465        pf
1466    }
1467
1468    fn make_file_with_pub_fn(path: &str, fn_name: &str) -> ProjectFile {
1469        let pf_base = make_project_file(path, Language::Rust, vec![], vec![]);
1470        ProjectFile {
1471            functions: vec![seshat_core::Function {
1472                name: fn_name.to_owned(),
1473                is_public: true,
1474                is_async: false,
1475                line: 1,
1476                end_line: 5,
1477                parameters: vec![],
1478                doc_comment: None,
1479            }],
1480            ..pf_base
1481        }
1482    }
1483
1484    #[test]
1485    fn purpose_from_entry_point_file_doc() {
1486        let lib_rs = make_file_with_doc("/project/src/lib.rs", Some("Authentication module."));
1487        let other = make_file_with_doc("/project/src/handler.rs", Some("Handles requests."));
1488
1489        let file_map: HashMap<PathBuf, &ProjectFile> = [
1490            (PathBuf::from("src/lib.rs"), &lib_rs),
1491            (PathBuf::from("src/handler.rs"), &other),
1492        ]
1493        .into_iter()
1494        .collect();
1495
1496        let files = vec![PathBuf::from("src/lib.rs"), PathBuf::from("src/handler.rs")];
1497        let purpose = derive_module_purpose(&files, &file_map);
1498        assert_eq!(purpose.as_deref(), Some("Authentication module."));
1499    }
1500
1501    #[test]
1502    fn purpose_falls_back_to_file_docs_when_no_entry_point() {
1503        let handler = make_file_with_doc("/project/src/handler.rs", Some("Handles HTTP."));
1504        let service = make_file_with_doc("/project/src/service.rs", Some("Business logic."));
1505
1506        let file_map: HashMap<PathBuf, &ProjectFile> = [
1507            (PathBuf::from("src/handler.rs"), &handler),
1508            (PathBuf::from("src/service.rs"), &service),
1509        ]
1510        .into_iter()
1511        .collect();
1512
1513        let files = vec![
1514            PathBuf::from("src/handler.rs"),
1515            PathBuf::from("src/service.rs"),
1516        ];
1517        let purpose = derive_module_purpose(&files, &file_map);
1518        let p = purpose.unwrap();
1519        assert!(p.contains("Handles HTTP."), "got: {p}");
1520        assert!(p.contains("Business logic."), "got: {p}");
1521    }
1522
1523    #[test]
1524    fn purpose_falls_back_to_symbols_when_no_docs() {
1525        let pf = make_file_with_pub_fn("/project/src/handler.rs", "handle_request");
1526        let file_map: HashMap<PathBuf, &ProjectFile> = [(PathBuf::from("src/handler.rs"), &pf)]
1527            .into_iter()
1528            .collect();
1529        let files = vec![PathBuf::from("src/handler.rs")];
1530
1531        let purpose = derive_module_purpose(&files, &file_map);
1532        let p = purpose.unwrap();
1533        assert!(p.contains("handle_request"), "got: {p}");
1534    }
1535
1536    #[test]
1537    fn purpose_is_none_when_no_docs_no_symbols() {
1538        let pf = make_file_with_doc("/project/src/empty.rs", None);
1539        let file_map: HashMap<PathBuf, &ProjectFile> =
1540            [(PathBuf::from("src/empty.rs"), &pf)].into_iter().collect();
1541        let files = vec![PathBuf::from("src/empty.rs")];
1542
1543        let purpose = derive_module_purpose(&files, &file_map);
1544        assert!(purpose.is_none());
1545    }
1546
1547    // -----------------------------------------------------------------------
1548    // noise filter tests
1549    // -----------------------------------------------------------------------
1550
1551    #[test]
1552    fn noise_filter_rejects_ts_nocheck() {
1553        assert!(is_noise_file_doc("@ts-nocheck"));
1554        assert!(is_noise_file_doc("@ts-ignore"));
1555    }
1556
1557    #[test]
1558    fn noise_filter_rejects_type_annotation() {
1559        assert!(is_noise_file_doc("@type {import('next').NextConfig}"));
1560    }
1561
1562    #[test]
1563    fn noise_filter_rejects_eslint_disable() {
1564        assert!(is_noise_file_doc("eslint-disable no-console"));
1565        assert!(is_noise_file_doc("// eslint-disable-next-line"));
1566        // But only when the whole string is the directive, not when it appears
1567        // mid-sentence — check via the `contains` rule.
1568        assert!(is_noise_file_doc(
1569            "eslint-disable @typescript-eslint/no-explicit-any"
1570        ));
1571    }
1572
1573    #[test]
1574    fn noise_filter_rejects_python_noqa() {
1575        assert!(is_noise_file_doc("noqa: E501"));
1576        assert!(is_noise_file_doc("noqa"));
1577    }
1578
1579    #[test]
1580    fn noise_filter_rejects_type_ignore() {
1581        assert!(is_noise_file_doc("type: ignore"));
1582        assert!(is_noise_file_doc("type:ignore"));
1583    }
1584
1585    #[test]
1586    fn noise_filter_rejects_short_strings() {
1587        assert!(is_noise_file_doc("ok"));
1588        assert!(is_noise_file_doc("   hi   "));
1589    }
1590
1591    #[test]
1592    fn noise_filter_accepts_real_doc() {
1593        assert!(!is_noise_file_doc(
1594            "Handles authentication and session management."
1595        ));
1596        assert!(!is_noise_file_doc(
1597            "# Auth Module\n\nProvides JWT-based login."
1598        ));
1599    }
1600
1601    #[test]
1602    fn noise_docs_excluded_from_purpose() {
1603        // entry-point has noise, other file has real doc
1604        let index_ts = make_file_with_doc("/project/src/index.ts", Some("@ts-nocheck\n// barrel"));
1605        let service =
1606            make_file_with_doc("/project/src/service.ts", Some("Handles user operations."));
1607
1608        let file_map: HashMap<PathBuf, &ProjectFile> = [
1609            (PathBuf::from("src/index.ts"), &index_ts),
1610            (PathBuf::from("src/service.ts"), &service),
1611        ]
1612        .into_iter()
1613        .collect();
1614
1615        let files = vec![
1616            PathBuf::from("src/index.ts"),
1617            PathBuf::from("src/service.ts"),
1618        ];
1619        let purpose = derive_module_purpose(&files, &file_map);
1620        let p = purpose.as_deref().unwrap_or("");
1621        assert!(!p.contains("@ts-nocheck"), "noise must be filtered: {p}");
1622        assert!(
1623            p.contains("Handles user operations."),
1624            "real doc missing: {p}"
1625        );
1626    }
1627
1628    #[test]
1629    fn markdown_headings_stripped_from_purpose() {
1630        let lib_rs = make_file_with_doc(
1631            "/project/src/lib.rs",
1632            Some("# Auth Module\n\nProvides JWT-based login."),
1633        );
1634        let file_map: HashMap<PathBuf, &ProjectFile> = [(PathBuf::from("src/lib.rs"), &lib_rs)]
1635            .into_iter()
1636            .collect();
1637        let files = vec![PathBuf::from("src/lib.rs")];
1638
1639        let purpose = derive_module_purpose(&files, &file_map);
1640        let p = purpose.as_deref().unwrap_or("");
1641        assert!(
1642            !p.starts_with('#'),
1643            "markdown heading must be stripped: {p}"
1644        );
1645        assert!(p.contains("Auth Module"), "heading text should remain: {p}");
1646        assert!(
1647            p.contains("Provides JWT-based login."),
1648            "body must be kept: {p}"
1649        );
1650    }
1651
1652    #[test]
1653    fn symbols_are_deduplicated() {
1654        // Two files both export a function called `new` (common in Rust).
1655        let f1 = {
1656            let mut pf = make_file_with_pub_fn("/project/src/a.rs", "new");
1657            // add a second unique symbol
1658            pf.functions.push(seshat_core::Function {
1659                name: "run".to_owned(),
1660                is_public: true,
1661                is_async: false,
1662                line: 10,
1663                end_line: 20,
1664                parameters: vec![],
1665                doc_comment: None,
1666            });
1667            pf
1668        };
1669        let f2 = make_file_with_pub_fn("/project/src/b.rs", "new"); // duplicate
1670
1671        let file_map: HashMap<PathBuf, &ProjectFile> = [
1672            (PathBuf::from("src/a.rs"), &f1),
1673            (PathBuf::from("src/b.rs"), &f2),
1674        ]
1675        .into_iter()
1676        .collect();
1677
1678        let files = vec![PathBuf::from("src/a.rs"), PathBuf::from("src/b.rs")];
1679        let purpose = derive_module_purpose(&files, &file_map);
1680        let p = purpose.unwrap();
1681        // `new` should appear exactly once
1682        assert_eq!(
1683            p.matches("new").count(),
1684            1,
1685            "duplicate symbol in purpose: {p}"
1686        );
1687        assert!(p.contains("run"), "unique symbol missing: {p}");
1688    }
1689
1690    #[test]
1691    fn file_doc_truncated_to_max_lines() {
1692        // A doc with many lines — only first 5 should appear for entry-point.
1693        let doc = "Line1\nLine2\nLine3\nLine4\nLine5\nLine6\nLine7\nLine8";
1694        let lib_rs = make_file_with_doc("/project/src/lib.rs", Some(doc));
1695        let file_map: HashMap<PathBuf, &ProjectFile> = [(PathBuf::from("src/lib.rs"), &lib_rs)]
1696            .into_iter()
1697            .collect();
1698        let files = vec![PathBuf::from("src/lib.rs")];
1699
1700        let purpose = derive_module_purpose(&files, &file_map).unwrap();
1701        let line_count = purpose.lines().count();
1702        assert!(
1703            line_count <= 5,
1704            "entry-point doc should be ≤5 lines, got {line_count}: {purpose}"
1705        );
1706        assert!(
1707            !purpose.contains("Line6"),
1708            "line 6 must be truncated: {purpose}"
1709        );
1710    }
1711}