Skip to main content

mollify_graph/
lib.rs

1//! # mollify-graph
2//!
3//! Discovers Python modules, assigns **path-sorted stable FileIds** (ADR-004
4//! analog), builds the internal import graph, computes **reachability** from
5//! entry points, and answers symbol-usage queries. Pure structure — the
6//! `mollify-core` crate turns these into [`mollify_parse`]-backed findings.
7
8use camino::{Utf8Path, Utf8PathBuf};
9use mollify_parse::{Import, ParsedModule, PyParser};
10use rayon::prelude::*;
11use rustc_hash::{FxHashMap, FxHashSet};
12
13/// Stable, path-sorted module identifier.
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
15pub struct FileId(pub u32);
16
17/// One module node in the graph.
18#[derive(Debug, Clone)]
19pub struct ModuleInfo {
20    pub id: FileId,
21    pub path: Utf8PathBuf,
22    /// Dotted module name relative to its source root (e.g. `pkg.sub.mod`).
23    pub dotted: String,
24    pub parsed: ParsedModule,
25    /// True if this module is an analysis root (entry point).
26    pub is_entry: bool,
27}
28
29/// An import that looks first-party/relative but resolves to no module.
30#[derive(Debug, Clone)]
31pub struct UnresolvedImport {
32    /// The importing module's file path.
33    pub importer: Utf8PathBuf,
34    /// The import as written (`.sub.thing` for relative, `pkg.mod` for absolute).
35    pub display: String,
36    pub line: u32,
37    /// True for a relative import (`from . import x`) — these must be internal.
38    pub relative: bool,
39}
40
41/// The whole project graph.
42pub struct ModuleGraph {
43    pub modules: Vec<ModuleInfo>,
44    by_dotted: FxHashMap<String, FileId>,
45    /// Resolved internal import edges: importer → imported.
46    edges: Vec<(FileId, FileId)>,
47    /// For each imported module, the set of symbol names pulled in by importers,
48    /// keyed by the imported module's FileId.
49    imported_symbols: FxHashMap<FileId, FxHashSet<String>>,
50    reachable: FxHashSet<FileId>,
51    /// True if any module in the project has a dynamic dispatch/import sink.
52    pub global_dynamic: bool,
53}
54
55/// Directory names never descended into, regardless of `.gitignore` — VCS
56/// metadata, virtualenvs, and build/cache output. Mirrors `ruff`'s default
57/// exclude list, since projects in this ecosystem already expect these names
58/// to be skipped without configuration.
59const DEFAULT_EXCLUDE_DIRS: &[&str] = &[
60    ".bzr",
61    ".direnv",
62    ".eggs",
63    ".git",
64    ".hg",
65    ".svn",
66    ".ipynb_checkpoints",
67    ".mypy_cache",
68    ".nox",
69    ".pyenv",
70    ".pytest_cache",
71    ".pytype",
72    ".ruff_cache",
73    ".tox",
74    ".venv",
75    "__pycache__",
76    "__pypackages__",
77    "_build",
78    "buck-out",
79    "build",
80    "dist",
81    "env",
82    "node_modules",
83    "site-packages",
84    "venv",
85];
86
87/// True if `entry` is a directory that should be pruned from discovery: a
88/// builtin/extra denylisted name, or any directory directly containing a
89/// `pyvenv.cfg` (a virtualenv marker that catches custom-named venvs the name
90/// denylist can't anticipate).
91fn is_excluded_dir(entry: &ignore::DirEntry, extra_excludes: &[String]) -> bool {
92    if !entry.file_type().is_some_and(|t| t.is_dir()) {
93        return false;
94    }
95    let Some(name) = entry.file_name().to_str() else {
96        return false;
97    };
98    if DEFAULT_EXCLUDE_DIRS.contains(&name) || extra_excludes.iter().any(|e| e == name) {
99        return true;
100    }
101    entry.path().join("pyvenv.cfg").is_file()
102}
103
104/// Walk `root` for `*.py` and `*.ipynb` files, honoring `.gitignore` and
105/// pruning [`DEFAULT_EXCLUDE_DIRS`] (plus any virtualenv detected via
106/// `pyvenv.cfg`). Deterministic order.
107pub fn discover_python_files(root: &Utf8Path) -> Vec<Utf8PathBuf> {
108    discover_python_files_excluding(root, &[])
109}
110
111/// Like [`discover_python_files`], but also prunes `extra_excludes` directory
112/// names (in addition to the builtin denylist) — used to honor a project's
113/// `.mollifyrc.json` `exclude_dirs`.
114pub fn discover_python_files_excluding(
115    root: &Utf8Path,
116    extra_excludes: &[String],
117) -> Vec<Utf8PathBuf> {
118    let extra = extra_excludes.to_vec();
119    let mut out = Vec::new();
120    for entry in ignore::WalkBuilder::new(root)
121        .hidden(false)
122        .filter_entry(move |e| !is_excluded_dir(e, &extra))
123        .build()
124        .flatten()
125    {
126        let p = entry.path();
127        if p.extension().is_some_and(|e| e == "py" || e == "ipynb") {
128            if let Ok(u) = Utf8PathBuf::from_path_buf(p.to_path_buf()) {
129                out.push(u);
130            }
131        }
132    }
133    out.sort();
134    out
135}
136
137/// Read a module's source. For `.ipynb`, extract and concatenate code cells into
138/// one Python source (line numbers are relative to that concatenation — a
139/// documented v1 simplification). Jupyter magics/shell-escapes are skipped.
140pub fn read_source(path: &Utf8Path) -> Option<String> {
141    let raw = std::fs::read_to_string(path).ok()?;
142    if path.extension() != Some("ipynb") {
143        return Some(raw);
144    }
145    let nb: serde_json::Value = serde_json::from_str(&raw).ok()?;
146    let cells = nb.get("cells")?.as_array()?;
147    let mut src = String::new();
148    for cell in cells {
149        if cell.get("cell_type").and_then(|t| t.as_str()) != Some("code") {
150            continue;
151        }
152        match cell.get("source") {
153            Some(serde_json::Value::Array(lines)) => {
154                for l in lines {
155                    if let Some(s) = l.as_str() {
156                        let t = s.trim_start();
157                        if t.starts_with('%') || t.starts_with('!') {
158                            continue;
159                        }
160                        src.push_str(s);
161                    }
162                }
163                src.push('\n');
164            }
165            Some(serde_json::Value::String(s)) => {
166                src.push_str(s);
167                src.push('\n');
168            }
169            _ => {}
170        }
171    }
172    Some(src)
173}
174
175/// Compute a module's dotted name relative to a source root. `src/` is treated
176/// as a source root if present; otherwise the project root is used.
177fn dotted_name(root: &Utf8Path, path: &Utf8Path) -> String {
178    let rel = path.strip_prefix(root).unwrap_or(path);
179    let mut rel = rel.to_owned();
180    // src-layout: drop a leading `src/` segment.
181    if rel.starts_with("src") {
182        if let Ok(stripped) = rel.strip_prefix("src") {
183            rel = stripped.to_owned();
184        }
185    }
186    let no_ext = rel.as_str().strip_suffix(".py").unwrap_or(rel.as_str());
187    let no_init = no_ext.strip_suffix("/__init__").unwrap_or(no_ext);
188    no_init.replace('/', ".").trim_matches('.').to_string()
189}
190
191fn is_entry(path: &Utf8Path) -> bool {
192    let name = path.file_name().unwrap_or("");
193    name == "__main__.py"
194        || name == "conftest.py"
195        || name == "setup.py"
196        || name == "__init__.py" // package surface is a public root
197        || name.starts_with("test_")
198        || name.ends_with("_test.py")
199        || path.extension() == Some("ipynb")
200}
201
202impl ModuleGraph {
203    /// Parse all files (in parallel) and build the graph.
204    pub fn build(root: &Utf8Path, files: &[Utf8PathBuf]) -> Self {
205        // Parse in parallel; each rayon task gets its own parser.
206        let parsed: Vec<(Utf8PathBuf, ParsedModule)> = files
207            .par_iter()
208            .filter_map(|p| {
209                let src = read_source(p)?;
210                let mut parser = PyParser::new().ok()?;
211                let pm = parser.parse(p, &src).ok()?;
212                Some((p.clone(), pm))
213            })
214            .collect();
215
216        // Stable FileIds by sorted path (already sorted from discovery, but be safe).
217        let mut parsed = parsed;
218        parsed.sort_by(|a, b| a.0.cmp(&b.0));
219
220        let mut modules = Vec::with_capacity(parsed.len());
221        let mut by_dotted = FxHashMap::default();
222        let mut global_dynamic = false;
223        for (i, (path, pm)) in parsed.into_iter().enumerate() {
224            let id = FileId(i as u32);
225            let dotted = dotted_name(root, &path);
226            global_dynamic |= pm.has_dynamic_sink;
227            by_dotted.entry(dotted.clone()).or_insert(id);
228            modules.push(ModuleInfo {
229                id,
230                is_entry: is_entry(&path),
231                path,
232                dotted,
233                parsed: pm,
234            });
235        }
236
237        let mut g = ModuleGraph {
238            modules,
239            by_dotted,
240            edges: Vec::new(),
241            imported_symbols: FxHashMap::default(),
242            reachable: FxHashSet::default(),
243            global_dynamic,
244        };
245        g.resolve_edges();
246        g.compute_reachability();
247        g
248    }
249
250    fn module(&self, id: FileId) -> &ModuleInfo {
251        &self.modules[id.0 as usize]
252    }
253
254    /// Resolve each import to an internal module if possible, recording edges
255    /// and which symbol names each importer pulls from the target.
256    fn resolve_edges(&mut self) {
257        let mut edges = Vec::new();
258        let mut imported_symbols: FxHashMap<FileId, FxHashSet<String>> = FxHashMap::default();
259
260        for m in &self.modules {
261            for imp in &m.parsed.imports {
262                let target_dotted = if imp.relative_dots > 0 {
263                    resolve_relative(&m.dotted, imp.relative_dots, &imp.module)
264                } else {
265                    imp.module.clone()
266                };
267                // Try the full dotted path, then progressively shorter prefixes
268                // (handles `from pkg.mod import name` where `pkg.mod` is a module
269                // and `name` is a symbol, vs `import pkg.mod`).
270                if let Some(&tid) = self.lookup(&target_dotted) {
271                    edges.push((m.id, tid));
272                    let set = imported_symbols.entry(tid).or_default();
273                    for n in &imp.names {
274                        set.insert(n.clone());
275                    }
276                    if imp.is_star {
277                        set.insert("*".into());
278                    }
279                } else if !imp.names.is_empty() {
280                    // `from pkg import submod` where submod is itself a module.
281                    for n in &imp.names {
282                        let candidate = format!("{target_dotted}.{n}");
283                        if let Some(&tid) = self.lookup(&candidate) {
284                            edges.push((m.id, tid));
285                        }
286                    }
287                }
288            }
289        }
290        edges.sort();
291        edges.dedup();
292        self.edges = edges;
293        self.imported_symbols = imported_symbols;
294    }
295
296    fn lookup(&self, dotted: &str) -> Option<&FileId> {
297        self.by_dotted.get(dotted)
298    }
299
300    /// True if an import target resolves to an internal module — directly, or as
301    /// `from pkg import submodule` where `pkg.submodule` is a module.
302    fn import_resolves(&self, target: &str, imp: &Import) -> bool {
303        if self.lookup(target).is_some() {
304            return true;
305        }
306        imp.names
307            .iter()
308            .any(|n| self.lookup(&format!("{target}.{n}")).is_some())
309    }
310
311    /// Imports that *look* internal but resolve to no module in the project:
312    /// every relative import that fails to resolve, plus absolute imports under a
313    /// first-party top-level package that fail to resolve. These are typically a
314    /// typo or a broken refactor — distinct from third-party `missing-dependency`.
315    pub fn unresolved_imports(&self) -> Vec<UnresolvedImport> {
316        // First-party top-level package names (the first segment of any module).
317        let mut first_party: FxHashSet<&str> = FxHashSet::default();
318        for k in self.by_dotted.keys() {
319            if let Some(top) = k.split('.').next() {
320                first_party.insert(top);
321            }
322        }
323        let mut out = Vec::new();
324        for m in &self.modules {
325            for imp in &m.parsed.imports {
326                let relative = imp.relative_dots > 0;
327                let target = if relative {
328                    resolve_relative(&m.dotted, imp.relative_dots, &imp.module)
329                } else {
330                    imp.module.clone()
331                };
332                if target.is_empty() || self.import_resolves(&target, imp) {
333                    continue;
334                }
335                if !relative {
336                    let top = target.split('.').next().unwrap_or(&target);
337                    if !first_party.contains(top) {
338                        continue; // third-party → handled by dependency hygiene
339                    }
340                }
341                let display = if relative {
342                    format!("{}{}", ".".repeat(imp.relative_dots as usize), imp.module)
343                } else {
344                    imp.module.clone()
345                };
346                out.push(UnresolvedImport {
347                    importer: m.path.clone(),
348                    display,
349                    line: imp.line,
350                    relative,
351                });
352            }
353        }
354        out.sort_by(|a, b| {
355            a.importer
356                .cmp(&b.importer)
357                .then(a.line.cmp(&b.line))
358                .then(a.display.cmp(&b.display))
359        });
360        out.dedup_by(|a, b| a.importer == b.importer && a.line == b.line && a.display == b.display);
361        out
362    }
363
364    /// BFS mark-reachable from all entry modules over import edges.
365    fn compute_reachability(&mut self) {
366        let mut adj: FxHashMap<FileId, Vec<FileId>> = FxHashMap::default();
367        for (a, b) in &self.edges {
368            adj.entry(*a).or_default().push(*b);
369        }
370        let mut queue: Vec<FileId> = self
371            .modules
372            .iter()
373            .filter(|m| m.is_entry)
374            .map(|m| m.id)
375            .collect();
376        let mut seen: FxHashSet<FileId> = queue.iter().copied().collect();
377        while let Some(id) = queue.pop() {
378            if let Some(neighbors) = adj.get(&id) {
379                for &n in neighbors {
380                    if seen.insert(n) {
381                        queue.push(n);
382                    }
383                }
384            }
385        }
386        self.reachable = seen;
387    }
388
389    /// Files that are neither entries nor reachable from any entry.
390    pub fn unused_files(&self) -> Vec<&ModuleInfo> {
391        self.modules
392            .iter()
393            .filter(|m| !m.is_entry && !self.reachable.contains(&m.id))
394            .collect()
395    }
396
397    /// Whether a symbol defined in `module` is referenced internally or by any
398    /// importer of that module. `defs_named` is how many top-level defs share
399    /// the name (to discount the definition site in the internal count).
400    pub fn symbol_used(&self, module: FileId, name: &str, defs_named: u32) -> bool {
401        let m = self.module(module);
402        // Internal use. With scope/binding resolution, a top-level symbol is used
403        // iff some free `Name` load resolves to it (module_used) — precise: it
404        // ignores shadowing function-locals and attribute accesses. In modules
405        // with a dynamic sink (getattr/eval/importlib) we can't trust static
406        // resolution, so fall back to the conservative token-frequency count.
407        let internal = if m.parsed.has_dynamic_sink {
408            m.parsed.name_counts.get(name).copied().unwrap_or(0) > defs_named
409        } else {
410            m.parsed
411                .module_used
412                .binary_search_by(|s| s.as_str().cmp(name))
413                .is_ok()
414        };
415        if internal {
416            return true;
417        }
418        // Imported by name from this module (covers `from m import name`).
419        if let Some(set) = self.imported_symbols.get(&module) {
420            if set.contains(name) || set.contains("*") {
421                return true;
422            }
423        }
424        // Cross-module: any module that imports `module` references `name`.
425        let importers: Vec<FileId> = self
426            .edges
427            .iter()
428            .filter(|(_, b)| *b == module)
429            .map(|(a, _)| *a)
430            .collect();
431        for imp in importers {
432            let im = self.module(imp);
433            if im.parsed.name_counts.contains_key(name) {
434                return true;
435            }
436        }
437        false
438    }
439
440    pub fn edge_count(&self) -> usize {
441        self.edges.len()
442    }
443
444    /// Internal import edges as (importer dotted, imported dotted) pairs.
445    /// Used by the architecture-boundary engine.
446    pub fn import_edges(&self) -> Vec<(&str, &str)> {
447        self.edges
448            .iter()
449            .map(|(a, b)| {
450                (
451                    self.modules[a.0 as usize].dotted.as_str(),
452                    self.modules[b.0 as usize].dotted.as_str(),
453                )
454            })
455            .collect()
456    }
457
458    /// The path of a module by its dotted name (first match), for findings.
459    pub fn path_of_dotted(&self, dotted: &str) -> Option<&Utf8Path> {
460        self.modules
461            .iter()
462            .find(|m| m.dotted == dotted)
463            .map(|m| m.path.as_path())
464    }
465
466    /// Find import cycles: strongly-connected components of size > 1, plus
467    /// self-loops. Tarjan's algorithm; results are deterministic (each cycle's
468    /// members sorted, and the list sorted). Cross-module circular imports.
469    pub fn find_cycles(&self) -> Vec<Vec<FileId>> {
470        let n = self.modules.len();
471        let mut adj: Vec<Vec<usize>> = vec![Vec::new(); n];
472        let mut self_loops: Vec<usize> = Vec::new();
473        for (a, b) in &self.edges {
474            if a == b {
475                self_loops.push(a.0 as usize);
476            } else {
477                adj[a.0 as usize].push(b.0 as usize);
478            }
479        }
480
481        // Iterative Tarjan to avoid stack overflow on large graphs.
482        let mut index = vec![u32::MAX; n];
483        let mut low = vec![0u32; n];
484        let mut on_stack = vec![false; n];
485        let mut stack: Vec<usize> = Vec::new();
486        let mut idx: u32 = 0;
487        let mut out: Vec<Vec<FileId>> = Vec::new();
488
489        // Explicit DFS frame: (node, next child position).
490        for start in 0..n {
491            if index[start] != u32::MAX {
492                continue;
493            }
494            let mut call: Vec<(usize, usize)> = vec![(start, 0)];
495            while let Some(&(v, ci)) = call.last() {
496                if ci == 0 {
497                    index[v] = idx;
498                    low[v] = idx;
499                    idx += 1;
500                    stack.push(v);
501                    on_stack[v] = true;
502                }
503                if ci < adj[v].len() {
504                    let w = adj[v][ci];
505                    call.last_mut().unwrap().1 += 1;
506                    if index[w] == u32::MAX {
507                        call.push((w, 0));
508                    } else if on_stack[w] {
509                        low[v] = low[v].min(index[w]);
510                    }
511                } else {
512                    if low[v] == index[v] {
513                        let mut comp = Vec::new();
514                        loop {
515                            let w = stack.pop().unwrap();
516                            on_stack[w] = false;
517                            comp.push(FileId(w as u32));
518                            if w == v {
519                                break;
520                            }
521                        }
522                        if comp.len() > 1 {
523                            comp.sort();
524                            out.push(comp);
525                        }
526                    }
527                    call.pop();
528                    if let Some(&(parent, _)) = call.last() {
529                        low[parent] = low[parent].min(low[v]);
530                    }
531                }
532            }
533        }
534
535        for s in self_loops {
536            out.push(vec![FileId(s as u32)]);
537        }
538        out.sort();
539        out
540    }
541}
542
543/// Resolve a relative import (`from ..pkg import x` inside `a.b.c`) to a dotted
544/// module name. `dots`=1 means the current package.
545fn resolve_relative(importer_dotted: &str, dots: u8, module: &str) -> String {
546    let parts: Vec<&str> = importer_dotted.split('.').collect();
547    // The importer's package = drop the module's own last segment.
548    // For `a.b.c`, package is `a.b`; one extra dot goes up one more level.
549    let keep = parts.len().saturating_sub(dots as usize);
550    let base = parts[..keep].join(".");
551    match (base.is_empty(), module.is_empty()) {
552        (true, _) => module.to_string(),
553        (false, true) => base,
554        (false, false) => format!("{base}.{module}"),
555    }
556}
557
558#[cfg(test)]
559mod tests {
560    use super::*;
561
562    fn write(dir: &Utf8Path, rel: &str, src: &str) {
563        let p = dir.join(rel);
564        std::fs::create_dir_all(p.parent().unwrap()).unwrap();
565        std::fs::write(p, src).unwrap();
566    }
567
568    fn temp(tag: &str) -> Utf8PathBuf {
569        let base =
570            std::env::temp_dir().join(format!("mollify-graph-test-{}-{tag}", std::process::id()));
571        let _ = std::fs::remove_dir_all(&base);
572        Utf8PathBuf::from_path_buf(base).unwrap()
573    }
574
575    #[test]
576    fn dotted_name_handles_init_and_src() {
577        let root = Utf8Path::new("/proj");
578        assert_eq!(
579            dotted_name(root, Utf8Path::new("/proj/pkg/mod.py")),
580            "pkg.mod"
581        );
582        assert_eq!(
583            dotted_name(root, Utf8Path::new("/proj/pkg/__init__.py")),
584            "pkg"
585        );
586        assert_eq!(dotted_name(root, Utf8Path::new("/proj/src/a/b.py")), "a.b");
587    }
588
589    #[test]
590    fn relative_import_resolution() {
591        assert_eq!(resolve_relative("a.b.c", 1, "d"), "a.b.d");
592        assert_eq!(resolve_relative("a.b.c", 2, "d"), "a.d");
593        assert_eq!(resolve_relative("a.b.c", 1, ""), "a.b");
594    }
595
596    #[test]
597    fn unused_file_detected() {
598        let d = temp("unused");
599        write(&d, "__main__.py", "from used import helper\nhelper()\n");
600        write(&d, "used.py", "def helper():\n    return 1\n");
601        write(&d, "orphan.py", "def never():\n    return 2\n");
602        let files = discover_python_files(&d);
603        let g = ModuleGraph::build(&d, &files);
604        let unused: Vec<_> = g.unused_files().iter().map(|m| m.dotted.clone()).collect();
605        assert!(unused.contains(&"orphan".to_string()), "got {unused:?}");
606        assert!(!unused.contains(&"used".to_string()));
607        std::fs::remove_dir_all(&d).ok();
608    }
609
610    #[test]
611    fn reads_notebook_code_cells() {
612        let d = temp("nb");
613        let nb = r##"{"cells":[{"cell_type":"markdown","source":["title"]},{"cell_type":"code","source":["def nb_fn(x):\n","    return x\n"]}]}"##;
614        write(&d, "analysis.ipynb", nb);
615        let files = discover_python_files(&d);
616        assert!(files.iter().any(|f| f.as_str().ends_with("analysis.ipynb")));
617        let g = ModuleGraph::build(&d, &files);
618        let nbmod = g
619            .modules
620            .iter()
621            .find(|m| m.path.as_str().ends_with("analysis.ipynb"))
622            .unwrap();
623        assert!(
624            nbmod.parsed.definitions.iter().any(|x| x.name == "nb_fn"),
625            "notebook code not parsed"
626        );
627        std::fs::remove_dir_all(&d).ok();
628    }
629
630    #[test]
631    fn detects_import_cycle() {
632        let d = temp("cycle");
633        write(
634            &d,
635            "__init__.py",
636            "import a
637import b
638",
639        );
640        write(
641            &d,
642            "a.py",
643            "import b
644",
645        );
646        write(
647            &d,
648            "b.py",
649            "import a
650",
651        );
652        let files = discover_python_files(&d);
653        let g = ModuleGraph::build(&d, &files);
654        let cycles = g.find_cycles();
655        assert!(
656            cycles.iter().any(|c| c.len() == 2),
657            "expected a 2-cycle, got {cycles:?}"
658        );
659        std::fs::remove_dir_all(&d).ok();
660    }
661
662    #[test]
663    fn symbol_use_cross_module() {
664        let d = temp("symuse");
665        write(&d, "__main__.py", "from lib import used_fn\nused_fn()\n");
666        write(
667            &d,
668            "lib.py",
669            "def used_fn():\n    return 1\n\ndef dead_fn():\n    return 2\n",
670        );
671        let files = discover_python_files(&d);
672        let g = ModuleGraph::build(&d, &files);
673        let lib = g.modules.iter().find(|m| m.dotted == "lib").unwrap().id;
674        assert!(g.symbol_used(lib, "used_fn", 1));
675        assert!(!g.symbol_used(lib, "dead_fn", 1));
676        std::fs::remove_dir_all(&d).ok();
677    }
678
679    #[test]
680    fn discovery_skips_venvs_vcs_and_caches() {
681        let d = temp("excluded-dirs");
682        write(&d, "src/app.py", "def main():\n    return 1\n");
683        write(
684            &d,
685            ".venv/lib/python3.12/site-packages/somepkg/__init__.py",
686            "def pkg_fn():\n    return 1\n",
687        );
688        write(&d, ".git/hooks/pre-commit.py", "raise SystemExit(0)\n");
689        write(&d, "__pycache__/app.cpython-312.py", "stale_cache = 1\n");
690        write(&d, "node_modules/foo/bar.py", "bar = 1\n");
691        // A custom-named virtualenv: only `pyvenv.cfg` marks it, not its name.
692        write(&d, "myenv/pyvenv.cfg", "home = /usr/bin\n");
693        write(&d, "myenv/lib/site.py", "site_fn = 1\n");
694
695        let files = discover_python_files(&d);
696        let rel: Vec<String> = files
697            .iter()
698            .map(|f| f.strip_prefix(&d).unwrap().to_string())
699            .collect();
700        assert_eq!(rel, vec!["src/app.py".to_string()], "got {rel:?}");
701        std::fs::remove_dir_all(&d).ok();
702    }
703
704    #[test]
705    fn discovery_honors_extra_excludes() {
706        let d = temp("extra-excludes");
707        write(&d, "src/app.py", "def main():\n    return 1\n");
708        write(&d, "vendor/thirdparty.py", "x = 1\n");
709
710        let files = discover_python_files_excluding(&d, &["vendor".to_string()]);
711        let rel: Vec<String> = files
712            .iter()
713            .map(|f| f.strip_prefix(&d).unwrap().to_string())
714            .collect();
715        assert_eq!(rel, vec!["src/app.py".to_string()], "got {rel:?}");
716        std::fs::remove_dir_all(&d).ok();
717    }
718}