Skip to main content

the_code_graph_parser/resolver/
python.rs

1use std::path::{Path, PathBuf};
2
3use domain::model::{Edge, EdgeKind, Language};
4
5use super::{ImportResolver, ResolveContext};
6use crate::ParseResult;
7
8// ---------------------------------------------------------------------------
9// Stdlib module set
10// ---------------------------------------------------------------------------
11
/// Top-level names of Python standard-library modules.
///
/// Only the first dotted segment of an import is compared against this table
/// (`os.path` is looked up as `os`). Entries are grouped by initial letter and
/// matched case-sensitively (note `cProfile`); a handful of underscore-prefixed
/// internals are listed at the end.
#[rustfmt::skip]
static STDLIB_MODULES: &[&str] = &[
    // a
    "abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio", "asyncore", "atexit",
    "audioop",
    // b
    "base64", "bdb", "binascii", "binhex", "bisect", "builtins", "bz2",
    // c
    "calendar", "cgi", "cgitb", "chunk", "cmath", "cmd", "code", "codecs", "codeop",
    "collections", "colorsys", "compileall", "concurrent", "configparser", "contextlib",
    "contextvars", "copy", "copyreg", "cProfile", "crypt", "csv", "ctypes", "curses",
    // d
    "dataclasses", "datetime", "dbm", "decimal", "difflib", "dis", "distutils", "doctest",
    // e
    "email", "encodings", "enum", "errno",
    // f
    "faulthandler", "fcntl", "filecmp", "fileinput", "fnmatch", "formatter", "fractions",
    "ftplib", "functools",
    // g
    "gc", "getopt", "getpass", "gettext", "glob", "grp", "gzip",
    // h
    "hashlib", "heapq", "hmac", "html", "http",
    // i
    "idlelib", "imaplib", "imghdr", "imp", "importlib", "inspect", "io", "ipaddress",
    "itertools",
    // j, k
    "json",
    "keyword",
    // l
    "lib2to3", "linecache", "locale", "logging", "lzma",
    // m
    "mailbox", "mailcap", "marshal", "math", "mimetypes", "mmap", "modulefinder",
    "multiprocessing",
    // n
    "netrc", "nis", "nntplib", "numbers",
    // o
    "operator", "optparse", "os", "ossaudiodev",
    // p
    "parser", "pathlib", "pdb", "pickle", "pickletools", "pipes", "pkgutil", "platform",
    "plistlib", "poplib", "posix", "posixpath", "pprint", "profile", "pstats", "pty", "pwd",
    "py_compile", "pyclbr", "pydoc",
    // q
    "queue", "quopri",
    // r
    "random", "re", "readline", "reprlib", "resource", "rlcompleter", "runpy",
    // s
    "sched", "secrets", "select", "selectors", "shelve", "shlex", "shutil", "signal", "site",
    "smtpd", "smtplib", "sndhdr", "socket", "socketserver", "spwd", "sqlite3", "sre_compile",
    "sre_constants", "sre_parse", "ssl", "stat", "statistics", "string", "stringprep",
    "struct", "subprocess", "sunau", "symtable", "sys", "sysconfig", "syslog",
    // t
    "tabnanny", "tarfile", "telnetlib", "tempfile", "termios", "test", "textwrap",
    "threading", "time", "timeit", "tkinter", "token", "tokenize", "tomllib", "trace",
    "traceback", "tracemalloc", "tty", "turtle", "turtledemo", "types", "typing",
    // u, v
    "unicodedata", "unittest", "urllib", "uu", "uuid",
    "venv",
    // w
    "warnings", "wave", "weakref", "webbrowser", "winreg", "winsound", "wsgiref",
    // x, z
    "xdrlib", "xml", "xmlrpc",
    "zipapp", "zipfile", "zipimport", "zlib",
    // Common underscore-prefixed internals
    "_thread", "__future__", "_abc", "_collections_abc",
];
226
227fn is_stdlib(first_segment: &str) -> bool {
228    STDLIB_MODULES.contains(&first_segment)
229}
230
231// ---------------------------------------------------------------------------
232// Resolution helpers
233// ---------------------------------------------------------------------------
234
/// Probe `file_tree` for the Python source file backing the module at `candidate`.
///
/// `candidate` is a module path without a suffix (e.g. `/project/app/models`).
/// Two locations are tried, in this order:
///
/// 1. `{candidate}.py` — a plain module file;
/// 2. `{candidate}/__init__.py` — a package directory.
///
/// Returns the first of those paths that is present in `file_tree`, or `None`
/// when neither is. Matching is exact `PathBuf` equality against `file_tree`;
/// the filesystem itself is never consulted.
fn try_resolve(candidate: &Path, file_tree: &[PathBuf]) -> Option<PathBuf> {
    // Append `.py` to the final component instead of calling `with_extension`,
    // which *replaces* everything after the last dot (`pkg.v2` -> `pkg.py`) and
    // would probe for the wrong file when a directory name contains a dot.
    // A candidate without a final component (e.g. `/`) has no module file to probe.
    if let Some(name) = candidate.file_name() {
        let mut file_name = name.to_os_string();
        file_name.push(".py");
        let py_path = candidate.with_file_name(file_name);
        if file_tree.contains(&py_path) {
            return Some(py_path);
        }
    }

    let init_path = candidate.join("__init__.py");
    if file_tree.contains(&init_path) {
        Some(init_path)
    } else {
        None
    }
}
248
249/// Resolve a Python import specifier to a path in the file_tree.
250fn resolve_python_import(
251    specifier: &str,
252    current_file: &Path,
253    project_root: &Path,
254    file_tree: &[PathBuf],
255    package_roots: &[PathBuf],
256) -> Option<PathBuf> {
257    // Relative import — starts with one or more dots
258    if specifier.starts_with('.') {
259        let dot_count = specifier.chars().take_while(|c| *c == '.').count();
260        let module_path = &specifier[dot_count..];
261
262        let mut base_dir = current_file.parent().unwrap_or(current_file).to_path_buf();
263        for _ in 1..dot_count {
264            base_dir = base_dir.parent().unwrap_or(&base_dir).to_path_buf();
265        }
266
267        let candidate = if module_path.is_empty() {
268            base_dir
269        } else {
270            let rel: PathBuf = module_path.replace('.', "/").into();
271            base_dir.join(rel)
272        };
273
274        return try_resolve(&candidate, file_tree);
275    }
276
277    // Absolute import — check stdlib first segment
278    let first_segment = specifier.split('.').next().unwrap_or(specifier);
279    if is_stdlib(first_segment) {
280        return None;
281    }
282
283    // Local absolute import — try each package_root first, then fall back to project_root
284    let rel: PathBuf = specifier.replace('.', "/").into();
285    for package_root in package_roots {
286        let candidate = package_root.join(&rel);
287        if let Some(resolved) = try_resolve(&candidate, file_tree) {
288            return Some(resolved);
289        }
290    }
291    let candidate = project_root.join(rel);
292    try_resolve(&candidate, file_tree)
293}
294
295// ---------------------------------------------------------------------------
296// PythonConfig
297// ---------------------------------------------------------------------------
298
/// Configuration for the Python resolver (e.g. src/ layout support).
///
/// `Default` yields a configuration with no package roots.
#[derive(Debug, Clone, Default)]
pub struct PythonConfig {
    /// Extra directories under which absolute imports are looked up.
    pub package_roots: Vec<PathBuf>,
}

impl PythonConfig {
    /// Detect a `src/` layout.
    ///
    /// If `project_root/src` exists and is a directory it becomes the single
    /// package root; otherwise the list of package roots is empty. This is the
    /// only filesystem access performed, and any failure to inspect the path is
    /// treated the same as "not a directory".
    pub fn load(project_root: &Path) -> Self {
        let src = project_root.join("src");
        let package_roots = if src.is_dir() { vec![src] } else { Vec::new() };
        Self { package_roots }
    }
}
319
320// ---------------------------------------------------------------------------
321// PythonResolver
322// ---------------------------------------------------------------------------
323
324/// Python import resolver — filesystem prober + stdlib detection.
325pub struct PythonResolver {
326    config: PythonConfig,
327}
328
329impl PythonResolver {
330    pub fn new(config: PythonConfig) -> Self {
331        PythonResolver { config }
332    }
333}
334
335impl ImportResolver for PythonResolver {
336    fn languages(&self) -> &[Language] {
337        &[Language::Python]
338    }
339
340    fn resolve(
341        &self,
342        file_path: &Path,
343        parse_result: &ParseResult,
344        context: &ResolveContext,
345    ) -> domain::error::Result<Vec<Edge>> {
346        let source = file_path.to_string_lossy().into_owned();
347        let mut edges = Vec::new();
348
349        for import in &parse_result.imports {
350            let resolved = resolve_python_import(
351                &import.specifier,
352                file_path,
353                &context.project_root,
354                &context.file_tree,
355                &self.config.package_roots,
356            );
357
358            if let Some(target_path) = resolved {
359                let target = target_path.to_string_lossy().into_owned();
360                let kind = if import.is_type_only {
361                    EdgeKind::ConditionalImport
362                } else {
363                    EdgeKind::ImportsFrom
364                };
365                edges.push(Edge {
366                    kind,
367                    source: source.clone(),
368                    target,
369                    metadata: None,
370                });
371            }
372        }
373
374        Ok(edges)
375    }
376}
377
378// ---------------------------------------------------------------------------
379// Tests
380// ---------------------------------------------------------------------------
381
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::path::{Path, PathBuf};

    use domain::model::EdgeKind;

    use super::{PythonConfig, PythonResolver};
    use crate::resolver::{ImportResolver, ResolveContext};
    use crate::{ImportName, ParseResult, RawImport};

    /// Resolver without any extra package roots.
    fn make_resolver() -> PythonResolver {
        let config = PythonConfig {
            package_roots: Vec::new(),
        };
        PythonResolver::new(config)
    }

    /// Context rooted at `project_root` whose file tree is exactly `file_tree`.
    fn make_context(project_root: &str, file_tree: Vec<&str>) -> ResolveContext {
        let file_tree = file_tree.into_iter().map(PathBuf::from).collect();
        ResolveContext {
            project_root: PathBuf::from(project_root),
            parsed_files: HashMap::new(),
            file_tree,
        }
    }

    /// `import <specifier>` — no imported names recorded.
    fn plain_import(specifier: &'static str) -> RawImport {
        RawImport {
            specifier: specifier.into(),
            ..Default::default()
        }
    }

    /// `from <specifier> import <name>` — one non-type, un-aliased name.
    fn from_import(specifier: &'static str, name: &'static str) -> RawImport {
        RawImport {
            specifier: specifier.into(),
            names: vec![ImportName {
                name: name.into(),
                alias: None,
                is_type: false,
            }],
            ..Default::default()
        }
    }

    /// Parse result that carries only the given imports.
    fn parsed(imports: Vec<RawImport>) -> ParseResult {
        ParseResult {
            imports,
            ..Default::default()
        }
    }

    // AC40: Resolves `from .models import User` to sibling models.py
    #[test]
    fn resolves_relative_import_single_dot() {
        let context = make_context(
            "/project",
            vec!["/project/app/models.py", "/project/app/views.py"],
        );
        let parse_result = parsed(vec![from_import(".models", "User")]);

        let edges = make_resolver()
            .resolve(Path::new("/project/app/views.py"), &parse_result, &context)
            .unwrap();

        assert_eq!(edges.len(), 1);
        assert_eq!(edges[0].kind, EdgeKind::ImportsFrom);
        assert_eq!(edges[0].source, "/project/app/views.py");
        assert_eq!(edges[0].target, "/project/app/models.py");
    }

    // AC41: Resolves `from ..utils import helper` by walking up directories
    #[test]
    fn resolves_relative_import_double_dot() {
        let context = make_context(
            "/project",
            vec!["/project/utils.py", "/project/app/views.py"],
        );
        let parse_result = parsed(vec![from_import("..utils", "helper")]);

        let edges = make_resolver()
            .resolve(Path::new("/project/app/views.py"), &parse_result, &context)
            .unwrap();

        assert_eq!(edges.len(), 1);
        assert_eq!(edges[0].kind, EdgeKind::ImportsFrom);
        assert_eq!(edges[0].source, "/project/app/views.py");
        assert_eq!(edges[0].target, "/project/utils.py");
    }

    // AC42: Skips stdlib imports (import os) — no edge
    #[test]
    fn skips_stdlib_import() {
        let context = make_context("/project", vec![]);
        let parse_result = parsed(vec![plain_import("os")]);

        let edges = make_resolver()
            .resolve(Path::new("/project/main.py"), &parse_result, &context)
            .unwrap();

        assert!(edges.is_empty(), "stdlib import should produce no edge");
    }

    // AC42: Skips stdlib submodule imports (e.g. os.path)
    #[test]
    fn skips_stdlib_submodule_import() {
        let context = make_context("/project", vec![]);
        let parse_result = parsed(vec![plain_import("os.path")]);

        let edges = make_resolver()
            .resolve(Path::new("/project/main.py"), &parse_result, &context)
            .unwrap();

        assert!(
            edges.is_empty(),
            "stdlib submodule import should produce no edge"
        );
    }

    // AC43: Creates ConditionalImport edge for TYPE_CHECKING imports
    #[test]
    fn creates_conditional_import_for_type_checking() {
        let context = make_context(
            "/project",
            vec!["/project/app/models.py", "/project/app/views.py"],
        );
        let type_only = RawImport {
            is_type_only: true,
            ..from_import(".models", "User")
        };
        let parse_result = parsed(vec![type_only]);

        let edges = make_resolver()
            .resolve(Path::new("/project/app/views.py"), &parse_result, &context)
            .unwrap();

        assert_eq!(edges.len(), 1);
        assert_eq!(edges[0].kind, EdgeKind::ConditionalImport);
    }

    // Absolute local import resolution
    #[test]
    fn resolves_absolute_local_import() {
        let context = make_context("/project", vec!["/project/utils/helpers.py"]);
        let parse_result = parsed(vec![plain_import("utils.helpers")]);

        let edges = make_resolver()
            .resolve(Path::new("/project/main.py"), &parse_result, &context)
            .unwrap();

        assert_eq!(edges.len(), 1);
        assert_eq!(edges[0].kind, EdgeKind::ImportsFrom);
        assert_eq!(edges[0].target, "/project/utils/helpers.py");
    }

    // Package import resolves to __init__.py
    #[test]
    fn resolves_package_import_to_init() {
        let context = make_context("/project", vec!["/project/mypackage/__init__.py"]);
        let parse_result = parsed(vec![plain_import("mypackage")]);

        let edges = make_resolver()
            .resolve(Path::new("/project/main.py"), &parse_result, &context)
            .unwrap();

        assert_eq!(edges.len(), 1);
        assert_eq!(edges[0].target, "/project/mypackage/__init__.py");
    }

    // Unresolvable import produces no edge
    #[test]
    fn unresolvable_import_produces_no_edge() {
        let context = make_context("/project", vec![]);
        let parse_result = parsed(vec![plain_import("third_party_lib")]);

        let edges = make_resolver()
            .resolve(Path::new("/project/main.py"), &parse_result, &context)
            .unwrap();

        assert!(edges.is_empty());
    }

    // Multiple imports in one parse result
    #[test]
    fn resolves_multiple_imports() {
        let context = make_context("/project", vec!["/project/models.py", "/project/utils.py"]);
        let parse_result = parsed(vec![
            plain_import("models"),
            plain_import("utils"),
            plain_import("sys"), // stdlib — skipped
        ]);

        let edges = make_resolver()
            .resolve(Path::new("/project/main.py"), &parse_result, &context)
            .unwrap();

        assert_eq!(edges.len(), 2);
    }
}
617
#[cfg(test)]
mod config_tests {
    use super::*;

    // A project containing a `src/` directory gets exactly that directory as package root.
    #[test]
    fn python_config_detects_src_dir() {
        let project = tempfile::tempdir().unwrap();
        let src_dir = project.path().join("src");
        std::fs::create_dir_all(&src_dir).unwrap();

        let PythonConfig { package_roots } = PythonConfig::load(project.path());

        assert_eq!(package_roots, vec![src_dir]);
    }

    // Without a `src/` directory no package roots are configured.
    #[test]
    fn python_config_empty_without_src() {
        let project = tempfile::tempdir().unwrap();

        let PythonConfig { package_roots } = PythonConfig::load(project.path());

        assert!(package_roots.is_empty());
    }
}