Skip to main content

normalize_languages/
python.rs

1//! Python language support.
2
3use crate::external_packages::ResolvedPackage;
4use crate::{Export, Import, Language, Symbol, SymbolKind, Visibility, VisibilityMechanism};
5use std::path::{Path, PathBuf};
6use std::sync::Mutex;
7use tree_sitter::Node;
8
9// ============================================================================
10// Python path cache (filesystem-based detection, no subprocess calls)
11// ============================================================================
12
/// Process-wide, single-entry cache of detected Python paths.
///
/// Holds the result for one project root at a time; `get_python_cache`
/// replaces the entry whenever it is asked about a different root.
static PYTHON_CACHE: Mutex<Option<PythonPathCache>> = Mutex::new(None);
14
/// Cached Python paths detected from filesystem structure.
#[derive(Clone)]
struct PythonPathCache {
    /// Canonical project root used as cache key
    root: PathBuf,
    /// Python version (e.g., "3.13")
    version: Option<String>,
    /// Stdlib path (e.g., /usr/.../lib/python3.13/)
    stdlib: Option<PathBuf>,
    /// Site-packages path
    site_packages: Option<PathBuf>,
}

impl PythonPathCache {
    /// Cache entry for a root where no Python installation could be detected.
    fn empty(root: PathBuf) -> Self {
        Self {
            root,
            version: None,
            stdlib: None,
            site_packages: None,
        }
    }

    /// Numeric sort key for a version suffix such as `3.13` (or `3.13t`).
    ///
    /// Each dot-separated component contributes its leading decimal digits,
    /// so `3.13` orders after `3.9` (a plain string comparison gets this wrong).
    fn version_key(ver: &str) -> Vec<u64> {
        ver.split('.')
            .map(|part| {
                let digits_end = part
                    .find(|c: char| !c.is_ascii_digit())
                    .unwrap_or(part.len());
                part[..digits_end].parse::<u64>().unwrap_or(0)
            })
            .collect()
    }

    /// Detect Python paths for `root` purely from the filesystem layout.
    ///
    /// Detection steps:
    /// 1. Locate an interpreter: `.venv/bin/python`, then `venv/bin/python`,
    ///    then the first `python3`/`python` found on `PATH`.
    /// 2. Resolve symlinks and treat `<interpreter>/../..` as the install prefix.
    /// 3. Pick the numerically highest `lib/pythonX.Y` directory under the
    ///    prefix as the stdlib location and version.
    /// 4. Prefer the project venv's `site-packages` for that version, falling
    ///    back to `<stdlib>/site-packages`.
    ///
    /// Any step that fails leaves the remaining fields as `None`.
    fn new(root: &Path) -> Self {
        let root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());

        // Try to find Python from venv or PATH
        let python_bin = [".venv/bin/python", "venv/bin/python"]
            .iter()
            .map(|rel| root.join(rel))
            .find(|candidate| candidate.exists())
            .or_else(|| {
                // `split_paths` uses the platform separator and tolerates
                // non-UTF-8 PATH values.
                let search_path = std::env::var_os("PATH")?;
                std::env::split_paths(&search_path)
                    .flat_map(|dir| [dir.join("python3"), dir.join("python")])
                    .find(|candidate| candidate.exists())
            });

        let Some(python_bin) = python_bin else {
            return Self::empty(root);
        };

        // Resolve symlinks to find the actual Python installation
        let python_real = std::fs::canonicalize(&python_bin).unwrap_or(python_bin);

        // Python binary is typically at /prefix/bin/python3
        // Stdlib is at /prefix/lib/pythonX.Y/
        // Site-packages is at /prefix/lib/pythonX.Y/site-packages/ (system)
        // Or for venv: venv/lib/pythonX.Y/site-packages/
        let Some(lib) = python_real
            .parent()
            .and_then(Path::parent)
            .map(|prefix| prefix.join("lib"))
        else {
            return Self::empty(root);
        };

        // Look for lib/pythonX.Y directories to detect version
        let mut best: Option<(Vec<u64>, String, PathBuf)> = None;
        if let Ok(entries) = std::fs::read_dir(&lib) {
            for entry in entries.flatten() {
                let name = entry.file_name();
                let name = name.to_string_lossy();
                let Some(ver) = name.strip_prefix("python") else {
                    continue;
                };

                // Check it looks like a version (X.Y)
                let looks_like_version =
                    ver.contains('.') && ver.starts_with(|c: char| c.is_ascii_digit());
                if !looks_like_version || !entry.path().is_dir() {
                    continue;
                }

                // Prefer higher versions, compared numerically; the raw string
                // only breaks ties between equal numeric keys.
                let key = Self::version_key(ver);
                let is_better = match &best {
                    None => true,
                    Some((best_key, best_ver, _)) => (&key, ver) > (best_key, best_ver.as_str()),
                };
                if is_better {
                    best = Some((key, ver.to_string(), entry.path()));
                }
            }
        }

        let Some((_, version, stdlib)) = best else {
            return Self::empty(root);
        };

        // For venv, site-packages is in the venv; otherwise (or if the venv
        // has no directory for this version) fall back to the system one.
        let venv_site = [".venv", "venv"]
            .iter()
            .map(|dir| root.join(dir))
            .find(|venv| venv.exists())
            .map(|venv| {
                venv.join("lib")
                    .join(format!("python{}", version))
                    .join("site-packages")
            })
            .filter(|site| site.exists());
        let site_packages = venv_site.or_else(|| {
            let sys_site = stdlib.join("site-packages");
            if sys_site.exists() {
                Some(sys_site)
            } else {
                None
            }
        });

        Self {
            root,
            version: Some(version),
            stdlib: Some(stdlib),
            site_packages,
        }
    }
}
149
150/// Get cached Python paths for a project.
151fn get_python_cache(project_root: &Path) -> PythonPathCache {
152    let canonical = project_root
153        .canonicalize()
154        .unwrap_or_else(|_| project_root.to_path_buf());
155
156    let mut cache_guard = PYTHON_CACHE.lock().unwrap();
157
158    if let Some(ref cache) = *cache_guard {
159        if cache.root == canonical {
160            return cache.clone();
161        }
162    }
163
164    let new_cache = PythonPathCache::new(project_root);
165    *cache_guard = Some(new_cache.clone());
166    new_cache
167}
168
169// ============================================================================
170// Python stdlib and site-packages resolution
171// ============================================================================
172
173/// Get Python version from filesystem structure (no subprocess).
174pub fn get_python_version(project_root: &Path) -> Option<String> {
175    get_python_cache(project_root).version
176}
177
178/// Find Python stdlib directory from filesystem structure (no subprocess).
179pub fn find_python_stdlib(project_root: &Path) -> Option<PathBuf> {
180    get_python_cache(project_root).stdlib
181}
182
/// Check if a module name is a Python stdlib module.
///
/// Only the top-level component of `module_name` is considered (`os.path`
/// is checked as `os`). It counts as stdlib when `stdlib_path` contains
/// either a directory or a `<name>.py` file with that name.
///
/// Names whose top-level component is empty (the empty string, or relative
/// imports such as `.foo`) are never stdlib modules; without this guard the
/// lookup would test `stdlib_path` itself and always succeed.
fn is_python_stdlib_module(module_name: &str, stdlib_path: &Path) -> bool {
    let top_level = module_name.split('.').next().unwrap_or(module_name);
    if top_level.is_empty() {
        return false;
    }

    // Package directory first, then single-file module.
    stdlib_path.join(top_level).is_dir()
        || stdlib_path.join(format!("{}.py", top_level)).is_file()
}
197
198/// Resolve a Python stdlib import to its source location.
199fn resolve_python_stdlib_import(import_name: &str, stdlib_path: &Path) -> Option<ResolvedPackage> {
200    let parts: Vec<&str> = import_name.split('.').collect();
201    let top_level = parts[0];
202
203    // Check for package (directory)
204    let pkg_dir = stdlib_path.join(top_level);
205    if pkg_dir.is_dir() {
206        if parts.len() == 1 {
207            let init = pkg_dir.join("__init__.py");
208            if init.is_file() {
209                return Some(ResolvedPackage {
210                    path: pkg_dir,
211                    name: import_name.to_string(),
212                    is_namespace: false,
213                });
214            }
215            // Some stdlib packages don't have __init__.py in newer Python
216            return Some(ResolvedPackage {
217                path: pkg_dir,
218                name: import_name.to_string(),
219                is_namespace: true,
220            });
221        } else {
222            // Submodule
223            let mut path = pkg_dir.clone();
224            for part in &parts[1..] {
225                path = path.join(part);
226            }
227
228            if path.is_dir() {
229                let init = path.join("__init__.py");
230                return Some(ResolvedPackage {
231                    path: path.clone(),
232                    name: import_name.to_string(),
233                    is_namespace: !init.is_file(),
234                });
235            }
236
237            let py_file = path.with_extension("py");
238            if py_file.is_file() {
239                return Some(ResolvedPackage {
240                    path: py_file,
241                    name: import_name.to_string(),
242                    is_namespace: false,
243                });
244            }
245
246            return None;
247        }
248    }
249
250    // Check for single-file module
251    let py_file = stdlib_path.join(format!("{}.py", top_level));
252    if py_file.is_file() {
253        return Some(ResolvedPackage {
254            path: py_file,
255            name: import_name.to_string(),
256            is_namespace: false,
257        });
258    }
259
260    None
261}
262
263/// Find Python site-packages directory for a project.
264///
265/// Search order:
266/// 1. .venv/lib/pythonX.Y/site-packages/ (uv, poetry, standard venv)
267/// 2. Walk up looking for venv directories
268pub fn find_python_site_packages(project_root: &Path) -> Option<PathBuf> {
269    // Use cached result from filesystem detection
270    if let Some(site) = get_python_cache(project_root).site_packages {
271        return Some(site);
272    }
273
274    // Fall back to scanning parent directories for venvs
275    let mut current = project_root.to_path_buf();
276    while let Some(parent) = current.parent() {
277        let venv_dir = parent.join(".venv");
278        if venv_dir.is_dir() {
279            if let Some(site_packages) = find_site_packages_in_venv(&venv_dir) {
280                return Some(site_packages);
281            }
282        }
283        current = parent.to_path_buf();
284    }
285
286    None
287}
288
/// Locate the site-packages directory inside a virtual environment.
///
/// Two layouts are probed, in order:
/// - Unix: `<venv>/lib/python*/site-packages` (first matching entry in
///   directory-listing order)
/// - Windows: `<venv>/Lib/site-packages`
///
/// Returns `None` when neither layout is present.
fn find_site_packages_in_venv(venv: &Path) -> Option<PathBuf> {
    // Unix: lib/pythonX.Y/site-packages
    let unix_lib = venv.join("lib");
    let unix_site = if unix_lib.is_dir() {
        std::fs::read_dir(&unix_lib).ok().and_then(|listing| {
            listing
                .flatten()
                .filter(|item| item.file_name().to_string_lossy().starts_with("python"))
                .map(|item| item.path().join("site-packages"))
                .find(|candidate| candidate.is_dir())
        })
    } else {
        None
    };
    if unix_site.is_some() {
        return unix_site;
    }

    // Windows: Lib/site-packages
    let windows_site = venv.join("Lib").join("site-packages");
    if windows_site.is_dir() {
        Some(windows_site)
    } else {
        None
    }
}
316
317/// Resolve a Python import to its source location.
318///
319/// Handles:
320/// - Package imports (requests -> requests/__init__.py)
321/// - Module imports (six -> six.py)
322/// - Submodule imports (requests.api -> requests/api.py)
323/// - Namespace packages (no __init__.py)
324fn resolve_python_import(import_name: &str, site_packages: &Path) -> Option<ResolvedPackage> {
325    // Split on dots for submodule resolution
326    let parts: Vec<&str> = import_name.split('.').collect();
327    let top_level = parts[0];
328
329    // Check for package (directory)
330    let pkg_dir = site_packages.join(top_level);
331    if pkg_dir.is_dir() {
332        if parts.len() == 1 {
333            // Just the package - look for __init__.py
334            let init = pkg_dir.join("__init__.py");
335            if init.is_file() {
336                return Some(ResolvedPackage {
337                    path: pkg_dir,
338                    name: import_name.to_string(),
339                    is_namespace: false,
340                });
341            }
342            // Namespace package (no __init__.py)
343            return Some(ResolvedPackage {
344                path: pkg_dir,
345                name: import_name.to_string(),
346                is_namespace: true,
347            });
348        } else {
349            // Submodule - build path
350            let mut path = pkg_dir.clone();
351            for part in &parts[1..] {
352                path = path.join(part);
353            }
354
355            // Try as package first
356            if path.is_dir() {
357                let init = path.join("__init__.py");
358                return Some(ResolvedPackage {
359                    path: path.clone(),
360                    name: import_name.to_string(),
361                    is_namespace: !init.is_file(),
362                });
363            }
364
365            // Try as module
366            let py_file = path.with_extension("py");
367            if py_file.is_file() {
368                return Some(ResolvedPackage {
369                    path: py_file,
370                    name: import_name.to_string(),
371                    is_namespace: false,
372                });
373            }
374
375            return None;
376        }
377    }
378
379    // Check for single-file module
380    let py_file = site_packages.join(format!("{}.py", top_level));
381    if py_file.is_file() {
382        return Some(ResolvedPackage {
383            path: py_file,
384            name: import_name.to_string(),
385            is_namespace: false,
386        });
387    }
388
389    None
390}
391
392// ============================================================================
393// Python language support
394// ============================================================================
395
/// Python language support.
///
/// Stateless unit type; all behavior lives in its `Language` implementation.
pub struct Python;
398
399impl Language for Python {
    /// Display name of the language: `"Python"`.
    fn name(&self) -> &'static str {
        "Python"
    }
    /// File extensions (without the leading dot) associated with Python:
    /// `py`, `pyi` and `pyw`.
    fn extensions(&self) -> &'static [&'static str] {
        &["py", "pyi", "pyw"]
    }
    /// Grammar identifier: `"python"`.
    // NOTE(review): presumably the tree-sitter grammar name used for lookup -
    // confirm against the `Language` trait documentation.
    fn grammar_name(&self) -> &'static str {
        "python"
    }
409
    /// Always `true` for Python.
    fn has_symbols(&self) -> bool {
        true
    }
413
    /// Node kinds treated as containers: only `class_definition`.
    fn container_kinds(&self) -> &'static [&'static str] {
        &["class_definition"]
    }
417
    /// Node kinds treated as functions: only `function_definition`.
    fn function_kinds(&self) -> &'static [&'static str] {
        &["function_definition"]
    }
421
    /// Node kinds treated as types: only `class_definition` (the same kind
    /// that `container_kinds` reports).
    fn type_kinds(&self) -> &'static [&'static str] {
        &["class_definition"]
    }
425
    /// Node kinds treated as imports: `import_statement` (`import x`) and
    /// `import_from_statement` (`from x import y`).
    fn import_kinds(&self) -> &'static [&'static str] {
        &["import_statement", "import_from_statement"]
    }
429
    /// Node kinds that can yield public symbols: function and class
    /// definitions.
    fn public_symbol_kinds(&self) -> &'static [&'static str] {
        &["function_definition", "class_definition"]
    }
433
    /// Python expresses visibility through naming conventions (leading
    /// underscores), so this reports `VisibilityMechanism::NamingConvention`.
    fn visibility_mechanism(&self) -> VisibilityMechanism {
        VisibilityMechanism::NamingConvention
    }
437
    /// Node kinds reported as contributing to complexity.
    ///
    /// Covers branching and looping statements, exception handling,
    /// `with`, `match`/`case`, the boolean operators `and`/`or`, conditional
    /// expressions, and comprehensions/generator expressions.
    // NOTE(review): how callers weight these kinds is not visible here.
    fn complexity_nodes(&self) -> &'static [&'static str] {
        &[
            "if_statement",
            "for_statement",
            "while_statement",
            "try_statement",
            "except_clause",
            "with_statement",
            "match_statement",
            "case_clause",
            "and",
            "or",
            "conditional_expression",
            "list_comprehension",
            "dictionary_comprehension",
            "set_comprehension",
            "generator_expression",
        ]
    }
457
    /// Node kinds reported as nesting constructs: compound statements
    /// (`if`, `for`, `while`, `try`, `with`, `match`) plus function and class
    /// definitions.
    fn nesting_nodes(&self) -> &'static [&'static str] {
        &[
            "if_statement",
            "for_statement",
            "while_statement",
            "try_statement",
            "with_statement",
            "match_statement",
            "function_definition",
            "class_definition",
        ]
    }
470
    /// Suffix for signatures: the empty string for Python.
    fn signature_suffix(&self) -> &'static str {
        ""
    }
474
    /// Node kinds reported as scope-creating, in addition to functions and
    /// containers: `for`/`with` statements, comprehensions, generator
    /// expressions and lambdas.
    // NOTE(review): in Python itself `for` and `with` statements do not open a
    // new variable scope; presumably they are listed because they introduce
    // bindings the analysis wants to track - confirm with the consumer.
    fn scope_creating_kinds(&self) -> &'static [&'static str] {
        // Additional scope-creating nodes beyond functions and containers
        &[
            "for_statement",
            "with_statement",
            "list_comprehension",
            "set_comprehension",
            "dictionary_comprehension",
            "generator_expression",
            "lambda",
        ]
    }
487
    /// Node kinds reported as control flow: compound statements (`if`, `for`,
    /// `while`, `try`, `with`, `match`) and the jump-like statements
    /// `return`, `break`, `continue`, `raise` and `assert`.
    fn control_flow_kinds(&self) -> &'static [&'static str] {
        &[
            "if_statement",
            "for_statement",
            "while_statement",
            "try_statement",
            "with_statement",
            "match_statement",
            "return_statement",
            "break_statement",
            "continue_statement",
            "raise_statement",
            "assert_statement",
        ]
    }
503
504    fn extract_function(&self, node: &Node, content: &str, in_container: bool) -> Option<Symbol> {
505        let name = self.node_name(node, content)?;
506
507        // Skip private methods unless they're dunder methods
508        // (visibility filtering can be done by caller)
509
510        // Check for async keyword as first child token
511        let is_async = node
512            .child(0)
513            .map(|c| &content[c.byte_range()] == "async")
514            .unwrap_or(false);
515        let prefix = if is_async { "async def" } else { "def" };
516
517        let params = node
518            .child_by_field_name("parameters")
519            .map(|p| &content[p.byte_range()])
520            .unwrap_or("()");
521
522        let return_type = node
523            .child_by_field_name("return_type")
524            .map(|r| format!(" -> {}", &content[r.byte_range()]))
525            .unwrap_or_default();
526
527        let signature = format!("{} {}{}{}", prefix, name, params, return_type);
528        let visibility = self.get_visibility(node, content);
529
530        Some(Symbol {
531            name: name.to_string(),
532            kind: if in_container {
533                SymbolKind::Method
534            } else {
535                SymbolKind::Function
536            },
537            signature,
538            docstring: self.extract_docstring(node, content),
539            attributes: Vec::new(),
540            start_line: node.start_position().row + 1,
541            end_line: node.end_position().row + 1,
542            visibility,
543            children: Vec::new(),
544            is_interface_impl: false,
545            implements: Vec::new(),
546        })
547    }
548
549    fn extract_container(&self, node: &Node, content: &str) -> Option<Symbol> {
550        let name = self.node_name(node, content)?;
551
552        let bases = node
553            .child_by_field_name("superclasses")
554            .map(|b| &content[b.byte_range()])
555            .unwrap_or("");
556
557        let signature = if bases.is_empty() {
558            format!("class {}", name)
559        } else {
560            format!("class {}{}", name, bases)
561        };
562
563        Some(Symbol {
564            name: name.to_string(),
565            kind: SymbolKind::Class,
566            signature,
567            docstring: self.extract_docstring(node, content),
568            attributes: Vec::new(),
569            start_line: node.start_position().row + 1,
570            end_line: node.end_position().row + 1,
571            visibility: self.get_visibility(node, content),
572            children: Vec::new(), // Caller fills this in
573            is_interface_impl: false,
574            implements: Vec::new(),
575        })
576    }
577
    /// Extract a type symbol; delegates to `extract_container` because Python
    /// classes are both containers and types.
    fn extract_type(&self, node: &Node, content: &str) -> Option<Symbol> {
        // Python classes are both containers and types
        self.extract_container(node, content)
    }
582
583    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
584        let body = node.child_by_field_name("body")?;
585        let first = body.child(0)?;
586
587        // Handle both grammar versions:
588        // - Old: expression_statement > string
589        // - New (arborium): string directly, with string_content child
590        let string_node = match first.kind() {
591            "string" => Some(first),
592            "expression_statement" => first.child(0).filter(|n| n.kind() == "string"),
593            _ => None,
594        }?;
595
596        // Try string_content child (arborium style)
597        let mut cursor = string_node.walk();
598        for child in string_node.children(&mut cursor) {
599            if child.kind() == "string_content" {
600                let doc = content[child.byte_range()].trim();
601                if !doc.is_empty() {
602                    return Some(doc.to_string());
603                }
604            }
605        }
606
607        // Fallback: extract from full string text (old style)
608        let text = &content[string_node.byte_range()];
609        let doc = text
610            .trim_start_matches("\"\"\"")
611            .trim_start_matches("'''")
612            .trim_start_matches('"')
613            .trim_start_matches('\'')
614            .trim_end_matches("\"\"\"")
615            .trim_end_matches("'''")
616            .trim_end_matches('"')
617            .trim_end_matches('\'')
618            .trim();
619
620        if !doc.is_empty() {
621            Some(doc.to_string())
622        } else {
623            None
624        }
625    }
626
    /// Always returns an empty list; both arguments are ignored.
    fn extract_attributes(&self, _node: &Node, _content: &str) -> Vec<String> {
        Vec::new()
    }
630
631    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
632        let line = node.start_position().row + 1;
633
634        match node.kind() {
635            "import_statement" => {
636                // import foo, import foo as bar
637                let mut imports = Vec::new();
638                let mut cursor = node.walk();
639                for child in node.children(&mut cursor) {
640                    if child.kind() == "dotted_name" {
641                        let module = content[child.byte_range()].to_string();
642                        imports.push(Import {
643                            module,
644                            names: Vec::new(),
645                            alias: None,
646                            is_wildcard: false,
647                            is_relative: false,
648                            line,
649                        });
650                    } else if child.kind() == "aliased_import" {
651                        if let Some(name) = child.child_by_field_name("name") {
652                            let module = content[name.byte_range()].to_string();
653                            let alias = child
654                                .child_by_field_name("alias")
655                                .map(|a| content[a.byte_range()].to_string());
656                            imports.push(Import {
657                                module,
658                                names: Vec::new(),
659                                alias,
660                                is_wildcard: false,
661                                is_relative: false,
662                                line,
663                            });
664                        }
665                    }
666                }
667                imports
668            }
669            "import_from_statement" => {
670                // from foo import bar, baz
671                let module = node
672                    .child_by_field_name("module_name")
673                    .map(|m| content[m.byte_range()].to_string())
674                    .unwrap_or_default();
675
676                // Check for relative import (from . or from .. or from .foo)
677                let text = &content[node.byte_range()];
678                let is_relative = text.starts_with("from .");
679
680                let mut names = Vec::new();
681                let mut is_wildcard = false;
682                let module_end = node
683                    .child_by_field_name("module_name")
684                    .map(|m| m.end_byte())
685                    .unwrap_or(0);
686
687                let mut cursor = node.walk();
688                for child in node.children(&mut cursor) {
689                    match child.kind() {
690                        "dotted_name" | "identifier" => {
691                            // Skip the module name itself
692                            if child.start_byte() > module_end {
693                                names.push(content[child.byte_range()].to_string());
694                            }
695                        }
696                        "aliased_import" => {
697                            if let Some(name) = child.child_by_field_name("name") {
698                                names.push(content[name.byte_range()].to_string());
699                            }
700                        }
701                        "wildcard_import" => {
702                            is_wildcard = true;
703                        }
704                        _ => {}
705                    }
706                }
707
708                vec![Import {
709                    module,
710                    names,
711                    alias: None,
712                    is_wildcard,
713                    is_relative,
714                    line,
715                }]
716            }
717            _ => Vec::new(),
718        }
719    }
720
721    fn format_import(&self, import: &Import, names: Option<&[&str]>) -> String {
722        let names_to_use: Vec<&str> = names
723            .map(|n| n.to_vec())
724            .unwrap_or_else(|| import.names.iter().map(|s| s.as_str()).collect());
725
726        if import.is_wildcard {
727            format!("from {} import *", import.module)
728        } else if names_to_use.is_empty() {
729            if let Some(ref alias) = import.alias {
730                format!("import {} as {}", import.module, alias)
731            } else {
732                format!("import {}", import.module)
733            }
734        } else {
735            format!("from {} import {}", import.module, names_to_use.join(", "))
736        }
737    }
738
739    fn extract_public_symbols(&self, node: &Node, content: &str) -> Vec<Export> {
740        let line = node.start_position().row + 1;
741
742        match node.kind() {
743            "function_definition" => {
744                if let Some(name) = self.node_name(node, content) {
745                    if !name.starts_with('_') {
746                        return vec![Export {
747                            name: name.to_string(),
748                            kind: SymbolKind::Function,
749                            line,
750                        }];
751                    }
752                }
753                Vec::new()
754            }
755            "class_definition" => {
756                if let Some(name) = self.node_name(node, content) {
757                    if !name.starts_with('_') {
758                        return vec![Export {
759                            name: name.to_string(),
760                            kind: SymbolKind::Class,
761                            line,
762                        }];
763                    }
764                }
765                Vec::new()
766            }
767            _ => Vec::new(),
768        }
769    }
770
771    fn is_public(&self, node: &Node, content: &str) -> bool {
772        if let Some(name) = self.node_name(node, content) {
773            // Public if doesn't start with _ or is dunder method
774            !name.starts_with('_') || name.starts_with("__")
775        } else {
776            true
777        }
778    }
779
780    fn get_visibility(&self, node: &Node, content: &str) -> Visibility {
781        if let Some(name) = self.node_name(node, content) {
782            if name.starts_with("__") && name.ends_with("__") {
783                Visibility::Public // dunder methods
784            } else if name.starts_with("__") {
785                Visibility::Private // name mangled
786            } else if name.starts_with('_') {
787                Visibility::Protected // convention private
788            } else {
789                Visibility::Public
790            }
791        } else {
792            Visibility::Public
793        }
794    }
795
796    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
797        let name = symbol.name.as_str();
798        match symbol.kind {
799            crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
800            crate::SymbolKind::Class => name.starts_with("Test") && name.len() > 4,
801            crate::SymbolKind::Module => name == "tests" || name == "test" || name == "__tests__",
802            _ => false,
803        }
804    }
805
    /// Always returns `None`; both arguments are ignored.
    fn embedded_content(&self, _node: &Node, _content: &str) -> Option<crate::EmbeddedBlock> {
        None
    }
809
810    fn body_has_docstring(&self, body: &Node, content: &str) -> bool {
811        let _ = content;
812        body.child(0)
813            .map(|c| {
814                c.kind() == "string"
815                    || (c.kind() == "expression_statement"
816                        && c.child(0).map(|n| n.kind() == "string").unwrap_or(false))
817            })
818            .unwrap_or(false)
819    }
820
    /// Returns the child stored in the node's `body` field, if any.
    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
        node.child_by_field_name("body")
    }
824
825    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
826        let name_node = node.child_by_field_name("name")?;
827        Some(&content[name_node.byte_range()])
828    }
829
830    // === Import Resolution ===
831
    /// Language key used by import resolution: `"python"`.
    // NOTE(review): exact use of the key by callers is not visible here.
    fn lang_key(&self) -> &'static str {
        "python"
    }
835
836    fn resolve_local_import(
837        &self,
838        import_name: &str,
839        current_file: &Path,
840        project_root: &Path,
841    ) -> Option<PathBuf> {
842        // Handle relative imports (starting with .)
843        if import_name.starts_with('.') {
844            let current_dir = current_file.parent()?;
845            let dots = import_name.chars().take_while(|c| *c == '.').count();
846            let module_part = &import_name[dots..];
847
848            // Go up (dots-1) directories from current file's directory
849            let mut base = current_dir.to_path_buf();
850            for _ in 1..dots {
851                base = base.parent()?.to_path_buf();
852            }
853
854            // Convert module.path to module/path.py
855            let module_path = if module_part.is_empty() {
856                base.join("__init__.py")
857            } else {
858                let path_part = module_part.replace('.', "/");
859                // Try module/submodule.py first, then module/submodule/__init__.py
860                let direct = base.join(format!("{}.py", path_part));
861                if direct.exists() {
862                    return Some(direct);
863                }
864                base.join(path_part).join("__init__.py")
865            };
866
867            if module_path.exists() {
868                return Some(module_path);
869            }
870        }
871
872        // Handle absolute imports - try to find in src/ or as top-level package
873        let module_path = import_name.replace('.', "/");
874
875        // Try src/<module>.py
876        let src_path = project_root.join("src").join(format!("{}.py", module_path));
877        if src_path.exists() {
878            return Some(src_path);
879        }
880
881        // Try src/<module>/__init__.py
882        let src_pkg_path = project_root
883            .join("src")
884            .join(&module_path)
885            .join("__init__.py");
886        if src_pkg_path.exists() {
887            return Some(src_pkg_path);
888        }
889
890        // Try <module>.py directly
891        let direct_path = project_root.join(format!("{}.py", module_path));
892        if direct_path.exists() {
893            return Some(direct_path);
894        }
895
896        // Try <module>/__init__.py
897        let pkg_path = project_root.join(&module_path).join("__init__.py");
898        if pkg_path.exists() {
899            return Some(pkg_path);
900        }
901
902        None
903    }
904
905    fn resolve_external_import(
906        &self,
907        import_name: &str,
908        project_root: &Path,
909    ) -> Option<ResolvedPackage> {
910        // Check stdlib first
911        if let Some(stdlib) = find_python_stdlib(project_root)
912            && let Some(pkg) = resolve_python_stdlib_import(import_name, &stdlib)
913        {
914            return Some(pkg);
915        }
916
917        // Then site-packages
918        if let Some(site_packages) = find_python_site_packages(project_root) {
919            return resolve_python_import(import_name, &site_packages);
920        }
921
922        None
923    }
924
925    fn is_stdlib_import(&self, import_name: &str, project_root: &Path) -> bool {
926        if let Some(stdlib) = find_python_stdlib(project_root) {
927            is_python_stdlib_module(import_name, &stdlib)
928        } else {
929            false
930        }
931    }
932
933    fn get_version(&self, project_root: &Path) -> Option<String> {
934        get_python_version(project_root)
935    }
936
937    fn find_package_cache(&self, project_root: &Path) -> Option<PathBuf> {
938        find_python_site_packages(project_root)
939    }
940
941    fn indexable_extensions(&self) -> &'static [&'static str] {
942        &["py"]
943    }
944
945    fn find_stdlib(&self, project_root: &Path) -> Option<PathBuf> {
946        find_python_stdlib(project_root)
947    }
948
949    fn should_skip_package_entry(&self, name: &str, is_dir: bool) -> bool {
950        // Skip private modules
951        if name.starts_with('_') {
952            return true;
953        }
954        // Skip __pycache__, dist-info, egg-info
955        if name == "__pycache__" || name.ends_with(".dist-info") || name.ends_with(".egg-info") {
956            return true;
957        }
958        // Skip non-Python files
959        if !is_dir && !name.ends_with(".py") {
960            return true;
961        }
962        false
963    }
964
965    fn find_package_entry(&self, path: &Path) -> Option<PathBuf> {
966        if path.is_file() {
967            return Some(path.to_path_buf());
968        }
969        // Python packages use __init__.py as entry point
970        let init_py = path.join("__init__.py");
971        if init_py.is_file() {
972            return Some(init_py);
973        }
974        None
975    }
976
977    fn package_module_name(&self, entry_name: &str) -> String {
978        // Strip .py extension
979        entry_name
980            .strip_suffix(".py")
981            .unwrap_or(entry_name)
982            .to_string()
983    }
984
985    fn package_sources(&self, project_root: &Path) -> Vec<crate::PackageSource> {
986        let mut sources = Vec::new();
987        if let Some(stdlib) = self.find_stdlib(project_root) {
988            sources.push(crate::PackageSource {
989                name: "stdlib",
990                path: stdlib,
991                kind: crate::PackageSourceKind::Flat,
992                version_specific: true,
993            });
994        }
995        if let Some(cache) = self.find_package_cache(project_root) {
996            sources.push(crate::PackageSource {
997                name: "site-packages",
998                path: cache,
999                kind: crate::PackageSourceKind::Flat,
1000                version_specific: false,
1001            });
1002        }
1003        sources
1004    }
1005
1006    fn discover_packages(&self, source: &crate::PackageSource) -> Vec<(String, PathBuf)> {
1007        self.discover_flat_packages(&source.path)
1008    }
1009
1010    fn file_path_to_module_name(&self, path: &Path) -> Option<String> {
1011        // Only Python files
1012        if path.extension()?.to_str()? != "py" {
1013            return None;
1014        }
1015
1016        // Remove extension
1017        let stem = path.with_extension("");
1018        let stem_str = stem.to_str()?;
1019
1020        // Strip common source directory prefixes
1021        let module_path = stem_str
1022            .strip_prefix("src/")
1023            .or_else(|| stem_str.strip_prefix("lib/"))
1024            .unwrap_or(stem_str);
1025
1026        // Handle __init__.py - use parent directory as module
1027        let module_path = if module_path.ends_with("/__init__") {
1028            module_path.strip_suffix("/__init__")?
1029        } else {
1030            module_path
1031        };
1032
1033        // Convert path separators to dots
1034        Some(module_path.replace('/', "."))
1035    }
1036
1037    fn module_name_to_paths(&self, module: &str) -> Vec<String> {
1038        // Convert dots to path separators
1039        let rel_path = module.replace('.', "/");
1040
1041        // Try common source directories and both .py and __init__.py
1042        let mut candidates = Vec::with_capacity(4);
1043        for prefix in &["src/", ""] {
1044            candidates.push(format!("{}{}.py", prefix, rel_path));
1045            candidates.push(format!("{}{}/__init__.py", prefix, rel_path));
1046        }
1047        candidates
1048    }
1049}
1050
#[cfg(test)]
mod tests {
    use super::*;
    use crate::GrammarLoader;
    use tree_sitter::Parser;

    /// A parsed tree bundled with the loader that supplied its grammar.
    struct ParseResult {
        tree: tree_sitter::Tree,
        // Never read. NOTE(review): presumably held so the loaded grammar
        // stays alive as long as the tree does; confirm against GrammarLoader.
        #[allow(dead_code)]
        loader: GrammarLoader,
    }

    /// Parses `content` with the `python` grammar, panicking on any failure.
    fn parse_python(content: &str) -> ParseResult {
        let loader = GrammarLoader::new();
        let language = loader.get("python").unwrap();

        let mut parser = Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(content, None).unwrap();

        ParseResult { tree, loader }
    }

    /// Collects the direct children of `root` whose kind equals `kind`, in
    /// source order.
    fn top_level_nodes<'t>(root: Node<'t>, kind: &str) -> Vec<Node<'t>> {
        let mut cursor = root.walk();
        let matches: Vec<_> = root
            .children(&mut cursor)
            .filter(|n| n.kind() == kind)
            .collect();
        matches
    }

    #[test]
    fn test_python_function_kinds() {
        let lang = Python;
        // async functions are function_definition with "async" keyword as first child
        assert!(lang.function_kinds().contains(&"function_definition"));
    }

    #[test]
    fn test_python_extract_function() {
        let lang = Python;
        let content = r#"def foo(x: int) -> str:
    """Convert to string."""
    return str(x)
"#;
        let parsed = parse_python(content);

        // First top-level function definition in the file.
        let func = top_level_nodes(parsed.tree.root_node(), "function_definition")
            .into_iter()
            .next()
            .unwrap();

        let sym = lang.extract_function(&func, content, false).unwrap();
        assert_eq!(sym.name, "foo");
        assert_eq!(sym.kind, SymbolKind::Function);
        assert!(sym.signature.contains("def foo(x: int) -> str"));
        assert_eq!(sym.docstring, Some("Convert to string.".to_string()));
    }

    #[test]
    fn test_python_extract_class() {
        let lang = Python;
        let content = r#"class Foo(Bar):
    """A foo class."""
    pass
"#;
        let parsed = parse_python(content);

        // First top-level class definition in the file.
        let class = top_level_nodes(parsed.tree.root_node(), "class_definition")
            .into_iter()
            .next()
            .unwrap();

        let sym = lang.extract_container(&class, content).unwrap();
        assert_eq!(sym.name, "Foo");
        assert_eq!(sym.kind, SymbolKind::Class);
        assert!(sym.signature.contains("class Foo(Bar)"));
        assert_eq!(sym.docstring, Some("A foo class.".to_string()));
    }

    #[test]
    fn test_python_visibility() {
        let lang = Python;
        let content = r#"def public(): pass
def _protected(): pass
def __private(): pass
def __dunder__(): pass
"#;
        let parsed = parse_python(content);
        let funcs = top_level_nodes(parsed.tree.root_node(), "function_definition");

        // One expectation per definition, in source order; the last entry is
        // the dunder name.
        let expected = [
            Visibility::Public,
            Visibility::Protected,
            Visibility::Private,
            Visibility::Public,
        ];
        assert_eq!(funcs.len(), expected.len());
        for (func, want) in funcs.iter().zip(expected) {
            assert_eq!(lang.get_visibility(func, content), want);
        }
    }

    /// Documents node kinds that exist in the Python grammar but aren't used in trait methods.
    /// Each exclusion has a reason. Review periodically as features expand.
    ///
    /// Run `cross_check_node_kinds` in registry.rs to see all potentially useful kinds.
    #[test]
    fn unused_node_kinds_audit() {
        use crate::validate_unused_kinds_audit;

        // Categories:
        // - STRUCTURAL: Internal/wrapper nodes, not semantically meaningful on their own
        // - CLAUSE: Sub-parts of statements, handled via parent (e.g., else_clause in if_statement)
        // - EXPRESSION: Expressions don't create control flow/scope, we track statements
        // - TYPE: Type annotation nodes, not relevant for current analysis
        // - LEGACY: Python 2 compatibility, not worth supporting
        // - OPERATOR: Operators within expressions, too granular
        // - TODO: Potentially useful, to be added when needed

        #[rustfmt::skip]
        let documented_unused: &[&str] = &[
            // STRUCTURAL
            "aliased_import",          // used internally by extract_imports
            "block",                   // generic block wrapper (duplicate in grammar)
            "expression_list",         // comma-separated expressions
            "identifier",              // too common, used everywhere
            "import_prefix",           // dots in relative imports
            "lambda_parameters",       // internal to lambda
            "module",                  // root node of file
            "parenthesized_expression",// grouping only
            "relative_import",         // handled in extract_imports
            "tuple_expression",        // comma-separated values
            "wildcard_import",         // handled in extract_imports

            // CLAUSE (sub-parts of statements)
            "case_pattern",            // internal to case_clause
            "class_pattern",           // pattern in match/case
            "elif_clause",             // part of if_statement
            "else_clause",             // part of if/for/while/try
            "finally_clause",          // part of try_statement
            "for_in_clause",           // internal to comprehensions
            "if_clause",               // internal to comprehensions
            "with_clause",             // internal to with_statement
            "with_item",               // internal to with_statement

            // EXPRESSION (don't affect control flow structure)
            "await",                   // await keyword, not a statement
            "format_expression",       // f-string interpolation
            "format_specifier",        // f-string format spec
            "named_expression",        // walrus operator :=
            "yield",                   // yield keyword form

            // TYPE (type annotations)
            "constrained_type",        // type constraints
            "generic_type",            // parameterized types
            "member_type",             // attribute access in types
            "splat_type",              // *args/**kwargs types
            "type",                    // generic type node
            "type_alias_statement",    // could track as symbol
            "type_conversion",         // !r/!s/!a in f-strings
            "type_parameter",          // generic type params
            "typed_default_parameter", // param with type and default
            "typed_parameter",         // param with type annotation
            "union_type",              // X | Y union syntax

            // OPERATOR
            "binary_operator",         // +, -, *, /, etc.
            "boolean_operator",        // and/or - handled in complexity_nodes as keywords
            "comparison_operator",     // ==, <, >, etc.
            "not_operator",            // not keyword
            "unary_operator",          // -, +, ~

            // LEGACY (Python 2)
            "exec_statement",          // Python 2 exec
            "print_statement",         // Python 2 print

            // TODO: Potentially useful
            "decorated_definition",    // wrapper for @decorator
            "delete_statement",        // del statement
            "future_import_statement", // from __future__
            "global_statement",        // scope modifier
            "nonlocal_statement",      // scope modifier
            "pass_statement",          // no-op, detect empty bodies
        ];

        let audit = validate_unused_kinds_audit(&Python, documented_unused);
        audit.expect("Python unused node kinds audit failed");
    }
}