context_creator/core/semantic/languages/
python.rs

1//! Semantic analyzer for Python
2
3use crate::core::semantic::{
4    analyzer::{AnalysisResult, LanguageAnalyzer, SemanticContext, SemanticResult},
5    path_validator::{validate_import_path, validate_module_name},
6    query_engine::QueryEngine,
7    resolver::{ModuleResolver, ResolvedPath, ResolverUtils},
8};
9use crate::utils::error::ContextCreatorError;
10use std::path::Path;
11use tree_sitter::Parser;
12
/// Semantic analyzer for Python source files.
///
/// Owns the [`QueryEngine`] that `PythonAnalyzer::new` builds for the
/// tree-sitter Python grammar.
// NOTE(review): clippy reports `new_without_default` on the `impl` that
// contains `new`, not on the struct — confirm this attribute has the intended
// effect where it is placed.
#[allow(clippy::new_without_default)]
pub struct PythonAnalyzer {
    /// Query engine used for parsing and type-definition resolution.
    query_engine: QueryEngine,
}
17
18impl PythonAnalyzer {
19    pub fn new() -> Self {
20        let language = tree_sitter_python::language();
21        let query_engine =
22            QueryEngine::new(language, "python").expect("Failed to create Python query engine");
23        Self { query_engine }
24    }
25}
26
27impl LanguageAnalyzer for PythonAnalyzer {
28    fn language_name(&self) -> &'static str {
29        "Python"
30    }
31
32    fn analyze_file(
33        &self,
34        path: &Path,
35        content: &str,
36        context: &SemanticContext,
37    ) -> SemanticResult<AnalysisResult> {
38        let mut parser = Parser::new();
39        parser
40            .set_language(tree_sitter_python::language())
41            .map_err(|e| ContextCreatorError::ParseError(format!("Failed to set language: {e}")))?;
42
43        let mut result = self
44            .query_engine
45            .analyze_with_parser(&mut parser, content)?;
46
47        // Correlate type references with imports to populate module information
48        self.correlate_types_with_imports(&mut result);
49
50        // Resolve type definitions for the type references found
51        self.query_engine.resolve_type_definitions(
52            &mut result.type_references,
53            path,
54            &context.base_dir,
55        )?;
56
57        Ok(result)
58    }
59
60    fn can_handle_extension(&self, extension: &str) -> bool {
61        matches!(extension, "py" | "pyw" | "pyi")
62    }
63
64    fn supported_extensions(&self) -> Vec<&'static str> {
65        vec!["py", "pyw", "pyi"]
66    }
67}
68
69impl PythonAnalyzer {
70    /// Correlate type references with imports to populate module information
71    fn correlate_types_with_imports(&self, result: &mut AnalysisResult) {
72        use std::collections::HashMap;
73
74        // Create a mapping from imported type names to their module paths
75        let mut type_to_module: HashMap<String, String> = HashMap::new();
76
77        for import in &result.imports {
78            // Handle "from module import Type" style imports
79            if !import.items.is_empty() {
80                for item in &import.items {
81                    // In Python, all imported names could be types
82                    // We'll check if they start with uppercase (convention for classes)
83                    if item.chars().next().is_some_and(|c| c.is_uppercase()) {
84                        type_to_module.insert(item.clone(), import.module.clone());
85                    }
86                }
87            } else if !import.module.is_empty() {
88                // Handle "import module" style imports
89                // For these, we might see usage like "module.Type"
90                // We'll handle this case by looking for the module prefix in type references
91            }
92        }
93
94        // Update type references with module information
95        for type_ref in &mut result.type_references {
96            if let Some(module) = type_to_module.get(&type_ref.name) {
97                type_ref.module = Some(module.clone());
98            }
99        }
100    }
101}
102
/// Resolves Python import strings to files on disk; carries no state.
pub struct PythonModuleResolver;
104
105impl ModuleResolver for PythonModuleResolver {
106    fn resolve_import(
107        &self,
108        module_path: &str,
109        from_file: &Path,
110        base_dir: &Path,
111    ) -> Result<ResolvedPath, ContextCreatorError> {
112        // Validate module name for security - allow Python relative imports
113        if !module_path.starts_with('.') {
114            validate_module_name(module_path)?;
115        } else {
116            // For relative imports, do a minimal validation
117            if module_path.is_empty() || module_path.len() > 255 || module_path.contains('\0') {
118                return Err(ContextCreatorError::SecurityError(format!(
119                    "Invalid relative module name: {module_path}"
120                )));
121            }
122        }
123
124        // Handle standard library imports
125        if self.is_external_module(module_path) {
126            return Ok(ResolvedPath {
127                path: base_dir.join("requirements.txt"), // Point to requirements.txt as indicator
128                is_external: true,
129                confidence: 1.0,
130            });
131        }
132
133        // Handle relative imports (., ..)
134        if module_path.starts_with('.') {
135            let mut level = 0;
136            let mut chars = module_path.chars();
137            while chars.next() == Some('.') {
138                level += 1;
139            }
140
141            // Get the rest of the module path after dots
142            let rest = &module_path[level..];
143
144            if let Some(parent) = from_file.parent() {
145                let mut current = parent;
146
147                // Go up directories based on dot count
148                // For level=1 (.), stay in current directory
149                // For level=2 (..), go up 1 directory
150                // For level=3 (...), go up 2 directories
151                for _ in 0..(level.saturating_sub(1)) {
152                    if let Some(p) = current.parent() {
153                        current = p;
154                    }
155                }
156
157                // Resolve the rest of the path
158                if !rest.is_empty() {
159                    let path = ResolverUtils::module_to_path(rest);
160                    let full_path = current.join(&path);
161
162                    // Try as a Python file
163                    if let Some(resolved) = ResolverUtils::find_with_extensions(&full_path, &["py"])
164                    {
165                        let validated_path = validate_import_path(base_dir, &resolved)?;
166                        return Ok(ResolvedPath {
167                            path: validated_path,
168                            is_external: false,
169                            confidence: 0.9,
170                        });
171                    }
172
173                    // Try as a package directory with __init__.py
174                    let init_path = full_path.join("__init__.py");
175                    if init_path.exists() {
176                        let validated_path = validate_import_path(base_dir, &init_path)?;
177                        return Ok(ResolvedPath {
178                            path: validated_path,
179                            is_external: false,
180                            confidence: 0.9,
181                        });
182                    }
183                }
184            }
185        }
186
187        // Handle absolute imports
188        let parts: Vec<&str> = module_path.split('.').collect();
189
190        // Start from base directory or parent of current file
191        let search_paths = vec![
192            base_dir.to_path_buf(),
193            from_file.parent().unwrap_or(base_dir).to_path_buf(),
194        ];
195
196        for search_path in &search_paths {
197            let mut current_path = search_path.clone();
198
199            // Build path from module parts
200            for (i, part) in parts.iter().enumerate() {
201                current_path = current_path.join(part);
202
203                // Check if this is the final part
204                if i == parts.len() - 1 {
205                    // Try as a Python file
206                    let py_file = current_path.with_extension("py");
207                    if py_file.exists() {
208                        let validated_path = validate_import_path(base_dir, &py_file)?;
209                        return Ok(ResolvedPath {
210                            path: validated_path,
211                            is_external: false,
212                            confidence: 0.8,
213                        });
214                    }
215
216                    // Try as a package directory
217                    let init_path = current_path.join("__init__.py");
218                    if init_path.exists() {
219                        let validated_path = validate_import_path(base_dir, &init_path)?;
220                        return Ok(ResolvedPath {
221                            path: validated_path,
222                            is_external: false,
223                            confidence: 0.8,
224                        });
225                    }
226                }
227            }
228        }
229
230        // Otherwise, assume it's an external package
231        Ok(ResolvedPath {
232            path: base_dir.join("requirements.txt"),
233            is_external: true,
234            confidence: 0.5,
235        })
236    }
237
238    fn get_file_extensions(&self) -> Vec<&'static str> {
239        vec!["py", "pyw", "pyi"]
240    }
241
242    fn is_external_module(&self, module_path: &str) -> bool {
243        // Common standard library modules
244        let stdlib_modules = [
245            "os",
246            "sys",
247            "json",
248            "math",
249            "random",
250            "datetime",
251            "collections",
252            "itertools",
253            "functools",
254            "re",
255            "time",
256            "subprocess",
257            "pathlib",
258            "typing",
259            "asyncio",
260            "unittest",
261            "logging",
262            "argparse",
263            "urllib",
264            "http",
265            "email",
266            "csv",
267            "sqlite3",
268            "threading",
269            "multiprocessing",
270            "abc",
271            "enum",
272            "dataclasses",
273            "contextlib",
274            "io",
275            "pickle",
276            "copy",
277            "hashlib",
278            "base64",
279            "secrets",
280            "uuid",
281            "platform",
282            "socket",
283            "ssl",
284            "select",
285            "queue",
286            "struct",
287            "array",
288            "bisect",
289            "heapq",
290            "weakref",
291            "types",
292            "importlib",
293            "pkgutil",
294            "inspect",
295            "ast",
296            "dis",
297            "traceback",
298            "linecache",
299            "tokenize",
300            "keyword",
301            "builtins",
302            "__future__",
303            "gc",
304            "signal",
305            "atexit",
306            "concurrent",
307            "xml",
308            "html",
309            "urllib",
310            "http",
311            "ftplib",
312            "poplib",
313            "imaplib",
314            "smtplib",
315            "telnetlib",
316            "uuid",
317            "socketserver",
318            "xmlrpc",
319            "ipaddress",
320            "shutil",
321            "tempfile",
322            "glob",
323            "fnmatch",
324            "stat",
325            "filecmp",
326            "zipfile",
327            "tarfile",
328            "gzip",
329            "bz2",
330            "lzma",
331            "zlib",
332            "configparser",
333            "netrc",
334            "plistlib",
335            "statistics",
336            "decimal",
337            "fractions",
338            "numbers",
339            "cmath",
340            "operator",
341            "difflib",
342            "textwrap",
343            "unicodedata",
344            "stringprep",
345            "codecs",
346            "encodings",
347            "locale",
348            "gettext",
349            "warnings",
350            "pprint",
351            "reprlib",
352            "graphlib",
353        ];
354
355        // Also check common third-party packages that might be imported
356        let third_party = [
357            "numpy",
358            "pandas",
359            "requests",
360            "flask",
361            "django",
362            "pytest",
363            "matplotlib",
364            "scipy",
365            "sklearn",
366            "tensorflow",
367            "torch",
368            "beautifulsoup4",
369            "selenium",
370            "pygame",
371            "pillow",
372            "sqlalchemy",
373            "celery",
374            "redis",
375            "pymongo",
376            "aiohttp",
377            "fastapi",
378            "pydantic",
379            "click",
380            "tqdm",
381            "colorama",
382            "setuptools",
383            "pip",
384            "wheel",
385        ];
386
387        let first_part = module_path.split('.').next().unwrap_or("");
388        stdlib_modules.contains(&first_part) || third_party.contains(&first_part)
389    }
390}