context_creator/core/semantic/languages/
python.rs

1//! Semantic analyzer for Python
2
3use crate::core::semantic::{
4    analyzer::{AnalysisResult, LanguageAnalyzer, SemanticContext, SemanticResult},
5    path_validator::{validate_import_path, validate_module_name},
6    query_engine::QueryEngine,
7    resolver::{ModuleResolver, ResolvedPath, ResolverUtils},
8};
9use crate::utils::error::ContextCreatorError;
10use std::path::Path;
11use tree_sitter::Parser;
12
13#[allow(clippy::new_without_default)]
14pub struct PythonAnalyzer {
15    query_engine: QueryEngine,
16}
17
18impl PythonAnalyzer {
19    pub fn new() -> Self {
20        let language = tree_sitter_python::language();
21        let query_engine =
22            QueryEngine::new(language, "python").expect("Failed to create Python query engine");
23        Self { query_engine }
24    }
25}
26
27impl LanguageAnalyzer for PythonAnalyzer {
28    fn language_name(&self) -> &'static str {
29        "Python"
30    }
31
32    fn analyze_file(
33        &self,
34        path: &Path,
35        content: &str,
36        context: &SemanticContext,
37    ) -> SemanticResult<AnalysisResult> {
38        let mut parser = Parser::new();
39        parser
40            .set_language(tree_sitter_python::language())
41            .map_err(|e| ContextCreatorError::ParseError(format!("Failed to set language: {e}")))?;
42
43        let mut result = self
44            .query_engine
45            .analyze_with_parser(&mut parser, content)?;
46
47        // Resolve type definitions for the type references found
48        self.query_engine.resolve_type_definitions(
49            &mut result.type_references,
50            path,
51            &context.base_dir,
52        )?;
53
54        Ok(result)
55    }
56
57    fn can_handle_extension(&self, extension: &str) -> bool {
58        matches!(extension, "py" | "pyw" | "pyi")
59    }
60
61    fn supported_extensions(&self) -> Vec<&'static str> {
62        vec!["py", "pyw", "pyi"]
63    }
64}
65
66pub struct PythonModuleResolver;
67
68impl ModuleResolver for PythonModuleResolver {
69    fn resolve_import(
70        &self,
71        module_path: &str,
72        from_file: &Path,
73        base_dir: &Path,
74    ) -> Result<ResolvedPath, ContextCreatorError> {
75        // Validate module name for security
76        validate_module_name(module_path)?;
77
78        // Handle standard library imports
79        if self.is_external_module(module_path) {
80            return Ok(ResolvedPath {
81                path: base_dir.join("requirements.txt"), // Point to requirements.txt as indicator
82                is_external: true,
83                confidence: 1.0,
84            });
85        }
86
87        // Handle relative imports (., ..)
88        if module_path.starts_with('.') {
89            let mut level = 0;
90            let mut chars = module_path.chars();
91            while chars.next() == Some('.') {
92                level += 1;
93            }
94
95            // Get the rest of the module path after dots
96            let rest = &module_path[level..];
97
98            if let Some(parent) = from_file.parent() {
99                let mut current = parent;
100
101                // Go up directories based on dot count
102                for _ in 1..level {
103                    if let Some(p) = current.parent() {
104                        current = p;
105                    }
106                }
107
108                // Resolve the rest of the path
109                if !rest.is_empty() {
110                    let path = ResolverUtils::module_to_path(rest);
111                    let full_path = current.join(path);
112
113                    // Try as a Python file
114                    if let Some(resolved) = ResolverUtils::find_with_extensions(&full_path, &["py"])
115                    {
116                        let validated_path = validate_import_path(base_dir, &resolved)?;
117                        return Ok(ResolvedPath {
118                            path: validated_path,
119                            is_external: false,
120                            confidence: 0.9,
121                        });
122                    }
123
124                    // Try as a package directory with __init__.py
125                    let init_path = full_path.join("__init__.py");
126                    if init_path.exists() {
127                        let validated_path = validate_import_path(base_dir, &init_path)?;
128                        return Ok(ResolvedPath {
129                            path: validated_path,
130                            is_external: false,
131                            confidence: 0.9,
132                        });
133                    }
134                }
135            }
136        }
137
138        // Handle absolute imports
139        let parts: Vec<&str> = module_path.split('.').collect();
140
141        // Start from base directory or parent of current file
142        let search_paths = vec![
143            base_dir.to_path_buf(),
144            from_file.parent().unwrap_or(base_dir).to_path_buf(),
145        ];
146
147        for search_path in &search_paths {
148            let mut current_path = search_path.clone();
149
150            // Build path from module parts
151            for (i, part) in parts.iter().enumerate() {
152                current_path = current_path.join(part);
153
154                // Check if this is the final part
155                if i == parts.len() - 1 {
156                    // Try as a Python file
157                    let py_file = current_path.with_extension("py");
158                    if py_file.exists() {
159                        let validated_path = validate_import_path(base_dir, &py_file)?;
160                        return Ok(ResolvedPath {
161                            path: validated_path,
162                            is_external: false,
163                            confidence: 0.8,
164                        });
165                    }
166
167                    // Try as a package directory
168                    let init_path = current_path.join("__init__.py");
169                    if init_path.exists() {
170                        let validated_path = validate_import_path(base_dir, &init_path)?;
171                        return Ok(ResolvedPath {
172                            path: validated_path,
173                            is_external: false,
174                            confidence: 0.8,
175                        });
176                    }
177                }
178            }
179        }
180
181        // Otherwise, assume it's an external package
182        Ok(ResolvedPath {
183            path: base_dir.join("requirements.txt"),
184            is_external: true,
185            confidence: 0.5,
186        })
187    }
188
189    fn get_file_extensions(&self) -> Vec<&'static str> {
190        vec!["py", "pyw", "pyi"]
191    }
192
193    fn is_external_module(&self, module_path: &str) -> bool {
194        // Common standard library modules
195        let stdlib_modules = [
196            "os",
197            "sys",
198            "json",
199            "math",
200            "random",
201            "datetime",
202            "collections",
203            "itertools",
204            "functools",
205            "re",
206            "time",
207            "subprocess",
208            "pathlib",
209            "typing",
210            "asyncio",
211            "unittest",
212            "logging",
213            "argparse",
214            "urllib",
215            "http",
216            "email",
217            "csv",
218            "sqlite3",
219            "threading",
220            "multiprocessing",
221            "abc",
222            "enum",
223            "dataclasses",
224            "contextlib",
225            "io",
226            "pickle",
227            "copy",
228            "hashlib",
229            "base64",
230            "secrets",
231            "uuid",
232            "platform",
233            "socket",
234            "ssl",
235            "select",
236            "queue",
237            "struct",
238            "array",
239            "bisect",
240            "heapq",
241            "weakref",
242            "types",
243            "importlib",
244            "pkgutil",
245            "inspect",
246            "ast",
247            "dis",
248            "traceback",
249            "linecache",
250            "tokenize",
251            "keyword",
252            "builtins",
253            "__future__",
254            "gc",
255            "signal",
256            "atexit",
257            "concurrent",
258            "xml",
259            "html",
260            "urllib",
261            "http",
262            "ftplib",
263            "poplib",
264            "imaplib",
265            "smtplib",
266            "telnetlib",
267            "uuid",
268            "socketserver",
269            "xmlrpc",
270            "ipaddress",
271            "shutil",
272            "tempfile",
273            "glob",
274            "fnmatch",
275            "stat",
276            "filecmp",
277            "zipfile",
278            "tarfile",
279            "gzip",
280            "bz2",
281            "lzma",
282            "zlib",
283            "configparser",
284            "netrc",
285            "plistlib",
286            "statistics",
287            "decimal",
288            "fractions",
289            "numbers",
290            "cmath",
291            "operator",
292            "difflib",
293            "textwrap",
294            "unicodedata",
295            "stringprep",
296            "codecs",
297            "encodings",
298            "locale",
299            "gettext",
300            "warnings",
301            "pprint",
302            "reprlib",
303            "graphlib",
304        ];
305
306        // Also check common third-party packages that might be imported
307        let third_party = [
308            "numpy",
309            "pandas",
310            "requests",
311            "flask",
312            "django",
313            "pytest",
314            "matplotlib",
315            "scipy",
316            "sklearn",
317            "tensorflow",
318            "torch",
319            "beautifulsoup4",
320            "selenium",
321            "pygame",
322            "pillow",
323            "sqlalchemy",
324            "celery",
325            "redis",
326            "pymongo",
327            "aiohttp",
328            "fastapi",
329            "pydantic",
330            "click",
331            "tqdm",
332            "colorama",
333            "setuptools",
334            "pip",
335            "wheel",
336        ];
337
338        let first_part = module_path.split('.').next().unwrap_or("");
339        stdlib_modules.contains(&first_part) || third_party.contains(&first_part)
340    }
341}