1use crate::core::semantic::{
4 analyzer::{AnalysisResult, LanguageAnalyzer, SemanticContext, SemanticResult},
5 path_validator::{validate_import_path, validate_module_name},
6 query_engine::QueryEngine,
7 resolver::{ModuleResolver, ResolvedPath, ResolverUtils},
8};
9use crate::utils::error::ContextCreatorError;
10use std::path::Path;
11use tree_sitter::Parser;
12
13#[allow(clippy::new_without_default)]
14pub struct PythonAnalyzer {
15 query_engine: QueryEngine,
16}
17
18impl PythonAnalyzer {
19 pub fn new() -> Self {
20 let language = tree_sitter_python::language();
21 let query_engine =
22 QueryEngine::new(language, "python").expect("Failed to create Python query engine");
23 Self { query_engine }
24 }
25}
26
27impl LanguageAnalyzer for PythonAnalyzer {
28 fn language_name(&self) -> &'static str {
29 "Python"
30 }
31
32 fn analyze_file(
33 &self,
34 path: &Path,
35 content: &str,
36 context: &SemanticContext,
37 ) -> SemanticResult<AnalysisResult> {
38 let mut parser = Parser::new();
39 parser
40 .set_language(tree_sitter_python::language())
41 .map_err(|e| ContextCreatorError::ParseError(format!("Failed to set language: {e}")))?;
42
43 let mut result = self
44 .query_engine
45 .analyze_with_parser(&mut parser, content)?;
46
47 self.correlate_types_with_imports(&mut result);
49
50 self.query_engine.resolve_type_definitions(
52 &mut result.type_references,
53 path,
54 &context.base_dir,
55 )?;
56
57 Ok(result)
58 }
59
60 fn can_handle_extension(&self, extension: &str) -> bool {
61 matches!(extension, "py" | "pyw" | "pyi")
62 }
63
64 fn supported_extensions(&self) -> Vec<&'static str> {
65 vec!["py", "pyw", "pyi"]
66 }
67}
68
69impl PythonAnalyzer {
70 fn correlate_types_with_imports(&self, result: &mut AnalysisResult) {
72 use std::collections::HashMap;
73
74 let mut type_to_module: HashMap<String, String> = HashMap::new();
76
77 for import in &result.imports {
78 if !import.items.is_empty() {
80 for item in &import.items {
81 if item.chars().next().is_some_and(|c| c.is_uppercase()) {
84 type_to_module.insert(item.clone(), import.module.clone());
85 }
86 }
87 } else if !import.module.is_empty() {
88 }
92 }
93
94 for type_ref in &mut result.type_references {
96 if let Some(module) = type_to_module.get(&type_ref.name) {
97 type_ref.module = Some(module.clone());
98 }
99 }
100 }
101}
102
103pub struct PythonModuleResolver;
104
105impl ModuleResolver for PythonModuleResolver {
106 fn resolve_import(
107 &self,
108 module_path: &str,
109 from_file: &Path,
110 base_dir: &Path,
111 ) -> Result<ResolvedPath, ContextCreatorError> {
112 if !module_path.starts_with('.') {
114 validate_module_name(module_path)?;
115 } else {
116 if module_path.is_empty() || module_path.len() > 255 || module_path.contains('\0') {
118 return Err(ContextCreatorError::SecurityError(format!(
119 "Invalid relative module name: {module_path}"
120 )));
121 }
122 }
123
124 if self.is_external_module(module_path) {
126 return Ok(ResolvedPath {
127 path: base_dir.join("requirements.txt"), is_external: true,
129 confidence: 1.0,
130 });
131 }
132
133 if module_path.starts_with('.') {
135 let mut level = 0;
136 let mut chars = module_path.chars();
137 while chars.next() == Some('.') {
138 level += 1;
139 }
140
141 let rest = &module_path[level..];
143
144 if let Some(parent) = from_file.parent() {
145 let mut current = parent;
146
147 for _ in 0..(level.saturating_sub(1)) {
152 if let Some(p) = current.parent() {
153 current = p;
154 }
155 }
156
157 if !rest.is_empty() {
159 let path = ResolverUtils::module_to_path(rest);
160 let full_path = current.join(&path);
161
162 if let Some(resolved) = ResolverUtils::find_with_extensions(&full_path, &["py"])
164 {
165 let validated_path = validate_import_path(base_dir, &resolved)?;
166 return Ok(ResolvedPath {
167 path: validated_path,
168 is_external: false,
169 confidence: 0.9,
170 });
171 }
172
173 let init_path = full_path.join("__init__.py");
175 if init_path.exists() {
176 let validated_path = validate_import_path(base_dir, &init_path)?;
177 return Ok(ResolvedPath {
178 path: validated_path,
179 is_external: false,
180 confidence: 0.9,
181 });
182 }
183 }
184 }
185 }
186
187 let parts: Vec<&str> = module_path.split('.').collect();
189
190 let search_paths = vec![
192 base_dir.to_path_buf(),
193 from_file.parent().unwrap_or(base_dir).to_path_buf(),
194 ];
195
196 for search_path in &search_paths {
197 let mut current_path = search_path.clone();
198
199 for (i, part) in parts.iter().enumerate() {
201 current_path = current_path.join(part);
202
203 if i == parts.len() - 1 {
205 let py_file = current_path.with_extension("py");
207 if py_file.exists() {
208 let validated_path = validate_import_path(base_dir, &py_file)?;
209 return Ok(ResolvedPath {
210 path: validated_path,
211 is_external: false,
212 confidence: 0.8,
213 });
214 }
215
216 let init_path = current_path.join("__init__.py");
218 if init_path.exists() {
219 let validated_path = validate_import_path(base_dir, &init_path)?;
220 return Ok(ResolvedPath {
221 path: validated_path,
222 is_external: false,
223 confidence: 0.8,
224 });
225 }
226 }
227 }
228 }
229
230 Ok(ResolvedPath {
232 path: base_dir.join("requirements.txt"),
233 is_external: true,
234 confidence: 0.5,
235 })
236 }
237
238 fn get_file_extensions(&self) -> Vec<&'static str> {
239 vec!["py", "pyw", "pyi"]
240 }
241
242 fn is_external_module(&self, module_path: &str) -> bool {
243 let stdlib_modules = [
245 "os",
246 "sys",
247 "json",
248 "math",
249 "random",
250 "datetime",
251 "collections",
252 "itertools",
253 "functools",
254 "re",
255 "time",
256 "subprocess",
257 "pathlib",
258 "typing",
259 "asyncio",
260 "unittest",
261 "logging",
262 "argparse",
263 "urllib",
264 "http",
265 "email",
266 "csv",
267 "sqlite3",
268 "threading",
269 "multiprocessing",
270 "abc",
271 "enum",
272 "dataclasses",
273 "contextlib",
274 "io",
275 "pickle",
276 "copy",
277 "hashlib",
278 "base64",
279 "secrets",
280 "uuid",
281 "platform",
282 "socket",
283 "ssl",
284 "select",
285 "queue",
286 "struct",
287 "array",
288 "bisect",
289 "heapq",
290 "weakref",
291 "types",
292 "importlib",
293 "pkgutil",
294 "inspect",
295 "ast",
296 "dis",
297 "traceback",
298 "linecache",
299 "tokenize",
300 "keyword",
301 "builtins",
302 "__future__",
303 "gc",
304 "signal",
305 "atexit",
306 "concurrent",
307 "xml",
308 "html",
309 "urllib",
310 "http",
311 "ftplib",
312 "poplib",
313 "imaplib",
314 "smtplib",
315 "telnetlib",
316 "uuid",
317 "socketserver",
318 "xmlrpc",
319 "ipaddress",
320 "shutil",
321 "tempfile",
322 "glob",
323 "fnmatch",
324 "stat",
325 "filecmp",
326 "zipfile",
327 "tarfile",
328 "gzip",
329 "bz2",
330 "lzma",
331 "zlib",
332 "configparser",
333 "netrc",
334 "plistlib",
335 "statistics",
336 "decimal",
337 "fractions",
338 "numbers",
339 "cmath",
340 "operator",
341 "difflib",
342 "textwrap",
343 "unicodedata",
344 "stringprep",
345 "codecs",
346 "encodings",
347 "locale",
348 "gettext",
349 "warnings",
350 "pprint",
351 "reprlib",
352 "graphlib",
353 ];
354
355 let third_party = [
357 "numpy",
358 "pandas",
359 "requests",
360 "flask",
361 "django",
362 "pytest",
363 "matplotlib",
364 "scipy",
365 "sklearn",
366 "tensorflow",
367 "torch",
368 "beautifulsoup4",
369 "selenium",
370 "pygame",
371 "pillow",
372 "sqlalchemy",
373 "celery",
374 "redis",
375 "pymongo",
376 "aiohttp",
377 "fastapi",
378 "pydantic",
379 "click",
380 "tqdm",
381 "colorama",
382 "setuptools",
383 "pip",
384 "wheel",
385 ];
386
387 let first_part = module_path.split('.').next().unwrap_or("");
388 stdlib_modules.contains(&first_part) || third_party.contains(&first_part)
389 }
390}