1use anyhow::{Context, Result};
15use streaming_iterator::StreamingIterator;
16use tree_sitter::{Parser, Query, QueryCursor};
17use crate::models::{Language, SearchResult, Span, SymbolKind};
18
19pub fn parse(path: &str, source: &str) -> Result<Vec<SearchResult>> {
21 let mut parser = Parser::new();
22 let language = tree_sitter_python::LANGUAGE;
23
24 parser
25 .set_language(&language.into())
26 .context("Failed to set Python language")?;
27
28 let tree = parser
29 .parse(source, None)
30 .context("Failed to parse Python source")?;
31
32 let root_node = tree.root_node();
33
34 let mut symbols = Vec::new();
35
36 symbols.extend(extract_functions(source, &root_node, &language.into())?);
38 symbols.extend(extract_classes(source, &root_node, &language.into())?);
39 symbols.extend(extract_methods(source, &root_node, &language.into())?);
40 symbols.extend(extract_constants(source, &root_node, &language.into())?);
41 symbols.extend(extract_global_variables(source, &root_node, &language.into())?);
42 symbols.extend(extract_local_variables(source, &root_node, &language.into())?);
43 symbols.extend(extract_lambdas(source, &root_node, &language.into())?);
44
45 for symbol in &mut symbols {
47 symbol.path = path.to_string();
48 symbol.lang = Language::Python;
49 }
50
51 Ok(symbols)
52}
53
54fn extract_functions(
56 source: &str,
57 root: &tree_sitter::Node,
58 language: &tree_sitter::Language,
59) -> Result<Vec<SearchResult>> {
60 let query_str = r#"
61 (function_definition
62 name: (identifier) @name) @function
63 "#;
64
65 let query = Query::new(language, query_str)
66 .context("Failed to create function query")?;
67
68 extract_symbols(source, root, &query, SymbolKind::Function, None)
69}
70
71fn extract_classes(
73 source: &str,
74 root: &tree_sitter::Node,
75 language: &tree_sitter::Language,
76) -> Result<Vec<SearchResult>> {
77 let query_str = r#"
78 (class_definition
79 name: (identifier) @name) @class
80 "#;
81
82 let query = Query::new(language, query_str)
83 .context("Failed to create class query")?;
84
85 extract_symbols(source, root, &query, SymbolKind::Class, None)
86}
87
88fn extract_methods(
90 source: &str,
91 root: &tree_sitter::Node,
92 language: &tree_sitter::Language,
93) -> Result<Vec<SearchResult>> {
94 let query_str = r#"
95 (class_definition
96 name: (identifier) @class_name
97 body: (block
98 (function_definition
99 name: (identifier) @method_name))) @class
100
101 (class_definition
102 name: (identifier) @class_name
103 body: (block
104 (decorated_definition
105 (function_definition
106 name: (identifier) @method_name)))) @class
107 "#;
108
109 let query = Query::new(language, query_str)
110 .context("Failed to create method query")?;
111
112 let mut cursor = QueryCursor::new();
113 let mut matches = cursor.matches(&query, *root, source.as_bytes());
114
115 let mut symbols = Vec::new();
116
117 while let Some(match_) = matches.next() {
118 let mut class_name = None;
119 let mut method_name = None;
120 let mut method_node = None;
121
122 for capture in match_.captures {
123 let capture_name: &str = &query.capture_names()[capture.index as usize];
124 match capture_name {
125 "class_name" => {
126 class_name = Some(capture.node.utf8_text(source.as_bytes()).unwrap_or("").to_string());
127 }
128 "method_name" => {
129 method_name = Some(capture.node.utf8_text(source.as_bytes()).unwrap_or("").to_string());
130 let mut current = capture.node;
132 while let Some(parent) = current.parent() {
133 if parent.kind() == "function_definition" {
134 method_node = Some(parent);
135 break;
136 }
137 current = parent;
138 }
139 }
140 _ => {}
141 }
142 }
143
144 if let (Some(class_name), Some(method_name), Some(node)) = (class_name, method_name, method_node) {
145 let scope = format!("class {}", class_name);
146 let span = node_to_span(&node);
147 let preview = extract_preview(source, &span);
148
149 symbols.push(SearchResult::new(
150 String::new(),
151 Language::Python,
152 SymbolKind::Method,
153 Some(method_name),
154 span,
155 Some(scope),
156 preview,
157 ));
158 }
159 }
160
161 Ok(symbols)
162}
163
164fn extract_constants(
166 source: &str,
167 root: &tree_sitter::Node,
168 language: &tree_sitter::Language,
169) -> Result<Vec<SearchResult>> {
170 let query_str = r#"
171 (module
172 (expression_statement
173 (assignment
174 left: (identifier) @name))) @const
175 "#;
176
177 let query = Query::new(language, query_str)
178 .context("Failed to create constant query")?;
179
180 let mut cursor = QueryCursor::new();
181 let mut matches = cursor.matches(&query, *root, source.as_bytes());
182
183 let mut symbols = Vec::new();
184
185 while let Some(match_) = matches.next() {
186 let mut name = None;
187 let mut const_node = None;
188
189 for capture in match_.captures {
190 let capture_name: &str = &query.capture_names()[capture.index as usize];
191 if capture_name == "name" {
192 let name_text = capture.node.utf8_text(source.as_bytes()).unwrap_or("");
193 if name_text.chars().all(|c| c.is_uppercase() || c == '_' || c.is_numeric()) {
195 name = Some(name_text.to_string());
196 let mut current = capture.node;
198 while let Some(parent) = current.parent() {
199 if parent.kind() == "assignment" {
200 const_node = Some(parent);
201 break;
202 }
203 current = parent;
204 }
205 }
206 }
207 }
208
209 if let (Some(name), Some(node)) = (name, const_node) {
210 let span = node_to_span(&node);
211 let preview = extract_preview(source, &span);
212
213 symbols.push(SearchResult::new(
214 String::new(),
215 Language::Python,
216 SymbolKind::Constant,
217 Some(name),
218 span,
219 None,
220 preview,
221 ));
222 }
223 }
224
225 Ok(symbols)
226}
227
228fn extract_global_variables(
230 source: &str,
231 root: &tree_sitter::Node,
232 language: &tree_sitter::Language,
233) -> Result<Vec<SearchResult>> {
234 let query_str = r#"
235 (module
236 (expression_statement
237 (assignment
238 left: (identifier) @name))) @var
239 "#;
240
241 let query = Query::new(language, query_str)
242 .context("Failed to create global variable query")?;
243
244 let mut cursor = QueryCursor::new();
245 let mut matches = cursor.matches(&query, *root, source.as_bytes());
246
247 let mut symbols = Vec::new();
248
249 while let Some(match_) = matches.next() {
250 let mut name = None;
251 let mut var_node = None;
252
253 for capture in match_.captures {
254 let capture_name: &str = &query.capture_names()[capture.index as usize];
255 if capture_name == "name" {
256 let name_text = capture.node.utf8_text(source.as_bytes()).unwrap_or("");
257 if !name_text.chars().all(|c| c.is_uppercase() || c == '_' || c.is_numeric()) {
259 name = Some(name_text.to_string());
260 let mut current = capture.node;
262 while let Some(parent) = current.parent() {
263 if parent.kind() == "assignment" {
264 var_node = Some(parent);
265 break;
266 }
267 current = parent;
268 }
269 }
270 }
271 }
272
273 if let (Some(name), Some(node)) = (name, var_node) {
274 let span = node_to_span(&node);
275 let preview = extract_preview(source, &span);
276
277 symbols.push(SearchResult::new(
278 String::new(),
279 Language::Python,
280 SymbolKind::Variable,
281 Some(name),
282 span,
283 None,
284 preview,
285 ));
286 }
287 }
288
289 Ok(symbols)
290}
291
292fn extract_local_variables(
294 source: &str,
295 root: &tree_sitter::Node,
296 language: &tree_sitter::Language,
297) -> Result<Vec<SearchResult>> {
298 let query_str = r#"
299 (assignment
300 left: (identifier) @name) @assignment
301 "#;
302
303 let query = Query::new(language, query_str)
304 .context("Failed to create local variable query")?;
305
306 let mut cursor = QueryCursor::new();
307 let mut matches = cursor.matches(&query, *root, source.as_bytes());
308
309 let mut symbols = Vec::new();
310
311 while let Some(match_) = matches.next() {
312 let mut name = None;
313 let mut assignment_node = None;
314
315 for capture in match_.captures {
316 let capture_name: &str = &query.capture_names()[capture.index as usize];
317 match capture_name {
318 "name" => {
319 let name_text = capture.node.utf8_text(source.as_bytes()).unwrap_or("");
320 if !name_text.chars().all(|c| c.is_uppercase() || c == '_' || c.is_numeric()) {
322 name = Some(name_text.to_string());
323 }
324 }
325 "assignment" => {
326 assignment_node = Some(capture.node);
327 }
328 _ => {}
329 }
330 }
331
332 if let (Some(name), Some(node)) = (name, assignment_node) {
334 let mut is_in_function = false;
335 let mut current = node;
336
337 while let Some(parent) = current.parent() {
338 if parent.kind() == "function_definition" {
339 is_in_function = true;
340 break;
341 }
342 if parent.kind() == "module" {
344 break;
345 }
346 current = parent;
347 }
348
349 if is_in_function {
350 let span = node_to_span(&node);
351 let preview = extract_preview(source, &span);
352
353 symbols.push(SearchResult::new(
354 String::new(),
355 Language::Python,
356 SymbolKind::Variable,
357 Some(name),
358 span,
359 None, preview,
361 ));
362 }
363 }
364 }
365
366 Ok(symbols)
367}
368
369fn extract_lambdas(
371 source: &str,
372 root: &tree_sitter::Node,
373 language: &tree_sitter::Language,
374) -> Result<Vec<SearchResult>> {
375 let query_str = r#"
376 (assignment
377 left: (identifier) @name
378 right: (lambda)) @lambda
379 "#;
380
381 let query = Query::new(language, query_str)
382 .context("Failed to create lambda query")?;
383
384 extract_symbols(source, root, &query, SymbolKind::Function, None)
385}
386
387fn extract_symbols(
389 source: &str,
390 root: &tree_sitter::Node,
391 query: &Query,
392 kind: SymbolKind,
393 scope: Option<String>,
394) -> Result<Vec<SearchResult>> {
395 let mut cursor = QueryCursor::new();
396 let mut matches = cursor.matches(query, *root, source.as_bytes());
397
398 let mut symbols = Vec::new();
399
400 while let Some(match_) = matches.next() {
401 let mut name = None;
403 let mut full_node = None;
404
405 for capture in match_.captures {
406 let capture_name: &str = &query.capture_names()[capture.index as usize];
407 if capture_name == "name" {
408 name = Some(capture.node.utf8_text(source.as_bytes()).unwrap_or("").to_string());
409 } else {
410 full_node = Some(capture.node);
412 }
413 }
414
415 if let (Some(name), Some(node)) = (name, full_node) {
416 let span = node_to_span(&node);
417 let preview = extract_preview(source, &span);
418
419 symbols.push(SearchResult::new(
420 String::new(),
421 Language::Python,
422 kind.clone(),
423 Some(name),
424 span,
425 scope.clone(),
426 preview,
427 ));
428 }
429 }
430
431 Ok(symbols)
432}
433
434fn node_to_span(node: &tree_sitter::Node) -> Span {
436 let start = node.start_position();
437 let end = node.end_position();
438
439 Span::new(
440 start.row + 1, start.column,
442 end.row + 1,
443 end.column,
444 )
445}
446
447fn extract_preview(source: &str, span: &Span) -> String {
449 let lines: Vec<&str> = source.lines().collect();
450
451 let start_idx = (span.start_line - 1) as usize; let end_idx = (start_idx + 7).min(lines.len());
454
455 lines[start_idx..end_idx].join("\n")
456}
457
458use crate::models::ImportType;
463use crate::parsers::{DependencyExtractor, ImportInfo};
464
/// Field-less marker type; Python import extraction is provided by its
/// `DependencyExtractor` implementation.
pub struct PythonDependencyExtractor;
467
468impl DependencyExtractor for PythonDependencyExtractor {
469 fn extract_dependencies(source: &str) -> Result<Vec<ImportInfo>> {
470 let mut parser = Parser::new();
471 let language = tree_sitter_python::LANGUAGE;
472
473 parser
474 .set_language(&language.into())
475 .context("Failed to set Python language")?;
476
477 let tree = parser
478 .parse(source, None)
479 .context("Failed to parse Python source")?;
480
481 let root_node = tree.root_node();
482
483 let mut imports = Vec::new();
484
485 imports.extend(extract_import_statements(source, &root_node)?);
487
488 imports.extend(extract_from_imports(source, &root_node)?);
490
491 Ok(imports)
492 }
493}
494
495fn extract_import_statements(
497 source: &str,
498 root: &tree_sitter::Node,
499) -> Result<Vec<ImportInfo>> {
500 let language = tree_sitter_python::LANGUAGE;
501
502 let query_str = r#"
503 (import_statement
504 name: (dotted_name) @import_path) @import
505 "#;
506
507 let query = Query::new(&language.into(), query_str)
508 .context("Failed to create import statement query")?;
509
510 let mut cursor = QueryCursor::new();
511 let mut matches = cursor.matches(&query, *root, source.as_bytes());
512
513 let mut imports = Vec::new();
514
515 while let Some(match_) = matches.next() {
516 let mut import_path = None;
517 let mut import_node = None;
518
519 for capture in match_.captures {
520 let capture_name: &str = &query.capture_names()[capture.index as usize];
521 match capture_name {
522 "import_path" => {
523 import_path = Some(capture.node.utf8_text(source.as_bytes()).unwrap_or("").to_string());
524 }
525 "import" => {
526 import_node = Some(capture.node);
527 }
528 _ => {}
529 }
530 }
531
532 if let (Some(path), Some(node)) = (import_path, import_node) {
533 let import_type = classify_python_import(&path);
534 let line_number = node.start_position().row + 1;
535
536 imports.push(ImportInfo {
537 imported_path: path,
538 import_type,
539 line_number,
540 imported_symbols: None,
541 });
542 }
543 }
544
545 Ok(imports)
546}
547
548fn extract_from_imports(
550 source: &str,
551 root: &tree_sitter::Node,
552) -> Result<Vec<ImportInfo>> {
553 let language = tree_sitter_python::LANGUAGE;
554
555 let query_str = r#"
556 (import_from_statement
557 module_name: (dotted_name) @module_path) @import
558
559 (import_from_statement
560 module_name: (relative_import) @module_path) @import
561 "#;
562
563 let query = Query::new(&language.into(), query_str)
564 .context("Failed to create from-import query")?;
565
566 let mut cursor = QueryCursor::new();
567 let mut matches = cursor.matches(&query, *root, source.as_bytes());
568
569 let mut imports = Vec::new();
570
571 while let Some(match_) = matches.next() {
572 let mut module_path = None;
573 let mut import_node = None;
574
575 for capture in match_.captures {
576 let capture_name: &str = &query.capture_names()[capture.index as usize];
577 match capture_name {
578 "module_path" => {
579 module_path = Some(capture.node.utf8_text(source.as_bytes()).unwrap_or("").to_string());
580 }
581 "import" => {
582 import_node = Some(capture.node);
583 }
584 _ => {}
585 }
586 }
587
588 if let (Some(path), Some(node)) = (module_path, import_node) {
589 let import_type = classify_python_import(&path);
590 let line_number = node.start_position().row + 1;
591
592 let imported_symbols = extract_imported_symbols(source, &node);
594
595 imports.push(ImportInfo {
596 imported_path: path,
597 import_type,
598 line_number,
599 imported_symbols,
600 });
601 }
602 }
603
604 Ok(imports)
605}
606
607fn extract_imported_symbols(source: &str, import_node: &tree_sitter::Node) -> Option<Vec<String>> {
609 let mut symbols = Vec::new();
610
611 let mut cursor = import_node.walk();
613 for child in import_node.children(&mut cursor) {
614 match child.kind() {
615 "aliased_import" | "dotted_name" => {
616 let mut child_cursor = child.walk();
618 for grandchild in child.children(&mut child_cursor) {
619 if grandchild.kind() == "identifier" || grandchild.kind() == "dotted_name" {
620 if let Ok(text) = grandchild.utf8_text(source.as_bytes()) {
621 symbols.push(text.to_string());
622 break; }
624 }
625 }
626 }
627 _ => {}
628 }
629 }
630
631 if symbols.is_empty() {
632 None
633 } else {
634 Some(symbols)
635 }
636}
637
638pub fn find_python_package_name(root: &std::path::Path) -> Option<String> {
641 if let Some(name) = find_pyproject_package(root) {
643 return Some(name);
644 }
645
646 if let Some(name) = find_setup_py_package(root) {
648 return Some(name);
649 }
650
651 if let Some(name) = find_setup_cfg_package(root) {
653 return Some(name);
654 }
655
656 None
657}
658
/// Reads the package name from the `[project]` table of `root/pyproject.toml`.
///
/// This is a line-based scan, not a TOML parser. Inside `[project]` it looks for a line
/// whose key is exactly `name` and returns the first quoted string of its value, lowercased.
/// Returns `None` when the file cannot be read or no such line exists.
fn find_pyproject_package(root: &std::path::Path) -> Option<String> {
    /// Returns the contents of the first quoted string in `value`, using whichever of `"`
    /// or `'` opens first, or `None` when there is no complete quoted string.
    fn first_quoted(value: &str) -> Option<&str> {
        let open = value.find(|c: char| c == '"' || c == '\'')?;
        let quote = value[open..].chars().next()?;
        let rest = &value[open + quote.len_utf8()..];
        let close = rest.find(quote)?;
        Some(&rest[..close])
    }

    let content = std::fs::read_to_string(root.join("pyproject.toml")).ok()?;

    let mut in_project_section = false;

    for line in content.lines() {
        let trimmed = line.trim();

        // Any table header switches the section; only `[project]` turns matching on.
        if trimmed.starts_with('[') {
            in_project_section = trimmed == "[project]";
            continue;
        }
        if !in_project_section {
            continue;
        }

        if let Some((key, value)) = trimmed.split_once('=') {
            // Compare the whole key so that e.g. `namespace = …` is not taken for `name`.
            if key.trim() == "name" {
                if let Some(name) = first_quoted(value) {
                    return Some(name.to_lowercase());
                }
            }
        }
    }

    None
}
704
/// Reads the package name from a `name=...` argument or assignment in `root/setup.py`.
///
/// This is a line-based scan, not a Python parser. A line matches only when `name` appears
/// as a whole identifier directly followed by a single `=`, so `author_name = …`,
/// `names = …` and `if __name__ == "__main__":` are ignored. Lines starting with `#` are
/// skipped. The first quoted string after the `=` is returned, lowercased.
/// Returns `None` when the file cannot be read or no such line exists.
fn find_setup_py_package(root: &std::path::Path) -> Option<String> {
    /// Whether `c` can be part of a Python identifier (approximation for this scan).
    fn is_identifier_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    /// Returns the text after `name =` when `line` assigns to a standalone `name`.
    fn name_assignment_value(line: &str) -> Option<&str> {
        for (pos, keyword) in line.match_indices("name") {
            let preceded_by_identifier = line[..pos]
                .chars()
                .next_back()
                .map_or(false, is_identifier_char);
            if preceded_by_identifier {
                continue;
            }

            let after = line[pos + keyword.len()..].trim_start();
            if let Some(value) = after.strip_prefix('=') {
                // `==` is a comparison, not an assignment.
                if !value.starts_with('=') {
                    return Some(value);
                }
            }
        }
        None
    }

    /// Returns the contents of the first quoted string in `value`, using whichever of `"`
    /// or `'` opens first, or `None` when there is no complete quoted string.
    fn first_quoted(value: &str) -> Option<&str> {
        let open = value.find(|c: char| c == '"' || c == '\'')?;
        let quote = value[open..].chars().next()?;
        let rest = &value[open + quote.len_utf8()..];
        let close = rest.find(quote)?;
        Some(&rest[..close])
    }

    let content = std::fs::read_to_string(root.join("setup.py")).ok()?;

    content
        .lines()
        .map(str::trim)
        .filter(|line| !line.starts_with('#'))
        .filter_map(name_assignment_value)
        .find_map(|value| first_quoted(value).map(|name| name.to_lowercase()))
}
739
/// Reads the package name from the `[metadata]` section of `root/setup.cfg`.
///
/// This is a line-based scan. Inside `[metadata]` it looks for a line whose key is exactly
/// `name`, separated from its value by `=` or `:`, and returns the trimmed value lowercased.
/// A `name` line with an empty value is ignored.
/// Returns `None` when the file cannot be read or no usable line exists.
fn find_setup_cfg_package(root: &std::path::Path) -> Option<String> {
    let content = std::fs::read_to_string(root.join("setup.cfg")).ok()?;

    let mut in_metadata_section = false;

    for line in content.lines() {
        let trimmed = line.trim();

        // Any section header switches the section; only `[metadata]` turns matching on.
        if trimmed.starts_with('[') {
            in_metadata_section = trimmed == "[metadata]";
            continue;
        }
        if !in_metadata_section {
            continue;
        }

        // Split at the first delimiter of either kind.
        let delimiter = match trimmed.find(|c: char| c == '=' || c == ':') {
            Some(index) => index,
            None => continue,
        };
        let key = trimmed[..delimiter].trim();
        let value = trimmed[delimiter + 1..].trim();

        // Compare the whole key so that e.g. `name_suffix = …` is not taken for `name`.
        if key == "name" && !value.is_empty() {
            return Some(value.to_lowercase());
        }
    }

    None
}
774
775pub fn reclassify_python_import(
778 import_path: &str,
779 package_prefix: Option<&str>,
780) -> ImportType {
781 if let Some(prefix) = package_prefix {
783 let first_component = import_path.split('.').next().unwrap_or(import_path);
785
786 if first_component == prefix {
787 return ImportType::Internal;
788 }
789 }
790
791 if import_path.starts_with('.') {
793 return ImportType::Internal;
794 }
795
796 if is_python_stdlib(import_path) {
798 return ImportType::Stdlib;
799 }
800
801 ImportType::External
803}
804
/// Reports whether the first dotted component of `path` is one of the standard-library
/// modules in the table below.
///
/// Only the modules listed here are recognised. The comparison is exact and case-sensitive.
fn is_python_stdlib(path: &str) -> bool {
    const STDLIB_MODULES: &[&str] = &[
        "os", "sys", "io", "re", "json", "csv", "xml", "html", "http", "urllib",
        "collections", "itertools", "functools", "operator", "pathlib", "glob",
        "tempfile", "shutil", "pickle", "shelve", "sqlite3", "zlib", "gzip",
        "time", "datetime", "calendar", "logging", "argparse", "configparser",
        "typing", "dataclasses", "enum", "abc", "contextlib", "weakref",
        "threading", "multiprocessing", "subprocess", "queue", "asyncio",
        "socket", "email", "base64", "hashlib", "hmac", "secrets", "uuid",
        "math", "random", "statistics", "decimal", "fractions",
        "unittest", "doctest", "pdb", "trace", "timeit",
    ];

    match path.split('.').next() {
        Some(head) => STDLIB_MODULES.iter().any(|module| *module == head),
        None => false,
    }
}
824
825fn classify_python_import(import_path: &str) -> ImportType {
827 if import_path.starts_with('.') {
829 return ImportType::Internal;
830 }
831
832 const STDLIB_MODULES: &[&str] = &[
834 "os", "sys", "io", "re", "json", "csv", "xml", "html", "http", "urllib",
835 "collections", "itertools", "functools", "operator", "pathlib", "glob",
836 "tempfile", "shutil", "pickle", "shelve", "sqlite3", "zlib", "gzip",
837 "time", "datetime", "calendar", "logging", "argparse", "configparser",
838 "typing", "dataclasses", "enum", "abc", "contextlib", "weakref",
839 "threading", "multiprocessing", "subprocess", "queue", "asyncio",
840 "socket", "email", "base64", "hashlib", "hmac", "secrets", "uuid",
841 "math", "random", "statistics", "decimal", "fractions",
842 "unittest", "doctest", "pdb", "trace", "timeit",
843 ];
844
845 let first_component = import_path.split('.').next().unwrap_or("");
847
848 if STDLIB_MODULES.contains(&first_component) {
849 ImportType::Stdlib
850 } else {
851 ImportType::External
853 }
854}
855
/// A Python package discovered from a project configuration file.
#[derive(Debug, Clone)]
pub struct PythonPackage {
    /// Package name as returned by `find_python_package_name` (lowercased there).
    pub name: String,
    /// Directory containing the config file, relative to the index root when it lies
    /// beneath it (see `parse_all_python_packages`).
    pub project_root: String,
    /// The same directory as produced by the directory walk, without the index-root prefix
    /// stripped.
    pub abs_project_root: std::path::PathBuf,
}
870
871pub fn find_all_python_configs(index_root: &std::path::Path) -> Result<Vec<std::path::PathBuf>> {
874 use ignore::WalkBuilder;
875
876 let mut config_files = Vec::new();
877
878 let walker = WalkBuilder::new(index_root)
879 .follow_links(false)
880 .git_ignore(true)
881 .build();
882
883 for entry in walker {
884 let entry = entry?;
885 let path = entry.path();
886
887 if !path.is_file() {
888 continue;
889 }
890
891 let filename = path.file_name()
892 .and_then(|n| n.to_str())
893 .unwrap_or("");
894
895 if filename == "pyproject.toml" || filename == "setup.py" || filename == "setup.cfg" {
897 let path_str = path.to_string_lossy();
899 if path_str.contains("/venv/")
900 || path_str.contains("/.venv/")
901 || path_str.contains("/site-packages/")
902 || path_str.contains("/dist/")
903 || path_str.contains("/build/")
904 || path_str.contains("/__pycache__/") {
905 log::trace!("Skipping Python config in vendor/build directory: {:?}", path);
906 continue;
907 }
908
909 config_files.push(path.to_path_buf());
910 }
911 }
912
913 log::debug!("Found {} Python config files", config_files.len());
914 Ok(config_files)
915}
916
917pub fn parse_all_python_packages(index_root: &std::path::Path) -> Result<Vec<PythonPackage>> {
919 let config_files = find_all_python_configs(index_root)?;
920
921 if config_files.is_empty() {
922 log::debug!("No Python config files found in {:?}", index_root);
923 return Ok(Vec::new());
924 }
925
926 let mut packages = Vec::new();
927 let config_count = config_files.len();
928
929 for config_path in &config_files {
930 let project_root = config_path
931 .parent()
932 .ok_or_else(|| anyhow::anyhow!("Config file has no parent directory"))?;
933
934 if let Some(package_name) = find_python_package_name(project_root) {
936 let relative_project_root = project_root
937 .strip_prefix(index_root)
938 .unwrap_or(project_root)
939 .to_string_lossy()
940 .to_string();
941
942 log::debug!(
943 "Found Python package '{}' at {:?}",
944 package_name,
945 relative_project_root
946 );
947
948 packages.push(PythonPackage {
949 name: package_name,
950 project_root: relative_project_root,
951 abs_project_root: project_root.to_path_buf(),
952 });
953 }
954 }
955
956 log::info!(
957 "Loaded {} Python packages from {} config files",
958 packages.len(),
959 config_count
960 );
961
962 Ok(packages)
963}
964
965pub fn resolve_python_import_to_path(
972 import_path: &str,
973 packages: &[PythonPackage],
974 current_file_path: Option<&str>,
975) -> Option<String> {
976 if import_path.starts_with('.') {
978 return resolve_relative_python_import(import_path, current_file_path);
979 }
980
981 let first_component = import_path.split('.').next()?;
984
985 for package in packages {
987 if package.name == first_component {
988 let module_path = import_path.replace('.', "/");
991
992 let candidates = vec![
994 format!("{}/{}.py", package.project_root, module_path),
995 format!("{}/{}/__init__.py", package.project_root, module_path),
996 ];
997
998 for candidate in candidates {
999 log::trace!("Checking Python module path: {}", candidate);
1000 return Some(candidate);
1001 }
1002 }
1003 }
1004
1005 None
1006}
1007
1008fn resolve_relative_python_import(
1011 import_path: &str,
1012 current_file_path: Option<&str>,
1013) -> Option<String> {
1014 let current_file = current_file_path?;
1015
1016 let dots = import_path.chars().take_while(|&c| c == '.').count();
1018 if dots == 0 {
1019 return None;
1020 }
1021
1022 let current_dir = std::path::Path::new(current_file).parent()?;
1024
1025 let mut target_dir = current_dir.to_path_buf();
1027 for _ in 1..dots {
1028 target_dir = target_dir.parent()?.to_path_buf();
1029 }
1030
1031 let module_path = import_path.trim_start_matches('.');
1033
1034 if module_path.is_empty() {
1035 return Some(format!("{}/__init__.py", target_dir.to_string_lossy()));
1037 }
1038
1039 let file_path = module_path.replace('.', "/");
1041
1042 let candidates = vec![
1044 format!("{}/{}.py", target_dir.to_string_lossy(), file_path),
1045 format!("{}/{}/__init__.py", target_dir.to_string_lossy(), file_path),
1046 ];
1047
1048 for candidate in candidates {
1049 log::trace!("Checking relative Python import: {}", candidate);
1050 return Some(candidate);
1051 }
1052
1053 None
1054}
1055
1056#[cfg(test)]
1057mod tests {
1058 use super::*;
1059
1060 #[test]
1061 fn test_parse_function() {
1062 let source = r#"
1063def hello_world():
1064 print("Hello, world!")
1065 return True
1066 "#;
1067
1068 let symbols = parse("test.py", source).unwrap();
1069 assert_eq!(symbols.len(), 1);
1070 assert_eq!(symbols[0].symbol.as_deref(), Some("hello_world"));
1071 assert!(matches!(symbols[0].kind, SymbolKind::Function));
1072 }
1073
1074 #[test]
1075 fn test_parse_async_function() {
1076 let source = r#"
1077async def fetch_data(url):
1078 async with aiohttp.ClientSession() as session:
1079 async with session.get(url) as response:
1080 return await response.text()
1081 "#;
1082
1083 let symbols = parse("test.py", source).unwrap();
1084 assert_eq!(symbols.len(), 1);
1085 assert_eq!(symbols[0].symbol.as_deref(), Some("fetch_data"));
1086 assert!(matches!(symbols[0].kind, SymbolKind::Function));
1087 }
1088
1089 #[test]
1090 fn test_parse_class() {
1091 let source = r#"
1092class User:
1093 def __init__(self, name, age):
1094 self.name = name
1095 self.age = age
1096 "#;
1097
1098 let symbols = parse("test.py", source).unwrap();
1099
1100 let class_symbols: Vec<_> = symbols.iter()
1101 .filter(|s| matches!(s.kind, SymbolKind::Class))
1102 .collect();
1103
1104 assert_eq!(class_symbols.len(), 1);
1105 assert_eq!(class_symbols[0].symbol.as_deref(), Some("User"));
1106 }
1107
1108 #[test]
1109 fn test_parse_class_with_methods() {
1110 let source = r#"
1111class Calculator:
1112 def add(self, a, b):
1113 return a + b
1114
1115 def subtract(self, a, b):
1116 return a - b
1117
1118 @staticmethod
1119 def multiply(a, b):
1120 return a * b
1121 "#;
1122
1123 let symbols = parse("test.py", source).unwrap();
1124
1125 let method_symbols: Vec<_> = symbols.iter()
1126 .filter(|s| matches!(s.kind, SymbolKind::Method))
1127 .collect();
1128
1129 assert_eq!(method_symbols.len(), 3);
1130 assert!(method_symbols.iter().any(|s| s.symbol.as_deref() == Some("add")));
1131 assert!(method_symbols.iter().any(|s| s.symbol.as_deref() == Some("subtract")));
1132 assert!(method_symbols.iter().any(|s| s.symbol.as_deref() == Some("multiply")));
1133
1134 for method in method_symbols {
1136 }
1138 }
1139
1140 #[test]
1141 fn test_parse_async_method() {
1142 let source = r#"
1143class DataFetcher:
1144 async def get_user(self, user_id):
1145 return await fetch(f"/users/{user_id}")
1146
1147 async def get_all_users(self):
1148 return await fetch("/users")
1149 "#;
1150
1151 let symbols = parse("test.py", source).unwrap();
1152
1153 let method_symbols: Vec<_> = symbols.iter()
1154 .filter(|s| matches!(s.kind, SymbolKind::Method))
1155 .collect();
1156
1157 assert_eq!(method_symbols.len(), 2);
1158 assert!(method_symbols.iter().any(|s| s.symbol.as_deref() == Some("get_user")));
1159 assert!(method_symbols.iter().any(|s| s.symbol.as_deref() == Some("get_all_users")));
1160 }
1161
1162 #[test]
1163 fn test_parse_constants() {
1164 let source = r#"
1165MAX_SIZE = 100
1166DEFAULT_TIMEOUT = 30
1167API_URL = "https://api.example.com"
1168 "#;
1169
1170 let symbols = parse("test.py", source).unwrap();
1171
1172 let const_symbols: Vec<_> = symbols.iter()
1173 .filter(|s| matches!(s.kind, SymbolKind::Constant))
1174 .collect();
1175
1176 assert_eq!(const_symbols.len(), 3);
1177 assert!(const_symbols.iter().any(|s| s.symbol.as_deref() == Some("MAX_SIZE")));
1178 assert!(const_symbols.iter().any(|s| s.symbol.as_deref() == Some("DEFAULT_TIMEOUT")));
1179 assert!(const_symbols.iter().any(|s| s.symbol.as_deref() == Some("API_URL")));
1180 }
1181
1182 #[test]
1183 fn test_parse_lambda() {
1184 let source = r#"
1185square = lambda x: x * x
1186add = lambda a, b: a + b
1187 "#;
1188
1189 let symbols = parse("test.py", source).unwrap();
1190
1191 let lambda_symbols: Vec<_> = symbols.iter()
1192 .filter(|s| matches!(s.kind, SymbolKind::Function))
1193 .collect();
1194
1195 assert!(lambda_symbols.len() >= 2);
1196 assert!(lambda_symbols.iter().any(|s| s.symbol.as_deref() == Some("square")));
1197 assert!(lambda_symbols.iter().any(|s| s.symbol.as_deref() == Some("add")));
1198 }
1199
1200 #[test]
1201 fn test_parse_decorated_method() {
1202 let source = r#"
1203class WebService:
1204 @property
1205 def url(self):
1206 return self._url
1207
1208 @classmethod
1209 def from_config(cls, config):
1210 return cls(config['url'])
1211
1212 @staticmethod
1213 def validate_url(url):
1214 return url.startswith('http')
1215 "#;
1216
1217 let symbols = parse("test.py", source).unwrap();
1218
1219 let method_symbols: Vec<_> = symbols.iter()
1220 .filter(|s| matches!(s.kind, SymbolKind::Method))
1221 .collect();
1222
1223 assert_eq!(method_symbols.len(), 3);
1224 assert!(method_symbols.iter().any(|s| s.symbol.as_deref() == Some("url")));
1225 assert!(method_symbols.iter().any(|s| s.symbol.as_deref() == Some("from_config")));
1226 assert!(method_symbols.iter().any(|s| s.symbol.as_deref() == Some("validate_url")));
1227 }
1228
1229 #[test]
1230 fn test_parse_mixed_symbols() {
1231 let source = r#"
1232API_KEY = "secret123"
1233MAX_RETRIES = 3
1234
1235class APIClient:
1236 def __init__(self, api_key):
1237 self.api_key = api_key
1238
1239 async def request(self, endpoint):
1240 return await self._fetch(endpoint)
1241
1242 @staticmethod
1243 def build_url(endpoint):
1244 return f"https://api.example.com/{endpoint}"
1245
1246def create_client():
1247 return APIClient(API_KEY)
1248
1249process = lambda data: data.strip().lower()
1250 "#;
1251
1252 let symbols = parse("test.py", source).unwrap();
1253
1254 assert!(symbols.len() >= 8);
1256
1257 let kinds: Vec<&SymbolKind> = symbols.iter().map(|s| &s.kind).collect();
1258 assert!(kinds.contains(&&SymbolKind::Constant));
1259 assert!(kinds.contains(&&SymbolKind::Class));
1260 assert!(kinds.contains(&&SymbolKind::Method));
1261 assert!(kinds.contains(&&SymbolKind::Function));
1262 }
1263
1264 #[test]
1265 fn test_parse_nested_class() {
1266 let source = r#"
1267class Outer:
1268 class Inner:
1269 def inner_method(self):
1270 pass
1271
1272 def outer_method(self):
1273 pass
1274 "#;
1275
1276 let symbols = parse("test.py", source).unwrap();
1277
1278 let class_symbols: Vec<_> = symbols.iter()
1279 .filter(|s| matches!(s.kind, SymbolKind::Class))
1280 .collect();
1281
1282 assert_eq!(class_symbols.len(), 2);
1284 assert!(class_symbols.iter().any(|s| s.symbol.as_deref() == Some("Outer")));
1285 assert!(class_symbols.iter().any(|s| s.symbol.as_deref() == Some("Inner")));
1286 }
1287
1288 #[test]
1289 fn test_local_variables_included() {
1290 let source = r#"
1291def calculate(input):
1292 local_var = input * 2
1293 result = local_var + 10
1294 return result
1295
1296class Calculator:
1297 def compute(self, value):
1298 temp = value * 3
1299 final = temp + 5
1300 return final
1301 "#;
1302
1303 let symbols = parse("test.py", source).unwrap();
1304
1305 let variables: Vec<_> = symbols.iter()
1307 .filter(|s| matches!(s.kind, SymbolKind::Variable))
1308 .collect();
1309
1310 assert!(variables.iter().any(|v| v.symbol.as_deref() == Some("local_var")));
1312 assert!(variables.iter().any(|v| v.symbol.as_deref() == Some("result")));
1313 assert!(variables.iter().any(|v| v.symbol.as_deref() == Some("temp")));
1314 assert!(variables.iter().any(|v| v.symbol.as_deref() == Some("final")));
1315
1316 for var in variables {
1318 }
1320 }
1321
1322 #[test]
1323 fn test_global_variables() {
1324 let source = r#"
1325# Global constants (uppercase)
1326MAX_SIZE = 100
1327DEFAULT_TIMEOUT = 30
1328
1329# Global variables (non-uppercase)
1330database_url = "postgresql://localhost/mydb"
1331config = {"debug": True}
1332current_user = None
1333
1334def get_config():
1335 return config
1336 "#;
1337
1338 let symbols = parse("test.py", source).unwrap();
1339
1340 let constants: Vec<_> = symbols.iter()
1342 .filter(|s| matches!(s.kind, SymbolKind::Constant))
1343 .collect();
1344
1345 let variables: Vec<_> = symbols.iter()
1346 .filter(|s| matches!(s.kind, SymbolKind::Variable))
1347 .collect();
1348
1349 assert!(constants.iter().any(|c| c.symbol.as_deref() == Some("MAX_SIZE")));
1351 assert!(constants.iter().any(|c| c.symbol.as_deref() == Some("DEFAULT_TIMEOUT")));
1352
1353 assert!(variables.iter().any(|v| v.symbol.as_deref() == Some("database_url")));
1355 assert!(variables.iter().any(|v| v.symbol.as_deref() == Some("config")));
1356 assert!(variables.iter().any(|v| v.symbol.as_deref() == Some("current_user")));
1357
1358 for constant in constants {
1360 }
1362 for var in variables {
1363 }
1365 }
1366
1367 #[test]
1368 fn test_find_all_python_configs() {
1369 use tempfile::TempDir;
1370 use std::fs;
1371
1372 let temp = TempDir::new().unwrap();
1373 let root = temp.path();
1374
1375 let project1 = root.join("backend");
1377 fs::create_dir_all(&project1).unwrap();
1378 fs::write(project1.join("pyproject.toml"), "[project]\nname = \"backend\"").unwrap();
1379
1380 let project2 = root.join("frontend/api");
1381 fs::create_dir_all(&project2).unwrap();
1382 fs::write(project2.join("setup.py"), "setup(name='api')").unwrap();
1383
1384 let venv = root.join("venv");
1386 fs::create_dir_all(&venv).unwrap();
1387 fs::write(venv.join("setup.py"), "setup(name='should_skip')").unwrap();
1388
1389 let configs = find_all_python_configs(root).unwrap();
1390
1391 assert_eq!(configs.len(), 2);
1393 assert!(configs.iter().any(|p| p.ends_with("backend/pyproject.toml")));
1394 assert!(configs.iter().any(|p| p.ends_with("frontend/api/setup.py")));
1395 }
1396
1397 #[test]
1398 fn test_parse_all_python_packages() {
1399 use tempfile::TempDir;
1400 use std::fs;
1401
1402 let temp = TempDir::new().unwrap();
1403 let root = temp.path();
1404
1405 let project1 = root.join("services/auth");
1407 fs::create_dir_all(&project1).unwrap();
1408 fs::write(
1409 project1.join("pyproject.toml"),
1410 "[project]\nname = \"auth-service\"\n"
1411 ).unwrap();
1412
1413 let project2 = root.join("services/api");
1414 fs::create_dir_all(&project2).unwrap();
1415 fs::write(
1416 project2.join("setup.py"),
1417 "setup(name=\"api-service\")"
1418 ).unwrap();
1419
1420 let packages = parse_all_python_packages(root).unwrap();
1421
1422 assert_eq!(packages.len(), 2);
1424
1425 let names: Vec<_> = packages.iter().map(|p| p.name.as_str()).collect();
1427 assert!(names.contains(&"auth-service"));
1428 assert!(names.contains(&"api-service"));
1429
1430 for package in &packages {
1432 assert!(package.project_root.starts_with("services/"));
1433 assert!(package.abs_project_root.ends_with(&package.project_root));
1434 }
1435 }
1436
1437 #[test]
1438 fn test_resolve_python_import_absolute() {
1439 use tempfile::TempDir;
1440 use std::fs;
1441
1442 let temp = TempDir::new().unwrap();
1443 let root = temp.path();
1444
1445 let myapp = root.join("myapp");
1447 fs::create_dir_all(myapp.join("models")).unwrap();
1448 fs::write(
1449 myapp.join("pyproject.toml"),
1450 "[project]\nname = \"myapp\"\n"
1451 ).unwrap();
1452
1453 let packages = parse_all_python_packages(root).unwrap();
1454 assert_eq!(packages.len(), 1);
1455
1456 let resolved = resolve_python_import_to_path(
1459 "myapp.models.user",
1460 &packages,
1461 None
1462 );
1463
1464 assert!(resolved.is_some());
1465 let path = resolved.unwrap();
1466 assert!(path.contains("myapp/models/user.py") || path.contains("myapp/models/user/__init__.py"));
1467 }
1468
1469 #[test]
1470 fn test_resolve_python_import_relative() {
1471 let current_file = "myapp/views/admin.py";
1473
1474 let resolved = resolve_python_import_to_path(
1476 ".models",
1477 &[], Some(current_file),
1479 );
1480
1481 assert!(resolved.is_some());
1482 let path = resolved.unwrap();
1483 assert!(path.contains("myapp/views/models"));
1485
1486 let resolved = resolve_python_import_to_path(
1488 "..utils",
1489 &[],
1490 Some(current_file),
1491 );
1492
1493 assert!(resolved.is_some());
1494 let path = resolved.unwrap();
1495 assert!(path.contains("myapp/utils"));
1497 }
1498
1499 #[test]
1500 fn test_resolve_python_import_relative_with_module() {
1501 let current_file = "myapp/views/dashboard/index.py";
1503
1504 let resolved = resolve_python_import_to_path(
1505 "..models.user",
1506 &[],
1507 Some(current_file),
1508 );
1509
1510 assert!(resolved.is_some());
1511 let path = resolved.unwrap();
1512 assert!(path.contains("models/user"));
1514 }
1515
1516 #[test]
1517 fn test_resolve_python_import_not_found() {
1518 use tempfile::TempDir;
1519 use std::fs;
1520
1521 let temp = TempDir::new().unwrap();
1522 let root = temp.path();
1523
1524 let myapp = root.join("myapp");
1525 fs::create_dir_all(&myapp).unwrap();
1526 fs::write(
1527 myapp.join("pyproject.toml"),
1528 "[project]\nname = \"myapp\"\n"
1529 ).unwrap();
1530
1531 let packages = parse_all_python_packages(root).unwrap();
1532
1533 let resolved = resolve_python_import_to_path(
1535 "other_package.module",
1536 &packages,
1537 None
1538 );
1539
1540 assert!(resolved.is_none());
1542 }
1543
1544 #[test]
1545 fn test_dynamic_imports_filtered() {
1546 let source = r#"
1547import os
1548import sys
1549from json import loads
1550from .models import User
1551
1552# Dynamic imports - should be filtered out
1553import importlib
1554mod = importlib.import_module("some_module")
1555pkg = __import__("package")
1556exec("import dynamic")
1557 "#;
1558
1559 let deps = PythonDependencyExtractor::extract_dependencies(source).unwrap();
1560
1561 assert_eq!(deps.len(), 5, "Should extract 5 static imports only");
1564
1565 assert!(deps.iter().any(|d| d.imported_path == "os"));
1566 assert!(deps.iter().any(|d| d.imported_path == "sys"));
1567 assert!(deps.iter().any(|d| d.imported_path == "json"));
1568 assert!(deps.iter().any(|d| d.imported_path == ".models"));
1569 assert!(deps.iter().any(|d| d.imported_path == "importlib"));
1570
1571 assert!(!deps.iter().any(|d| d.imported_path.contains("some_module")));
1573 assert!(!deps.iter().any(|d| d.imported_path.contains("package") && d.imported_path != "json"));
1574 assert!(!deps.iter().any(|d| d.imported_path.contains("dynamic")));
1575 }
1576}