1use crate::models::{Language, SearchResult, Span, SymbolKind};
15use anyhow::{Context, Result};
16use streaming_iterator::StreamingIterator;
17use tree_sitter::{Parser, Query, QueryCursor};
18
19pub fn parse(path: &str, source: &str) -> Result<Vec<SearchResult>> {
21 let mut parser = Parser::new();
22 let language = tree_sitter_python::LANGUAGE;
23
24 parser
25 .set_language(&language.into())
26 .context("Failed to set Python language")?;
27
28 let tree = parser
29 .parse(source, None)
30 .context("Failed to parse Python source")?;
31
32 let root_node = tree.root_node();
33
34 let mut symbols = Vec::new();
35
36 symbols.extend(extract_functions(source, &root_node, &language.into())?);
38 symbols.extend(extract_classes(source, &root_node, &language.into())?);
39 symbols.extend(extract_methods(source, &root_node, &language.into())?);
40 symbols.extend(extract_constants(source, &root_node, &language.into())?);
41 symbols.extend(extract_global_variables(
42 source,
43 &root_node,
44 &language.into(),
45 )?);
46 symbols.extend(extract_local_variables(
47 source,
48 &root_node,
49 &language.into(),
50 )?);
51 symbols.extend(extract_lambdas(source, &root_node, &language.into())?);
52
53 for symbol in &mut symbols {
55 symbol.path = path.to_string();
56 symbol.lang = Language::Python;
57 }
58
59 Ok(symbols)
60}
61
62fn extract_functions(
64 source: &str,
65 root: &tree_sitter::Node,
66 language: &tree_sitter::Language,
67) -> Result<Vec<SearchResult>> {
68 let query_str = r#"
69 (function_definition
70 name: (identifier) @name) @function
71 "#;
72
73 let query = Query::new(language, query_str).context("Failed to create function query")?;
74
75 extract_symbols(source, root, &query, SymbolKind::Function, None)
76}
77
78fn extract_classes(
80 source: &str,
81 root: &tree_sitter::Node,
82 language: &tree_sitter::Language,
83) -> Result<Vec<SearchResult>> {
84 let query_str = r#"
85 (class_definition
86 name: (identifier) @name) @class
87 "#;
88
89 let query = Query::new(language, query_str).context("Failed to create class query")?;
90
91 extract_symbols(source, root, &query, SymbolKind::Class, None)
92}
93
94fn extract_methods(
96 source: &str,
97 root: &tree_sitter::Node,
98 language: &tree_sitter::Language,
99) -> Result<Vec<SearchResult>> {
100 let query_str = r#"
101 (class_definition
102 name: (identifier) @class_name
103 body: (block
104 (function_definition
105 name: (identifier) @method_name))) @class
106
107 (class_definition
108 name: (identifier) @class_name
109 body: (block
110 (decorated_definition
111 (function_definition
112 name: (identifier) @method_name)))) @class
113 "#;
114
115 let query = Query::new(language, query_str).context("Failed to create method query")?;
116
117 let mut cursor = QueryCursor::new();
118 let mut matches = cursor.matches(&query, *root, source.as_bytes());
119
120 let mut symbols = Vec::new();
121
122 while let Some(match_) = matches.next() {
123 let mut class_name = None;
124 let mut method_name = None;
125 let mut method_node = None;
126
127 for capture in match_.captures {
128 let capture_name: &str = &query.capture_names()[capture.index as usize];
129 match capture_name {
130 "class_name" => {
131 class_name = Some(
132 capture
133 .node
134 .utf8_text(source.as_bytes())
135 .unwrap_or("")
136 .to_string(),
137 );
138 }
139 "method_name" => {
140 method_name = Some(
141 capture
142 .node
143 .utf8_text(source.as_bytes())
144 .unwrap_or("")
145 .to_string(),
146 );
147 let mut current = capture.node;
149 while let Some(parent) = current.parent() {
150 if parent.kind() == "function_definition" {
151 method_node = Some(parent);
152 break;
153 }
154 current = parent;
155 }
156 }
157 _ => {}
158 }
159 }
160
161 if let (Some(class_name), Some(method_name), Some(node)) =
162 (class_name, method_name, method_node)
163 {
164 let scope = format!("class {}", class_name);
165 let span = node_to_span(&node);
166 let preview = extract_preview(source, &span);
167
168 symbols.push(SearchResult::new(
169 String::new(),
170 Language::Python,
171 SymbolKind::Method,
172 Some(method_name),
173 span,
174 Some(scope),
175 preview,
176 ));
177 }
178 }
179
180 Ok(symbols)
181}
182
183fn extract_constants(
185 source: &str,
186 root: &tree_sitter::Node,
187 language: &tree_sitter::Language,
188) -> Result<Vec<SearchResult>> {
189 let query_str = r#"
190 (module
191 (expression_statement
192 (assignment
193 left: (identifier) @name))) @const
194 "#;
195
196 let query = Query::new(language, query_str).context("Failed to create constant query")?;
197
198 let mut cursor = QueryCursor::new();
199 let mut matches = cursor.matches(&query, *root, source.as_bytes());
200
201 let mut symbols = Vec::new();
202
203 while let Some(match_) = matches.next() {
204 let mut name = None;
205 let mut const_node = None;
206
207 for capture in match_.captures {
208 let capture_name: &str = &query.capture_names()[capture.index as usize];
209 if capture_name == "name" {
210 let name_text = capture.node.utf8_text(source.as_bytes()).unwrap_or("");
211 if name_text
213 .chars()
214 .all(|c| c.is_uppercase() || c == '_' || c.is_numeric())
215 {
216 name = Some(name_text.to_string());
217 let mut current = capture.node;
219 while let Some(parent) = current.parent() {
220 if parent.kind() == "assignment" {
221 const_node = Some(parent);
222 break;
223 }
224 current = parent;
225 }
226 }
227 }
228 }
229
230 if let (Some(name), Some(node)) = (name, const_node) {
231 let span = node_to_span(&node);
232 let preview = extract_preview(source, &span);
233
234 symbols.push(SearchResult::new(
235 String::new(),
236 Language::Python,
237 SymbolKind::Constant,
238 Some(name),
239 span,
240 None,
241 preview,
242 ));
243 }
244 }
245
246 Ok(symbols)
247}
248
249fn extract_global_variables(
251 source: &str,
252 root: &tree_sitter::Node,
253 language: &tree_sitter::Language,
254) -> Result<Vec<SearchResult>> {
255 let query_str = r#"
256 (module
257 (expression_statement
258 (assignment
259 left: (identifier) @name))) @var
260 "#;
261
262 let query =
263 Query::new(language, query_str).context("Failed to create global variable query")?;
264
265 let mut cursor = QueryCursor::new();
266 let mut matches = cursor.matches(&query, *root, source.as_bytes());
267
268 let mut symbols = Vec::new();
269
270 while let Some(match_) = matches.next() {
271 let mut name = None;
272 let mut var_node = None;
273
274 for capture in match_.captures {
275 let capture_name: &str = &query.capture_names()[capture.index as usize];
276 if capture_name == "name" {
277 let name_text = capture.node.utf8_text(source.as_bytes()).unwrap_or("");
278 if !name_text
280 .chars()
281 .all(|c| c.is_uppercase() || c == '_' || c.is_numeric())
282 {
283 name = Some(name_text.to_string());
284 let mut current = capture.node;
286 while let Some(parent) = current.parent() {
287 if parent.kind() == "assignment" {
288 var_node = Some(parent);
289 break;
290 }
291 current = parent;
292 }
293 }
294 }
295 }
296
297 if let (Some(name), Some(node)) = (name, var_node) {
298 let span = node_to_span(&node);
299 let preview = extract_preview(source, &span);
300
301 symbols.push(SearchResult::new(
302 String::new(),
303 Language::Python,
304 SymbolKind::Variable,
305 Some(name),
306 span,
307 None,
308 preview,
309 ));
310 }
311 }
312
313 Ok(symbols)
314}
315
316fn extract_local_variables(
318 source: &str,
319 root: &tree_sitter::Node,
320 language: &tree_sitter::Language,
321) -> Result<Vec<SearchResult>> {
322 let query_str = r#"
323 (assignment
324 left: (identifier) @name) @assignment
325 "#;
326
327 let query = Query::new(language, query_str).context("Failed to create local variable query")?;
328
329 let mut cursor = QueryCursor::new();
330 let mut matches = cursor.matches(&query, *root, source.as_bytes());
331
332 let mut symbols = Vec::new();
333
334 while let Some(match_) = matches.next() {
335 let mut name = None;
336 let mut assignment_node = None;
337
338 for capture in match_.captures {
339 let capture_name: &str = &query.capture_names()[capture.index as usize];
340 match capture_name {
341 "name" => {
342 let name_text = capture.node.utf8_text(source.as_bytes()).unwrap_or("");
343 if !name_text
345 .chars()
346 .all(|c| c.is_uppercase() || c == '_' || c.is_numeric())
347 {
348 name = Some(name_text.to_string());
349 }
350 }
351 "assignment" => {
352 assignment_node = Some(capture.node);
353 }
354 _ => {}
355 }
356 }
357
358 if let (Some(name), Some(node)) = (name, assignment_node) {
360 let mut is_in_function = false;
361 let mut current = node;
362
363 while let Some(parent) = current.parent() {
364 if parent.kind() == "function_definition" {
365 is_in_function = true;
366 break;
367 }
368 if parent.kind() == "module" {
370 break;
371 }
372 current = parent;
373 }
374
375 if is_in_function {
376 let span = node_to_span(&node);
377 let preview = extract_preview(source, &span);
378
379 symbols.push(SearchResult::new(
380 String::new(),
381 Language::Python,
382 SymbolKind::Variable,
383 Some(name),
384 span,
385 None, preview,
387 ));
388 }
389 }
390 }
391
392 Ok(symbols)
393}
394
395fn extract_lambdas(
397 source: &str,
398 root: &tree_sitter::Node,
399 language: &tree_sitter::Language,
400) -> Result<Vec<SearchResult>> {
401 let query_str = r#"
402 (assignment
403 left: (identifier) @name
404 right: (lambda)) @lambda
405 "#;
406
407 let query = Query::new(language, query_str).context("Failed to create lambda query")?;
408
409 extract_symbols(source, root, &query, SymbolKind::Function, None)
410}
411
412fn extract_symbols(
414 source: &str,
415 root: &tree_sitter::Node,
416 query: &Query,
417 kind: SymbolKind,
418 scope: Option<String>,
419) -> Result<Vec<SearchResult>> {
420 let mut cursor = QueryCursor::new();
421 let mut matches = cursor.matches(query, *root, source.as_bytes());
422
423 let mut symbols = Vec::new();
424
425 while let Some(match_) = matches.next() {
426 let mut name = None;
428 let mut full_node = None;
429
430 for capture in match_.captures {
431 let capture_name: &str = &query.capture_names()[capture.index as usize];
432 if capture_name == "name" {
433 name = Some(
434 capture
435 .node
436 .utf8_text(source.as_bytes())
437 .unwrap_or("")
438 .to_string(),
439 );
440 } else {
441 full_node = Some(capture.node);
443 }
444 }
445
446 if let (Some(name), Some(node)) = (name, full_node) {
447 let span = node_to_span(&node);
448 let preview = extract_preview(source, &span);
449
450 symbols.push(SearchResult::new(
451 String::new(),
452 Language::Python,
453 kind.clone(),
454 Some(name),
455 span,
456 scope.clone(),
457 preview,
458 ));
459 }
460 }
461
462 Ok(symbols)
463}
464
465fn node_to_span(node: &tree_sitter::Node) -> Span {
467 let start = node.start_position();
468 let end = node.end_position();
469
470 Span::new(
471 start.row + 1, start.column,
473 end.row + 1,
474 end.column,
475 )
476}
477
478fn extract_preview(source: &str, span: &Span) -> String {
480 let lines: Vec<&str> = source.lines().collect();
481
482 let start_idx = (span.start_line - 1) as usize; let end_idx = (start_idx + 7).min(lines.len());
485
486 lines[start_idx..end_idx].join("\n")
487}
488
489use crate::models::ImportType;
494use crate::parsers::{DependencyExtractor, ImportInfo};
495
496pub struct PythonDependencyExtractor;
498
499impl DependencyExtractor for PythonDependencyExtractor {
500 fn extract_dependencies(source: &str) -> Result<Vec<ImportInfo>> {
501 let mut parser = Parser::new();
502 let language = tree_sitter_python::LANGUAGE;
503
504 parser
505 .set_language(&language.into())
506 .context("Failed to set Python language")?;
507
508 let tree = parser
509 .parse(source, None)
510 .context("Failed to parse Python source")?;
511
512 let root_node = tree.root_node();
513
514 let mut imports = Vec::new();
515
516 imports.extend(extract_import_statements(source, &root_node)?);
518
519 imports.extend(extract_from_imports(source, &root_node)?);
521
522 Ok(imports)
523 }
524}
525
526fn extract_import_statements(source: &str, root: &tree_sitter::Node) -> Result<Vec<ImportInfo>> {
528 let language = tree_sitter_python::LANGUAGE;
529
530 let query_str = r#"
531 (import_statement
532 name: (dotted_name) @import_path) @import
533 "#;
534
535 let query = Query::new(&language.into(), query_str)
536 .context("Failed to create import statement query")?;
537
538 let mut cursor = QueryCursor::new();
539 let mut matches = cursor.matches(&query, *root, source.as_bytes());
540
541 let mut imports = Vec::new();
542
543 while let Some(match_) = matches.next() {
544 let mut import_path = None;
545 let mut import_node = None;
546
547 for capture in match_.captures {
548 let capture_name: &str = &query.capture_names()[capture.index as usize];
549 match capture_name {
550 "import_path" => {
551 import_path = Some(
552 capture
553 .node
554 .utf8_text(source.as_bytes())
555 .unwrap_or("")
556 .to_string(),
557 );
558 }
559 "import" => {
560 import_node = Some(capture.node);
561 }
562 _ => {}
563 }
564 }
565
566 if let (Some(path), Some(node)) = (import_path, import_node) {
567 let import_type = classify_python_import(&path);
568 let line_number = node.start_position().row + 1;
569
570 imports.push(ImportInfo {
571 imported_path: path,
572 import_type,
573 line_number,
574 imported_symbols: None,
575 });
576 }
577 }
578
579 Ok(imports)
580}
581
582fn extract_from_imports(source: &str, root: &tree_sitter::Node) -> Result<Vec<ImportInfo>> {
584 let language = tree_sitter_python::LANGUAGE;
585
586 let query_str = r#"
587 (import_from_statement
588 module_name: (dotted_name) @module_path) @import
589
590 (import_from_statement
591 module_name: (relative_import) @module_path) @import
592 "#;
593
594 let query =
595 Query::new(&language.into(), query_str).context("Failed to create from-import query")?;
596
597 let mut cursor = QueryCursor::new();
598 let mut matches = cursor.matches(&query, *root, source.as_bytes());
599
600 let mut imports = Vec::new();
601
602 while let Some(match_) = matches.next() {
603 let mut module_path = None;
604 let mut import_node = None;
605
606 for capture in match_.captures {
607 let capture_name: &str = &query.capture_names()[capture.index as usize];
608 match capture_name {
609 "module_path" => {
610 module_path = Some(
611 capture
612 .node
613 .utf8_text(source.as_bytes())
614 .unwrap_or("")
615 .to_string(),
616 );
617 }
618 "import" => {
619 import_node = Some(capture.node);
620 }
621 _ => {}
622 }
623 }
624
625 if let (Some(path), Some(node)) = (module_path, import_node) {
626 let import_type = classify_python_import(&path);
627 let line_number = node.start_position().row + 1;
628
629 let imported_symbols = extract_imported_symbols(source, &node);
631
632 imports.push(ImportInfo {
633 imported_path: path,
634 import_type,
635 line_number,
636 imported_symbols,
637 });
638 }
639 }
640
641 Ok(imports)
642}
643
644fn extract_imported_symbols(source: &str, import_node: &tree_sitter::Node) -> Option<Vec<String>> {
646 let mut symbols = Vec::new();
647
648 let mut cursor = import_node.walk();
650 for child in import_node.children(&mut cursor) {
651 match child.kind() {
652 "aliased_import" | "dotted_name" => {
653 let mut child_cursor = child.walk();
655 for grandchild in child.children(&mut child_cursor) {
656 if grandchild.kind() == "identifier" || grandchild.kind() == "dotted_name" {
657 if let Ok(text) = grandchild.utf8_text(source.as_bytes()) {
658 symbols.push(text.to_string());
659 break; }
661 }
662 }
663 }
664 _ => {}
665 }
666 }
667
668 if symbols.is_empty() {
669 None
670 } else {
671 Some(symbols)
672 }
673}
674
675pub fn find_python_package_name(root: &std::path::Path) -> Option<String> {
678 if let Some(name) = find_pyproject_package(root) {
680 return Some(name);
681 }
682
683 if let Some(name) = find_setup_py_package(root) {
685 return Some(name);
686 }
687
688 if let Some(name) = find_setup_cfg_package(root) {
690 return Some(name);
691 }
692
693 None
694}
695
/// Reads the package name from the `[project]` table of
/// `<root>/pyproject.toml`.
///
/// This is a line-based scan, not a TOML parse: inside `[project]`, the
/// first line that starts with `name`, contains `=` and has a quoted value
/// after it provides the name. The result is lower-cased. Returns `None`
/// when the file cannot be read or no such line exists.
fn find_pyproject_package(root: &std::path::Path) -> Option<String> {
    let content = std::fs::read_to_string(root.join("pyproject.toml")).ok()?;

    let mut inside_project = false;

    for entry in content.lines().map(str::trim) {
        // Any table header switches the section; only `[project]` turns it on.
        if entry.starts_with('[') {
            inside_project = entry == "[project]";
            continue;
        }

        if !(inside_project && entry.starts_with("name")) {
            continue;
        }

        let Some((_, value)) = entry.split_once('=') else {
            continue;
        };
        let value = value.trim();

        // Double quotes are tried before single quotes.
        let quoted = ['"', '\''].iter().find_map(|&quote| {
            let (_, rest) = value.split_once(quote)?;
            let (inner, _) = rest.split_once(quote)?;
            Some(inner)
        });

        if let Some(name) = quoted {
            return Some(name.to_lowercase());
        }
    }

    None
}
741
/// Reads the package name from `<root>/setup.py` with a textual heuristic.
///
/// The first line that contains `name`, then an `=` somewhere after it, then
/// a quoted string provides the name. The result is lower-cased. Returns
/// `None` when the file cannot be read or no line matches.
fn find_setup_py_package(root: &std::path::Path) -> Option<String> {
    let content = std::fs::read_to_string(root.join("setup.py")).ok()?;

    content.lines().find_map(|line| {
        let line = line.trim();

        // Text after the first `name`, then after the first `=` following it.
        let (_, after_name) = line.split_once("name")?;
        let (_, value) = after_name.split_once('=')?;
        let value = value.trim();

        // Double quotes are tried before single quotes.
        ['"', '\''].iter().find_map(|&quote| {
            let (_, rest) = value.split_once(quote)?;
            let (inner, _) = rest.split_once(quote)?;
            Some(inner.to_lowercase())
        })
    })
}
776
/// Reads the package name from the `[metadata]` section of
/// `<root>/setup.cfg`.
///
/// Inside `[metadata]`, the first line that starts with `name` and contains
/// `=` provides the name: everything after the first `=`, trimmed and
/// lower-cased. Returns `None` when the file cannot be read or no such line
/// exists.
fn find_setup_cfg_package(root: &std::path::Path) -> Option<String> {
    let content = std::fs::read_to_string(root.join("setup.cfg")).ok()?;

    let mut inside_metadata = false;

    for entry in content.lines().map(str::trim) {
        // Any section header switches the section; only `[metadata]` turns it on.
        if entry.starts_with('[') {
            inside_metadata = entry == "[metadata]";
            continue;
        }

        if !(inside_metadata && entry.starts_with("name")) {
            continue;
        }

        if let Some((_, value)) = entry.split_once('=') {
            return Some(value.trim().to_lowercase());
        }
    }

    None
}
811
812pub fn reclassify_python_import(import_path: &str, package_prefix: Option<&str>) -> ImportType {
815 if let Some(prefix) = package_prefix {
817 let first_component = import_path.split('.').next().unwrap_or(import_path);
819
820 if first_component == prefix {
821 return ImportType::Internal;
822 }
823 }
824
825 if import_path.starts_with('.') {
827 return ImportType::Internal;
828 }
829
830 if is_python_stdlib(import_path) {
832 return ImportType::Stdlib;
833 }
834
835 ImportType::External
837}
838
/// Returns `true` when the first dotted component of `path` names a module
/// of the Python standard library.
///
/// The table is a hand-maintained subset of the standard library (kept in
/// alphabetical order). It covers commonly imported modules; anything not
/// listed is reported as non-stdlib.
fn is_python_stdlib(path: &str) -> bool {
    const STDLIB_MODULES: &[&str] = &[
        "__future__", "abc", "argparse", "array", "ast", "asyncio", "atexit",
        "base64", "bisect", "builtins", "bz2",
        "calendar", "cmath", "codecs", "collections", "concurrent", "configparser",
        "contextlib", "contextvars", "copy", "csv", "ctypes",
        "dataclasses", "datetime", "decimal", "difflib", "dis", "doctest",
        "email", "enum", "errno",
        "fnmatch", "fractions", "functools",
        "gc", "getpass", "gettext", "glob", "graphlib", "gzip",
        "hashlib", "heapq", "hmac", "html", "http",
        "importlib", "inspect", "io", "ipaddress", "itertools",
        "json",
        "keyword",
        "linecache", "locale", "logging", "lzma",
        "math", "mimetypes", "multiprocessing",
        "numbers",
        "operator", "os",
        "pathlib", "pdb", "pickle", "pkgutil", "platform", "pprint",
        "queue",
        "random", "re",
        "secrets", "select", "selectors", "shelve", "shlex", "shutil", "signal",
        "smtplib", "socket", "socketserver", "sqlite3", "ssl", "stat", "statistics",
        "string", "struct", "subprocess", "sys", "sysconfig",
        "tarfile", "tempfile", "textwrap", "threading", "time", "timeit", "tokenize",
        "tomllib", "trace", "traceback", "tracemalloc", "types", "typing",
        "unicodedata", "unittest", "urllib", "uuid",
        "warnings", "weakref",
        "xml", "xmlrpc",
        "zipfile", "zlib", "zoneinfo",
    ];

    // Only the top-level package decides: `os.path` is looked up as `os`.
    let first_component = path.split('.').next().unwrap_or("");

    STDLIB_MODULES.contains(&first_component)
}
906
907fn classify_python_import(import_path: &str) -> ImportType {
909 if import_path.starts_with('.') {
911 return ImportType::Internal;
912 }
913
914 const STDLIB_MODULES: &[&str] = &[
916 "os",
917 "sys",
918 "io",
919 "re",
920 "json",
921 "csv",
922 "xml",
923 "html",
924 "http",
925 "urllib",
926 "collections",
927 "itertools",
928 "functools",
929 "operator",
930 "pathlib",
931 "glob",
932 "tempfile",
933 "shutil",
934 "pickle",
935 "shelve",
936 "sqlite3",
937 "zlib",
938 "gzip",
939 "time",
940 "datetime",
941 "calendar",
942 "logging",
943 "argparse",
944 "configparser",
945 "typing",
946 "dataclasses",
947 "enum",
948 "abc",
949 "contextlib",
950 "weakref",
951 "threading",
952 "multiprocessing",
953 "subprocess",
954 "queue",
955 "asyncio",
956 "socket",
957 "email",
958 "base64",
959 "hashlib",
960 "hmac",
961 "secrets",
962 "uuid",
963 "math",
964 "random",
965 "statistics",
966 "decimal",
967 "fractions",
968 "unittest",
969 "doctest",
970 "pdb",
971 "trace",
972 "timeit",
973 ];
974
975 let first_component = import_path.split('.').next().unwrap_or("");
977
978 if STDLIB_MODULES.contains(&first_component) {
979 ImportType::Stdlib
980 } else {
981 ImportType::External
983 }
984}
985
/// A Python package discovered from a project config file.
#[derive(Clone, Debug)]
pub struct PythonPackage {
    /// Package name as read from the config file.
    pub name: String,
    /// Directory containing the config file, as a string. When populated by
    /// `parse_all_python_packages` it is relative to the index root where
    /// possible.
    pub project_root: String,
    /// Directory containing the config file, as a path. When populated by
    /// `parse_all_python_packages` it is the parent of the discovered config
    /// path.
    pub abs_project_root: std::path::PathBuf,
}
1000
1001pub fn find_all_python_configs(index_root: &std::path::Path) -> Result<Vec<std::path::PathBuf>> {
1004 use ignore::WalkBuilder;
1005
1006 let mut config_files = Vec::new();
1007
1008 let walker = WalkBuilder::new(index_root)
1009 .follow_links(false)
1010 .git_ignore(true)
1011 .build();
1012
1013 for entry in walker {
1014 let entry = entry?;
1015 let path = entry.path();
1016
1017 if !path.is_file() {
1018 continue;
1019 }
1020
1021 let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
1022
1023 if filename == "pyproject.toml" || filename == "setup.py" || filename == "setup.cfg" {
1025 let path_str = path.to_string_lossy();
1027 if path_str.contains("/venv/")
1028 || path_str.contains("/.venv/")
1029 || path_str.contains("/site-packages/")
1030 || path_str.contains("/dist/")
1031 || path_str.contains("/build/")
1032 || path_str.contains("/__pycache__/")
1033 {
1034 log::trace!(
1035 "Skipping Python config in vendor/build directory: {:?}",
1036 path
1037 );
1038 continue;
1039 }
1040
1041 config_files.push(path.to_path_buf());
1042 }
1043 }
1044
1045 log::debug!("Found {} Python config files", config_files.len());
1046 Ok(config_files)
1047}
1048
1049pub fn parse_all_python_packages(index_root: &std::path::Path) -> Result<Vec<PythonPackage>> {
1051 let config_files = find_all_python_configs(index_root)?;
1052
1053 if config_files.is_empty() {
1054 log::debug!("No Python config files found in {:?}", index_root);
1055 return Ok(Vec::new());
1056 }
1057
1058 let mut packages = Vec::new();
1059 let config_count = config_files.len();
1060
1061 for config_path in &config_files {
1062 let project_root = config_path
1063 .parent()
1064 .ok_or_else(|| anyhow::anyhow!("Config file has no parent directory"))?;
1065
1066 if let Some(package_name) = find_python_package_name(project_root) {
1068 let relative_project_root = project_root
1069 .strip_prefix(index_root)
1070 .unwrap_or(project_root)
1071 .to_string_lossy()
1072 .to_string();
1073
1074 log::debug!(
1075 "Found Python package '{}' at {:?}",
1076 package_name,
1077 relative_project_root
1078 );
1079
1080 packages.push(PythonPackage {
1081 name: package_name,
1082 project_root: relative_project_root,
1083 abs_project_root: project_root.to_path_buf(),
1084 });
1085 }
1086 }
1087
1088 log::info!(
1089 "Loaded {} Python packages from {} config files",
1090 packages.len(),
1091 config_count
1092 );
1093
1094 Ok(packages)
1095}
1096
1097pub fn resolve_python_import_to_path(
1104 import_path: &str,
1105 packages: &[PythonPackage],
1106 current_file_path: Option<&str>,
1107) -> Option<String> {
1108 if import_path.starts_with('.') {
1110 return resolve_relative_python_import(import_path, current_file_path);
1111 }
1112
1113 let first_component = import_path.split('.').next()?;
1116
1117 for package in packages {
1119 if package.name == first_component {
1120 let module_path = import_path.replace('.', "/");
1123
1124 let candidates = vec![
1126 format!("{}/{}.py", package.project_root, module_path),
1127 format!("{}/{}/__init__.py", package.project_root, module_path),
1128 ];
1129
1130 for candidate in candidates {
1131 log::trace!("Checking Python module path: {}", candidate);
1132 return Some(candidate);
1133 }
1134 }
1135 }
1136
1137 None
1138}
1139
1140fn resolve_relative_python_import(
1143 import_path: &str,
1144 current_file_path: Option<&str>,
1145) -> Option<String> {
1146 let current_file = current_file_path?;
1147
1148 let dots = import_path.chars().take_while(|&c| c == '.').count();
1150 if dots == 0 {
1151 return None;
1152 }
1153
1154 let current_dir = std::path::Path::new(current_file).parent()?;
1156
1157 let mut target_dir = current_dir.to_path_buf();
1159 for _ in 1..dots {
1160 target_dir = target_dir.parent()?.to_path_buf();
1161 }
1162
1163 let module_path = import_path.trim_start_matches('.');
1165
1166 if module_path.is_empty() {
1167 return Some(format!("{}/__init__.py", target_dir.to_string_lossy()));
1169 }
1170
1171 let file_path = module_path.replace('.', "/");
1173
1174 let candidates = vec![
1176 format!("{}/{}.py", target_dir.to_string_lossy(), file_path),
1177 format!("{}/{}/__init__.py", target_dir.to_string_lossy(), file_path),
1178 ];
1179
1180 for candidate in candidates {
1181 log::trace!("Checking relative Python import: {}", candidate);
1182 return Some(candidate);
1183 }
1184
1185 None
1186}
1187
1188#[cfg(test)]
1189mod tests {
1190 use super::*;
1191
1192 #[test]
1193 fn test_parse_function() {
1194 let source = r#"
1195def hello_world():
1196 print("Hello, world!")
1197 return True
1198 "#;
1199
1200 let symbols = parse("test.py", source).unwrap();
1201 assert_eq!(symbols.len(), 1);
1202 assert_eq!(symbols[0].symbol.as_deref(), Some("hello_world"));
1203 assert!(matches!(symbols[0].kind, SymbolKind::Function));
1204 }
1205
1206 #[test]
1207 fn test_parse_async_function() {
1208 let source = r#"
1209async def fetch_data(url):
1210 async with aiohttp.ClientSession() as session:
1211 async with session.get(url) as response:
1212 return await response.text()
1213 "#;
1214
1215 let symbols = parse("test.py", source).unwrap();
1216 assert_eq!(symbols.len(), 1);
1217 assert_eq!(symbols[0].symbol.as_deref(), Some("fetch_data"));
1218 assert!(matches!(symbols[0].kind, SymbolKind::Function));
1219 }
1220
1221 #[test]
1222 fn test_parse_class() {
1223 let source = r#"
1224class User:
1225 def __init__(self, name, age):
1226 self.name = name
1227 self.age = age
1228 "#;
1229
1230 let symbols = parse("test.py", source).unwrap();
1231
1232 let class_symbols: Vec<_> = symbols
1233 .iter()
1234 .filter(|s| matches!(s.kind, SymbolKind::Class))
1235 .collect();
1236
1237 assert_eq!(class_symbols.len(), 1);
1238 assert_eq!(class_symbols[0].symbol.as_deref(), Some("User"));
1239 }
1240
1241 #[test]
1242 fn test_parse_class_with_methods() {
1243 let source = r#"
1244class Calculator:
1245 def add(self, a, b):
1246 return a + b
1247
1248 def subtract(self, a, b):
1249 return a - b
1250
1251 @staticmethod
1252 def multiply(a, b):
1253 return a * b
1254 "#;
1255
1256 let symbols = parse("test.py", source).unwrap();
1257
1258 let method_symbols: Vec<_> = symbols
1259 .iter()
1260 .filter(|s| matches!(s.kind, SymbolKind::Method))
1261 .collect();
1262
1263 assert_eq!(method_symbols.len(), 3);
1264 assert!(
1265 method_symbols
1266 .iter()
1267 .any(|s| s.symbol.as_deref() == Some("add"))
1268 );
1269 assert!(
1270 method_symbols
1271 .iter()
1272 .any(|s| s.symbol.as_deref() == Some("subtract"))
1273 );
1274 assert!(
1275 method_symbols
1276 .iter()
1277 .any(|s| s.symbol.as_deref() == Some("multiply"))
1278 );
1279
1280 for method in method_symbols {
1282 }
1284 }
1285
1286 #[test]
1287 fn test_parse_async_method() {
1288 let source = r#"
1289class DataFetcher:
1290 async def get_user(self, user_id):
1291 return await fetch(f"/users/{user_id}")
1292
1293 async def get_all_users(self):
1294 return await fetch("/users")
1295 "#;
1296
1297 let symbols = parse("test.py", source).unwrap();
1298
1299 let method_symbols: Vec<_> = symbols
1300 .iter()
1301 .filter(|s| matches!(s.kind, SymbolKind::Method))
1302 .collect();
1303
1304 assert_eq!(method_symbols.len(), 2);
1305 assert!(
1306 method_symbols
1307 .iter()
1308 .any(|s| s.symbol.as_deref() == Some("get_user"))
1309 );
1310 assert!(
1311 method_symbols
1312 .iter()
1313 .any(|s| s.symbol.as_deref() == Some("get_all_users"))
1314 );
1315 }
1316
1317 #[test]
1318 fn test_parse_constants() {
1319 let source = r#"
1320MAX_SIZE = 100
1321DEFAULT_TIMEOUT = 30
1322API_URL = "https://api.example.com"
1323 "#;
1324
1325 let symbols = parse("test.py", source).unwrap();
1326
1327 let const_symbols: Vec<_> = symbols
1328 .iter()
1329 .filter(|s| matches!(s.kind, SymbolKind::Constant))
1330 .collect();
1331
1332 assert_eq!(const_symbols.len(), 3);
1333 assert!(
1334 const_symbols
1335 .iter()
1336 .any(|s| s.symbol.as_deref() == Some("MAX_SIZE"))
1337 );
1338 assert!(
1339 const_symbols
1340 .iter()
1341 .any(|s| s.symbol.as_deref() == Some("DEFAULT_TIMEOUT"))
1342 );
1343 assert!(
1344 const_symbols
1345 .iter()
1346 .any(|s| s.symbol.as_deref() == Some("API_URL"))
1347 );
1348 }
1349
1350 #[test]
1351 fn test_parse_lambda() {
1352 let source = r#"
1353square = lambda x: x * x
1354add = lambda a, b: a + b
1355 "#;
1356
1357 let symbols = parse("test.py", source).unwrap();
1358
1359 let lambda_symbols: Vec<_> = symbols
1360 .iter()
1361 .filter(|s| matches!(s.kind, SymbolKind::Function))
1362 .collect();
1363
1364 assert!(lambda_symbols.len() >= 2);
1365 assert!(
1366 lambda_symbols
1367 .iter()
1368 .any(|s| s.symbol.as_deref() == Some("square"))
1369 );
1370 assert!(
1371 lambda_symbols
1372 .iter()
1373 .any(|s| s.symbol.as_deref() == Some("add"))
1374 );
1375 }
1376
1377 #[test]
1378 fn test_parse_decorated_method() {
1379 let source = r#"
1380class WebService:
1381 @property
1382 def url(self):
1383 return self._url
1384
1385 @classmethod
1386 def from_config(cls, config):
1387 return cls(config['url'])
1388
1389 @staticmethod
1390 def validate_url(url):
1391 return url.startswith('http')
1392 "#;
1393
1394 let symbols = parse("test.py", source).unwrap();
1395
1396 let method_symbols: Vec<_> = symbols
1397 .iter()
1398 .filter(|s| matches!(s.kind, SymbolKind::Method))
1399 .collect();
1400
1401 assert_eq!(method_symbols.len(), 3);
1402 assert!(
1403 method_symbols
1404 .iter()
1405 .any(|s| s.symbol.as_deref() == Some("url"))
1406 );
1407 assert!(
1408 method_symbols
1409 .iter()
1410 .any(|s| s.symbol.as_deref() == Some("from_config"))
1411 );
1412 assert!(
1413 method_symbols
1414 .iter()
1415 .any(|s| s.symbol.as_deref() == Some("validate_url"))
1416 );
1417 }
1418
1419 #[test]
1420 fn test_parse_mixed_symbols() {
1421 let source = r#"
1422API_KEY = "secret123"
1423MAX_RETRIES = 3
1424
1425class APIClient:
1426 def __init__(self, api_key):
1427 self.api_key = api_key
1428
1429 async def request(self, endpoint):
1430 return await self._fetch(endpoint)
1431
1432 @staticmethod
1433 def build_url(endpoint):
1434 return f"https://api.example.com/{endpoint}"
1435
1436def create_client():
1437 return APIClient(API_KEY)
1438
1439process = lambda data: data.strip().lower()
1440 "#;
1441
1442 let symbols = parse("test.py", source).unwrap();
1443
1444 assert!(symbols.len() >= 8);
1446
1447 let kinds: Vec<&SymbolKind> = symbols.iter().map(|s| &s.kind).collect();
1448 assert!(kinds.contains(&&SymbolKind::Constant));
1449 assert!(kinds.contains(&&SymbolKind::Class));
1450 assert!(kinds.contains(&&SymbolKind::Method));
1451 assert!(kinds.contains(&&SymbolKind::Function));
1452 }
1453
1454 #[test]
1455 fn test_parse_nested_class() {
1456 let source = r#"
1457class Outer:
1458 class Inner:
1459 def inner_method(self):
1460 pass
1461
1462 def outer_method(self):
1463 pass
1464 "#;
1465
1466 let symbols = parse("test.py", source).unwrap();
1467
1468 let class_symbols: Vec<_> = symbols
1469 .iter()
1470 .filter(|s| matches!(s.kind, SymbolKind::Class))
1471 .collect();
1472
1473 assert_eq!(class_symbols.len(), 2);
1475 assert!(
1476 class_symbols
1477 .iter()
1478 .any(|s| s.symbol.as_deref() == Some("Outer"))
1479 );
1480 assert!(
1481 class_symbols
1482 .iter()
1483 .any(|s| s.symbol.as_deref() == Some("Inner"))
1484 );
1485 }
1486
1487 #[test]
1488 fn test_local_variables_included() {
1489 let source = r#"
1490def calculate(input):
1491 local_var = input * 2
1492 result = local_var + 10
1493 return result
1494
1495class Calculator:
1496 def compute(self, value):
1497 temp = value * 3
1498 final = temp + 5
1499 return final
1500 "#;
1501
1502 let symbols = parse("test.py", source).unwrap();
1503
1504 let variables: Vec<_> = symbols
1506 .iter()
1507 .filter(|s| matches!(s.kind, SymbolKind::Variable))
1508 .collect();
1509
1510 assert!(
1512 variables
1513 .iter()
1514 .any(|v| v.symbol.as_deref() == Some("local_var"))
1515 );
1516 assert!(
1517 variables
1518 .iter()
1519 .any(|v| v.symbol.as_deref() == Some("result"))
1520 );
1521 assert!(
1522 variables
1523 .iter()
1524 .any(|v| v.symbol.as_deref() == Some("temp"))
1525 );
1526 assert!(
1527 variables
1528 .iter()
1529 .any(|v| v.symbol.as_deref() == Some("final"))
1530 );
1531
1532 for var in variables {
1534 }
1536 }
1537
1538 #[test]
1539 fn test_global_variables() {
1540 let source = r#"
1541# Global constants (uppercase)
1542MAX_SIZE = 100
1543DEFAULT_TIMEOUT = 30
1544
1545# Global variables (non-uppercase)
1546database_url = "postgresql://localhost/mydb"
1547config = {"debug": True}
1548current_user = None
1549
1550def get_config():
1551 return config
1552 "#;
1553
1554 let symbols = parse("test.py", source).unwrap();
1555
1556 let constants: Vec<_> = symbols
1558 .iter()
1559 .filter(|s| matches!(s.kind, SymbolKind::Constant))
1560 .collect();
1561
1562 let variables: Vec<_> = symbols
1563 .iter()
1564 .filter(|s| matches!(s.kind, SymbolKind::Variable))
1565 .collect();
1566
1567 assert!(
1569 constants
1570 .iter()
1571 .any(|c| c.symbol.as_deref() == Some("MAX_SIZE"))
1572 );
1573 assert!(
1574 constants
1575 .iter()
1576 .any(|c| c.symbol.as_deref() == Some("DEFAULT_TIMEOUT"))
1577 );
1578
1579 assert!(
1581 variables
1582 .iter()
1583 .any(|v| v.symbol.as_deref() == Some("database_url"))
1584 );
1585 assert!(
1586 variables
1587 .iter()
1588 .any(|v| v.symbol.as_deref() == Some("config"))
1589 );
1590 assert!(
1591 variables
1592 .iter()
1593 .any(|v| v.symbol.as_deref() == Some("current_user"))
1594 );
1595
1596 for constant in constants {
1598 }
1600 for var in variables {
1601 }
1603 }
1604
1605 #[test]
1606 fn test_find_all_python_configs() {
1607 use std::fs;
1608 use tempfile::TempDir;
1609
1610 let temp = TempDir::new().unwrap();
1611 let root = temp.path();
1612
1613 let project1 = root.join("backend");
1615 fs::create_dir_all(&project1).unwrap();
1616 fs::write(
1617 project1.join("pyproject.toml"),
1618 "[project]\nname = \"backend\"",
1619 )
1620 .unwrap();
1621
1622 let project2 = root.join("frontend/api");
1623 fs::create_dir_all(&project2).unwrap();
1624 fs::write(project2.join("setup.py"), "setup(name='api')").unwrap();
1625
1626 let venv = root.join("venv");
1628 fs::create_dir_all(&venv).unwrap();
1629 fs::write(venv.join("setup.py"), "setup(name='should_skip')").unwrap();
1630
1631 let configs = find_all_python_configs(root).unwrap();
1632
1633 assert_eq!(configs.len(), 2);
1635 assert!(
1636 configs
1637 .iter()
1638 .any(|p| p.ends_with("backend/pyproject.toml"))
1639 );
1640 assert!(configs.iter().any(|p| p.ends_with("frontend/api/setup.py")));
1641 }
1642
1643 #[test]
1644 fn test_parse_all_python_packages() {
1645 use std::fs;
1646 use tempfile::TempDir;
1647
1648 let temp = TempDir::new().unwrap();
1649 let root = temp.path();
1650
1651 let project1 = root.join("services/auth");
1653 fs::create_dir_all(&project1).unwrap();
1654 fs::write(
1655 project1.join("pyproject.toml"),
1656 "[project]\nname = \"auth-service\"\n",
1657 )
1658 .unwrap();
1659
1660 let project2 = root.join("services/api");
1661 fs::create_dir_all(&project2).unwrap();
1662 fs::write(project2.join("setup.py"), "setup(name=\"api-service\")").unwrap();
1663
1664 let packages = parse_all_python_packages(root).unwrap();
1665
1666 assert_eq!(packages.len(), 2);
1668
1669 let names: Vec<_> = packages.iter().map(|p| p.name.as_str()).collect();
1671 assert!(names.contains(&"auth-service"));
1672 assert!(names.contains(&"api-service"));
1673
1674 for package in &packages {
1676 assert!(package.project_root.starts_with("services/"));
1677 assert!(package.abs_project_root.ends_with(&package.project_root));
1678 }
1679 }
1680
1681 #[test]
1682 fn test_resolve_python_import_absolute() {
1683 use std::fs;
1684 use tempfile::TempDir;
1685
1686 let temp = TempDir::new().unwrap();
1687 let root = temp.path();
1688
1689 let myapp = root.join("myapp");
1691 fs::create_dir_all(myapp.join("models")).unwrap();
1692 fs::write(
1693 myapp.join("pyproject.toml"),
1694 "[project]\nname = \"myapp\"\n",
1695 )
1696 .unwrap();
1697
1698 let packages = parse_all_python_packages(root).unwrap();
1699 assert_eq!(packages.len(), 1);
1700
1701 let resolved = resolve_python_import_to_path("myapp.models.user", &packages, None);
1704
1705 assert!(resolved.is_some());
1706 let path = resolved.unwrap();
1707 assert!(
1708 path.contains("myapp/models/user.py") || path.contains("myapp/models/user/__init__.py")
1709 );
1710 }
1711
1712 #[test]
1713 fn test_resolve_python_import_relative() {
1714 let current_file = "myapp/views/admin.py";
1716
1717 let resolved = resolve_python_import_to_path(
1719 ".models",
1720 &[], Some(current_file),
1722 );
1723
1724 assert!(resolved.is_some());
1725 let path = resolved.unwrap();
1726 assert!(path.contains("myapp/views/models"));
1728
1729 let resolved = resolve_python_import_to_path("..utils", &[], Some(current_file));
1731
1732 assert!(resolved.is_some());
1733 let path = resolved.unwrap();
1734 assert!(path.contains("myapp/utils"));
1736 }
1737
1738 #[test]
1739 fn test_resolve_python_import_relative_with_module() {
1740 let current_file = "myapp/views/dashboard/index.py";
1742
1743 let resolved = resolve_python_import_to_path("..models.user", &[], Some(current_file));
1744
1745 assert!(resolved.is_some());
1746 let path = resolved.unwrap();
1747 assert!(path.contains("models/user"));
1749 }
1750
1751 #[test]
1752 fn test_resolve_python_import_not_found() {
1753 use std::fs;
1754 use tempfile::TempDir;
1755
1756 let temp = TempDir::new().unwrap();
1757 let root = temp.path();
1758
1759 let myapp = root.join("myapp");
1760 fs::create_dir_all(&myapp).unwrap();
1761 fs::write(
1762 myapp.join("pyproject.toml"),
1763 "[project]\nname = \"myapp\"\n",
1764 )
1765 .unwrap();
1766
1767 let packages = parse_all_python_packages(root).unwrap();
1768
1769 let resolved = resolve_python_import_to_path("other_package.module", &packages, None);
1771
1772 assert!(resolved.is_none());
1774 }
1775
1776 #[test]
1777 fn test_dynamic_imports_filtered() {
1778 let source = r#"
1779import os
1780import sys
1781from json import loads
1782from .models import User
1783
1784# Dynamic imports - should be filtered out
1785import importlib
1786mod = importlib.import_module("some_module")
1787pkg = __import__("package")
1788exec("import dynamic")
1789 "#;
1790
1791 let deps = PythonDependencyExtractor::extract_dependencies(source).unwrap();
1792
1793 assert_eq!(deps.len(), 5, "Should extract 5 static imports only");
1796
1797 assert!(deps.iter().any(|d| d.imported_path == "os"));
1798 assert!(deps.iter().any(|d| d.imported_path == "sys"));
1799 assert!(deps.iter().any(|d| d.imported_path == "json"));
1800 assert!(deps.iter().any(|d| d.imported_path == ".models"));
1801 assert!(deps.iter().any(|d| d.imported_path == "importlib"));
1802
1803 assert!(!deps.iter().any(|d| d.imported_path.contains("some_module")));
1805 assert!(
1806 !deps
1807 .iter()
1808 .any(|d| d.imported_path.contains("package") && d.imported_path != "json")
1809 );
1810 assert!(!deps.iter().any(|d| d.imported_path.contains("dynamic")));
1811 }
1812}