1use std::{collections::HashMap, path::Path, sync::OnceLock};
2
3use sqry_core::graph::unified::StagingGraph;
4use sqry_core::graph::unified::build::GraphBuildHelper;
5use sqry_core::graph::unified::build::helper::CalleeKindHint;
6use sqry_core::graph::unified::build::shape::{CfBucket, ShapeMapping};
7use sqry_core::graph::unified::edge::FfiConvention;
8use sqry_core::graph::unified::edge::kind::TypeOfContext;
9use sqry_core::graph::unified::node::NodeId as UnifiedNodeId;
10use sqry_core::graph::unified::storage::shape::SignatureShape;
11use sqry_core::graph::{GraphBuilder, GraphBuilderError, GraphResult, Language, Span};
12use tree_sitter::{Node, Tree};
13
14use super::local_scopes;
15
/// Scope-depth limit used by `PythonGraphBuilder::default()`.
16const DEFAULT_SCOPE_DEPTH: usize = 4;
/// Top-level module names that `is_native_extension_import` treats as native
/// extensions; only the first dotted component of an import is compared.
// NOTE(review): presumably CPython standard-library modules implemented in C — confirm.
17const STD_C_MODULES: &[&str] = &[
18 "_ctypes",
19 "_socket",
20 "_ssl",
21 "_hashlib",
22 "_json",
23 "_pickle",
24 "_struct",
25 "_sqlite3",
26 "_decimal",
27 "_lzma",
28 "_bz2",
29 "_zlib",
30 "_elementtree",
31 "_csv",
32 "_datetime",
33 "_heapq",
34 "_bisect",
35 "_random",
36 "_collections",
37 "_functools",
38 "_itertools",
39 "_operator",
40 "_io",
41 "_thread",
42 "_multiprocessing",
43 "_posixsubprocess",
44 "_asyncio",
45 "array",
46 "math",
47 "cmath",
48];
/// Top-level package names that `is_native_extension_import` also treats as
/// native extensions.
// NOTE(review): presumably popular third-party packages that ship compiled code — confirm.
49const THIRD_PARTY_C_PACKAGES: &[&str] = &[
50 "numpy",
51 "pandas",
52 "scipy",
53 "sklearn",
54 "cv2",
55 "PIL",
56 "torch",
57 "tensorflow",
58 "lxml",
59 "psycopg2",
60 "MySQLdb",
61 "sqlite3",
62 "cryptography",
63 "bcrypt",
64 "regex",
65 "ujson",
66 "orjson",
67 "msgpack",
68 "greenlet",
69 "gevent",
70 "uvloop",
71];
72
73#[derive(Debug, Clone, Copy)]
75pub struct PythonGraphBuilder {
76 max_scope_depth: usize,
77}
78
79impl Default for PythonGraphBuilder {
80 fn default() -> Self {
81 Self {
82 max_scope_depth: DEFAULT_SCOPE_DEPTH,
83 }
84 }
85}
86
87impl PythonGraphBuilder {
88 #[must_use]
89 pub fn new(max_scope_depth: usize) -> Self {
90 Self { max_scope_depth }
91 }
92}
93
94impl GraphBuilder for PythonGraphBuilder {
95 fn build_graph(
96 &self,
97 tree: &Tree,
98 content: &[u8],
99 file: &Path,
100 staging: &mut StagingGraph,
101 ) -> GraphResult<()> {
102 let mut helper = GraphBuildHelper::new(staging, file, Language::Python);
104
105 let ast_graph = ASTGraph::from_tree(tree, content, self.max_scope_depth).map_err(|e| {
107 GraphBuilderError::ParseError {
108 span: Span::default(),
109 reason: e,
110 }
111 })?;
112
113 let has_all = has_all_assignment(tree.root_node(), content);
115
116 let mut scope_tree = local_scopes::build(tree.root_node(), content)?;
118
119 let recursion_limits =
121 sqry_core::config::RecursionLimits::load_or_default().map_err(|e| {
122 GraphBuilderError::ParseError {
123 span: Span::default(),
124 reason: format!("Failed to load recursion limits: {e}"),
125 }
126 })?;
127 let file_ops_depth = recursion_limits.effective_file_ops_depth().map_err(|e| {
128 GraphBuilderError::ParseError {
129 span: Span::default(),
130 reason: format!("Invalid file_ops_depth configuration: {e}"),
131 }
132 })?;
133 let mut guard =
134 sqry_core::query::security::RecursionGuard::new(file_ops_depth).map_err(|e| {
135 GraphBuilderError::ParseError {
136 span: Span::default(),
137 reason: format!("Failed to create recursion guard: {e}"),
138 }
139 })?;
140
141 walk_tree_for_graph(
143 tree.root_node(),
144 content,
145 &ast_graph,
146 &mut helper,
147 has_all,
148 &mut guard,
149 &mut scope_tree,
150 )?;
151
152 Ok(())
153 }
154
155 fn language(&self) -> Language {
156 Language::Python
157 }
158
159 fn shape_mapping(&self) -> Option<&dyn ShapeMapping> {
160 Some(python_shape_mapping())
161 }
162}
163
164pub struct PythonShapeMapping {
173 cf_by_kind_id: Vec<Option<CfBucket>>,
174}
175
176impl PythonShapeMapping {
177 fn build() -> Self {
179 let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
180 let count = lang.node_kind_count();
181 let mut cf_by_kind_id = vec![None; count];
182 for (id, slot) in cf_by_kind_id.iter_mut().enumerate() {
183 let Ok(kind_id) = u16::try_from(id) else {
184 break;
185 };
186 if !lang.node_kind_is_named(kind_id) {
187 continue;
188 }
189 if let Some(name) = lang.node_kind_for_id(kind_id) {
190 *slot = cf_bucket_for_python_kind(name);
191 }
192 }
193 Self { cf_by_kind_id }
194 }
195}
196
197impl ShapeMapping for PythonShapeMapping {
198 fn cf_bucket(&self, ts_node_kind_id: u16) -> Option<CfBucket> {
199 self.cf_by_kind_id
200 .get(ts_node_kind_id as usize)
201 .copied()
202 .flatten()
203 }
204
205 fn signature_shape(&self, fn_node: Node, _src: &[u8]) -> SignatureShape {
206 let mut shape = SignatureShape::default();
207 if let Some(params) = fn_node.child_by_field_name("parameters") {
208 let mut keyword_only = false;
212 let mut cursor = params.walk();
213 for child in params.named_children(&mut cursor) {
214 match child.kind() {
215 "list_splat_pattern" => {
217 shape.has_varargs = true;
218 keyword_only = true;
219 }
220 "dictionary_splat_pattern" => shape.has_kwargs = true,
222 "identifier" | "typed_parameter" => {
224 bump_arity(&mut shape, keyword_only);
225 }
226 "default_parameter" | "typed_default_parameter" => {
228 shape.has_defaults = true;
229 bump_arity(&mut shape, keyword_only);
230 }
231 _ => {}
232 }
233 }
234 }
235 shape.has_return_annotation = fn_node.child_by_field_name("return_type").is_some();
236 shape
237 }
238}
239
240fn bump_arity(shape: &mut SignatureShape, keyword_only: bool) {
242 if keyword_only {
243 shape.arity_keyword_only = shape.arity_keyword_only.saturating_add(1);
244 } else {
245 shape.arity_positional = shape.arity_positional.saturating_add(1);
246 }
247}
248
249fn cf_bucket_for_python_kind(name: &str) -> Option<CfBucket> {
253 let bucket = match name {
254 "if_statement" | "elif_clause" | "conditional_expression" => CfBucket::Branch,
255 "for_statement" | "while_statement" => CfBucket::Loop,
256 "match_statement" | "case_clause" => CfBucket::Match,
257 "try_statement" => CfBucket::Try,
258 "except_clause" | "except_group_clause" => CfBucket::Catch,
259 "raise_statement" => CfBucket::Throw,
260 "with_statement" => CfBucket::Resource,
262 "return_statement" => CfBucket::Return,
263 "yield" => CfBucket::Yield,
264 "await" => CfBucket::Await,
265 "break_statement" | "continue_statement" => CfBucket::BreakContinue,
266 "call" => CfBucket::Call,
267 "assignment" | "augmented_assignment" | "named_expression" => CfBucket::Assign,
268 "lambda" => CfBucket::Closure,
269 "list_comprehension"
270 | "dictionary_comprehension"
271 | "set_comprehension"
272 | "generator_expression" => CfBucket::Comprehension,
273 _ => return None,
274 };
275 Some(bucket)
276}
277
278#[must_use]
280pub fn python_shape_mapping() -> &'static PythonShapeMapping {
281 static MAPPING: OnceLock<PythonShapeMapping> = OnceLock::new();
282 MAPPING.get_or_init(PythonShapeMapping::build)
283}
284
285fn has_all_assignment(node: Node, content: &[u8]) -> bool {
287 let mut cursor = node.walk();
288 for child in node.children(&mut cursor) {
289 if child.kind() == "expression_statement" {
290 let assignment = child
292 .children(&mut child.walk())
293 .find(|c| c.kind() == "assignment" || c.kind() == "augmented_assignment");
294
295 if let Some(assignment) = assignment
296 && let Some(left) = assignment.child_by_field_name("left")
297 && let Ok(left_text) = left.utf8_text(content)
298 && left_text.trim() == "__all__"
299 {
300 return true;
301 }
302 }
303 }
304 false
305}
306
307#[allow(clippy::too_many_lines)]
312fn walk_tree_for_graph(
313 node: Node,
314 content: &[u8],
315 ast_graph: &ASTGraph,
316 helper: &mut GraphBuildHelper,
317 has_all: bool,
318 guard: &mut sqry_core::query::security::RecursionGuard,
319 scope_tree: &mut local_scopes::PythonScopeTree,
320) -> GraphResult<()> {
321 guard.enter().map_err(|e| GraphBuilderError::ParseError {
322 span: Span::default(),
323 reason: format!("Recursion limit exceeded: {e}"),
324 })?;
325
326 match node.kind() {
327 "class_definition" => {
328 if let Some(name_node) = node.child_by_field_name("name")
330 && let Ok(class_name) = name_node.utf8_text(content)
331 {
332 let span = span_from_node(node);
333
334 let qualified_name = class_name.to_string();
336
337 let class_id = helper.add_class(&qualified_name, Some(span));
339
340 process_class_inheritance(node, content, class_id, helper);
342
343 if !has_all && is_module_level(node) && is_public_name(class_name) {
347 export_from_file_module(helper, class_id);
348 }
349 }
350 }
351 "expression_statement" => {
352 process_all_assignment(node, content, helper);
354
355 process_annotated_assignment(node, content, ast_graph, helper);
357 }
358 "function_definition" => {
359 if let Some(call_context) = ast_graph.get_callable_context(node.id()) {
361 let span = span_from_node(node);
362
363 let func_name = node
365 .child_by_field_name("name")
366 .and_then(|n| n.utf8_text(content).ok())
367 .unwrap_or("");
368 let visibility = extract_visibility_from_name(func_name);
369
370 let is_property = has_property_decorator(node, content);
372
373 let return_type = extract_return_type_annotation(node, content);
376
377 let return_type_source = extract_return_type_source_text(node, content);
383
384 let function_id = if is_property && call_context.is_method {
386 helper.add_node_with_visibility(
388 &call_context.qualified_name,
389 Some(span),
390 sqry_core::graph::unified::node::NodeKind::Property,
391 Some(visibility),
392 )
393 } else if call_context.is_method {
394 if return_type.is_some() {
396 helper.add_method_with_signature(
397 &call_context.qualified_name,
398 Some(span),
399 call_context.is_async,
400 false, Some(visibility),
402 return_type.as_deref(),
403 )
404 } else {
405 helper.add_method_with_visibility(
406 &call_context.qualified_name,
407 Some(span),
408 call_context.is_async,
409 false,
410 Some(visibility),
411 )
412 }
413 } else {
414 if return_type.is_some() {
416 helper.add_function_with_signature(
417 &call_context.qualified_name,
418 Some(span),
419 call_context.is_async,
420 false, Some(visibility),
422 return_type.as_deref(),
423 )
424 } else {
425 helper.add_function_with_visibility(
426 &call_context.qualified_name,
427 Some(span),
428 call_context.is_async,
429 false,
430 Some(visibility),
431 )
432 }
433 };
434
435 if !(is_property && call_context.is_method)
453 && let Some(annotation_text) = return_type_source.as_deref()
454 && let Some(return_type_node) = node.child_by_field_name("return_type")
455 {
456 let type_span = span_from_node(return_type_node);
457 let type_id = helper.add_type(annotation_text, Some(type_span));
458 helper.add_typeof_edge_with_context(
459 function_id,
460 type_id,
461 Some(TypeOfContext::Return),
462 Some(0),
463 Some(call_context.qualified_name.as_str()),
464 );
465 helper.add_reference_edge(function_id, type_id);
466 }
467
468 if let Some((http_method, route_path)) = extract_route_decorator_info(node, content)
470 {
471 let endpoint_name = format!("route::{http_method}::{route_path}");
472 let endpoint_id = helper.add_endpoint(&endpoint_name, Some(span));
473 helper.add_contains_edge(endpoint_id, function_id);
474 }
475
476 process_function_parameters(node, content, ast_graph, helper);
478
479 if !has_all
481 && !call_context.is_method
482 && is_module_level(node)
483 && let Some(name_node) = node.child_by_field_name("name")
484 && let Ok(func_name) = name_node.utf8_text(content)
485 && is_public_name(func_name)
486 {
487 export_from_file_module(helper, function_id);
488 }
489 }
490 }
491 "call" => {
492 let is_ffi = build_ffi_call_edge(ast_graph, node, content, helper)?;
494 if !is_ffi {
495 if let Ok(Some((caller_qname, callee_qname, argument_count, is_awaited))) =
497 build_call_for_staging(ast_graph, node, content)
498 {
499 let call_context = ast_graph.get_callable_context(node.id());
501 let _is_async = call_context.is_some_and(|c| c.is_async);
502
503 let call_span = span_from_node(node);
504 let source_id =
505 helper.ensure_callee(&caller_qname, call_span, CalleeKindHint::Function);
506 let target_id =
507 helper.ensure_callee(&callee_qname, call_span, CalleeKindHint::Function);
508
509 let argument_count = u8::try_from(argument_count).unwrap_or(u8::MAX);
511 helper.add_call_edge_full_with_span(
512 source_id,
513 target_id,
514 argument_count,
515 is_awaited,
516 vec![call_span],
517 );
518 }
519 }
520 }
521 "import_statement" | "import_from_statement" => {
522 if let Ok(Some((from_qname, to_qname))) =
524 build_import_for_staging(node, content, helper)
525 {
526 let from_id = helper.add_import(&from_qname, None);
528 let to_id = helper.add_import(&to_qname, Some(span_from_node(node)));
529
530 helper.add_import_edge(from_id, to_id);
532
533 if is_native_extension_import(&to_qname) {
535 build_native_import_ffi_edge(&to_qname, node, helper);
536 }
537 }
538 }
539 "identifier" => {
540 local_scopes::handle_identifier_for_reference(node, content, scope_tree, helper);
542 }
543 _ => {}
544 }
545
546 let mut cursor = node.walk();
548 for child in node.children(&mut cursor) {
549 walk_tree_for_graph(
550 child, content, ast_graph, helper, has_all, guard, scope_tree,
551 )?;
552 }
553
554 guard.exit();
555 Ok(())
556}
557
558fn build_call_for_staging(
560 ast_graph: &ASTGraph,
561 call_node: Node<'_>,
562 content: &[u8],
563) -> GraphResult<Option<(String, String, usize, bool)>> {
564 let module_context;
566 let call_context = if let Some(ctx) = ast_graph.get_callable_context(call_node.id()) {
567 ctx
568 } else {
569 module_context = CallContext {
571 qualified_name: "<module>".to_string(),
572 span: (0, content.len()),
573 is_async: false,
574 is_method: false,
575 class_name: None,
576 };
577 &module_context
578 };
579
580 let Some(callee_expr) = call_node.child_by_field_name("function") else {
581 return Ok(None);
582 };
583
584 let callee_text = callee_expr
585 .utf8_text(content)
586 .map_err(|_| GraphBuilderError::ParseError {
587 span: span_from_node(call_node),
588 reason: "failed to read call expression".to_string(),
589 })?
590 .trim()
591 .to_string();
592
593 if callee_text.is_empty() {
594 return Ok(None);
595 }
596
597 let callee_simple = simple_name(&callee_text);
598 if callee_simple.is_empty() {
599 return Ok(None);
600 }
601
602 let caller_qname = call_context.qualified_name();
604 let target_qname = if let Some(method_name) = callee_text.strip_prefix("self.") {
605 if let Some(class_name) = &call_context.class_name {
607 format!("{}.{}", class_name, simple_name(method_name))
608 } else {
609 callee_simple.to_string()
610 }
611 } else {
612 callee_simple.to_string()
613 };
614
615 let argument_count = count_arguments(call_node);
616 let is_awaited = is_awaited_call(call_node);
617 Ok(Some((
618 caller_qname,
619 target_qname,
620 argument_count,
621 is_awaited,
622 )))
623}
624
625fn build_import_for_staging(
627 import_node: Node<'_>,
628 content: &[u8],
629 helper: &GraphBuildHelper,
630) -> GraphResult<Option<(String, String)>> {
631 let raw_module_name = if import_node.kind() == "import_statement" {
633 import_node
634 .child_by_field_name("name")
635 .and_then(|n| extract_module_name(n, content))
636 } else if import_node.kind() == "import_from_statement" {
637 import_node
638 .child_by_field_name("module_name")
639 .and_then(|n| extract_module_name(n, content))
640 } else {
641 None
642 };
643
644 let module_name = if raw_module_name.is_none() && import_node.kind() == "import_from_statement"
646 {
647 if let Ok(import_text) = import_node.utf8_text(content) {
648 if let Some(from_idx) = import_text.find("from") {
649 if let Some(import_idx) = import_text.find("import") {
650 let between = import_text[from_idx + 4..import_idx].trim();
651 if between.starts_with('.') {
652 Some(between.to_string())
653 } else {
654 None
655 }
656 } else {
657 None
658 }
659 } else {
660 None
661 }
662 } else {
663 None
664 }
665 } else {
666 raw_module_name
667 };
668
669 let Some(module_name) = module_name else {
670 return Ok(None);
671 };
672
673 if module_name.is_empty() {
674 return Ok(None);
675 }
676
677 let resolved_path = sqry_core::graph::resolve_python_import(
679 std::path::Path::new(helper.file_path()),
680 &module_name,
681 import_node.kind() == "import_from_statement",
682 )?;
683
684 Ok(Some((helper.file_path().to_string(), resolved_path)))
686}
687
688fn span_from_node(node: Node<'_>) -> Span {
689 let start = node.start_position();
690 let end = node.end_position();
691 Span::new(
692 sqry_core::graph::node::Position::new(start.row, start.column),
693 sqry_core::graph::node::Position::new(end.row, end.column),
694 )
695}
696
697fn count_arguments(call_node: Node<'_>) -> usize {
698 call_node
699 .child_by_field_name("arguments")
700 .map_or(0, |args| {
701 args.named_children(&mut args.walk())
702 .filter(|child| {
703 !matches!(child.kind(), "," | "(" | ")")
705 })
706 .count()
707 })
708}
709
710fn is_awaited_call(call_node: Node<'_>) -> bool {
711 let mut current = call_node.parent();
712 while let Some(node) = current {
713 let kind = node.kind();
714 if kind == "await" || kind == "await_expression" {
715 return true;
716 }
717 current = node.parent();
718 }
719 false
720}
721
/// Returns the text after the last `.` of a dotted name, or the whole input
/// when it contains no dot.
fn simple_name(qualified: &str) -> &str {
    match qualified.rfind('.') {
        Some(last_dot) => &qualified[last_dot + 1..],
        None => qualified,
    }
}
729
/// Reduces a native library path to a bare library name.
///
/// The directory part is dropped first. Then:
/// * a versioned ELF name is cut at `.so.` (`libc.so.6` -> `libc`);
/// * otherwise a trailing `.so`, `.dll` or `.dylib` is removed, even when the
///   stem itself contains dots (`libpython3.11.so` -> `libpython3.11`);
/// * anything else is returned unchanged.
fn ffi_library_simple_name(library_path: &str) -> String {
    use std::path::Path;

    const LIBRARY_SUFFIXES: [&str; 3] = [".so", ".dll", ".dylib"];

    // Fall back to the raw input when it has no usable file-name component.
    let filename = Path::new(library_path)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or(library_path);

    if let Some(so_pos) = filename.find(".so.") {
        return filename[..so_pos].to_string();
    }

    for suffix in LIBRARY_SUFFIXES {
        if let Some(stem) = filename.strip_suffix(suffix) {
            // A name that is nothing but an extension is left untouched.
            if !stem.is_empty() {
                return stem.to_string();
            }
        }
    }

    filename.to_string()
}
770
/// A name is public unless it begins with an underscore.
fn is_public_name(name: &str) -> bool {
    name.strip_prefix('_').is_none()
}
779
780fn is_module_level(node: Node<'_>) -> bool {
785 let mut current = node.parent();
787 while let Some(parent) = current {
788 match parent.kind() {
789 "module" => return true,
790 "function_definition" | "class_definition" => return false,
791 _ => current = parent.parent(),
792 }
793 }
794 false
795}
796
/// Name of the module node that `export_from_file_module` uses as the source
/// of export edges.
797const FILE_MODULE_NAME: &str = "<file_module>";
802
803fn export_from_file_module(
804 helper: &mut GraphBuildHelper,
805 exported: sqry_core::graph::unified::node::NodeId,
806) {
807 let module_id = helper.add_module(FILE_MODULE_NAME, None);
808 helper.add_export_edge(module_id, exported);
809}
810
811fn extract_module_name(node: Node<'_>, content: &[u8]) -> Option<String> {
817 if node.kind() == "aliased_import" {
819 return node
821 .child_by_field_name("name")
822 .and_then(|name_node| name_node.utf8_text(content).ok())
823 .map(std::string::ToString::to_string);
824 }
825
826 node.utf8_text(content)
828 .ok()
829 .map(std::string::ToString::to_string)
830}
831
832fn process_all_assignment(node: Node<'_>, content: &[u8], helper: &mut GraphBuildHelper) {
841 let assignment = node
843 .children(&mut node.walk())
844 .find(|child| child.kind() == "assignment" || child.kind() == "augmented_assignment");
845
846 let Some(assignment) = assignment else {
847 return;
848 };
849
850 let left = assignment.child_by_field_name("left");
852 let Some(left) = left else {
853 return;
854 };
855
856 let Ok(left_text) = left.utf8_text(content) else {
857 return;
858 };
859
860 if left_text.trim() != "__all__" {
861 return;
862 }
863
864 let right = assignment.child_by_field_name("right");
866 let Some(right) = right else {
867 return;
868 };
869
870 if right.kind() == "list" || right.kind() == "tuple" {
872 process_all_list(right, content, helper);
873 }
874}
875
876fn process_all_list(list_node: Node<'_>, content: &[u8], helper: &mut GraphBuildHelper) {
878 for child in list_node.children(&mut list_node.walk()) {
879 if child.kind() == "string"
881 && let Some(export_name) = extract_string_content(child, content)
882 && !export_name.is_empty()
883 {
884 let span = span_from_node(child);
888 let export_id = helper.add_function(&export_name, Some(span), false, false);
889
890 export_from_file_module(helper, export_id);
892 }
893 }
894}
895
896fn extract_string_content(string_node: Node<'_>, content: &[u8]) -> Option<String> {
898 let Ok(text) = string_node.utf8_text(content) else {
901 return None;
902 };
903
904 let text = text.trim();
905
906 let stripped = text
908 .trim_start_matches(|c: char| {
909 c == 'r'
910 || c == 'b'
911 || c == 'f'
912 || c == 'u'
913 || c == 'R'
914 || c == 'B'
915 || c == 'F'
916 || c == 'U'
917 })
918 .trim_start_matches("'''")
919 .trim_end_matches("'''")
920 .trim_start_matches("\"\"\"")
921 .trim_end_matches("\"\"\"")
922 .trim_start_matches('\'')
923 .trim_end_matches('\'')
924 .trim_start_matches('"')
925 .trim_end_matches('"');
926
927 Some(stripped.to_string())
928}
929
930fn process_class_inheritance(
939 class_node: Node<'_>,
940 content: &[u8],
941 class_id: UnifiedNodeId,
942 helper: &mut GraphBuildHelper,
943) {
944 let superclasses = class_node.child_by_field_name("superclasses");
947
948 let Some(superclasses) = superclasses else {
949 return;
950 };
951
952 for child in superclasses.children(&mut superclasses.walk()) {
954 if child.kind() == "keyword_argument" {
955 continue;
957 }
958
959 match child.kind() {
960 "identifier" => {
961 if let Ok(base_name) = child.utf8_text(content) {
963 let base_name = base_name.trim();
964 if !base_name.is_empty() {
965 let span = span_from_node(child);
966 let base_id = helper.add_class(base_name, Some(span));
967 helper.add_inherits_edge(class_id, base_id);
968 }
969 }
970 }
971 "attribute" => {
972 if let Ok(base_name) = child.utf8_text(content) {
974 let base_name = base_name.trim();
975 if !base_name.is_empty() {
976 let span = span_from_node(child);
977 let base_id = helper.add_class(base_name, Some(span));
978 helper.add_inherits_edge(class_id, base_id);
979 }
980 }
981 }
982 "call" => {
983 if let Some(func) = child.child_by_field_name("function")
986 && let Ok(base_name) = func.utf8_text(content)
987 {
988 let base_name = base_name.trim();
989 if !base_name.is_empty() {
990 let span = span_from_node(child);
991 let base_id = helper.add_class(base_name, Some(span));
992 helper.add_inherits_edge(class_id, base_id);
993 }
994 }
995 }
996 "subscript" => {
997 if let Some(value) = child.child_by_field_name("value")
1000 && let Ok(base_name) = value.utf8_text(content)
1001 {
1002 let base_name = base_name.trim();
1003 if !base_name.is_empty() {
1004 let span = span_from_node(child);
1005 let base_id = helper.add_class(base_name, Some(span));
1006 helper.add_inherits_edge(class_id, base_id);
1007 }
1008 }
1009 }
1010 _ => {}
1011 }
1012 }
1013}
1014
/// Description of one function or method, as recorded by `walk_ast`.
#[derive(Debug, Clone)]
struct CallContext {
    /// Dotted name built from the enclosing scopes and the callable's own name.
    qualified_name: String,
    /// Start and end byte offsets of the definition.
    #[allow(dead_code)]
    span: (usize, usize),
    /// Whether the definition carries an `async` token.
    is_async: bool,
    /// Whether the callable was defined while a class was on the class stack.
    is_method: bool,
    /// Qualified name of the innermost enclosing class, if any.
    class_name: Option<String>,
}

impl CallContext {
    /// Returns an owned copy of the qualified name.
    fn qualified_name(&self) -> String {
        self.qualified_name.to_owned()
    }
}
1034
1035struct ASTGraph {
1036 contexts: Vec<CallContext>,
1037 node_to_context: HashMap<usize, usize>,
1038}
1039
1040impl ASTGraph {
1041 fn from_tree(tree: &Tree, content: &[u8], max_depth: usize) -> Result<Self, String> {
1042 let mut contexts = Vec::new();
1043 let mut node_to_context = HashMap::new();
1044 let mut scope_stack: Vec<String> = Vec::new();
1045 let mut class_stack: Vec<String> = Vec::new();
1046
1047 walk_ast(
1048 tree.root_node(),
1049 content,
1050 &mut contexts,
1051 &mut node_to_context,
1052 &mut scope_stack,
1053 &mut class_stack,
1054 max_depth,
1055 )?;
1056
1057 Ok(Self {
1058 contexts,
1059 node_to_context,
1060 })
1061 }
1062
1063 #[allow(dead_code)] fn contexts(&self) -> &[CallContext] {
1065 &self.contexts
1066 }
1067
1068 fn get_callable_context(&self, node_id: usize) -> Option<&CallContext> {
1069 self.node_to_context
1070 .get(&node_id)
1071 .and_then(|idx| self.contexts.get(*idx))
1072 }
1073}
1074
1075fn walk_ast(
1076 node: Node,
1077 content: &[u8],
1078 contexts: &mut Vec<CallContext>,
1079 node_to_context: &mut HashMap<usize, usize>,
1080 scope_stack: &mut Vec<String>,
1081 class_stack: &mut Vec<String>,
1082 max_depth: usize,
1083) -> Result<(), String> {
1084 if scope_stack.len() > max_depth {
1085 return Ok(());
1086 }
1087
1088 match node.kind() {
1089 "class_definition" => {
1090 let name_node = node
1091 .child_by_field_name("name")
1092 .ok_or_else(|| "class_definition missing name".to_string())?;
1093 let class_name = name_node
1094 .utf8_text(content)
1095 .map_err(|_| "failed to read class name".to_string())?;
1096
1097 let qualified_class = if scope_stack.is_empty() {
1099 class_name.to_string()
1100 } else {
1101 format!("{}.{}", scope_stack.join("."), class_name)
1102 };
1103
1104 class_stack.push(qualified_class.clone());
1105 scope_stack.push(class_name.to_string());
1106
1107 if let Some(body) = node.child_by_field_name("body") {
1109 let mut cursor = body.walk();
1110 for child in body.children(&mut cursor) {
1111 walk_ast(
1112 child,
1113 content,
1114 contexts,
1115 node_to_context,
1116 scope_stack,
1117 class_stack,
1118 max_depth,
1119 )?;
1120 }
1121 }
1122
1123 class_stack.pop();
1124 scope_stack.pop();
1125 }
1126 "function_definition" => {
1127 let name_node = node
1128 .child_by_field_name("name")
1129 .ok_or_else(|| "function_definition missing name".to_string())?;
1130 let func_name = name_node
1131 .utf8_text(content)
1132 .map_err(|_| "failed to read function name".to_string())?;
1133
1134 let is_async = node
1136 .children(&mut node.walk())
1137 .any(|child| child.kind() == "async");
1138
1139 let qualified_func = if scope_stack.is_empty() {
1141 func_name.to_string()
1142 } else {
1143 format!("{}.{}", scope_stack.join("."), func_name)
1144 };
1145
1146 let is_method = !class_stack.is_empty();
1148 let class_name = class_stack.last().cloned();
1149
1150 let context_idx = contexts.len();
1151 contexts.push(CallContext {
1152 qualified_name: qualified_func.clone(),
1153 span: (node.start_byte(), node.end_byte()),
1154 is_async,
1155 is_method,
1156 class_name,
1157 });
1158
1159 node_to_context.insert(node.id(), context_idx);
1162
1163 if let Some(body) = node.child_by_field_name("body") {
1165 associate_descendants(body, context_idx, node_to_context);
1166 }
1167
1168 scope_stack.push(func_name.to_string());
1169
1170 if let Some(body) = node.child_by_field_name("body") {
1172 let mut cursor = body.walk();
1173 for child in body.children(&mut cursor) {
1174 walk_ast(
1175 child,
1176 content,
1177 contexts,
1178 node_to_context,
1179 scope_stack,
1180 class_stack,
1181 max_depth,
1182 )?;
1183 }
1184 }
1185
1186 scope_stack.pop();
1187 }
1188 _ => {
1189 let mut cursor = node.walk();
1191 for child in node.children(&mut cursor) {
1192 walk_ast(
1193 child,
1194 content,
1195 contexts,
1196 node_to_context,
1197 scope_stack,
1198 class_stack,
1199 max_depth,
1200 )?;
1201 }
1202 }
1203 }
1204
1205 Ok(())
1206}
1207
1208fn associate_descendants(
1209 node: Node,
1210 context_idx: usize,
1211 node_to_context: &mut HashMap<usize, usize>,
1212) {
1213 node_to_context.insert(node.id(), context_idx);
1214
1215 let mut stack = vec![node];
1216 while let Some(current) = stack.pop() {
1217 node_to_context.insert(current.id(), context_idx);
1218
1219 let mut cursor = current.walk();
1220 for child in current.children(&mut cursor) {
1221 stack.push(child);
1222 }
1223 }
1224}
1225
1226fn build_ffi_call_edge(
1241 ast_graph: &ASTGraph,
1242 call_node: Node<'_>,
1243 content: &[u8],
1244 helper: &mut GraphBuildHelper,
1245) -> GraphResult<bool> {
1246 let Some(callee_expr) = call_node.child_by_field_name("function") else {
1247 return Ok(false);
1248 };
1249
1250 let callee_text = callee_expr
1251 .utf8_text(content)
1252 .map_err(|_| GraphBuilderError::ParseError {
1253 span: span_from_node(call_node),
1254 reason: "failed to read call expression".to_string(),
1255 })?
1256 .trim();
1257
1258 if is_ctypes_load_call(callee_text) {
1260 return Ok(build_ctypes_ffi_edge(
1261 ast_graph,
1262 call_node,
1263 content,
1264 callee_text,
1265 helper,
1266 ));
1267 }
1268
1269 if is_cffi_dlopen_call(callee_text) {
1271 return Ok(build_cffi_ffi_edge(ast_graph, call_node, content, helper));
1272 }
1273
1274 Ok(false)
1275}
1276
/// Reports whether the callee text is one of the recognised ctypes library
/// loaders, written either with or without the `ctypes.` prefix.
fn is_ctypes_load_call(callee_text: &str) -> bool {
    matches!(
        callee_text,
        // Loader classes.
        "ctypes.CDLL"
            | "ctypes.WinDLL"
            | "ctypes.OleDLL"
            | "ctypes.PyDLL"
            | "CDLL"
            | "WinDLL"
            | "OleDLL"
            | "PyDLL"
            // Loader objects.
            | "ctypes.cdll.LoadLibrary"
            | "ctypes.windll.LoadLibrary"
            | "ctypes.oledll.LoadLibrary"
            | "cdll.LoadLibrary"
            | "windll.LoadLibrary"
            | "oledll.LoadLibrary"
    )
}
1305
/// Reports whether the callee text is one of the recognised cffi `dlopen`
/// spellings.
fn is_cffi_dlopen_call(callee_text: &str) -> bool {
    matches!(
        callee_text,
        "ffi.dlopen" | "cffi.dlopen" | "_ffi.dlopen" | "FFI().dlopen"
    )
}
1321
1322fn build_ctypes_ffi_edge(
1324 ast_graph: &ASTGraph,
1325 call_node: Node<'_>,
1326 content: &[u8],
1327 callee_text: &str,
1328 helper: &mut GraphBuildHelper,
1329) -> bool {
1330 let caller_id = get_ffi_caller_node_id(ast_graph, call_node, content, helper);
1332
1333 let convention = if callee_text.contains("WinDLL")
1335 || callee_text.contains("windll")
1336 || callee_text.contains("OleDLL")
1337 {
1338 FfiConvention::Stdcall
1339 } else {
1340 FfiConvention::C
1341 };
1342
1343 let library_name = extract_ffi_library_name(call_node, content)
1345 .unwrap_or_else(|| "ctypes::unknown".to_string());
1346
1347 let ffi_name = format!("native::{}", ffi_library_simple_name(&library_name));
1348 let ffi_node_id = helper.add_module(&ffi_name, Some(span_from_node(call_node)));
1349
1350 helper.add_ffi_edge(caller_id, ffi_node_id, convention);
1352
1353 true
1354}
1355
1356fn build_cffi_ffi_edge(
1358 ast_graph: &ASTGraph,
1359 call_node: Node<'_>,
1360 content: &[u8],
1361 helper: &mut GraphBuildHelper,
1362) -> bool {
1363 let caller_id = get_ffi_caller_node_id(ast_graph, call_node, content, helper);
1365
1366 let library_name =
1368 extract_ffi_library_name(call_node, content).unwrap_or_else(|| "cffi::unknown".to_string());
1369
1370 let ffi_name = format!("native::{}", ffi_library_simple_name(&library_name));
1371 let ffi_node_id = helper.add_module(&ffi_name, Some(span_from_node(call_node)));
1372
1373 helper.add_ffi_edge(caller_id, ffi_node_id, FfiConvention::C);
1375
1376 true
1377}
1378
1379fn get_ffi_caller_node_id(
1381 ast_graph: &ASTGraph,
1382 node: Node<'_>,
1383 content: &[u8],
1384 helper: &mut GraphBuildHelper,
1385) -> UnifiedNodeId {
1386 let module_context;
1387 let call_context = if let Some(ctx) = ast_graph.get_callable_context(node.id()) {
1388 ctx
1389 } else {
1390 module_context = CallContext {
1391 qualified_name: "<module>".to_string(),
1392 span: (0, content.len()),
1393 is_async: false,
1394 is_method: false,
1395 class_name: None,
1396 };
1397 &module_context
1398 };
1399
1400 let caller_span = Some(Span::from_bytes(call_context.span.0, call_context.span.1));
1401 helper.ensure_function(
1402 &call_context.qualified_name(),
1403 caller_span,
1404 call_context.is_async,
1405 false,
1406 )
1407}
1408
1409fn extract_ffi_library_name(call_node: Node<'_>, content: &[u8]) -> Option<String> {
1411 let args = call_node.child_by_field_name("arguments")?;
1412
1413 let mut cursor = args.walk();
1414 let first_arg = args
1415 .children(&mut cursor)
1416 .find(|child| !matches!(child.kind(), "(" | ")" | ","))?;
1417
1418 if first_arg.kind() == "string" {
1420 return extract_string_content(first_arg, content);
1421 }
1422
1423 if first_arg.kind() == "identifier" {
1425 let text = first_arg.utf8_text(content).ok()?;
1426 return Some(format!("${}", text.trim())); }
1428
1429 None
1430}
1431
1432fn is_native_extension_import(module_name: &str) -> bool {
1439 if module_name.starts_with('_') && !module_name.starts_with("__") {
1441 return true;
1442 }
1443
1444 let base_module = module_name.split('.').next().unwrap_or(module_name);
1446
1447 STD_C_MODULES.contains(&base_module) || THIRD_PARTY_C_PACKAGES.contains(&base_module)
1448}
1449
1450fn build_native_import_ffi_edge(
1452 module_name: &str,
1453 import_node: Node<'_>,
1454 helper: &mut GraphBuildHelper,
1455) {
1456 let file_path = helper.file_path().to_string();
1458 let importer_id = helper.add_module(&file_path, None);
1459
1460 let ffi_name = format!("native::{}", simple_name(module_name));
1462 let ffi_node_id = helper.add_module(&ffi_name, Some(span_from_node(import_node)));
1463
1464 helper.add_ffi_edge(importer_id, ffi_node_id, FfiConvention::C);
1466}
1467
/// Decorator method names that directly name an HTTP verb (as in
/// `app.get(...)`). `parse_route_decorator_text` compares the lowercased
/// method name against this list and reports the verb in upper case.
const ROUTE_METHOD_NAMES: &[&str] = &["get", "post", "put", "delete", "patch"];

/// Receiver identifiers accepted as route-registering objects. Only the last
/// dot-separated segment of the decorator's receiver is compared against
/// this list.
const ROUTE_RECEIVER_NAMES: &[&str] = &["app", "router", "blueprint"];
1479
1480fn extract_route_decorator_info(func_node: Node<'_>, content: &[u8]) -> Option<(String, String)> {
1493 let parent = func_node.parent()?;
1495 if parent.kind() != "decorated_definition" {
1496 return None;
1497 }
1498
1499 let mut cursor = parent.walk();
1501 for child in parent.children(&mut cursor) {
1502 if child.kind() != "decorator" {
1503 continue;
1504 }
1505
1506 let Ok(decorator_text) = child.utf8_text(content) else {
1507 continue;
1508 };
1509 let decorator_text = decorator_text.trim();
1510
1511 let without_at = decorator_text.strip_prefix('@')?;
1513
1514 if let Some(result) = parse_route_decorator_text(without_at) {
1516 return Some(result);
1517 }
1518 }
1519
1520 None
1521}
1522
1523fn parse_route_decorator_text(text: &str) -> Option<(String, String)> {
1531 let paren_pos = text.find('(')?;
1534 let accessor = &text[..paren_pos];
1535 let args_text = &text[paren_pos + 1..];
1536
1537 let dot_pos = accessor.rfind('.')?;
1539 let receiver = &accessor[..dot_pos];
1540 let method_name = &accessor[dot_pos + 1..];
1541
1542 let receiver_base = receiver.rsplit('.').next().unwrap_or(receiver);
1545 if !ROUTE_RECEIVER_NAMES.contains(&receiver_base) {
1546 return None;
1547 }
1548
1549 let path = extract_path_from_decorator_args(args_text)?;
1551
1552 let method_lower = method_name.to_ascii_lowercase();
1554 if ROUTE_METHOD_NAMES.contains(&method_lower.as_str()) {
1555 return Some((method_lower.to_ascii_uppercase(), path));
1557 }
1558
1559 if method_lower == "route" {
1560 let http_method = extract_method_from_route_args(args_text);
1562 return Some((http_method, path));
1563 }
1564
1565 None
1566}
1567
/// Extracts the route path from the raw argument text of a route decorator.
///
/// `args_text` is everything after the decorator's opening parenthesis. The
/// path is the content of the first quoted string (single or double quotes),
/// provided that string belongs to the first argument, meaning no comma
/// precedes its opening quote. This stops a later argument such as
/// `methods=['POST']` from being mistaken for the path when the first
/// argument is not a literal (for example `app.route(PATH, methods=['POST'])`).
///
/// Returns `None` when there is no such string, when it is unterminated, or
/// when it is empty.
fn extract_path_from_decorator_args(args_text: &str) -> Option<String> {
    let trimmed = args_text.trim();

    // Whichever quote character appears first opens the literal.
    let start_pos = trimmed.find(|c: char| matches!(c, '\'' | '"'))?;

    // A comma before the opening quote means the literal is not the first argument.
    if trimmed[..start_pos].contains(',') {
        return None;
    }

    let quote_char = trimmed[start_pos..].chars().next()?;
    // Both quote characters are ASCII, so the literal starts one byte later.
    let after_open = &trimmed[start_pos + 1..];
    let close_pos = after_open.find(quote_char)?;
    let path = &after_open[..close_pos];

    if path.is_empty() {
        None
    } else {
        Some(path.to_string())
    }
}
1606
1607fn extract_method_from_route_args(args_text: &str) -> String {
1612 let Some(methods_pos) = args_text.find("methods") else {
1614 return "GET".to_string();
1615 };
1616
1617 let after_methods = &args_text[methods_pos..];
1619 let Some(bracket_pos) = after_methods.find('[') else {
1620 return "GET".to_string();
1621 };
1622
1623 let after_bracket = &after_methods[bracket_pos + 1..];
1624
1625 let method_str = extract_first_string_literal(after_bracket);
1627 match method_str {
1628 Some(m) => m.to_ascii_uppercase(),
1629 None => "GET".to_string(),
1630 }
1631}
1632
/// Returns the content of the first quoted string found in `text`.
///
/// Whichever of `'` or `"` occurs first opens the literal, and the next
/// occurrence of that same character closes it. Yields `None` when no quote is
/// present, when the literal is never closed, or when it is empty.
fn extract_first_string_literal(text: &str) -> Option<String> {
    let body = text.trim();

    let (open_at, quote) = body
        .char_indices()
        .find(|&(_, ch)| ch == '\'' || ch == '"')?;

    // Both quote characters are ASCII, so the content starts one byte later.
    let remainder = &body[open_at + 1..];
    let (literal, _after_close) = remainder.split_once(quote)?;

    if literal.is_empty() {
        None
    } else {
        Some(literal.to_owned())
    }
}
1664
1665fn has_property_decorator(func_node: Node<'_>, content: &[u8]) -> bool {
1686 let Some(parent) = func_node.parent() else {
1688 return false;
1689 };
1690
1691 if parent.kind() != "decorated_definition" {
1693 return false;
1694 }
1695
1696 let mut cursor = parent.walk();
1698 for child in parent.children(&mut cursor) {
1699 if child.kind() == "decorator" {
1700 if let Ok(decorator_text) = child.utf8_text(content) {
1702 let decorator_text = decorator_text.trim();
1703 if decorator_text == "@property"
1705 || decorator_text.starts_with("@property(")
1706 || decorator_text.starts_with("@property (")
1707 {
1708 return true;
1709 }
1710 }
1711 }
1712 }
1713
1714 false
1715}
1716
/// Maps a Python identifier to a visibility label based on its underscores.
///
/// * `"private"`: starts with `__` and does not end with `__`.
/// * `"protected"`: any other name starting with `_` (this includes dunder
///   names such as `__init__`).
/// * `"public"`: everything else.
fn extract_visibility_from_name(name: &str) -> &'static str {
    match name {
        mangled if mangled.starts_with("__") && !mangled.ends_with("__") => "private",
        underscored if underscored.starts_with('_') => "protected",
        _ => "public",
    }
}
1732
1733fn find_containing_scope(node: Node<'_>, content: &[u8], ast_graph: &ASTGraph) -> String {
1745 let mut current = node;
1746 let mut found_class_name: Option<String> = None;
1747
1748 while let Some(parent) = current.parent() {
1750 match parent.kind() {
1751 "function_definition" => {
1752 if let Some(ctx) = ast_graph.get_callable_context(parent.id()) {
1754 return ctx.qualified_name.clone();
1755 }
1756 }
1757 "class_definition" => {
1758 if found_class_name.is_none() {
1761 if let Some(name_node) = parent.child_by_field_name("name")
1763 && let Ok(class_name) = name_node.utf8_text(content)
1764 {
1765 found_class_name = Some(class_name.to_string());
1766 }
1767 }
1768 }
1769 _ => {}
1770 }
1771 current = parent;
1772 }
1773
1774 found_class_name.unwrap_or_default()
1776}
1777
1778fn extract_return_type_annotation(func_node: Node<'_>, content: &[u8]) -> Option<String> {
1785 let return_type_node = func_node.child_by_field_name("return_type")?;
1786 extract_type_from_node(return_type_node, content)
1787}
1788
1789fn extract_return_type_source_text(func_node: Node<'_>, content: &[u8]) -> Option<String> {
1808 let return_type_node = func_node.child_by_field_name("return_type")?;
1809 let text = return_type_node.utf8_text(content).ok()?.trim();
1810 if text.is_empty() {
1811 None
1812 } else {
1813 Some(text.to_string())
1814 }
1815}
1816
1817fn process_function_parameters(
1824 func_node: Node<'_>,
1825 content: &[u8],
1826 ast_graph: &ASTGraph,
1827 helper: &mut GraphBuildHelper,
1828) {
1829 let Some(params_node) = func_node.child_by_field_name("parameters") else {
1830 return;
1831 };
1832
1833 let scope_prefix = ast_graph
1835 .get_callable_context(func_node.id())
1836 .map_or("", |ctx| ctx.qualified_name.as_str());
1837
1838 for param in params_node.children(&mut params_node.walk()) {
1840 match param.kind() {
1843 "typed_parameter" | "typed_default_parameter" => {
1844 process_typed_parameter(param, content, scope_prefix, helper);
1845 }
1846 "identifier" | "default_parameter" => {}
1850 _ => {
1851 if param.child_by_field_name("type").is_some() {
1854 process_typed_parameter(param, content, scope_prefix, helper);
1855 }
1856 }
1857 }
1858 }
1859}
1860
1861fn process_typed_parameter(
1866 param: Node<'_>,
1867 content: &[u8],
1868 scope_prefix: &str,
1869 helper: &mut GraphBuildHelper,
1870) {
1871 let param_name = if let Some(name_node) = param.child_by_field_name("name") {
1873 name_node.utf8_text(content).ok()
1874 } else {
1875 param
1877 .children(&mut param.walk())
1878 .find(|c| c.kind() == "identifier")
1879 .and_then(|n| n.utf8_text(content).ok())
1880 };
1881
1882 let Some(param_name) = param_name else {
1883 return;
1884 };
1885
1886 if param_name == "self" || param_name == "cls" {
1888 return;
1889 }
1890
1891 let Some(type_node) = param.child_by_field_name("type") else {
1893 return;
1894 };
1895
1896 let Some(type_name) = extract_type_from_node(type_node, content) else {
1897 return;
1898 };
1899
1900 let qualified_param_name = if scope_prefix.is_empty() {
1903 format!(":{param_name}")
1905 } else {
1906 format!("{scope_prefix}:{param_name}")
1907 };
1908
1909 let param_id = helper.add_variable(&qualified_param_name, Some(span_from_node(param)));
1911
1912 let type_id = helper.add_type(&type_name, None);
1914
1915 helper.add_typeof_edge(param_id, type_id);
1917 helper.add_reference_edge(param_id, type_id);
1918}
1919
1920fn process_annotated_assignment(
1927 expr_stmt_node: Node<'_>,
1928 content: &[u8],
1929 ast_graph: &ASTGraph,
1930 helper: &mut GraphBuildHelper,
1931) {
1932 let scope_prefix = find_containing_scope(expr_stmt_node, content, ast_graph);
1935
1936 for child in expr_stmt_node.children(&mut expr_stmt_node.walk()) {
1938 if child.kind() == "assignment" {
1939 process_typed_assignment(child, content, &scope_prefix, helper);
1940 }
1941 }
1942}
1943
1944fn process_typed_assignment(
1948 assignment_node: Node<'_>,
1949 content: &[u8],
1950 scope_prefix: &str,
1951 helper: &mut GraphBuildHelper,
1952) {
1953 let Some(left) = assignment_node.child_by_field_name("left") else {
1958 return;
1959 };
1960
1961 let Some(type_node) = assignment_node.child_by_field_name("type") else {
1962 return;
1963 };
1964
1965 let Ok(var_name) = left.utf8_text(content) else {
1967 return;
1968 };
1969
1970 let Some(type_name) = extract_type_from_node(type_node, content) else {
1972 return;
1973 };
1974
1975 let qualified_var_name = if scope_prefix.is_empty() {
1979 var_name.to_string()
1981 } else if scope_prefix.contains('.') && !scope_prefix.contains(':') {
1982 format!("{scope_prefix}.{var_name}")
1984 } else {
1985 format!("{scope_prefix}:{var_name}")
1987 };
1988
1989 let var_id = helper.add_variable(&qualified_var_name, Some(span_from_node(assignment_node)));
1991
1992 let type_id = helper.add_type(&type_name, None);
1994
1995 helper.add_typeof_edge(var_id, type_id);
1997 helper.add_reference_edge(var_id, type_id);
1998}
1999
2000fn extract_type_from_node(type_node: Node<'_>, content: &[u8]) -> Option<String> {
2010 match type_node.kind() {
2011 "type" => {
2012 type_node
2014 .named_child(0)
2015 .and_then(|child| extract_type_from_node(child, content))
2016 }
2017 "identifier" => {
2018 type_node.utf8_text(content).ok().map(String::from)
2020 }
2021 "string" => {
2022 let text = type_node.utf8_text(content).ok()?;
2025 let trimmed = text.trim();
2026
2027 if (trimmed.starts_with('"') && trimmed.ends_with('"'))
2029 || (trimmed.starts_with('\'') && trimmed.ends_with('\''))
2030 {
2031 let unquoted = &trimmed[1..trimmed.len() - 1];
2032 Some(normalize_union_type(unquoted))
2034 } else {
2035 Some(trimmed.to_string())
2036 }
2037 }
2038 "binary_operator" => {
2039 if let Some(left) = type_node.child_by_field_name("left") {
2042 extract_type_from_node(left, content)
2043 } else {
2044 type_node
2046 .utf8_text(content)
2047 .ok()
2048 .map(|text| normalize_union_type(text.trim()))
2049 }
2050 }
2051 "generic_type" | "subscript" => {
2052 if let Some(value_node) = type_node.child_by_field_name("value") {
2056 extract_type_from_node(value_node, content)
2057 } else {
2058 type_node
2060 .named_child(0)
2061 .and_then(|child| extract_type_from_node(child, content))
2062 .or_else(|| {
2063 type_node.utf8_text(content).ok().and_then(|text| {
2065 text.split('[').next().map(|s| s.trim().to_string())
2067 })
2068 })
2069 }
2070 }
2071 "attribute" => {
2072 type_node.utf8_text(content).ok().map(String::from)
2074 }
2075 "list" | "tuple" | "set" => {
2076 type_node.utf8_text(content).ok().map(String::from)
2078 }
2079 _ => {
2080 let text = type_node.utf8_text(content).ok()?;
2083 let trimmed = text.trim();
2084
2085 if trimmed.contains('[') {
2087 trimmed.split('[').next().map(|s| s.trim().to_string())
2088 } else {
2089 Some(normalize_union_type(trimmed))
2091 }
2092 }
2093 }
2094}
2095
/// Reduces a union annotation such as `A | B` to its first member.
///
/// The text before the first `|` is returned trimmed. A string without `|`
/// is returned unchanged (and untrimmed).
fn normalize_union_type(type_str: &str) -> String {
    match type_str.split_once('|') {
        Some((first_member, _others)) => first_member.trim().to_string(),
        None => type_str.to_string(),
    }
}
2110
// Unit tests for the string-level helpers of this file: name simplification,
// FFI library name normalisation and route-decorator parsing.
#[cfg(test)]
mod tests {
    use super::*;

    // `simple_name` keeps only the last dot-separated segment.
    #[test]
    fn test_simple_name_extracts_dotted_identifiers() {
        // Dotted names reduce to their final component.
        assert_eq!(simple_name("module.func"), "func");
        assert_eq!(simple_name("obj.method"), "method");
        assert_eq!(simple_name("package.module.func"), "func");
        assert_eq!(simple_name("self.helper"), "helper");

        // Names without a dot, including the empty string, pass through unchanged.
        assert_eq!(simple_name("function"), "function");
        assert_eq!(simple_name(""), "");
    }

    // `ffi_library_simple_name` strips library file extensions and version suffixes.
    #[test]
    fn test_ffi_library_simple_name_extracts_library_base_names() {
        // `.so` shared objects.
        assert_eq!(ffi_library_simple_name("libfoo.so"), "libfoo");
        assert_eq!(ffi_library_simple_name("lib1.so"), "lib1");
        assert_eq!(ffi_library_simple_name("lib2.so"), "lib2");

        // `.dll` and `.dylib` libraries.
        assert_eq!(ffi_library_simple_name("kernel32.dll"), "kernel32");
        assert_eq!(ffi_library_simple_name("libSystem.dylib"), "libSystem");

        // Versioned shared object: the trailing version is dropped as well.
        assert_eq!(ffi_library_simple_name("libc.so.6"), "libc");

        // Names without an extension are returned unchanged.
        assert_eq!(ffi_library_simple_name("kernel32"), "kernel32");
        assert_eq!(ffi_library_simple_name("numpy"), "numpy");

        // `$`-prefixed variable references are returned unchanged.
        assert_eq!(ffi_library_simple_name("$libname"), "$libname");

        // Edge cases: empty input and the shortest possible library name.
        assert_eq!(ffi_library_simple_name(""), "");
        assert_eq!(ffi_library_simple_name("lib.so"), "lib");
    }

    // Distinct library files must map to distinct simple names.
    #[test]
    fn test_ffi_library_simple_name_prevents_duplicate_edges() {
        let name1 = ffi_library_simple_name("lib1.so");
        let name2 = ffi_library_simple_name("lib2.so");

        assert_ne!(
            name1, name2,
            "lib1.so and lib2.so must produce different simple names"
        );
        assert_eq!(name1, "lib1");
        assert_eq!(name2, "lib2");
    }

    // Directory components are discarded, even when they contain dots.
    #[test]
    fn test_ffi_library_simple_name_handles_directory_paths() {
        // Absolute and nested paths.
        assert_eq!(ffi_library_simple_name("/opt/v1.2/libfoo.so"), "libfoo");
        assert_eq!(
            ffi_library_simple_name("/usr/lib/x86_64-linux-gnu/libc.so.6"),
            "libc"
        );
        assert_eq!(ffi_library_simple_name("libs/lib1.so"), "lib1");

        // Relative paths starting with `./` or `../`.
        assert_eq!(ffi_library_simple_name("./libs/kernel32.dll"), "kernel32");
        assert_eq!(
            ffi_library_simple_name("../lib/libSystem.dylib"),
            "libSystem"
        );
    }

    // --- parse_route_decorator_text: `route(...)` form ---

    // `route` without a `methods` argument defaults to GET.
    #[test]
    fn test_parse_route_decorator_app_route_default_get() {
        let result = parse_route_decorator_text("app.route('/api/users')");
        assert_eq!(result, Some(("GET".to_string(), "/api/users".to_string())));
    }

    // `route` with `methods=[...]` reports the first listed method.
    #[test]
    fn test_parse_route_decorator_app_route_with_methods_post() {
        let result = parse_route_decorator_text("app.route('/api/users', methods=['POST'])");
        assert_eq!(result, Some(("POST".to_string(), "/api/users".to_string())));
    }

    // Double-quoted path and method strings are handled like single-quoted ones.
    #[test]
    fn test_parse_route_decorator_app_route_with_methods_put_double_quotes() {
        let result = parse_route_decorator_text("app.route(\"/api/items\", methods=[\"PUT\"])");
        assert_eq!(result, Some(("PUT".to_string(), "/api/items".to_string())));
    }

    // --- parse_route_decorator_text: verb-named methods on `app` ---

    #[test]
    fn test_parse_route_decorator_app_get() {
        let result = parse_route_decorator_text("app.get('/api/users')");
        assert_eq!(result, Some(("GET".to_string(), "/api/users".to_string())));
    }

    #[test]
    fn test_parse_route_decorator_app_post() {
        let result = parse_route_decorator_text("app.post('/api/items')");
        assert_eq!(result, Some(("POST".to_string(), "/api/items".to_string())));
    }

    #[test]
    fn test_parse_route_decorator_app_put() {
        let result = parse_route_decorator_text("app.put('/api/items/1')");
        assert_eq!(
            result,
            Some(("PUT".to_string(), "/api/items/1".to_string()))
        );
    }

    #[test]
    fn test_parse_route_decorator_app_delete() {
        let result = parse_route_decorator_text("app.delete('/api/items/1')");
        assert_eq!(
            result,
            Some(("DELETE".to_string(), "/api/items/1".to_string()))
        );
    }

    #[test]
    fn test_parse_route_decorator_app_patch() {
        let result = parse_route_decorator_text("app.patch('/api/items/1')");
        assert_eq!(
            result,
            Some(("PATCH".to_string(), "/api/items/1".to_string()))
        );
    }

    // --- parse_route_decorator_text: other accepted receivers ---

    #[test]
    fn test_parse_route_decorator_router_get_fastapi() {
        let result = parse_route_decorator_text("router.get('/api/users')");
        assert_eq!(result, Some(("GET".to_string(), "/api/users".to_string())));
    }

    #[test]
    fn test_parse_route_decorator_router_post_fastapi() {
        let result = parse_route_decorator_text("router.post('/api/items')");
        assert_eq!(result, Some(("POST".to_string(), "/api/items".to_string())));
    }

    #[test]
    fn test_parse_route_decorator_blueprint_route() {
        let result = parse_route_decorator_text("blueprint.route('/health')");
        assert_eq!(result, Some(("GET".to_string(), "/health".to_string())));
    }

    // --- parse_route_decorator_text: rejected inputs ---

    // `server` is not in ROUTE_RECEIVER_NAMES.
    #[test]
    fn test_parse_route_decorator_unknown_receiver_returns_none() {
        let result = parse_route_decorator_text("server.get('/api/users')");
        assert_eq!(result, None);
    }

    // `options` is neither in ROUTE_METHOD_NAMES nor `route`.
    #[test]
    fn test_parse_route_decorator_unknown_method_returns_none() {
        let result = parse_route_decorator_text("app.options('/api/users')");
        assert_eq!(result, None);
    }

    // Without an opening parenthesis there are no arguments to parse.
    #[test]
    fn test_parse_route_decorator_no_parens_returns_none() {
        let result = parse_route_decorator_text("app.route");
        assert_eq!(result, None);
    }

    // A bare function call has no receiver to validate.
    #[test]
    fn test_parse_route_decorator_no_dot_returns_none() {
        let result = parse_route_decorator_text("route('/api/users')");
        assert_eq!(result, None);
    }

    // --- extract_path_from_decorator_args ---
    // Inputs are the text that follows the decorator's opening parenthesis.

    #[test]
    fn test_extract_path_from_decorator_args_single_quotes() {
        let result = extract_path_from_decorator_args("'/api/users')");
        assert_eq!(result, Some("/api/users".to_string()));
    }

    #[test]
    fn test_extract_path_from_decorator_args_double_quotes() {
        let result = extract_path_from_decorator_args("\"/api/items\")");
        assert_eq!(result, Some("/api/items".to_string()));
    }

    // An empty string literal is not a usable path.
    #[test]
    fn test_extract_path_from_decorator_args_empty_returns_none() {
        let result = extract_path_from_decorator_args("'')");
        assert_eq!(result, None);
    }

    // A non-literal argument yields no path.
    #[test]
    fn test_extract_path_from_decorator_args_no_string_returns_none() {
        let result = extract_path_from_decorator_args("some_var)");
        assert_eq!(result, None);
    }

    // --- extract_method_from_route_args ---

    #[test]
    fn test_extract_method_from_route_args_with_methods_keyword() {
        let result = extract_method_from_route_args("'/api/users', methods=['POST'])");
        assert_eq!(result, "POST");
    }

    // No `methods` keyword: the default is GET.
    #[test]
    fn test_extract_method_from_route_args_without_methods_keyword() {
        let result = extract_method_from_route_args("'/api/users')");
        assert_eq!(result, "GET");
    }

    #[test]
    fn test_extract_method_from_route_args_delete() {
        let result = extract_method_from_route_args("'/api/items', methods=['DELETE'])");
        assert_eq!(result, "DELETE");
    }

    // Lowercase method names are upper-cased in the result.
    #[test]
    fn test_extract_method_from_route_args_lowercase_normalizes() {
        let result = extract_method_from_route_args("'/x', methods=['put'])");
        assert_eq!(result, "PUT");
    }

    // --- extract_first_string_literal ---

    #[test]
    fn test_extract_first_string_literal_single_quotes() {
        let result = extract_first_string_literal("'POST']");
        assert_eq!(result, Some("POST".to_string()));
    }

    #[test]
    fn test_extract_first_string_literal_double_quotes() {
        let result = extract_first_string_literal("\"DELETE\"]");
        assert_eq!(result, Some("DELETE".to_string()));
    }

    // Despite the name, this covers input that contains no quoted string at all.
    #[test]
    fn test_extract_first_string_literal_empty_returns_none() {
        let result = extract_first_string_literal("no quotes here");
        assert_eq!(result, None);
    }
}
// Tests for the Python shape mapping, driven by a reference fixture that is
// parsed with the tree-sitter Python grammar.
#[cfg(test)]
mod shape_tests {
    use super::{cf_bucket_for_python_kind, python_shape_mapping};
    use sqry_core::graph::unified::build::shape::{
        CfBucket, ShapeBudget, ShapeMapping, compute_shape_descriptor,
    };

    // Reference Python source, embedded at compile time from a path relative
    // to this crate's manifest directory.
    const SAMPLE: &str = include_str!(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../test-fixtures/shape/reference/sample.py"
    ));

    // Parses `src` with the Python grammar; panics if the grammar cannot be
    // loaded or parsing fails.
    fn parse(src: &str) -> tree_sitter::Tree {
        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
        let mut p = tree_sitter::Parser::new();
        p.set_language(&lang).expect("load python grammar");
        p.parse(src, None).expect("parse")
    }

    // Walks the tree with an explicit stack and returns the first
    // `function_definition` whose `name` field equals `name`; panics if there
    // is none. Node text is always read from `SAMPLE`, so `tree` must have
    // been parsed from `SAMPLE`.
    fn function_named<'t>(tree: &'t tree_sitter::Tree, name: &str) -> tree_sitter::Node<'t> {
        let root = tree.root_node();
        let mut stack = vec![root];
        while let Some(node) = stack.pop() {
            if node.kind() == "function_definition"
                && node
                    .child_by_field_name("name")
                    .and_then(|n| n.utf8_text(SAMPLE.as_bytes()).ok())
                    == Some(name)
            {
                return node;
            }
            let mut c = node.walk();
            for ch in node.children(&mut c) {
                stack.push(ch);
            }
        }
        panic!("no function_definition named {name}");
    }

    // At least ten node kinds of the grammar must map to a control-flow bucket.
    #[test]
    fn cf_table_is_non_empty() {
        let mapping = python_shape_mapping();
        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
        let mut covered = 0;
        for id in 0..lang.node_kind_count() {
            // NOTE(review): narrowing cast; assumes the grammar's kind count fits in u16.
            let kid = id as u16;
            if mapping.cf_bucket(kid).is_some() {
                covered += 1;
            }
        }
        assert!(
            covered >= 10,
            "expected many python CF kinds mapped, got {covered}"
        );
    }

    // The fixture's `classify` function must hit every listed bucket at least once.
    #[test]
    fn histogram_covers_real_control_flow() {
        let tree = parse(SAMPLE);
        let func = function_named(&tree, "classify");
        let d = compute_shape_descriptor(
            func,
            SAMPLE.as_bytes(),
            python_shape_mapping(),
            &ShapeBudget::default(),
        );
        assert!(!d.is_unhashable(), "classify body must be hashable");
        for bucket in [
            CfBucket::Branch,
            CfBucket::Loop,
            CfBucket::Match,
            CfBucket::Try,
            CfBucket::Catch,
            CfBucket::Throw,
            CfBucket::Resource,
            CfBucket::Return,
            CfBucket::BreakContinue,
            CfBucket::Call,
            CfBucket::Assign,
            CfBucket::Comprehension,
        ] {
            assert!(
                d.cf_histogram[bucket.index()] >= 1,
                "classify must exercise {bucket:?}"
            );
        }
    }

    // The fixture's `fetch` function must register await, yield and closure
    // buckets, and its signature must report a return annotation.
    #[test]
    fn async_body_covers_yield_await_closure() {
        let tree = parse(SAMPLE);
        let func = function_named(&tree, "fetch");
        let d = compute_shape_descriptor(
            func,
            SAMPLE.as_bytes(),
            python_shape_mapping(),
            &ShapeBudget::default(),
        );
        assert!(d.cf_histogram[CfBucket::Await.index()] >= 1, "await");
        assert!(d.cf_histogram[CfBucket::Yield.index()] >= 1, "yield");
        assert!(
            d.cf_histogram[CfBucket::Closure.index()] >= 1,
            "lambda closure"
        );
        assert!(
            d.signature_shape.has_return_annotation,
            "-> str return annotation"
        );
    }

    // Signature shape of `classify`: two positional parameters, a default
    // value, `*extra` and `**opts`.
    #[test]
    fn signature_shape_reads_arity_and_splats() {
        let tree = parse(SAMPLE);
        let func = function_named(&tree, "classify");
        let mapping = python_shape_mapping();
        let shape = mapping.signature_shape(func, SAMPLE.as_bytes());
        assert_eq!(
            shape.arity_positional, 2,
            "values + threshold are positional"
        );
        assert!(shape.has_defaults, "threshold=0");
        assert!(shape.has_varargs, "*extra");
        assert!(shape.has_kwargs, "**opts");
    }

    // These node kinds have no control-flow bucket.
    #[test]
    fn unknown_kind_maps_to_none() {
        assert!(cf_bucket_for_python_kind("module").is_none());
        assert!(cf_bucket_for_python_kind("identifier").is_none());
    }
}