1use std::{collections::HashMap, path::Path, sync::OnceLock};
2
3use sqry_core::graph::unified::StagingGraph;
4use sqry_core::graph::unified::build::GraphBuildHelper;
5use sqry_core::graph::unified::build::helper::CalleeKindHint;
6use sqry_core::graph::unified::build::shape::{CfBucket, ShapeMapping};
7use sqry_core::graph::unified::edge::FfiConvention;
8use sqry_core::graph::unified::edge::kind::TypeOfContext;
9use sqry_core::graph::unified::node::NodeId as UnifiedNodeId;
10use sqry_core::graph::unified::storage::shape::SignatureShape;
11use sqry_core::graph::{GraphBuilder, GraphBuilderError, GraphResult, Language, Span};
12use tree_sitter::{Node, Tree};
13
14use super::local_scopes;
15
/// Scope-depth limit used by `PythonGraphBuilder::default()`.
const DEFAULT_SCOPE_DEPTH: usize = 4;
/// Top-level module names that `is_native_extension_import` classifies as
/// native (C-implemented) standard-library modules.
///
/// Entries are compared against the first dotted segment of an import name.
const STD_C_MODULES: &[&str] = &[
    "_ctypes",
    "_socket",
    "_ssl",
    "_hashlib",
    "_json",
    "_pickle",
    "_struct",
    "_sqlite3",
    "_decimal",
    "_lzma",
    "_bz2",
    "_zlib",
    "_elementtree",
    "_csv",
    "_datetime",
    "_heapq",
    "_bisect",
    "_random",
    "_collections",
    "_functools",
    "_itertools",
    "_operator",
    "_io",
    "_thread",
    "_multiprocessing",
    "_posixsubprocess",
    "_asyncio",
    "array",
    "math",
    "cmath",
];
/// Top-level package names that `is_native_extension_import` classifies as
/// packages backed by native extensions.
///
/// NOTE(review): `sqlite3` ships with CPython rather than being third-party;
/// presumably it is listed here only because it wraps a C library — confirm.
const THIRD_PARTY_C_PACKAGES: &[&str] = &[
    "numpy",
    "pandas",
    "scipy",
    "sklearn",
    "cv2",
    "PIL",
    "torch",
    "tensorflow",
    "lxml",
    "psycopg2",
    "MySQLdb",
    "sqlite3",
    "cryptography",
    "bcrypt",
    "regex",
    "ujson",
    "orjson",
    "msgpack",
    "greenlet",
    "gevent",
    "uvloop",
];
72
73#[derive(Debug, Clone, Copy)]
75pub struct PythonGraphBuilder {
76 max_scope_depth: usize,
77}
78
79impl Default for PythonGraphBuilder {
80 fn default() -> Self {
81 Self {
82 max_scope_depth: DEFAULT_SCOPE_DEPTH,
83 }
84 }
85}
86
87impl PythonGraphBuilder {
88 #[must_use]
89 pub fn new(max_scope_depth: usize) -> Self {
90 Self { max_scope_depth }
91 }
92}
93
94impl GraphBuilder for PythonGraphBuilder {
95 fn build_graph(
96 &self,
97 tree: &Tree,
98 content: &[u8],
99 file: &Path,
100 staging: &mut StagingGraph,
101 ) -> GraphResult<()> {
102 let mut helper = GraphBuildHelper::new(staging, file, Language::Python);
104
105 let ast_graph = ASTGraph::from_tree(tree, content, self.max_scope_depth).map_err(|e| {
107 GraphBuilderError::ParseError {
108 span: Span::default(),
109 reason: e,
110 }
111 })?;
112
113 let has_all = has_all_assignment(tree.root_node(), content);
115
116 let mut scope_tree = local_scopes::build(tree.root_node(), content)?;
118
119 let recursion_limits =
121 sqry_core::config::RecursionLimits::load_or_default().map_err(|e| {
122 GraphBuilderError::ParseError {
123 span: Span::default(),
124 reason: format!("Failed to load recursion limits: {e}"),
125 }
126 })?;
127 let file_ops_depth = recursion_limits.effective_file_ops_depth().map_err(|e| {
128 GraphBuilderError::ParseError {
129 span: Span::default(),
130 reason: format!("Invalid file_ops_depth configuration: {e}"),
131 }
132 })?;
133 let mut guard =
134 sqry_core::query::security::RecursionGuard::new(file_ops_depth).map_err(|e| {
135 GraphBuilderError::ParseError {
136 span: Span::default(),
137 reason: format!("Failed to create recursion guard: {e}"),
138 }
139 })?;
140
141 walk_tree_for_graph(
143 tree.root_node(),
144 content,
145 &ast_graph,
146 &mut helper,
147 has_all,
148 &mut guard,
149 &mut scope_tree,
150 )?;
151
152 Ok(())
153 }
154
155 fn language(&self) -> Language {
156 Language::Python
157 }
158
159 fn shape_mapping(&self) -> Option<&dyn ShapeMapping> {
160 Some(python_shape_mapping())
161 }
162}
163
164pub struct PythonShapeMapping {
173 cf_by_kind_id: Vec<Option<CfBucket>>,
174}
175
176impl PythonShapeMapping {
177 fn build() -> Self {
179 let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
180 let count = lang.node_kind_count();
181 let mut cf_by_kind_id = vec![None; count];
182 for (id, slot) in cf_by_kind_id.iter_mut().enumerate() {
183 let Ok(kind_id) = u16::try_from(id) else {
184 break;
185 };
186 if !lang.node_kind_is_named(kind_id) {
187 continue;
188 }
189 if let Some(name) = lang.node_kind_for_id(kind_id) {
190 *slot = cf_bucket_for_python_kind(name);
191 }
192 }
193 Self { cf_by_kind_id }
194 }
195}
196
197impl ShapeMapping for PythonShapeMapping {
198 fn cf_bucket(&self, ts_node_kind_id: u16) -> Option<CfBucket> {
199 self.cf_by_kind_id
200 .get(ts_node_kind_id as usize)
201 .copied()
202 .flatten()
203 }
204
205 fn signature_shape(&self, fn_node: Node, _src: &[u8]) -> SignatureShape {
206 let mut shape = SignatureShape::default();
207 if let Some(params) = fn_node.child_by_field_name("parameters") {
208 let mut keyword_only = false;
212 let mut cursor = params.walk();
213 for child in params.named_children(&mut cursor) {
214 match child.kind() {
215 "list_splat_pattern" => {
217 shape.has_varargs = true;
218 keyword_only = true;
219 }
220 "dictionary_splat_pattern" => shape.has_kwargs = true,
222 "identifier" | "typed_parameter" => {
224 bump_arity(&mut shape, keyword_only);
225 }
226 "default_parameter" | "typed_default_parameter" => {
228 shape.has_defaults = true;
229 bump_arity(&mut shape, keyword_only);
230 }
231 _ => {}
232 }
233 }
234 }
235 shape.has_return_annotation = fn_node.child_by_field_name("return_type").is_some();
236 shape
237 }
238}
239
240fn bump_arity(shape: &mut SignatureShape, keyword_only: bool) {
242 if keyword_only {
243 shape.arity_keyword_only = shape.arity_keyword_only.saturating_add(1);
244 } else {
245 shape.arity_positional = shape.arity_positional.saturating_add(1);
246 }
247}
248
249fn cf_bucket_for_python_kind(name: &str) -> Option<CfBucket> {
253 let bucket = match name {
254 "if_statement" | "elif_clause" | "conditional_expression" => CfBucket::Branch,
255 "for_statement" | "while_statement" => CfBucket::Loop,
256 "match_statement" | "case_clause" => CfBucket::Match,
257 "try_statement" => CfBucket::Try,
258 "except_clause" | "except_group_clause" => CfBucket::Catch,
259 "raise_statement" => CfBucket::Throw,
260 "with_statement" => CfBucket::Resource,
262 "return_statement" => CfBucket::Return,
263 "yield" => CfBucket::Yield,
264 "await" => CfBucket::Await,
265 "break_statement" | "continue_statement" => CfBucket::BreakContinue,
266 "call" => CfBucket::Call,
267 "assignment" | "augmented_assignment" | "named_expression" => CfBucket::Assign,
268 "lambda" => CfBucket::Closure,
269 "list_comprehension"
270 | "dictionary_comprehension"
271 | "set_comprehension"
272 | "generator_expression" => CfBucket::Comprehension,
273 _ => return None,
274 };
275 Some(bucket)
276}
277
278#[must_use]
280pub fn python_shape_mapping() -> &'static PythonShapeMapping {
281 static MAPPING: OnceLock<PythonShapeMapping> = OnceLock::new();
282 MAPPING.get_or_init(PythonShapeMapping::build)
283}
284
285fn has_all_assignment(node: Node, content: &[u8]) -> bool {
287 let mut cursor = node.walk();
288 for child in node.children(&mut cursor) {
289 if child.kind() == "expression_statement" {
290 let assignment = child
292 .children(&mut child.walk())
293 .find(|c| c.kind() == "assignment" || c.kind() == "augmented_assignment");
294
295 if let Some(assignment) = assignment
296 && let Some(left) = assignment.child_by_field_name("left")
297 && let Ok(left_text) = left.utf8_text(content)
298 && left_text.trim() == "__all__"
299 {
300 return true;
301 }
302 }
303 }
304 false
305}
306
/// Recursively walks the syntax tree rooted at `node` and records graph nodes
/// and edges through `helper`.
///
/// Handled node kinds: class definitions, expression statements (`__all__`
/// and annotated assignments), function definitions, calls, imports and
/// identifiers. Every node's children are visited afterwards, whatever its
/// kind.
///
/// `has_all` tells the walk that the module assigns `__all__`; when it is
/// `true`, public module-level classes and functions are NOT exported
/// implicitly. `guard` bounds the recursion depth.
///
/// # Errors
///
/// Returns a `ParseError` when the recursion guard refuses entry, and
/// propagates errors from FFI call handling and from child walks.
#[allow(clippy::too_many_lines)]
fn walk_tree_for_graph(
    node: Node,
    content: &[u8],
    ast_graph: &ASTGraph,
    helper: &mut GraphBuildHelper,
    has_all: bool,
    guard: &mut sqry_core::query::security::RecursionGuard,
    scope_tree: &mut local_scopes::PythonScopeTree,
) -> GraphResult<()> {
    guard.enter().map_err(|e| GraphBuilderError::ParseError {
        span: Span::default(),
        reason: format!("Recursion limit exceeded: {e}"),
    })?;

    match node.kind() {
        "class_definition" => {
            if let Some(name_node) = node.child_by_field_name("name")
                && let Ok(class_name) = name_node.utf8_text(content)
            {
                let span = span_from_node(node);

                // The class node is registered under its bare name (no enclosing scope prefix).
                let qualified_name = class_name.to_string();

                let class_id = helper.add_class(&qualified_name, Some(span));
                helper.mark_definition(class_id);

                process_class_inheritance(node, content, class_id, helper);

                // Implicit export: only without `__all__`, at module level, for public names.
                if !has_all && is_module_level(node) && is_public_name(class_name) {
                    export_from_file_module(helper, class_id);
                }
            }
        }
        "expression_statement" => {
            process_all_assignment(node, content, helper);

            process_annotated_assignment(node, content, ast_graph, helper);
        }
        "function_definition" => {
            // Functions without a recorded call context are skipped here.
            if let Some(call_context) = ast_graph.get_callable_context(node.id()) {
                let span = span_from_node(node);

                let func_name = node
                    .child_by_field_name("name")
                    .and_then(|n| n.utf8_text(content).ok())
                    .unwrap_or("");
                let visibility = extract_visibility_from_name(func_name);

                let is_property = has_property_decorator(node, content);

                let return_type = extract_return_type_annotation(node, content);

                let return_type_source = extract_return_type_source_text(node, content);

                // Node kind selection: property, method, or free function; the
                // `*_with_signature` variants are used when a return type was extracted.
                let function_id = if is_property && call_context.is_method {
                    helper.add_node_with_visibility(
                        &call_context.qualified_name,
                        Some(span),
                        sqry_core::graph::unified::node::NodeKind::Property,
                        Some(visibility),
                    )
                } else if call_context.is_method {
                    if return_type.is_some() {
                        helper.add_method_with_signature(
                            &call_context.qualified_name,
                            Some(span),
                            call_context.is_async,
                            false, Some(visibility),
                            return_type.as_deref(),
                        )
                    } else {
                        helper.add_method_with_visibility(
                            &call_context.qualified_name,
                            Some(span),
                            call_context.is_async,
                            false,
                            Some(visibility),
                        )
                    }
                } else {
                    if return_type.is_some() {
                        helper.add_function_with_signature(
                            &call_context.qualified_name,
                            Some(span),
                            call_context.is_async,
                            false, Some(visibility),
                            return_type.as_deref(),
                        )
                    } else {
                        helper.add_function_with_visibility(
                            &call_context.qualified_name,
                            Some(span),
                            call_context.is_async,
                            false,
                            Some(visibility),
                        )
                    }
                };

                // Return-annotation type node plus typeof/reference edges; not
                // emitted for property methods.
                if !(is_property && call_context.is_method)
                    && let Some(annotation_text) = return_type_source.as_deref()
                    && let Some(return_type_node) = node.child_by_field_name("return_type")
                {
                    let type_span = span_from_node(return_type_node);
                    let type_id = helper.add_type(annotation_text, Some(type_span));
                    helper.add_typeof_edge_with_context(
                        function_id,
                        type_id,
                        Some(TypeOfContext::Return),
                        Some(0),
                        Some(call_context.qualified_name.as_str()),
                    );
                    helper.add_reference_edge(function_id, type_id);
                }

                // A route decorator yields an endpoint node named
                // `route::<method>::<path>` that contains the function.
                if let Some((http_method, route_path)) = extract_route_decorator_info(node, content)
                {
                    let endpoint_name = format!("route::{http_method}::{route_path}");
                    let endpoint_id = helper.add_endpoint(&endpoint_name, Some(span));
                    helper.add_contains_edge(endpoint_id, function_id);
                }

                process_function_parameters(node, content, ast_graph, helper);

                // Implicit export: same rule as for classes, and never for methods.
                if !has_all
                    && !call_context.is_method
                    && is_module_level(node)
                    && let Some(name_node) = node.child_by_field_name("name")
                    && let Ok(func_name) = name_node.utf8_text(content)
                    && is_public_name(func_name)
                {
                    export_from_file_module(helper, function_id);
                }
            }
        }
        "call" => {
            // FFI loader calls get an FFI edge instead of a regular call edge.
            let is_ffi = build_ffi_call_edge(ast_graph, node, content, helper)?;
            if !is_ffi {
                // Errors from `build_call_for_staging` are ignored here (no edge is added).
                if let Ok(Some((caller_qname, callee_qname, argument_count, is_awaited))) =
                    build_call_for_staging(ast_graph, node, content)
                {
                    let call_context = ast_graph.get_callable_context(node.id());
                    // NOTE(review): computed but currently unused.
                    let _is_async = call_context.is_some_and(|c| c.is_async);

                    let call_span = span_from_node(node);
                    let source_id =
                        helper.ensure_callee(&caller_qname, call_span, CalleeKindHint::Function);
                    let target_id =
                        helper.ensure_callee(&callee_qname, call_span, CalleeKindHint::Function);

                    // Argument counts above `u8::MAX` are clamped.
                    let argument_count = u8::try_from(argument_count).unwrap_or(u8::MAX);
                    helper.add_call_edge_full_with_span(
                        source_id,
                        target_id,
                        argument_count,
                        is_awaited,
                        vec![call_span],
                    );
                }
            }
        }
        "import_statement" | "import_from_statement" => {
            // Resolution errors are ignored here (no import edge is added).
            if let Ok(Some((from_qname, to_qname))) =
                build_import_for_staging(node, content, helper)
            {
                let from_id = helper.add_import(&from_qname, None);
                let to_id = helper.add_import(&to_qname, Some(span_from_node(node)));

                helper.add_import_edge(from_id, to_id);

                if is_native_extension_import(&to_qname) {
                    build_native_import_ffi_edge(&to_qname, node, helper);
                }
            }
        }
        "identifier" => {
            local_scopes::handle_identifier_for_reference(node, content, scope_tree, helper);
        }
        _ => {}
    }

    // Children are visited for every node kind, including the ones handled above.
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        walk_tree_for_graph(
            child, content, ast_graph, helper, has_all, guard, scope_tree,
        )?;
    }

    // Not reached when a child walk fails; the error is propagated instead.
    guard.exit();
    Ok(())
}
559
560fn build_call_for_staging(
562 ast_graph: &ASTGraph,
563 call_node: Node<'_>,
564 content: &[u8],
565) -> GraphResult<Option<(String, String, usize, bool)>> {
566 let module_context;
568 let call_context = if let Some(ctx) = ast_graph.get_callable_context(call_node.id()) {
569 ctx
570 } else {
571 module_context = CallContext {
573 qualified_name: "<module>".to_string(),
574 span: (0, content.len()),
575 is_async: false,
576 is_method: false,
577 class_name: None,
578 };
579 &module_context
580 };
581
582 let Some(callee_expr) = call_node.child_by_field_name("function") else {
583 return Ok(None);
584 };
585
586 let callee_text = callee_expr
587 .utf8_text(content)
588 .map_err(|_| GraphBuilderError::ParseError {
589 span: span_from_node(call_node),
590 reason: "failed to read call expression".to_string(),
591 })?
592 .trim()
593 .to_string();
594
595 if callee_text.is_empty() {
596 return Ok(None);
597 }
598
599 let callee_simple = simple_name(&callee_text);
600 if callee_simple.is_empty() {
601 return Ok(None);
602 }
603
604 let caller_qname = call_context.qualified_name();
606 let target_qname = if let Some(method_name) = callee_text.strip_prefix("self.") {
607 if let Some(class_name) = &call_context.class_name {
609 format!("{}.{}", class_name, simple_name(method_name))
610 } else {
611 callee_simple.to_string()
612 }
613 } else {
614 callee_simple.to_string()
615 };
616
617 let argument_count = count_arguments(call_node);
618 let is_awaited = is_awaited_call(call_node);
619 Ok(Some((
620 caller_qname,
621 target_qname,
622 argument_count,
623 is_awaited,
624 )))
625}
626
627fn build_import_for_staging(
629 import_node: Node<'_>,
630 content: &[u8],
631 helper: &GraphBuildHelper,
632) -> GraphResult<Option<(String, String)>> {
633 let raw_module_name = if import_node.kind() == "import_statement" {
635 import_node
636 .child_by_field_name("name")
637 .and_then(|n| extract_module_name(n, content))
638 } else if import_node.kind() == "import_from_statement" {
639 import_node
640 .child_by_field_name("module_name")
641 .and_then(|n| extract_module_name(n, content))
642 } else {
643 None
644 };
645
646 let module_name = if raw_module_name.is_none() && import_node.kind() == "import_from_statement"
648 {
649 if let Ok(import_text) = import_node.utf8_text(content) {
650 if let Some(from_idx) = import_text.find("from") {
651 if let Some(import_idx) = import_text.find("import") {
652 let between = import_text[from_idx + 4..import_idx].trim();
653 if between.starts_with('.') {
654 Some(between.to_string())
655 } else {
656 None
657 }
658 } else {
659 None
660 }
661 } else {
662 None
663 }
664 } else {
665 None
666 }
667 } else {
668 raw_module_name
669 };
670
671 let Some(module_name) = module_name else {
672 return Ok(None);
673 };
674
675 if module_name.is_empty() {
676 return Ok(None);
677 }
678
679 let resolved_path = sqry_core::graph::resolve_python_import(
681 std::path::Path::new(helper.file_path()),
682 &module_name,
683 import_node.kind() == "import_from_statement",
684 )?;
685
686 Ok(Some((helper.file_path().to_string(), resolved_path)))
688}
689
690fn span_from_node(node: Node<'_>) -> Span {
691 let start = node.start_position();
692 let end = node.end_position();
693 Span::new(
694 sqry_core::graph::node::Position::new(start.row, start.column),
695 sqry_core::graph::node::Position::new(end.row, end.column),
696 )
697}
698
699fn count_arguments(call_node: Node<'_>) -> usize {
700 call_node
701 .child_by_field_name("arguments")
702 .map_or(0, |args| {
703 args.named_children(&mut args.walk())
704 .filter(|child| {
705 !matches!(child.kind(), "," | "(" | ")")
707 })
708 .count()
709 })
710}
711
712fn is_awaited_call(call_node: Node<'_>) -> bool {
713 let mut current = call_node.parent();
714 while let Some(node) = current {
715 let kind = node.kind();
716 if kind == "await" || kind == "await_expression" {
717 return true;
718 }
719 current = node.parent();
720 }
721 false
722}
723
/// Returns the text after the last `.` in `qualified`, or the whole input
/// when it contains no dot.
fn simple_name(qualified: &str) -> &str {
    match qualified.rsplit_once('.') {
        Some((_, last_segment)) => last_segment,
        None => qualified,
    }
}
731
/// Reduces a native library path to a short library name.
///
/// The directory part is dropped first. Then:
/// - a versioned shared-object name is cut at `.so.` (`libc.so.6` -> `libc`);
/// - otherwise a final `.so`, `.dll` or `.dylib` extension (ASCII
///   case-insensitive) is removed (`libfoo-1.2.so` -> `libfoo-1.2`,
///   `KERNEL32.DLL` -> `KERNEL32`);
/// - anything else is returned unchanged.
fn ffi_library_simple_name(library_path: &str) -> String {
    use std::path::Path;

    const LIBRARY_EXTENSIONS: [&str; 3] = ["so", "dll", "dylib"];

    // Fall back to the raw input when there is no usable final path component.
    let filename = Path::new(library_path)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or(library_path);

    if let Some(so_pos) = filename.find(".so.") {
        return filename[..so_pos].to_string();
    }

    // Split at the LAST dot so that dots inside the stem (version numbers,
    // dotted names) do not hide the real extension.
    match filename.rsplit_once('.') {
        Some((stem, extension))
            if !stem.is_empty()
                && LIBRARY_EXTENSIONS
                    .iter()
                    .any(|known| extension.eq_ignore_ascii_case(known)) =>
        {
            stem.to_string()
        }
        _ => filename.to_string(),
    }
}
772
/// A name is public unless it begins with an underscore.
fn is_public_name(name: &str) -> bool {
    name.as_bytes().first() != Some(&b'_')
}
781
782fn is_module_level(node: Node<'_>) -> bool {
787 let mut current = node.parent();
789 while let Some(parent) = current {
790 match parent.kind() {
791 "module" => return true,
792 "function_definition" | "class_definition" => return false,
793 _ => current = parent.parent(),
794 }
795 }
796 false
797}
798
799const FILE_MODULE_NAME: &str = "<file_module>";
804
805fn export_from_file_module(
806 helper: &mut GraphBuildHelper,
807 exported: sqry_core::graph::unified::node::NodeId,
808) {
809 let module_id = helper.add_module(FILE_MODULE_NAME, None);
810 helper.add_export_edge(module_id, exported);
811}
812
813fn extract_module_name(node: Node<'_>, content: &[u8]) -> Option<String> {
819 if node.kind() == "aliased_import" {
821 return node
823 .child_by_field_name("name")
824 .and_then(|name_node| name_node.utf8_text(content).ok())
825 .map(std::string::ToString::to_string);
826 }
827
828 node.utf8_text(content)
830 .ok()
831 .map(std::string::ToString::to_string)
832}
833
834fn process_all_assignment(node: Node<'_>, content: &[u8], helper: &mut GraphBuildHelper) {
843 let assignment = node
845 .children(&mut node.walk())
846 .find(|child| child.kind() == "assignment" || child.kind() == "augmented_assignment");
847
848 let Some(assignment) = assignment else {
849 return;
850 };
851
852 let left = assignment.child_by_field_name("left");
854 let Some(left) = left else {
855 return;
856 };
857
858 let Ok(left_text) = left.utf8_text(content) else {
859 return;
860 };
861
862 if left_text.trim() != "__all__" {
863 return;
864 }
865
866 let right = assignment.child_by_field_name("right");
868 let Some(right) = right else {
869 return;
870 };
871
872 if right.kind() == "list" || right.kind() == "tuple" {
874 process_all_list(right, content, helper);
875 }
876}
877
878fn process_all_list(list_node: Node<'_>, content: &[u8], helper: &mut GraphBuildHelper) {
880 for child in list_node.children(&mut list_node.walk()) {
881 if child.kind() == "string"
883 && let Some(export_name) = extract_string_content(child, content)
884 && !export_name.is_empty()
885 {
886 let span = span_from_node(child);
890 let export_id = helper.add_function(&export_name, Some(span), false, false);
891
892 export_from_file_module(helper, export_id);
894 }
895 }
896}
897
898fn extract_string_content(string_node: Node<'_>, content: &[u8]) -> Option<String> {
900 let Ok(text) = string_node.utf8_text(content) else {
903 return None;
904 };
905
906 let text = text.trim();
907
908 let stripped = text
910 .trim_start_matches(|c: char| {
911 c == 'r'
912 || c == 'b'
913 || c == 'f'
914 || c == 'u'
915 || c == 'R'
916 || c == 'B'
917 || c == 'F'
918 || c == 'U'
919 })
920 .trim_start_matches("'''")
921 .trim_end_matches("'''")
922 .trim_start_matches("\"\"\"")
923 .trim_end_matches("\"\"\"")
924 .trim_start_matches('\'')
925 .trim_end_matches('\'')
926 .trim_start_matches('"')
927 .trim_end_matches('"');
928
929 Some(stripped.to_string())
930}
931
932fn process_class_inheritance(
941 class_node: Node<'_>,
942 content: &[u8],
943 class_id: UnifiedNodeId,
944 helper: &mut GraphBuildHelper,
945) {
946 let superclasses = class_node.child_by_field_name("superclasses");
949
950 let Some(superclasses) = superclasses else {
951 return;
952 };
953
954 for child in superclasses.children(&mut superclasses.walk()) {
956 if child.kind() == "keyword_argument" {
957 continue;
959 }
960
961 match child.kind() {
962 "identifier" => {
963 if let Ok(base_name) = child.utf8_text(content) {
965 let base_name = base_name.trim();
966 if !base_name.is_empty() {
967 let span = span_from_node(child);
968 let base_id = helper.add_class(base_name, Some(span));
969 helper.add_inherits_edge(class_id, base_id);
970 }
971 }
972 }
973 "attribute" => {
974 if let Ok(base_name) = child.utf8_text(content) {
976 let base_name = base_name.trim();
977 if !base_name.is_empty() {
978 let span = span_from_node(child);
979 let base_id = helper.add_class(base_name, Some(span));
980 helper.add_inherits_edge(class_id, base_id);
981 }
982 }
983 }
984 "call" => {
985 if let Some(func) = child.child_by_field_name("function")
988 && let Ok(base_name) = func.utf8_text(content)
989 {
990 let base_name = base_name.trim();
991 if !base_name.is_empty() {
992 let span = span_from_node(child);
993 let base_id = helper.add_class(base_name, Some(span));
994 helper.add_inherits_edge(class_id, base_id);
995 }
996 }
997 }
998 "subscript" => {
999 if let Some(value) = child.child_by_field_name("value")
1002 && let Ok(base_name) = value.utf8_text(content)
1003 {
1004 let base_name = base_name.trim();
1005 if !base_name.is_empty() {
1006 let span = span_from_node(child);
1007 let base_id = helper.add_class(base_name, Some(span));
1008 helper.add_inherits_edge(class_id, base_id);
1009 }
1010 }
1011 }
1012 _ => {}
1013 }
1014 }
1015}
1016
/// Facts about one callable (function or method), recorded by `walk_ast`.
#[derive(Debug, Clone)]
struct CallContext {
    /// Dotted name built from the enclosing scopes plus the callable's name.
    qualified_name: String,
    /// Start and end byte offsets of the definition.
    #[allow(dead_code)]
    span: (usize, usize),
    /// Whether the definition has an `async` child token.
    is_async: bool,
    /// Whether a class was on the class stack when the callable was recorded.
    is_method: bool,
    /// Qualified name of the innermost enclosing class, if any.
    class_name: Option<String>,
}

impl CallContext {
    /// Returns an owned copy of the qualified name.
    fn qualified_name(&self) -> String {
        self.qualified_name.to_owned()
    }
}
1036
1037struct ASTGraph {
1038 contexts: Vec<CallContext>,
1039 node_to_context: HashMap<usize, usize>,
1040}
1041
1042impl ASTGraph {
1043 fn from_tree(tree: &Tree, content: &[u8], max_depth: usize) -> Result<Self, String> {
1044 let mut contexts = Vec::new();
1045 let mut node_to_context = HashMap::new();
1046 let mut scope_stack: Vec<String> = Vec::new();
1047 let mut class_stack: Vec<String> = Vec::new();
1048
1049 walk_ast(
1050 tree.root_node(),
1051 content,
1052 &mut contexts,
1053 &mut node_to_context,
1054 &mut scope_stack,
1055 &mut class_stack,
1056 max_depth,
1057 )?;
1058
1059 Ok(Self {
1060 contexts,
1061 node_to_context,
1062 })
1063 }
1064
1065 #[allow(dead_code)] fn contexts(&self) -> &[CallContext] {
1067 &self.contexts
1068 }
1069
1070 fn get_callable_context(&self, node_id: usize) -> Option<&CallContext> {
1071 self.node_to_context
1072 .get(&node_id)
1073 .and_then(|idx| self.contexts.get(*idx))
1074 }
1075}
1076
1077fn walk_ast(
1078 node: Node,
1079 content: &[u8],
1080 contexts: &mut Vec<CallContext>,
1081 node_to_context: &mut HashMap<usize, usize>,
1082 scope_stack: &mut Vec<String>,
1083 class_stack: &mut Vec<String>,
1084 max_depth: usize,
1085) -> Result<(), String> {
1086 if scope_stack.len() > max_depth {
1087 return Ok(());
1088 }
1089
1090 match node.kind() {
1091 "class_definition" => {
1092 let name_node = node
1093 .child_by_field_name("name")
1094 .ok_or_else(|| "class_definition missing name".to_string())?;
1095 let class_name = name_node
1096 .utf8_text(content)
1097 .map_err(|_| "failed to read class name".to_string())?;
1098
1099 let qualified_class = if scope_stack.is_empty() {
1101 class_name.to_string()
1102 } else {
1103 format!("{}.{}", scope_stack.join("."), class_name)
1104 };
1105
1106 class_stack.push(qualified_class.clone());
1107 scope_stack.push(class_name.to_string());
1108
1109 if let Some(body) = node.child_by_field_name("body") {
1111 let mut cursor = body.walk();
1112 for child in body.children(&mut cursor) {
1113 walk_ast(
1114 child,
1115 content,
1116 contexts,
1117 node_to_context,
1118 scope_stack,
1119 class_stack,
1120 max_depth,
1121 )?;
1122 }
1123 }
1124
1125 class_stack.pop();
1126 scope_stack.pop();
1127 }
1128 "function_definition" => {
1129 let name_node = node
1130 .child_by_field_name("name")
1131 .ok_or_else(|| "function_definition missing name".to_string())?;
1132 let func_name = name_node
1133 .utf8_text(content)
1134 .map_err(|_| "failed to read function name".to_string())?;
1135
1136 let is_async = node
1138 .children(&mut node.walk())
1139 .any(|child| child.kind() == "async");
1140
1141 let qualified_func = if scope_stack.is_empty() {
1143 func_name.to_string()
1144 } else {
1145 format!("{}.{}", scope_stack.join("."), func_name)
1146 };
1147
1148 let is_method = !class_stack.is_empty();
1150 let class_name = class_stack.last().cloned();
1151
1152 let context_idx = contexts.len();
1153 contexts.push(CallContext {
1154 qualified_name: qualified_func.clone(),
1155 span: (node.start_byte(), node.end_byte()),
1156 is_async,
1157 is_method,
1158 class_name,
1159 });
1160
1161 node_to_context.insert(node.id(), context_idx);
1164
1165 if let Some(body) = node.child_by_field_name("body") {
1167 associate_descendants(body, context_idx, node_to_context);
1168 }
1169
1170 scope_stack.push(func_name.to_string());
1171
1172 if let Some(body) = node.child_by_field_name("body") {
1174 let mut cursor = body.walk();
1175 for child in body.children(&mut cursor) {
1176 walk_ast(
1177 child,
1178 content,
1179 contexts,
1180 node_to_context,
1181 scope_stack,
1182 class_stack,
1183 max_depth,
1184 )?;
1185 }
1186 }
1187
1188 scope_stack.pop();
1189 }
1190 _ => {
1191 let mut cursor = node.walk();
1193 for child in node.children(&mut cursor) {
1194 walk_ast(
1195 child,
1196 content,
1197 contexts,
1198 node_to_context,
1199 scope_stack,
1200 class_stack,
1201 max_depth,
1202 )?;
1203 }
1204 }
1205 }
1206
1207 Ok(())
1208}
1209
1210fn associate_descendants(
1211 node: Node,
1212 context_idx: usize,
1213 node_to_context: &mut HashMap<usize, usize>,
1214) {
1215 node_to_context.insert(node.id(), context_idx);
1216
1217 let mut stack = vec![node];
1218 while let Some(current) = stack.pop() {
1219 node_to_context.insert(current.id(), context_idx);
1220
1221 let mut cursor = current.walk();
1222 for child in current.children(&mut cursor) {
1223 stack.push(child);
1224 }
1225 }
1226}
1227
1228fn build_ffi_call_edge(
1243 ast_graph: &ASTGraph,
1244 call_node: Node<'_>,
1245 content: &[u8],
1246 helper: &mut GraphBuildHelper,
1247) -> GraphResult<bool> {
1248 let Some(callee_expr) = call_node.child_by_field_name("function") else {
1249 return Ok(false);
1250 };
1251
1252 let callee_text = callee_expr
1253 .utf8_text(content)
1254 .map_err(|_| GraphBuilderError::ParseError {
1255 span: span_from_node(call_node),
1256 reason: "failed to read call expression".to_string(),
1257 })?
1258 .trim();
1259
1260 if is_ctypes_load_call(callee_text) {
1262 return Ok(build_ctypes_ffi_edge(
1263 ast_graph,
1264 call_node,
1265 content,
1266 callee_text,
1267 helper,
1268 ));
1269 }
1270
1271 if is_cffi_dlopen_call(callee_text) {
1273 return Ok(build_cffi_ffi_edge(ast_graph, call_node, content, helper));
1274 }
1275
1276 Ok(false)
1277}
1278
/// Reports whether `callee_text` is a ctypes library-loading call.
///
/// Recognised forms, each with or without a leading `ctypes.` qualifier:
/// the loader classes `CDLL`, `WinDLL`, `OleDLL`, `PyDLL`, and the loader
/// objects' `LoadLibrary` methods (`cdll`, `windll`, `oledll`, `pydll`).
fn is_ctypes_load_call(callee_text: &str) -> bool {
    // Accept both `ctypes.CDLL(...)` and `from ctypes import CDLL` spellings.
    let unqualified = callee_text
        .strip_prefix("ctypes.")
        .unwrap_or(callee_text);

    matches!(
        unqualified,
        "CDLL"
            | "WinDLL"
            | "OleDLL"
            | "PyDLL"
            | "cdll.LoadLibrary"
            | "windll.LoadLibrary"
            | "oledll.LoadLibrary"
            | "pydll.LoadLibrary"
    )
}
1307
/// Reports whether `callee_text` is one of the recognised cffi `dlopen`
/// spellings (exact match only).
fn is_cffi_dlopen_call(callee_text: &str) -> bool {
    matches!(
        callee_text,
        "ffi.dlopen" | "cffi.dlopen" | "_ffi.dlopen" | "FFI().dlopen"
    )
}
1323
1324fn build_ctypes_ffi_edge(
1326 ast_graph: &ASTGraph,
1327 call_node: Node<'_>,
1328 content: &[u8],
1329 callee_text: &str,
1330 helper: &mut GraphBuildHelper,
1331) -> bool {
1332 let caller_id = get_ffi_caller_node_id(ast_graph, call_node, content, helper);
1334
1335 let convention = if callee_text.contains("WinDLL")
1337 || callee_text.contains("windll")
1338 || callee_text.contains("OleDLL")
1339 {
1340 FfiConvention::Stdcall
1341 } else {
1342 FfiConvention::C
1343 };
1344
1345 let library_name = extract_ffi_library_name(call_node, content)
1347 .unwrap_or_else(|| "ctypes::unknown".to_string());
1348
1349 let ffi_name = format!("native::{}", ffi_library_simple_name(&library_name));
1350 let ffi_node_id = helper.add_module(&ffi_name, Some(span_from_node(call_node)));
1351
1352 helper.add_ffi_edge(caller_id, ffi_node_id, convention);
1354
1355 true
1356}
1357
1358fn build_cffi_ffi_edge(
1360 ast_graph: &ASTGraph,
1361 call_node: Node<'_>,
1362 content: &[u8],
1363 helper: &mut GraphBuildHelper,
1364) -> bool {
1365 let caller_id = get_ffi_caller_node_id(ast_graph, call_node, content, helper);
1367
1368 let library_name =
1370 extract_ffi_library_name(call_node, content).unwrap_or_else(|| "cffi::unknown".to_string());
1371
1372 let ffi_name = format!("native::{}", ffi_library_simple_name(&library_name));
1373 let ffi_node_id = helper.add_module(&ffi_name, Some(span_from_node(call_node)));
1374
1375 helper.add_ffi_edge(caller_id, ffi_node_id, FfiConvention::C);
1377
1378 true
1379}
1380
1381fn get_ffi_caller_node_id(
1383 ast_graph: &ASTGraph,
1384 node: Node<'_>,
1385 content: &[u8],
1386 helper: &mut GraphBuildHelper,
1387) -> UnifiedNodeId {
1388 let module_context;
1389 let call_context = if let Some(ctx) = ast_graph.get_callable_context(node.id()) {
1390 ctx
1391 } else {
1392 module_context = CallContext {
1393 qualified_name: "<module>".to_string(),
1394 span: (0, content.len()),
1395 is_async: false,
1396 is_method: false,
1397 class_name: None,
1398 };
1399 &module_context
1400 };
1401
1402 let caller_span = Some(Span::from_bytes(call_context.span.0, call_context.span.1));
1403 helper.ensure_function(
1404 &call_context.qualified_name(),
1405 caller_span,
1406 call_context.is_async,
1407 false,
1408 )
1409}
1410
1411fn extract_ffi_library_name(call_node: Node<'_>, content: &[u8]) -> Option<String> {
1413 let args = call_node.child_by_field_name("arguments")?;
1414
1415 let mut cursor = args.walk();
1416 let first_arg = args
1417 .children(&mut cursor)
1418 .find(|child| !matches!(child.kind(), "(" | ")" | ","))?;
1419
1420 if first_arg.kind() == "string" {
1422 return extract_string_content(first_arg, content);
1423 }
1424
1425 if first_arg.kind() == "identifier" {
1427 let text = first_arg.utf8_text(content).ok()?;
1428 return Some(format!("${}", text.trim())); }
1430
1431 None
1432}
1433
1434fn is_native_extension_import(module_name: &str) -> bool {
1441 if module_name.starts_with('_') && !module_name.starts_with("__") {
1443 return true;
1444 }
1445
1446 let base_module = module_name.split('.').next().unwrap_or(module_name);
1448
1449 STD_C_MODULES.contains(&base_module) || THIRD_PARTY_C_PACKAGES.contains(&base_module)
1450}
1451
1452fn build_native_import_ffi_edge(
1454 module_name: &str,
1455 import_node: Node<'_>,
1456 helper: &mut GraphBuildHelper,
1457) {
1458 let file_path = helper.file_path().to_string();
1460 let importer_id = helper.add_module(&file_path, None);
1461
1462 let ffi_name = format!("native::{}", simple_name(module_name));
1464 let ffi_node_id = helper.add_module(&ffi_name, Some(span_from_node(import_node)));
1465
1466 helper.add_ffi_edge(importer_id, ffi_node_id, FfiConvention::C);
1468}
1469
/// Decorator attribute names treated as HTTP-verb shortcuts (e.g. `app.get(...)`).
///
/// `parse_route_decorator_text` lower-cases the decorator's attribute name,
/// checks it against this list, and reports the upper-cased name as the HTTP
/// method.
const ROUTE_METHOD_NAMES: &[&str] = &["get", "post", "put", "delete", "patch"];

/// Receiver identifiers accepted in front of a route decorator attribute.
///
/// `parse_route_decorator_text` compares only the last dotted segment of the
/// receiver against this list; any other receiver is rejected.
const ROUTE_RECEIVER_NAMES: &[&str] = &["app", "router", "blueprint"];
1481
1482fn extract_route_decorator_info(func_node: Node<'_>, content: &[u8]) -> Option<(String, String)> {
1495 let parent = func_node.parent()?;
1497 if parent.kind() != "decorated_definition" {
1498 return None;
1499 }
1500
1501 let mut cursor = parent.walk();
1503 for child in parent.children(&mut cursor) {
1504 if child.kind() != "decorator" {
1505 continue;
1506 }
1507
1508 let Ok(decorator_text) = child.utf8_text(content) else {
1509 continue;
1510 };
1511 let decorator_text = decorator_text.trim();
1512
1513 let without_at = decorator_text.strip_prefix('@')?;
1515
1516 if let Some(result) = parse_route_decorator_text(without_at) {
1518 return Some(result);
1519 }
1520 }
1521
1522 None
1523}
1524
1525fn parse_route_decorator_text(text: &str) -> Option<(String, String)> {
1533 let paren_pos = text.find('(')?;
1536 let accessor = &text[..paren_pos];
1537 let args_text = &text[paren_pos + 1..];
1538
1539 let dot_pos = accessor.rfind('.')?;
1541 let receiver = &accessor[..dot_pos];
1542 let method_name = &accessor[dot_pos + 1..];
1543
1544 let receiver_base = receiver.rsplit('.').next().unwrap_or(receiver);
1547 if !ROUTE_RECEIVER_NAMES.contains(&receiver_base) {
1548 return None;
1549 }
1550
1551 let path = extract_path_from_decorator_args(args_text)?;
1553
1554 let method_lower = method_name.to_ascii_lowercase();
1556 if ROUTE_METHOD_NAMES.contains(&method_lower.as_str()) {
1557 return Some((method_lower.to_ascii_uppercase(), path));
1559 }
1560
1561 if method_lower == "route" {
1562 let http_method = extract_method_from_route_args(args_text);
1564 return Some((http_method, path));
1565 }
1566
1567 None
1568}
1569
/// Extracts the route path from the text following a decorator's `(`.
///
/// The path is the content of the first quoted string in `args_text`: the
/// earliest `'` or `"` opens it and the next occurrence of that same quote
/// character closes it. Returns `None` when there is no quote, no closing
/// quote, or the quoted content is empty.
fn extract_path_from_decorator_args(args_text: &str) -> Option<String> {
    let args = args_text.trim();

    // Whichever quote character appears first decides the delimiter.
    let (open_idx, quote) = args
        .char_indices()
        .find(|&(_, ch)| ch == '\'' || ch == '"')?;

    let after_open = &args[open_idx + 1..];
    let close_idx = after_open.find(quote)?;
    let path = &after_open[..close_idx];

    (!path.is_empty()).then(|| path.to_string())
}
1608
1609fn extract_method_from_route_args(args_text: &str) -> String {
1614 let Some(methods_pos) = args_text.find("methods") else {
1616 return "GET".to_string();
1617 };
1618
1619 let after_methods = &args_text[methods_pos..];
1621 let Some(bracket_pos) = after_methods.find('[') else {
1622 return "GET".to_string();
1623 };
1624
1625 let after_bracket = &after_methods[bracket_pos + 1..];
1626
1627 let method_str = extract_first_string_literal(after_bracket);
1629 match method_str {
1630 Some(m) => m.to_ascii_uppercase(),
1631 None => "GET".to_string(),
1632 }
1633}
1634
/// Returns the content of the first quoted string literal in `text`.
///
/// The earliest `'` or `"` opens the literal and the next occurrence of the
/// same quote character closes it. Returns `None` when no quote is present,
/// the literal is never closed, or its content is empty.
fn extract_first_string_literal(text: &str) -> Option<String> {
    let haystack = text.trim();

    let open_idx = haystack.find(|ch: char| matches!(ch, '\'' | '"'))?;
    let quote = if haystack[open_idx..].starts_with('"') {
        '"'
    } else {
        '\''
    };

    // Everything up to the matching quote is the literal's content.
    match haystack[open_idx + 1..].split_once(quote) {
        Some((literal, _)) if !literal.is_empty() => Some(literal.to_string()),
        _ => None,
    }
}
1666
1667fn has_property_decorator(func_node: Node<'_>, content: &[u8]) -> bool {
1688 let Some(parent) = func_node.parent() else {
1690 return false;
1691 };
1692
1693 if parent.kind() != "decorated_definition" {
1695 return false;
1696 }
1697
1698 let mut cursor = parent.walk();
1700 for child in parent.children(&mut cursor) {
1701 if child.kind() == "decorator" {
1702 if let Ok(decorator_text) = child.utf8_text(content) {
1704 let decorator_text = decorator_text.trim();
1705 if decorator_text == "@property"
1707 || decorator_text.starts_with("@property(")
1708 || decorator_text.starts_with("@property (")
1709 {
1710 return true;
1711 }
1712 }
1713 }
1714 }
1715
1716 false
1717}
1718
/// Maps a Python identifier to a visibility label based on its underscores.
///
/// * Starts with `__` and does not end with `__` → `"private"`.
/// * Otherwise starts with `_` → `"protected"`. This includes dunder names
///   such as `__init__`, which fail the first test and land here.
/// * Anything else → `"public"`.
fn extract_visibility_from_name(name: &str) -> &'static str {
    let name_mangled = name.starts_with("__") && !name.ends_with("__");
    let underscore_led = name.starts_with('_');

    match (name_mangled, underscore_led) {
        (true, _) => "private",
        (false, true) => "protected",
        (false, false) => "public",
    }
}
1734
1735fn find_containing_scope(node: Node<'_>, content: &[u8], ast_graph: &ASTGraph) -> String {
1747 let mut current = node;
1748 let mut found_class_name: Option<String> = None;
1749
1750 while let Some(parent) = current.parent() {
1752 match parent.kind() {
1753 "function_definition" => {
1754 if let Some(ctx) = ast_graph.get_callable_context(parent.id()) {
1756 return ctx.qualified_name.clone();
1757 }
1758 }
1759 "class_definition" => {
1760 if found_class_name.is_none() {
1763 if let Some(name_node) = parent.child_by_field_name("name")
1765 && let Ok(class_name) = name_node.utf8_text(content)
1766 {
1767 found_class_name = Some(class_name.to_string());
1768 }
1769 }
1770 }
1771 _ => {}
1772 }
1773 current = parent;
1774 }
1775
1776 found_class_name.unwrap_or_default()
1778}
1779
1780fn extract_return_type_annotation(func_node: Node<'_>, content: &[u8]) -> Option<String> {
1787 let return_type_node = func_node.child_by_field_name("return_type")?;
1788 extract_type_from_node(return_type_node, content)
1789}
1790
1791fn extract_return_type_source_text(func_node: Node<'_>, content: &[u8]) -> Option<String> {
1810 let return_type_node = func_node.child_by_field_name("return_type")?;
1811 let text = return_type_node.utf8_text(content).ok()?.trim();
1812 if text.is_empty() {
1813 None
1814 } else {
1815 Some(text.to_string())
1816 }
1817}
1818
1819fn process_function_parameters(
1826 func_node: Node<'_>,
1827 content: &[u8],
1828 ast_graph: &ASTGraph,
1829 helper: &mut GraphBuildHelper,
1830) {
1831 let Some(params_node) = func_node.child_by_field_name("parameters") else {
1832 return;
1833 };
1834
1835 let scope_prefix = ast_graph
1837 .get_callable_context(func_node.id())
1838 .map_or("", |ctx| ctx.qualified_name.as_str());
1839
1840 for param in params_node.children(&mut params_node.walk()) {
1842 match param.kind() {
1845 "typed_parameter" | "typed_default_parameter" => {
1846 process_typed_parameter(param, content, scope_prefix, helper);
1847 }
1848 "identifier" | "default_parameter" => {}
1852 _ => {
1853 if param.child_by_field_name("type").is_some() {
1856 process_typed_parameter(param, content, scope_prefix, helper);
1857 }
1858 }
1859 }
1860 }
1861}
1862
1863fn process_typed_parameter(
1868 param: Node<'_>,
1869 content: &[u8],
1870 scope_prefix: &str,
1871 helper: &mut GraphBuildHelper,
1872) {
1873 let param_name = if let Some(name_node) = param.child_by_field_name("name") {
1875 name_node.utf8_text(content).ok()
1876 } else {
1877 param
1879 .children(&mut param.walk())
1880 .find(|c| c.kind() == "identifier")
1881 .and_then(|n| n.utf8_text(content).ok())
1882 };
1883
1884 let Some(param_name) = param_name else {
1885 return;
1886 };
1887
1888 if param_name == "self" || param_name == "cls" {
1890 return;
1891 }
1892
1893 let Some(type_node) = param.child_by_field_name("type") else {
1895 return;
1896 };
1897
1898 let Some(type_name) = extract_type_from_node(type_node, content) else {
1899 return;
1900 };
1901
1902 let qualified_param_name = if scope_prefix.is_empty() {
1905 format!(":{param_name}")
1907 } else {
1908 format!("{scope_prefix}:{param_name}")
1909 };
1910
1911 let param_id = helper.add_variable(&qualified_param_name, Some(span_from_node(param)));
1913
1914 let type_id = helper.add_type(&type_name, None);
1916
1917 helper.add_typeof_edge(param_id, type_id);
1919 helper.add_reference_edge(param_id, type_id);
1920}
1921
1922fn process_annotated_assignment(
1929 expr_stmt_node: Node<'_>,
1930 content: &[u8],
1931 ast_graph: &ASTGraph,
1932 helper: &mut GraphBuildHelper,
1933) {
1934 let scope_prefix = find_containing_scope(expr_stmt_node, content, ast_graph);
1937
1938 for child in expr_stmt_node.children(&mut expr_stmt_node.walk()) {
1940 if child.kind() == "assignment" {
1941 process_typed_assignment(child, content, &scope_prefix, helper);
1942 }
1943 }
1944}
1945
1946fn process_typed_assignment(
1950 assignment_node: Node<'_>,
1951 content: &[u8],
1952 scope_prefix: &str,
1953 helper: &mut GraphBuildHelper,
1954) {
1955 let Some(left) = assignment_node.child_by_field_name("left") else {
1960 return;
1961 };
1962
1963 let Some(type_node) = assignment_node.child_by_field_name("type") else {
1964 return;
1965 };
1966
1967 let Ok(var_name) = left.utf8_text(content) else {
1969 return;
1970 };
1971
1972 let Some(type_name) = extract_type_from_node(type_node, content) else {
1974 return;
1975 };
1976
1977 let qualified_var_name = if scope_prefix.is_empty() {
1981 var_name.to_string()
1983 } else if scope_prefix.contains('.') && !scope_prefix.contains(':') {
1984 format!("{scope_prefix}.{var_name}")
1986 } else {
1987 format!("{scope_prefix}:{var_name}")
1989 };
1990
1991 let var_id = helper.add_variable(&qualified_var_name, Some(span_from_node(assignment_node)));
1993
1994 let type_id = helper.add_type(&type_name, None);
1996
1997 helper.add_typeof_edge(var_id, type_id);
1999 helper.add_reference_edge(var_id, type_id);
2000}
2001
2002fn extract_type_from_node(type_node: Node<'_>, content: &[u8]) -> Option<String> {
2012 match type_node.kind() {
2013 "type" => {
2014 type_node
2016 .named_child(0)
2017 .and_then(|child| extract_type_from_node(child, content))
2018 }
2019 "identifier" => {
2020 type_node.utf8_text(content).ok().map(String::from)
2022 }
2023 "string" => {
2024 let text = type_node.utf8_text(content).ok()?;
2027 let trimmed = text.trim();
2028
2029 if (trimmed.starts_with('"') && trimmed.ends_with('"'))
2031 || (trimmed.starts_with('\'') && trimmed.ends_with('\''))
2032 {
2033 let unquoted = &trimmed[1..trimmed.len() - 1];
2034 Some(normalize_union_type(unquoted))
2036 } else {
2037 Some(trimmed.to_string())
2038 }
2039 }
2040 "binary_operator" => {
2041 if let Some(left) = type_node.child_by_field_name("left") {
2044 extract_type_from_node(left, content)
2045 } else {
2046 type_node
2048 .utf8_text(content)
2049 .ok()
2050 .map(|text| normalize_union_type(text.trim()))
2051 }
2052 }
2053 "generic_type" | "subscript" => {
2054 if let Some(value_node) = type_node.child_by_field_name("value") {
2058 extract_type_from_node(value_node, content)
2059 } else {
2060 type_node
2062 .named_child(0)
2063 .and_then(|child| extract_type_from_node(child, content))
2064 .or_else(|| {
2065 type_node.utf8_text(content).ok().and_then(|text| {
2067 text.split('[').next().map(|s| s.trim().to_string())
2069 })
2070 })
2071 }
2072 }
2073 "attribute" => {
2074 type_node.utf8_text(content).ok().map(String::from)
2076 }
2077 "list" | "tuple" | "set" => {
2078 type_node.utf8_text(content).ok().map(String::from)
2080 }
2081 _ => {
2082 let text = type_node.utf8_text(content).ok()?;
2085 let trimmed = text.trim();
2086
2087 if trimmed.contains('[') {
2089 trimmed.split('[').next().map(|s| s.trim().to_string())
2090 } else {
2091 Some(normalize_union_type(trimmed))
2093 }
2094 }
2095 }
2096}
2097
/// Collapses a `A | B | ...` union annotation to its first member.
///
/// When `type_str` contains a `|`, the text before the first `|` is returned
/// trimmed. Otherwise the input is returned unchanged (not trimmed).
fn normalize_union_type(type_str: &str) -> String {
    match type_str.split_once('|') {
        Some((first_member, _)) => first_member.trim().to_string(),
        None => type_str.to_string(),
    }
}
2112
// Unit tests for the pure string helpers of this file: name simplification,
// FFI library naming, and route-decorator parsing.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- simple_name -------------------------------------------------------

    #[test]
    fn test_simple_name_extracts_dotted_identifiers() {
        // Dotted names keep only their last segment.
        assert_eq!(simple_name("module.func"), "func");
        assert_eq!(simple_name("obj.method"), "method");
        assert_eq!(simple_name("package.module.func"), "func");
        assert_eq!(simple_name("self.helper"), "helper");

        // Names without a dot, and the empty string, pass through unchanged.
        assert_eq!(simple_name("function"), "function");
        assert_eq!(simple_name(""), "");
    }

    // ---- ffi_library_simple_name -------------------------------------------

    #[test]
    fn test_ffi_library_simple_name_extracts_library_base_names() {
        // Shared-object extension is removed.
        assert_eq!(ffi_library_simple_name("libfoo.so"), "libfoo");
        assert_eq!(ffi_library_simple_name("lib1.so"), "lib1");
        assert_eq!(ffi_library_simple_name("lib2.so"), "lib2");

        // Windows and macOS library extensions are removed as well.
        assert_eq!(ffi_library_simple_name("kernel32.dll"), "kernel32");
        assert_eq!(ffi_library_simple_name("libSystem.dylib"), "libSystem");

        // A version suffix after `.so` is removed too.
        assert_eq!(ffi_library_simple_name("libc.so.6"), "libc");

        // Names without an extension are returned as-is.
        assert_eq!(ffi_library_simple_name("kernel32"), "kernel32");
        assert_eq!(ffi_library_simple_name("numpy"), "numpy");

        // `$`-prefixed variable references are preserved.
        assert_eq!(ffi_library_simple_name("$libname"), "$libname");

        // Edge cases: empty input and a minimal library name.
        assert_eq!(ffi_library_simple_name(""), "");
        assert_eq!(ffi_library_simple_name("lib.so"), "lib");
    }

    #[test]
    fn test_ffi_library_simple_name_prevents_duplicate_edges() {
        // Distinct libraries must not collapse onto the same simple name.
        let name1 = ffi_library_simple_name("lib1.so");
        let name2 = ffi_library_simple_name("lib2.so");

        assert_ne!(
            name1, name2,
            "lib1.so and lib2.so must produce different simple names"
        );
        assert_eq!(name1, "lib1");
        assert_eq!(name2, "lib2");
    }

    #[test]
    fn test_ffi_library_simple_name_handles_directory_paths() {
        // Absolute and nested paths: dots in directory names must not confuse
        // the extension handling.
        assert_eq!(ffi_library_simple_name("/opt/v1.2/libfoo.so"), "libfoo");
        assert_eq!(
            ffi_library_simple_name("/usr/lib/x86_64-linux-gnu/libc.so.6"),
            "libc"
        );
        assert_eq!(ffi_library_simple_name("libs/lib1.so"), "lib1");

        // Relative paths starting with `./` or `../`.
        assert_eq!(ffi_library_simple_name("./libs/kernel32.dll"), "kernel32");
        assert_eq!(
            ffi_library_simple_name("../lib/libSystem.dylib"),
            "libSystem"
        );
    }

    // ---- parse_route_decorator_text ----------------------------------------

    #[test]
    fn test_parse_route_decorator_app_route_default_get() {
        // `route(...)` without `methods` defaults to GET.
        let result = parse_route_decorator_text("app.route('/api/users')");
        assert_eq!(result, Some(("GET".to_string(), "/api/users".to_string())));
    }

    #[test]
    fn test_parse_route_decorator_app_route_with_methods_post() {
        let result = parse_route_decorator_text("app.route('/api/users', methods=['POST'])");
        assert_eq!(result, Some(("POST".to_string(), "/api/users".to_string())));
    }

    #[test]
    fn test_parse_route_decorator_app_route_with_methods_put_double_quotes() {
        let result = parse_route_decorator_text("app.route(\"/api/items\", methods=[\"PUT\"])");
        assert_eq!(result, Some(("PUT".to_string(), "/api/items".to_string())));
    }

    #[test]
    fn test_parse_route_decorator_app_get() {
        let result = parse_route_decorator_text("app.get('/api/users')");
        assert_eq!(result, Some(("GET".to_string(), "/api/users".to_string())));
    }

    #[test]
    fn test_parse_route_decorator_app_post() {
        let result = parse_route_decorator_text("app.post('/api/items')");
        assert_eq!(result, Some(("POST".to_string(), "/api/items".to_string())));
    }

    #[test]
    fn test_parse_route_decorator_app_put() {
        let result = parse_route_decorator_text("app.put('/api/items/1')");
        assert_eq!(
            result,
            Some(("PUT".to_string(), "/api/items/1".to_string()))
        );
    }

    #[test]
    fn test_parse_route_decorator_app_delete() {
        let result = parse_route_decorator_text("app.delete('/api/items/1')");
        assert_eq!(
            result,
            Some(("DELETE".to_string(), "/api/items/1".to_string()))
        );
    }

    #[test]
    fn test_parse_route_decorator_app_patch() {
        let result = parse_route_decorator_text("app.patch('/api/items/1')");
        assert_eq!(
            result,
            Some(("PATCH".to_string(), "/api/items/1".to_string()))
        );
    }

    #[test]
    fn test_parse_route_decorator_router_get_fastapi() {
        let result = parse_route_decorator_text("router.get('/api/users')");
        assert_eq!(result, Some(("GET".to_string(), "/api/users".to_string())));
    }

    #[test]
    fn test_parse_route_decorator_router_post_fastapi() {
        let result = parse_route_decorator_text("router.post('/api/items')");
        assert_eq!(result, Some(("POST".to_string(), "/api/items".to_string())));
    }

    #[test]
    fn test_parse_route_decorator_blueprint_route() {
        let result = parse_route_decorator_text("blueprint.route('/health')");
        assert_eq!(result, Some(("GET".to_string(), "/health".to_string())));
    }

    #[test]
    fn test_parse_route_decorator_unknown_receiver_returns_none() {
        // `server` is not one of the accepted receiver names.
        let result = parse_route_decorator_text("server.get('/api/users')");
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_route_decorator_unknown_method_returns_none() {
        // `options` is neither a listed verb shortcut nor `route`.
        let result = parse_route_decorator_text("app.options('/api/users')");
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_route_decorator_no_parens_returns_none() {
        let result = parse_route_decorator_text("app.route");
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_route_decorator_no_dot_returns_none() {
        let result = parse_route_decorator_text("route('/api/users')");
        assert_eq!(result, None);
    }

    // ---- extract_path_from_decorator_args ----------------------------------

    #[test]
    fn test_extract_path_from_decorator_args_single_quotes() {
        let result = extract_path_from_decorator_args("'/api/users')");
        assert_eq!(result, Some("/api/users".to_string()));
    }

    #[test]
    fn test_extract_path_from_decorator_args_double_quotes() {
        let result = extract_path_from_decorator_args("\"/api/items\")");
        assert_eq!(result, Some("/api/items".to_string()));
    }

    #[test]
    fn test_extract_path_from_decorator_args_empty_returns_none() {
        // An empty quoted path is treated as "no path".
        let result = extract_path_from_decorator_args("'')");
        assert_eq!(result, None);
    }

    #[test]
    fn test_extract_path_from_decorator_args_no_string_returns_none() {
        let result = extract_path_from_decorator_args("some_var)");
        assert_eq!(result, None);
    }

    // ---- extract_method_from_route_args ------------------------------------

    #[test]
    fn test_extract_method_from_route_args_with_methods_keyword() {
        let result = extract_method_from_route_args("'/api/users', methods=['POST'])");
        assert_eq!(result, "POST");
    }

    #[test]
    fn test_extract_method_from_route_args_without_methods_keyword() {
        let result = extract_method_from_route_args("'/api/users')");
        assert_eq!(result, "GET");
    }

    #[test]
    fn test_extract_method_from_route_args_delete() {
        let result = extract_method_from_route_args("'/api/items', methods=['DELETE'])");
        assert_eq!(result, "DELETE");
    }

    #[test]
    fn test_extract_method_from_route_args_lowercase_normalizes() {
        // The method is always reported upper-cased.
        let result = extract_method_from_route_args("'/x', methods=['put'])");
        assert_eq!(result, "PUT");
    }

    // ---- extract_first_string_literal --------------------------------------

    #[test]
    fn test_extract_first_string_literal_single_quotes() {
        let result = extract_first_string_literal("'POST']");
        assert_eq!(result, Some("POST".to_string()));
    }

    #[test]
    fn test_extract_first_string_literal_double_quotes() {
        let result = extract_first_string_literal("\"DELETE\"]");
        assert_eq!(result, Some("DELETE".to_string()));
    }

    #[test]
    fn test_extract_first_string_literal_empty_returns_none() {
        // Input without any quote character contains no literal.
        let result = extract_first_string_literal("no quotes here");
        assert_eq!(result, None);
    }
}
2360
// Tests for the Python shape mapping, run against the reference fixture
// `test-fixtures/shape/reference/sample.py`.
#[cfg(test)]
mod shape_tests {
    use super::{cf_bucket_for_python_kind, python_shape_mapping};
    use sqry_core::graph::unified::build::shape::{
        CfBucket, ShapeBudget, ShapeMapping, compute_shape_descriptor,
    };

    // Fixture source, embedded at compile time relative to the crate manifest.
    const SAMPLE: &str = include_str!(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../test-fixtures/shape/reference/sample.py"
    ));

    // Parses `src` with the tree-sitter Python grammar; panics on failure.
    fn parse(src: &str) -> tree_sitter::Tree {
        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
        let mut p = tree_sitter::Parser::new();
        p.set_language(&lang).expect("load python grammar");
        p.parse(src, None).expect("parse")
    }

    // Finds the `function_definition` node called `name` via an explicit-stack
    // traversal; panics when no such function exists.
    // NOTE: node text is always read from `SAMPLE`, so `tree` must have been
    // parsed from `SAMPLE`.
    fn function_named<'t>(tree: &'t tree_sitter::Tree, name: &str) -> tree_sitter::Node<'t> {
        let root = tree.root_node();
        let mut stack = vec![root];
        while let Some(node) = stack.pop() {
            if node.kind() == "function_definition"
                && node
                    .child_by_field_name("name")
                    .and_then(|n| n.utf8_text(SAMPLE.as_bytes()).ok())
                    == Some(name)
            {
                return node;
            }
            let mut c = node.walk();
            for ch in node.children(&mut c) {
                stack.push(ch);
            }
        }
        panic!("no function_definition named {name}");
    }

    #[test]
    fn cf_table_is_non_empty() {
        // Counts how many grammar node kinds the mapping assigns to a bucket.
        let mapping = python_shape_mapping();
        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
        let mut covered = 0;
        for id in 0..lang.node_kind_count() {
            // NOTE(review): `as u16` truncates silently; this assumes the
            // grammar has no more than u16::MAX node kinds — confirm.
            let kid = id as u16;
            if mapping.cf_bucket(kid).is_some() {
                covered += 1;
            }
        }
        assert!(
            covered >= 10,
            "expected many python CF kinds mapped, got {covered}"
        );
    }

    #[test]
    fn histogram_covers_real_control_flow() {
        // The fixture's `classify` function must hit each listed bucket at
        // least once.
        let tree = parse(SAMPLE);
        let func = function_named(&tree, "classify");
        let d = compute_shape_descriptor(
            func,
            SAMPLE.as_bytes(),
            python_shape_mapping(),
            &ShapeBudget::default(),
        );
        assert!(!d.is_unhashable(), "classify body must be hashable");
        for bucket in [
            CfBucket::Branch,
            CfBucket::Loop,
            CfBucket::Match,
            CfBucket::Try,
            CfBucket::Catch,
            CfBucket::Throw,
            CfBucket::Resource,
            CfBucket::Return,
            CfBucket::BreakContinue,
            CfBucket::Call,
            CfBucket::Assign,
            CfBucket::Comprehension,
        ] {
            assert!(
                d.cf_histogram[bucket.index()] >= 1,
                "classify must exercise {bucket:?}"
            );
        }
    }

    #[test]
    fn async_body_covers_yield_await_closure() {
        // The fixture's `fetch` function covers the buckets `classify` does not.
        let tree = parse(SAMPLE);
        let func = function_named(&tree, "fetch");
        let d = compute_shape_descriptor(
            func,
            SAMPLE.as_bytes(),
            python_shape_mapping(),
            &ShapeBudget::default(),
        );
        assert!(d.cf_histogram[CfBucket::Await.index()] >= 1, "await");
        assert!(d.cf_histogram[CfBucket::Yield.index()] >= 1, "yield");
        assert!(
            d.cf_histogram[CfBucket::Closure.index()] >= 1,
            "lambda closure"
        );
        assert!(
            d.signature_shape.has_return_annotation,
            "-> str return annotation"
        );
    }

    #[test]
    fn signature_shape_reads_arity_and_splats() {
        // Checks the signature facts extracted for `classify`.
        let tree = parse(SAMPLE);
        let func = function_named(&tree, "classify");
        let mapping = python_shape_mapping();
        let shape = mapping.signature_shape(func, SAMPLE.as_bytes());
        assert_eq!(
            shape.arity_positional, 2,
            "values + threshold are positional"
        );
        assert!(shape.has_defaults, "threshold=0");
        assert!(shape.has_varargs, "*extra");
        assert!(shape.has_kwargs, "**opts");
    }

    #[test]
    fn unknown_kind_maps_to_none() {
        // `module` and `identifier` must not be assigned a bucket.
        assert!(cf_bucket_for_python_kind("module").is_none());
        assert!(cf_bucket_for_python_kind("identifier").is_none());
    }
}