Skip to main content

sqry_lang_python/relations/
graph_builder.rs

1use std::{collections::HashMap, path::Path, sync::OnceLock};
2
3use sqry_core::graph::unified::StagingGraph;
4use sqry_core::graph::unified::build::GraphBuildHelper;
5use sqry_core::graph::unified::build::helper::CalleeKindHint;
6use sqry_core::graph::unified::build::shape::{CfBucket, ShapeMapping};
7use sqry_core::graph::unified::edge::FfiConvention;
8use sqry_core::graph::unified::edge::kind::TypeOfContext;
9use sqry_core::graph::unified::node::NodeId as UnifiedNodeId;
10use sqry_core::graph::unified::storage::shape::SignatureShape;
11use sqry_core::graph::{GraphBuilder, GraphBuilderError, GraphResult, Language, Span};
12use tree_sitter::{Node, Tree};
13
14use super::local_scopes;
15
// Scope depth used by `PythonGraphBuilder::default()`.
16const DEFAULT_SCOPE_DEPTH: usize = 4;
// Module names treated as C-implemented parts of the Python standard library:
// the underscore-prefixed accelerator modules plus `array`, `math` and `cmath`.
// NOTE(review): presumably consulted by `is_native_extension_import`, whose
// definition is not visible in this part of the file — confirm.
17const STD_C_MODULES: &[&str] = &[
18    "_ctypes",
19    "_socket",
20    "_ssl",
21    "_hashlib",
22    "_json",
23    "_pickle",
24    "_struct",
25    "_sqlite3",
26    "_decimal",
27    "_lzma",
28    "_bz2",
29    "_zlib",
30    "_elementtree",
31    "_csv",
32    "_datetime",
33    "_heapq",
34    "_bisect",
35    "_random",
36    "_collections",
37    "_functools",
38    "_itertools",
39    "_operator",
40    "_io",
41    "_thread",
42    "_multiprocessing",
43    "_posixsubprocess",
44    "_asyncio",
45    "array",
46    "math",
47    "cmath",
48];
// Top-level package names treated as shipping native (C) extension code.
// NOTE(review): `sqlite3` is a standard-library module, not a third-party
// package — confirm it is listed here on purpose.
// NOTE(review): the matching rule (exact name vs. dotted prefix) lives in code
// not visible here — verify before adding entries.
49const THIRD_PARTY_C_PACKAGES: &[&str] = &[
50    "numpy",
51    "pandas",
52    "scipy",
53    "sklearn",
54    "cv2",
55    "PIL",
56    "torch",
57    "tensorflow",
58    "lxml",
59    "psycopg2",
60    "MySQLdb",
61    "sqlite3",
62    "cryptography",
63    "bcrypt",
64    "regex",
65    "ujson",
66    "orjson",
67    "msgpack",
68    "greenlet",
69    "gevent",
70    "uvloop",
71];
72
/// Builds the unified `CodeGraph` representation of a single Python file.
///
/// The only configuration is `max_scope_depth`, which is handed to
/// `ASTGraph::from_tree` each time a file is processed.
#[derive(Clone, Copy, Debug)]
pub struct PythonGraphBuilder {
    /// Depth limit forwarded to the AST context builder.
    max_scope_depth: usize,
}
78
79impl Default for PythonGraphBuilder {
80    fn default() -> Self {
81        Self {
82            max_scope_depth: DEFAULT_SCOPE_DEPTH,
83        }
84    }
85}
86
87impl PythonGraphBuilder {
88    #[must_use]
89    pub fn new(max_scope_depth: usize) -> Self {
90        Self { max_scope_depth }
91    }
92}
93
94impl GraphBuilder for PythonGraphBuilder {
95    fn build_graph(
96        &self,
97        tree: &Tree,
98        content: &[u8],
99        file: &Path,
100        staging: &mut StagingGraph,
101    ) -> GraphResult<()> {
102        // Create helper for staging graph population
103        let mut helper = GraphBuildHelper::new(staging, file, Language::Python);
104
105        // Build AST graph for call context tracking
106        let ast_graph = ASTGraph::from_tree(tree, content, self.max_scope_depth).map_err(|e| {
107            GraphBuilderError::ParseError {
108                span: Span::default(),
109                reason: e,
110            }
111        })?;
112
113        // Check if __all__ is defined in the module
114        let has_all = has_all_assignment(tree.root_node(), content);
115
116        // Build local variable scope tree
117        let mut scope_tree = local_scopes::build(tree.root_node(), content)?;
118
119        // Create recursion guard for tree walking
120        let recursion_limits =
121            sqry_core::config::RecursionLimits::load_or_default().map_err(|e| {
122                GraphBuilderError::ParseError {
123                    span: Span::default(),
124                    reason: format!("Failed to load recursion limits: {e}"),
125                }
126            })?;
127        let file_ops_depth = recursion_limits.effective_file_ops_depth().map_err(|e| {
128            GraphBuilderError::ParseError {
129                span: Span::default(),
130                reason: format!("Invalid file_ops_depth configuration: {e}"),
131            }
132        })?;
133        let mut guard =
134            sqry_core::query::security::RecursionGuard::new(file_ops_depth).map_err(|e| {
135                GraphBuilderError::ParseError {
136                    span: Span::default(),
137                    reason: format!("Failed to create recursion guard: {e}"),
138                }
139            })?;
140
141        // Walk tree to find functions, classes, methods, calls, and imports
142        walk_tree_for_graph(
143            tree.root_node(),
144            content,
145            &ast_graph,
146            &mut helper,
147            has_all,
148            &mut guard,
149            &mut scope_tree,
150        )?;
151
152        Ok(())
153    }
154
155    fn language(&self) -> Language {
156        Language::Python
157    }
158
159    fn shape_mapping(&self) -> Option<&dyn ShapeMapping> {
160        Some(python_shape_mapping())
161    }
162}
163
164/// Per-language [`ShapeMapping`] for Python: the SPEC anchor for the
165/// identifier-blind body-shape descriptor.
166///
167/// Holds a precomputed `kind_id -> CfBucket` table so the hot shape walk does a
168/// single array index per node instead of a grammar string lookup. The table is
169/// built once from the tree-sitter-python grammar and shared process-wide via
170/// [`python_shape_mapping`]. Everything except this mapping is the one shared
171/// `compute_shape_descriptor` routine in sqry-core.
172pub struct PythonShapeMapping {
173    cf_by_kind_id: Vec<Option<CfBucket>>,
174}
175
176impl PythonShapeMapping {
177    /// Build the `kind_id -> CfBucket` table from the tree-sitter-python grammar.
178    fn build() -> Self {
179        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
180        let count = lang.node_kind_count();
181        let mut cf_by_kind_id = vec![None; count];
182        for (id, slot) in cf_by_kind_id.iter_mut().enumerate() {
183            let Ok(kind_id) = u16::try_from(id) else {
184                break;
185            };
186            if !lang.node_kind_is_named(kind_id) {
187                continue;
188            }
189            if let Some(name) = lang.node_kind_for_id(kind_id) {
190                *slot = cf_bucket_for_python_kind(name);
191            }
192        }
193        Self { cf_by_kind_id }
194    }
195}
196
197impl ShapeMapping for PythonShapeMapping {
198    fn cf_bucket(&self, ts_node_kind_id: u16) -> Option<CfBucket> {
199        self.cf_by_kind_id
200            .get(ts_node_kind_id as usize)
201            .copied()
202            .flatten()
203    }
204
205    fn signature_shape(&self, fn_node: Node, _src: &[u8]) -> SignatureShape {
206        let mut shape = SignatureShape::default();
207        if let Some(params) = fn_node.child_by_field_name("parameters") {
208            // Python keyword-only parameters follow a bare `*` or a `*args`
209            // splat. Track whether we have crossed that boundary so positional
210            // and keyword-only arities are counted into the right slot.
211            let mut keyword_only = false;
212            let mut cursor = params.walk();
213            for child in params.named_children(&mut cursor) {
214                match child.kind() {
215                    // `*args`: variadic AND the start of the keyword-only region.
216                    "list_splat_pattern" => {
217                        shape.has_varargs = true;
218                        keyword_only = true;
219                    }
220                    // `**kwargs`.
221                    "dictionary_splat_pattern" => shape.has_kwargs = true,
222                    // A plain positional / keyword parameter (`x`).
223                    "identifier" | "typed_parameter" => {
224                        bump_arity(&mut shape, keyword_only);
225                    }
226                    // A parameter carrying a default value (`x=1`, `x: int = 1`).
227                    "default_parameter" | "typed_default_parameter" => {
228                        shape.has_defaults = true;
229                        bump_arity(&mut shape, keyword_only);
230                    }
231                    _ => {}
232                }
233            }
234        }
235        shape.has_return_annotation = fn_node.child_by_field_name("return_type").is_some();
236        shape
237    }
238}
239
240/// Count one parameter into the positional or keyword-only arity slot.
241fn bump_arity(shape: &mut SignatureShape, keyword_only: bool) {
242    if keyword_only {
243        shape.arity_keyword_only = shape.arity_keyword_only.saturating_add(1);
244    } else {
245        shape.arity_positional = shape.arity_positional.saturating_add(1);
246    }
247}
248
249/// Map one tree-sitter-python grammar node-kind name to its canonical
250/// control-flow bucket. Additive-only: the bucket set is frozen (see
251/// [`CfBucket`]), so new Python kinds extend the match, never reorder the buckets.
252fn cf_bucket_for_python_kind(name: &str) -> Option<CfBucket> {
253    let bucket = match name {
254        "if_statement" | "elif_clause" | "conditional_expression" => CfBucket::Branch,
255        "for_statement" | "while_statement" => CfBucket::Loop,
256        "match_statement" | "case_clause" => CfBucket::Match,
257        "try_statement" => CfBucket::Try,
258        "except_clause" | "except_group_clause" => CfBucket::Catch,
259        "raise_statement" => CfBucket::Throw,
260        // `with`/`async with` is Python's resource-acquisition construct.
261        "with_statement" => CfBucket::Resource,
262        "return_statement" => CfBucket::Return,
263        "yield" => CfBucket::Yield,
264        "await" => CfBucket::Await,
265        "break_statement" | "continue_statement" => CfBucket::BreakContinue,
266        "call" => CfBucket::Call,
267        "assignment" | "augmented_assignment" | "named_expression" => CfBucket::Assign,
268        "lambda" => CfBucket::Closure,
269        "list_comprehension"
270        | "dictionary_comprehension"
271        | "set_comprehension"
272        | "generator_expression" => CfBucket::Comprehension,
273        _ => return None,
274    };
275    Some(bucket)
276}
277
278/// The process-wide Python shape mapping, built once on first use.
279#[must_use]
280pub fn python_shape_mapping() -> &'static PythonShapeMapping {
281    static MAPPING: OnceLock<PythonShapeMapping> = OnceLock::new();
282    MAPPING.get_or_init(PythonShapeMapping::build)
283}
284
285/// Check if the module defines `__all__`.
286fn has_all_assignment(node: Node, content: &[u8]) -> bool {
287    let mut cursor = node.walk();
288    for child in node.children(&mut cursor) {
289        if child.kind() == "expression_statement" {
290            // Check for __all__ assignment
291            let assignment = child
292                .children(&mut child.walk())
293                .find(|c| c.kind() == "assignment" || c.kind() == "augmented_assignment");
294
295            if let Some(assignment) = assignment
296                && let Some(left) = assignment.child_by_field_name("left")
297                && let Ok(left_text) = left.utf8_text(content)
298                && left_text.trim() == "__all__"
299            {
300                return true;
301            }
302        }
303    }
304    false
305}
306
307/// Walk the tree and populate the staging graph.
308/// # Errors
309///
310/// Returns [`GraphBuilderError`] if graph operations fail or recursion depth exceeds the guard's limit.
311#[allow(clippy::too_many_lines)]
312fn walk_tree_for_graph(
313    node: Node,
314    content: &[u8],
315    ast_graph: &ASTGraph,
316    helper: &mut GraphBuildHelper,
317    has_all: bool,
318    guard: &mut sqry_core::query::security::RecursionGuard,
319    scope_tree: &mut local_scopes::PythonScopeTree,
320) -> GraphResult<()> {
321    guard.enter().map_err(|e| GraphBuilderError::ParseError {
322        span: Span::default(),
323        reason: format!("Recursion limit exceeded: {e}"),
324    })?;
325
326    match node.kind() {
327        "class_definition" => {
328            // Extract class name
329            if let Some(name_node) = node.child_by_field_name("name")
330                && let Ok(class_name) = name_node.utf8_text(content)
331            {
332                let span = span_from_node(node);
333
334                // Build qualified class name from scope
335                let qualified_name = class_name.to_string();
336
337                // Add class node. Real class declaration (issue #394): opt the
338                // dual-use add_class bare helper into is_definition = true.
339                let class_id = helper.add_class(&qualified_name, Some(span));
340                helper.mark_definition(class_id);
341
342                // Process inheritance (base classes)
343                process_class_inheritance(node, content, class_id, helper);
344
345                // Note: Class body annotations are processed via normal recursion in walk_tree_for_graph
346
347                // Export public classes at module level (only if __all__ is not defined)
348                if !has_all && is_module_level(node) && is_public_name(class_name) {
349                    export_from_file_module(helper, class_id);
350                }
351            }
352        }
353        "expression_statement" => {
354            // Check for __all__ assignment (exports)
355            process_all_assignment(node, content, helper);
356
357            // Check for annotated assignments (type hints on variables)
358            process_annotated_assignment(node, content, ast_graph, helper);
359        }
360        "function_definition" => {
361            // Extract function context from AST graph
362            if let Some(call_context) = ast_graph.get_callable_context(node.id()) {
363                let span = span_from_node(node);
364
365                // Extract visibility from function name
366                let func_name = node
367                    .child_by_field_name("name")
368                    .and_then(|n| n.utf8_text(content).ok())
369                    .unwrap_or("");
370                let visibility = extract_visibility_from_name(func_name);
371
372                // Check if this is a property (has @property decorator)
373                let is_property = has_property_decorator(node, content);
374
375                // Extract return type annotation for signature (normalized — strips
376                // generics/unions/quotes for human-readable display).
377                let return_type = extract_return_type_annotation(node, content);
378
379                // Extract byte-exact source text of the return-type annotation for
380                // the `TypeOf { context: Return }` edge consumed by `returns:<Type>`
381                // queries. This text is intentionally NOT normalized — `Optional[int]`,
382                // `List[Dict[str, int]]`, `pd.DataFrame`, `"User"` are all preserved
383                // verbatim so byte-exact predicates work as documented.
384                let return_type_source = extract_return_type_source_text(node, content);
385
386                // Add function/method/property node
387                let function_id = if is_property && call_context.is_method {
388                    // Property node
389                    helper.add_node_with_visibility(
390                        &call_context.qualified_name,
391                        Some(span),
392                        sqry_core::graph::unified::node::NodeKind::Property,
393                        Some(visibility),
394                    )
395                } else if call_context.is_method {
396                    // Regular method with signature
397                    if return_type.is_some() {
398                        helper.add_method_with_signature(
399                            &call_context.qualified_name,
400                            Some(span),
401                            call_context.is_async,
402                            false, // Python doesn't have static methods in the same way
403                            Some(visibility),
404                            return_type.as_deref(),
405                        )
406                    } else {
407                        helper.add_method_with_visibility(
408                            &call_context.qualified_name,
409                            Some(span),
410                            call_context.is_async,
411                            false,
412                            Some(visibility),
413                        )
414                    }
415                } else {
416                    // Regular function with signature
417                    if return_type.is_some() {
418                        helper.add_function_with_signature(
419                            &call_context.qualified_name,
420                            Some(span),
421                            call_context.is_async,
422                            false, // Python doesn't have unsafe
423                            Some(visibility),
424                            return_type.as_deref(),
425                        )
426                    } else {
427                        helper.add_function_with_visibility(
428                            &call_context.qualified_name,
429                            Some(span),
430                            call_context.is_async,
431                            false,
432                            Some(visibility),
433                        )
434                    }
435                };
436
437                // Emit `TypeOf { context: Return }` edge for the return type
438                // annotation when present. Property nodes (Python `@property`) and
439                // un-annotated functions get no edge — `extract_return_type_source_text`
440                // returns `None` for `def foo():` (no `-> Type`).
441                //
442                // The type-text is byte-exact source from the annotation node so
443                // `returns:Optional[int]`, `returns:pd.DataFrame`, etc. work as
444                // documented. A paired Reference edge is also emitted to keep
445                // typeof/reference-edge invariants in sync with C# / Go / Kotlin /
446                // TypeScript plugins.
447                //
448                // The synthesized Type node is anchored at the return-type
449                // annotation's span (mirroring the Rust precedent in
450                // `sqry-lang-rust/src/relations/graph_builder.rs`) so downstream
451                // consumers (LSP `textDocument/documentSymbol`, MCP
452                // `get_document_symbols`) report a concrete source location
453                // rather than line 0.
454                if !(is_property && call_context.is_method)
455                    && let Some(annotation_text) = return_type_source.as_deref()
456                    && let Some(return_type_node) = node.child_by_field_name("return_type")
457                {
458                    let type_span = span_from_node(return_type_node);
459                    let type_id = helper.add_type(annotation_text, Some(type_span));
460                    helper.add_typeof_edge_with_context(
461                        function_id,
462                        type_id,
463                        Some(TypeOfContext::Return),
464                        Some(0),
465                        Some(call_context.qualified_name.as_str()),
466                    );
467                    helper.add_reference_edge(function_id, type_id);
468                }
469
470                // Check for HTTP route decorators (Flask/FastAPI)
471                if let Some((http_method, route_path)) = extract_route_decorator_info(node, content)
472                {
473                    let endpoint_name = format!("route::{http_method}::{route_path}");
474                    let endpoint_id = helper.add_endpoint(&endpoint_name, Some(span));
475                    helper.add_contains_edge(endpoint_id, function_id);
476                }
477
478                // Process parameters to create TypeOf and Reference edges for type hints
479                process_function_parameters(node, content, ast_graph, helper);
480
481                // Export public functions at module level (not methods, only if __all__ is not defined)
482                if !has_all
483                    && !call_context.is_method
484                    && is_module_level(node)
485                    && let Some(name_node) = node.child_by_field_name("name")
486                    && let Ok(func_name) = name_node.utf8_text(content)
487                    && is_public_name(func_name)
488                {
489                    export_from_file_module(helper, function_id);
490                }
491            }
492        }
493        "call" => {
494            // Check for FFI patterns first (ctypes, cffi)
495            let is_ffi = build_ffi_call_edge(ast_graph, node, content, helper)?;
496            if !is_ffi {
497                // Not an FFI call - build regular call edge
498                if let Ok(Some((caller_qname, callee_qname, argument_count, is_awaited))) =
499                    build_call_for_staging(ast_graph, node, content)
500                {
501                    // Ensure both nodes exist
502                    let call_context = ast_graph.get_callable_context(node.id());
503                    let _is_async = call_context.is_some_and(|c| c.is_async);
504
505                    let call_span = span_from_node(node);
506                    let source_id =
507                        helper.ensure_callee(&caller_qname, call_span, CalleeKindHint::Function);
508                    let target_id =
509                        helper.ensure_callee(&callee_qname, call_span, CalleeKindHint::Function);
510
511                    // Add call edge
512                    let argument_count = u8::try_from(argument_count).unwrap_or(u8::MAX);
513                    helper.add_call_edge_full_with_span(
514                        source_id,
515                        target_id,
516                        argument_count,
517                        is_awaited,
518                        vec![call_span],
519                    );
520                }
521            }
522        }
523        "import_statement" | "import_from_statement" => {
524            // Build import edge
525            if let Ok(Some((from_qname, to_qname))) =
526                build_import_for_staging(node, content, helper)
527            {
528                // Ensure both module nodes exist
529                let from_id = helper.add_import(&from_qname, None);
530                let to_id = helper.add_import(&to_qname, Some(span_from_node(node)));
531
532                // Add import edge
533                helper.add_import_edge(from_id, to_id);
534
535                // Check if this imports a known native C extension module
536                if is_native_extension_import(&to_qname) {
537                    build_native_import_ffi_edge(&to_qname, node, helper);
538                }
539            }
540        }
541        "identifier" => {
542            // Local variable reference tracking
543            local_scopes::handle_identifier_for_reference(node, content, scope_tree, helper);
544        }
545        _ => {}
546    }
547
548    // Recurse into children
549    let mut cursor = node.walk();
550    for child in node.children(&mut cursor) {
551        walk_tree_for_graph(
552            child, content, ast_graph, helper, has_all, guard, scope_tree,
553        )?;
554    }
555
556    guard.exit();
557    Ok(())
558}
559
560/// Build call edge information for the staging graph.
561fn build_call_for_staging(
562    ast_graph: &ASTGraph,
563    call_node: Node<'_>,
564    content: &[u8],
565) -> GraphResult<Option<(String, String, usize, bool)>> {
566    // Get or create module-level context for top-level calls
567    let module_context;
568    let call_context = if let Some(ctx) = ast_graph.get_callable_context(call_node.id()) {
569        ctx
570    } else {
571        // Create synthetic module-level context for top-level calls
572        module_context = CallContext {
573            qualified_name: "<module>".to_string(),
574            span: (0, content.len()),
575            is_async: false,
576            is_method: false,
577            class_name: None,
578        };
579        &module_context
580    };
581
582    let Some(callee_expr) = call_node.child_by_field_name("function") else {
583        return Ok(None);
584    };
585
586    let callee_text = callee_expr
587        .utf8_text(content)
588        .map_err(|_| GraphBuilderError::ParseError {
589            span: span_from_node(call_node),
590            reason: "failed to read call expression".to_string(),
591        })?
592        .trim()
593        .to_string();
594
595    if callee_text.is_empty() {
596        return Ok(None);
597    }
598
599    let callee_simple = simple_name(&callee_text);
600    if callee_simple.is_empty() {
601        return Ok(None);
602    }
603
604    // Derive qualified callee name with proper self resolution
605    let caller_qname = call_context.qualified_name();
606    let target_qname = if let Some(method_name) = callee_text.strip_prefix("self.") {
607        // Resolve self.method() to ClassName.method()
608        if let Some(class_name) = &call_context.class_name {
609            format!("{}.{}", class_name, simple_name(method_name))
610        } else {
611            callee_simple.to_string()
612        }
613    } else {
614        callee_simple.to_string()
615    };
616
617    let argument_count = count_arguments(call_node);
618    let is_awaited = is_awaited_call(call_node);
619    Ok(Some((
620        caller_qname,
621        target_qname,
622        argument_count,
623        is_awaited,
624    )))
625}
626
627/// Build import edge information for the staging graph.
628fn build_import_for_staging(
629    import_node: Node<'_>,
630    content: &[u8],
631    helper: &GraphBuildHelper,
632) -> GraphResult<Option<(String, String)>> {
633    // Extract the raw module name from the AST
634    let raw_module_name = if import_node.kind() == "import_statement" {
635        import_node
636            .child_by_field_name("name")
637            .and_then(|n| extract_module_name(n, content))
638    } else if import_node.kind() == "import_from_statement" {
639        import_node
640            .child_by_field_name("module_name")
641            .and_then(|n| extract_module_name(n, content))
642    } else {
643        None
644    };
645
646    // Handle relative imports with no module name
647    let module_name = if raw_module_name.is_none() && import_node.kind() == "import_from_statement"
648    {
649        if let Ok(import_text) = import_node.utf8_text(content) {
650            if let Some(from_idx) = import_text.find("from") {
651                if let Some(import_idx) = import_text.find("import") {
652                    let between = import_text[from_idx + 4..import_idx].trim();
653                    if between.starts_with('.') {
654                        Some(between.to_string())
655                    } else {
656                        None
657                    }
658                } else {
659                    None
660                }
661            } else {
662                None
663            }
664        } else {
665            None
666        }
667    } else {
668        raw_module_name
669    };
670
671    let Some(module_name) = module_name else {
672        return Ok(None);
673    };
674
675    if module_name.is_empty() {
676        return Ok(None);
677    }
678
679    // Resolve the import path to a canonical module identifier
680    let resolved_path = sqry_core::graph::resolve_python_import(
681        std::path::Path::new(helper.file_path()),
682        &module_name,
683        import_node.kind() == "import_from_statement",
684    )?;
685
686    // Return from/to qualified names
687    Ok(Some((helper.file_path().to_string(), resolved_path)))
688}
689
690fn span_from_node(node: Node<'_>) -> Span {
691    let start = node.start_position();
692    let end = node.end_position();
693    Span::new(
694        sqry_core::graph::node::Position::new(start.row, start.column),
695        sqry_core::graph::node::Position::new(end.row, end.column),
696    )
697}
698
699fn count_arguments(call_node: Node<'_>) -> usize {
700    call_node
701        .child_by_field_name("arguments")
702        .map_or(0, |args| {
703            args.named_children(&mut args.walk())
704                .filter(|child| {
705                    // Count actual arguments, not commas or parentheses
706                    !matches!(child.kind(), "," | "(" | ")")
707                })
708                .count()
709        })
710}
711
712fn is_awaited_call(call_node: Node<'_>) -> bool {
713    let mut current = call_node.parent();
714    while let Some(node) = current {
715        let kind = node.kind();
716        if kind == "await" || kind == "await_expression" {
717            return true;
718        }
719        current = node.parent();
720    }
721    false
722}
723
/// Return the last dot-separated component of an identifier.
///
/// `"module.func"` yields `"func"` and `"obj.method"` yields `"method"`. Input
/// without a dot is returned unchanged, and a trailing dot yields an empty
/// string.
fn simple_name(qualified: &str) -> &str {
    match qualified.rfind('.') {
        Some(last_dot) => &qualified[last_dot + 1..],
        None => qualified,
    }
}
731
/// Extract a simple library name from an FFI library path.
///
/// Directory components are dropped, then a shared-library suffix is removed
/// so that different libraries with the same extension (`lib1.so`, `lib2.so`)
/// do not collapse into the same `"so"` target.
///
/// Handles:
/// - Full paths: "/opt/v1.2/libfoo.so" → "libfoo"
/// - Relative paths: "libs/lib1.so" → "lib1"
/// - Versioned libs: "libc.so.6" → "libc"
/// - Dotted base names: "libpython3.11.so" → "libpython3.11"
/// - Simple names: "kernel32" → "kernel32"
/// - Variable refs: "$libname" → "$libname"
///
/// A name that would become empty after stripping (for example ".so") is
/// returned unchanged.
fn ffi_library_simple_name(library_path: &str) -> String {
    use std::path::Path;

    /// Extensions recognised as shared-library suffixes.
    const LIBRARY_EXTENSIONS: [&str; 3] = ["so", "dll", "dylib"];

    // Strip directory components first (handles /opt/v1.2/libfoo.so)
    let filename = Path::new(library_path)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or(library_path);

    // Versioned .so files: everything before the first ".so." is the name
    // (libc.so.6 → libc).
    match filename.split_once(".so.") {
        Some((stem, _version)) if !stem.is_empty() => return stem.to_string(),
        _ => {}
    }

    // Plain extension: split at the LAST dot so base names that themselves
    // contain dots (libpython3.11.so) are still recognised.
    match filename.rsplit_once('.') {
        Some((stem, extension))
            if !stem.is_empty() && LIBRARY_EXTENSIONS.contains(&extension) =>
        {
            stem.to_string()
        }
        // No library extension found - return filename as-is
        _ => filename.to_string(),
    }
}
772
/// Report whether a Python name is public by convention.
///
/// A name is public exactly when it does not begin with an underscore. That
/// excludes both single-underscore "private" names and double-underscore
/// names; the empty string counts as public.
fn is_public_name(name: &str) -> bool {
    name.strip_prefix('_').is_none()
}
781
782/// Check if a node is at module level (direct child of the module body).
783///
784/// In tree-sitter Python AST, module-level items are direct children of the root "module" node.
785/// We check if the parent is "module" to determine module-level scope.
786fn is_module_level(node: Node<'_>) -> bool {
787    // Walk up the tree to find the immediate container
788    let mut current = node.parent();
789    while let Some(parent) = current {
790        match parent.kind() {
791            "module" => return true,
792            "function_definition" | "class_definition" => return false,
793            _ => current = parent.parent(),
794        }
795    }
796    false
797}
798
799/// Export a symbol from the file module.
800///
801/// File-level module name for exports/imports.
802/// Distinct from `<module>` to avoid conflicts with top-level call context.
803const FILE_MODULE_NAME: &str = "<file_module>";
804
805fn export_from_file_module(
806    helper: &mut GraphBuildHelper,
807    exported: sqry_core::graph::unified::node::NodeId,
808) {
809    let module_id = helper.add_module(FILE_MODULE_NAME, None);
810    helper.add_export_edge(module_id, exported);
811}
812
813/// Extract module name from a `dotted_name`, `aliased_import`, or `relative_import` node
814///
815/// For `import numpy as np`, the "name" field is an `aliased_import` node with structure:
816/// `aliased_import { name: dotted_name("numpy"), alias: identifier("np") }`
817/// We need to extract just "numpy", not "numpy as np".
818fn extract_module_name(node: Node<'_>, content: &[u8]) -> Option<String> {
819    // Handle aliased imports: `import numpy as np` -> extract "numpy"
820    if node.kind() == "aliased_import" {
821        // The "name" field of aliased_import contains the actual module name
822        return node
823            .child_by_field_name("name")
824            .and_then(|name_node| name_node.utf8_text(content).ok())
825            .map(std::string::ToString::to_string);
826    }
827
828    // Regular dotted_name or identifier
829    node.utf8_text(content)
830        .ok()
831        .map(std::string::ToString::to_string)
832}
833
834// ============================================================================
835// Exports - __all__ assignment handling
836// ============================================================================
837
838/// Process `__all__ = ['name1', 'name2']` assignments to create export edges.
839///
840/// Python's `__all__` list explicitly defines the public API of a module.
841/// Each name in the list gets an Export edge from the module to the exported symbol.
842fn process_all_assignment(node: Node<'_>, content: &[u8], helper: &mut GraphBuildHelper) {
843    // expression_statement contains an assignment child
844    let assignment = node
845        .children(&mut node.walk())
846        .find(|child| child.kind() == "assignment" || child.kind() == "augmented_assignment");
847
848    let Some(assignment) = assignment else {
849        return;
850    };
851
852    // Check if left side is __all__
853    let left = assignment.child_by_field_name("left");
854    let Some(left) = left else {
855        return;
856    };
857
858    let Ok(left_text) = left.utf8_text(content) else {
859        return;
860    };
861
862    if left_text.trim() != "__all__" {
863        return;
864    }
865
866    // Get the right side (should be a list)
867    let right = assignment.child_by_field_name("right");
868    let Some(right) = right else {
869        return;
870    };
871
872    // Handle list or tuple literal (both valid for __all__)
873    if right.kind() == "list" || right.kind() == "tuple" {
874        process_all_list(right, content, helper);
875    }
876}
877
878/// Process a list/tuple of exported names from __all__.
879fn process_all_list(list_node: Node<'_>, content: &[u8], helper: &mut GraphBuildHelper) {
880    for child in list_node.children(&mut list_node.walk()) {
881        // Look for string literals
882        if child.kind() == "string"
883            && let Some(export_name) = extract_string_content(child, content)
884            && !export_name.is_empty()
885        {
886            // Create a node for the exported symbol
887            // We use add_function here as a generic symbol; the actual type
888            // will be resolved later by cross-file analysis
889            let span = span_from_node(child);
890            let export_id = helper.add_function(&export_name, Some(span), false, false);
891
892            // Add export edge (Direct export, no alias for Python __all__)
893            export_from_file_module(helper, export_id);
894        }
895    }
896}
897
898/// Extract the content of a string literal node (removing quotes).
899fn extract_string_content(string_node: Node<'_>, content: &[u8]) -> Option<String> {
900    // String nodes contain string_content or string_start/string_content/string_end
901    // Try to get the full text and strip quotes
902    let Ok(text) = string_node.utf8_text(content) else {
903        return None;
904    };
905
906    let text = text.trim();
907
908    // Handle various Python string formats: 'x', "x", '''x''', """x""", r'x', etc.
909    let stripped = text
910        .trim_start_matches(|c: char| {
911            c == 'r'
912                || c == 'b'
913                || c == 'f'
914                || c == 'u'
915                || c == 'R'
916                || c == 'B'
917                || c == 'F'
918                || c == 'U'
919        })
920        .trim_start_matches("'''")
921        .trim_end_matches("'''")
922        .trim_start_matches("\"\"\"")
923        .trim_end_matches("\"\"\"")
924        .trim_start_matches('\'')
925        .trim_end_matches('\'')
926        .trim_start_matches('"')
927        .trim_end_matches('"');
928
929    Some(stripped.to_string())
930}
931
932// ============================================================================
933// OOP - Inheritance handling
934// ============================================================================
935
936/// Process class inheritance to create Inherits edges.
937///
938/// Python supports multiple inheritance: `class Child(Parent1, Parent2):`
939/// Each base class gets an Inherits edge from the child class.
940fn process_class_inheritance(
941    class_node: Node<'_>,
942    content: &[u8],
943    class_id: UnifiedNodeId,
944    helper: &mut GraphBuildHelper,
945) {
946    // In Python AST, base classes are in the superclasses field (argument_list)
947    // class_definition has a "superclasses" field containing argument_list
948    let superclasses = class_node.child_by_field_name("superclasses");
949
950    let Some(superclasses) = superclasses else {
951        return;
952    };
953
954    // argument_list contains the base classes
955    for child in superclasses.children(&mut superclasses.walk()) {
956        if child.kind() == "keyword_argument" {
957            // Skip keyword arguments like metaclass=ABCMeta.
958            continue;
959        }
960
961        match child.kind() {
962            "identifier" => {
963                // Simple base class: class Child(Parent):
964                if let Ok(base_name) = child.utf8_text(content) {
965                    let base_name = base_name.trim();
966                    if !base_name.is_empty() {
967                        let span = span_from_node(child);
968                        let base_id = helper.add_class(base_name, Some(span));
969                        helper.add_inherits_edge(class_id, base_id);
970                    }
971                }
972            }
973            "attribute" => {
974                // Qualified base class: class Child(module.Parent):
975                if let Ok(base_name) = child.utf8_text(content) {
976                    let base_name = base_name.trim();
977                    if !base_name.is_empty() {
978                        let span = span_from_node(child);
979                        let base_id = helper.add_class(base_name, Some(span));
980                        helper.add_inherits_edge(class_id, base_id);
981                    }
982                }
983            }
984            "call" => {
985                // Parameterized base class with call syntax: class Child(SomeBase(arg)):
986                // Extract the function being called
987                if let Some(func) = child.child_by_field_name("function")
988                    && let Ok(base_name) = func.utf8_text(content)
989                {
990                    let base_name = base_name.trim();
991                    if !base_name.is_empty() {
992                        let span = span_from_node(child);
993                        let base_id = helper.add_class(base_name, Some(span));
994                        helper.add_inherits_edge(class_id, base_id);
995                    }
996                }
997            }
998            "subscript" => {
999                // Generic base class: class Child(Generic[T]): or class Child(List[int]):
1000                // Extract the base type from the subscript (value field)
1001                if let Some(value) = child.child_by_field_name("value")
1002                    && let Ok(base_name) = value.utf8_text(content)
1003                {
1004                    let base_name = base_name.trim();
1005                    if !base_name.is_empty() {
1006                        let span = span_from_node(child);
1007                        let base_id = helper.add_class(base_name, Some(span));
1008                        helper.add_inherits_edge(class_id, base_id);
1009                    }
1010                }
1011            }
1012            _ => {}
1013        }
1014    }
1015}
1016
1017// ============================================================================
1018// AST Graph - tracks callable contexts (functions, methods, classes)
1019// ============================================================================
1020
/// A callable scope that calls can be attributed to.
#[derive(Debug, Clone)]
struct CallContext {
    /// Dot-separated name: the enclosing scope names followed by the callable's own name.
    qualified_name: String,
    /// Byte range `(start, end)` covered by the callable in the source.
    #[allow(dead_code)] // Reserved for scope analysis
    span: (usize, usize),
    /// Whether the definition carries an `async` token.
    is_async: bool,
    /// Whether the callable was defined while inside at least one class.
    is_method: bool,
    /// Qualified name of the innermost enclosing class, if any.
    class_name: Option<String>,
}

impl CallContext {
    /// Return an owned copy of the qualified name.
    fn qualified_name(&self) -> String {
        self.qualified_name.to_owned()
    }
}
1036
1037struct ASTGraph {
1038    contexts: Vec<CallContext>,
1039    node_to_context: HashMap<usize, usize>,
1040}
1041
1042impl ASTGraph {
1043    fn from_tree(tree: &Tree, content: &[u8], max_depth: usize) -> Result<Self, String> {
1044        let mut contexts = Vec::new();
1045        let mut node_to_context = HashMap::new();
1046        let mut scope_stack: Vec<String> = Vec::new();
1047        let mut class_stack: Vec<String> = Vec::new();
1048
1049        walk_ast(
1050            tree.root_node(),
1051            content,
1052            &mut contexts,
1053            &mut node_to_context,
1054            &mut scope_stack,
1055            &mut class_stack,
1056            max_depth,
1057        )?;
1058
1059        Ok(Self {
1060            contexts,
1061            node_to_context,
1062        })
1063    }
1064
1065    #[allow(dead_code)] // Reserved for future context queries
1066    fn contexts(&self) -> &[CallContext] {
1067        &self.contexts
1068    }
1069
1070    fn get_callable_context(&self, node_id: usize) -> Option<&CallContext> {
1071        self.node_to_context
1072            .get(&node_id)
1073            .and_then(|idx| self.contexts.get(*idx))
1074    }
1075}
1076
1077fn walk_ast(
1078    node: Node,
1079    content: &[u8],
1080    contexts: &mut Vec<CallContext>,
1081    node_to_context: &mut HashMap<usize, usize>,
1082    scope_stack: &mut Vec<String>,
1083    class_stack: &mut Vec<String>,
1084    max_depth: usize,
1085) -> Result<(), String> {
1086    if scope_stack.len() > max_depth {
1087        return Ok(());
1088    }
1089
1090    match node.kind() {
1091        "class_definition" => {
1092            let name_node = node
1093                .child_by_field_name("name")
1094                .ok_or_else(|| "class_definition missing name".to_string())?;
1095            let class_name = name_node
1096                .utf8_text(content)
1097                .map_err(|_| "failed to read class name".to_string())?;
1098
1099            // Build qualified class name
1100            let qualified_class = if scope_stack.is_empty() {
1101                class_name.to_string()
1102            } else {
1103                format!("{}.{}", scope_stack.join("."), class_name)
1104            };
1105
1106            class_stack.push(qualified_class.clone());
1107            scope_stack.push(class_name.to_string());
1108
1109            // Recurse into class body
1110            if let Some(body) = node.child_by_field_name("body") {
1111                let mut cursor = body.walk();
1112                for child in body.children(&mut cursor) {
1113                    walk_ast(
1114                        child,
1115                        content,
1116                        contexts,
1117                        node_to_context,
1118                        scope_stack,
1119                        class_stack,
1120                        max_depth,
1121                    )?;
1122                }
1123            }
1124
1125            class_stack.pop();
1126            scope_stack.pop();
1127        }
1128        "function_definition" => {
1129            let name_node = node
1130                .child_by_field_name("name")
1131                .ok_or_else(|| "function_definition missing name".to_string())?;
1132            let func_name = name_node
1133                .utf8_text(content)
1134                .map_err(|_| "failed to read function name".to_string())?;
1135
1136            // Check if async
1137            let is_async = node
1138                .children(&mut node.walk())
1139                .any(|child| child.kind() == "async");
1140
1141            // Build qualified function name
1142            let qualified_func = if scope_stack.is_empty() {
1143                func_name.to_string()
1144            } else {
1145                format!("{}.{}", scope_stack.join("."), func_name)
1146            };
1147
1148            // Determine if this is a method (inside a class)
1149            let is_method = !class_stack.is_empty();
1150            let class_name = class_stack.last().cloned();
1151
1152            let context_idx = contexts.len();
1153            contexts.push(CallContext {
1154                qualified_name: qualified_func.clone(),
1155                span: (node.start_byte(), node.end_byte()),
1156                is_async,
1157                is_method,
1158                class_name,
1159            });
1160
1161            // Associate the function definition node itself with this context
1162            // This is required so walk_tree_for_graph can find the context
1163            node_to_context.insert(node.id(), context_idx);
1164
1165            // Associate all descendants with this context
1166            if let Some(body) = node.child_by_field_name("body") {
1167                associate_descendants(body, context_idx, node_to_context);
1168            }
1169
1170            scope_stack.push(func_name.to_string());
1171
1172            // Recurse into function body to find nested functions
1173            if let Some(body) = node.child_by_field_name("body") {
1174                let mut cursor = body.walk();
1175                for child in body.children(&mut cursor) {
1176                    walk_ast(
1177                        child,
1178                        content,
1179                        contexts,
1180                        node_to_context,
1181                        scope_stack,
1182                        class_stack,
1183                        max_depth,
1184                    )?;
1185                }
1186            }
1187
1188            scope_stack.pop();
1189        }
1190        _ => {
1191            // Recurse into children for other node types
1192            let mut cursor = node.walk();
1193            for child in node.children(&mut cursor) {
1194                walk_ast(
1195                    child,
1196                    content,
1197                    contexts,
1198                    node_to_context,
1199                    scope_stack,
1200                    class_stack,
1201                    max_depth,
1202                )?;
1203            }
1204        }
1205    }
1206
1207    Ok(())
1208}
1209
1210fn associate_descendants(
1211    node: Node,
1212    context_idx: usize,
1213    node_to_context: &mut HashMap<usize, usize>,
1214) {
1215    node_to_context.insert(node.id(), context_idx);
1216
1217    let mut stack = vec![node];
1218    while let Some(current) = stack.pop() {
1219        node_to_context.insert(current.id(), context_idx);
1220
1221        let mut cursor = current.walk();
1222        for child in current.children(&mut cursor) {
1223            stack.push(child);
1224        }
1225    }
1226}
1227
1228// ============================================================================
1229// FFI Detection - ctypes, cffi, and C extensions
1230// ============================================================================
1231
1232/// Build FFI edges for call expressions.
1233///
1234/// Detects Python FFI patterns:
1235/// - `ctypes.CDLL('libfoo.so')` / `ctypes.cdll.LoadLibrary('libfoo.so')`
1236/// - `ctypes.WinDLL('kernel32')` / `ctypes.windll.kernel32`
1237/// - `ctypes.PyDLL('libpython.so')`
1238/// - `cffi.FFI().dlopen('libfoo.so')`
1239/// - `ffi.dlopen('libfoo.so')`
1240///
1241/// Returns true if an FFI edge was created, false otherwise.
1242fn build_ffi_call_edge(
1243    ast_graph: &ASTGraph,
1244    call_node: Node<'_>,
1245    content: &[u8],
1246    helper: &mut GraphBuildHelper,
1247) -> GraphResult<bool> {
1248    let Some(callee_expr) = call_node.child_by_field_name("function") else {
1249        return Ok(false);
1250    };
1251
1252    let callee_text = callee_expr
1253        .utf8_text(content)
1254        .map_err(|_| GraphBuilderError::ParseError {
1255            span: span_from_node(call_node),
1256            reason: "failed to read call expression".to_string(),
1257        })?
1258        .trim();
1259
1260    // Check for ctypes library loading patterns
1261    if is_ctypes_load_call(callee_text) {
1262        return Ok(build_ctypes_ffi_edge(
1263            ast_graph,
1264            call_node,
1265            content,
1266            callee_text,
1267            helper,
1268        ));
1269    }
1270
1271    // Check for cffi dlopen patterns
1272    if is_cffi_dlopen_call(callee_text) {
1273        return Ok(build_cffi_ffi_edge(ast_graph, call_node, content, helper));
1274    }
1275
1276    Ok(false)
1277}
1278
/// Report whether `callee_text` names a ctypes library-loading callable.
///
/// Matching is exact and deliberately narrow to limit false positives (a plain
/// `ends_with(".LoadLibrary")` would match unrelated code). Accepted spellings:
///
/// - fully qualified constructors: `ctypes.CDLL`, `ctypes.WinDLL`, `ctypes.OleDLL`,
///   `ctypes.PyDLL`;
/// - fully qualified loaders: `ctypes.cdll.LoadLibrary`, `ctypes.windll.LoadLibrary`,
///   `ctypes.oledll.LoadLibrary`;
/// - the same names as they appear after `from ctypes import ...`: `CDLL`, `WinDLL`,
///   `OleDLL`, `PyDLL`, `cdll.LoadLibrary`, `windll.LoadLibrary`, `oledll.LoadLibrary`.
///
/// Attribute-style access such as `ctypes.cdll.kernel32` is not a call and is therefore
/// not covered here.
fn is_ctypes_load_call(callee_text: &str) -> bool {
    matches!(
        callee_text,
        "ctypes.CDLL"
            | "ctypes.WinDLL"
            | "ctypes.OleDLL"
            | "ctypes.PyDLL"
            | "ctypes.cdll.LoadLibrary"
            | "ctypes.windll.LoadLibrary"
            | "ctypes.oledll.LoadLibrary"
            | "CDLL"
            | "WinDLL"
            | "OleDLL"
            | "PyDLL"
            | "cdll.LoadLibrary"
            | "windll.LoadLibrary"
            | "oledll.LoadLibrary"
    )
}
1307
/// Report whether `callee_text` is a known spelling of cffi's `dlopen`.
///
/// Matching is exact to limit false positives (a plain `ends_with(".dlopen")` would
/// match unrelated objects). Accepted spellings are the common FFI variable names
/// `ffi.dlopen`, `cffi.dlopen` and `_ffi.dlopen`, plus the chained constructor form
/// `FFI().dlopen` used after `from cffi import FFI`.
fn is_cffi_dlopen_call(callee_text: &str) -> bool {
    const DLOPEN_CALLEES: [&str; 4] = ["ffi.dlopen", "cffi.dlopen", "_ffi.dlopen", "FFI().dlopen"];

    DLOPEN_CALLEES.iter().any(|known| *known == callee_text)
}
1323
1324/// Build FFI edge for ctypes library loading.
1325fn build_ctypes_ffi_edge(
1326    ast_graph: &ASTGraph,
1327    call_node: Node<'_>,
1328    content: &[u8],
1329    callee_text: &str,
1330    helper: &mut GraphBuildHelper,
1331) -> bool {
1332    // Get caller context
1333    let caller_id = get_ffi_caller_node_id(ast_graph, call_node, content, helper);
1334
1335    // Determine FFI convention based on the ctypes type
1336    let convention = if callee_text.contains("WinDLL")
1337        || callee_text.contains("windll")
1338        || callee_text.contains("OleDLL")
1339    {
1340        FfiConvention::Stdcall
1341    } else {
1342        FfiConvention::C
1343    };
1344
1345    // Try to extract library name from first argument
1346    let library_name = extract_ffi_library_name(call_node, content)
1347        .unwrap_or_else(|| "ctypes::unknown".to_string());
1348
1349    let ffi_name = format!("native::{}", ffi_library_simple_name(&library_name));
1350    let ffi_node_id = helper.add_module(&ffi_name, Some(span_from_node(call_node)));
1351
1352    // Add FFI edge
1353    helper.add_ffi_edge(caller_id, ffi_node_id, convention);
1354
1355    true
1356}
1357
1358/// Build FFI edge for cffi dlopen.
1359fn build_cffi_ffi_edge(
1360    ast_graph: &ASTGraph,
1361    call_node: Node<'_>,
1362    content: &[u8],
1363    helper: &mut GraphBuildHelper,
1364) -> bool {
1365    // Get caller context
1366    let caller_id = get_ffi_caller_node_id(ast_graph, call_node, content, helper);
1367
1368    // Try to extract library name from first argument
1369    let library_name =
1370        extract_ffi_library_name(call_node, content).unwrap_or_else(|| "cffi::unknown".to_string());
1371
1372    let ffi_name = format!("native::{}", ffi_library_simple_name(&library_name));
1373    let ffi_node_id = helper.add_module(&ffi_name, Some(span_from_node(call_node)));
1374
1375    // cffi uses C calling convention
1376    helper.add_ffi_edge(caller_id, ffi_node_id, FfiConvention::C);
1377
1378    true
1379}
1380
1381/// Get the caller node ID for FFI edges.
1382fn get_ffi_caller_node_id(
1383    ast_graph: &ASTGraph,
1384    node: Node<'_>,
1385    content: &[u8],
1386    helper: &mut GraphBuildHelper,
1387) -> UnifiedNodeId {
1388    let module_context;
1389    let call_context = if let Some(ctx) = ast_graph.get_callable_context(node.id()) {
1390        ctx
1391    } else {
1392        module_context = CallContext {
1393            qualified_name: "<module>".to_string(),
1394            span: (0, content.len()),
1395            is_async: false,
1396            is_method: false,
1397            class_name: None,
1398        };
1399        &module_context
1400    };
1401
1402    let caller_span = Some(Span::from_bytes(call_context.span.0, call_context.span.1));
1403    helper.ensure_function(
1404        &call_context.qualified_name(),
1405        caller_span,
1406        call_context.is_async,
1407        false,
1408    )
1409}
1410
1411/// Extract the library name from the first argument of a call.
1412fn extract_ffi_library_name(call_node: Node<'_>, content: &[u8]) -> Option<String> {
1413    let args = call_node.child_by_field_name("arguments")?;
1414
1415    let mut cursor = args.walk();
1416    let first_arg = args
1417        .children(&mut cursor)
1418        .find(|child| !matches!(child.kind(), "(" | ")" | ","))?;
1419
1420    // Handle string literals
1421    if first_arg.kind() == "string" {
1422        return extract_string_content(first_arg, content);
1423    }
1424
1425    // Handle identifiers (variable names) - we can't resolve them statically
1426    if first_arg.kind() == "identifier" {
1427        let text = first_arg.utf8_text(content).ok()?;
1428        return Some(format!("${}", text.trim())); // Mark as variable reference
1429    }
1430
1431    None
1432}
1433
1434/// Check if an import statement imports a known native extension module.
1435///
1436/// This detects patterns like:
1437/// - `import numpy` (known C extension)
1438/// - `from numpy import array` (known C extension)
1439/// - `import _sqlite3` (private C module)
1440fn is_native_extension_import(module_name: &str) -> bool {
1441    // Private C modules (underscore prefix)
1442    if module_name.starts_with('_') && !module_name.starts_with("__") {
1443        return true;
1444    }
1445
1446    // Check against known modules
1447    let base_module = module_name.split('.').next().unwrap_or(module_name);
1448
1449    STD_C_MODULES.contains(&base_module) || THIRD_PARTY_C_PACKAGES.contains(&base_module)
1450}
1451
1452/// Build FFI edge for native extension import.
1453fn build_native_import_ffi_edge(
1454    module_name: &str,
1455    import_node: Node<'_>,
1456    helper: &mut GraphBuildHelper,
1457) {
1458    // Create module node for the importing file
1459    let file_path = helper.file_path().to_string();
1460    let importer_id = helper.add_module(&file_path, None);
1461
1462    // Create node for the native module
1463    let ffi_name = format!("native::{}", simple_name(module_name));
1464    let ffi_node_id = helper.add_module(&ffi_name, Some(span_from_node(import_node)));
1465
1466    // Add FFI edge (C convention for Python C extensions)
1467    helper.add_ffi_edge(importer_id, ffi_node_id, FfiConvention::C);
1468}
1469
1470// ============================================================================
1471// HTTP Route Endpoint Detection - Flask/FastAPI decorators
1472// ============================================================================
1473
/// HTTP methods recognized in route decorators.
///
/// Entries are lowercase decorator method names (`@app.get(...)`, `@router.post(...)`).
/// The decorator's method name is lowercased before lookup, and a match is uppercased to
/// form the reported HTTP method.
const ROUTE_METHOD_NAMES: &[&str] = &["get", "post", "put", "delete", "patch"];

/// Receiver names recognized as route-capable objects.
///
/// `Flask` uses `app` or `blueprint`, `FastAPI` uses `app` or `router`.
/// For dotted receivers only the final segment is compared against this list.
const ROUTE_RECEIVER_NAMES: &[&str] = &["app", "router", "blueprint"];
1481
1482/// Extract HTTP route information from Flask/FastAPI-style decorators on a function.
1483///
1484/// Checks whether the given `function_definition` node is wrapped in a `decorated_definition`
1485/// and whether any of its decorators match known route patterns:
1486///
1487/// - `@app.route('/path')` or `@app.route('/path', methods=['GET'])` -- GET by default
1488/// - `@app.get('/path')` / `@app.post('/path')` / `@app.put('/path')` / etc.
1489/// - `@router.get('/path')` (`FastAPI`)
1490/// - `@blueprint.route('/path')` (Flask blueprints)
1491///
1492/// Returns `Some((method, path))` where `method` is the uppercased HTTP method and
1493/// `path` is the route path string, or `None` if no route decorator is found.
1494fn extract_route_decorator_info(func_node: Node<'_>, content: &[u8]) -> Option<(String, String)> {
1495    // The function_definition must be a child of decorated_definition
1496    let parent = func_node.parent()?;
1497    if parent.kind() != "decorated_definition" {
1498        return None;
1499    }
1500
1501    // Iterate through decorator children of the decorated_definition
1502    let mut cursor = parent.walk();
1503    for child in parent.children(&mut cursor) {
1504        if child.kind() != "decorator" {
1505            continue;
1506        }
1507
1508        let Ok(decorator_text) = child.utf8_text(content) else {
1509            continue;
1510        };
1511        let decorator_text = decorator_text.trim();
1512
1513        // Strip the leading '@'
1514        let without_at = decorator_text.strip_prefix('@')?;
1515
1516        // Try to parse as a route decorator
1517        if let Some(result) = parse_route_decorator_text(without_at) {
1518            return Some(result);
1519        }
1520    }
1521
1522    None
1523}
1524
1525/// Parse a single decorator text (without the leading `@`) to extract route information.
1526///
1527/// Recognized patterns:
1528/// - `app.route('/path')` or `app.route('/path', methods=['POST'])`
1529/// - `app.get('/path')` / `router.post('/path')` / `blueprint.delete('/path')`
1530///
1531/// Returns `Some((HTTP_METHOD, path))` or `None`.
1532fn parse_route_decorator_text(text: &str) -> Option<(String, String)> {
1533    // Split into receiver.method and argument portion
1534    // e.g. "app.route('/api/users')" -> ("app.route", "'/api/users')")
1535    let paren_pos = text.find('(')?;
1536    let accessor = &text[..paren_pos];
1537    let args_text = &text[paren_pos + 1..];
1538
1539    // Split accessor into receiver and method_name
1540    let dot_pos = accessor.rfind('.')?;
1541    let receiver = &accessor[..dot_pos];
1542    let method_name = &accessor[dot_pos + 1..];
1543
1544    // Check that the receiver is a known route-capable object.
1545    // Allow dotted receivers (e.g., "api.v1") as long as the final segment matches.
1546    let receiver_base = receiver.rsplit('.').next().unwrap_or(receiver);
1547    if !ROUTE_RECEIVER_NAMES.contains(&receiver_base) {
1548        return None;
1549    }
1550
1551    // Extract the route path from the first argument (string literal)
1552    let path = extract_path_from_decorator_args(args_text)?;
1553
1554    // Determine the HTTP method
1555    let method_lower = method_name.to_ascii_lowercase();
1556    if ROUTE_METHOD_NAMES.contains(&method_lower.as_str()) {
1557        // Direct method decorator: @app.get('/path') -> GET
1558        return Some((method_lower.to_ascii_uppercase(), path));
1559    }
1560
1561    if method_lower == "route" {
1562        // Generic route decorator: @app.route('/path', methods=['POST'])
1563        let http_method = extract_method_from_route_args(args_text);
1564        return Some((http_method, path));
1565    }
1566
1567    None
1568}
1569
/// Pull the route path out of a decorator's argument text.
///
/// `args_text` is everything after the decorator call's opening parenthesis, for example
/// `'/api/users', methods=['GET'])` or `"/api/items")`. The path is the content of the
/// first single- or double-quoted string found in that text.
///
/// Returns `None` when there is no quote, the first string is unterminated, or its
/// content is empty.
fn extract_path_from_decorator_args(args_text: &str) -> Option<String> {
    let trimmed = args_text.trim();

    // Whichever quote character appears first opens the literal and must close it.
    let open_at = trimmed.find(|c: char| matches!(c, '\'' | '"'))?;
    let quote = trimmed[open_at..].chars().next()?;

    let after_open = &trimmed[open_at + 1..];
    let (path, _) = after_open.split_once(quote)?;

    (!path.is_empty()).then(|| path.to_string())
}
1608
1609/// Extract the HTTP method from `@app.route('/path', methods=['POST'])` style arguments.
1610///
1611/// Looks for a `methods=` keyword argument containing a list of method strings.
1612/// If found, returns the first method in uppercase. Otherwise defaults to `"GET"`.
1613fn extract_method_from_route_args(args_text: &str) -> String {
1614    // Look for 'methods' keyword in the arguments
1615    let Some(methods_pos) = args_text.find("methods") else {
1616        return "GET".to_string();
1617    };
1618
1619    // Find the opening bracket after 'methods='
1620    let after_methods = &args_text[methods_pos..];
1621    let Some(bracket_pos) = after_methods.find('[') else {
1622        return "GET".to_string();
1623    };
1624
1625    let after_bracket = &after_methods[bracket_pos + 1..];
1626
1627    // Find the first string literal inside the bracket
1628    let method_str = extract_first_string_literal(after_bracket);
1629    match method_str {
1630        Some(m) => m.to_ascii_uppercase(),
1631        None => "GET".to_string(),
1632    }
1633}
1634
/// Return the contents of the first quoted string literal found in `text`.
///
/// Whichever of `'` or `"` occurs first opens the literal; the next occurrence
/// of that same character closes it. Escape sequences are not interpreted.
///
/// Yields `None` when `text` contains no quote character, when the opening
/// quote is never closed, or when the literal is empty.
fn extract_first_string_literal(text: &str) -> Option<String> {
    let trimmed = text.trim();

    // Earliest quote character of either flavour decides the delimiter.
    let (open_at, quote) = trimmed
        .char_indices()
        .find(|&(_, c)| c == '\'' || c == '"')?;

    // Text following the opening quote, split at the matching closing quote.
    let body = &trimmed[open_at + quote.len_utf8()..];
    let (literal, _) = body.split_once(quote)?;

    if literal.is_empty() {
        None
    } else {
        Some(literal.to_owned())
    }
}
1666
1667// ============================================================================
1668// Property Detection - @property decorator
1669// ============================================================================
1670
1671/// Check if a function definition has a `@property` decorator.
1672///
1673/// Python AST structure for decorated functions:
1674/// ```python
1675/// @property
1676/// def name(self):
1677///     return self._name
1678/// ```
1679///
1680/// The tree-sitter AST wraps the `function_definition` in a `decorated_definition` node:
1681/// ```text
1682/// (block
1683///   (decorated_definition
1684///     decorator: (decorator "@property")
1685///     definition: (function_definition)))
1686/// ```
1687fn has_property_decorator(func_node: Node<'_>, content: &[u8]) -> bool {
1688    // The function_definition is a child of decorated_definition
1689    let Some(parent) = func_node.parent() else {
1690        return false;
1691    };
1692
1693    // Check if parent is decorated_definition
1694    if parent.kind() != "decorated_definition" {
1695        return false;
1696    }
1697
1698    // Look for @property decorator in the decorated_definition
1699    let mut cursor = parent.walk();
1700    for child in parent.children(&mut cursor) {
1701        if child.kind() == "decorator" {
1702            // Extract decorator text
1703            if let Ok(decorator_text) = child.utf8_text(content) {
1704                let decorator_text = decorator_text.trim();
1705                // Match @property or @property()
1706                if decorator_text == "@property"
1707                    || decorator_text.starts_with("@property(")
1708                    || decorator_text.starts_with("@property (")
1709                {
1710                    return true;
1711                }
1712            }
1713        }
1714    }
1715
1716    false
1717}
1718
/// Derive a visibility label from a Python identifier's naming convention.
///
/// Classification, in order of precedence:
/// - starts with `__` and does not end with `__` (e.g. `__secret`) -> `"private"`
/// - any other name starting with `_` (e.g. `_internal`, and also dunder names
///   such as `__init__`) -> `"protected"`
/// - everything else, including the empty string -> `"public"`
fn extract_visibility_from_name(name: &str) -> &'static str {
    let leading_underscore = name.starts_with('_');
    // Double leading underscore without a double trailing underscore.
    let name_mangled = name.starts_with("__") && !name.ends_with("__");

    match (name_mangled, leading_underscore) {
        (true, _) => "private",
        (false, true) => "protected",
        (false, false) => "public",
    }
}
1734
1735// ============================================================================
1736// Type Hint Processing - TypeOf and Reference Edges
1737// ============================================================================
1738
1739/// Find the containing scope (function/class) for a node to create scope-qualified names.
1740///
1741/// This walks up the AST to find the nearest enclosing function or class definition.
1742/// Returns:
1743/// - Empty string for module-level
1744/// - Class name for class-level (e.g., "`MyClass`")
1745/// - Function qualified name for function-level (e.g., "MyClass.method" or "process")
1746fn find_containing_scope(node: Node<'_>, content: &[u8], ast_graph: &ASTGraph) -> String {
1747    let mut current = node;
1748    let mut found_class_name: Option<String> = None;
1749
1750    // Walk up the tree to find enclosing function or class
1751    while let Some(parent) = current.parent() {
1752        match parent.kind() {
1753            "function_definition" => {
1754                // Found enclosing function - get its qualified name
1755                if let Some(ctx) = ast_graph.get_callable_context(parent.id()) {
1756                    return ctx.qualified_name.clone();
1757                }
1758            }
1759            "class_definition" => {
1760                // Remember the class name but continue walking up
1761                // to check if we're inside a function within this class
1762                if found_class_name.is_none() {
1763                    // Extract class name directly from node
1764                    if let Some(name_node) = parent.child_by_field_name("name")
1765                        && let Ok(class_name) = name_node.utf8_text(content)
1766                    {
1767                        found_class_name = Some(class_name.to_string());
1768                    }
1769                }
1770            }
1771            _ => {}
1772        }
1773        current = parent;
1774    }
1775
1776    // If we found a class but no enclosing function, it's a class attribute
1777    found_class_name.unwrap_or_default()
1778}
1779
1780/// Extract return type annotation from a function definition.
1781///
1782/// Python AST structure:
1783/// ```python
1784/// def foo() -> int:  # return_type field contains type annotation
1785/// ```
1786fn extract_return_type_annotation(func_node: Node<'_>, content: &[u8]) -> Option<String> {
1787    let return_type_node = func_node.child_by_field_name("return_type")?;
1788    extract_type_from_node(return_type_node, content)
1789}
1790
1791/// Extract the byte-exact source text of a function's `-> Type` annotation.
1792///
1793/// Unlike [`extract_return_type_annotation`], this returns the raw annotation
1794/// text verbatim — no quote stripping, no union flattening, no generic-base
1795/// extraction. This is the form consumed by `returns:<TypeName>` predicates,
1796/// which match the byte-exact qualified name of the target Type node.
1797///
1798/// Returns `None` when the function has no `-> Type` annotation (e.g.
1799/// `def foo():`), in which case no Return edge is emitted.
1800///
1801/// Examples (input → returned text):
1802/// - `def foo() -> int:` → `Some("int")`
1803/// - `def foo() -> Optional[int]:` → `Some("Optional[int]")`
1804/// - `def foo() -> List[Dict[str, int]]:` → `Some("List[Dict[str, int]]")`
1805/// - `def foo() -> pd.DataFrame:` → `Some("pd.DataFrame")`
1806/// - `async def foo() -> AsyncIterator[int]:` → `Some("AsyncIterator[int]")`
1807/// - `def foo() -> "User":` → `Some("\"User\"")`
1808/// - `def foo():` → `None`
1809fn extract_return_type_source_text(func_node: Node<'_>, content: &[u8]) -> Option<String> {
1810    let return_type_node = func_node.child_by_field_name("return_type")?;
1811    let text = return_type_node.utf8_text(content).ok()?.trim();
1812    if text.is_empty() {
1813        None
1814    } else {
1815        Some(text.to_string())
1816    }
1817}
1818
1819/// Process function parameters to create `TypeOf` and Reference edges for type hints.
1820///
1821/// Handles:
1822/// - `def foo(x: int, y: str):` - typed parameters
1823/// - `def foo(self, x: int):` - skips self/cls
1824/// - `def foo(x: List[int]):` - extracts base type from generics
1825fn process_function_parameters(
1826    func_node: Node<'_>,
1827    content: &[u8],
1828    ast_graph: &ASTGraph,
1829    helper: &mut GraphBuildHelper,
1830) {
1831    let Some(params_node) = func_node.child_by_field_name("parameters") else {
1832        return;
1833    };
1834
1835    // Get the qualified name of the containing function/method for scope qualification
1836    let scope_prefix = ast_graph
1837        .get_callable_context(func_node.id())
1838        .map_or("", |ctx| ctx.qualified_name.as_str());
1839
1840    // Iterate through parameters in the parameter_list
1841    for param in params_node.children(&mut params_node.walk()) {
1842        // Python tree-sitter uses "typed_parameter" and "typed_default_parameter"
1843        // but we need to handle the actual structure
1844        match param.kind() {
1845            "typed_parameter" | "typed_default_parameter" => {
1846                process_typed_parameter(param, content, scope_prefix, helper);
1847            }
1848            // Untyped parameter - check if it has a type annotation in parent context
1849            // For now, skip (no type hint)
1850            // Default parameter without type - skip
1851            "identifier" | "default_parameter" => {}
1852            _ => {
1853                // Other parameter types - try to process if they have type annotations
1854                // This handles various parameter structures
1855                if param.child_by_field_name("type").is_some() {
1856                    process_typed_parameter(param, content, scope_prefix, helper);
1857                }
1858            }
1859        }
1860    }
1861}
1862
1863/// Process a single typed parameter node.
1864///
1865/// Creates scope-qualified variable names to prevent cross-scope type contamination.
1866/// Format: `<scope_prefix>:<param_name>` (e.g., `MyClass.method:x` or `process:x`)
1867fn process_typed_parameter(
1868    param: Node<'_>,
1869    content: &[u8],
1870    scope_prefix: &str,
1871    helper: &mut GraphBuildHelper,
1872) {
1873    // Extract parameter name (could be in "name" field or as identifier child)
1874    let param_name = if let Some(name_node) = param.child_by_field_name("name") {
1875        name_node.utf8_text(content).ok()
1876    } else {
1877        // Fallback: look for identifier child
1878        param
1879            .children(&mut param.walk())
1880            .find(|c| c.kind() == "identifier")
1881            .and_then(|n| n.utf8_text(content).ok())
1882    };
1883
1884    let Some(param_name) = param_name else {
1885        return;
1886    };
1887
1888    // Skip self and cls (special method parameters)
1889    if param_name == "self" || param_name == "cls" {
1890        return;
1891    }
1892
1893    // Extract type annotation
1894    let Some(type_node) = param.child_by_field_name("type") else {
1895        return;
1896    };
1897
1898    let Some(type_name) = extract_type_from_node(type_node, content) else {
1899        return;
1900    };
1901
1902    // Create scope-qualified parameter name to prevent cross-scope contamination
1903    // Format: <scope_prefix>:<param_name> (e.g., "MyClass.method:x" or "process:x")
1904    let qualified_param_name = if scope_prefix.is_empty() {
1905        // Top-level function parameter
1906        format!(":{param_name}")
1907    } else {
1908        format!("{scope_prefix}:{param_name}")
1909    };
1910
1911    // Create parameter variable node with qualified name
1912    let param_id = helper.add_variable(&qualified_param_name, Some(span_from_node(param)));
1913
1914    // Create type node
1915    let type_id = helper.add_type(&type_name, None);
1916
1917    // Add TypeOf and Reference edges
1918    helper.add_typeof_edge(param_id, type_id);
1919    helper.add_reference_edge(param_id, type_id);
1920}
1921
1922/// Process annotated assignments to create `TypeOf` and Reference edges.
1923///
1924/// Handles:
1925/// - `user: User = get_user()` - annotated assignment with value
1926/// - `count: int` - annotated assignment without value
1927/// - `items: List[str] = []` - generic types
1928fn process_annotated_assignment(
1929    expr_stmt_node: Node<'_>,
1930    content: &[u8],
1931    ast_graph: &ASTGraph,
1932    helper: &mut GraphBuildHelper,
1933) {
1934    // Get the containing scope for scope qualification
1935    // For assignments, we need to find the enclosing function/class
1936    let scope_prefix = find_containing_scope(expr_stmt_node, content, ast_graph);
1937
1938    // Look for expression_statement containing an assignment
1939    for child in expr_stmt_node.children(&mut expr_stmt_node.walk()) {
1940        if child.kind() == "assignment" {
1941            process_typed_assignment(child, content, &scope_prefix, helper);
1942        }
1943    }
1944}
1945
1946/// Process a typed assignment node (shared logic for variables and class attributes).
1947///
1948/// Creates scope-qualified variable names to prevent cross-scope type contamination.
1949fn process_typed_assignment(
1950    assignment_node: Node<'_>,
1951    content: &[u8],
1952    scope_prefix: &str,
1953    helper: &mut GraphBuildHelper,
1954) {
1955    // Check if this is a typed assignment by looking for type annotation
1956    // In Python, annotated assignments look like: name: type = value
1957    // The AST structure is: assignment { left: identifier, type: type, right: expression }
1958
1959    let Some(left) = assignment_node.child_by_field_name("left") else {
1960        return;
1961    };
1962
1963    let Some(type_node) = assignment_node.child_by_field_name("type") else {
1964        return;
1965    };
1966
1967    // Extract variable name
1968    let Ok(var_name) = left.utf8_text(content) else {
1969        return;
1970    };
1971
1972    // Extract type
1973    let Some(type_name) = extract_type_from_node(type_node, content) else {
1974        return;
1975    };
1976
1977    // Create scope-qualified variable name to prevent cross-scope contamination
1978    // For class attributes (module-level or class-level), use simple name
1979    // For function-local variables, use qualified name
1980    let qualified_var_name = if scope_prefix.is_empty() {
1981        // Module-level variable
1982        var_name.to_string()
1983    } else if scope_prefix.contains('.') && !scope_prefix.contains(':') {
1984        // Class attribute (scope_prefix is class name without function)
1985        format!("{scope_prefix}.{var_name}")
1986    } else {
1987        // Function-local variable
1988        format!("{scope_prefix}:{var_name}")
1989    };
1990
1991    // Create variable node with qualified name
1992    let var_id = helper.add_variable(&qualified_var_name, Some(span_from_node(assignment_node)));
1993
1994    // Create type node
1995    let type_id = helper.add_type(&type_name, None);
1996
1997    // Add TypeOf and Reference edges
1998    helper.add_typeof_edge(var_id, type_id);
1999    helper.add_reference_edge(var_id, type_id);
2000}
2001
2002/// Extract type name from a type annotation node.
2003///
2004/// Handles:
2005/// - Simple types: `int`, `str`, `bool`
2006/// - Generic types: `List[int]` → extract base type `List`
2007/// - Optional types: `Optional[User]` → extract base type `Optional`
2008/// - Qualified types: `module.Type` → extract full qualified name
2009/// - Forward references: `"User"` → `User` (strips quotes)
2010/// - PEP 604 unions: `User | None` → `User` (extracts left-most base type)
2011fn extract_type_from_node(type_node: Node<'_>, content: &[u8]) -> Option<String> {
2012    match type_node.kind() {
2013        "type" => {
2014            // The "type" node wraps the actual type - recurse into first child
2015            type_node
2016                .named_child(0)
2017                .and_then(|child| extract_type_from_node(child, content))
2018        }
2019        "identifier" => {
2020            // Simple type: int, str, User
2021            type_node.utf8_text(content).ok().map(String::from)
2022        }
2023        "string" => {
2024            // Forward reference: "User" -> User
2025            // Strip surrounding quotes from string literal annotations
2026            let text = type_node.utf8_text(content).ok()?;
2027            let trimmed = text.trim();
2028
2029            // Remove quotes: "User" or 'User' -> User
2030            if (trimmed.starts_with('"') && trimmed.ends_with('"'))
2031                || (trimmed.starts_with('\'') && trimmed.ends_with('\''))
2032            {
2033                let unquoted = &trimmed[1..trimmed.len() - 1];
2034                // Handle potential unions inside string: "User | None" -> "User"
2035                Some(normalize_union_type(unquoted))
2036            } else {
2037                Some(trimmed.to_string())
2038            }
2039        }
2040        "binary_operator" => {
2041            // PEP 604 union: User | None -> User
2042            // Extract left operand as the primary type
2043            if let Some(left) = type_node.child_by_field_name("left") {
2044                extract_type_from_node(left, content)
2045            } else {
2046                // Fallback: extract text and normalize
2047                type_node
2048                    .utf8_text(content)
2049                    .ok()
2050                    .map(|text| normalize_union_type(text.trim()))
2051            }
2052        }
2053        "generic_type" | "subscript" => {
2054            // Generic type: List[int], Dict[str, int], Optional[User]
2055            // Extract base type (before the brackets)
2056            // Structure: subscript { value: identifier, subscript: [...] }
2057            if let Some(value_node) = type_node.child_by_field_name("value") {
2058                extract_type_from_node(value_node, content)
2059            } else {
2060                // Fallback: try first named child
2061                type_node
2062                    .named_child(0)
2063                    .and_then(|child| extract_type_from_node(child, content))
2064                    .or_else(|| {
2065                        // Last resort: extract the full text and take the base type
2066                        type_node.utf8_text(content).ok().and_then(|text| {
2067                            // Extract base type from "List[str]" -> "List"
2068                            text.split('[').next().map(|s| s.trim().to_string())
2069                        })
2070                    })
2071            }
2072        }
2073        "attribute" => {
2074            // Qualified type: module.Type or package.module.Type
2075            type_node.utf8_text(content).ok().map(String::from)
2076        }
2077        "list" | "tuple" | "set" => {
2078            // Collection literals (though rare in type annotations)
2079            type_node.utf8_text(content).ok().map(String::from)
2080        }
2081        _ => {
2082            // Fallback: try to extract text from any other node
2083            // For unknown node types, try to extract intelligently
2084            let text = type_node.utf8_text(content).ok()?;
2085            let trimmed = text.trim();
2086
2087            // If it looks like a generic type, extract base type
2088            if trimmed.contains('[') {
2089                trimmed.split('[').next().map(|s| s.trim().to_string())
2090            } else {
2091                // Check for union syntax
2092                Some(normalize_union_type(trimmed))
2093            }
2094        }
2095    }
2096}
2097
/// Normalize union types by extracting the left-most/primary type.
///
/// Only a `|` at the top level of the type expression counts as a union
/// separator; a `|` nested inside `[...]` or `(...)` belongs to a type
/// argument and is left alone, so generic types are never cut in half.
///
/// When a top-level `|` is found, the text before it is returned with
/// surrounding whitespace trimmed. Otherwise the input is returned unchanged.
///
/// Examples:
/// - `User | None` → `User`
/// - `str | int` → `str`
/// - `Optional[User]` → `Optional[User]` (unchanged, not a union)
/// - `Dict[str, int | None]` → `Dict[str, int | None]` (nested `|` only)
/// - `Dict[str, int | None] | None` → `Dict[str, int | None]`
fn normalize_union_type(type_str: &str) -> String {
    // Bracket nesting depth; unbalanced closers are tolerated via saturation.
    let mut depth = 0usize;

    for (idx, ch) in type_str.char_indices() {
        match ch {
            '[' | '(' => depth += 1,
            ']' | ')' => depth = depth.saturating_sub(1),
            '|' if depth == 0 => return type_str[..idx].trim().to_string(),
            _ => {}
        }
    }

    type_str.to_string()
}
2112
/// Unit tests for the text-level helpers in this file: name simplification,
/// FFI library name normalization and route decorator parsing.
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the `Some((method, path))` value expected from
    /// `parse_route_decorator_text`.
    fn route(method: &str, path: &str) -> Option<(String, String)> {
        Some((method.to_string(), path.to_string()))
    }

    #[test]
    fn test_simple_name_extracts_dotted_identifiers() {
        // General dotted identifier handling (for call targets)
        assert_eq!(simple_name("module.func"), "func");
        assert_eq!(simple_name("obj.method"), "method");
        assert_eq!(simple_name("package.module.func"), "func");
        assert_eq!(simple_name("self.helper"), "helper");

        // No dots - return as-is
        assert_eq!(simple_name("function"), "function");
        assert_eq!(simple_name(""), "");
    }

    #[test]
    fn test_ffi_library_simple_name_extracts_library_base_names() {
        // Standard shared library names
        assert_eq!(ffi_library_simple_name("libfoo.so"), "libfoo");
        assert_eq!(ffi_library_simple_name("lib1.so"), "lib1");
        assert_eq!(ffi_library_simple_name("lib2.so"), "lib2");

        // Different extensions
        assert_eq!(ffi_library_simple_name("kernel32.dll"), "kernel32");
        assert_eq!(ffi_library_simple_name("libSystem.dylib"), "libSystem");

        // Versioned shared libraries (libc.so.6)
        assert_eq!(ffi_library_simple_name("libc.so.6"), "libc");

        // No extension - return as-is
        assert_eq!(ffi_library_simple_name("kernel32"), "kernel32");
        assert_eq!(ffi_library_simple_name("numpy"), "numpy");

        // Variable references (prefixed with $)
        assert_eq!(ffi_library_simple_name("$libname"), "$libname");

        // Edge cases
        assert_eq!(ffi_library_simple_name(""), "");
        assert_eq!(ffi_library_simple_name("lib.so"), "lib");
    }

    #[test]
    fn test_ffi_library_simple_name_prevents_duplicate_edges() {
        // This was the bug: lib1.so and lib2.so both became "so"
        let name1 = ffi_library_simple_name("lib1.so");
        let name2 = ffi_library_simple_name("lib2.so");

        // They should be different
        assert_ne!(
            name1, name2,
            "lib1.so and lib2.so must produce different simple names"
        );
        assert_eq!(name1, "lib1");
        assert_eq!(name2, "lib2");
    }

    #[test]
    fn test_ffi_library_simple_name_handles_directory_paths() {
        // Full paths with directories containing dots (Codex finding)
        assert_eq!(ffi_library_simple_name("/opt/v1.2/libfoo.so"), "libfoo");
        assert_eq!(
            ffi_library_simple_name("/usr/lib/x86_64-linux-gnu/libc.so.6"),
            "libc"
        );
        assert_eq!(ffi_library_simple_name("libs/lib1.so"), "lib1");

        // Relative paths
        assert_eq!(ffi_library_simple_name("./libs/kernel32.dll"), "kernel32");
        assert_eq!(
            ffi_library_simple_name("../lib/libSystem.dylib"),
            "libSystem"
        );
    }

    // ====================================================================
    // Route decorator parsing unit tests
    // ====================================================================

    #[test]
    fn test_parse_route_decorator_app_route_default_get() {
        let result = parse_route_decorator_text("app.route('/api/users')");
        assert_eq!(result, route("GET", "/api/users"));
    }

    #[test]
    fn test_parse_route_decorator_app_route_with_methods_post() {
        let result = parse_route_decorator_text("app.route('/api/users', methods=['POST'])");
        assert_eq!(result, route("POST", "/api/users"));
    }

    #[test]
    fn test_parse_route_decorator_app_route_with_methods_put_double_quotes() {
        let result = parse_route_decorator_text("app.route(\"/api/items\", methods=[\"PUT\"])");
        assert_eq!(result, route("PUT", "/api/items"));
    }

    #[test]
    fn test_parse_route_decorator_app_get() {
        let result = parse_route_decorator_text("app.get('/api/users')");
        assert_eq!(result, route("GET", "/api/users"));
    }

    #[test]
    fn test_parse_route_decorator_app_post() {
        let result = parse_route_decorator_text("app.post('/api/items')");
        assert_eq!(result, route("POST", "/api/items"));
    }

    #[test]
    fn test_parse_route_decorator_app_put() {
        let result = parse_route_decorator_text("app.put('/api/items/1')");
        assert_eq!(result, route("PUT", "/api/items/1"));
    }

    #[test]
    fn test_parse_route_decorator_app_delete() {
        let result = parse_route_decorator_text("app.delete('/api/items/1')");
        assert_eq!(result, route("DELETE", "/api/items/1"));
    }

    #[test]
    fn test_parse_route_decorator_app_patch() {
        let result = parse_route_decorator_text("app.patch('/api/items/1')");
        assert_eq!(result, route("PATCH", "/api/items/1"));
    }

    #[test]
    fn test_parse_route_decorator_router_get_fastapi() {
        let result = parse_route_decorator_text("router.get('/api/users')");
        assert_eq!(result, route("GET", "/api/users"));
    }

    #[test]
    fn test_parse_route_decorator_router_post_fastapi() {
        let result = parse_route_decorator_text("router.post('/api/items')");
        assert_eq!(result, route("POST", "/api/items"));
    }

    #[test]
    fn test_parse_route_decorator_blueprint_route() {
        let result = parse_route_decorator_text("blueprint.route('/health')");
        assert_eq!(result, route("GET", "/health"));
    }

    #[test]
    fn test_parse_route_decorator_unknown_receiver_returns_none() {
        // "server" is not a recognized receiver
        let result = parse_route_decorator_text("server.get('/api/users')");
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_route_decorator_unknown_method_returns_none() {
        // "options" is not in the ROUTE_METHOD_NAMES list and is not "route"
        let result = parse_route_decorator_text("app.options('/api/users')");
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_route_decorator_no_parens_returns_none() {
        let result = parse_route_decorator_text("app.route");
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_route_decorator_no_dot_returns_none() {
        let result = parse_route_decorator_text("route('/api/users')");
        assert_eq!(result, None);
    }

    #[test]
    fn test_extract_path_from_decorator_args_single_quotes() {
        let result = extract_path_from_decorator_args("'/api/users')");
        assert_eq!(result, Some("/api/users".to_string()));
    }

    #[test]
    fn test_extract_path_from_decorator_args_double_quotes() {
        let result = extract_path_from_decorator_args("\"/api/items\")");
        assert_eq!(result, Some("/api/items".to_string()));
    }

    #[test]
    fn test_extract_path_from_decorator_args_empty_returns_none() {
        let result = extract_path_from_decorator_args("'')");
        assert_eq!(result, None);
    }

    #[test]
    fn test_extract_path_from_decorator_args_no_string_returns_none() {
        let result = extract_path_from_decorator_args("some_var)");
        assert_eq!(result, None);
    }

    #[test]
    fn test_extract_method_from_route_args_with_methods_keyword() {
        let result = extract_method_from_route_args("'/api/users', methods=['POST'])");
        assert_eq!(result, "POST");
    }

    #[test]
    fn test_extract_method_from_route_args_without_methods_keyword() {
        let result = extract_method_from_route_args("'/api/users')");
        assert_eq!(result, "GET");
    }

    #[test]
    fn test_extract_method_from_route_args_delete() {
        let result = extract_method_from_route_args("'/api/items', methods=['DELETE'])");
        assert_eq!(result, "DELETE");
    }

    #[test]
    fn test_extract_method_from_route_args_lowercase_normalizes() {
        let result = extract_method_from_route_args("'/x', methods=['put'])");
        assert_eq!(result, "PUT");
    }

    #[test]
    fn test_extract_first_string_literal_single_quotes() {
        let result = extract_first_string_literal("'POST']");
        assert_eq!(result, Some("POST".to_string()));
    }

    #[test]
    fn test_extract_first_string_literal_double_quotes() {
        let result = extract_first_string_literal("\"DELETE\"]");
        assert_eq!(result, Some("DELETE".to_string()));
    }

    #[test]
    fn test_extract_first_string_literal_empty_returns_none() {
        // Input without any quote character
        let result = extract_first_string_literal("no quotes here");
        assert_eq!(result, None);
    }

    #[test]
    fn test_extract_first_string_literal_empty_literal_returns_none() {
        // A literal that is present but empty is treated as "no literal"
        assert_eq!(extract_first_string_literal("'']"), None);
        assert_eq!(extract_first_string_literal("\"\"]"), None);
    }

    #[test]
    fn test_extract_first_string_literal_unterminated_returns_none() {
        // Opening quote without a matching closing quote
        assert_eq!(extract_first_string_literal("'POST]"), None);
    }
}
2360
#[cfg(test)]
mod shape_tests {
    //! Tests for the Python shape mapping: control-flow bucket coverage,
    //! histogram contents, and signature-shape extraction, all driven by the
    //! `sample.py` reference fixture.

    use super::{cf_bucket_for_python_kind, python_shape_mapping};
    use sqry_core::graph::unified::build::shape::{
        CfBucket, ShapeBudget, ShapeMapping, compute_shape_descriptor,
    };

    /// Contents of the Python reference fixture, embedded at compile time.
    const SAMPLE: &str = include_str!(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../test-fixtures/shape/reference/sample.py"
    ));

    /// Parses `src` with the tree-sitter Python grammar.
    ///
    /// # Panics
    ///
    /// Panics if the grammar cannot be loaded or the parser returns no tree.
    fn parse(src: &str) -> tree_sitter::Tree {
        let language: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).expect("load python grammar");
        parser.parse(src, None).expect("parse")
    }

    /// Resolve the `function_definition` with the given name from the fixture.
    ///
    /// Node text is read from [`SAMPLE`], so `tree` must have been produced by
    /// parsing [`SAMPLE`]; byte offsets from any other source would not line up.
    ///
    /// # Panics
    ///
    /// Panics if no `function_definition` node carries `name`.
    fn function_named<'t>(tree: &'t tree_sitter::Tree, name: &str) -> tree_sitter::Node<'t> {
        // Explicit-stack depth-first walk over the whole tree.
        let mut pending = vec![tree.root_node()];
        while let Some(node) = pending.pop() {
            if node.kind() == "function_definition" {
                let declared = node
                    .child_by_field_name("name")
                    .and_then(|ident| ident.utf8_text(SAMPLE.as_bytes()).ok());
                if declared == Some(name) {
                    return node;
                }
            }
            let mut cursor = node.walk();
            pending.extend(node.children(&mut cursor));
        }
        panic!("no function_definition named {name}");
    }

    /// The mapping must assign a control-flow bucket to a reasonable number of
    /// the grammar's node kinds (at least ten).
    #[test]
    fn cf_table_is_non_empty() {
        let mapping = python_shape_mapping();
        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
        let covered = (0..lang.node_kind_count())
            .filter(|&id| {
                // Checked narrowing: a plain `as u16` would silently wrap an
                // out-of-range id onto a different node kind.
                let kind_id = u16::try_from(id).expect("node kind id must fit in u16");
                mapping.cf_bucket(kind_id).is_some()
            })
            .count();
        assert!(
            covered >= 10,
            "expected many python CF kinds mapped, got {covered}"
        );
    }

    /// `classify` in the fixture must hit every listed control-flow bucket at
    /// least once.
    #[test]
    fn histogram_covers_real_control_flow() {
        let tree = parse(SAMPLE);
        let func = function_named(&tree, "classify");
        let descriptor = compute_shape_descriptor(
            func,
            SAMPLE.as_bytes(),
            python_shape_mapping(),
            &ShapeBudget::default(),
        );
        assert!(
            !descriptor.is_unhashable(),
            "classify body must be hashable"
        );

        let expected_buckets = [
            CfBucket::Branch,
            CfBucket::Loop,
            CfBucket::Match,
            CfBucket::Try,
            CfBucket::Catch,
            CfBucket::Throw,
            CfBucket::Resource,
            CfBucket::Return,
            CfBucket::BreakContinue,
            CfBucket::Call,
            CfBucket::Assign,
            CfBucket::Comprehension,
        ];
        for bucket in expected_buckets {
            assert!(
                descriptor.cf_histogram[bucket.index()] >= 1,
                "classify must exercise {bucket:?}"
            );
        }
    }

    /// `fetch` in the fixture must register await, yield and closure buckets,
    /// and its signature must report a return annotation.
    #[test]
    fn async_body_covers_yield_await_closure() {
        let tree = parse(SAMPLE);
        let func = function_named(&tree, "fetch");
        let descriptor = compute_shape_descriptor(
            func,
            SAMPLE.as_bytes(),
            python_shape_mapping(),
            &ShapeBudget::default(),
        );
        let hits = |bucket: CfBucket| descriptor.cf_histogram[bucket.index()];

        assert!(hits(CfBucket::Await) >= 1, "await");
        assert!(hits(CfBucket::Yield) >= 1, "yield");
        assert!(hits(CfBucket::Closure) >= 1, "lambda closure");
        assert!(
            descriptor.signature_shape.has_return_annotation,
            "-> str return annotation"
        );
    }

    /// Signature extraction for
    /// `classify(values, threshold=0, *extra, **opts)`.
    #[test]
    fn signature_shape_reads_arity_and_splats() {
        let tree = parse(SAMPLE);
        let func = function_named(&tree, "classify");
        let shape = python_shape_mapping().signature_shape(func, SAMPLE.as_bytes());

        assert_eq!(
            shape.arity_positional, 2,
            "values + threshold are positional"
        );
        assert!(shape.has_defaults, "threshold=0");
        assert!(shape.has_varargs, "*extra");
        assert!(shape.has_kwargs, "**opts");
    }

    /// Node kinds that are not control flow must not map to any bucket.
    #[test]
    fn unknown_kind_maps_to_none() {
        for kind in ["module", "identifier"] {
            assert!(cf_bucket_for_python_kind(kind).is_none());
        }
    }
}