Skip to main content

sqry_lang_python/relations/
graph_builder.rs

1use std::{collections::HashMap, path::Path, sync::OnceLock};
2
3use sqry_core::graph::unified::StagingGraph;
4use sqry_core::graph::unified::build::GraphBuildHelper;
5use sqry_core::graph::unified::build::helper::CalleeKindHint;
6use sqry_core::graph::unified::build::shape::{CfBucket, ShapeMapping};
7use sqry_core::graph::unified::edge::FfiConvention;
8use sqry_core::graph::unified::edge::kind::TypeOfContext;
9use sqry_core::graph::unified::node::NodeId as UnifiedNodeId;
10use sqry_core::graph::unified::storage::shape::SignatureShape;
11use sqry_core::graph::{GraphBuilder, GraphBuilderError, GraphResult, Language, Span};
12use tree_sitter::{Node, Tree};
13
14use super::local_scopes;
15
/// Scope depth used when a [`PythonGraphBuilder`] is created via `Default`.
const DEFAULT_SCOPE_DEPTH: usize = 4;

/// Standard-library module names treated as C-implemented modules.
///
/// NOTE(review): presumably consulted by `is_native_extension_import`, which is
/// defined outside this part of the file — confirm against that function.
#[rustfmt::skip]
const STD_C_MODULES: &[&str] = &[
    // Underscore-prefixed modules.
    "_ctypes", "_socket", "_ssl", "_hashlib", "_json", "_pickle", "_struct",
    "_sqlite3", "_decimal", "_lzma", "_bz2", "_zlib", "_elementtree", "_csv",
    "_datetime", "_heapq", "_bisect", "_random", "_collections", "_functools",
    "_itertools", "_operator", "_io", "_thread", "_multiprocessing",
    "_posixsubprocess", "_asyncio",
    // Modules without the underscore prefix.
    "array", "math", "cmath",
];

/// Third-party package names treated as shipping native (C) extensions.
///
/// NOTE(review): presumably consulted by `is_native_extension_import`, which is
/// defined outside this part of the file — confirm against that function.
#[rustfmt::skip]
const THIRD_PARTY_C_PACKAGES: &[&str] = &[
    "numpy", "pandas", "scipy", "sklearn", "cv2", "PIL", "torch", "tensorflow",
    "lxml", "psycopg2", "MySQLdb", "sqlite3", "cryptography", "bcrypt", "regex",
    "ujson", "orjson", "msgpack", "greenlet", "gevent", "uvloop",
];
72
73/// Graph builder for Python files using unified `CodeGraph` architecture.
74#[derive(Debug, Clone, Copy)]
75pub struct PythonGraphBuilder {
76    max_scope_depth: usize,
77}
78
79impl Default for PythonGraphBuilder {
80    fn default() -> Self {
81        Self {
82            max_scope_depth: DEFAULT_SCOPE_DEPTH,
83        }
84    }
85}
86
87impl PythonGraphBuilder {
88    #[must_use]
89    pub fn new(max_scope_depth: usize) -> Self {
90        Self { max_scope_depth }
91    }
92}
93
94impl GraphBuilder for PythonGraphBuilder {
95    fn build_graph(
96        &self,
97        tree: &Tree,
98        content: &[u8],
99        file: &Path,
100        staging: &mut StagingGraph,
101    ) -> GraphResult<()> {
102        // Create helper for staging graph population
103        let mut helper = GraphBuildHelper::new(staging, file, Language::Python);
104
105        // Build AST graph for call context tracking
106        let ast_graph = ASTGraph::from_tree(tree, content, self.max_scope_depth).map_err(|e| {
107            GraphBuilderError::ParseError {
108                span: Span::default(),
109                reason: e,
110            }
111        })?;
112
113        // Check if __all__ is defined in the module
114        let has_all = has_all_assignment(tree.root_node(), content);
115
116        // Build local variable scope tree
117        let mut scope_tree = local_scopes::build(tree.root_node(), content)?;
118
119        // Create recursion guard for tree walking
120        let recursion_limits =
121            sqry_core::config::RecursionLimits::load_or_default().map_err(|e| {
122                GraphBuilderError::ParseError {
123                    span: Span::default(),
124                    reason: format!("Failed to load recursion limits: {e}"),
125                }
126            })?;
127        let file_ops_depth = recursion_limits.effective_file_ops_depth().map_err(|e| {
128            GraphBuilderError::ParseError {
129                span: Span::default(),
130                reason: format!("Invalid file_ops_depth configuration: {e}"),
131            }
132        })?;
133        let mut guard =
134            sqry_core::query::security::RecursionGuard::new(file_ops_depth).map_err(|e| {
135                GraphBuilderError::ParseError {
136                    span: Span::default(),
137                    reason: format!("Failed to create recursion guard: {e}"),
138                }
139            })?;
140
141        // Walk tree to find functions, classes, methods, calls, and imports
142        walk_tree_for_graph(
143            tree.root_node(),
144            content,
145            &ast_graph,
146            &mut helper,
147            has_all,
148            &mut guard,
149            &mut scope_tree,
150        )?;
151
152        Ok(())
153    }
154
155    fn language(&self) -> Language {
156        Language::Python
157    }
158
159    fn shape_mapping(&self) -> Option<&dyn ShapeMapping> {
160        Some(python_shape_mapping())
161    }
162}
163
164/// Per-language [`ShapeMapping`] for Python: the SPEC anchor for the
165/// identifier-blind body-shape descriptor.
166///
167/// Holds a precomputed `kind_id -> CfBucket` table so the hot shape walk does a
168/// single array index per node instead of a grammar string lookup. The table is
169/// built once from the tree-sitter-python grammar and shared process-wide via
170/// [`python_shape_mapping`]. Everything except this mapping is the one shared
171/// `compute_shape_descriptor` routine in sqry-core.
172pub struct PythonShapeMapping {
173    cf_by_kind_id: Vec<Option<CfBucket>>,
174}
175
176impl PythonShapeMapping {
177    /// Build the `kind_id -> CfBucket` table from the tree-sitter-python grammar.
178    fn build() -> Self {
179        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
180        let count = lang.node_kind_count();
181        let mut cf_by_kind_id = vec![None; count];
182        for (id, slot) in cf_by_kind_id.iter_mut().enumerate() {
183            let Ok(kind_id) = u16::try_from(id) else {
184                break;
185            };
186            if !lang.node_kind_is_named(kind_id) {
187                continue;
188            }
189            if let Some(name) = lang.node_kind_for_id(kind_id) {
190                *slot = cf_bucket_for_python_kind(name);
191            }
192        }
193        Self { cf_by_kind_id }
194    }
195}
196
197impl ShapeMapping for PythonShapeMapping {
198    fn cf_bucket(&self, ts_node_kind_id: u16) -> Option<CfBucket> {
199        self.cf_by_kind_id
200            .get(ts_node_kind_id as usize)
201            .copied()
202            .flatten()
203    }
204
205    fn signature_shape(&self, fn_node: Node, _src: &[u8]) -> SignatureShape {
206        let mut shape = SignatureShape::default();
207        if let Some(params) = fn_node.child_by_field_name("parameters") {
208            // Python keyword-only parameters follow a bare `*` or a `*args`
209            // splat. Track whether we have crossed that boundary so positional
210            // and keyword-only arities are counted into the right slot.
211            let mut keyword_only = false;
212            let mut cursor = params.walk();
213            for child in params.named_children(&mut cursor) {
214                match child.kind() {
215                    // `*args`: variadic AND the start of the keyword-only region.
216                    "list_splat_pattern" => {
217                        shape.has_varargs = true;
218                        keyword_only = true;
219                    }
220                    // `**kwargs`.
221                    "dictionary_splat_pattern" => shape.has_kwargs = true,
222                    // A plain positional / keyword parameter (`x`).
223                    "identifier" | "typed_parameter" => {
224                        bump_arity(&mut shape, keyword_only);
225                    }
226                    // A parameter carrying a default value (`x=1`, `x: int = 1`).
227                    "default_parameter" | "typed_default_parameter" => {
228                        shape.has_defaults = true;
229                        bump_arity(&mut shape, keyword_only);
230                    }
231                    _ => {}
232                }
233            }
234        }
235        shape.has_return_annotation = fn_node.child_by_field_name("return_type").is_some();
236        shape
237    }
238}
239
240/// Count one parameter into the positional or keyword-only arity slot.
241fn bump_arity(shape: &mut SignatureShape, keyword_only: bool) {
242    if keyword_only {
243        shape.arity_keyword_only = shape.arity_keyword_only.saturating_add(1);
244    } else {
245        shape.arity_positional = shape.arity_positional.saturating_add(1);
246    }
247}
248
249/// Map one tree-sitter-python grammar node-kind name to its canonical
250/// control-flow bucket. Additive-only: the bucket set is frozen (see
251/// [`CfBucket`]), so new Python kinds extend the match, never reorder the buckets.
252fn cf_bucket_for_python_kind(name: &str) -> Option<CfBucket> {
253    let bucket = match name {
254        "if_statement" | "elif_clause" | "conditional_expression" => CfBucket::Branch,
255        "for_statement" | "while_statement" => CfBucket::Loop,
256        "match_statement" | "case_clause" => CfBucket::Match,
257        "try_statement" => CfBucket::Try,
258        "except_clause" | "except_group_clause" => CfBucket::Catch,
259        "raise_statement" => CfBucket::Throw,
260        // `with`/`async with` is Python's resource-acquisition construct.
261        "with_statement" => CfBucket::Resource,
262        "return_statement" => CfBucket::Return,
263        "yield" => CfBucket::Yield,
264        "await" => CfBucket::Await,
265        "break_statement" | "continue_statement" => CfBucket::BreakContinue,
266        "call" => CfBucket::Call,
267        "assignment" | "augmented_assignment" | "named_expression" => CfBucket::Assign,
268        "lambda" => CfBucket::Closure,
269        "list_comprehension"
270        | "dictionary_comprehension"
271        | "set_comprehension"
272        | "generator_expression" => CfBucket::Comprehension,
273        _ => return None,
274    };
275    Some(bucket)
276}
277
278/// The process-wide Python shape mapping, built once on first use.
279#[must_use]
280pub fn python_shape_mapping() -> &'static PythonShapeMapping {
281    static MAPPING: OnceLock<PythonShapeMapping> = OnceLock::new();
282    MAPPING.get_or_init(PythonShapeMapping::build)
283}
284
285/// Check if the module defines `__all__`.
286fn has_all_assignment(node: Node, content: &[u8]) -> bool {
287    let mut cursor = node.walk();
288    for child in node.children(&mut cursor) {
289        if child.kind() == "expression_statement" {
290            // Check for __all__ assignment
291            let assignment = child
292                .children(&mut child.walk())
293                .find(|c| c.kind() == "assignment" || c.kind() == "augmented_assignment");
294
295            if let Some(assignment) = assignment
296                && let Some(left) = assignment.child_by_field_name("left")
297                && let Ok(left_text) = left.utf8_text(content)
298                && left_text.trim() == "__all__"
299            {
300                return true;
301            }
302        }
303    }
304    false
305}
306
307/// Walk the tree and populate the staging graph.
308/// # Errors
309///
310/// Returns [`GraphBuilderError`] if graph operations fail or recursion depth exceeds the guard's limit.
311#[allow(clippy::too_many_lines)]
312fn walk_tree_for_graph(
313    node: Node,
314    content: &[u8],
315    ast_graph: &ASTGraph,
316    helper: &mut GraphBuildHelper,
317    has_all: bool,
318    guard: &mut sqry_core::query::security::RecursionGuard,
319    scope_tree: &mut local_scopes::PythonScopeTree,
320) -> GraphResult<()> {
321    guard.enter().map_err(|e| GraphBuilderError::ParseError {
322        span: Span::default(),
323        reason: format!("Recursion limit exceeded: {e}"),
324    })?;
325
326    match node.kind() {
327        "class_definition" => {
328            // Extract class name
329            if let Some(name_node) = node.child_by_field_name("name")
330                && let Ok(class_name) = name_node.utf8_text(content)
331            {
332                let span = span_from_node(node);
333
334                // Build qualified class name from scope
335                let qualified_name = class_name.to_string();
336
337                // Add class node
338                let class_id = helper.add_class(&qualified_name, Some(span));
339
340                // Process inheritance (base classes)
341                process_class_inheritance(node, content, class_id, helper);
342
343                // Note: Class body annotations are processed via normal recursion in walk_tree_for_graph
344
345                // Export public classes at module level (only if __all__ is not defined)
346                if !has_all && is_module_level(node) && is_public_name(class_name) {
347                    export_from_file_module(helper, class_id);
348                }
349            }
350        }
351        "expression_statement" => {
352            // Check for __all__ assignment (exports)
353            process_all_assignment(node, content, helper);
354
355            // Check for annotated assignments (type hints on variables)
356            process_annotated_assignment(node, content, ast_graph, helper);
357        }
358        "function_definition" => {
359            // Extract function context from AST graph
360            if let Some(call_context) = ast_graph.get_callable_context(node.id()) {
361                let span = span_from_node(node);
362
363                // Extract visibility from function name
364                let func_name = node
365                    .child_by_field_name("name")
366                    .and_then(|n| n.utf8_text(content).ok())
367                    .unwrap_or("");
368                let visibility = extract_visibility_from_name(func_name);
369
370                // Check if this is a property (has @property decorator)
371                let is_property = has_property_decorator(node, content);
372
373                // Extract return type annotation for signature (normalized — strips
374                // generics/unions/quotes for human-readable display).
375                let return_type = extract_return_type_annotation(node, content);
376
377                // Extract byte-exact source text of the return-type annotation for
378                // the `TypeOf { context: Return }` edge consumed by `returns:<Type>`
379                // queries. This text is intentionally NOT normalized — `Optional[int]`,
380                // `List[Dict[str, int]]`, `pd.DataFrame`, `"User"` are all preserved
381                // verbatim so byte-exact predicates work as documented.
382                let return_type_source = extract_return_type_source_text(node, content);
383
384                // Add function/method/property node
385                let function_id = if is_property && call_context.is_method {
386                    // Property node
387                    helper.add_node_with_visibility(
388                        &call_context.qualified_name,
389                        Some(span),
390                        sqry_core::graph::unified::node::NodeKind::Property,
391                        Some(visibility),
392                    )
393                } else if call_context.is_method {
394                    // Regular method with signature
395                    if return_type.is_some() {
396                        helper.add_method_with_signature(
397                            &call_context.qualified_name,
398                            Some(span),
399                            call_context.is_async,
400                            false, // Python doesn't have static methods in the same way
401                            Some(visibility),
402                            return_type.as_deref(),
403                        )
404                    } else {
405                        helper.add_method_with_visibility(
406                            &call_context.qualified_name,
407                            Some(span),
408                            call_context.is_async,
409                            false,
410                            Some(visibility),
411                        )
412                    }
413                } else {
414                    // Regular function with signature
415                    if return_type.is_some() {
416                        helper.add_function_with_signature(
417                            &call_context.qualified_name,
418                            Some(span),
419                            call_context.is_async,
420                            false, // Python doesn't have unsafe
421                            Some(visibility),
422                            return_type.as_deref(),
423                        )
424                    } else {
425                        helper.add_function_with_visibility(
426                            &call_context.qualified_name,
427                            Some(span),
428                            call_context.is_async,
429                            false,
430                            Some(visibility),
431                        )
432                    }
433                };
434
435                // Emit `TypeOf { context: Return }` edge for the return type
436                // annotation when present. Property nodes (Python `@property`) and
437                // un-annotated functions get no edge — `extract_return_type_source_text`
438                // returns `None` for `def foo():` (no `-> Type`).
439                //
440                // The type-text is byte-exact source from the annotation node so
441                // `returns:Optional[int]`, `returns:pd.DataFrame`, etc. work as
442                // documented. A paired Reference edge is also emitted to keep
443                // typeof/reference-edge invariants in sync with C# / Go / Kotlin /
444                // TypeScript plugins.
445                //
446                // The synthesized Type node is anchored at the return-type
447                // annotation's span (mirroring the Rust precedent in
448                // `sqry-lang-rust/src/relations/graph_builder.rs`) so downstream
449                // consumers (LSP `textDocument/documentSymbol`, MCP
450                // `get_document_symbols`) report a concrete source location
451                // rather than line 0.
452                if !(is_property && call_context.is_method)
453                    && let Some(annotation_text) = return_type_source.as_deref()
454                    && let Some(return_type_node) = node.child_by_field_name("return_type")
455                {
456                    let type_span = span_from_node(return_type_node);
457                    let type_id = helper.add_type(annotation_text, Some(type_span));
458                    helper.add_typeof_edge_with_context(
459                        function_id,
460                        type_id,
461                        Some(TypeOfContext::Return),
462                        Some(0),
463                        Some(call_context.qualified_name.as_str()),
464                    );
465                    helper.add_reference_edge(function_id, type_id);
466                }
467
468                // Check for HTTP route decorators (Flask/FastAPI)
469                if let Some((http_method, route_path)) = extract_route_decorator_info(node, content)
470                {
471                    let endpoint_name = format!("route::{http_method}::{route_path}");
472                    let endpoint_id = helper.add_endpoint(&endpoint_name, Some(span));
473                    helper.add_contains_edge(endpoint_id, function_id);
474                }
475
476                // Process parameters to create TypeOf and Reference edges for type hints
477                process_function_parameters(node, content, ast_graph, helper);
478
479                // Export public functions at module level (not methods, only if __all__ is not defined)
480                if !has_all
481                    && !call_context.is_method
482                    && is_module_level(node)
483                    && let Some(name_node) = node.child_by_field_name("name")
484                    && let Ok(func_name) = name_node.utf8_text(content)
485                    && is_public_name(func_name)
486                {
487                    export_from_file_module(helper, function_id);
488                }
489            }
490        }
491        "call" => {
492            // Check for FFI patterns first (ctypes, cffi)
493            let is_ffi = build_ffi_call_edge(ast_graph, node, content, helper)?;
494            if !is_ffi {
495                // Not an FFI call - build regular call edge
496                if let Ok(Some((caller_qname, callee_qname, argument_count, is_awaited))) =
497                    build_call_for_staging(ast_graph, node, content)
498                {
499                    // Ensure both nodes exist
500                    let call_context = ast_graph.get_callable_context(node.id());
501                    let _is_async = call_context.is_some_and(|c| c.is_async);
502
503                    let call_span = span_from_node(node);
504                    let source_id =
505                        helper.ensure_callee(&caller_qname, call_span, CalleeKindHint::Function);
506                    let target_id =
507                        helper.ensure_callee(&callee_qname, call_span, CalleeKindHint::Function);
508
509                    // Add call edge
510                    let argument_count = u8::try_from(argument_count).unwrap_or(u8::MAX);
511                    helper.add_call_edge_full_with_span(
512                        source_id,
513                        target_id,
514                        argument_count,
515                        is_awaited,
516                        vec![call_span],
517                    );
518                }
519            }
520        }
521        "import_statement" | "import_from_statement" => {
522            // Build import edge
523            if let Ok(Some((from_qname, to_qname))) =
524                build_import_for_staging(node, content, helper)
525            {
526                // Ensure both module nodes exist
527                let from_id = helper.add_import(&from_qname, None);
528                let to_id = helper.add_import(&to_qname, Some(span_from_node(node)));
529
530                // Add import edge
531                helper.add_import_edge(from_id, to_id);
532
533                // Check if this imports a known native C extension module
534                if is_native_extension_import(&to_qname) {
535                    build_native_import_ffi_edge(&to_qname, node, helper);
536                }
537            }
538        }
539        "identifier" => {
540            // Local variable reference tracking
541            local_scopes::handle_identifier_for_reference(node, content, scope_tree, helper);
542        }
543        _ => {}
544    }
545
546    // Recurse into children
547    let mut cursor = node.walk();
548    for child in node.children(&mut cursor) {
549        walk_tree_for_graph(
550            child, content, ast_graph, helper, has_all, guard, scope_tree,
551        )?;
552    }
553
554    guard.exit();
555    Ok(())
556}
557
558/// Build call edge information for the staging graph.
559fn build_call_for_staging(
560    ast_graph: &ASTGraph,
561    call_node: Node<'_>,
562    content: &[u8],
563) -> GraphResult<Option<(String, String, usize, bool)>> {
564    // Get or create module-level context for top-level calls
565    let module_context;
566    let call_context = if let Some(ctx) = ast_graph.get_callable_context(call_node.id()) {
567        ctx
568    } else {
569        // Create synthetic module-level context for top-level calls
570        module_context = CallContext {
571            qualified_name: "<module>".to_string(),
572            span: (0, content.len()),
573            is_async: false,
574            is_method: false,
575            class_name: None,
576        };
577        &module_context
578    };
579
580    let Some(callee_expr) = call_node.child_by_field_name("function") else {
581        return Ok(None);
582    };
583
584    let callee_text = callee_expr
585        .utf8_text(content)
586        .map_err(|_| GraphBuilderError::ParseError {
587            span: span_from_node(call_node),
588            reason: "failed to read call expression".to_string(),
589        })?
590        .trim()
591        .to_string();
592
593    if callee_text.is_empty() {
594        return Ok(None);
595    }
596
597    let callee_simple = simple_name(&callee_text);
598    if callee_simple.is_empty() {
599        return Ok(None);
600    }
601
602    // Derive qualified callee name with proper self resolution
603    let caller_qname = call_context.qualified_name();
604    let target_qname = if let Some(method_name) = callee_text.strip_prefix("self.") {
605        // Resolve self.method() to ClassName.method()
606        if let Some(class_name) = &call_context.class_name {
607            format!("{}.{}", class_name, simple_name(method_name))
608        } else {
609            callee_simple.to_string()
610        }
611    } else {
612        callee_simple.to_string()
613    };
614
615    let argument_count = count_arguments(call_node);
616    let is_awaited = is_awaited_call(call_node);
617    Ok(Some((
618        caller_qname,
619        target_qname,
620        argument_count,
621        is_awaited,
622    )))
623}
624
625/// Build import edge information for the staging graph.
626fn build_import_for_staging(
627    import_node: Node<'_>,
628    content: &[u8],
629    helper: &GraphBuildHelper,
630) -> GraphResult<Option<(String, String)>> {
631    // Extract the raw module name from the AST
632    let raw_module_name = if import_node.kind() == "import_statement" {
633        import_node
634            .child_by_field_name("name")
635            .and_then(|n| extract_module_name(n, content))
636    } else if import_node.kind() == "import_from_statement" {
637        import_node
638            .child_by_field_name("module_name")
639            .and_then(|n| extract_module_name(n, content))
640    } else {
641        None
642    };
643
644    // Handle relative imports with no module name
645    let module_name = if raw_module_name.is_none() && import_node.kind() == "import_from_statement"
646    {
647        if let Ok(import_text) = import_node.utf8_text(content) {
648            if let Some(from_idx) = import_text.find("from") {
649                if let Some(import_idx) = import_text.find("import") {
650                    let between = import_text[from_idx + 4..import_idx].trim();
651                    if between.starts_with('.') {
652                        Some(between.to_string())
653                    } else {
654                        None
655                    }
656                } else {
657                    None
658                }
659            } else {
660                None
661            }
662        } else {
663            None
664        }
665    } else {
666        raw_module_name
667    };
668
669    let Some(module_name) = module_name else {
670        return Ok(None);
671    };
672
673    if module_name.is_empty() {
674        return Ok(None);
675    }
676
677    // Resolve the import path to a canonical module identifier
678    let resolved_path = sqry_core::graph::resolve_python_import(
679        std::path::Path::new(helper.file_path()),
680        &module_name,
681        import_node.kind() == "import_from_statement",
682    )?;
683
684    // Return from/to qualified names
685    Ok(Some((helper.file_path().to_string(), resolved_path)))
686}
687
688fn span_from_node(node: Node<'_>) -> Span {
689    let start = node.start_position();
690    let end = node.end_position();
691    Span::new(
692        sqry_core::graph::node::Position::new(start.row, start.column),
693        sqry_core::graph::node::Position::new(end.row, end.column),
694    )
695}
696
697fn count_arguments(call_node: Node<'_>) -> usize {
698    call_node
699        .child_by_field_name("arguments")
700        .map_or(0, |args| {
701            args.named_children(&mut args.walk())
702                .filter(|child| {
703                    // Count actual arguments, not commas or parentheses
704                    !matches!(child.kind(), "," | "(" | ")")
705                })
706                .count()
707        })
708}
709
710fn is_awaited_call(call_node: Node<'_>) -> bool {
711    let mut current = call_node.parent();
712    while let Some(node) = current {
713        let kind = node.kind();
714        if kind == "await" || kind == "await_expression" {
715            return true;
716        }
717        current = node.parent();
718    }
719    false
720}
721
/// Return the last dot-separated component of a dotted identifier.
///
/// `"module.func"` yields `"func"` and `"obj.method"` yields `"method"`; a name
/// without dots is returned unchanged, and a trailing dot yields `""`.
fn simple_name(qualified: &str) -> &str {
    match qualified.rfind('.') {
        Some(last_dot) => &qualified[last_dot + 1..],
        None => qualified,
    }
}
729
/// Extract a simple library name from an FFI library path.
///
/// For library paths with file extensions, extracts the base name before the extension.
/// This prevents different libraries with the same extension (lib1.so, lib2.so) from
/// colliding as duplicate "so" targets.
///
/// Handles:
/// - Full paths: "/opt/v1.2/libfoo.so" → "libfoo"
/// - Relative paths: "libs/lib1.so" → "lib1"
/// - Versioned libs: "libc.so.6" → "libc"
/// - Dotted base names: "libpython3.11.so" → "libpython3.11"
/// - Upper-case extensions: "KERNEL32.DLL" → "KERNEL32"
/// - Simple names: "kernel32" → "kernel32"
/// - Variable refs: "$libname" → "$libname"
fn ffi_library_simple_name(library_path: &str) -> String {
    use std::path::Path;

    /// File extensions (without the dot) recognised as shared libraries.
    const LIBRARY_EXTENSIONS: [&str; 3] = ["so", "dll", "dylib"];

    // Strip directory components first (handles /opt/v1.2/libfoo.so)
    let filename = Path::new(library_path)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or(library_path);

    // Handle versioned .so files first (libc.so.6 → libc)
    if let Some(so_pos) = filename.find(".so.") {
        return filename[..so_pos].to_string();
    }

    // Split at the LAST dot so that dots inside the base name
    // (libpython3.11.so) do not hide the library extension.
    if let Some((stem, extension)) = filename.rsplit_once('.') {
        if LIBRARY_EXTENSIONS
            .iter()
            .any(|known| extension.eq_ignore_ascii_case(known))
        {
            return stem.to_string();
        }
    }

    // No library extension found - return filename as-is
    filename.to_string()
}
770
/// Report whether `name` is public, i.e. does not begin with an underscore.
///
/// By Python convention a leading underscore marks a name as private, and a
/// double leading underscore additionally triggers name mangling inside
/// classes. The empty string counts as public.
fn is_public_name(name: &str) -> bool {
    !matches!(name.as_bytes().first(), Some(b'_'))
}
779
780/// Check if a node is at module level (direct child of the module body).
781///
782/// In tree-sitter Python AST, module-level items are direct children of the root "module" node.
783/// We check if the parent is "module" to determine module-level scope.
784fn is_module_level(node: Node<'_>) -> bool {
785    // Walk up the tree to find the immediate container
786    let mut current = node.parent();
787    while let Some(parent) = current {
788        match parent.kind() {
789            "module" => return true,
790            "function_definition" | "class_definition" => return false,
791            _ => current = parent.parent(),
792        }
793    }
794    false
795}
796
797/// Export a symbol from the file module.
798///
799/// File-level module name for exports/imports.
800/// Distinct from `<module>` to avoid conflicts with top-level call context.
801const FILE_MODULE_NAME: &str = "<file_module>";
802
803fn export_from_file_module(
804    helper: &mut GraphBuildHelper,
805    exported: sqry_core::graph::unified::node::NodeId,
806) {
807    let module_id = helper.add_module(FILE_MODULE_NAME, None);
808    helper.add_export_edge(module_id, exported);
809}
810
811/// Extract module name from a `dotted_name`, `aliased_import`, or `relative_import` node
812///
813/// For `import numpy as np`, the "name" field is an `aliased_import` node with structure:
814/// `aliased_import { name: dotted_name("numpy"), alias: identifier("np") }`
815/// We need to extract just "numpy", not "numpy as np".
816fn extract_module_name(node: Node<'_>, content: &[u8]) -> Option<String> {
817    // Handle aliased imports: `import numpy as np` -> extract "numpy"
818    if node.kind() == "aliased_import" {
819        // The "name" field of aliased_import contains the actual module name
820        return node
821            .child_by_field_name("name")
822            .and_then(|name_node| name_node.utf8_text(content).ok())
823            .map(std::string::ToString::to_string);
824    }
825
826    // Regular dotted_name or identifier
827    node.utf8_text(content)
828        .ok()
829        .map(std::string::ToString::to_string)
830}
831
832// ============================================================================
833// Exports - __all__ assignment handling
834// ============================================================================
835
836/// Process `__all__ = ['name1', 'name2']` assignments to create export edges.
837///
838/// Python's `__all__` list explicitly defines the public API of a module.
839/// Each name in the list gets an Export edge from the module to the exported symbol.
840fn process_all_assignment(node: Node<'_>, content: &[u8], helper: &mut GraphBuildHelper) {
841    // expression_statement contains an assignment child
842    let assignment = node
843        .children(&mut node.walk())
844        .find(|child| child.kind() == "assignment" || child.kind() == "augmented_assignment");
845
846    let Some(assignment) = assignment else {
847        return;
848    };
849
850    // Check if left side is __all__
851    let left = assignment.child_by_field_name("left");
852    let Some(left) = left else {
853        return;
854    };
855
856    let Ok(left_text) = left.utf8_text(content) else {
857        return;
858    };
859
860    if left_text.trim() != "__all__" {
861        return;
862    }
863
864    // Get the right side (should be a list)
865    let right = assignment.child_by_field_name("right");
866    let Some(right) = right else {
867        return;
868    };
869
870    // Handle list or tuple literal (both valid for __all__)
871    if right.kind() == "list" || right.kind() == "tuple" {
872        process_all_list(right, content, helper);
873    }
874}
875
876/// Process a list/tuple of exported names from __all__.
877fn process_all_list(list_node: Node<'_>, content: &[u8], helper: &mut GraphBuildHelper) {
878    for child in list_node.children(&mut list_node.walk()) {
879        // Look for string literals
880        if child.kind() == "string"
881            && let Some(export_name) = extract_string_content(child, content)
882            && !export_name.is_empty()
883        {
884            // Create a node for the exported symbol
885            // We use add_function here as a generic symbol; the actual type
886            // will be resolved later by cross-file analysis
887            let span = span_from_node(child);
888            let export_id = helper.add_function(&export_name, Some(span), false, false);
889
890            // Add export edge (Direct export, no alias for Python __all__)
891            export_from_file_module(helper, export_id);
892        }
893    }
894}
895
896/// Extract the content of a string literal node (removing quotes).
897fn extract_string_content(string_node: Node<'_>, content: &[u8]) -> Option<String> {
898    // String nodes contain string_content or string_start/string_content/string_end
899    // Try to get the full text and strip quotes
900    let Ok(text) = string_node.utf8_text(content) else {
901        return None;
902    };
903
904    let text = text.trim();
905
906    // Handle various Python string formats: 'x', "x", '''x''', """x""", r'x', etc.
907    let stripped = text
908        .trim_start_matches(|c: char| {
909            c == 'r'
910                || c == 'b'
911                || c == 'f'
912                || c == 'u'
913                || c == 'R'
914                || c == 'B'
915                || c == 'F'
916                || c == 'U'
917        })
918        .trim_start_matches("'''")
919        .trim_end_matches("'''")
920        .trim_start_matches("\"\"\"")
921        .trim_end_matches("\"\"\"")
922        .trim_start_matches('\'')
923        .trim_end_matches('\'')
924        .trim_start_matches('"')
925        .trim_end_matches('"');
926
927    Some(stripped.to_string())
928}
929
930// ============================================================================
931// OOP - Inheritance handling
932// ============================================================================
933
934/// Process class inheritance to create Inherits edges.
935///
936/// Python supports multiple inheritance: `class Child(Parent1, Parent2):`
937/// Each base class gets an Inherits edge from the child class.
938fn process_class_inheritance(
939    class_node: Node<'_>,
940    content: &[u8],
941    class_id: UnifiedNodeId,
942    helper: &mut GraphBuildHelper,
943) {
944    // In Python AST, base classes are in the superclasses field (argument_list)
945    // class_definition has a "superclasses" field containing argument_list
946    let superclasses = class_node.child_by_field_name("superclasses");
947
948    let Some(superclasses) = superclasses else {
949        return;
950    };
951
952    // argument_list contains the base classes
953    for child in superclasses.children(&mut superclasses.walk()) {
954        if child.kind() == "keyword_argument" {
955            // Skip keyword arguments like metaclass=ABCMeta.
956            continue;
957        }
958
959        match child.kind() {
960            "identifier" => {
961                // Simple base class: class Child(Parent):
962                if let Ok(base_name) = child.utf8_text(content) {
963                    let base_name = base_name.trim();
964                    if !base_name.is_empty() {
965                        let span = span_from_node(child);
966                        let base_id = helper.add_class(base_name, Some(span));
967                        helper.add_inherits_edge(class_id, base_id);
968                    }
969                }
970            }
971            "attribute" => {
972                // Qualified base class: class Child(module.Parent):
973                if let Ok(base_name) = child.utf8_text(content) {
974                    let base_name = base_name.trim();
975                    if !base_name.is_empty() {
976                        let span = span_from_node(child);
977                        let base_id = helper.add_class(base_name, Some(span));
978                        helper.add_inherits_edge(class_id, base_id);
979                    }
980                }
981            }
982            "call" => {
983                // Parameterized base class with call syntax: class Child(SomeBase(arg)):
984                // Extract the function being called
985                if let Some(func) = child.child_by_field_name("function")
986                    && let Ok(base_name) = func.utf8_text(content)
987                {
988                    let base_name = base_name.trim();
989                    if !base_name.is_empty() {
990                        let span = span_from_node(child);
991                        let base_id = helper.add_class(base_name, Some(span));
992                        helper.add_inherits_edge(class_id, base_id);
993                    }
994                }
995            }
996            "subscript" => {
997                // Generic base class: class Child(Generic[T]): or class Child(List[int]):
998                // Extract the base type from the subscript (value field)
999                if let Some(value) = child.child_by_field_name("value")
1000                    && let Ok(base_name) = value.utf8_text(content)
1001                {
1002                    let base_name = base_name.trim();
1003                    if !base_name.is_empty() {
1004                        let span = span_from_node(child);
1005                        let base_id = helper.add_class(base_name, Some(span));
1006                        helper.add_inherits_edge(class_id, base_id);
1007                    }
1008                }
1009            }
1010            _ => {}
1011        }
1012    }
1013}
1014
1015// ============================================================================
1016// AST Graph - tracks callable contexts (functions, methods, classes)
1017// ============================================================================
1018
/// Description of one callable (function or method) that can contain calls.
#[derive(Debug, Clone)]
struct CallContext {
    /// Dot-separated name built from the enclosing scopes plus the callable's own name.
    qualified_name: String,
    /// Start and end byte offsets of the defining node.
    #[allow(dead_code)] // Reserved for scope analysis
    span: (usize, usize),
    /// Whether the definition carries an `async` keyword.
    is_async: bool,
    /// Whether the definition was found while inside at least one class.
    is_method: bool,
    /// Qualified name of the innermost enclosing class, if any.
    class_name: Option<String>,
}

impl CallContext {
    /// Return an owned copy of the qualified name.
    fn qualified_name(&self) -> String {
        self.qualified_name.to_owned()
    }
}
1034
1035struct ASTGraph {
1036    contexts: Vec<CallContext>,
1037    node_to_context: HashMap<usize, usize>,
1038}
1039
1040impl ASTGraph {
1041    fn from_tree(tree: &Tree, content: &[u8], max_depth: usize) -> Result<Self, String> {
1042        let mut contexts = Vec::new();
1043        let mut node_to_context = HashMap::new();
1044        let mut scope_stack: Vec<String> = Vec::new();
1045        let mut class_stack: Vec<String> = Vec::new();
1046
1047        walk_ast(
1048            tree.root_node(),
1049            content,
1050            &mut contexts,
1051            &mut node_to_context,
1052            &mut scope_stack,
1053            &mut class_stack,
1054            max_depth,
1055        )?;
1056
1057        Ok(Self {
1058            contexts,
1059            node_to_context,
1060        })
1061    }
1062
1063    #[allow(dead_code)] // Reserved for future context queries
1064    fn contexts(&self) -> &[CallContext] {
1065        &self.contexts
1066    }
1067
1068    fn get_callable_context(&self, node_id: usize) -> Option<&CallContext> {
1069        self.node_to_context
1070            .get(&node_id)
1071            .and_then(|idx| self.contexts.get(*idx))
1072    }
1073}
1074
1075fn walk_ast(
1076    node: Node,
1077    content: &[u8],
1078    contexts: &mut Vec<CallContext>,
1079    node_to_context: &mut HashMap<usize, usize>,
1080    scope_stack: &mut Vec<String>,
1081    class_stack: &mut Vec<String>,
1082    max_depth: usize,
1083) -> Result<(), String> {
1084    if scope_stack.len() > max_depth {
1085        return Ok(());
1086    }
1087
1088    match node.kind() {
1089        "class_definition" => {
1090            let name_node = node
1091                .child_by_field_name("name")
1092                .ok_or_else(|| "class_definition missing name".to_string())?;
1093            let class_name = name_node
1094                .utf8_text(content)
1095                .map_err(|_| "failed to read class name".to_string())?;
1096
1097            // Build qualified class name
1098            let qualified_class = if scope_stack.is_empty() {
1099                class_name.to_string()
1100            } else {
1101                format!("{}.{}", scope_stack.join("."), class_name)
1102            };
1103
1104            class_stack.push(qualified_class.clone());
1105            scope_stack.push(class_name.to_string());
1106
1107            // Recurse into class body
1108            if let Some(body) = node.child_by_field_name("body") {
1109                let mut cursor = body.walk();
1110                for child in body.children(&mut cursor) {
1111                    walk_ast(
1112                        child,
1113                        content,
1114                        contexts,
1115                        node_to_context,
1116                        scope_stack,
1117                        class_stack,
1118                        max_depth,
1119                    )?;
1120                }
1121            }
1122
1123            class_stack.pop();
1124            scope_stack.pop();
1125        }
1126        "function_definition" => {
1127            let name_node = node
1128                .child_by_field_name("name")
1129                .ok_or_else(|| "function_definition missing name".to_string())?;
1130            let func_name = name_node
1131                .utf8_text(content)
1132                .map_err(|_| "failed to read function name".to_string())?;
1133
1134            // Check if async
1135            let is_async = node
1136                .children(&mut node.walk())
1137                .any(|child| child.kind() == "async");
1138
1139            // Build qualified function name
1140            let qualified_func = if scope_stack.is_empty() {
1141                func_name.to_string()
1142            } else {
1143                format!("{}.{}", scope_stack.join("."), func_name)
1144            };
1145
1146            // Determine if this is a method (inside a class)
1147            let is_method = !class_stack.is_empty();
1148            let class_name = class_stack.last().cloned();
1149
1150            let context_idx = contexts.len();
1151            contexts.push(CallContext {
1152                qualified_name: qualified_func.clone(),
1153                span: (node.start_byte(), node.end_byte()),
1154                is_async,
1155                is_method,
1156                class_name,
1157            });
1158
1159            // Associate the function definition node itself with this context
1160            // This is required so walk_tree_for_graph can find the context
1161            node_to_context.insert(node.id(), context_idx);
1162
1163            // Associate all descendants with this context
1164            if let Some(body) = node.child_by_field_name("body") {
1165                associate_descendants(body, context_idx, node_to_context);
1166            }
1167
1168            scope_stack.push(func_name.to_string());
1169
1170            // Recurse into function body to find nested functions
1171            if let Some(body) = node.child_by_field_name("body") {
1172                let mut cursor = body.walk();
1173                for child in body.children(&mut cursor) {
1174                    walk_ast(
1175                        child,
1176                        content,
1177                        contexts,
1178                        node_to_context,
1179                        scope_stack,
1180                        class_stack,
1181                        max_depth,
1182                    )?;
1183                }
1184            }
1185
1186            scope_stack.pop();
1187        }
1188        _ => {
1189            // Recurse into children for other node types
1190            let mut cursor = node.walk();
1191            for child in node.children(&mut cursor) {
1192                walk_ast(
1193                    child,
1194                    content,
1195                    contexts,
1196                    node_to_context,
1197                    scope_stack,
1198                    class_stack,
1199                    max_depth,
1200                )?;
1201            }
1202        }
1203    }
1204
1205    Ok(())
1206}
1207
1208fn associate_descendants(
1209    node: Node,
1210    context_idx: usize,
1211    node_to_context: &mut HashMap<usize, usize>,
1212) {
1213    node_to_context.insert(node.id(), context_idx);
1214
1215    let mut stack = vec![node];
1216    while let Some(current) = stack.pop() {
1217        node_to_context.insert(current.id(), context_idx);
1218
1219        let mut cursor = current.walk();
1220        for child in current.children(&mut cursor) {
1221            stack.push(child);
1222        }
1223    }
1224}
1225
1226// ============================================================================
1227// FFI Detection - ctypes, cffi, and C extensions
1228// ============================================================================
1229
1230/// Build FFI edges for call expressions.
1231///
1232/// Detects Python FFI patterns:
1233/// - `ctypes.CDLL('libfoo.so')` / `ctypes.cdll.LoadLibrary('libfoo.so')`
1234/// - `ctypes.WinDLL('kernel32')` / `ctypes.windll.kernel32`
1235/// - `ctypes.PyDLL('libpython.so')`
1236/// - `cffi.FFI().dlopen('libfoo.so')`
1237/// - `ffi.dlopen('libfoo.so')`
1238///
1239/// Returns true if an FFI edge was created, false otherwise.
1240fn build_ffi_call_edge(
1241    ast_graph: &ASTGraph,
1242    call_node: Node<'_>,
1243    content: &[u8],
1244    helper: &mut GraphBuildHelper,
1245) -> GraphResult<bool> {
1246    let Some(callee_expr) = call_node.child_by_field_name("function") else {
1247        return Ok(false);
1248    };
1249
1250    let callee_text = callee_expr
1251        .utf8_text(content)
1252        .map_err(|_| GraphBuilderError::ParseError {
1253            span: span_from_node(call_node),
1254            reason: "failed to read call expression".to_string(),
1255        })?
1256        .trim();
1257
1258    // Check for ctypes library loading patterns
1259    if is_ctypes_load_call(callee_text) {
1260        return Ok(build_ctypes_ffi_edge(
1261            ast_graph,
1262            call_node,
1263            content,
1264            callee_text,
1265            helper,
1266        ));
1267    }
1268
1269    // Check for cffi dlopen patterns
1270    if is_cffi_dlopen_call(callee_text) {
1271        return Ok(build_cffi_ffi_edge(ast_graph, call_node, content, helper));
1272    }
1273
1274    Ok(false)
1275}
1276
/// Check if the callee is a ctypes library loading function.
///
/// Only exact, explicit ctypes spellings are accepted to keep false positives
/// low (a generic `*.LoadLibrary` suffix match proved too broad). Each of the
/// four ctypes library classes is covered together with its prefabricated
/// loader, both fully qualified and as imported names:
///
/// | class    | loader                 |
/// |----------|------------------------|
/// | `CDLL`   | `cdll.LoadLibrary`     |
/// | `WinDLL` | `windll.LoadLibrary`   |
/// | `OleDLL` | `oledll.LoadLibrary`   |
/// | `PyDLL`  | `pydll.LoadLibrary`    |
///
/// Note: `ctypes.cdll.kernel32` style attribute access patterns are not detected
/// because they're attribute access (not function calls). We only detect explicit
/// library loading function calls like `CDLL('lib.so')`.
fn is_ctypes_load_call(callee_text: &str) -> bool {
    matches!(
        callee_text,
        // Direct ctypes constructors (fully qualified)
        "ctypes.CDLL"
            | "ctypes.WinDLL"
            | "ctypes.OleDLL"
            | "ctypes.PyDLL"
            // Prefabricated loaders (fully qualified)
            | "ctypes.cdll.LoadLibrary"
            | "ctypes.windll.LoadLibrary"
            | "ctypes.oledll.LoadLibrary"
            | "ctypes.pydll.LoadLibrary"
            // After `from ctypes import *` or `from ctypes import CDLL, ...`
            | "CDLL"
            | "WinDLL"
            | "OleDLL"
            | "PyDLL"
            // After `from ctypes import cdll` or similar
            | "cdll.LoadLibrary"
            | "windll.LoadLibrary"
            | "oledll.LoadLibrary"
            | "pydll.LoadLibrary"
    )
}
1305
/// Check if the callee is a cffi dlopen function.
///
/// Only known cffi spellings are accepted to keep false positives low (a
/// generic `*.dlopen` suffix match proved too broad):
/// - `dlopen` on one of the conventional FFI variable names (`ffi`, `cffi`, `_ffi`);
/// - `dlopen` chained directly onto the constructor, either imported
///   (`FFI().dlopen(...)`) or module-qualified (`cffi.FFI().dlopen(...)`).
///
/// For a chained call the callee text is the whole attribute expression,
/// constructor call included.
fn is_cffi_dlopen_call(callee_text: &str) -> bool {
    matches!(
        callee_text,
        // Common cffi FFI variable names followed by dlopen
        "ffi.dlopen"
            | "cffi.dlopen"
            | "_ffi.dlopen"
            // After `from cffi import FFI`
            | "FFI().dlopen"
            // After `import cffi`
            | "cffi.FFI().dlopen"
    )
}
1321
1322/// Build FFI edge for ctypes library loading.
1323fn build_ctypes_ffi_edge(
1324    ast_graph: &ASTGraph,
1325    call_node: Node<'_>,
1326    content: &[u8],
1327    callee_text: &str,
1328    helper: &mut GraphBuildHelper,
1329) -> bool {
1330    // Get caller context
1331    let caller_id = get_ffi_caller_node_id(ast_graph, call_node, content, helper);
1332
1333    // Determine FFI convention based on the ctypes type
1334    let convention = if callee_text.contains("WinDLL")
1335        || callee_text.contains("windll")
1336        || callee_text.contains("OleDLL")
1337    {
1338        FfiConvention::Stdcall
1339    } else {
1340        FfiConvention::C
1341    };
1342
1343    // Try to extract library name from first argument
1344    let library_name = extract_ffi_library_name(call_node, content)
1345        .unwrap_or_else(|| "ctypes::unknown".to_string());
1346
1347    let ffi_name = format!("native::{}", ffi_library_simple_name(&library_name));
1348    let ffi_node_id = helper.add_module(&ffi_name, Some(span_from_node(call_node)));
1349
1350    // Add FFI edge
1351    helper.add_ffi_edge(caller_id, ffi_node_id, convention);
1352
1353    true
1354}
1355
1356/// Build FFI edge for cffi dlopen.
1357fn build_cffi_ffi_edge(
1358    ast_graph: &ASTGraph,
1359    call_node: Node<'_>,
1360    content: &[u8],
1361    helper: &mut GraphBuildHelper,
1362) -> bool {
1363    // Get caller context
1364    let caller_id = get_ffi_caller_node_id(ast_graph, call_node, content, helper);
1365
1366    // Try to extract library name from first argument
1367    let library_name =
1368        extract_ffi_library_name(call_node, content).unwrap_or_else(|| "cffi::unknown".to_string());
1369
1370    let ffi_name = format!("native::{}", ffi_library_simple_name(&library_name));
1371    let ffi_node_id = helper.add_module(&ffi_name, Some(span_from_node(call_node)));
1372
1373    // cffi uses C calling convention
1374    helper.add_ffi_edge(caller_id, ffi_node_id, FfiConvention::C);
1375
1376    true
1377}
1378
1379/// Get the caller node ID for FFI edges.
1380fn get_ffi_caller_node_id(
1381    ast_graph: &ASTGraph,
1382    node: Node<'_>,
1383    content: &[u8],
1384    helper: &mut GraphBuildHelper,
1385) -> UnifiedNodeId {
1386    let module_context;
1387    let call_context = if let Some(ctx) = ast_graph.get_callable_context(node.id()) {
1388        ctx
1389    } else {
1390        module_context = CallContext {
1391            qualified_name: "<module>".to_string(),
1392            span: (0, content.len()),
1393            is_async: false,
1394            is_method: false,
1395            class_name: None,
1396        };
1397        &module_context
1398    };
1399
1400    let caller_span = Some(Span::from_bytes(call_context.span.0, call_context.span.1));
1401    helper.ensure_function(
1402        &call_context.qualified_name(),
1403        caller_span,
1404        call_context.is_async,
1405        false,
1406    )
1407}
1408
1409/// Extract the library name from the first argument of a call.
1410fn extract_ffi_library_name(call_node: Node<'_>, content: &[u8]) -> Option<String> {
1411    let args = call_node.child_by_field_name("arguments")?;
1412
1413    let mut cursor = args.walk();
1414    let first_arg = args
1415        .children(&mut cursor)
1416        .find(|child| !matches!(child.kind(), "(" | ")" | ","))?;
1417
1418    // Handle string literals
1419    if first_arg.kind() == "string" {
1420        return extract_string_content(first_arg, content);
1421    }
1422
1423    // Handle identifiers (variable names) - we can't resolve them statically
1424    if first_arg.kind() == "identifier" {
1425        let text = first_arg.utf8_text(content).ok()?;
1426        return Some(format!("${}", text.trim())); // Mark as variable reference
1427    }
1428
1429    None
1430}
1431
1432/// Check if an import statement imports a known native extension module.
1433///
1434/// This detects patterns like:
1435/// - `import numpy` (known C extension)
1436/// - `from numpy import array` (known C extension)
1437/// - `import _sqlite3` (private C module)
1438fn is_native_extension_import(module_name: &str) -> bool {
1439    // Private C modules (underscore prefix)
1440    if module_name.starts_with('_') && !module_name.starts_with("__") {
1441        return true;
1442    }
1443
1444    // Check against known modules
1445    let base_module = module_name.split('.').next().unwrap_or(module_name);
1446
1447    STD_C_MODULES.contains(&base_module) || THIRD_PARTY_C_PACKAGES.contains(&base_module)
1448}
1449
1450/// Build FFI edge for native extension import.
1451fn build_native_import_ffi_edge(
1452    module_name: &str,
1453    import_node: Node<'_>,
1454    helper: &mut GraphBuildHelper,
1455) {
1456    // Create module node for the importing file
1457    let file_path = helper.file_path().to_string();
1458    let importer_id = helper.add_module(&file_path, None);
1459
1460    // Create node for the native module
1461    let ffi_name = format!("native::{}", simple_name(module_name));
1462    let ffi_node_id = helper.add_module(&ffi_name, Some(span_from_node(import_node)));
1463
1464    // Add FFI edge (C convention for Python C extensions)
1465    helper.add_ffi_edge(importer_id, ffi_node_id, FfiConvention::C);
1466}
1467
1468// ============================================================================
1469// HTTP Route Endpoint Detection - Flask/FastAPI decorators
1470// ============================================================================
1471
/// HTTP methods recognized in route decorators.
///
/// These are the lowercase verb shortcuts usable as `@app.<verb>('/path')`.
/// `head` and `options` are included because `FastAPI` exposes them alongside
/// the five verbs shared with `Flask`. `trace` is deliberately left out to
/// avoid mistaking tracing decorators for routes.
const ROUTE_METHOD_NAMES: &[&str] = &[
    "get", "post", "put", "delete", "patch", "head", "options",
];

/// Receiver names recognized as route-capable objects.
///
/// `Flask` uses `app` or `blueprint`, `FastAPI` uses `app` or `router`.
const ROUTE_RECEIVER_NAMES: &[&str] = &["app", "router", "blueprint"];
1479
1480/// Extract HTTP route information from Flask/FastAPI-style decorators on a function.
1481///
1482/// Checks whether the given `function_definition` node is wrapped in a `decorated_definition`
1483/// and whether any of its decorators match known route patterns:
1484///
1485/// - `@app.route('/path')` or `@app.route('/path', methods=['GET'])` -- GET by default
1486/// - `@app.get('/path')` / `@app.post('/path')` / `@app.put('/path')` / etc.
1487/// - `@router.get('/path')` (`FastAPI`)
1488/// - `@blueprint.route('/path')` (Flask blueprints)
1489///
1490/// Returns `Some((method, path))` where `method` is the uppercased HTTP method and
1491/// `path` is the route path string, or `None` if no route decorator is found.
1492fn extract_route_decorator_info(func_node: Node<'_>, content: &[u8]) -> Option<(String, String)> {
1493    // The function_definition must be a child of decorated_definition
1494    let parent = func_node.parent()?;
1495    if parent.kind() != "decorated_definition" {
1496        return None;
1497    }
1498
1499    // Iterate through decorator children of the decorated_definition
1500    let mut cursor = parent.walk();
1501    for child in parent.children(&mut cursor) {
1502        if child.kind() != "decorator" {
1503            continue;
1504        }
1505
1506        let Ok(decorator_text) = child.utf8_text(content) else {
1507            continue;
1508        };
1509        let decorator_text = decorator_text.trim();
1510
1511        // Strip the leading '@'
1512        let without_at = decorator_text.strip_prefix('@')?;
1513
1514        // Try to parse as a route decorator
1515        if let Some(result) = parse_route_decorator_text(without_at) {
1516            return Some(result);
1517        }
1518    }
1519
1520    None
1521}
1522
1523/// Parse a single decorator text (without the leading `@`) to extract route information.
1524///
1525/// Recognized patterns:
1526/// - `app.route('/path')` or `app.route('/path', methods=['POST'])`
1527/// - `app.get('/path')` / `router.post('/path')` / `blueprint.delete('/path')`
1528///
1529/// Returns `Some((HTTP_METHOD, path))` or `None`.
1530fn parse_route_decorator_text(text: &str) -> Option<(String, String)> {
1531    // Split into receiver.method and argument portion
1532    // e.g. "app.route('/api/users')" -> ("app.route", "'/api/users')")
1533    let paren_pos = text.find('(')?;
1534    let accessor = &text[..paren_pos];
1535    let args_text = &text[paren_pos + 1..];
1536
1537    // Split accessor into receiver and method_name
1538    let dot_pos = accessor.rfind('.')?;
1539    let receiver = &accessor[..dot_pos];
1540    let method_name = &accessor[dot_pos + 1..];
1541
1542    // Check that the receiver is a known route-capable object.
1543    // Allow dotted receivers (e.g., "api.v1") as long as the final segment matches.
1544    let receiver_base = receiver.rsplit('.').next().unwrap_or(receiver);
1545    if !ROUTE_RECEIVER_NAMES.contains(&receiver_base) {
1546        return None;
1547    }
1548
1549    // Extract the route path from the first argument (string literal)
1550    let path = extract_path_from_decorator_args(args_text)?;
1551
1552    // Determine the HTTP method
1553    let method_lower = method_name.to_ascii_lowercase();
1554    if ROUTE_METHOD_NAMES.contains(&method_lower.as_str()) {
1555        // Direct method decorator: @app.get('/path') -> GET
1556        return Some((method_lower.to_ascii_uppercase(), path));
1557    }
1558
1559    if method_lower == "route" {
1560        // Generic route decorator: @app.route('/path', methods=['POST'])
1561        let http_method = extract_method_from_route_args(args_text);
1562        return Some((http_method, path));
1563    }
1564
1565    None
1566}
1567
/// Pull the route path out of the argument text of a route decorator.
///
/// `args_text` is everything after the decorator call's opening parenthesis,
/// e.g. `'/api/users', methods=['GET'])` or `"/api/items")`.
///
/// The path is the content of the first single- or double-quoted literal in
/// the text, up to the next occurrence of the same quote character. Returns
/// `None` when there is no quote, the literal is unterminated, or it is empty.
///
/// NOTE(review): the search is not limited to the first argument, so when the
/// first argument is not a literal (`URL, methods=['POST']`) the first literal
/// of a later argument is returned instead.
fn extract_path_from_decorator_args(args_text: &str) -> Option<String> {
    let text = args_text.trim();

    // Whichever quote character appears first opens the literal.
    let open = text.find(['\'', '"'])?;
    let quote = char::from(text.as_bytes()[open]);

    let after_open = &text[open + 1..];
    let close = after_open.find(quote)?;
    let path = &after_open[..close];

    (!path.is_empty()).then(|| path.to_string())
}
1606
1607/// Extract the HTTP method from `@app.route('/path', methods=['POST'])` style arguments.
1608///
1609/// Looks for a `methods=` keyword argument containing a list of method strings.
1610/// If found, returns the first method in uppercase. Otherwise defaults to `"GET"`.
1611fn extract_method_from_route_args(args_text: &str) -> String {
1612    // Look for 'methods' keyword in the arguments
1613    let Some(methods_pos) = args_text.find("methods") else {
1614        return "GET".to_string();
1615    };
1616
1617    // Find the opening bracket after 'methods='
1618    let after_methods = &args_text[methods_pos..];
1619    let Some(bracket_pos) = after_methods.find('[') else {
1620        return "GET".to_string();
1621    };
1622
1623    let after_bracket = &after_methods[bracket_pos + 1..];
1624
1625    // Find the first string literal inside the bracket
1626    let method_str = extract_first_string_literal(after_bracket);
1627    match method_str {
1628        Some(m) => m.to_ascii_uppercase(),
1629        None => "GET".to_string(),
1630    }
1631}
1632
/// Return the contents of the first quoted string literal in `text`.
///
/// The earliest `'` or `"` opens the literal and the next occurrence of the same
/// character closes it; escape sequences are not interpreted. Yields `None` when
/// no quote is present, the literal is unterminated, or the literal is empty.
fn extract_first_string_literal(text: &str) -> Option<String> {
    // Position and kind of the opening quote.
    let (open_at, quote) = text
        .char_indices()
        .find(|&(_, ch)| ch == '\'' || ch == '"')?;

    // Everything after the opening quote, up to (not including) its partner.
    let body = &text[open_at + quote.len_utf8()..];
    let literal = &body[..body.find(quote)?];

    (!literal.is_empty()).then(|| literal.to_string())
}
1664
1665// ============================================================================
1666// Property Detection - @property decorator
1667// ============================================================================
1668
1669/// Check if a function definition has a `@property` decorator.
1670///
1671/// Python AST structure for decorated functions:
1672/// ```python
1673/// @property
1674/// def name(self):
1675///     return self._name
1676/// ```
1677///
1678/// The tree-sitter AST wraps the `function_definition` in a `decorated_definition` node:
1679/// ```text
1680/// (block
1681///   (decorated_definition
1682///     decorator: (decorator "@property")
1683///     definition: (function_definition)))
1684/// ```
1685fn has_property_decorator(func_node: Node<'_>, content: &[u8]) -> bool {
1686    // The function_definition is a child of decorated_definition
1687    let Some(parent) = func_node.parent() else {
1688        return false;
1689    };
1690
1691    // Check if parent is decorated_definition
1692    if parent.kind() != "decorated_definition" {
1693        return false;
1694    }
1695
1696    // Look for @property decorator in the decorated_definition
1697    let mut cursor = parent.walk();
1698    for child in parent.children(&mut cursor) {
1699        if child.kind() == "decorator" {
1700            // Extract decorator text
1701            if let Ok(decorator_text) = child.utf8_text(content) {
1702                let decorator_text = decorator_text.trim();
1703                // Match @property or @property()
1704                if decorator_text == "@property"
1705                    || decorator_text.starts_with("@property(")
1706                    || decorator_text.starts_with("@property (")
1707                {
1708                    return true;
1709                }
1710            }
1711        }
1712    }
1713
1714    false
1715}
1716
/// Classify a Python identifier's visibility from its underscore prefix.
///
/// - Starts with `__` and does not end with `__` (e.g. `__secret`) -> `"private"`
/// - Any other name starting with `_` (e.g. `_helper`) -> `"protected"`
/// - Everything else, including the empty string -> `"public"`
///
/// Names that both start and end with `__` (e.g. `__init__`) are excluded from the
/// first rule and therefore land in the second: they are reported as `"protected"`.
fn extract_visibility_from_name(name: &str) -> &'static str {
    let name_mangled = name.starts_with("__") && !name.ends_with("__");
    let underscore_prefixed = name.starts_with('_');

    match (name_mangled, underscore_prefixed) {
        (true, _) => "private",
        (false, true) => "protected",
        (false, false) => "public",
    }
}
1732
1733// ============================================================================
1734// Type Hint Processing - TypeOf and Reference Edges
1735// ============================================================================
1736
1737/// Find the containing scope (function/class) for a node to create scope-qualified names.
1738///
1739/// This walks up the AST to find the nearest enclosing function or class definition.
1740/// Returns:
1741/// - Empty string for module-level
1742/// - Class name for class-level (e.g., "`MyClass`")
1743/// - Function qualified name for function-level (e.g., "MyClass.method" or "process")
1744fn find_containing_scope(node: Node<'_>, content: &[u8], ast_graph: &ASTGraph) -> String {
1745    let mut current = node;
1746    let mut found_class_name: Option<String> = None;
1747
1748    // Walk up the tree to find enclosing function or class
1749    while let Some(parent) = current.parent() {
1750        match parent.kind() {
1751            "function_definition" => {
1752                // Found enclosing function - get its qualified name
1753                if let Some(ctx) = ast_graph.get_callable_context(parent.id()) {
1754                    return ctx.qualified_name.clone();
1755                }
1756            }
1757            "class_definition" => {
1758                // Remember the class name but continue walking up
1759                // to check if we're inside a function within this class
1760                if found_class_name.is_none() {
1761                    // Extract class name directly from node
1762                    if let Some(name_node) = parent.child_by_field_name("name")
1763                        && let Ok(class_name) = name_node.utf8_text(content)
1764                    {
1765                        found_class_name = Some(class_name.to_string());
1766                    }
1767                }
1768            }
1769            _ => {}
1770        }
1771        current = parent;
1772    }
1773
1774    // If we found a class but no enclosing function, it's a class attribute
1775    found_class_name.unwrap_or_default()
1776}
1777
1778/// Extract return type annotation from a function definition.
1779///
1780/// Python AST structure:
1781/// ```python
1782/// def foo() -> int:  # return_type field contains type annotation
1783/// ```
1784fn extract_return_type_annotation(func_node: Node<'_>, content: &[u8]) -> Option<String> {
1785    let return_type_node = func_node.child_by_field_name("return_type")?;
1786    extract_type_from_node(return_type_node, content)
1787}
1788
1789/// Extract the byte-exact source text of a function's `-> Type` annotation.
1790///
1791/// Unlike [`extract_return_type_annotation`], this returns the raw annotation
1792/// text verbatim — no quote stripping, no union flattening, no generic-base
1793/// extraction. This is the form consumed by `returns:<TypeName>` predicates,
1794/// which match the byte-exact qualified name of the target Type node.
1795///
1796/// Returns `None` when the function has no `-> Type` annotation (e.g.
1797/// `def foo():`), in which case no Return edge is emitted.
1798///
1799/// Examples (input → returned text):
1800/// - `def foo() -> int:` → `Some("int")`
1801/// - `def foo() -> Optional[int]:` → `Some("Optional[int]")`
1802/// - `def foo() -> List[Dict[str, int]]:` → `Some("List[Dict[str, int]]")`
1803/// - `def foo() -> pd.DataFrame:` → `Some("pd.DataFrame")`
1804/// - `async def foo() -> AsyncIterator[int]:` → `Some("AsyncIterator[int]")`
1805/// - `def foo() -> "User":` → `Some("\"User\"")`
1806/// - `def foo():` → `None`
1807fn extract_return_type_source_text(func_node: Node<'_>, content: &[u8]) -> Option<String> {
1808    let return_type_node = func_node.child_by_field_name("return_type")?;
1809    let text = return_type_node.utf8_text(content).ok()?.trim();
1810    if text.is_empty() {
1811        None
1812    } else {
1813        Some(text.to_string())
1814    }
1815}
1816
1817/// Process function parameters to create `TypeOf` and Reference edges for type hints.
1818///
1819/// Handles:
1820/// - `def foo(x: int, y: str):` - typed parameters
1821/// - `def foo(self, x: int):` - skips self/cls
1822/// - `def foo(x: List[int]):` - extracts base type from generics
1823fn process_function_parameters(
1824    func_node: Node<'_>,
1825    content: &[u8],
1826    ast_graph: &ASTGraph,
1827    helper: &mut GraphBuildHelper,
1828) {
1829    let Some(params_node) = func_node.child_by_field_name("parameters") else {
1830        return;
1831    };
1832
1833    // Get the qualified name of the containing function/method for scope qualification
1834    let scope_prefix = ast_graph
1835        .get_callable_context(func_node.id())
1836        .map_or("", |ctx| ctx.qualified_name.as_str());
1837
1838    // Iterate through parameters in the parameter_list
1839    for param in params_node.children(&mut params_node.walk()) {
1840        // Python tree-sitter uses "typed_parameter" and "typed_default_parameter"
1841        // but we need to handle the actual structure
1842        match param.kind() {
1843            "typed_parameter" | "typed_default_parameter" => {
1844                process_typed_parameter(param, content, scope_prefix, helper);
1845            }
1846            // Untyped parameter - check if it has a type annotation in parent context
1847            // For now, skip (no type hint)
1848            // Default parameter without type - skip
1849            "identifier" | "default_parameter" => {}
1850            _ => {
1851                // Other parameter types - try to process if they have type annotations
1852                // This handles various parameter structures
1853                if param.child_by_field_name("type").is_some() {
1854                    process_typed_parameter(param, content, scope_prefix, helper);
1855                }
1856            }
1857        }
1858    }
1859}
1860
1861/// Process a single typed parameter node.
1862///
1863/// Creates scope-qualified variable names to prevent cross-scope type contamination.
1864/// Format: `<scope_prefix>:<param_name>` (e.g., `MyClass.method:x` or `process:x`)
1865fn process_typed_parameter(
1866    param: Node<'_>,
1867    content: &[u8],
1868    scope_prefix: &str,
1869    helper: &mut GraphBuildHelper,
1870) {
1871    // Extract parameter name (could be in "name" field or as identifier child)
1872    let param_name = if let Some(name_node) = param.child_by_field_name("name") {
1873        name_node.utf8_text(content).ok()
1874    } else {
1875        // Fallback: look for identifier child
1876        param
1877            .children(&mut param.walk())
1878            .find(|c| c.kind() == "identifier")
1879            .and_then(|n| n.utf8_text(content).ok())
1880    };
1881
1882    let Some(param_name) = param_name else {
1883        return;
1884    };
1885
1886    // Skip self and cls (special method parameters)
1887    if param_name == "self" || param_name == "cls" {
1888        return;
1889    }
1890
1891    // Extract type annotation
1892    let Some(type_node) = param.child_by_field_name("type") else {
1893        return;
1894    };
1895
1896    let Some(type_name) = extract_type_from_node(type_node, content) else {
1897        return;
1898    };
1899
1900    // Create scope-qualified parameter name to prevent cross-scope contamination
1901    // Format: <scope_prefix>:<param_name> (e.g., "MyClass.method:x" or "process:x")
1902    let qualified_param_name = if scope_prefix.is_empty() {
1903        // Top-level function parameter
1904        format!(":{param_name}")
1905    } else {
1906        format!("{scope_prefix}:{param_name}")
1907    };
1908
1909    // Create parameter variable node with qualified name
1910    let param_id = helper.add_variable(&qualified_param_name, Some(span_from_node(param)));
1911
1912    // Create type node
1913    let type_id = helper.add_type(&type_name, None);
1914
1915    // Add TypeOf and Reference edges
1916    helper.add_typeof_edge(param_id, type_id);
1917    helper.add_reference_edge(param_id, type_id);
1918}
1919
1920/// Process annotated assignments to create `TypeOf` and Reference edges.
1921///
1922/// Handles:
1923/// - `user: User = get_user()` - annotated assignment with value
1924/// - `count: int` - annotated assignment without value
1925/// - `items: List[str] = []` - generic types
1926fn process_annotated_assignment(
1927    expr_stmt_node: Node<'_>,
1928    content: &[u8],
1929    ast_graph: &ASTGraph,
1930    helper: &mut GraphBuildHelper,
1931) {
1932    // Get the containing scope for scope qualification
1933    // For assignments, we need to find the enclosing function/class
1934    let scope_prefix = find_containing_scope(expr_stmt_node, content, ast_graph);
1935
1936    // Look for expression_statement containing an assignment
1937    for child in expr_stmt_node.children(&mut expr_stmt_node.walk()) {
1938        if child.kind() == "assignment" {
1939            process_typed_assignment(child, content, &scope_prefix, helper);
1940        }
1941    }
1942}
1943
1944/// Process a typed assignment node (shared logic for variables and class attributes).
1945///
1946/// Creates scope-qualified variable names to prevent cross-scope type contamination.
1947fn process_typed_assignment(
1948    assignment_node: Node<'_>,
1949    content: &[u8],
1950    scope_prefix: &str,
1951    helper: &mut GraphBuildHelper,
1952) {
1953    // Check if this is a typed assignment by looking for type annotation
1954    // In Python, annotated assignments look like: name: type = value
1955    // The AST structure is: assignment { left: identifier, type: type, right: expression }
1956
1957    let Some(left) = assignment_node.child_by_field_name("left") else {
1958        return;
1959    };
1960
1961    let Some(type_node) = assignment_node.child_by_field_name("type") else {
1962        return;
1963    };
1964
1965    // Extract variable name
1966    let Ok(var_name) = left.utf8_text(content) else {
1967        return;
1968    };
1969
1970    // Extract type
1971    let Some(type_name) = extract_type_from_node(type_node, content) else {
1972        return;
1973    };
1974
1975    // Create scope-qualified variable name to prevent cross-scope contamination
1976    // For class attributes (module-level or class-level), use simple name
1977    // For function-local variables, use qualified name
1978    let qualified_var_name = if scope_prefix.is_empty() {
1979        // Module-level variable
1980        var_name.to_string()
1981    } else if scope_prefix.contains('.') && !scope_prefix.contains(':') {
1982        // Class attribute (scope_prefix is class name without function)
1983        format!("{scope_prefix}.{var_name}")
1984    } else {
1985        // Function-local variable
1986        format!("{scope_prefix}:{var_name}")
1987    };
1988
1989    // Create variable node with qualified name
1990    let var_id = helper.add_variable(&qualified_var_name, Some(span_from_node(assignment_node)));
1991
1992    // Create type node
1993    let type_id = helper.add_type(&type_name, None);
1994
1995    // Add TypeOf and Reference edges
1996    helper.add_typeof_edge(var_id, type_id);
1997    helper.add_reference_edge(var_id, type_id);
1998}
1999
2000/// Extract type name from a type annotation node.
2001///
2002/// Handles:
2003/// - Simple types: `int`, `str`, `bool`
2004/// - Generic types: `List[int]` → extract base type `List`
2005/// - Optional types: `Optional[User]` → extract base type `Optional`
2006/// - Qualified types: `module.Type` → extract full qualified name
2007/// - Forward references: `"User"` → `User` (strips quotes)
2008/// - PEP 604 unions: `User | None` → `User` (extracts left-most base type)
2009fn extract_type_from_node(type_node: Node<'_>, content: &[u8]) -> Option<String> {
2010    match type_node.kind() {
2011        "type" => {
2012            // The "type" node wraps the actual type - recurse into first child
2013            type_node
2014                .named_child(0)
2015                .and_then(|child| extract_type_from_node(child, content))
2016        }
2017        "identifier" => {
2018            // Simple type: int, str, User
2019            type_node.utf8_text(content).ok().map(String::from)
2020        }
2021        "string" => {
2022            // Forward reference: "User" -> User
2023            // Strip surrounding quotes from string literal annotations
2024            let text = type_node.utf8_text(content).ok()?;
2025            let trimmed = text.trim();
2026
2027            // Remove quotes: "User" or 'User' -> User
2028            if (trimmed.starts_with('"') && trimmed.ends_with('"'))
2029                || (trimmed.starts_with('\'') && trimmed.ends_with('\''))
2030            {
2031                let unquoted = &trimmed[1..trimmed.len() - 1];
2032                // Handle potential unions inside string: "User | None" -> "User"
2033                Some(normalize_union_type(unquoted))
2034            } else {
2035                Some(trimmed.to_string())
2036            }
2037        }
2038        "binary_operator" => {
2039            // PEP 604 union: User | None -> User
2040            // Extract left operand as the primary type
2041            if let Some(left) = type_node.child_by_field_name("left") {
2042                extract_type_from_node(left, content)
2043            } else {
2044                // Fallback: extract text and normalize
2045                type_node
2046                    .utf8_text(content)
2047                    .ok()
2048                    .map(|text| normalize_union_type(text.trim()))
2049            }
2050        }
2051        "generic_type" | "subscript" => {
2052            // Generic type: List[int], Dict[str, int], Optional[User]
2053            // Extract base type (before the brackets)
2054            // Structure: subscript { value: identifier, subscript: [...] }
2055            if let Some(value_node) = type_node.child_by_field_name("value") {
2056                extract_type_from_node(value_node, content)
2057            } else {
2058                // Fallback: try first named child
2059                type_node
2060                    .named_child(0)
2061                    .and_then(|child| extract_type_from_node(child, content))
2062                    .or_else(|| {
2063                        // Last resort: extract the full text and take the base type
2064                        type_node.utf8_text(content).ok().and_then(|text| {
2065                            // Extract base type from "List[str]" -> "List"
2066                            text.split('[').next().map(|s| s.trim().to_string())
2067                        })
2068                    })
2069            }
2070        }
2071        "attribute" => {
2072            // Qualified type: module.Type or package.module.Type
2073            type_node.utf8_text(content).ok().map(String::from)
2074        }
2075        "list" | "tuple" | "set" => {
2076            // Collection literals (though rare in type annotations)
2077            type_node.utf8_text(content).ok().map(String::from)
2078        }
2079        _ => {
2080            // Fallback: try to extract text from any other node
2081            // For unknown node types, try to extract intelligently
2082            let text = type_node.utf8_text(content).ok()?;
2083            let trimmed = text.trim();
2084
2085            // If it looks like a generic type, extract base type
2086            if trimmed.contains('[') {
2087                trimmed.split('[').next().map(|s| s.trim().to_string())
2088            } else {
2089                // Check for union syntax
2090                Some(normalize_union_type(trimmed))
2091            }
2092        }
2093    }
2094}
2095
/// Normalize union types by extracting the left-most/primary type.
///
/// Only a `|` at bracket depth zero separates union members; a `|` nested inside
/// `[...]` or `(...)` belongs to a type argument and is left alone, so generic
/// types are never cut in the middle of their argument list.
///
/// Examples:
/// - `User | None` → `User`
/// - `str | int` → `str`
/// - `Optional[User]` → `Optional[User]` (unchanged, not a union)
/// - `Dict[str, int | None]` → `Dict[str, int | None]` (unchanged, nested union)
/// - `List[int | str] | None` → `List[int | str]`
///
/// When a top-level `|` is found, the returned left side is trimmed; otherwise the
/// input is returned exactly as given.
fn normalize_union_type(type_str: &str) -> String {
    let mut depth = 0usize;

    for (idx, ch) in type_str.char_indices() {
        match ch {
            '[' | '(' => depth += 1,
            // Saturate so that stray closing brackets cannot underflow the counter.
            ']' | ')' => depth = depth.saturating_sub(1),
            '|' if depth == 0 => return type_str[..idx].trim().to_string(),
            _ => {}
        }
    }

    type_str.to_string()
}
2110
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the `(method, path)` pair a successful route parse is expected to yield.
    fn expected_route(method: &str, path: &str) -> Option<(String, String)> {
        Some((method.to_string(), path.to_string()))
    }

    // ====================================================================
    // Name helpers
    // ====================================================================

    #[test]
    fn test_simple_name_extracts_dotted_identifiers() {
        // (input, expected): dotted call targets first, then inputs without dots.
        let cases = [
            ("module.func", "func"),
            ("obj.method", "method"),
            ("package.module.func", "func"),
            ("self.helper", "helper"),
            ("function", "function"),
            ("", ""),
        ];
        for (input, expected) in cases {
            assert_eq!(simple_name(input), expected);
        }
    }

    #[test]
    fn test_ffi_library_simple_name_extracts_library_base_names() {
        let cases = [
            // Standard shared library names
            ("libfoo.so", "libfoo"),
            ("lib1.so", "lib1"),
            ("lib2.so", "lib2"),
            // Other platform extensions
            ("kernel32.dll", "kernel32"),
            ("libSystem.dylib", "libSystem"),
            // Versioned shared library
            ("libc.so.6", "libc"),
            // No extension: returned unchanged
            ("kernel32", "kernel32"),
            ("numpy", "numpy"),
            // Variable reference (prefixed with $)
            ("$libname", "$libname"),
            // Edge cases
            ("", ""),
            ("lib.so", "lib"),
        ];
        for (input, expected) in cases {
            assert_eq!(ffi_library_simple_name(input), expected);
        }
    }

    #[test]
    fn test_ffi_library_simple_name_prevents_duplicate_edges() {
        // Regression guard: lib1.so and lib2.so once both collapsed to "so".
        let first = ffi_library_simple_name("lib1.so");
        let second = ffi_library_simple_name("lib2.so");

        assert_ne!(
            first, second,
            "lib1.so and lib2.so must produce different simple names"
        );
        assert_eq!(first, "lib1");
        assert_eq!(second, "lib2");
    }

    #[test]
    fn test_ffi_library_simple_name_handles_directory_paths() {
        let cases = [
            // Absolute paths, including directories that contain dots
            ("/opt/v1.2/libfoo.so", "libfoo"),
            ("/usr/lib/x86_64-linux-gnu/libc.so.6", "libc"),
            ("libs/lib1.so", "lib1"),
            // Relative paths
            ("./libs/kernel32.dll", "kernel32"),
            ("../lib/libSystem.dylib", "libSystem"),
        ];
        for (input, expected) in cases {
            assert_eq!(ffi_library_simple_name(input), expected);
        }
    }

    // ====================================================================
    // Route decorator parsing unit tests
    // ====================================================================

    #[test]
    fn test_parse_route_decorator_app_route_default_get() {
        assert_eq!(
            parse_route_decorator_text("app.route('/api/users')"),
            expected_route("GET", "/api/users")
        );
    }

    #[test]
    fn test_parse_route_decorator_app_route_with_methods_post() {
        assert_eq!(
            parse_route_decorator_text("app.route('/api/users', methods=['POST'])"),
            expected_route("POST", "/api/users")
        );
    }

    #[test]
    fn test_parse_route_decorator_app_route_with_methods_put_double_quotes() {
        assert_eq!(
            parse_route_decorator_text("app.route(\"/api/items\", methods=[\"PUT\"])"),
            expected_route("PUT", "/api/items")
        );
    }

    #[test]
    fn test_parse_route_decorator_app_get() {
        assert_eq!(
            parse_route_decorator_text("app.get('/api/users')"),
            expected_route("GET", "/api/users")
        );
    }

    #[test]
    fn test_parse_route_decorator_app_post() {
        assert_eq!(
            parse_route_decorator_text("app.post('/api/items')"),
            expected_route("POST", "/api/items")
        );
    }

    #[test]
    fn test_parse_route_decorator_app_put() {
        assert_eq!(
            parse_route_decorator_text("app.put('/api/items/1')"),
            expected_route("PUT", "/api/items/1")
        );
    }

    #[test]
    fn test_parse_route_decorator_app_delete() {
        assert_eq!(
            parse_route_decorator_text("app.delete('/api/items/1')"),
            expected_route("DELETE", "/api/items/1")
        );
    }

    #[test]
    fn test_parse_route_decorator_app_patch() {
        assert_eq!(
            parse_route_decorator_text("app.patch('/api/items/1')"),
            expected_route("PATCH", "/api/items/1")
        );
    }

    #[test]
    fn test_parse_route_decorator_router_get_fastapi() {
        assert_eq!(
            parse_route_decorator_text("router.get('/api/users')"),
            expected_route("GET", "/api/users")
        );
    }

    #[test]
    fn test_parse_route_decorator_router_post_fastapi() {
        assert_eq!(
            parse_route_decorator_text("router.post('/api/items')"),
            expected_route("POST", "/api/items")
        );
    }

    #[test]
    fn test_parse_route_decorator_blueprint_route() {
        assert_eq!(
            parse_route_decorator_text("blueprint.route('/health')"),
            expected_route("GET", "/health")
        );
    }

    #[test]
    fn test_parse_route_decorator_unknown_receiver_returns_none() {
        // "server" is not a recognized receiver
        assert_eq!(parse_route_decorator_text("server.get('/api/users')"), None);
    }

    #[test]
    fn test_parse_route_decorator_unknown_method_returns_none() {
        // "options" is neither a recognized route method name nor "route"
        assert_eq!(parse_route_decorator_text("app.options('/api/users')"), None);
    }

    #[test]
    fn test_parse_route_decorator_no_parens_returns_none() {
        assert_eq!(parse_route_decorator_text("app.route"), None);
    }

    #[test]
    fn test_parse_route_decorator_no_dot_returns_none() {
        assert_eq!(parse_route_decorator_text("route('/api/users')"), None);
    }

    #[test]
    fn test_extract_path_from_decorator_args_single_quotes() {
        assert_eq!(
            extract_path_from_decorator_args("'/api/users')"),
            Some("/api/users".to_string())
        );
    }

    #[test]
    fn test_extract_path_from_decorator_args_double_quotes() {
        assert_eq!(
            extract_path_from_decorator_args("\"/api/items\")"),
            Some("/api/items".to_string())
        );
    }

    #[test]
    fn test_extract_path_from_decorator_args_empty_returns_none() {
        assert_eq!(extract_path_from_decorator_args("'')"), None);
    }

    #[test]
    fn test_extract_path_from_decorator_args_no_string_returns_none() {
        assert_eq!(extract_path_from_decorator_args("some_var)"), None);
    }

    #[test]
    fn test_extract_method_from_route_args_with_methods_keyword() {
        assert_eq!(
            extract_method_from_route_args("'/api/users', methods=['POST'])"),
            "POST"
        );
    }

    #[test]
    fn test_extract_method_from_route_args_without_methods_keyword() {
        assert_eq!(extract_method_from_route_args("'/api/users')"), "GET");
    }

    #[test]
    fn test_extract_method_from_route_args_delete() {
        assert_eq!(
            extract_method_from_route_args("'/api/items', methods=['DELETE'])"),
            "DELETE"
        );
    }

    #[test]
    fn test_extract_method_from_route_args_lowercase_normalizes() {
        assert_eq!(extract_method_from_route_args("'/x', methods=['put'])"), "PUT");
    }

    #[test]
    fn test_extract_first_string_literal_single_quotes() {
        assert_eq!(
            extract_first_string_literal("'POST']"),
            Some("POST".to_string())
        );
    }

    #[test]
    fn test_extract_first_string_literal_double_quotes() {
        assert_eq!(
            extract_first_string_literal("\"DELETE\"]"),
            Some("DELETE".to_string())
        );
    }

    #[test]
    fn test_extract_first_string_literal_empty_returns_none() {
        // Input without any quote character
        assert_eq!(extract_first_string_literal("no quotes here"), None);
    }
}
2358
2359#[cfg(test)]
2360mod shape_tests {
2361    use super::{cf_bucket_for_python_kind, python_shape_mapping};
2362    use sqry_core::graph::unified::build::shape::{
2363        CfBucket, ShapeBudget, ShapeMapping, compute_shape_descriptor,
2364    };
2365
2366    const SAMPLE: &str = include_str!(concat!(
2367        env!("CARGO_MANIFEST_DIR"),
2368        "/../test-fixtures/shape/reference/sample.py"
2369    ));
2370
2371    fn parse(src: &str) -> tree_sitter::Tree {
2372        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
2373        let mut p = tree_sitter::Parser::new();
2374        p.set_language(&lang).expect("load python grammar");
2375        p.parse(src, None).expect("parse")
2376    }
2377
2378    /// Resolve the function_definition with the given name from the fixture.
2379    fn function_named<'t>(tree: &'t tree_sitter::Tree, name: &str) -> tree_sitter::Node<'t> {
2380        let root = tree.root_node();
2381        let mut stack = vec![root];
2382        while let Some(node) = stack.pop() {
2383            if node.kind() == "function_definition"
2384                && node
2385                    .child_by_field_name("name")
2386                    .and_then(|n| n.utf8_text(SAMPLE.as_bytes()).ok())
2387                    == Some(name)
2388            {
2389                return node;
2390            }
2391            let mut c = node.walk();
2392            for ch in node.children(&mut c) {
2393                stack.push(ch);
2394            }
2395        }
2396        panic!("no function_definition named {name}");
2397    }
2398
2399    #[test]
2400    fn cf_table_is_non_empty() {
2401        let mapping = python_shape_mapping();
2402        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
2403        let mut covered = 0;
2404        for id in 0..lang.node_kind_count() {
2405            let kid = id as u16;
2406            if mapping.cf_bucket(kid).is_some() {
2407                covered += 1;
2408            }
2409        }
2410        assert!(
2411            covered >= 10,
2412            "expected many python CF kinds mapped, got {covered}"
2413        );
2414    }
2415
2416    #[test]
2417    fn histogram_covers_real_control_flow() {
2418        let tree = parse(SAMPLE);
2419        let func = function_named(&tree, "classify");
2420        let d = compute_shape_descriptor(
2421            func,
2422            SAMPLE.as_bytes(),
2423            python_shape_mapping(),
2424            &ShapeBudget::default(),
2425        );
2426        assert!(!d.is_unhashable(), "classify body must be hashable");
2427        for bucket in [
2428            CfBucket::Branch,
2429            CfBucket::Loop,
2430            CfBucket::Match,
2431            CfBucket::Try,
2432            CfBucket::Catch,
2433            CfBucket::Throw,
2434            CfBucket::Resource,
2435            CfBucket::Return,
2436            CfBucket::BreakContinue,
2437            CfBucket::Call,
2438            CfBucket::Assign,
2439            CfBucket::Comprehension,
2440        ] {
2441            assert!(
2442                d.cf_histogram[bucket.index()] >= 1,
2443                "classify must exercise {bucket:?}"
2444            );
2445        }
2446    }
2447
2448    #[test]
2449    fn async_body_covers_yield_await_closure() {
2450        let tree = parse(SAMPLE);
2451        let func = function_named(&tree, "fetch");
2452        let d = compute_shape_descriptor(
2453            func,
2454            SAMPLE.as_bytes(),
2455            python_shape_mapping(),
2456            &ShapeBudget::default(),
2457        );
2458        assert!(d.cf_histogram[CfBucket::Await.index()] >= 1, "await");
2459        assert!(d.cf_histogram[CfBucket::Yield.index()] >= 1, "yield");
2460        assert!(
2461            d.cf_histogram[CfBucket::Closure.index()] >= 1,
2462            "lambda closure"
2463        );
2464        assert!(
2465            d.signature_shape.has_return_annotation,
2466            "-> str return annotation"
2467        );
2468    }
2469
2470    #[test]
2471    fn signature_shape_reads_arity_and_splats() {
2472        let tree = parse(SAMPLE);
2473        let func = function_named(&tree, "classify");
2474        let mapping = python_shape_mapping();
2475        let shape = mapping.signature_shape(func, SAMPLE.as_bytes());
2476        // classify(values, threshold=0, *extra, **opts)
2477        assert_eq!(
2478            shape.arity_positional, 2,
2479            "values + threshold are positional"
2480        );
2481        assert!(shape.has_defaults, "threshold=0");
2482        assert!(shape.has_varargs, "*extra");
2483        assert!(shape.has_kwargs, "**opts");
2484    }
2485
2486    #[test]
2487    fn unknown_kind_maps_to_none() {
2488        assert!(cf_bucket_for_python_kind("module").is_none());
2489        assert!(cf_bucket_for_python_kind("identifier").is_none());
2490    }
2491}