Skip to main content

the_code_graph_parser/
python.rs

1use std::cell::RefCell;
2use std::path::Path;
3
4use tree_sitter::{Node, Parser};
5use tree_sitter_language::LanguageFn;
6
7use domain::error::CodeGraphError;
8use domain::model::{Edge, EdgeKind, Language, Location, SymbolKind, SymbolNode, Visibility};
9
10use crate::{ImportName, LanguageParser, ParseResult, RawImport};
11
// One tree-sitter `Parser` per thread. `PythonParser::parse` borrows it
// mutably for the duration of a single parse call.
thread_local! {
    static PY_PARSER: RefCell<Parser> = RefCell::new(Parser::new());
}
15
/// Parser for Python (.py) files.
///
/// Holds the tree-sitter Python grammar handle; the grammar is converted to a
/// `tree_sitter::Language` each time `parse` is called.
pub struct PythonParser {
    // Grammar entry point taken from `tree_sitter_python::LANGUAGE`.
    lang: LanguageFn,
}
20
21impl PythonParser {
22    pub fn new() -> Self {
23        Self {
24            lang: tree_sitter_python::LANGUAGE,
25        }
26    }
27}
28
29impl Default for PythonParser {
30    fn default() -> Self {
31        Self::new()
32    }
33}
34
35impl LanguageParser for PythonParser {
36    fn language(&self) -> Language {
37        Language::Python
38    }
39
40    fn file_extensions(&self) -> &[&str] {
41        &["py"]
42    }
43
44    fn parse(&self, source: &[u8], path: &Path) -> domain::error::Result<ParseResult> {
45        let lang: tree_sitter::Language = self.lang.into();
46
47        PY_PARSER.with(|parser_cell| {
48            let mut parser = parser_cell.borrow_mut();
49            parser
50                .set_language(&lang)
51                .map_err(|e| CodeGraphError::Parse {
52                    file: path.to_path_buf(),
53                    message: format!("failed to set language: {e}"),
54                })?;
55
56            let tree = parser
57                .parse(source, None)
58                .ok_or_else(|| CodeGraphError::Parse {
59                    file: path.to_path_buf(),
60                    message: "tree-sitter parse returned None".into(),
61                })?;
62
63            extract_all(source, path, &tree)
64        })
65    }
66}
67
68// ---------------------------------------------------------------------------
69// Main extraction
70// ---------------------------------------------------------------------------
71
72fn extract_all(
73    source: &[u8],
74    path: &Path,
75    tree: &tree_sitter::Tree,
76) -> domain::error::Result<ParseResult> {
77    let mut symbols = Vec::new();
78    let mut edges = Vec::new();
79    let file_path = path.to_string_lossy().to_string();
80    let root = tree.root_node();
81
82    // Collect imports, handling TYPE_CHECKING blocks
83    let imports = extract_imports_from_root(&root, source);
84
85    // Walk top-level statements
86    let mut cursor = root.walk();
87    for child in root.children(&mut cursor) {
88        if !child.is_named() {
89            continue;
90        }
91        extract_top_level(source, &file_path, child, &mut symbols, &mut edges);
92    }
93
94    Ok(ParseResult {
95        symbols,
96        edges,
97        imports,
98        exports: Vec::new(), // Python has no explicit export declarations
99    })
100}
101
102// ---------------------------------------------------------------------------
103// Top-level statement dispatch
104// ---------------------------------------------------------------------------
105
106fn extract_top_level(
107    source: &[u8],
108    file_path: &str,
109    node: Node,
110    symbols: &mut Vec<SymbolNode>,
111    edges: &mut Vec<Edge>,
112) {
113    match node.kind() {
114        "function_definition" => {
115            if let Some(sym) = extract_function(source, file_path, node, &[]) {
116                edges.push(contains_edge(file_path, &sym.qualified_name));
117                symbols.push(sym);
118            }
119        }
120        "class_definition" => {
121            extract_class(source, file_path, node, &[], symbols, edges);
122        }
123        "decorated_definition" => {
124            extract_decorated(source, file_path, node, symbols, edges);
125        }
126        "expression_statement" => {
127            extract_assignment(source, file_path, node, symbols, edges);
128        }
129        _ => {}
130    }
131}
132
133// ---------------------------------------------------------------------------
134// Function extraction
135// ---------------------------------------------------------------------------
136
137fn extract_function(
138    source: &[u8],
139    file_path: &str,
140    node: Node,
141    decorators: &[String],
142) -> Option<SymbolNode> {
143    let name = node_text_field(node, "name", source)?;
144    let qualified_name = format!("{file_path}::{name}");
145    let is_async = has_async_keyword(node, source);
146    let signature = build_py_signature(source, node);
147    let visibility = python_visibility(&name);
148
149    Some(SymbolNode {
150        name: name.clone(),
151        qualified_name,
152        kind: SymbolKind::Function,
153        location: node_location(file_path, node),
154        visibility,
155        is_exported: matches!(visibility, Visibility::Public),
156        is_async,
157        is_test: is_test_name(&name),
158        decorators: decorators.to_vec(),
159        signature,
160    })
161}
162
163// ---------------------------------------------------------------------------
164// Class extraction
165// ---------------------------------------------------------------------------
166
167fn extract_class(
168    source: &[u8],
169    file_path: &str,
170    node: Node,
171    decorators: &[String],
172    symbols: &mut Vec<SymbolNode>,
173    edges: &mut Vec<Edge>,
174) {
175    let name = match node_text_field(node, "name", source) {
176        Some(n) => n,
177        None => return,
178    };
179    let qualified_name = format!("{file_path}::{name}");
180    let visibility = python_visibility(&name);
181
182    let class_sym = SymbolNode {
183        name: name.clone(),
184        qualified_name: qualified_name.clone(),
185        kind: SymbolKind::Class,
186        location: node_location(file_path, node),
187        visibility,
188        is_exported: matches!(visibility, Visibility::Public),
189        is_async: false,
190        is_test: is_test_name(&name),
191        decorators: decorators.to_vec(),
192        signature: None,
193    };
194    edges.push(contains_edge(file_path, &class_sym.qualified_name));
195    symbols.push(class_sym);
196
197    // Extends edges: class Foo(Bar, Baz):
198    extract_extends_edges(source, file_path, node, &name, &qualified_name, edges);
199
200    // Methods and properties inside the class body
201    if let Some(body) = node.child_by_field_name("body") {
202        extract_class_body(
203            source,
204            file_path,
205            &name,
206            &qualified_name,
207            body,
208            symbols,
209            edges,
210        );
211    }
212}
213
214fn extract_extends_edges(
215    source: &[u8],
216    file_path: &str,
217    class_node: Node,
218    _class_name: &str,
219    class_qualified_name: &str,
220    edges: &mut Vec<Edge>,
221) {
222    // Python tree-sitter: class arguments are in "argument_list" or "superclasses"
223    // The field name in tree-sitter-python is "superclasses" → argument_list
224    let superclasses_node = class_node.child_by_field_name("superclasses");
225    let target_node = superclasses_node.or_else(|| {
226        // Also try "argument_list" as direct child
227        let mut cursor = class_node.walk();
228        let found = class_node
229            .children(&mut cursor)
230            .find(|c| c.kind() == "argument_list");
231        found
232    });
233
234    if let Some(args) = target_node {
235        let mut cursor = args.walk();
236        for arg in args.children(&mut cursor) {
237            if !arg.is_named() {
238                continue;
239            }
240            // Simple identifier base class
241            let base_name = match arg.kind() {
242                "identifier" => arg.utf8_text(source).ok().map(|s| s.to_string()),
243                "attribute" => arg.utf8_text(source).ok().map(|s| s.to_string()),
244                _ => None,
245            };
246            if let Some(base) = base_name {
247                // Only emit Extends for same-file resolution: use qualified name pattern
248                let target = format!("{file_path}::{base}");
249                edges.push(Edge {
250                    kind: EdgeKind::Extends,
251                    source: class_qualified_name.to_string(),
252                    target,
253                    metadata: None,
254                });
255            }
256        }
257    }
258}
259
260fn extract_class_body(
261    source: &[u8],
262    file_path: &str,
263    class_name: &str,
264    class_qualified_name: &str,
265    body: Node,
266    symbols: &mut Vec<SymbolNode>,
267    edges: &mut Vec<Edge>,
268) {
269    let mut cursor = body.walk();
270    for stmt in body.children(&mut cursor) {
271        if !stmt.is_named() {
272            continue;
273        }
274        match stmt.kind() {
275            "function_definition" => {
276                extract_method(
277                    source,
278                    file_path,
279                    class_name,
280                    class_qualified_name,
281                    stmt,
282                    &[],
283                    symbols,
284                    edges,
285                );
286            }
287            "decorated_definition" => {
288                let decorators = collect_decorators(source, stmt);
289                // Find the inner definition
290                let inner = stmt.children(&mut stmt.walk()).find(|c| {
291                    c.is_named() && matches!(c.kind(), "function_definition" | "class_definition")
292                });
293                if let Some(inner_node) = inner {
294                    if inner_node.kind() == "function_definition" {
295                        extract_method(
296                            source,
297                            file_path,
298                            class_name,
299                            class_qualified_name,
300                            inner_node,
301                            &decorators,
302                            symbols,
303                            edges,
304                        );
305                    }
306                }
307            }
308            _ => {}
309        }
310    }
311}
312
313#[allow(clippy::too_many_arguments)]
314fn extract_method(
315    source: &[u8],
316    file_path: &str,
317    class_name: &str,
318    class_qualified_name: &str,
319    node: Node,
320    decorators: &[String],
321    symbols: &mut Vec<SymbolNode>,
322    edges: &mut Vec<Edge>,
323) {
324    let name = match node_text_field(node, "name", source) {
325        Some(n) => n,
326        None => return,
327    };
328    let member_qualified = format!("{file_path}::{class_name}.{name}");
329    let is_async = has_async_keyword(node, source);
330    let signature = build_py_signature(source, node);
331    let visibility = python_visibility(&name);
332
333    // Determine kind: @property → Property, else Method
334    let kind = if decorators
335        .iter()
336        .any(|d| d == "@property" || d == "property")
337    {
338        SymbolKind::Property
339    } else {
340        SymbolKind::Method
341    };
342
343    let sym = SymbolNode {
344        name: name.clone(),
345        qualified_name: member_qualified.clone(),
346        kind,
347        location: node_location(file_path, node),
348        visibility,
349        is_exported: matches!(visibility, Visibility::Public),
350        is_async,
351        is_test: is_test_name(&name),
352        decorators: decorators.to_vec(),
353        signature,
354    };
355    symbols.push(sym);
356    edges.push(Edge {
357        kind: EdgeKind::ChildOf,
358        source: member_qualified,
359        target: class_qualified_name.to_string(),
360        metadata: None,
361    });
362}
363
364// ---------------------------------------------------------------------------
365// Decorated definition
366// ---------------------------------------------------------------------------
367
368fn extract_decorated(
369    source: &[u8],
370    file_path: &str,
371    node: Node,
372    symbols: &mut Vec<SymbolNode>,
373    edges: &mut Vec<Edge>,
374) {
375    let decorators = collect_decorators(source, node);
376
377    // Find the inner function_definition or class_definition
378    let inner = {
379        let mut cursor = node.walk();
380        let found = node.children(&mut cursor).find(|c| {
381            c.is_named() && matches!(c.kind(), "function_definition" | "class_definition")
382        });
383        found
384    };
385
386    match inner {
387        Some(inner_node) if inner_node.kind() == "function_definition" => {
388            if let Some(sym) = extract_function(source, file_path, inner_node, &decorators) {
389                edges.push(contains_edge(file_path, &sym.qualified_name));
390                symbols.push(sym);
391            }
392        }
393        Some(inner_node) if inner_node.kind() == "class_definition" => {
394            extract_class(source, file_path, inner_node, &decorators, symbols, edges);
395        }
396        _ => {}
397    }
398}
399
400fn collect_decorators(source: &[u8], node: Node) -> Vec<String> {
401    let mut decorators = Vec::new();
402    let mut cursor = node.walk();
403    for child in node.children(&mut cursor) {
404        if child.is_named() && child.kind() == "decorator" {
405            if let Ok(text) = child.utf8_text(source) {
406                // Strip the leading '@' for the decorator text
407                let text = text.trim();
408                decorators.push(text.to_string());
409            }
410        }
411    }
412    decorators
413}
414
415// ---------------------------------------------------------------------------
416// Assignment (top-level variable)
417// ---------------------------------------------------------------------------
418
419fn extract_assignment(
420    source: &[u8],
421    file_path: &str,
422    node: Node,
423    symbols: &mut Vec<SymbolNode>,
424    edges: &mut Vec<Edge>,
425) {
426    // expression_statement → assignment
427    let assignment = {
428        let mut cursor = node.walk();
429        let found = node
430            .children(&mut cursor)
431            .find(|c| c.is_named() && c.kind() == "assignment");
432        found
433    };
434
435    let assignment = match assignment {
436        Some(a) => a,
437        None => return,
438    };
439
440    // Get left-hand side identifier
441    let lhs = match assignment.child_by_field_name("left") {
442        Some(l) => l,
443        None => return,
444    };
445
446    if lhs.kind() != "identifier" {
447        return;
448    }
449
450    let name = match lhs.utf8_text(source).ok() {
451        Some(n) => n.to_string(),
452        None => return,
453    };
454
455    let qualified_name = format!("{file_path}::{name}");
456    let visibility = python_visibility(&name);
457
458    let sym = SymbolNode {
459        name: name.clone(),
460        qualified_name: qualified_name.clone(),
461        kind: SymbolKind::Variable,
462        location: node_location(file_path, node),
463        visibility,
464        is_exported: matches!(visibility, Visibility::Public),
465        is_async: false,
466        is_test: false,
467        decorators: Vec::new(),
468        signature: None,
469    };
470    edges.push(contains_edge(file_path, &sym.qualified_name));
471    symbols.push(sym);
472}
473
474// ---------------------------------------------------------------------------
475// Import extraction
476// ---------------------------------------------------------------------------
477
478fn extract_imports_from_root(root: &Node, source: &[u8]) -> Vec<RawImport> {
479    let mut imports = Vec::new();
480    let mut cursor = root.walk();
481
482    for child in root.children(&mut cursor) {
483        if !child.is_named() {
484            continue;
485        }
486        match child.kind() {
487            "import_statement" => {
488                imports.extend(parse_import_statement(&child, source, false));
489            }
490            "import_from_statement" => {
491                if let Some(imp) = parse_import_from_statement(&child, source, false) {
492                    imports.push(imp);
493                }
494            }
495            "if_statement" => {
496                // Detect TYPE_CHECKING blocks
497                if is_type_checking_guard(&child, source) {
498                    let consequence = child.child_by_field_name("consequence");
499                    if let Some(block) = consequence {
500                        let mut block_cursor = block.walk();
501                        for stmt in block.children(&mut block_cursor) {
502                            if !stmt.is_named() {
503                                continue;
504                            }
505                            match stmt.kind() {
506                                "import_statement" => {
507                                    imports.extend(parse_import_statement(&stmt, source, true));
508                                }
509                                "import_from_statement" => {
510                                    if let Some(imp) =
511                                        parse_import_from_statement(&stmt, source, true)
512                                    {
513                                        imports.push(imp);
514                                    }
515                                }
516                                _ => {}
517                            }
518                        }
519                    }
520                }
521            }
522            _ => {}
523        }
524    }
525
526    imports
527}
528
529/// Detect `if TYPE_CHECKING:` or `if typing.TYPE_CHECKING:`
530fn is_type_checking_guard(node: &Node, source: &[u8]) -> bool {
531    let condition = match node.child_by_field_name("condition") {
532        Some(c) => c,
533        None => return false,
534    };
535    match condition.kind() {
536        "identifier" => condition.utf8_text(source).ok() == Some("TYPE_CHECKING"),
537        "attribute" => {
538            // typing.TYPE_CHECKING → attribute node with attribute field "TYPE_CHECKING"
539            condition
540                .child_by_field_name("attribute")
541                .and_then(|a| a.utf8_text(source).ok())
542                == Some("TYPE_CHECKING")
543        }
544        _ => false,
545    }
546}
547
548/// Parse `import os`, `import os.path`, `import os as o`, `import a, b`
549fn parse_import_statement(node: &Node, source: &[u8], is_type_only: bool) -> Vec<RawImport> {
550    let line = node.start_position().row + 1;
551    let mut imports = Vec::new();
552
553    let mut cursor = node.walk();
554    for child in node.children(&mut cursor) {
555        if !child.is_named() {
556            continue;
557        }
558        match child.kind() {
559            "dotted_name" => {
560                let specifier = child.utf8_text(source).unwrap_or("").to_string();
561                imports.push(RawImport {
562                    specifier: specifier.clone(),
563                    names: vec![ImportName {
564                        name: specifier,
565                        alias: None,
566                        is_type: false,
567                    }],
568                    is_type_only,
569                    is_side_effect: false,
570                    is_namespace: false,
571                    line,
572                });
573            }
574            "aliased_import" => {
575                // `import os as o` → aliased_import with name=dotted_name, alias=identifier
576                let specifier = child
577                    .child_by_field_name("name")
578                    .and_then(|n| n.utf8_text(source).ok())
579                    .unwrap_or("")
580                    .to_string();
581                let alias = child
582                    .child_by_field_name("alias")
583                    .and_then(|a| a.utf8_text(source).ok())
584                    .map(|s| s.to_string());
585                imports.push(RawImport {
586                    specifier: specifier.clone(),
587                    names: vec![ImportName {
588                        name: specifier,
589                        alias,
590                        is_type: false,
591                    }],
592                    is_type_only,
593                    is_side_effect: false,
594                    is_namespace: false,
595                    line,
596                });
597            }
598            _ => {}
599        }
600    }
601
602    imports
603}
604
605/// Parse `from X import Y`, `from . import Y`, `from .X import Y`, `from X import *`
606fn parse_import_from_statement(
607    node: &Node,
608    source: &[u8],
609    is_type_only: bool,
610) -> Option<RawImport> {
611    let line = node.start_position().row + 1;
612
613    // Build specifier from module_name field
614    let specifier = build_from_specifier(node, source);
615
616    // Collect names
617    let mut names = Vec::new();
618    let mut is_namespace = false;
619
620    let mut cursor = node.walk();
621    for child in node.children(&mut cursor) {
622        if !child.is_named() {
623            continue;
624        }
625        match child.kind() {
626            "wildcard_import" => {
627                is_namespace = true;
628                names.push(ImportName {
629                    name: "*".to_string(),
630                    alias: None,
631                    is_type: false,
632                });
633            }
634            "dotted_name" => {
635                // This is a bare import name (not the module part)
636                // Skip if it's the module_name field — only process non-field children
637                // In tree-sitter-python, "from foo import bar" has module_name="foo" (field)
638                // and children include the import names as non-field "dotted_name" nodes
639                // We need to skip the module_name field child
640                let is_module_name = node
641                    .child_by_field_name("module_name")
642                    .map(|mn| mn.id() == child.id())
643                    .unwrap_or(false);
644                if !is_module_name {
645                    let name = child.utf8_text(source).unwrap_or("").to_string();
646                    names.push(ImportName {
647                        name,
648                        alias: None,
649                        is_type: false,
650                    });
651                }
652            }
653            "aliased_import" => {
654                let name = child
655                    .child_by_field_name("name")
656                    .and_then(|n| n.utf8_text(source).ok())
657                    .unwrap_or("")
658                    .to_string();
659                let alias = child
660                    .child_by_field_name("alias")
661                    .and_then(|a| a.utf8_text(source).ok())
662                    .map(|s| s.to_string());
663                names.push(ImportName {
664                    name,
665                    alias,
666                    is_type: false,
667                });
668            }
669            "identifier" => {
670                // Plain identifier as import name
671                // Skip if it matches the module_name field
672                let is_module_name = node
673                    .child_by_field_name("module_name")
674                    .map(|mn| mn.id() == child.id())
675                    .unwrap_or(false);
676                if !is_module_name {
677                    let name = child.utf8_text(source).unwrap_or("").to_string();
678                    names.push(ImportName {
679                        name,
680                        alias: None,
681                        is_type: false,
682                    });
683                }
684            }
685            _ => {}
686        }
687    }
688
689    Some(RawImport {
690        specifier,
691        names,
692        is_type_only,
693        is_side_effect: false,
694        is_namespace,
695        line,
696    })
697}
698
699/// Build the specifier string from a `from X import` statement.
700/// Handles:
701/// - `from os.path import join` → "os.path"
702/// - `from . import models` → "."
703/// - `from .models import User` → ".models"
704/// - `from .. import utils` → ".."
705/// - `from ..utils import helper` → "..utils"
706fn build_from_specifier(node: &Node, source: &[u8]) -> String {
707    let module_name = node.child_by_field_name("module_name");
708
709    match module_name {
710        None => {
711            // `from . import X` — no module_name field; look for relative_import or import_prefix
712            let mut cursor = node.walk();
713            for child in node.children(&mut cursor) {
714                if child.is_named() && child.kind() == "relative_import" {
715                    return relative_import_specifier(&child, source);
716                }
717            }
718            ".".to_string()
719        }
720        Some(mn) => match mn.kind() {
721            "relative_import" => relative_import_specifier(&mn, source),
722            "dotted_name" => mn.utf8_text(source).unwrap_or("").to_string(),
723            _ => mn.utf8_text(source).unwrap_or("").to_string(),
724        },
725    }
726}
727
728/// Build specifier from a `relative_import` node.
729/// relative_import = import_prefix + dotted_name?
730fn relative_import_specifier(node: &Node, source: &[u8]) -> String {
731    let mut dots = String::new();
732    let mut module_part = String::new();
733
734    let mut cursor = node.walk();
735    for child in node.children(&mut cursor) {
736        match child.kind() {
737            "import_prefix" => {
738                dots = child.utf8_text(source).unwrap_or(".").to_string();
739            }
740            "dotted_name" => {
741                module_part = child.utf8_text(source).unwrap_or("").to_string();
742            }
743            _ => {}
744        }
745    }
746
747    if module_part.is_empty() {
748        dots
749    } else {
750        format!("{dots}{module_part}")
751    }
752}
753
754// ---------------------------------------------------------------------------
755// Utility helpers
756// ---------------------------------------------------------------------------
757
758/// Get text of a named field on a node.
759fn node_text_field(node: Node, field: &str, source: &[u8]) -> Option<String> {
760    node.child_by_field_name(field)
761        .and_then(|n| n.utf8_text(source).ok())
762        .map(|s| s.to_string())
763}
764
765/// Build a Location from a tree-sitter node.
766fn node_location(file_path: &str, node: Node) -> Location {
767    let start = node.start_position();
768    let end = node.end_position();
769    Location {
770        file: file_path.into(),
771        line_start: start.row + 1,
772        line_end: end.row + 1,
773        col_start: start.column,
774        col_end: end.column,
775    }
776}
777
778/// Check if a function_definition node is preceded by `async` keyword.
779fn has_async_keyword(node: Node, source: &[u8]) -> bool {
780    let mut cursor = node.walk();
781    for child in node.children(&mut cursor) {
782        if child.kind() == "async" {
783            return true;
784        }
785        // Also handle as text
786        if !child.is_named() {
787            if let Ok(text) = child.utf8_text(source) {
788                if text == "async" {
789                    return true;
790                }
791            }
792        }
793    }
794    false
795}
796
797/// Python visibility rules:
798/// - `_prefix` → Private
799/// - `__prefix` (without trailing `__`) → Private
800/// - `__dunder__` → Public (dunder methods)
801/// - no prefix → Public
802fn python_visibility(name: &str) -> Visibility {
803    if name.starts_with("__") && name.ends_with("__") && name.len() > 4 {
804        // Dunder method: __init__, __str__, etc.
805        Visibility::Public
806    } else if name.starts_with("__") {
807        // Name mangled: __private
808        Visibility::Private
809    } else if name.starts_with('_') {
810        // Convention private: _helper
811        Visibility::Private
812    } else {
813        Visibility::Public
814    }
815}
816
817/// Build function signature from parameters.
818fn build_py_signature(source: &[u8], node: Node) -> Option<String> {
819    node.child_by_field_name("parameters")
820        .and_then(|n| n.utf8_text(source).ok())
821        .map(|s| s.to_string())
822}
823
824/// Create a Contains edge.
825fn contains_edge(file_path: &str, qualified_name: &str) -> Edge {
826    Edge {
827        kind: EdgeKind::Contains,
828        source: file_path.to_string(),
829        target: qualified_name.to_string(),
830        metadata: None,
831    }
832}
833
/// Returns `true` when `name` looks like a test: it is exactly `test`, or it
/// starts with `test_` or `Test`.
fn is_test_name(name: &str) -> bool {
    const TEST_PREFIXES: [&str; 2] = ["test_", "Test"];
    name == "test" || TEST_PREFIXES.iter().any(|prefix| name.starts_with(*prefix))
}
838
839// ---------------------------------------------------------------------------
840// Tests
841// ---------------------------------------------------------------------------
842
843#[cfg(test)]
844mod tests {
845    use super::*;
846    use domain::model::{EdgeKind, SymbolKind, Visibility};
847
848    fn parse_python(source: &str) -> ParseResult {
849        let parser = PythonParser::new();
850        parser
851            .parse(source.as_bytes(), Path::new("test.py"))
852            .expect("parse failed")
853    }
854
855    // -----------------------------------------------------------------------
856    // AC15: def foo(): → Function symbol
857    // -----------------------------------------------------------------------
858
859    #[test]
860    fn ac15_function_definition() {
861        let result = parse_python("def foo():\n    pass\n");
862        let sym = result.symbols.iter().find(|s| s.name == "foo").unwrap();
863        assert_eq!(sym.kind, SymbolKind::Function);
864        assert_eq!(sym.qualified_name, "test.py::foo");
865        assert!(!sym.is_async);
866        assert_eq!(sym.visibility, Visibility::Public);
867        assert!(sym.is_exported);
868    }
869
870    #[test]
871    fn function_contains_edge() {
872        let result = parse_python("def foo():\n    pass\n");
873        let edge = result
874            .edges
875            .iter()
876            .find(|e| e.kind == EdgeKind::Contains && e.target == "test.py::foo")
877            .unwrap();
878        assert_eq!(edge.source, "test.py");
879    }
880
881    #[test]
882    fn function_location_populated() {
883        let result = parse_python("def foo():\n    pass\n");
884        let sym = result.symbols.iter().find(|s| s.name == "foo").unwrap();
885        assert_eq!(sym.location.file.to_string_lossy(), "test.py");
886        assert_eq!(sym.location.line_start, 1);
887    }
888
889    // -----------------------------------------------------------------------
890    // AC16: class Bar: with methods → Class + Method + ChildOf
891    // -----------------------------------------------------------------------
892
893    #[test]
894    fn ac16_class_with_method() {
895        let source = "class Bar:\n    def greet(self):\n        pass\n";
896        let result = parse_python(source);
897
898        let class_sym = result.symbols.iter().find(|s| s.name == "Bar").unwrap();
899        assert_eq!(class_sym.kind, SymbolKind::Class);
900        assert_eq!(class_sym.qualified_name, "test.py::Bar");
901
902        let method_sym = result.symbols.iter().find(|s| s.name == "greet").unwrap();
903        assert_eq!(method_sym.kind, SymbolKind::Method);
904        assert_eq!(method_sym.qualified_name, "test.py::Bar.greet");
905
906        let child_of = result
907            .edges
908            .iter()
909            .find(|e| e.kind == EdgeKind::ChildOf)
910            .unwrap();
911        assert_eq!(child_of.source, "test.py::Bar.greet");
912        assert_eq!(child_of.target, "test.py::Bar");
913    }
914
915    #[test]
916    fn class_contains_edge() {
917        let source = "class Foo:\n    pass\n";
918        let result = parse_python(source);
919        let edge = result
920            .edges
921            .iter()
922            .find(|e| e.kind == EdgeKind::Contains && e.target == "test.py::Foo")
923            .unwrap();
924        assert_eq!(edge.source, "test.py");
925    }
926
927    #[test]
928    fn class_multiple_methods() {
929        let source = "class Calc:\n    def add(self, a, b):\n        return a + b\n    def sub(self, a, b):\n        return a - b\n";
930        let result = parse_python(source);
931
932        assert!(result
933            .symbols
934            .iter()
935            .any(|s| s.name == "add" && s.kind == SymbolKind::Method));
936        assert!(result
937            .symbols
938            .iter()
939            .any(|s| s.name == "sub" && s.kind == SymbolKind::Method));
940
941        let child_of_count = result
942            .edges
943            .iter()
944            .filter(|e| e.kind == EdgeKind::ChildOf)
945            .count();
946        assert_eq!(child_of_count, 2);
947    }
948
949    // -----------------------------------------------------------------------
950    // AC17: from .models import User → RawImport with specifier=".models"
951    // -----------------------------------------------------------------------
952
953    #[test]
954    fn ac17_relative_import_with_module() {
955        let result = parse_python("from .models import User\n");
956        assert_eq!(result.imports.len(), 1);
957        let imp = &result.imports[0];
958        assert_eq!(imp.specifier, ".models");
959        assert_eq!(imp.names.len(), 1);
960        assert_eq!(imp.names[0].name, "User");
961        assert!(!imp.is_type_only);
962        assert!(!imp.is_namespace);
963    }
964
965    #[test]
966    fn relative_import_double_dot() {
967        let result = parse_python("from ..utils import helper\n");
968        assert_eq!(result.imports.len(), 1);
969        let imp = &result.imports[0];
970        assert_eq!(imp.specifier, "..utils");
971        assert_eq!(imp.names[0].name, "helper");
972    }
973
974    #[test]
975    fn relative_import_dot_only() {
976        let result = parse_python("from . import models\n");
977        assert_eq!(result.imports.len(), 1);
978        let imp = &result.imports[0];
979        assert_eq!(imp.specifier, ".");
980        assert_eq!(imp.names[0].name, "models");
981    }
982
983    #[test]
984    fn relative_import_double_dot_only() {
985        let result = parse_python("from .. import utils\n");
986        assert_eq!(result.imports.len(), 1);
987        let imp = &result.imports[0];
988        assert_eq!(imp.specifier, "..");
989        assert_eq!(imp.names[0].name, "utils");
990    }
991
992    // -----------------------------------------------------------------------
993    // AC18: import os.path → RawImport with specifier="os.path"
994    // -----------------------------------------------------------------------
995
996    #[test]
997    fn ac18_import_dotted_module() {
998        let result = parse_python("import os.path\n");
999        assert_eq!(result.imports.len(), 1);
1000        let imp = &result.imports[0];
1001        assert_eq!(imp.specifier, "os.path");
1002        assert_eq!(imp.names[0].name, "os.path");
1003    }
1004
1005    #[test]
1006    fn import_simple_module() {
1007        let result = parse_python("import os\n");
1008        assert_eq!(result.imports.len(), 1);
1009        let imp = &result.imports[0];
1010        assert_eq!(imp.specifier, "os");
1011        assert_eq!(imp.names[0].name, "os");
1012    }
1013
1014    #[test]
1015    fn import_with_alias() {
1016        let result = parse_python("import numpy as np\n");
1017        assert_eq!(result.imports.len(), 1);
1018        let imp = &result.imports[0];
1019        assert_eq!(imp.specifier, "numpy");
1020        assert_eq!(imp.names[0].alias, Some("np".to_string()));
1021    }
1022
1023    #[test]
1024    fn from_import_multiple_names() {
1025        let result = parse_python("from os.path import join, exists\n");
1026        assert_eq!(result.imports.len(), 1);
1027        let imp = &result.imports[0];
1028        assert_eq!(imp.specifier, "os.path");
1029        assert_eq!(imp.names.len(), 2);
1030        assert!(imp.names.iter().any(|n| n.name == "join"));
1031        assert!(imp.names.iter().any(|n| n.name == "exists"));
1032    }
1033
1034    #[test]
1035    fn from_import_wildcard() {
1036        let result = parse_python("from os import *\n");
1037        assert_eq!(result.imports.len(), 1);
1038        let imp = &result.imports[0];
1039        assert_eq!(imp.specifier, "os");
1040        assert!(imp.is_namespace);
1041    }
1042
1043    #[test]
1044    fn from_import_with_alias() {
1045        let result = parse_python("from os.path import join as path_join\n");
1046        let imp = &result.imports[0];
1047        assert_eq!(imp.specifier, "os.path");
1048        assert_eq!(imp.names[0].name, "join");
1049        assert_eq!(imp.names[0].alias, Some("path_join".to_string()));
1050    }
1051
1052    // -----------------------------------------------------------------------
1053    // AC19: async def foo(): → Function with is_async=true
1054    // -----------------------------------------------------------------------
1055
1056    #[test]
1057    fn ac19_async_function() {
1058        let result = parse_python("async def fetch():\n    pass\n");
1059        let sym = result.symbols.iter().find(|s| s.name == "fetch").unwrap();
1060        assert_eq!(sym.kind, SymbolKind::Function);
1061        assert!(sym.is_async);
1062    }
1063
1064    #[test]
1065    fn async_method() {
1066        let source = "class Client:\n    async def get(self):\n        pass\n";
1067        let result = parse_python(source);
1068        let sym = result.symbols.iter().find(|s| s.name == "get").unwrap();
1069        assert!(sym.is_async);
1070        assert_eq!(sym.kind, SymbolKind::Method);
1071    }
1072
1073    // -----------------------------------------------------------------------
1074    // AC20: decorated functions → decorators field populated
1075    // -----------------------------------------------------------------------
1076
1077    #[test]
1078    fn ac20_decorated_function() {
1079        let source = "@my_decorator\ndef foo():\n    pass\n";
1080        let result = parse_python(source);
1081        let sym = result.symbols.iter().find(|s| s.name == "foo").unwrap();
1082        assert_eq!(sym.kind, SymbolKind::Function);
1083        assert!(!sym.decorators.is_empty(), "decorators should be populated");
1084        assert!(sym.decorators.iter().any(|d| d.contains("my_decorator")));
1085    }
1086
1087    #[test]
1088    fn decorated_class() {
1089        let source = "@dataclass\nclass Point:\n    pass\n";
1090        let result = parse_python(source);
1091        let sym = result.symbols.iter().find(|s| s.name == "Point").unwrap();
1092        assert_eq!(sym.kind, SymbolKind::Class);
1093        assert!(!sym.decorators.is_empty());
1094        assert!(sym.decorators.iter().any(|d| d.contains("dataclass")));
1095    }
1096
1097    #[test]
1098    fn property_decorator_produces_property_kind() {
1099        let source = "class Foo:\n    @property\n    def name(self):\n        return self._name\n";
1100        let result = parse_python(source);
1101        let sym = result.symbols.iter().find(|s| s.name == "name").unwrap();
1102        assert_eq!(sym.kind, SymbolKind::Property);
1103    }
1104
1105    #[test]
1106    fn multiple_decorators() {
1107        let source = "@decorator_one\n@decorator_two\ndef bar():\n    pass\n";
1108        let result = parse_python(source);
1109        let sym = result.symbols.iter().find(|s| s.name == "bar").unwrap();
1110        assert_eq!(sym.decorators.len(), 2);
1111    }
1112
1113    // -----------------------------------------------------------------------
1114    // AC21: if TYPE_CHECKING: imports → is_type_only=true
1115    // -----------------------------------------------------------------------
1116
1117    #[test]
1118    fn ac21_type_checking_guard() {
1119        let source =
1120            "from typing import TYPE_CHECKING\nif TYPE_CHECKING:\n    from .models import User\n";
1121        let result = parse_python(source);
1122
1123        let type_guarded = result
1124            .imports
1125            .iter()
1126            .find(|i| i.specifier == ".models")
1127            .unwrap();
1128        assert!(type_guarded.is_type_only);
1129    }
1130
1131    #[test]
1132    fn type_checking_attribute_form() {
1133        let source = "import typing\nif typing.TYPE_CHECKING:\n    from .types import MyType\n";
1134        let result = parse_python(source);
1135
1136        let type_guarded = result
1137            .imports
1138            .iter()
1139            .find(|i| i.specifier == ".types")
1140            .unwrap();
1141        assert!(type_guarded.is_type_only);
1142    }
1143
1144    #[test]
1145    fn regular_imports_not_type_only() {
1146        let result = parse_python("from os.path import join\n");
1147        assert!(!result.imports[0].is_type_only);
1148    }
1149
1150    // -----------------------------------------------------------------------
1151    // AC49: Invalid/empty source → no panic
1152    // -----------------------------------------------------------------------
1153
1154    #[test]
1155    fn ac49_empty_source_no_panic() {
1156        let result = parse_python("");
1157        assert!(result.symbols.is_empty());
1158        assert!(result.imports.is_empty());
1159        assert!(result.edges.is_empty());
1160    }
1161
1162    #[test]
1163    fn ac49_invalid_source_no_panic() {
1164        // Should not panic even with malformed code
1165        let result = parse_python("def (\nclass {{{");
1166        // At minimum it should not panic — partial extraction may occur
1167        let _ = result;
1168    }
1169
1170    // -----------------------------------------------------------------------
1171    // AC50: Source with errors → partial extraction
1172    // -----------------------------------------------------------------------
1173
1174    #[test]
1175    fn ac50_partial_extraction_on_error() {
1176        let source = "def valid_function():\n    pass\n\ndef (\n\ndef another_valid():\n    pass\n";
1177        let result = parse_python(source);
1178        // Should extract at least valid_function
1179        assert!(
1180            result.symbols.iter().any(|s| s.name == "valid_function"),
1181            "should extract valid_function even with parse errors"
1182        );
1183    }
1184
1185    // -----------------------------------------------------------------------
1186    // Visibility rules
1187    // -----------------------------------------------------------------------
1188
1189    #[test]
1190    fn visibility_public_function() {
1191        let result = parse_python("def public_func():\n    pass\n");
1192        let sym = result
1193            .symbols
1194            .iter()
1195            .find(|s| s.name == "public_func")
1196            .unwrap();
1197        assert_eq!(sym.visibility, Visibility::Public);
1198        assert!(sym.is_exported);
1199    }
1200
1201    #[test]
1202    fn visibility_private_single_underscore() {
1203        let result = parse_python("def _private():\n    pass\n");
1204        let sym = result
1205            .symbols
1206            .iter()
1207            .find(|s| s.name == "_private")
1208            .unwrap();
1209        assert_eq!(sym.visibility, Visibility::Private);
1210        assert!(!sym.is_exported);
1211    }
1212
1213    #[test]
1214    fn visibility_private_double_underscore() {
1215        let result = parse_python("def __mangled():\n    pass\n");
1216        let sym = result
1217            .symbols
1218            .iter()
1219            .find(|s| s.name == "__mangled")
1220            .unwrap();
1221        assert_eq!(sym.visibility, Visibility::Private);
1222    }
1223
1224    #[test]
1225    fn visibility_dunder_is_public() {
1226        let source = "class Foo:\n    def __init__(self):\n        pass\n";
1227        let result = parse_python(source);
1228        let sym = result
1229            .symbols
1230            .iter()
1231            .find(|s| s.name == "__init__")
1232            .unwrap();
1233        assert_eq!(sym.visibility, Visibility::Public);
1234    }
1235
1236    // -----------------------------------------------------------------------
1237    // Extends edges
1238    // -----------------------------------------------------------------------
1239
1240    #[test]
1241    fn class_extends_single_base() {
1242        let source = "class Animal:\n    pass\n\nclass Dog(Animal):\n    pass\n";
1243        let result = parse_python(source);
1244
1245        let extends = result
1246            .edges
1247            .iter()
1248            .find(|e| e.kind == EdgeKind::Extends)
1249            .unwrap();
1250        assert_eq!(extends.source, "test.py::Dog");
1251        assert_eq!(extends.target, "test.py::Animal");
1252    }
1253
1254    #[test]
1255    fn class_no_base_no_extends_edge() {
1256        let source = "class Simple:\n    pass\n";
1257        let result = parse_python(source);
1258        assert!(!result.edges.iter().any(|e| e.kind == EdgeKind::Extends));
1259    }
1260
1261    // -----------------------------------------------------------------------
1262    // Top-level variable assignment
1263    // -----------------------------------------------------------------------
1264
1265    #[test]
1266    fn top_level_variable_assignment() {
1267        let result = parse_python("x = 42\n");
1268        assert!(result
1269            .symbols
1270            .iter()
1271            .any(|s| s.name == "x" && s.kind == SymbolKind::Variable));
1272    }
1273
1274    // -----------------------------------------------------------------------
1275    // Import line numbers
1276    // -----------------------------------------------------------------------
1277
1278    #[test]
1279    fn import_line_number() {
1280        let result = parse_python("import os\n");
1281        assert_eq!(result.imports[0].line, 1);
1282    }
1283
1284    #[test]
1285    fn import_line_number_second_line() {
1286        let result = parse_python("\nimport sys\n");
1287        assert_eq!(result.imports[0].line, 2);
1288    }
1289
1290    // -----------------------------------------------------------------------
1291    // Parser metadata
1292    // -----------------------------------------------------------------------
1293
1294    #[test]
1295    fn language_returns_python() {
1296        let parser = PythonParser::new();
1297        assert_eq!(parser.language(), Language::Python);
1298    }
1299
1300    #[test]
1301    fn file_extensions_includes_py() {
1302        let parser = PythonParser::new();
1303        assert!(parser.file_extensions().contains(&"py"));
1304    }
1305
1306    // -----------------------------------------------------------------------
1307    // Integration test
1308    // -----------------------------------------------------------------------
1309
1310    #[test]
1311    fn integration_multi_construct_file() {
1312        let source = r#"
1313import os
1314import numpy as np
1315from os.path import join, exists
1316from .models import User
1317from ..utils import helper
1318
1319from typing import TYPE_CHECKING
1320if TYPE_CHECKING:
1321    from .types import MyType
1322
1323def standalone_func(x, y):
1324    return x + y
1325
1326async def async_handler():
1327    pass
1328
1329@staticmethod
1330def decorated_func():
1331    pass
1332
1333class BaseModel:
1334    pass
1335
1336class MyModel(BaseModel):
1337    def __init__(self):
1338        pass
1339
1340    @property
1341    def name(self):
1342        return self._name
1343
1344    async def save(self):
1345        pass
1346
1347    def _private_method(self):
1348        pass
1349
1350x = 42
1351"#;
1352        let result = parse_python(source);
1353
1354        // Symbols
1355        assert!(result
1356            .symbols
1357            .iter()
1358            .any(|s| s.name == "standalone_func" && s.kind == SymbolKind::Function));
1359        assert!(result
1360            .symbols
1361            .iter()
1362            .any(|s| s.name == "async_handler" && s.kind == SymbolKind::Function && s.is_async));
1363        assert!(result
1364            .symbols
1365            .iter()
1366            .any(|s| s.name == "decorated_func" && s.kind == SymbolKind::Function));
1367        assert!(result
1368            .symbols
1369            .iter()
1370            .any(|s| s.name == "BaseModel" && s.kind == SymbolKind::Class));
1371        assert!(result
1372            .symbols
1373            .iter()
1374            .any(|s| s.name == "MyModel" && s.kind == SymbolKind::Class));
1375        assert!(result
1376            .symbols
1377            .iter()
1378            .any(|s| s.name == "__init__" && s.kind == SymbolKind::Method));
1379        assert!(result
1380            .symbols
1381            .iter()
1382            .any(|s| s.name == "name" && s.kind == SymbolKind::Property));
1383        assert!(result
1384            .symbols
1385            .iter()
1386            .any(|s| s.name == "save" && s.kind == SymbolKind::Method && s.is_async));
1387        assert!(result
1388            .symbols
1389            .iter()
1390            .any(|s| s.name == "_private_method" && s.visibility == Visibility::Private));
1391        assert!(result
1392            .symbols
1393            .iter()
1394            .any(|s| s.name == "x" && s.kind == SymbolKind::Variable));
1395
1396        // Edges
1397        let contains_count = result
1398            .edges
1399            .iter()
1400            .filter(|e| e.kind == EdgeKind::Contains)
1401            .count();
1402        assert!(
1403            contains_count >= 5,
1404            "expected >= 5 Contains edges, got {contains_count}"
1405        );
1406
1407        let child_of_count = result
1408            .edges
1409            .iter()
1410            .filter(|e| e.kind == EdgeKind::ChildOf)
1411            .count();
1412        assert!(
1413            child_of_count >= 3,
1414            "expected >= 3 ChildOf edges, got {child_of_count}"
1415        );
1416
1417        let extends_count = result
1418            .edges
1419            .iter()
1420            .filter(|e| e.kind == EdgeKind::Extends)
1421            .count();
1422        assert_eq!(extends_count, 1, "expected 1 Extends edge");
1423
1424        // Imports
1425        assert!(result.imports.iter().any(|i| i.specifier == "os"));
1426        assert!(result
1427            .imports
1428            .iter()
1429            .any(|i| i.specifier == "numpy" && i.names[0].alias == Some("np".to_string())));
1430        assert!(result.imports.iter().any(|i| i.specifier == "os.path"));
1431        assert!(result.imports.iter().any(|i| i.specifier == ".models"));
1432        assert!(result.imports.iter().any(|i| i.specifier == "..utils"));
1433        assert!(result
1434            .imports
1435            .iter()
1436            .any(|i| i.specifier == ".types" && i.is_type_only));
1437    }
1438}