Skip to main content

ucm_ingest/
code_parser.rs

1//! Code parser — extracts entities and relationships from source code.
2//!
3//! Extracts functions, classes, structs, and import relationships using
4//! language-specific pattern matching. Produces `UcmEvent` streams that
5//! the graph projection applies to build the dependency graph.
6//!
7//! Supported languages: TypeScript, JavaScript, Rust, Python.
8//!
9//! **Edges produced:** Each file gets a `Module` entity. Functions/structs
10//! emit `DependsOn` edges to their module. Import statements emit `Imports`
11//! edges from the importing module to the imported symbol. This gives the
12//! BFS traversal a complete path: `callerFn → callerModule → importedSymbol`.
13//!
14//! Production upgrade path: replace `extract_functions_*` with tree-sitter
15//! grammars for sub-millisecond incremental re-parsing and error recovery.
16//! The event API surface stays identical — only the extraction backend changes.
17
18use std::collections::HashMap;
19use std::path::Path;
20use ucm_graph_core::edge::*;
21use ucm_graph_core::entity::*;
22use ucm_graph_core::event::*;
23
/// Maps Rust crate names (underscored, e.g. `ucm_graph_core`) to their `src/`
/// directory paths relative to the scan root (e.g. `ucm-core/src`).
///
/// The key is compared against the first path segment of a `use` statement;
/// the value becomes the directory under which the imported module file is
/// looked up.
///
/// Built by the CLI scanner from workspace `Cargo.toml` files.
pub type RustCrateMap = HashMap<String, String>;
28
/// The top-level Python package name for absolute import resolution.
///
/// e.g. for marimo, this is "marimo" — so `from marimo._runtime.dataflow import X`
/// resolves to `marimo/_runtime/dataflow.py`.
///
/// `None` means no package root is known; this is what
/// `parse_source_code_with_context` passes.
pub type PythonPackageRoot = Option<String>;
33
34/// Parse source code and emit entity + dependency events.
35///
36/// # Arguments
37/// - `file_path` — path relative to project root (used as entity ID component)
38/// - `source`    — raw source text
39/// - `language`  — "typescript", "javascript", "rust", or "python"
40///
41/// # Returns
42/// Stream of `UcmEvent`s ready for `GraphProjection::apply_event`.
43pub fn parse_source_code(file_path: &str, source: &str, language: &str) -> Vec<UcmEvent> {
44    parse_source_code_with_context(file_path, source, language, &HashMap::new())
45}
46
47/// Parse source code with project context for cross-file edge resolution.
48///
49/// `crate_map` maps Rust crate names (underscored) to their src/ directory
50/// paths relative to the scan root. For non-Rust languages this is ignored.
51pub fn parse_source_code_with_context(
52    file_path: &str,
53    source: &str,
54    language: &str,
55    crate_map: &RustCrateMap,
56) -> Vec<UcmEvent> {
57    parse_source_code_full(file_path, source, language, crate_map, &None)
58}
59
60/// Parse source code with full context including Python package root.
61pub fn parse_source_code_full(
62    file_path: &str,
63    source: &str,
64    language: &str,
65    crate_map: &RustCrateMap,
66    python_package_root: &PythonPackageRoot,
67) -> Vec<UcmEvent> {
68    let mut events = Vec::new();
69
70    // 1. Emit a module entity for this file so import edges have a valid source.
71    let module_id = EntityId::local(file_path, "module");
72    events.push(UcmEvent::new(EventPayload::EntityDiscovered {
73        entity_id: module_id.clone(),
74        kind: EntityKind::Module {
75            language: language.to_string(),
76            exports: vec![],
77        },
78        name: file_name_of(file_path),
79        file_path: file_path.to_string(),
80        language: language.to_string(),
81        source: DiscoverySource::StaticAnalysis,
82        line_range: None,
83    }));
84
85    // 2. Extract function/struct entities and wire them to the module.
86    //    For Python, also extract class-method associations.
87    if matches!(language, "python" | "py") {
88        let py_entities = extract_python_entities(source);
89        for ent in &py_entities {
90            match ent {
91                PythonEntity::Function {
92                    name,
93                    is_async,
94                    line_start,
95                    line_end,
96                    class_name,
97                } => {
98                    let display_name = if let Some(cls) = class_name {
99                        format!("{cls}.{name}")
100                    } else {
101                        name.clone()
102                    };
103                    let fn_id = EntityId::local(file_path, &display_name);
104                    events.push(UcmEvent::new(EventPayload::EntityDiscovered {
105                        entity_id: fn_id.clone(),
106                        kind: EntityKind::Function {
107                            is_async: *is_async,
108                            parameter_count: 0,
109                            return_type: None,
110                        },
111                        name: display_name.clone(),
112                        file_path: file_path.to_string(),
113                        language: language.to_string(),
114                        source: DiscoverySource::StaticAnalysis,
115                        line_range: Some((*line_start, *line_end)),
116                    }));
117                    events.push(UcmEvent::new(EventPayload::DependencyLinked {
118                        source_entity: fn_id.clone(),
119                        target_entity: module_id.clone(),
120                        relation_type: RelationType::DependsOn,
121                        confidence: 0.99,
122                        source: DiscoverySource::StaticAnalysis,
123                        description: format!("{display_name} is defined in {file_path}"),
124                    }));
125                    // If method belongs to a class, emit Contains edge
126                    if let Some(cls) = class_name {
127                        let class_id = EntityId::local(file_path, cls);
128                        events.push(UcmEvent::new(EventPayload::DependencyLinked {
129                            source_entity: class_id,
130                            target_entity: fn_id,
131                            relation_type: RelationType::Contains,
132                            confidence: 0.99,
133                            source: DiscoverySource::StaticAnalysis,
134                            description: format!("{cls} contains method {name}"),
135                        }));
136                    }
137                }
138                PythonEntity::Class {
139                    name,
140                    line_num,
141                    bases,
142                } => {
143                    let class_id = EntityId::local(file_path, name);
144                    events.push(UcmEvent::new(EventPayload::EntityDiscovered {
145                        entity_id: class_id.clone(),
146                        kind: EntityKind::DataModel { fields: vec![] },
147                        name: name.clone(),
148                        file_path: file_path.to_string(),
149                        language: language.to_string(),
150                        source: DiscoverySource::StaticAnalysis,
151                        line_range: Some((*line_num, line_num + 5)),
152                    }));
153                    events.push(UcmEvent::new(EventPayload::DependencyLinked {
154                        source_entity: class_id.clone(),
155                        target_entity: module_id.clone(),
156                        relation_type: RelationType::DependsOn,
157                        confidence: 0.99,
158                        source: DiscoverySource::StaticAnalysis,
159                        description: format!("{name} is defined in {file_path}"),
160                    }));
161                    // Emit Extends edges for each base class
162                    for base in bases {
163                        let base_id = EntityId::local(file_path, base);
164                        events.push(UcmEvent::new(EventPayload::DependencyLinked {
165                            source_entity: class_id.clone(),
166                            target_entity: base_id,
167                            relation_type: RelationType::Extends,
168                            confidence: 0.90,
169                            source: DiscoverySource::StaticAnalysis,
170                            description: format!("{name} extends {base}"),
171                        }));
172                    }
173                }
174            }
175        }
176    } else {
177        let functions = match language {
178            "rust" | "rs" => extract_functions_rust(source),
179            _ => extract_functions_ts(source),
180        };
181
182        for (name, is_async, line_start, line_end) in functions {
183            let fn_id = EntityId::local(file_path, &name);
184            events.push(UcmEvent::new(EventPayload::EntityDiscovered {
185                entity_id: fn_id.clone(),
186                kind: EntityKind::Function {
187                    is_async,
188                    parameter_count: 0,
189                    return_type: None,
190                },
191                name: name.clone(),
192                file_path: file_path.to_string(),
193                language: language.to_string(),
194                source: DiscoverySource::StaticAnalysis,
195                line_range: Some((line_start, line_end)),
196            }));
197            // fn → module: "this function lives in this module"
198            events.push(UcmEvent::new(EventPayload::DependencyLinked {
199                source_entity: fn_id,
200                target_entity: module_id.clone(),
201                relation_type: RelationType::DependsOn,
202                confidence: 0.99,
203                source: DiscoverySource::StaticAnalysis,
204                description: format!("{name} is defined in {file_path}"),
205            }));
206        }
207
208        // 3. Extract class / struct entities.
209        let structs = match language {
210            "rust" | "rs" => extract_structs_rust(source),
211            _ => extract_classes_ts(source),
212        };
213
214        for (name, line_num) in structs {
215            let struct_id = EntityId::local(file_path, &name);
216            events.push(UcmEvent::new(EventPayload::EntityDiscovered {
217                entity_id: struct_id.clone(),
218                kind: EntityKind::DataModel { fields: vec![] },
219                name: name.clone(),
220                file_path: file_path.to_string(),
221                language: language.to_string(),
222                source: DiscoverySource::StaticAnalysis,
223                line_range: Some((line_num, line_num + 5)),
224            }));
225            events.push(UcmEvent::new(EventPayload::DependencyLinked {
226                source_entity: struct_id,
227                target_entity: module_id.clone(),
228                relation_type: RelationType::DependsOn,
229                confidence: 0.99,
230                source: DiscoverySource::StaticAnalysis,
231                description: format!("{name} is defined in {file_path}"),
232            }));
233        }
234    }
235
236    // 4. Extract API routes (TypeScript/JS only).
237    if matches!(language, "typescript" | "javascript" | "ts" | "js") {
238        for (method, route, _handler, line_num) in extract_routes_ts(source) {
239            let route_id = EntityId::local(file_path, &format!("{method}:{route}"));
240            events.push(UcmEvent::new(EventPayload::EntityDiscovered {
241                entity_id: route_id.clone(),
242                kind: EntityKind::ApiEndpoint {
243                    method: method.clone(),
244                    route: route.clone(),
245                    handler: String::new(),
246                },
247                name: format!("{method} {route}"),
248                file_path: file_path.to_string(),
249                language: language.to_string(),
250                source: DiscoverySource::StaticAnalysis,
251                line_range: Some((line_num, line_num)),
252            }));
253            events.push(UcmEvent::new(EventPayload::DependencyLinked {
254                source_entity: route_id,
255                target_entity: module_id.clone(),
256                relation_type: RelationType::DependsOn,
257                confidence: 0.99,
258                source: DiscoverySource::StaticAnalysis,
259                description: format!("{method} {route} is defined in {file_path}"),
260            }));
261        }
262    }
263
264    // 5. Extract imports → module:file imports symbol.
265    //    module_id → imported symbol entity.
266    //    When the imported symbol changes, BFS propagates to this module,
267    //    then to all functions/structs that DependsOn this module.
268    let imports = match language {
269        "rust" | "rs" => extract_imports_rust(source, file_path, crate_map),
270        "python" | "py" => extract_imports_python(source, file_path, python_package_root),
271        _ => extract_imports_ts(source, file_path),
272    };
273
274    for (symbols, from_path, line_num) in imports {
275        for symbol in &symbols {
276            events.push(UcmEvent::new(EventPayload::DependencyLinked {
277                source_entity: module_id.clone(),
278                target_entity: EntityId::local(&from_path, symbol),
279                relation_type: RelationType::Imports,
280                confidence: 0.95,
281                source: DiscoverySource::StaticAnalysis,
282                description: format!("import {symbol} from '{from_path}' at line {line_num}"),
283            }));
284        }
285    }
286
287    events
288}
289
290// ── TypeScript / JavaScript ───────────────────────────────────────────────────
291
292fn extract_functions_ts(source: &str) -> Vec<(String, bool, usize, usize)> {
293    let mut out = Vec::new();
294    for (i, line) in source.lines().enumerate() {
295        let t = line.trim();
296        let is_async = t.contains("async");
297        if let Some(name) = ts_function_name(t) {
298            out.push((name, is_async, i + 1, i + 20));
299        }
300    }
301    out
302}
303
/// Extracts the name of the function defined on a single TS/JS source line.
///
/// Two shapes are recognised:
/// - `function name` with any leading modifiers (`export`, `async`, `default`);
/// - `const name = …` / `export const name = …` where the right-hand side
///   contains `(` or `=>`.
///
/// Comment lines (`//`, `/*`, `*`) never match, and `function` must start a
/// word, so text such as `malfunction x` is not taken for a declaration.
/// Returns `None` when the line defines no named function.
fn ts_function_name(line: &str) -> Option<String> {
    const KEYWORD: &str = "function ";
    let is_ident = |c: char| c.is_alphanumeric() || c == '_' || c == '$';

    let line = line.trim_start();
    if line.starts_with("//") || line.starts_with("/*") || line.starts_with('*') {
        return None;
    }

    // First `function ` that is a keyword rather than the tail of a longer word.
    let keyword_at = line
        .match_indices(KEYWORD)
        .map(|(pos, _)| pos)
        .find(|&pos| line[..pos].chars().next_back().map_or(true, |c| !is_ident(c)));
    if let Some(pos) = keyword_at {
        let name: String = line[pos + KEYWORD.len()..]
            .chars()
            .take_while(|c| is_ident(*c))
            .collect();
        if !name.is_empty() {
            return Some(name);
        }
        // Anonymous `function (…)`: fall through, it may be bound to a `const`.
    }

    // const name = ( or const name = async (
    let binding = line
        .strip_prefix("export const ")
        .or_else(|| line.strip_prefix("const "))?;
    let eq = binding.find('=')?;
    let name: String = binding[..eq]
        .trim()
        .chars()
        .take_while(|c| is_ident(*c))
        .collect();
    let value = &binding[eq..];
    if !name.is_empty() && (value.contains('(') || value.contains("=>")) {
        Some(name)
    } else {
        None
    }
}
336
/// Scans TypeScript/JavaScript source line by line for class declarations.
///
/// A line counts when it contains the keyword `class` followed by a name and
/// also contains `{`. Returns `(class_name, line_number)` pairs with 1-based
/// line numbers.
///
/// Comment lines (`//`, `/*`, `*`) are ignored, and `class` must start a word,
/// so `subclass` is not taken for the keyword.
fn extract_classes_ts(source: &str) -> Vec<(String, usize)> {
    const KEYWORD: &str = "class ";
    let is_ident = |c: char| c.is_alphanumeric() || c == '_' || c == '$';

    let mut out = Vec::new();
    for (idx, line) in source.lines().enumerate() {
        let text = line.trim();
        if text.starts_with("//") || text.starts_with("/*") || text.starts_with('*') {
            continue;
        }
        if !text.contains('{') {
            continue;
        }
        // First `class ` that is not the tail of a longer identifier.
        let keyword_at = text
            .match_indices(KEYWORD)
            .map(|(pos, _)| pos)
            .find(|&pos| text[..pos].chars().next_back().map_or(true, |c| !is_ident(c)));
        if let Some(pos) = keyword_at {
            let name: String = text[pos + KEYWORD.len()..]
                .chars()
                .take_while(|c| is_ident(*c))
                .collect();
            if !name.is_empty() {
                out.push((name, idx + 1));
            }
        }
    }
    out
}
355
356fn extract_routes_ts(source: &str) -> Vec<(String, String, String, usize)> {
357    let mut out = Vec::new();
358    for (i, line) in source.lines().enumerate() {
359        let t = line.trim();
360        for method in &["get", "post", "put", "delete", "patch"] {
361            for prefix in &[format!("app.{method}("), format!("router.{method}(")] {
362                if t.contains(prefix.as_str()) {
363                    if let Some(route) = ts_route_path(t) {
364                        out.push((method.to_uppercase(), route, String::new(), i + 1));
365                    }
366                }
367            }
368        }
369    }
370    out
371}
372
/// Reads the route string literal out of a route-registration call.
///
/// Looks at the text after the first `(` on the line. The literal must open
/// before any further `(`, which keeps strings inside a handler callback from
/// being taken for the route. It is delimited by whichever quote character
/// (`'`, `"` or a backtick) opens it, so a path such as `"/it's"` is read whole.
/// An unterminated literal yields the rest of the line.
///
/// Returns `None` when the line has no `(` or no literal in that position.
fn ts_route_path(line: &str) -> Option<String> {
    let (_, args) = line.split_once('(')?;
    // Only search up to the next opening parenthesis for the opening quote.
    let window_end = args.find('(').unwrap_or(args.len());
    let open = args[..window_end].find(|c: char| matches!(c, '\'' | '"' | '`'))?;
    let quote = args[open..].chars().next()?;
    let literal = &args[open + quote.len_utf8()..];
    let end = literal.find(quote).unwrap_or(literal.len());
    Some(literal[..end].to_string())
}
383
384/// Returns `(symbols, resolved_path, line_number)` for TypeScript imports.
385fn extract_imports_ts(source: &str, current_file: &str) -> Vec<(Vec<String>, String, usize)> {
386    let mut out = Vec::new();
387    let dir = parent_dir(current_file);
388    for (i, line) in source.lines().enumerate() {
389        let t = line.trim();
390        if t.starts_with("import ") && t.contains("from ") {
391            let symbols = ts_import_symbols(t);
392            if let Some(raw_path) = ts_import_path(t) {
393                // Only follow relative imports — node_modules won't be in the graph.
394                if raw_path.starts_with("./") || raw_path.starts_with("../") {
395                    let resolved = resolve_path(&dir, &raw_path, &["ts", "tsx", "js"]);
396                    if !symbols.is_empty() {
397                        out.push((symbols, resolved, i + 1));
398                    }
399                }
400            }
401        }
402    }
403    out
404}
405
/// Lists the symbols bound by one TS/JS `import` statement line.
///
/// - Named imports (`{ a, b as c }`) contribute the exported names (`a`, `b`);
///   an inline `type ` modifier (`{ type Foo }`) is stripped.
/// - A default import (`import Foo from …`) contributes `Foo`, including when
///   it is combined with named imports (`import Foo, { Bar } from …`); the
///   default name comes first.
/// - `import type …` and namespace imports (`import * as ns`) contribute no
///   default name.
///
/// Braces that do not form a `{ … }` pair in that order are ignored instead of
/// being sliced.
fn ts_import_symbols(line: &str) -> Vec<String> {
    let is_ident = |c: char| c.is_alphanumeric() || c == '_';
    let mut symbols = Vec::new();

    // Default import: the identifier directly after `import `.
    let clause = line.strip_prefix("import ").unwrap_or("");
    let default_name: String = clause.chars().take_while(|c| is_ident(*c)).collect();
    if !default_name.is_empty() && default_name != "type" {
        symbols.push(default_name);
    }

    // Named imports: the text between `{` and the first `}` that follows it.
    let named = line.find('{').and_then(|open| {
        let after_open = &line[open + 1..];
        after_open.find('}').map(|close| &after_open[..close])
    });
    if let Some(list) = named {
        symbols.extend(list.split(',').filter_map(|item| {
            let exported = item.trim().split(" as ").next().unwrap_or("").trim();
            let exported = exported
                .strip_prefix("type ")
                .map_or(exported, str::trim_start);
            if exported.is_empty() {
                None
            } else {
                Some(exported.to_string())
            }
        }));
    }
    symbols
}
433
/// Reads the module specifier out of a TS/JS `import … from '…'` line.
///
/// Uses the first `from ` that is directly followed by a quoted string, so a
/// `from ` inside an imported name or a trailing comment is not taken for the
/// keyword. The literal ends at the same quote character that opened it
/// (`'` or `"`); an unterminated literal yields the rest of the line.
///
/// Returns `None` when the line has no such `from '…'` part.
fn ts_import_path(line: &str) -> Option<String> {
    line.match_indices("from ").find_map(|(pos, keyword)| {
        let spec = line[pos + keyword.len()..].trim_start();
        let quote = spec.chars().next().filter(|c| matches!(*c, '\'' | '"'))?;
        let literal = &spec[quote.len_utf8()..];
        let end = literal.find(quote).unwrap_or(literal.len());
        Some(literal[..end].to_string())
    })
}
444
445// ── Rust ─────────────────────────────────────────────────────────────────────
446
447fn extract_functions_rust(source: &str) -> Vec<(String, bool, usize, usize)> {
448    let mut out = Vec::new();
449    for (i, line) in source.lines().enumerate() {
450        let t = line.trim();
451        // Skip test functions and doc comments
452        if t.starts_with("//") || t.starts_with("#[test") {
453            continue;
454        }
455        if let Some(name) = rust_fn_name(t) {
456            let is_async = t.contains("async ");
457            out.push((name, is_async, i + 1, i + 30));
458        }
459    }
460    out
461}
462
/// Extracts the function name from a line that starts a Rust `fn` item.
///
/// Any sequence of leading qualifiers is accepted: `pub`, `pub(…)` (including
/// `pub(in path)`), `default`, `const`, `async`, `unsafe` and `extern` with an
/// optional ABI string. Returns `None` when the line does not begin a
/// function definition, e.g. `pub const MAX: usize = 4;`.
fn rust_fn_name(line: &str) -> Option<String> {
    let mut rest = line.trim_start();
    loop {
        if let Some(signature) = rest.strip_prefix("fn ") {
            let name: String = signature
                .trim_start()
                .chars()
                .take_while(|c| c.is_alphanumeric() || *c == '_')
                .collect();
            return if name.is_empty() { None } else { Some(name) };
        }

        // Peel exactly one qualifier per iteration. Every branch consumes
        // input or returns, so the loop terminates.
        let after_qualifier = if let Some(scoped) = rest.strip_prefix("pub(") {
            scoped.split_once(')')?.1
        } else if let Some(after_extern) = rest.strip_prefix("extern ") {
            let after_extern = after_extern.trim_start();
            match after_extern.strip_prefix('"') {
                // Skip the ABI string, e.g. `"C"` or `"system"`.
                Some(abi) => abi.split_once('"')?.1,
                None => after_extern,
            }
        } else {
            ["pub ", "default ", "const ", "async ", "unsafe "]
                .iter()
                .find_map(|qualifier| rest.strip_prefix(*qualifier))?
        };
        rest = after_qualifier.trim_start();
    }
}
483
/// Scans Rust source line by line for `struct`, `enum` and `trait` declarations.
///
/// Returns `(type_name, line_number)` pairs with 1-based line numbers. Any
/// visibility (`pub`, `pub(crate)`, `pub(super)`, `pub(in path)`) and a
/// leading `unsafe` (as in `unsafe trait`) are accepted in front of the
/// keyword. Comment lines do not match because they do not start with a
/// keyword.
fn extract_structs_rust(source: &str) -> Vec<(String, usize)> {
    const KEYWORDS: [&str; 3] = ["struct ", "enum ", "trait "];

    let mut out = Vec::new();
    for (idx, line) in source.lines().enumerate() {
        let mut item = line.trim();

        // Strip the visibility qualifier, if any.
        if let Some(scoped) = item.strip_prefix("pub(") {
            match scoped.split_once(')') {
                Some((_, tail)) => item = tail.trim_start(),
                None => continue,
            }
        } else if let Some(tail) = item.strip_prefix("pub ") {
            item = tail.trim_start();
        }
        if let Some(tail) = item.strip_prefix("unsafe ") {
            item = tail.trim_start();
        }

        let declared = KEYWORDS.iter().find_map(|keyword| item.strip_prefix(*keyword));
        if let Some(rest) = declared {
            let name: String = rest
                .trim_start()
                .chars()
                .take_while(|c| c.is_alphanumeric() || *c == '_')
                .collect();
            if !name.is_empty() {
                out.push((name, idx + 1));
            }
        }
    }
    out
}
519
520/// Extract Rust `use` imports and resolve them to file paths that match scanned entity IDs.
521///
522/// Handles three import forms:
523/// 1. `use crate::module::Symbol`   → resolves relative to current crate's `src/` root
524/// 2. `use super::module::Symbol`   → resolves relative to parent module directory
525/// 3. `use sibling_crate::mod::Sym` → looks up crate name in `crate_map`
526///
527/// Symbols imported from `std` and other external crates (not in crate_map) are skipped.
528fn extract_imports_rust(
529    source: &str,
530    current_file: &str,
531    crate_map: &RustCrateMap,
532) -> Vec<(Vec<String>, String, usize)> {
533    let mut out = Vec::new();
534
535    // Infer current crate's src root from file path.
536    // e.g. "ucm-core/src/graph.rs" → crate_src_root = "ucm-core/src"
537    //      "ucm-api/src/main.rs"   → crate_src_root = "ucm-api/src"
538    let crate_src_root = infer_crate_src_root(current_file);
539    // e.g. "ucm-core/src/graph.rs" → file_in_crate = "graph.rs"
540    let file_in_crate = current_file
541        .strip_prefix(&format!("{crate_src_root}/"))
542        .unwrap_or(current_file);
543
544    for (i, line) in source.lines().enumerate() {
545        let t = line.trim();
546        if !t.starts_with("use ") {
547            continue;
548        }
549        let rest = &t[4..]; // strip "use "
550
551        // Determine target src root + module path based on import prefix
552        let (target_src_root, rest_after_prefix) = if let Some(r) = rest.strip_prefix("crate::") {
553            // use crate::foo::Bar → resolve from own crate root
554            (crate_src_root.clone(), r)
555        } else if let Some(r) = rest.strip_prefix("super::") {
556            // use super::foo::Bar → resolve from parent module dir
557            let parent = rust_parent_module_dir(&crate_src_root, file_in_crate);
558            (parent, r)
559        } else if let Some(r) = rest.strip_prefix("self::") {
560            // use self::foo::Bar → resolve from current module dir
561            let current_dir = rust_current_module_dir(&crate_src_root, file_in_crate);
562            (current_dir, r)
563        } else {
564            // Could be a sibling crate import: use ucm_graph_core::graph::UcmGraph
565            // Extract the first segment and look it up in crate_map
566            let first_segment = rest.split("::").next().unwrap_or("");
567            if let Some(sibling_root) = crate_map.get(first_segment) {
568                let after = rest
569                    .strip_prefix(first_segment)
570                    .and_then(|s| s.strip_prefix("::"))
571                    .unwrap_or("");
572                (sibling_root.clone(), after)
573            } else {
574                // External crate (std, serde, etc.) — skip
575                continue;
576            }
577        };
578
579        // Parse module_path::Symbol or module::{A, B, C}
580        let cleaned = rest_after_prefix.trim_end_matches(';');
581        let (module_segments, symbols) = if cleaned.contains('{') {
582            // use crate::foo::{A, B, C}
583            let brace_start = cleaned.find('{').unwrap_or(cleaned.len());
584            let prefix = cleaned[..brace_start].trim_end_matches("::");
585            let inner = cleaned
586                .get(brace_start + 1..)
587                .and_then(|s| s.split('}').next())
588                .unwrap_or("");
589            let syms: Vec<String> = inner
590                .split(',')
591                .map(|s| {
592                    s.trim()
593                        .split(" as ")
594                        .next()
595                        .unwrap_or("")
596                        .trim()
597                        .to_string()
598                })
599                .filter(|s| !s.is_empty() && s != "*")
600                .collect();
601            (prefix.to_string(), syms)
602        } else {
603            // use crate::foo::bar::Baz → module=foo::bar, symbol=Baz
604            let parts: Vec<&str> = cleaned.split("::").collect();
605            if parts.len() < 2 {
606                // Just `use crate::module;` — module itself is the symbol
607                if parts.len() == 1 && !parts[0].is_empty() && parts[0] != "*" {
608                    // Importing a module as a whole: target is module.rs#module
609                    let mod_name = parts[0].to_string();
610                    let file_path = format!("{target_src_root}/{mod_name}.rs");
611                    out.push((vec!["module".to_string()], file_path, i + 1));
612                }
613                continue;
614            }
615            let symbol = parts.last().unwrap().to_string();
616            if symbol == "*" {
617                continue;
618            }
619            let mod_parts = &parts[..parts.len() - 1];
620            (mod_parts.join("::"), vec![symbol])
621        };
622
623        if symbols.is_empty() {
624            continue;
625        }
626
627        // Convert module::path to file path: foo::bar → {target_src_root}/foo/bar.rs
628        // Also try foo/bar/mod.rs convention (but .rs is more common)
629        let module_file_path = if module_segments.is_empty() {
630            // Direct import from crate root: use crate::Symbol → lib.rs or main.rs
631            // Best guess: target the src root's lib.rs
632            format!("{target_src_root}/lib.rs")
633        } else {
634            format!(
635                "{target_src_root}/{}.rs",
636                module_segments.replace("::", "/")
637            )
638        };
639
640        out.push((symbols, module_file_path, i + 1));
641    }
642    out
643}
644
645/// Infer the crate src root from a file path.
646/// "ucm-core/src/graph.rs" → "ucm-core/src"
647/// "src/main.rs" → "src"
648/// "crates/ucm-core/src/entity.rs" → "crates/ucm-core/src"
649fn infer_crate_src_root(file_path: &str) -> String {
650    // Find the last occurrence of "/src/" and take everything up to and including "src"
651    if let Some(pos) = file_path.rfind("/src/") {
652        file_path[..pos + 4].to_string()
653    } else if file_path.starts_with("src/") {
654        "src".to_string()
655    } else {
656        // Fallback: use parent directory
657        parent_dir(file_path)
658    }
659}
660
/// Returns the directory that holds the child modules of `super`, for
/// resolving `use super::…` from the given file.
///
/// The parent module of `sub/module.rs` is `sub`, whose children live in
/// `sub/`. Only a `mod.rs` file stands for its directory itself, so its
/// `super` lies one directory further up.
///
/// - `crate_src_root="ucm-core/src"`, `file_in_crate="graph.rs"`      → `"ucm-core/src"`
/// - `crate_src_root="ucm-core/src"`, `file_in_crate="sub/mod.rs"`    → `"ucm-core/src"`
/// - `crate_src_root="ucm-core/src"`, `file_in_crate="sub/module.rs"` → `"ucm-core/src/sub"`
///
/// The result never leaves `crate_src_root`.
fn rust_parent_module_dir(crate_src_root: &str, file_in_crate: &str) -> String {
    let (dir, file_name) = file_in_crate
        .rsplit_once('/')
        .unwrap_or(("", file_in_crate));
    let super_dir = if file_name == "mod.rs" {
        // The module is the directory itself: go one level up.
        dir.rsplit_once('/').map_or("", |(parent, _)| parent)
    } else {
        dir
    };
    if super_dir.is_empty() {
        crate_src_root.to_string()
    } else {
        format!("{crate_src_root}/{super_dir}")
    }
}
679
/// Returns the directory that holds the child modules of the current module,
/// for resolving `use self::…` from the given file.
///
/// `mod.rs` (and `lib.rs` / `main.rs` directly under the src root) keep their
/// children next to them. Any other file `name.rs` keeps them in a `name/`
/// directory beside it.
///
/// - `("src", "lib.rs")`                 → `"src"`
/// - `("src", "graph.rs")`               → `"src/graph"`
/// - `("ucm-core/src", "sub/mod.rs")`    → `"ucm-core/src/sub"`
/// - `("ucm-core/src", "sub/module.rs")` → `"ucm-core/src/sub/module"`
fn rust_current_module_dir(crate_src_root: &str, file_in_crate: &str) -> String {
    let (dir, file_name) = file_in_crate
        .rsplit_once('/')
        .unwrap_or(("", file_in_crate));
    let stem = file_name.strip_suffix(".rs").unwrap_or(file_name);
    let owns_its_directory =
        stem == "mod" || (dir.is_empty() && matches!(stem, "lib" | "main"));

    let mut path = crate_src_root.to_string();
    if !dir.is_empty() {
        path.push('/');
        path.push_str(dir);
    }
    if !owns_its_directory {
        path.push('/');
        path.push_str(stem);
    }
    path
}
689
690// ── Python ───────────────────────────────────────────────────────────────────
691
/// A Python entity extracted with indentation awareness.
///
/// Produced by `extract_python_entities` and turned into graph events by
/// `parse_source_code_full`.
#[derive(Debug)]
enum PythonEntity {
    /// A `def` / `async def`, either at module level or inside a class.
    Function {
        /// Bare function name, without any class prefix.
        name: String,
        /// True for `async def`.
        is_async: bool,
        /// 1-based line of the `def`.
        line_start: usize,
        /// Estimated last line; the extractor uses a fixed window rather than
        /// the real end of the body.
        line_end: usize,
        /// If this function is a method, the enclosing class name.
        class_name: Option<String>,
    },
    /// A `class` definition.
    Class {
        /// Class name.
        name: String,
        /// 1-based line of the `class` statement.
        line_num: usize,
        /// Base class names from `class Foo(Bar, Baz):`.
        bases: Vec<String>,
    },
}
710
711/// Extract Python functions and classes with indentation-based class-method association.
712fn extract_python_entities(source: &str) -> Vec<PythonEntity> {
713    let mut out = Vec::new();
714    let mut current_class: Option<(String, usize)> = None; // (name, indent_col)
715
716    for (i, line) in source.lines().enumerate() {
717        let trimmed = line.trim();
718        if trimmed.is_empty() || trimmed.starts_with('#') {
719            continue;
720        }
721
722        let indent = line.len() - line.trim_start().len();
723
724        // If we're inside a class and hit a line at or before the class indent
725        // (that isn't blank), we've left the class body.
726        if let Some((_, class_indent)) = &current_class {
727            if indent <= *class_indent && !trimmed.is_empty() {
728                current_class = None;
729            }
730        }
731
732        // Detect class definition
733        if let Some(rest) = trimmed.strip_prefix("class ") {
734            let name: String = rest
735                .chars()
736                .take_while(|c| c.is_alphanumeric() || *c == '_')
737                .collect();
738            if !name.is_empty() {
739                let bases = parse_python_bases(rest);
740                current_class = Some((name.clone(), indent));
741                out.push(PythonEntity::Class {
742                    name,
743                    line_num: i + 1,
744                    bases,
745                });
746                continue;
747            }
748        }
749
750        // Detect function/method definition
751        if let Some(rest) = trimmed
752            .strip_prefix("async def ")
753            .or_else(|| trimmed.strip_prefix("def "))
754        {
755            let is_async = trimmed.starts_with("async ");
756            let name: String = rest
757                .chars()
758                .take_while(|c| c.is_alphanumeric() || *c == '_')
759                .collect();
760            if !name.is_empty() {
761                let class_name = current_class
762                    .as_ref()
763                    .filter(|(_, ci)| indent > *ci)
764                    .map(|(cn, _)| cn.clone());
765                out.push(PythonEntity::Function {
766                    name,
767                    is_async,
768                    line_start: i + 1,
769                    line_end: i + 20,
770                    class_name,
771                });
772            }
773        }
774    }
775    out
776}
777
/// Parse base classes from a class definition line.
/// `"Foo(Bar, Baz):"` → `["Bar", "Baz"]`
///
/// `after_class_name` is the text following the `class ` keyword; a leading
/// class name and an optional PEP 695 type-parameter list (`Foo[T]`) are
/// skipped. Bases are only read from a parenthesised list that directly
/// follows, so parentheses in a trailing comment are ignored.
///
/// For each argument:
/// - keyword arguments such as `metaclass=ABCMeta` are dropped,
/// - subscripts and call arguments are stripped (`Generic[T]` → `Generic`),
/// - dotted names are reduced to their last component (`abc.ABC` → `ABC`).
///
/// Returns an empty list when there is no argument list or when it is not
/// closed on this line (multi-line base lists are not resolved).
fn parse_python_bases(after_class_name: &str) -> Vec<String> {
    /// Reduces one argument of the base list to a bare class name, or `None`
    /// for keyword arguments and empty segments.
    fn base_name(arg: &str) -> Option<String> {
        // Keep only what precedes a subscript or call: `Mapping[str, int]` → `Mapping`.
        let head = arg
            .trim()
            .split(|c: char| c == '[' || c == '(')
            .next()
            .unwrap_or("");
        if head.contains('=') {
            return None;
        }
        let name = head.rsplit('.').next().unwrap_or("").trim();
        if name.is_empty() {
            None
        } else {
            Some(name.to_string())
        }
    }

    // Step over the class name itself (absent if the caller already removed it).
    let mut rest = after_class_name
        .trim_start()
        .trim_start_matches(|c: char| c.is_alphanumeric() || c == '_')
        .trim_start();

    // Step over a PEP 695 type-parameter list: `class Foo[T](Base):`.
    if rest.starts_with('[') {
        let mut depth = 0usize;
        let mut close = None;
        for (idx, ch) in rest.char_indices() {
            match ch {
                '[' => depth += 1,
                ']' => {
                    depth -= 1;
                    if depth == 0 {
                        close = Some(idx);
                        break;
                    }
                }
                _ => {}
            }
        }
        rest = match close {
            Some(idx) => rest[idx + 1..].trim_start(),
            None => return Vec::new(),
        };
    }

    let args = match rest.strip_prefix('(') {
        Some(args) => args,
        None => return Vec::new(),
    };

    // Split on top-level commas only, so commas inside `[...]` or `(...)`
    // do not break a single base into pieces.
    let mut bases = Vec::new();
    let mut depth = 0usize;
    let mut arg_start = 0usize;
    for (idx, ch) in args.char_indices() {
        match ch {
            '(' | '[' | '{' => depth += 1,
            ')' | ']' | '}' if depth > 0 => depth -= 1,
            ')' => {
                // Closing parenthesis of the base list itself.
                bases.extend(base_name(&args[arg_start..idx]));
                return bases;
            }
            ',' if depth == 0 => {
                bases.extend(base_name(&args[arg_start..idx]));
                arg_start = idx + 1;
            }
            _ => {}
        }
    }

    // The list was never closed on this line.
    Vec::new()
}
801
802/// Extract Python imports: relative (`from .mod import X`) and absolute (`from pkg.mod import X`).
803fn extract_imports_python(
804    source: &str,
805    current_file: &str,
806    package_root: &PythonPackageRoot,
807) -> Vec<(Vec<String>, String, usize)> {
808    let mut out = Vec::new();
809    let dir = parent_dir(current_file);
810
811    for (i, line) in source.lines().enumerate() {
812        let t = line.trim();
813
814        // Handle `from .module import Foo, Bar` (relative imports)
815        if let Some(rest) = t.strip_prefix("from .") {
816            if let Some(imp_pos) = rest.find(" import ") {
817                let mod_part = &rest[..imp_pos];
818                let imp_part = &rest[imp_pos + 8..];
819                let symbols = parse_python_import_symbols(imp_part);
820                // Resolve dots: `from ..foo import X` means go up two levels
821                let dots = mod_part.chars().take_while(|c| *c == '.').count();
822                let module_name = mod_part.trim_start_matches('.');
823                let mut base = dir.clone();
824                for _ in 0..dots {
825                    base = parent_dir(&base);
826                }
827                let path = if module_name.is_empty() {
828                    format!("{base}/__init__.py")
829                } else {
830                    format!("{base}/{}.py", module_name.replace('.', "/"))
831                };
832                if !symbols.is_empty() {
833                    out.push((symbols, path, i + 1));
834                }
835            }
836            continue;
837        }
838
839        // Handle `from package.sub.module import X, Y` (absolute imports)
840        if let Some(rest) = t.strip_prefix("from ") {
841            if let Some(imp_pos) = rest.find(" import ") {
842                let mod_part = &rest[..imp_pos];
843                let imp_part = &rest[imp_pos + 8..];
844
845                // Only resolve if it matches the project's package root
846                if let Some(pkg) = package_root {
847                    if mod_part.starts_with(pkg.as_str()) {
848                        let symbols = parse_python_import_symbols(imp_part);
849                        // Strip package root prefix and convert dots to slashes:
850                        // `marimo._runtime.dataflow` → `_runtime/dataflow.py`
851                        // The package root maps to the scan directory itself.
852                        let after_pkg = mod_part
853                            .strip_prefix(pkg.as_str())
854                            .unwrap_or(mod_part)
855                            .trim_start_matches('.');
856                        let path = if after_pkg.is_empty() {
857                            "__init__.py".to_string()
858                        } else {
859                            format!("{}.py", after_pkg.replace('.', "/"))
860                        };
861                        if !symbols.is_empty() {
862                            out.push((symbols, path, i + 1));
863                        }
864                    }
865                }
866            }
867            continue;
868        }
869
870        // Handle `import package.sub.module` (bare absolute imports)
871        if let Some(rest) = t.strip_prefix("import ") {
872            if let Some(pkg) = package_root {
873                // May have comma-separated: `import foo.bar, foo.baz`
874                for mod_path in rest.split(',') {
875                    let mod_path = mod_path.trim().split(" as ").next().unwrap_or("").trim();
876                    if mod_path.starts_with(pkg.as_str()) {
877                        let after_pkg = mod_path
878                            .strip_prefix(pkg.as_str())
879                            .unwrap_or(mod_path)
880                            .trim_start_matches('.');
881                        let path = if after_pkg.is_empty() {
882                            "__init__.py".to_string()
883                        } else {
884                            format!("{}.py", after_pkg.replace('.', "/"))
885                        };
886                        out.push((vec!["module".to_string()], path, i + 1));
887                    }
888                }
889            }
890        }
891    }
892    out
893}
894
/// Parse symbols from the `import X, Y as Z, W` part of a Python import.
///
/// `imp_part` is the text after the `import` keyword on a single line.
/// Returns the imported names in source order, with `as` aliases dropped
/// (`Y as Z` → `Y`). Wildcard imports (`*`) and empty segments are skipped.
///
/// Surrounding parentheses, a trailing `# comment`, and a trailing `\` line
/// continuation are stripped first so they never end up inside a symbol name.
fn parse_python_import_symbols(imp_part: &str) -> Vec<String> {
    // Everything after `#` is a comment; import lists cannot contain strings.
    let code = imp_part.split('#').next().unwrap_or("");
    // A trailing backslash only continues the statement on the next line.
    let code = code.trim().trim_end_matches('\\').trim();
    // Handle parenthesized imports: `from x import (A, B)`
    let inner = code.trim_start_matches('(').trim_end_matches(')');

    inner
        .split(',')
        // The first whitespace-delimited token is the imported name; this
        // drops any `as alias` suffix and skips empty segments.
        .filter_map(|item| item.split_whitespace().next())
        .filter(|name| *name != "*")
        .map(str::to_string)
        .collect()
}
912
913// ── Path utilities ────────────────────────────────────────────────────────────
914
/// Returns the directory portion of `file_path` as a string.
///
/// Yields an empty string when the path has no parent (e.g. an empty path)
/// and also for a bare file name, whose parent is the empty path.
fn parent_dir(file_path: &str) -> String {
    match Path::new(file_path).parent() {
        Some(dir) => dir.to_string_lossy().into_owned(),
        None => String::new(),
    }
}
921
/// Returns the final component of `file_path` as a string.
///
/// When the path has no file name (for instance it ends in `..` or is
/// empty), the input is returned unchanged.
fn file_name_of(file_path: &str) -> String {
    let last_component = Path::new(file_path).file_name();
    match last_component {
        Some(os_name) => os_name.to_string_lossy().into_owned(),
        None => file_path.to_owned(),
    }
}
928
/// Resolve an import specifier such as `"./auth/service"` relative to `dir`,
/// the directory of the importing file.
///
/// The two paths are joined and then normalized purely lexically (no
/// filesystem access): `.` segments and any root marker are dropped, and each
/// `..` removes the segment before it. That keeps parent traversal correct
/// even for a single-level `dir`
/// (`"fraud"` + `"../pipeline/rag"` → `"pipeline/rag.ts"`).
///
/// If the result has no file extension, `.ts` is appended. The `_extensions`
/// argument is accepted but not consulted.
fn resolve_path(dir: &str, raw: &str, _extensions: &[&str]) -> String {
    use std::path::{Component, PathBuf};

    // An empty directory stands for "the current directory".
    let anchor = if dir.is_empty() { "." } else { dir };
    let combined = PathBuf::from(anchor).join(raw);

    // Lexical normalization: keep real segments, let `..` cancel the
    // previous one, and ignore `.` as well as a stray leading `/`.
    let mut kept: Vec<&std::ffi::OsStr> = Vec::new();
    for component in combined.components() {
        match component {
            Component::CurDir | Component::RootDir => {}
            Component::ParentDir => {
                kept.pop();
            }
            Component::Normal(_) | Component::Prefix(_) => kept.push(component.as_os_str()),
        }
    }

    let normalized: PathBuf = kept.into_iter().collect();
    let rendered = normalized.to_string_lossy();

    // Most TS imports omit the extension, so add `.ts` when none is present.
    let has_extension = Path::new(rendered.as_ref()).extension().is_some();
    if has_extension {
        rendered.into_owned()
    } else {
        format!("{rendered}.ts")
    }
}
970
/// Unit tests for the parser: event emission through `parse_source_code`,
/// import resolution for Rust and Python, TypeScript path resolution, and
/// Python class/method/inheritance extraction.
#[cfg(test)]
mod tests {
    use super::*;

    /// A TypeScript file with one import and one function yields at least
    /// two entities and two dependency edges.
    #[test]
    fn test_parse_typescript_emits_module_entity() {
        let source = r#"
import { DatabaseClient } from './db/client';
export async function validateToken(token: string): Promise<boolean> {
    return true;
}
"#;
        let events = parse_source_code("src/auth/service.ts", source, "typescript");

        let entity_events: Vec<_> = events
            .iter()
            .filter(|e| matches!(&e.payload, EventPayload::EntityDiscovered { .. }))
            .collect();
        // module + validateToken
        assert!(
            entity_events.len() >= 2,
            "Expected module + function entities"
        );

        let dep_events: Vec<_> = events
            .iter()
            .filter(|e| matches!(&e.payload, EventPayload::DependencyLinked { .. }))
            .collect();
        // validateToken→module + module→DatabaseClient
        assert!(
            dep_events.len() >= 2,
            "Expected function→module + module→import edges"
        );
    }

    /// The first entity event must come before the first dependency event.
    ///
    /// Both positions are unwrapped explicitly: comparing the raw `Option`s
    /// would pass vacuously when no entity is emitted at all, because
    /// `None < Some(_)` holds.
    #[test]
    fn test_module_entity_is_discovered_before_import_edges() {
        let source = "import { Foo } from './foo';\nfunction bar() {}";
        let events = parse_source_code("src/main.ts", source, "typescript");

        // Module entity must appear before DependencyLinked so projection has it.
        let first_entity = events
            .iter()
            .position(|e| matches!(&e.payload, EventPayload::EntityDiscovered { .. }))
            .expect("parser should emit at least one EntityDiscovered event");
        let first_dep = events
            .iter()
            .position(|e| matches!(&e.payload, EventPayload::DependencyLinked { .. }))
            .expect("parser should emit at least one DependencyLinked event");
        assert!(
            first_entity < first_dep,
            "EntityDiscovered must precede DependencyLinked"
        );
    }

    /// Rust functions inside an `impl` block and a unit struct are both found.
    #[test]
    fn test_parse_rust_functions_and_structs() {
        let source = r#"
use crate::graph::UcmGraph;

pub struct GraphProjection;

impl GraphProjection {
    pub fn replay_all(events: &[UcmEvent]) -> UcmGraph {
        UcmGraph::new()
    }

    pub async fn apply_event(graph: &mut UcmGraph, event: &UcmEvent) {}
}
"#;
        let events = parse_source_code("src/projection.rs", source, "rust");

        let entities: Vec<_> = events
            .iter()
            .filter(|e| {
                matches!(
                    &e.payload,
                    EventPayload::EntityDiscovered {
                        kind: EntityKind::Function { .. },
                        ..
                    }
                )
            })
            .collect();
        assert!(
            entities.len() >= 2,
            "Should find replay_all and apply_event"
        );

        let structs: Vec<_> = events
            .iter()
            .filter(|e| {
                matches!(
                    &e.payload,
                    EventPayload::EntityDiscovered {
                        kind: EntityKind::DataModel { .. },
                        ..
                    }
                )
            })
            .collect();
        assert!(!structs.is_empty(), "Should find GraphProjection struct");
    }

    /// `crate::` imports resolve to files next to the importing file's crate
    /// root; `std::` imports are skipped.
    #[test]
    fn test_parse_rust_imports() {
        let source = r#"
use crate::entity::EntityId;
use crate::graph::UcmGraph;
use std::collections::HashMap;
"#;
        let empty_map = RustCrateMap::new();
        let imports = extract_imports_rust(source, "ucm-core/src/main.rs", &empty_map);
        // Only crate:: imports, skip std::
        assert_eq!(imports.len(), 2, "Should find 2 crate imports, skip std");
        assert!(imports
            .iter()
            .any(|(syms, _, _)| syms.contains(&"EntityId".to_string())));
        assert!(imports
            .iter()
            .any(|(syms, _, _)| syms.contains(&"UcmGraph".to_string())));

        // Verify resolved file paths
        let entity_import = imports
            .iter()
            .find(|(s, _, _)| s.contains(&"EntityId".to_string()))
            .unwrap();
        assert_eq!(
            entity_import.1, "ucm-core/src/entity.rs",
            "crate::entity::EntityId should resolve to ucm-core/src/entity.rs"
        );

        let graph_import = imports
            .iter()
            .find(|(s, _, _)| s.contains(&"UcmGraph".to_string()))
            .unwrap();
        assert_eq!(
            graph_import.1, "ucm-core/src/graph.rs",
            "crate::graph::UcmGraph should resolve to ucm-core/src/graph.rs"
        );
    }

    /// Imports from sibling workspace crates resolve through the crate map;
    /// crates missing from the map (here `serde`) are skipped.
    #[test]
    fn test_rust_cross_crate_imports() {
        let source = r#"
use ucm_graph_core::graph::UcmGraph;
use ucm_graph_core::entity::{EntityId, EntityKind};
use ucm_ingest::code_parser;
use serde::Serialize;
"#;
        let mut crate_map = RustCrateMap::new();
        crate_map.insert("ucm_graph_core".to_string(), "ucm-core/src".to_string());
        crate_map.insert("ucm_ingest".to_string(), "ucm-ingest/src".to_string());

        let imports = extract_imports_rust(source, "ucm-api/src/main.rs", &crate_map);

        // Should find 3 imports (ucm_graph_core::graph, ucm_graph_core::entity, ucm_ingest::code_parser)
        // Should skip serde (not in crate_map)
        assert_eq!(
            imports.len(),
            3,
            "Should find 3 sibling crate imports, skip serde: got {imports:?}"
        );

        // Verify cross-crate resolution
        let graph_import = imports
            .iter()
            .find(|(s, _, _)| s.contains(&"UcmGraph".to_string()))
            .unwrap();
        assert_eq!(graph_import.1, "ucm-core/src/graph.rs");

        let entity_import = imports
            .iter()
            .find(|(s, _, _)| s.contains(&"EntityId".to_string()))
            .unwrap();
        assert_eq!(entity_import.1, "ucm-core/src/entity.rs");
        assert!(
            entity_import.0.contains(&"EntityKind".to_string()),
            "Should import both EntityId and EntityKind"
        );

        // Importing a module directly (no :: after module name → single segment after crate)
        let parser_import = imports
            .iter()
            .find(|(_, path, _)| path.contains("ucm-ingest"))
            .unwrap();
        assert_eq!(parser_import.1, "ucm-ingest/src/code_parser.rs");
    }

    /// Express-style `app.get` / `app.post` calls produce API endpoint entities.
    #[test]
    fn test_parse_api_routes() {
        let source = r#"
app.get('/api/v1/users', getUsers);
app.post('/api/v1/auth/login', handleLogin);
"#;
        let events = parse_source_code("src/routes.ts", source, "typescript");
        let routes: Vec<_> = events
            .iter()
            .filter(|e| {
                matches!(
                    &e.payload,
                    EventPayload::EntityDiscovered {
                        kind: EntityKind::ApiEndpoint { .. },
                        ..
                    }
                )
            })
            .collect();
        assert_eq!(routes.len(), 2);
    }

    /// `resolve_path` handles `../`, `./`, and an empty base directory
    /// without producing a leading slash.
    #[test]
    fn test_resolve_path_parent_traversal() {
        // fraud/agent.ts imports from ../pipeline/rag-pipeline
        // dir = "fraud", raw = "../pipeline/rag-pipeline"
        // expected = "pipeline/rag-pipeline.ts"  (NOT "/pipeline/rag-pipeline.ts")
        let result = resolve_path("fraud", "../pipeline/rag-pipeline", &["ts"]);
        assert_eq!(result, "pipeline/rag-pipeline.ts");

        // nested: src/fraud/agent.ts imports from ../pipeline/rag
        // dir = "src/fraud", raw = "../pipeline/rag"
        // expected = "src/pipeline/rag.ts"
        let result2 = resolve_path("src/fraud", "../pipeline/rag", &["ts"]);
        assert_eq!(result2, "src/pipeline/rag.ts");

        // same-dir import: fraud/agent.ts imports ./compliance-checker
        let result3 = resolve_path("fraud", "./compliance-checker", &["ts"]);
        assert_eq!(result3, "fraud/compliance-checker.ts");

        // file at root level: dir = "", raw = "./embedding-service"
        let result4 = resolve_path("", "./embedding-service", &["ts"]);
        assert_eq!(result4, "embedding-service.ts");
    }

    /// End-to-end check: events from two files applied to a graph leave it
    /// with entities and at least one edge.
    #[test]
    fn test_full_graph_has_edges() {
        // Simulate two files: auth.ts exports validateToken, middleware.ts imports it.
        let auth_src = "export async function validateToken() {}";
        let mid_src =
            "import { validateToken } from './auth';\nexport function authMiddleware() {}";

        use ucm_events::projection::GraphProjection;
        use ucm_graph_core::graph::UcmGraph;
        let mut graph = UcmGraph::new();
        for ev in parse_source_code("src/auth.ts", auth_src, "typescript") {
            GraphProjection::apply_event(&mut graph, &ev);
        }
        for ev in parse_source_code("src/middleware.ts", mid_src, "typescript") {
            GraphProjection::apply_event(&mut graph, &ev);
        }

        let stats = graph.stats();
        assert!(stats.entity_count >= 2, "Should have entities");
        assert!(
            stats.edge_count >= 1,
            "Should have at least one edge — this was the core bug"
        );
    }

    // ── Python parser tests ──────────────────────────────────────────────────

    /// Absolute imports under the package root resolve with the root
    /// stripped; unrelated modules (`os`) are ignored.
    #[test]
    fn test_python_absolute_imports() {
        let source = r#"
from marimo._runtime.dataflow import DirectedGraph
from marimo._ast.visitor import parse_cell
import os
import marimo._plugins.ui as ui
"#;
        let pkg_root = Some("marimo".to_string());
        let imports = extract_imports_python(source, "_runtime/runtime.py", &pkg_root);

        // Should find 3 imports: 2 `from` + 1 bare `import` matching package root
        assert_eq!(
            imports.len(),
            3,
            "Expected 3 marimo imports, got {imports:?}"
        );

        // Check first import resolves correctly (package root stripped)
        let dg_import = imports
            .iter()
            .find(|(s, _, _)| s.contains(&"DirectedGraph".to_string()))
            .expect("Should find DirectedGraph import");
        assert_eq!(dg_import.1, "_runtime/dataflow.py");

        // Check second import
        let visitor_import = imports
            .iter()
            .find(|(s, _, _)| s.contains(&"parse_cell".to_string()))
            .expect("Should find parse_cell import");
        assert_eq!(visitor_import.1, "_ast/visitor.py");

        // Check bare import
        let bare_import = imports
            .iter()
            .find(|(_, path, _)| path.contains("_plugins"))
            .expect("Should find bare marimo._plugins import");
        assert_eq!(bare_import.1, "_plugins/ui.py");
    }

    /// Imports of modules outside the package root are not reported.
    #[test]
    fn test_python_absolute_imports_skip_external() {
        let source = r#"
from marimo._runtime.dataflow import DirectedGraph
from typing import Optional
import json
from dataclasses import dataclass
"#;
        let pkg_root = Some("marimo".to_string());
        let imports = extract_imports_python(source, "marimo/test.py", &pkg_root);

        // Should only find 1 import (marimo), skip typing/json/dataclasses
        assert_eq!(
            imports.len(),
            1,
            "Should skip external imports, got {imports:?}"
        );
    }

    /// Single-dot and double-dot relative imports resolve against the
    /// importing file's directory.
    #[test]
    fn test_python_relative_imports_still_work() {
        let source = r#"
from .dataflow import DirectedGraph
from ..utils import serialize
"#;
        let pkg_root = Some("marimo".to_string());
        let imports = extract_imports_python(source, "marimo/_runtime/runtime.py", &pkg_root);

        assert_eq!(imports.len(), 2, "Should find 2 relative imports");
        let dg = imports
            .iter()
            .find(|(s, _, _)| s.contains(&"DirectedGraph".to_string()))
            .unwrap();
        assert_eq!(dg.1, "marimo/_runtime/dataflow.py");

        let util = imports
            .iter()
            .find(|(s, _, _)| s.contains(&"serialize".to_string()))
            .unwrap();
        assert_eq!(util.1, "marimo/utils.py");
    }

    /// Methods indented under a class carry its name; a following top-level
    /// function does not.
    #[test]
    fn test_python_class_method_association() {
        let source = r#"
class DirectedGraph:
    def __init__(self):
        pass

    def add_edge(self, src, dst):
        pass

    async def traverse(self):
        pass

def standalone_function():
    pass
"#;
        let entities = extract_python_entities(source);

        // Should find: 1 class + 3 methods + 1 standalone function
        let classes: Vec<_> = entities
            .iter()
            .filter(|e| matches!(e, PythonEntity::Class { .. }))
            .collect();
        assert_eq!(classes.len(), 1, "Should find 1 class");

        let methods: Vec<_> = entities
            .iter()
            .filter(|e| {
                matches!(
                    e,
                    PythonEntity::Function {
                        class_name: Some(_),
                        ..
                    }
                )
            })
            .collect();
        assert_eq!(methods.len(), 3, "Should find 3 methods in DirectedGraph");

        let standalone: Vec<_> = entities
            .iter()
            .filter(|e| {
                matches!(
                    e,
                    PythonEntity::Function {
                        class_name: None,
                        ..
                    }
                )
            })
            .collect();
        assert_eq!(standalone.len(), 1, "Should find 1 standalone function");

        // Verify method names are associated with the class
        for m in &methods {
            if let PythonEntity::Function { class_name, .. } = m {
                assert_eq!(
                    class_name.as_deref(),
                    Some("DirectedGraph"),
                    "Method should belong to DirectedGraph"
                );
            }
        }
    }

    /// A method produces one `Contains` edge and is named `Class.method`.
    #[test]
    fn test_python_class_method_events() {
        let source = r#"
class MyClass:
    def my_method(self):
        pass
"#;
        let events = parse_source_code("test.py", source, "python");

        // Should have Contains edge from MyClass to MyClass.my_method
        let contains_edges: Vec<_> = events
            .iter()
            .filter(|e| {
                matches!(
                    &e.payload,
                    EventPayload::DependencyLinked {
                        relation_type: RelationType::Contains,
                        ..
                    }
                )
            })
            .collect();
        assert_eq!(
            contains_edges.len(),
            1,
            "Should have 1 Contains edge for class→method"
        );

        // Verify the method is named ClassName.method_name
        let method_entities: Vec<_> = events
            .iter()
            .filter_map(|e| {
                if let EventPayload::EntityDiscovered {
                    kind: EntityKind::Function { .. },
                    name,
                    ..
                } = &e.payload
                {
                    Some(name.clone())
                } else {
                    None
                }
            })
            .collect();
        assert!(
            method_entities.contains(&"MyClass.my_method".to_string()),
            "Method should be named MyClass.my_method, got {method_entities:?}"
        );
    }

    /// Base-class lists are extracted for zero, one, and several bases.
    #[test]
    fn test_python_inheritance_edges() {
        let source = r#"
class Animal:
    pass

class Dog(Animal):
    pass

class GuideDog(Dog, Trainable):
    pass
"#;
        let entities = extract_python_entities(source);

        let classes: Vec<_> = entities
            .iter()
            .filter_map(|e| {
                if let PythonEntity::Class { name, bases, .. } = e {
                    Some((name.clone(), bases.clone()))
                } else {
                    None
                }
            })
            .collect();

        assert_eq!(classes.len(), 3);

        let animal = classes.iter().find(|(n, _)| n == "Animal").unwrap();
        assert!(animal.1.is_empty(), "Animal has no bases");

        let dog = classes.iter().find(|(n, _)| n == "Dog").unwrap();
        assert_eq!(dog.1, vec!["Animal"]);

        let guide = classes.iter().find(|(n, _)| n == "GuideDog").unwrap();
        assert_eq!(guide.1, vec!["Dog", "Trainable"]);
    }

    /// A single subclass relationship produces exactly one `Extends` edge.
    #[test]
    fn test_python_inheritance_events() {
        let source = r#"
class Base:
    pass

class Child(Base):
    pass
"#;
        let events = parse_source_code("test.py", source, "python");

        let extends_edges: Vec<_> = events
            .iter()
            .filter(|e| {
                matches!(
                    &e.payload,
                    EventPayload::DependencyLinked {
                        relation_type: RelationType::Extends,
                        ..
                    }
                )
            })
            .collect();
        assert_eq!(
            extends_edges.len(),
            1,
            "Should have 1 Extends edge (Child → Base)"
        );
    }

    /// With no package root configured, absolute imports are never resolved.
    #[test]
    fn test_python_no_package_root_skips_absolute() {
        let source = "from marimo._runtime import foo\nimport json\n";
        let no_pkg: PythonPackageRoot = None;
        let imports = extract_imports_python(source, "test.py", &no_pkg);
        assert!(
            imports.is_empty(),
            "Without package root, absolute imports should be skipped"
        );
    }

    /// `metaclass=...` is a keyword argument, not a base class.
    #[test]
    fn test_python_metaclass_skipped_in_bases() {
        let source = "class Foo(Bar, metaclass=ABCMeta):\n    pass\n";
        let entities = extract_python_entities(source);
        if let Some(PythonEntity::Class { bases, .. }) = entities.first() {
            assert_eq!(bases, &["Bar"], "metaclass= arg should be skipped");
        } else {
            panic!("Expected a class entity");
        }
    }
}