Skip to main content

graphify_extract/
ast_extract.rs

1//! Regex-based AST extraction engine.
2//!
3//! This module implements a **working** regex-based extractor for each supported
4//! language. It serves as the "Pass 1" deterministic extraction while tree-sitter
5//! grammar crates are being added to the workspace.
6//!
7//! For each source file the extractor produces:
8//! - A **file** node
9//! - **Class / struct / trait / interface** nodes
10//! - **Function / method** nodes with `defines` edges from their parent
11//! - **Import** nodes with `imports` edges from the file
12//! - **Calls** edges inferred by matching known function names within bodies
13
14use std::collections::HashMap;
15use std::path::Path;
16
17use graphify_core::confidence::Confidence;
18use graphify_core::id::make_id;
19use graphify_core::model::{ExtractionResult, GraphEdge, GraphNode, NodeType};
20use regex::Regex;
21use tracing::trace;
22
23// ═══════════════════════════════════════════════════════════════════════════
24// Public entry point
25// ═══════════════════════════════════════════════════════════════════════════
26
27/// Extract graph nodes and edges from a single source file.
28pub fn extract_file(path: &Path, source: &str, lang: &str) -> ExtractionResult {
29    match lang {
30        "python" => extract_python(path, source),
31        "javascript" | "typescript" => extract_js_ts(path, source, lang),
32        "rust" => extract_rust(path, source),
33        "go" => extract_go(path, source),
34        "java" => extract_java(path, source),
35        "c" | "cpp" => extract_c_cpp(path, source, lang),
36        "ruby" => extract_ruby(path, source),
37        "csharp" => extract_csharp(path, source),
38        "kotlin" => extract_kotlin(path, source),
39        _ => extract_generic(path, source, lang),
40    }
41}
42
43// ═══════════════════════════════════════════════════════════════════════════
44// Helpers
45// ═══════════════════════════════════════════════════════════════════════════
46
/// Returns the file name of `path` without its final extension.
///
/// Falls back to `"unknown"` when the path has no file name or the stem is
/// not valid UTF-8.
fn file_stem(path: &Path) -> String {
    match path.file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_owned(),
        None => String::from("unknown"),
    }
}
53
/// Renders `path` as an owned `String`, replacing any non-UTF-8 sequences
/// with the Unicode replacement character.
fn path_str(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    String::from(lossy)
}
57
58fn make_file_node(path: &Path) -> GraphNode {
59    let ps = path_str(path);
60    GraphNode {
61        id: make_id(&[&ps]),
62        label: file_stem(path),
63        source_file: ps,
64        source_location: None,
65        node_type: NodeType::File,
66        community: None,
67        extra: HashMap::new(),
68    }
69}
70
71fn make_node(name: &str, path: &Path, node_type: NodeType, line: usize) -> GraphNode {
72    let ps = path_str(path);
73    GraphNode {
74        id: make_id(&[&ps, name]),
75        label: name.to_string(),
76        source_file: ps,
77        source_location: Some(format!("L{line}")),
78        node_type,
79        community: None,
80        extra: HashMap::new(),
81    }
82}
83
84fn make_edge(
85    source_id: &str,
86    target_id: &str,
87    relation: &str,
88    path: &Path,
89    confidence: Confidence,
90) -> GraphEdge {
91    GraphEdge {
92        source: source_id.to_string(),
93        target: target_id.to_string(),
94        relation: relation.to_string(),
95        confidence: confidence.clone(),
96        confidence_score: confidence.default_score(),
97        source_file: path_str(path),
98        source_location: None,
99        weight: 1.0,
100        extra: HashMap::new(),
101    }
102}
103
104/// Simple call-graph inference: for each function body, look for occurrences
105/// of other known function names.
106fn infer_calls(
107    functions: &[(String, String, usize, usize)], // (name, id, start_line, end_line)
108    source_lines: &[&str],
109    path: &Path,
110) -> Vec<GraphEdge> {
111    let mut edges = Vec::new();
112    for (_caller_name, caller_id, start, end) in functions {
113        let body = source_lines
114            .get(*start..*end)
115            .unwrap_or_default()
116            .join("\n");
117        for (callee_name, callee_id, _, _) in functions {
118            if caller_id == callee_id {
119                continue;
120            }
121            // Check if callee_name appears in caller body as a call (name followed by `(`)
122            let pattern = format!(r"\b{}\s*\(", regex::escape(callee_name));
123            if let Ok(re) = Regex::new(&pattern)
124                && re.is_match(&body)
125            {
126                edges.push(make_edge(
127                    caller_id,
128                    callee_id,
129                    "calls",
130                    path,
131                    Confidence::Inferred,
132                ));
133            }
134        }
135    }
136    edges
137}
138
139// ═══════════════════════════════════════════════════════════════════════════
140// Python
141// ═══════════════════════════════════════════════════════════════════════════
142
143fn extract_python(path: &Path, source: &str) -> ExtractionResult {
144    let mut result = ExtractionResult::default();
145    let file_node = make_file_node(path);
146    let file_id = file_node.id.clone();
147    result.nodes.push(file_node);
148
149    let lines: Vec<&str> = source.lines().collect();
150    let ps = path_str(path);
151
152    // Classes: `class Foo(Bar):`  or `class Foo:`
153    let re_class = Regex::new(r"(?m)^(\s*)class\s+(\w+)").unwrap();
154    let re_class_lookup = Regex::new(r"^(\s*)class\s+(\w+)").unwrap();
155    let mut class_ids: HashMap<String, String> = HashMap::new();
156    for cap in re_class.captures_iter(source) {
157        let name = &cap[2];
158        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
159        let node = make_node(name, path, NodeType::Class, line);
160        let node_id = node.id.clone();
161        class_ids.insert(name.to_string(), node_id.clone());
162        result.nodes.push(node);
163        result.edges.push(make_edge(
164            &file_id,
165            &node_id,
166            "defines",
167            path,
168            Confidence::Extracted,
169        ));
170    }
171
172    // Functions / methods: `def foo(...):`
173    let re_func = Regex::new(r"(?m)^(\s*)def\s+(\w+)\s*\(").unwrap();
174    let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
175    let func_matches: Vec<_> = re_func.captures_iter(source).collect();
176    for (i, cap) in func_matches.iter().enumerate() {
177        let indent = cap[1].len();
178        let name = cap[2].to_string();
179        let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
180
181        let node_type = if indent > 0 {
182            NodeType::Method
183        } else {
184            NodeType::Function
185        };
186        let node = make_node(&name, path, node_type, start_line);
187        let node_id = node.id.clone();
188
189        // Determine parent: if indented, belong to nearest class above with less indent
190        let parent_id = if indent > 0 {
191            // Find enclosing class by checking lines above for `class` with less indent
192            let mut parent = None;
193            for line_idx in (0..start_line.saturating_sub(1)).rev() {
194                if let Some(line) = lines.get(line_idx)
195                    && let Some(cls_cap) = re_class_lookup.captures(line)
196                    && cls_cap[1].len() < indent
197                {
198                    parent = class_ids.get(&cls_cap[2]).cloned();
199                    break;
200                }
201            }
202            parent.unwrap_or_else(|| file_id.clone())
203        } else {
204            file_id.clone()
205        };
206
207        // End line: next function at same or lower indent, or end of file
208        let end_line = if i + 1 < func_matches.len() {
209            source[..func_matches[i + 1].get(0).unwrap().start()]
210                .lines()
211                .count()
212        } else {
213            lines.len()
214        };
215
216        functions.push((name.clone(), node_id.clone(), start_line, end_line));
217        result.nodes.push(node);
218        result.edges.push(make_edge(
219            &parent_id,
220            &node_id,
221            "defines",
222            path,
223            Confidence::Extracted,
224        ));
225    }
226
227    // Imports: `import X` / `from X import Y`
228    let re_import = Regex::new(r"(?m)^(?:from\s+([\w.]+)\s+)?import\s+([\w.,\s*]+)").unwrap();
229    for cap in re_import.captures_iter(source) {
230        let module = cap.get(1).map_or("", |m| m.as_str());
231        let names_str = &cap[2];
232        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
233
234        for name in names_str.split(',') {
235            let name = name.trim().split(" as ").next().unwrap_or("").trim();
236            if name.is_empty() || name == "*" {
237                continue;
238            }
239            let full_name = if module.is_empty() {
240                name.to_string()
241            } else {
242                format!("{module}.{name}")
243            };
244            let import_id = make_id(&[&ps, "import", &full_name]);
245            result.nodes.push(GraphNode {
246                id: import_id.clone(),
247                label: full_name,
248                source_file: ps.clone(),
249                source_location: Some(format!("L{line}")),
250                node_type: NodeType::Module,
251                community: None,
252                extra: HashMap::new(),
253            });
254            result.edges.push(make_edge(
255                &file_id,
256                &import_id,
257                "imports",
258                path,
259                Confidence::Extracted,
260            ));
261        }
262    }
263
264    // Infer calls
265    let call_edges = infer_calls(&functions, &lines, path);
266    result.edges.extend(call_edges);
267
268    trace!(
269        "python: {} nodes, {} edges from {}",
270        result.nodes.len(),
271        result.edges.len(),
272        ps
273    );
274    result
275}
276
277// ═══════════════════════════════════════════════════════════════════════════
278// JavaScript / TypeScript
279// ═══════════════════════════════════════════════════════════════════════════
280
281fn extract_js_ts(path: &Path, source: &str, lang: &str) -> ExtractionResult {
282    let mut result = ExtractionResult::default();
283    let file_node = make_file_node(path);
284    let file_id = file_node.id.clone();
285    result.nodes.push(file_node);
286
287    let lines: Vec<&str> = source.lines().collect();
288    let ps = path_str(path);
289
290    // Classes: `class Foo` / `export class Foo`
291    let re_class = Regex::new(r"(?m)(?:export\s+)?(?:default\s+)?class\s+(\w+)").unwrap();
292    for cap in re_class.captures_iter(source) {
293        let name = &cap[1];
294        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
295        let node = make_node(name, path, NodeType::Class, line);
296        let node_id = node.id.clone();
297        result.nodes.push(node);
298        result.edges.push(make_edge(
299            &file_id,
300            &node_id,
301            "defines",
302            path,
303            Confidence::Extracted,
304        ));
305    }
306
307    // Functions: `function foo(` / `const foo = (` / `const foo = async (`
308    // Also: `export function foo(` / `export default function foo(`
309    let re_func = Regex::new(
310        r"(?m)(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+(\w+)\s*\(|(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(?:\([^)]*\)|[^=])\s*=>"
311    )
312    .unwrap();
313    let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
314    let func_matches: Vec<_> = re_func.captures_iter(source).collect();
315
316    for (i, cap) in func_matches.iter().enumerate() {
317        let name = cap
318            .get(1)
319            .or(cap.get(2))
320            .map(|m| m.as_str().to_string())
321            .unwrap_or_default();
322        if name.is_empty() {
323            continue;
324        }
325        let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
326        let end_line = if i + 1 < func_matches.len() {
327            source[..func_matches[i + 1].get(0).unwrap().start()]
328                .lines()
329                .count()
330        } else {
331            lines.len()
332        };
333
334        let node = make_node(&name, path, NodeType::Function, start_line);
335        let node_id = node.id.clone();
336        functions.push((name, node_id.clone(), start_line, end_line));
337        result.nodes.push(node);
338        result.edges.push(make_edge(
339            &file_id,
340            &node_id,
341            "defines",
342            path,
343            Confidence::Extracted,
344        ));
345    }
346
347    // Imports: `import { X } from 'Y'` / `import X from 'Y'` / `import 'Y'`
348    let re_import = Regex::new(
349        r#"(?m)import\s+(?:\{([^}]+)\}|(\w+))\s+from\s+['"]([^'"]+)['"]|import\s+['"]([^'"]+)['"]"#,
350    )
351    .unwrap();
352    for cap in re_import.captures_iter(source) {
353        let module = cap.get(3).or(cap.get(4)).map(|m| m.as_str()).unwrap_or("");
354        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
355
356        if let Some(names) = cap.get(1) {
357            for name in names.as_str().split(',') {
358                let name = name.trim().split(" as ").next().unwrap_or("").trim();
359                if name.is_empty() {
360                    continue;
361                }
362                let full = format!("{module}/{name}");
363                let import_id = make_id(&[&ps, "import", &full]);
364                result.nodes.push(GraphNode {
365                    id: import_id.clone(),
366                    label: full,
367                    source_file: ps.clone(),
368                    source_location: Some(format!("L{line}")),
369                    node_type: NodeType::Module,
370                    community: None,
371                    extra: HashMap::new(),
372                });
373                result.edges.push(make_edge(
374                    &file_id,
375                    &import_id,
376                    "imports",
377                    path,
378                    Confidence::Extracted,
379                ));
380            }
381        } else if let Some(default_name) = cap.get(2) {
382            let name = default_name.as_str();
383            let import_id = make_id(&[&ps, "import", module]);
384            result.nodes.push(GraphNode {
385                id: import_id.clone(),
386                label: name.to_string(),
387                source_file: ps.clone(),
388                source_location: Some(format!("L{line}")),
389                node_type: NodeType::Module,
390                community: None,
391                extra: HashMap::new(),
392            });
393            result.edges.push(make_edge(
394                &file_id,
395                &import_id,
396                "imports",
397                path,
398                Confidence::Extracted,
399            ));
400        }
401    }
402
403    // Also handle require() for JS
404    if lang == "javascript" {
405        let re_require = Regex::new(
406            r#"(?m)(?:const|let|var)\s+(\w+)\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)"#,
407        )
408        .unwrap();
409        for cap in re_require.captures_iter(source) {
410            let name = &cap[1];
411            let module = &cap[2];
412            let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
413            let import_id = make_id(&[&ps, "import", module]);
414            result.nodes.push(GraphNode {
415                id: import_id.clone(),
416                label: name.to_string(),
417                source_file: ps.clone(),
418                source_location: Some(format!("L{line}")),
419                node_type: NodeType::Module,
420                community: None,
421                extra: HashMap::new(),
422            });
423            result.edges.push(make_edge(
424                &file_id,
425                &import_id,
426                "imports",
427                path,
428                Confidence::Extracted,
429            ));
430        }
431    }
432
433    let call_edges = infer_calls(&functions, &lines, path);
434    result.edges.extend(call_edges);
435
436    result
437}
438
439// ═══════════════════════════════════════════════════════════════════════════
440// Rust
441// ═══════════════════════════════════════════════════════════════════════════
442
443fn extract_rust(path: &Path, source: &str) -> ExtractionResult {
444    let mut result = ExtractionResult::default();
445    let file_node = make_file_node(path);
446    let file_id = file_node.id.clone();
447    result.nodes.push(file_node);
448
449    let lines: Vec<&str> = source.lines().collect();
450    let ps = path_str(path);
451
452    // Structs: `pub struct Foo` / `struct Foo`
453    let re_struct = Regex::new(r"(?m)^(?:\s*pub(?:\([^)]*\))?\s+)?struct\s+(\w+)").unwrap();
454    for cap in re_struct.captures_iter(source) {
455        let name = &cap[1];
456        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
457        let node = make_node(name, path, NodeType::Struct, line);
458        let node_id = node.id.clone();
459        result.nodes.push(node);
460        result.edges.push(make_edge(
461            &file_id,
462            &node_id,
463            "defines",
464            path,
465            Confidence::Extracted,
466        ));
467    }
468
469    // Enums: `pub enum Foo` / `enum Foo`
470    let re_enum = Regex::new(r"(?m)^(?:\s*pub(?:\([^)]*\))?\s+)?enum\s+(\w+)").unwrap();
471    for cap in re_enum.captures_iter(source) {
472        let name = &cap[1];
473        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
474        let node = make_node(name, path, NodeType::Enum, line);
475        let node_id = node.id.clone();
476        result.nodes.push(node);
477        result.edges.push(make_edge(
478            &file_id,
479            &node_id,
480            "defines",
481            path,
482            Confidence::Extracted,
483        ));
484    }
485
486    // Traits: `pub trait Foo` / `trait Foo`
487    let re_trait = Regex::new(r"(?m)^(?:\s*pub(?:\([^)]*\))?\s+)?trait\s+(\w+)").unwrap();
488    for cap in re_trait.captures_iter(source) {
489        let name = &cap[1];
490        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
491        let node = make_node(name, path, NodeType::Trait, line);
492        let node_id = node.id.clone();
493        result.nodes.push(node);
494        result.edges.push(make_edge(
495            &file_id,
496            &node_id,
497            "defines",
498            path,
499            Confidence::Extracted,
500        ));
501    }
502
503    // Impl blocks: `impl Foo` / `impl Trait for Foo`
504    let re_impl = Regex::new(r"(?m)^(?:\s*)impl(?:<[^>]*>)?\s+(?:(\w+)\s+for\s+)?(\w+)").unwrap();
505    for cap in re_impl.captures_iter(source) {
506        let _trait_name = cap.get(1).map(|m| m.as_str());
507        let type_name = &cap[2];
508        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
509        // Create an "implements" edge if impl Trait for Type
510        if let Some(trait_m) = cap.get(1) {
511            let trait_id = make_id(&[&ps, trait_m.as_str()]);
512            let type_id = make_id(&[&ps, type_name]);
513            result.edges.push(make_edge(
514                &type_id,
515                &trait_id,
516                "implements",
517                path,
518                Confidence::Extracted,
519            ));
520        }
521        let _ = line;
522    }
523
524    // Functions: `pub fn foo(` / `fn foo(` / `pub(crate) fn foo(`
525    // Also methods inside impl blocks
526    let re_func = Regex::new(
527        r"(?m)^(\s*)(?:pub(?:\([^)]*\))?\s+)?(?:async\s+)?(?:unsafe\s+)?(?:const\s+)?fn\s+(\w+)",
528    )
529    .unwrap();
530    let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
531    let func_matches: Vec<_> = re_func.captures_iter(source).collect();
532    for (i, cap) in func_matches.iter().enumerate() {
533        let indent = cap[1].len();
534        let name = cap[2].to_string();
535        let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
536        let end_line = if i + 1 < func_matches.len() {
537            source[..func_matches[i + 1].get(0).unwrap().start()]
538                .lines()
539                .count()
540        } else {
541            lines.len()
542        };
543
544        let node_type = if indent > 0 {
545            NodeType::Method
546        } else {
547            NodeType::Function
548        };
549        let node = make_node(&name, path, node_type, start_line);
550        let node_id = node.id.clone();
551        functions.push((name, node_id.clone(), start_line, end_line));
552        result.nodes.push(node);
553        result.edges.push(make_edge(
554            &file_id,
555            &node_id,
556            "defines",
557            path,
558            Confidence::Extracted,
559        ));
560    }
561
562    // Use statements
563    let re_use = Regex::new(r"(?m)^(?:\s*)(?:pub\s+)?use\s+([\w:]+)").unwrap();
564    for cap in re_use.captures_iter(source) {
565        let module = &cap[1];
566        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
567        let import_id = make_id(&[&ps, "use", module]);
568        result.nodes.push(GraphNode {
569            id: import_id.clone(),
570            label: module.to_string(),
571            source_file: ps.clone(),
572            source_location: Some(format!("L{line}")),
573            node_type: NodeType::Module,
574            community: None,
575            extra: HashMap::new(),
576        });
577        result.edges.push(make_edge(
578            &file_id,
579            &import_id,
580            "imports",
581            path,
582            Confidence::Extracted,
583        ));
584    }
585
586    let call_edges = infer_calls(&functions, &lines, path);
587    result.edges.extend(call_edges);
588
589    result
590}
591
592// ═══════════════════════════════════════════════════════════════════════════
593// Go
594// ═══════════════════════════════════════════════════════════════════════════
595
596fn extract_go(path: &Path, source: &str) -> ExtractionResult {
597    let mut result = ExtractionResult::default();
598    let file_node = make_file_node(path);
599    let file_id = file_node.id.clone();
600    result.nodes.push(file_node);
601
602    let lines: Vec<&str> = source.lines().collect();
603    let ps = path_str(path);
604
605    // Type definitions: `type Foo struct {` / `type Foo interface {`
606    let re_type = Regex::new(r"(?m)^type\s+(\w+)\s+(struct|interface)").unwrap();
607    for cap in re_type.captures_iter(source) {
608        let name = &cap[1];
609        let kind = &cap[2];
610        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
611        let node_type = match kind {
612            "interface" => NodeType::Interface,
613            _ => NodeType::Struct,
614        };
615        let node = make_node(name, path, node_type, line);
616        let node_id = node.id.clone();
617        result.nodes.push(node);
618        result.edges.push(make_edge(
619            &file_id,
620            &node_id,
621            "defines",
622            path,
623            Confidence::Extracted,
624        ));
625    }
626
627    // Functions and methods: `func Foo(` / `func (r *Recv) Foo(`
628    let re_func = Regex::new(r"(?m)^func\s+(?:\([^)]+\)\s+)?(\w+)\s*\(").unwrap();
629    let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
630    let func_matches: Vec<_> = re_func.captures_iter(source).collect();
631    for (i, cap) in func_matches.iter().enumerate() {
632        let name = cap[1].to_string();
633        let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
634        let end_line = if i + 1 < func_matches.len() {
635            source[..func_matches[i + 1].get(0).unwrap().start()]
636                .lines()
637                .count()
638        } else {
639            lines.len()
640        };
641
642        // Methods have a receiver
643        let full_match = cap.get(0).unwrap().as_str();
644        let node_type = if full_match.contains('(') && full_match.find('(') < full_match.find(&name)
645        {
646            NodeType::Method
647        } else {
648            NodeType::Function
649        };
650
651        let node = make_node(&name, path, node_type, start_line);
652        let node_id = node.id.clone();
653        functions.push((name, node_id.clone(), start_line, end_line));
654        result.nodes.push(node);
655        result.edges.push(make_edge(
656            &file_id,
657            &node_id,
658            "defines",
659            path,
660            Confidence::Extracted,
661        ));
662    }
663
664    // Imports: `import "fmt"` / `import ( "fmt" "os" )`
665    let re_import_single = Regex::new(r#"(?m)^import\s+"([^"]+)""#).unwrap();
666    let re_import_block = Regex::new(r"(?s)import\s*\(([^)]+)\)").unwrap();
667    let re_import_line = Regex::new(r#""([^"]+)""#).unwrap();
668
669    for cap in re_import_single.captures_iter(source) {
670        let module = &cap[1];
671        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
672        let import_id = make_id(&[&ps, "import", module]);
673        result.nodes.push(GraphNode {
674            id: import_id.clone(),
675            label: module.to_string(),
676            source_file: ps.clone(),
677            source_location: Some(format!("L{line}")),
678            node_type: NodeType::Package,
679            community: None,
680            extra: HashMap::new(),
681        });
682        result.edges.push(make_edge(
683            &file_id,
684            &import_id,
685            "imports",
686            path,
687            Confidence::Extracted,
688        ));
689    }
690
691    for cap in re_import_block.captures_iter(source) {
692        let block = &cap[1];
693        let block_start = source[..cap.get(0).unwrap().start()].lines().count() + 1;
694        for (idx, imp_cap) in re_import_line.captures_iter(block).enumerate() {
695            let module = &imp_cap[1];
696            let import_id = make_id(&[&ps, "import", module]);
697            result.nodes.push(GraphNode {
698                id: import_id.clone(),
699                label: module.to_string(),
700                source_file: ps.clone(),
701                source_location: Some(format!("L{}", block_start + idx + 1)),
702                node_type: NodeType::Package,
703                community: None,
704                extra: HashMap::new(),
705            });
706            result.edges.push(make_edge(
707                &file_id,
708                &import_id,
709                "imports",
710                path,
711                Confidence::Extracted,
712            ));
713        }
714    }
715
716    let call_edges = infer_calls(&functions, &lines, path);
717    result.edges.extend(call_edges);
718
719    result
720}
721
722// ═══════════════════════════════════════════════════════════════════════════
723// Java
724// ═══════════════════════════════════════════════════════════════════════════
725
726fn extract_java(path: &Path, source: &str) -> ExtractionResult {
727    let mut result = ExtractionResult::default();
728    let file_node = make_file_node(path);
729    let file_id = file_node.id.clone();
730    result.nodes.push(file_node);
731
732    let lines: Vec<&str> = source.lines().collect();
733    let ps = path_str(path);
734
735    // Classes / interfaces / enums
736    let re_class = Regex::new(
737        r"(?m)(?:public\s+|private\s+|protected\s+)?(?:abstract\s+|static\s+|final\s+)*(class|interface|enum)\s+(\w+)",
738    )
739    .unwrap();
740    for cap in re_class.captures_iter(source) {
741        let kind = &cap[1];
742        let name = &cap[2];
743        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
744        let node_type = match kind {
745            "interface" => NodeType::Interface,
746            "enum" => NodeType::Enum,
747            _ => NodeType::Class,
748        };
749        let node = make_node(name, path, node_type, line);
750        let node_id = node.id.clone();
751        result.nodes.push(node);
752        result.edges.push(make_edge(
753            &file_id,
754            &node_id,
755            "defines",
756            path,
757            Confidence::Extracted,
758        ));
759    }
760
761    // Methods: `public void foo(` / `private static int bar(`
762    let re_method = Regex::new(
763        r"(?m)^\s+(?:public\s+|private\s+|protected\s+)?(?:static\s+)?(?:final\s+)?(?:synchronized\s+)?(?:abstract\s+)?(?:\w+(?:<[^>]*>)?)\s+(\w+)\s*\(",
764    )
765    .unwrap();
766    let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
767    let func_matches: Vec<_> = re_method.captures_iter(source).collect();
768    for (i, cap) in func_matches.iter().enumerate() {
769        let name = cap[1].to_string();
770        // Skip common false positives
771        if [
772            "if", "for", "while", "switch", "catch", "return", "new", "throw",
773        ]
774        .contains(&name.as_str())
775        {
776            continue;
777        }
778        let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
779        let end_line = if i + 1 < func_matches.len() {
780            source[..func_matches[i + 1].get(0).unwrap().start()]
781                .lines()
782                .count()
783        } else {
784            lines.len()
785        };
786
787        let node = make_node(&name, path, NodeType::Method, start_line);
788        let node_id = node.id.clone();
789        functions.push((name, node_id.clone(), start_line, end_line));
790        result.nodes.push(node);
791        result.edges.push(make_edge(
792            &file_id,
793            &node_id,
794            "defines",
795            path,
796            Confidence::Extracted,
797        ));
798    }
799
800    // Imports
801    let re_import = Regex::new(r"(?m)^import\s+(?:static\s+)?([\w.]+)\s*;").unwrap();
802    for cap in re_import.captures_iter(source) {
803        let module = &cap[1];
804        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
805        let import_id = make_id(&[&ps, "import", module]);
806        result.nodes.push(GraphNode {
807            id: import_id.clone(),
808            label: module.to_string(),
809            source_file: ps.clone(),
810            source_location: Some(format!("L{line}")),
811            node_type: NodeType::Package,
812            community: None,
813            extra: HashMap::new(),
814        });
815        result.edges.push(make_edge(
816            &file_id,
817            &import_id,
818            "imports",
819            path,
820            Confidence::Extracted,
821        ));
822    }
823
824    let call_edges = infer_calls(&functions, &lines, path);
825    result.edges.extend(call_edges);
826
827    result
828}
829
830// ═══════════════════════════════════════════════════════════════════════════
831// C / C++
832// ═══════════════════════════════════════════════════════════════════════════
833
834fn extract_c_cpp(path: &Path, source: &str, lang: &str) -> ExtractionResult {
835    let mut result = ExtractionResult::default();
836    let file_node = make_file_node(path);
837    let file_id = file_node.id.clone();
838    result.nodes.push(file_node);
839
840    let lines: Vec<&str> = source.lines().collect();
841    let ps = path_str(path);
842
843    // #include directives
844    let re_include = Regex::new(r#"(?m)^#include\s+[<"]([^>"]+)[>"]"#).unwrap();
845    for cap in re_include.captures_iter(source) {
846        let header = &cap[1];
847        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
848        let import_id = make_id(&[&ps, "include", header]);
849        result.nodes.push(GraphNode {
850            id: import_id.clone(),
851            label: header.to_string(),
852            source_file: ps.clone(),
853            source_location: Some(format!("L{line}")),
854            node_type: NodeType::Module,
855            community: None,
856            extra: HashMap::new(),
857        });
858        result.edges.push(make_edge(
859            &file_id,
860            &import_id,
861            "includes",
862            path,
863            Confidence::Extracted,
864        ));
865    }
866
867    // C++ classes / structs / namespaces
868    if lang == "cpp" {
869        let re_class = Regex::new(r"(?m)^(?:\s*)(?:class|struct|namespace)\s+(\w+)").unwrap();
870        for cap in re_class.captures_iter(source) {
871            let name = &cap[1];
872            let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
873            let node = make_node(name, path, NodeType::Class, line);
874            let node_id = node.id.clone();
875            result.nodes.push(node);
876            result.edges.push(make_edge(
877                &file_id,
878                &node_id,
879                "defines",
880                path,
881                Confidence::Extracted,
882            ));
883        }
884    }
885
886    // C structs
887    if lang == "c" {
888        let re_struct = Regex::new(r"(?m)^(?:typedef\s+)?struct\s+(\w+)").unwrap();
889        for cap in re_struct.captures_iter(source) {
890            let name = &cap[1];
891            let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
892            let node = make_node(name, path, NodeType::Struct, line);
893            let node_id = node.id.clone();
894            result.nodes.push(node);
895            result.edges.push(make_edge(
896                &file_id,
897                &node_id,
898                "defines",
899                path,
900                Confidence::Extracted,
901            ));
902        }
903    }
904
905    // Functions: `type name(` at start of line (heuristic)
906    let re_func = Regex::new(
907        r"(?m)^(?:static\s+)?(?:inline\s+)?(?:extern\s+)?(?:const\s+)?(?:unsigned\s+)?(?:signed\s+)?(?:\w+(?:\s*\*\s*|\s+))(\w+)\s*\([^;]*\)\s*\{",
908    )
909    .unwrap();
910    let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
911    let func_matches: Vec<_> = re_func.captures_iter(source).collect();
912    for (i, cap) in func_matches.iter().enumerate() {
913        let name = cap[1].to_string();
914        if ["if", "for", "while", "switch", "return", "sizeof"].contains(&name.as_str()) {
915            continue;
916        }
917        let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
918        let end_line = if i + 1 < func_matches.len() {
919            source[..func_matches[i + 1].get(0).unwrap().start()]
920                .lines()
921                .count()
922        } else {
923            lines.len()
924        };
925
926        let node = make_node(&name, path, NodeType::Function, start_line);
927        let node_id = node.id.clone();
928        functions.push((name, node_id.clone(), start_line, end_line));
929        result.nodes.push(node);
930        result.edges.push(make_edge(
931            &file_id,
932            &node_id,
933            "defines",
934            path,
935            Confidence::Extracted,
936        ));
937    }
938
939    let call_edges = infer_calls(&functions, &lines, path);
940    result.edges.extend(call_edges);
941
942    result
943}
944
945// ═══════════════════════════════════════════════════════════════════════════
946// Ruby
947// ═══════════════════════════════════════════════════════════════════════════
948
949fn extract_ruby(path: &Path, source: &str) -> ExtractionResult {
950    let mut result = ExtractionResult::default();
951    let file_node = make_file_node(path);
952    let file_id = file_node.id.clone();
953    result.nodes.push(file_node);
954
955    let lines: Vec<&str> = source.lines().collect();
956    let ps = path_str(path);
957
958    // Classes and modules
959    let re_class = Regex::new(r"(?m)^\s*(class|module)\s+(\w+(?:::\w+)*)").unwrap();
960    for cap in re_class.captures_iter(source) {
961        let name = &cap[2];
962        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
963        let node = make_node(name, path, NodeType::Class, line);
964        let node_id = node.id.clone();
965        result.nodes.push(node);
966        result.edges.push(make_edge(
967            &file_id,
968            &node_id,
969            "defines",
970            path,
971            Confidence::Extracted,
972        ));
973    }
974
975    // Methods
976    let re_func = Regex::new(r"(?m)^\s*def\s+(self\.)?(\w+[?!=]?)").unwrap();
977    let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
978    let func_matches: Vec<_> = re_func.captures_iter(source).collect();
979    for (i, cap) in func_matches.iter().enumerate() {
980        let name = cap[2].to_string();
981        let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
982        let end_line = if i + 1 < func_matches.len() {
983            source[..func_matches[i + 1].get(0).unwrap().start()]
984                .lines()
985                .count()
986        } else {
987            lines.len()
988        };
989
990        let node = make_node(&name, path, NodeType::Method, start_line);
991        let node_id = node.id.clone();
992        functions.push((name, node_id.clone(), start_line, end_line));
993        result.nodes.push(node);
994        result.edges.push(make_edge(
995            &file_id,
996            &node_id,
997            "defines",
998            path,
999            Confidence::Extracted,
1000        ));
1001    }
1002
1003    // require / require_relative
1004    let re_require = Regex::new(r#"(?m)^\s*require(?:_relative)?\s+['"]([^'"]+)['"]"#).unwrap();
1005    for cap in re_require.captures_iter(source) {
1006        let module = &cap[1];
1007        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1008        let import_id = make_id(&[&ps, "require", module]);
1009        result.nodes.push(GraphNode {
1010            id: import_id.clone(),
1011            label: module.to_string(),
1012            source_file: ps.clone(),
1013            source_location: Some(format!("L{line}")),
1014            node_type: NodeType::Module,
1015            community: None,
1016            extra: HashMap::new(),
1017        });
1018        result.edges.push(make_edge(
1019            &file_id,
1020            &import_id,
1021            "imports",
1022            path,
1023            Confidence::Extracted,
1024        ));
1025    }
1026
1027    let call_edges = infer_calls(&functions, &lines, path);
1028    result.edges.extend(call_edges);
1029
1030    result
1031}
1032
1033// ═══════════════════════════════════════════════════════════════════════════
1034// C#
1035// ═══════════════════════════════════════════════════════════════════════════
1036
1037fn extract_csharp(path: &Path, source: &str) -> ExtractionResult {
1038    let mut result = ExtractionResult::default();
1039    let file_node = make_file_node(path);
1040    let file_id = file_node.id.clone();
1041    result.nodes.push(file_node);
1042
1043    let lines: Vec<&str> = source.lines().collect();
1044    let ps = path_str(path);
1045
1046    // Classes / interfaces / structs / enums
1047    let re_class = Regex::new(
1048        r"(?m)(?:public\s+|private\s+|protected\s+|internal\s+)?(?:abstract\s+|static\s+|sealed\s+|partial\s+)*(class|interface|struct|enum)\s+(\w+)",
1049    )
1050    .unwrap();
1051    for cap in re_class.captures_iter(source) {
1052        let kind = &cap[1];
1053        let name = &cap[2];
1054        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1055        let node_type = match kind {
1056            "interface" => NodeType::Interface,
1057            "struct" => NodeType::Struct,
1058            "enum" => NodeType::Enum,
1059            _ => NodeType::Class,
1060        };
1061        let node = make_node(name, path, node_type, line);
1062        let node_id = node.id.clone();
1063        result.nodes.push(node);
1064        result.edges.push(make_edge(
1065            &file_id,
1066            &node_id,
1067            "defines",
1068            path,
1069            Confidence::Extracted,
1070        ));
1071    }
1072
1073    // Methods
1074    let re_method = Regex::new(
1075        r"(?m)^\s+(?:public\s+|private\s+|protected\s+|internal\s+)?(?:static\s+)?(?:virtual\s+)?(?:override\s+)?(?:async\s+)?(?:\w+(?:<[^>]*>)?)\s+(\w+)\s*\(",
1076    )
1077    .unwrap();
1078    let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
1079    let func_matches: Vec<_> = re_method.captures_iter(source).collect();
1080    for (i, cap) in func_matches.iter().enumerate() {
1081        let name = cap[1].to_string();
1082        if [
1083            "if", "for", "while", "switch", "catch", "return", "new", "throw",
1084        ]
1085        .contains(&name.as_str())
1086        {
1087            continue;
1088        }
1089        let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1090        let end_line = if i + 1 < func_matches.len() {
1091            source[..func_matches[i + 1].get(0).unwrap().start()]
1092                .lines()
1093                .count()
1094        } else {
1095            lines.len()
1096        };
1097
1098        let node = make_node(&name, path, NodeType::Method, start_line);
1099        let node_id = node.id.clone();
1100        functions.push((name, node_id.clone(), start_line, end_line));
1101        result.nodes.push(node);
1102        result.edges.push(make_edge(
1103            &file_id,
1104            &node_id,
1105            "defines",
1106            path,
1107            Confidence::Extracted,
1108        ));
1109    }
1110
1111    // using directives
1112    let re_using = Regex::new(r"(?m)^using\s+([\w.]+)\s*;").unwrap();
1113    for cap in re_using.captures_iter(source) {
1114        let ns = &cap[1];
1115        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1116        let import_id = make_id(&[&ps, "using", ns]);
1117        result.nodes.push(GraphNode {
1118            id: import_id.clone(),
1119            label: ns.to_string(),
1120            source_file: ps.clone(),
1121            source_location: Some(format!("L{line}")),
1122            node_type: NodeType::Namespace,
1123            community: None,
1124            extra: HashMap::new(),
1125        });
1126        result.edges.push(make_edge(
1127            &file_id,
1128            &import_id,
1129            "imports",
1130            path,
1131            Confidence::Extracted,
1132        ));
1133    }
1134
1135    let call_edges = infer_calls(&functions, &lines, path);
1136    result.edges.extend(call_edges);
1137
1138    result
1139}
1140
1141// ═══════════════════════════════════════════════════════════════════════════
1142// Kotlin
1143// ═══════════════════════════════════════════════════════════════════════════
1144
1145fn extract_kotlin(path: &Path, source: &str) -> ExtractionResult {
1146    let mut result = ExtractionResult::default();
1147    let file_node = make_file_node(path);
1148    let file_id = file_node.id.clone();
1149    result.nodes.push(file_node);
1150
1151    let lines: Vec<&str> = source.lines().collect();
1152    let ps = path_str(path);
1153
1154    // Classes / objects / interfaces
1155    let re_class = Regex::new(
1156        r"(?m)(?:open\s+|abstract\s+|data\s+|sealed\s+)?(?:class|object|interface)\s+(\w+)",
1157    )
1158    .unwrap();
1159    for cap in re_class.captures_iter(source) {
1160        let name = &cap[1];
1161        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1162        let node = make_node(name, path, NodeType::Class, line);
1163        let node_id = node.id.clone();
1164        result.nodes.push(node);
1165        result.edges.push(make_edge(
1166            &file_id,
1167            &node_id,
1168            "defines",
1169            path,
1170            Confidence::Extracted,
1171        ));
1172    }
1173
1174    // Functions: `fun foo(`
1175    let re_func = Regex::new(r"(?m)^\s*(?:(?:private|public|protected|internal|override|open|suspend)\s+)*fun\s+(?:<[^>]+>\s+)?(\w+)\s*\(").unwrap();
1176    let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
1177    let func_matches: Vec<_> = re_func.captures_iter(source).collect();
1178    for (i, cap) in func_matches.iter().enumerate() {
1179        let name = cap[1].to_string();
1180        let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1181        let end_line = if i + 1 < func_matches.len() {
1182            source[..func_matches[i + 1].get(0).unwrap().start()]
1183                .lines()
1184                .count()
1185        } else {
1186            lines.len()
1187        };
1188
1189        let node = make_node(&name, path, NodeType::Function, start_line);
1190        let node_id = node.id.clone();
1191        functions.push((name, node_id.clone(), start_line, end_line));
1192        result.nodes.push(node);
1193        result.edges.push(make_edge(
1194            &file_id,
1195            &node_id,
1196            "defines",
1197            path,
1198            Confidence::Extracted,
1199        ));
1200    }
1201
1202    // Imports
1203    let re_import = Regex::new(r"(?m)^import\s+([\w.]+)").unwrap();
1204    for cap in re_import.captures_iter(source) {
1205        let module = &cap[1];
1206        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1207        let import_id = make_id(&[&ps, "import", module]);
1208        result.nodes.push(GraphNode {
1209            id: import_id.clone(),
1210            label: module.to_string(),
1211            source_file: ps.clone(),
1212            source_location: Some(format!("L{line}")),
1213            node_type: NodeType::Package,
1214            community: None,
1215            extra: HashMap::new(),
1216        });
1217        result.edges.push(make_edge(
1218            &file_id,
1219            &import_id,
1220            "imports",
1221            path,
1222            Confidence::Extracted,
1223        ));
1224    }
1225
1226    let call_edges = infer_calls(&functions, &lines, path);
1227    result.edges.extend(call_edges);
1228
1229    result
1230}
1231
1232// ═══════════════════════════════════════════════════════════════════════════
1233// Generic fallback (Scala, PHP, Swift, Lua, Zig, PowerShell, Elixir, ObjC, Julia)
1234// ═══════════════════════════════════════════════════════════════════════════
1235
1236fn extract_generic(path: &Path, source: &str, _lang: &str) -> ExtractionResult {
1237    let mut result = ExtractionResult::default();
1238    let file_node = make_file_node(path);
1239    let file_id = file_node.id.clone();
1240    result.nodes.push(file_node);
1241
1242    let lines: Vec<&str> = source.lines().collect();
1243    let ps = path_str(path);
1244
1245    // Generic class/struct/module pattern
1246    let re_class =
1247        Regex::new(r"(?m)^\s*(?:(?:pub|public|private|protected|internal|open|abstract|sealed|partial|static|final|export)\s+)*(?:class|struct|module|object|interface|trait|protocol|enum|defmodule)\s+(\w+(?:::\w+)*)")
1248            .unwrap();
1249    for cap in re_class.captures_iter(source) {
1250        let name = &cap[1];
1251        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1252        let node = make_node(name, path, NodeType::Class, line);
1253        let node_id = node.id.clone();
1254        result.nodes.push(node);
1255        result.edges.push(make_edge(
1256            &file_id,
1257            &node_id,
1258            "defines",
1259            path,
1260            Confidence::Extracted,
1261        ));
1262    }
1263
1264    // Generic function pattern
1265    let re_func = Regex::new(
1266        r"(?m)^\s*(?:(?:pub|public|private|protected|internal|open|override|suspend|static|async|export|def|defp)\s+)*(?:func|function|fn|def|defp|fun|sub)\s+(\w+[?!]?)\s*[\(<]",
1267    )
1268    .unwrap();
1269    let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
1270    let func_matches: Vec<_> = re_func.captures_iter(source).collect();
1271    for (i, cap) in func_matches.iter().enumerate() {
1272        let name = cap[1].to_string();
1273        let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1274        let end_line = if i + 1 < func_matches.len() {
1275            source[..func_matches[i + 1].get(0).unwrap().start()]
1276                .lines()
1277                .count()
1278        } else {
1279            lines.len()
1280        };
1281
1282        let node = make_node(&name, path, NodeType::Function, start_line);
1283        let node_id = node.id.clone();
1284        functions.push((name, node_id.clone(), start_line, end_line));
1285        result.nodes.push(node);
1286        result.edges.push(make_edge(
1287            &file_id,
1288            &node_id,
1289            "defines",
1290            path,
1291            Confidence::Extracted,
1292        ));
1293    }
1294
1295    // Generic import pattern
1296    let re_import =
1297        Regex::new(r#"(?m)^\s*(?:import|use|using|require|include|from)\s+['"]?([\w./:-]+)['"]?"#)
1298            .unwrap();
1299    for cap in re_import.captures_iter(source) {
1300        let module = &cap[1];
1301        let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1302        let import_id = make_id(&[&ps, "import", module]);
1303        result.nodes.push(GraphNode {
1304            id: import_id.clone(),
1305            label: module.to_string(),
1306            source_file: ps.clone(),
1307            source_location: Some(format!("L{line}")),
1308            node_type: NodeType::Module,
1309            community: None,
1310            extra: HashMap::new(),
1311        });
1312        result.edges.push(make_edge(
1313            &file_id,
1314            &import_id,
1315            "imports",
1316            path,
1317            Confidence::Extracted,
1318        ));
1319    }
1320
1321    let call_edges = infer_calls(&functions, &lines, path);
1322    result.edges.extend(call_edges);
1323
1324    result
1325}
1326
1327// ═══════════════════════════════════════════════════════════════════════════
1328// Tests
1329// ═══════════════════════════════════════════════════════════════════════════
1330
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    // ----- Shared helpers -----

    /// Labels of every node in `graph`, in emission order.
    fn node_labels(graph: &ExtractionResult) -> Vec<&str> {
        graph.nodes.iter().map(|n| n.label.as_str()).collect()
    }

    /// True when at least one node in `graph` has type `wanted`.
    fn has_node_type(graph: &ExtractionResult, wanted: NodeType) -> bool {
        graph.nodes.iter().any(|n| n.node_type == wanted)
    }

    /// Number of edges in `graph` whose relation equals `relation`.
    fn relation_count(graph: &ExtractionResult, relation: &str) -> usize {
        graph
            .edges
            .iter()
            .filter(|e| e.relation == relation)
            .count()
    }

    // ----- Python -----

    /// A class, its methods and a free function are all reported.
    #[test]
    fn python_extracts_class_and_methods() {
        let source = r#"
class MyClass:
    def __init__(self):
        pass

    def greet(self, name):
        return f"Hello {name}"

def standalone():
    pass
"#;
        let graph = extract_file(Path::new("test.py"), source, "python");
        let labels = node_labels(&graph);

        assert!(labels.contains(&"MyClass"), "missing MyClass: {labels:?}");
        assert!(labels.contains(&"__init__"), "missing __init__: {labels:?}");
        assert!(labels.contains(&"greet"), "missing greet: {labels:?}");
        assert!(
            labels.contains(&"standalone"),
            "missing standalone: {labels:?}"
        );

        // File node
        assert!(has_node_type(&graph, NodeType::File));
        // Class node
        assert!(has_node_type(&graph, NodeType::Class));
    }

    /// `import` and `from ... import` statements yield `imports` edges.
    #[test]
    fn python_extracts_imports() {
        let source = r#"
import os
from pathlib import Path
from collections import defaultdict, OrderedDict
"#;
        let graph = extract_file(Path::new("test.py"), source, "python");

        let found = relation_count(&graph, "imports");
        assert!(
            found >= 2,
            "expected at least 2 import edges, got {}",
            found
        );
    }

    /// A call to a function defined in the same file yields an inferred edge.
    #[test]
    fn python_infers_calls() {
        let source = r#"
def foo():
    bar()

def bar():
    pass
"#;
        let graph = extract_file(Path::new("test.py"), source, "python");

        let first_call = graph.edges.iter().find(|e| e.relation == "calls");
        assert!(first_call.is_some(), "expected call edges, got none");
        assert_eq!(first_call.unwrap().confidence, Confidence::Inferred);
    }

    // ----- Rust -----

    /// Structs, enums, traits, impls, functions and `use` lines are reported.
    #[test]
    fn rust_extracts_structs_and_functions() {
        let source = r#"
use std::collections::HashMap;

pub struct Config {
    name: String,
}

pub enum Status {
    Active,
    Inactive,
}

pub trait Runnable {
    fn run(&self);
}

impl Runnable for Config {
    fn run(&self) {
        println!("{}", self.name);
    }
}

pub fn main() {
    let c = Config { name: "test".into() };
    c.run();
}
"#;
        let graph = extract_file(Path::new("lib.rs"), source, "rust");
        let labels = node_labels(&graph);

        assert!(labels.contains(&"Config"), "missing Config: {labels:?}");
        assert!(labels.contains(&"Status"), "missing Status: {labels:?}");
        assert!(labels.contains(&"Runnable"), "missing Runnable: {labels:?}");
        assert!(labels.contains(&"main"), "missing main: {labels:?}");
        assert!(labels.contains(&"run"), "missing run: {labels:?}");

        // Check struct and enum types
        assert!(has_node_type(&graph, NodeType::Struct));
        assert!(has_node_type(&graph, NodeType::Enum));
        assert!(has_node_type(&graph, NodeType::Trait));

        // Implements edge
        assert!(
            relation_count(&graph, "implements") > 0,
            "missing implements edge"
        );

        // Use/import node
        assert!(
            labels.iter().any(|label| label.contains("std")),
            "missing use statement node"
        );
    }

    // ----- JavaScript / TypeScript -----

    /// Exported classes and functions are reported along with both imports.
    #[test]
    fn js_extracts_functions_and_classes() {
        let source = r#"
import { useState } from 'react';
import axios from 'axios';

export class ApiClient {
    constructor(baseUrl) {
        this.baseUrl = baseUrl;
    }
}

export function fetchData(url) {
    return axios.get(url);
}

const processData = (data) => {
    return data.map(x => x * 2);
};
"#;
        let graph = extract_file(Path::new("api.js"), source, "javascript");
        let labels = node_labels(&graph);

        assert!(
            labels.contains(&"ApiClient"),
            "missing ApiClient: {labels:?}"
        );
        assert!(
            labels.contains(&"fetchData"),
            "missing fetchData: {labels:?}"
        );

        // Import edges
        let import_count = relation_count(&graph, "imports");
        assert!(
            import_count >= 2,
            "expected >=2 imports, got {import_count}"
        );
    }

    /// TypeScript goes through the same extractor as JavaScript.
    #[test]
    fn ts_extracts_same_as_js() {
        let source = "export function hello(): string { return 'hi'; }\n";
        let graph = extract_file(Path::new("hello.ts"), source, "typescript");
        assert!(node_labels(&graph).contains(&"hello"));
    }

    // ----- Go -----

    /// Struct and interface types, methods, functions and imports are reported.
    #[test]
    fn go_extracts_types_and_functions() {
        let source = r#"
package main

import (
    "fmt"
    "os"
)

type Server struct {
    host string
    port int
}

type Handler interface {
    Handle()
}

func (s *Server) Start() {
    fmt.Println("starting")
}

func main() {
    s := Server{host: "localhost", port: 8080}
    s.Start()
}
"#;
        let graph = extract_file(Path::new("main.go"), source, "go");
        let labels = node_labels(&graph);

        assert!(labels.contains(&"Server"), "missing Server: {labels:?}");
        assert!(labels.contains(&"Handler"), "missing Handler: {labels:?}");
        assert!(labels.contains(&"Start"), "missing Start: {labels:?}");
        assert!(labels.contains(&"main"), "missing main: {labels:?}");

        assert!(has_node_type(&graph, NodeType::Interface));
        assert!(has_node_type(&graph, NodeType::Struct));

        // Import nodes
        assert!(labels.contains(&"fmt"), "missing fmt import");
    }

    // ----- Java -----

    /// The class, its methods and both imports are reported.
    #[test]
    fn java_extracts_class_and_methods() {
        let source = r#"
import java.util.List;
import java.util.ArrayList;

public class UserService {
    private List<String> users;

    public UserService() {
        this.users = new ArrayList<>();
    }

    public void addUser(String name) {
        users.add(name);
    }

    public List<String> getUsers() {
        return users;
    }
}
"#;
        let graph = extract_file(Path::new("UserService.java"), source, "java");
        let labels = node_labels(&graph);

        assert!(
            labels.contains(&"UserService"),
            "missing UserService: {labels:?}"
        );
        assert!(labels.contains(&"addUser"), "missing addUser: {labels:?}");
        assert!(labels.contains(&"getUsers"), "missing getUsers: {labels:?}");

        let import_count = relation_count(&graph, "imports");
        assert!(
            import_count >= 2,
            "expected >=2 imports, got {import_count}"
        );
    }

    // ----- C/C++ -----

    /// `#include` lines produce `includes` edges and functions are reported.
    #[test]
    fn c_extracts_includes_and_functions() {
        let source = r#"
#include <stdio.h>
#include "myheader.h"

typedef struct Point {
    int x;
    int y;
} Point;

int add(int a, int b) {
    return a + b;
}

int main() {
    printf("%d\n", add(1, 2));
    return 0;
}
"#;
        let graph = extract_file(Path::new("main.c"), source, "c");

        assert!(
            relation_count(&graph, "includes") > 0,
            "missing includes edge"
        );
        let labels = node_labels(&graph);
        assert!(labels.contains(&"main"), "missing main: {labels:?}");
        assert!(labels.contains(&"add"), "missing add: {labels:?}");
    }

    // ----- Generic -----

    /// An unrecognised language still produces at least the file node.
    #[test]
    fn generic_extracts_basic_patterns() {
        let source = r#"
defmodule MyApp.Worker do
  def start(args) do
    process(args)
  end

  def process(data) do
    IO.puts(data)
  end
end
"#;
        let graph = extract_file(Path::new("worker.ex"), source, "elixir");
        // Should at least have a file node and find some functions
        assert!(!graph.nodes.is_empty());
        assert!(has_node_type(&graph, NodeType::File), "missing file node");
    }

    // ----- ID generation consistency -----

    /// Extracting the same input twice yields the same node ids in order.
    #[test]
    fn node_ids_are_deterministic() {
        let source = "def foo():\n    pass\n";
        let first = extract_file(Path::new("test.py"), source, "python");
        let second = extract_file(Path::new("test.py"), source, "python");
        assert_eq!(first.nodes.len(), second.nodes.len());
        for (lhs, rhs) in first.nodes.iter().zip(&second.nodes) {
            assert_eq!(lhs.id, rhs.id);
        }
    }

    /// Every emitted edge records the file it came from.
    #[test]
    fn all_edges_have_source_file() {
        let source = "def foo():\n    bar()\ndef bar():\n    pass\n";
        let graph = extract_file(Path::new("x.py"), source, "python");
        for edge in graph.edges.iter() {
            assert!(!edge.source_file.is_empty());
        }
    }

    // ----- Ruby -----

    /// The class and both of its methods are reported.
    #[test]
    fn ruby_extracts_class_and_methods() {
        let source = r#"
require 'json'

class Greeter
  def initialize(name)
    @name = name
  end

  def greet
    "Hello, #{@name}!"
  end
end
"#;
        let graph = extract_file(Path::new("greeter.rb"), source, "ruby");
        let labels = node_labels(&graph);

        assert!(labels.contains(&"Greeter"), "missing Greeter: {labels:?}");
        assert!(
            labels.contains(&"initialize"),
            "missing initialize: {labels:?}"
        );
        assert!(labels.contains(&"greet"), "missing greet: {labels:?}");
    }

    // ----- Kotlin -----

    /// A data class and a top-level function are reported.
    #[test]
    fn kotlin_extracts_class_and_functions() {
        let source = r#"
import kotlin.math.sqrt

data class Point(val x: Double, val y: Double)

fun distance(a: Point, b: Point): Double {
    return sqrt((a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y))
}
"#;
        let graph = extract_file(Path::new("geometry.kt"), source, "kotlin");
        let labels = node_labels(&graph);

        assert!(labels.contains(&"Point"), "missing Point: {labels:?}");
        assert!(labels.contains(&"distance"), "missing distance: {labels:?}");
    }

    // ----- C# -----

    /// The class and its method are reported.
    #[test]
    fn csharp_extracts_class_and_methods() {
        let source = r#"
using System;
using System.Collections.Generic;

public class Calculator
{
    public int Add(int a, int b)
    {
        return a + b;
    }
}
"#;
        let graph = extract_file(Path::new("Calculator.cs"), source, "csharp");
        let labels = node_labels(&graph);

        assert!(
            labels.contains(&"Calculator"),
            "missing Calculator: {labels:?}"
        );
        assert!(labels.contains(&"Add"), "missing Add: {labels:?}");
    }
}