1use std::collections::HashMap;
15use std::path::Path;
16
17use graphify_core::confidence::Confidence;
18use graphify_core::id::make_id;
19use graphify_core::model::{ExtractionResult, GraphEdge, GraphNode, NodeType};
20use regex::Regex;
21use tracing::trace;
22
23pub fn extract_file(path: &Path, source: &str, lang: &str) -> ExtractionResult {
29 match lang {
30 "python" => extract_python(path, source),
31 "javascript" | "typescript" => extract_js_ts(path, source, lang),
32 "rust" => extract_rust(path, source),
33 "go" => extract_go(path, source),
34 "java" => extract_java(path, source),
35 "c" | "cpp" => extract_c_cpp(path, source, lang),
36 "ruby" => extract_ruby(path, source),
37 "csharp" => extract_csharp(path, source),
38 "kotlin" => extract_kotlin(path, source),
39 _ => extract_generic(path, source, lang),
40 }
41}
42
/// Returns the file name of `path` without its final extension.
///
/// Yields `"unknown"` when the path has no file name component or when the
/// stem is not valid UTF-8.
fn file_stem(path: &Path) -> String {
    match path.file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_owned(),
        None => String::from("unknown"),
    }
}
53
/// Renders `path` as an owned string, converting lossily if it is not valid UTF-8.
fn path_str(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    String::from(lossy)
}
57
58fn make_file_node(path: &Path) -> GraphNode {
59 let ps = path_str(path);
60 GraphNode {
61 id: make_id(&[&ps]),
62 label: file_stem(path),
63 source_file: ps,
64 source_location: None,
65 node_type: NodeType::File,
66 community: None,
67 extra: HashMap::new(),
68 }
69}
70
71fn make_node(name: &str, path: &Path, node_type: NodeType, line: usize) -> GraphNode {
72 let ps = path_str(path);
73 GraphNode {
74 id: make_id(&[&ps, name]),
75 label: name.to_string(),
76 source_file: ps,
77 source_location: Some(format!("L{line}")),
78 node_type,
79 community: None,
80 extra: HashMap::new(),
81 }
82}
83
84fn make_edge(
85 source_id: &str,
86 target_id: &str,
87 relation: &str,
88 path: &Path,
89 confidence: Confidence,
90) -> GraphEdge {
91 GraphEdge {
92 source: source_id.to_string(),
93 target: target_id.to_string(),
94 relation: relation.to_string(),
95 confidence: confidence.clone(),
96 confidence_score: confidence.default_score(),
97 source_file: path_str(path),
98 source_location: None,
99 weight: 1.0,
100 extra: HashMap::new(),
101 }
102}
103
104fn infer_calls(
107 functions: &[(String, String, usize, usize)], source_lines: &[&str],
109 path: &Path,
110) -> Vec<GraphEdge> {
111 let mut edges = Vec::new();
112 for (_caller_name, caller_id, start, end) in functions {
113 let body = source_lines
114 .get(*start..*end)
115 .unwrap_or_default()
116 .join("\n");
117 for (callee_name, callee_id, _, _) in functions {
118 if caller_id == callee_id {
119 continue;
120 }
121 let pattern = format!(r"\b{}\s*\(", regex::escape(callee_name));
123 if let Ok(re) = Regex::new(&pattern)
124 && re.is_match(&body)
125 {
126 edges.push(make_edge(
127 caller_id,
128 callee_id,
129 "calls",
130 path,
131 Confidence::Inferred,
132 ));
133 }
134 }
135 }
136 edges
137}
138
139fn extract_python(path: &Path, source: &str) -> ExtractionResult {
144 let mut result = ExtractionResult::default();
145 let file_node = make_file_node(path);
146 let file_id = file_node.id.clone();
147 result.nodes.push(file_node);
148
149 let lines: Vec<&str> = source.lines().collect();
150 let ps = path_str(path);
151
152 let re_class = Regex::new(r"(?m)^(\s*)class\s+(\w+)").unwrap();
154 let re_class_lookup = Regex::new(r"^(\s*)class\s+(\w+)").unwrap();
155 let mut class_ids: HashMap<String, String> = HashMap::new();
156 for cap in re_class.captures_iter(source) {
157 let name = &cap[2];
158 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
159 let node = make_node(name, path, NodeType::Class, line);
160 let node_id = node.id.clone();
161 class_ids.insert(name.to_string(), node_id.clone());
162 result.nodes.push(node);
163 result.edges.push(make_edge(
164 &file_id,
165 &node_id,
166 "defines",
167 path,
168 Confidence::Extracted,
169 ));
170 }
171
172 let re_func = Regex::new(r"(?m)^(\s*)def\s+(\w+)\s*\(").unwrap();
174 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
175 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
176 for (i, cap) in func_matches.iter().enumerate() {
177 let indent = cap[1].len();
178 let name = cap[2].to_string();
179 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
180
181 let node_type = if indent > 0 {
182 NodeType::Method
183 } else {
184 NodeType::Function
185 };
186 let node = make_node(&name, path, node_type, start_line);
187 let node_id = node.id.clone();
188
189 let parent_id = if indent > 0 {
191 let mut parent = None;
193 for line_idx in (0..start_line.saturating_sub(1)).rev() {
194 if let Some(line) = lines.get(line_idx)
195 && let Some(cls_cap) = re_class_lookup.captures(line)
196 && cls_cap[1].len() < indent
197 {
198 parent = class_ids.get(&cls_cap[2]).cloned();
199 break;
200 }
201 }
202 parent.unwrap_or_else(|| file_id.clone())
203 } else {
204 file_id.clone()
205 };
206
207 let end_line = if i + 1 < func_matches.len() {
209 source[..func_matches[i + 1].get(0).unwrap().start()]
210 .lines()
211 .count()
212 } else {
213 lines.len()
214 };
215
216 functions.push((name.clone(), node_id.clone(), start_line, end_line));
217 result.nodes.push(node);
218 result.edges.push(make_edge(
219 &parent_id,
220 &node_id,
221 "defines",
222 path,
223 Confidence::Extracted,
224 ));
225 }
226
227 let re_import = Regex::new(r"(?m)^(?:from\s+([\w.]+)\s+)?import\s+([\w.,\s*]+)").unwrap();
229 for cap in re_import.captures_iter(source) {
230 let module = cap.get(1).map_or("", |m| m.as_str());
231 let names_str = &cap[2];
232 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
233
234 for name in names_str.split(',') {
235 let name = name.trim().split(" as ").next().unwrap_or("").trim();
236 if name.is_empty() || name == "*" {
237 continue;
238 }
239 let full_name = if module.is_empty() {
240 name.to_string()
241 } else {
242 format!("{module}.{name}")
243 };
244 let import_id = make_id(&[&ps, "import", &full_name]);
245 result.nodes.push(GraphNode {
246 id: import_id.clone(),
247 label: full_name,
248 source_file: ps.clone(),
249 source_location: Some(format!("L{line}")),
250 node_type: NodeType::Module,
251 community: None,
252 extra: HashMap::new(),
253 });
254 result.edges.push(make_edge(
255 &file_id,
256 &import_id,
257 "imports",
258 path,
259 Confidence::Extracted,
260 ));
261 }
262 }
263
264 let call_edges = infer_calls(&functions, &lines, path);
266 result.edges.extend(call_edges);
267
268 trace!(
269 "python: {} nodes, {} edges from {}",
270 result.nodes.len(),
271 result.edges.len(),
272 ps
273 );
274 result
275}
276
277fn extract_js_ts(path: &Path, source: &str, lang: &str) -> ExtractionResult {
282 let mut result = ExtractionResult::default();
283 let file_node = make_file_node(path);
284 let file_id = file_node.id.clone();
285 result.nodes.push(file_node);
286
287 let lines: Vec<&str> = source.lines().collect();
288 let ps = path_str(path);
289
290 let re_class = Regex::new(r"(?m)(?:export\s+)?(?:default\s+)?class\s+(\w+)").unwrap();
292 for cap in re_class.captures_iter(source) {
293 let name = &cap[1];
294 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
295 let node = make_node(name, path, NodeType::Class, line);
296 let node_id = node.id.clone();
297 result.nodes.push(node);
298 result.edges.push(make_edge(
299 &file_id,
300 &node_id,
301 "defines",
302 path,
303 Confidence::Extracted,
304 ));
305 }
306
307 let re_func = Regex::new(
310 r"(?m)(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+(\w+)\s*\(|(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(?:\([^)]*\)|[^=])\s*=>"
311 )
312 .unwrap();
313 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
314 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
315
316 for (i, cap) in func_matches.iter().enumerate() {
317 let name = cap
318 .get(1)
319 .or(cap.get(2))
320 .map(|m| m.as_str().to_string())
321 .unwrap_or_default();
322 if name.is_empty() {
323 continue;
324 }
325 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
326 let end_line = if i + 1 < func_matches.len() {
327 source[..func_matches[i + 1].get(0).unwrap().start()]
328 .lines()
329 .count()
330 } else {
331 lines.len()
332 };
333
334 let node = make_node(&name, path, NodeType::Function, start_line);
335 let node_id = node.id.clone();
336 functions.push((name, node_id.clone(), start_line, end_line));
337 result.nodes.push(node);
338 result.edges.push(make_edge(
339 &file_id,
340 &node_id,
341 "defines",
342 path,
343 Confidence::Extracted,
344 ));
345 }
346
347 let re_import = Regex::new(
349 r#"(?m)import\s+(?:\{([^}]+)\}|(\w+))\s+from\s+['"]([^'"]+)['"]|import\s+['"]([^'"]+)['"]"#,
350 )
351 .unwrap();
352 for cap in re_import.captures_iter(source) {
353 let module = cap.get(3).or(cap.get(4)).map(|m| m.as_str()).unwrap_or("");
354 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
355
356 if let Some(names) = cap.get(1) {
357 for name in names.as_str().split(',') {
358 let name = name.trim().split(" as ").next().unwrap_or("").trim();
359 if name.is_empty() {
360 continue;
361 }
362 let full = format!("{module}/{name}");
363 let import_id = make_id(&[&ps, "import", &full]);
364 result.nodes.push(GraphNode {
365 id: import_id.clone(),
366 label: full,
367 source_file: ps.clone(),
368 source_location: Some(format!("L{line}")),
369 node_type: NodeType::Module,
370 community: None,
371 extra: HashMap::new(),
372 });
373 result.edges.push(make_edge(
374 &file_id,
375 &import_id,
376 "imports",
377 path,
378 Confidence::Extracted,
379 ));
380 }
381 } else if let Some(default_name) = cap.get(2) {
382 let name = default_name.as_str();
383 let import_id = make_id(&[&ps, "import", module]);
384 result.nodes.push(GraphNode {
385 id: import_id.clone(),
386 label: name.to_string(),
387 source_file: ps.clone(),
388 source_location: Some(format!("L{line}")),
389 node_type: NodeType::Module,
390 community: None,
391 extra: HashMap::new(),
392 });
393 result.edges.push(make_edge(
394 &file_id,
395 &import_id,
396 "imports",
397 path,
398 Confidence::Extracted,
399 ));
400 }
401 }
402
403 if lang == "javascript" {
405 let re_require = Regex::new(
406 r#"(?m)(?:const|let|var)\s+(\w+)\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)"#,
407 )
408 .unwrap();
409 for cap in re_require.captures_iter(source) {
410 let name = &cap[1];
411 let module = &cap[2];
412 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
413 let import_id = make_id(&[&ps, "import", module]);
414 result.nodes.push(GraphNode {
415 id: import_id.clone(),
416 label: name.to_string(),
417 source_file: ps.clone(),
418 source_location: Some(format!("L{line}")),
419 node_type: NodeType::Module,
420 community: None,
421 extra: HashMap::new(),
422 });
423 result.edges.push(make_edge(
424 &file_id,
425 &import_id,
426 "imports",
427 path,
428 Confidence::Extracted,
429 ));
430 }
431 }
432
433 let call_edges = infer_calls(&functions, &lines, path);
434 result.edges.extend(call_edges);
435
436 result
437}
438
439fn extract_rust(path: &Path, source: &str) -> ExtractionResult {
444 let mut result = ExtractionResult::default();
445 let file_node = make_file_node(path);
446 let file_id = file_node.id.clone();
447 result.nodes.push(file_node);
448
449 let lines: Vec<&str> = source.lines().collect();
450 let ps = path_str(path);
451
452 let re_struct = Regex::new(r"(?m)^(?:\s*pub(?:\([^)]*\))?\s+)?struct\s+(\w+)").unwrap();
454 for cap in re_struct.captures_iter(source) {
455 let name = &cap[1];
456 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
457 let node = make_node(name, path, NodeType::Struct, line);
458 let node_id = node.id.clone();
459 result.nodes.push(node);
460 result.edges.push(make_edge(
461 &file_id,
462 &node_id,
463 "defines",
464 path,
465 Confidence::Extracted,
466 ));
467 }
468
469 let re_enum = Regex::new(r"(?m)^(?:\s*pub(?:\([^)]*\))?\s+)?enum\s+(\w+)").unwrap();
471 for cap in re_enum.captures_iter(source) {
472 let name = &cap[1];
473 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
474 let node = make_node(name, path, NodeType::Enum, line);
475 let node_id = node.id.clone();
476 result.nodes.push(node);
477 result.edges.push(make_edge(
478 &file_id,
479 &node_id,
480 "defines",
481 path,
482 Confidence::Extracted,
483 ));
484 }
485
486 let re_trait = Regex::new(r"(?m)^(?:\s*pub(?:\([^)]*\))?\s+)?trait\s+(\w+)").unwrap();
488 for cap in re_trait.captures_iter(source) {
489 let name = &cap[1];
490 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
491 let node = make_node(name, path, NodeType::Trait, line);
492 let node_id = node.id.clone();
493 result.nodes.push(node);
494 result.edges.push(make_edge(
495 &file_id,
496 &node_id,
497 "defines",
498 path,
499 Confidence::Extracted,
500 ));
501 }
502
503 let re_impl = Regex::new(r"(?m)^(?:\s*)impl(?:<[^>]*>)?\s+(?:(\w+)\s+for\s+)?(\w+)").unwrap();
505 for cap in re_impl.captures_iter(source) {
506 let _trait_name = cap.get(1).map(|m| m.as_str());
507 let type_name = &cap[2];
508 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
509 if let Some(trait_m) = cap.get(1) {
511 let trait_id = make_id(&[&ps, trait_m.as_str()]);
512 let type_id = make_id(&[&ps, type_name]);
513 result.edges.push(make_edge(
514 &type_id,
515 &trait_id,
516 "implements",
517 path,
518 Confidence::Extracted,
519 ));
520 }
521 let _ = line;
522 }
523
524 let re_func = Regex::new(
527 r"(?m)^(\s*)(?:pub(?:\([^)]*\))?\s+)?(?:async\s+)?(?:unsafe\s+)?(?:const\s+)?fn\s+(\w+)",
528 )
529 .unwrap();
530 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
531 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
532 for (i, cap) in func_matches.iter().enumerate() {
533 let indent = cap[1].len();
534 let name = cap[2].to_string();
535 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
536 let end_line = if i + 1 < func_matches.len() {
537 source[..func_matches[i + 1].get(0).unwrap().start()]
538 .lines()
539 .count()
540 } else {
541 lines.len()
542 };
543
544 let node_type = if indent > 0 {
545 NodeType::Method
546 } else {
547 NodeType::Function
548 };
549 let node = make_node(&name, path, node_type, start_line);
550 let node_id = node.id.clone();
551 functions.push((name, node_id.clone(), start_line, end_line));
552 result.nodes.push(node);
553 result.edges.push(make_edge(
554 &file_id,
555 &node_id,
556 "defines",
557 path,
558 Confidence::Extracted,
559 ));
560 }
561
562 let re_use = Regex::new(r"(?m)^(?:\s*)(?:pub\s+)?use\s+([\w:]+)").unwrap();
564 for cap in re_use.captures_iter(source) {
565 let module = &cap[1];
566 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
567 let import_id = make_id(&[&ps, "use", module]);
568 result.nodes.push(GraphNode {
569 id: import_id.clone(),
570 label: module.to_string(),
571 source_file: ps.clone(),
572 source_location: Some(format!("L{line}")),
573 node_type: NodeType::Module,
574 community: None,
575 extra: HashMap::new(),
576 });
577 result.edges.push(make_edge(
578 &file_id,
579 &import_id,
580 "imports",
581 path,
582 Confidence::Extracted,
583 ));
584 }
585
586 let call_edges = infer_calls(&functions, &lines, path);
587 result.edges.extend(call_edges);
588
589 result
590}
591
592fn extract_go(path: &Path, source: &str) -> ExtractionResult {
597 let mut result = ExtractionResult::default();
598 let file_node = make_file_node(path);
599 let file_id = file_node.id.clone();
600 result.nodes.push(file_node);
601
602 let lines: Vec<&str> = source.lines().collect();
603 let ps = path_str(path);
604
605 let re_type = Regex::new(r"(?m)^type\s+(\w+)\s+(struct|interface)").unwrap();
607 for cap in re_type.captures_iter(source) {
608 let name = &cap[1];
609 let kind = &cap[2];
610 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
611 let node_type = match kind {
612 "interface" => NodeType::Interface,
613 _ => NodeType::Struct,
614 };
615 let node = make_node(name, path, node_type, line);
616 let node_id = node.id.clone();
617 result.nodes.push(node);
618 result.edges.push(make_edge(
619 &file_id,
620 &node_id,
621 "defines",
622 path,
623 Confidence::Extracted,
624 ));
625 }
626
627 let re_func = Regex::new(r"(?m)^func\s+(?:\([^)]+\)\s+)?(\w+)\s*\(").unwrap();
629 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
630 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
631 for (i, cap) in func_matches.iter().enumerate() {
632 let name = cap[1].to_string();
633 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
634 let end_line = if i + 1 < func_matches.len() {
635 source[..func_matches[i + 1].get(0).unwrap().start()]
636 .lines()
637 .count()
638 } else {
639 lines.len()
640 };
641
642 let full_match = cap.get(0).unwrap().as_str();
644 let node_type = if full_match.contains('(') && full_match.find('(') < full_match.find(&name)
645 {
646 NodeType::Method
647 } else {
648 NodeType::Function
649 };
650
651 let node = make_node(&name, path, node_type, start_line);
652 let node_id = node.id.clone();
653 functions.push((name, node_id.clone(), start_line, end_line));
654 result.nodes.push(node);
655 result.edges.push(make_edge(
656 &file_id,
657 &node_id,
658 "defines",
659 path,
660 Confidence::Extracted,
661 ));
662 }
663
664 let re_import_single = Regex::new(r#"(?m)^import\s+"([^"]+)""#).unwrap();
666 let re_import_block = Regex::new(r"(?s)import\s*\(([^)]+)\)").unwrap();
667 let re_import_line = Regex::new(r#""([^"]+)""#).unwrap();
668
669 for cap in re_import_single.captures_iter(source) {
670 let module = &cap[1];
671 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
672 let import_id = make_id(&[&ps, "import", module]);
673 result.nodes.push(GraphNode {
674 id: import_id.clone(),
675 label: module.to_string(),
676 source_file: ps.clone(),
677 source_location: Some(format!("L{line}")),
678 node_type: NodeType::Package,
679 community: None,
680 extra: HashMap::new(),
681 });
682 result.edges.push(make_edge(
683 &file_id,
684 &import_id,
685 "imports",
686 path,
687 Confidence::Extracted,
688 ));
689 }
690
691 for cap in re_import_block.captures_iter(source) {
692 let block = &cap[1];
693 let block_start = source[..cap.get(0).unwrap().start()].lines().count() + 1;
694 for (idx, imp_cap) in re_import_line.captures_iter(block).enumerate() {
695 let module = &imp_cap[1];
696 let import_id = make_id(&[&ps, "import", module]);
697 result.nodes.push(GraphNode {
698 id: import_id.clone(),
699 label: module.to_string(),
700 source_file: ps.clone(),
701 source_location: Some(format!("L{}", block_start + idx + 1)),
702 node_type: NodeType::Package,
703 community: None,
704 extra: HashMap::new(),
705 });
706 result.edges.push(make_edge(
707 &file_id,
708 &import_id,
709 "imports",
710 path,
711 Confidence::Extracted,
712 ));
713 }
714 }
715
716 let call_edges = infer_calls(&functions, &lines, path);
717 result.edges.extend(call_edges);
718
719 result
720}
721
722fn extract_java(path: &Path, source: &str) -> ExtractionResult {
727 let mut result = ExtractionResult::default();
728 let file_node = make_file_node(path);
729 let file_id = file_node.id.clone();
730 result.nodes.push(file_node);
731
732 let lines: Vec<&str> = source.lines().collect();
733 let ps = path_str(path);
734
735 let re_class = Regex::new(
737 r"(?m)(?:public\s+|private\s+|protected\s+)?(?:abstract\s+|static\s+|final\s+)*(class|interface|enum)\s+(\w+)",
738 )
739 .unwrap();
740 for cap in re_class.captures_iter(source) {
741 let kind = &cap[1];
742 let name = &cap[2];
743 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
744 let node_type = match kind {
745 "interface" => NodeType::Interface,
746 "enum" => NodeType::Enum,
747 _ => NodeType::Class,
748 };
749 let node = make_node(name, path, node_type, line);
750 let node_id = node.id.clone();
751 result.nodes.push(node);
752 result.edges.push(make_edge(
753 &file_id,
754 &node_id,
755 "defines",
756 path,
757 Confidence::Extracted,
758 ));
759 }
760
761 let re_method = Regex::new(
763 r"(?m)^\s+(?:public\s+|private\s+|protected\s+)?(?:static\s+)?(?:final\s+)?(?:synchronized\s+)?(?:abstract\s+)?(?:\w+(?:<[^>]*>)?)\s+(\w+)\s*\(",
764 )
765 .unwrap();
766 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
767 let func_matches: Vec<_> = re_method.captures_iter(source).collect();
768 for (i, cap) in func_matches.iter().enumerate() {
769 let name = cap[1].to_string();
770 if [
772 "if", "for", "while", "switch", "catch", "return", "new", "throw",
773 ]
774 .contains(&name.as_str())
775 {
776 continue;
777 }
778 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
779 let end_line = if i + 1 < func_matches.len() {
780 source[..func_matches[i + 1].get(0).unwrap().start()]
781 .lines()
782 .count()
783 } else {
784 lines.len()
785 };
786
787 let node = make_node(&name, path, NodeType::Method, start_line);
788 let node_id = node.id.clone();
789 functions.push((name, node_id.clone(), start_line, end_line));
790 result.nodes.push(node);
791 result.edges.push(make_edge(
792 &file_id,
793 &node_id,
794 "defines",
795 path,
796 Confidence::Extracted,
797 ));
798 }
799
800 let re_import = Regex::new(r"(?m)^import\s+(?:static\s+)?([\w.]+)\s*;").unwrap();
802 for cap in re_import.captures_iter(source) {
803 let module = &cap[1];
804 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
805 let import_id = make_id(&[&ps, "import", module]);
806 result.nodes.push(GraphNode {
807 id: import_id.clone(),
808 label: module.to_string(),
809 source_file: ps.clone(),
810 source_location: Some(format!("L{line}")),
811 node_type: NodeType::Package,
812 community: None,
813 extra: HashMap::new(),
814 });
815 result.edges.push(make_edge(
816 &file_id,
817 &import_id,
818 "imports",
819 path,
820 Confidence::Extracted,
821 ));
822 }
823
824 let call_edges = infer_calls(&functions, &lines, path);
825 result.edges.extend(call_edges);
826
827 result
828}
829
830fn extract_c_cpp(path: &Path, source: &str, lang: &str) -> ExtractionResult {
835 let mut result = ExtractionResult::default();
836 let file_node = make_file_node(path);
837 let file_id = file_node.id.clone();
838 result.nodes.push(file_node);
839
840 let lines: Vec<&str> = source.lines().collect();
841 let ps = path_str(path);
842
843 let re_include = Regex::new(r#"(?m)^#include\s+[<"]([^>"]+)[>"]"#).unwrap();
845 for cap in re_include.captures_iter(source) {
846 let header = &cap[1];
847 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
848 let import_id = make_id(&[&ps, "include", header]);
849 result.nodes.push(GraphNode {
850 id: import_id.clone(),
851 label: header.to_string(),
852 source_file: ps.clone(),
853 source_location: Some(format!("L{line}")),
854 node_type: NodeType::Module,
855 community: None,
856 extra: HashMap::new(),
857 });
858 result.edges.push(make_edge(
859 &file_id,
860 &import_id,
861 "includes",
862 path,
863 Confidence::Extracted,
864 ));
865 }
866
867 if lang == "cpp" {
869 let re_class = Regex::new(r"(?m)^(?:\s*)(?:class|struct|namespace)\s+(\w+)").unwrap();
870 for cap in re_class.captures_iter(source) {
871 let name = &cap[1];
872 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
873 let node = make_node(name, path, NodeType::Class, line);
874 let node_id = node.id.clone();
875 result.nodes.push(node);
876 result.edges.push(make_edge(
877 &file_id,
878 &node_id,
879 "defines",
880 path,
881 Confidence::Extracted,
882 ));
883 }
884 }
885
886 if lang == "c" {
888 let re_struct = Regex::new(r"(?m)^(?:typedef\s+)?struct\s+(\w+)").unwrap();
889 for cap in re_struct.captures_iter(source) {
890 let name = &cap[1];
891 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
892 let node = make_node(name, path, NodeType::Struct, line);
893 let node_id = node.id.clone();
894 result.nodes.push(node);
895 result.edges.push(make_edge(
896 &file_id,
897 &node_id,
898 "defines",
899 path,
900 Confidence::Extracted,
901 ));
902 }
903 }
904
905 let re_func = Regex::new(
907 r"(?m)^(?:static\s+)?(?:inline\s+)?(?:extern\s+)?(?:const\s+)?(?:unsigned\s+)?(?:signed\s+)?(?:\w+(?:\s*\*\s*|\s+))(\w+)\s*\([^;]*\)\s*\{",
908 )
909 .unwrap();
910 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
911 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
912 for (i, cap) in func_matches.iter().enumerate() {
913 let name = cap[1].to_string();
914 if ["if", "for", "while", "switch", "return", "sizeof"].contains(&name.as_str()) {
915 continue;
916 }
917 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
918 let end_line = if i + 1 < func_matches.len() {
919 source[..func_matches[i + 1].get(0).unwrap().start()]
920 .lines()
921 .count()
922 } else {
923 lines.len()
924 };
925
926 let node = make_node(&name, path, NodeType::Function, start_line);
927 let node_id = node.id.clone();
928 functions.push((name, node_id.clone(), start_line, end_line));
929 result.nodes.push(node);
930 result.edges.push(make_edge(
931 &file_id,
932 &node_id,
933 "defines",
934 path,
935 Confidence::Extracted,
936 ));
937 }
938
939 let call_edges = infer_calls(&functions, &lines, path);
940 result.edges.extend(call_edges);
941
942 result
943}
944
945fn extract_ruby(path: &Path, source: &str) -> ExtractionResult {
950 let mut result = ExtractionResult::default();
951 let file_node = make_file_node(path);
952 let file_id = file_node.id.clone();
953 result.nodes.push(file_node);
954
955 let lines: Vec<&str> = source.lines().collect();
956 let ps = path_str(path);
957
958 let re_class = Regex::new(r"(?m)^\s*(class|module)\s+(\w+(?:::\w+)*)").unwrap();
960 for cap in re_class.captures_iter(source) {
961 let name = &cap[2];
962 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
963 let node = make_node(name, path, NodeType::Class, line);
964 let node_id = node.id.clone();
965 result.nodes.push(node);
966 result.edges.push(make_edge(
967 &file_id,
968 &node_id,
969 "defines",
970 path,
971 Confidence::Extracted,
972 ));
973 }
974
975 let re_func = Regex::new(r"(?m)^\s*def\s+(self\.)?(\w+[?!=]?)").unwrap();
977 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
978 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
979 for (i, cap) in func_matches.iter().enumerate() {
980 let name = cap[2].to_string();
981 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
982 let end_line = if i + 1 < func_matches.len() {
983 source[..func_matches[i + 1].get(0).unwrap().start()]
984 .lines()
985 .count()
986 } else {
987 lines.len()
988 };
989
990 let node = make_node(&name, path, NodeType::Method, start_line);
991 let node_id = node.id.clone();
992 functions.push((name, node_id.clone(), start_line, end_line));
993 result.nodes.push(node);
994 result.edges.push(make_edge(
995 &file_id,
996 &node_id,
997 "defines",
998 path,
999 Confidence::Extracted,
1000 ));
1001 }
1002
1003 let re_require = Regex::new(r#"(?m)^\s*require(?:_relative)?\s+['"]([^'"]+)['"]"#).unwrap();
1005 for cap in re_require.captures_iter(source) {
1006 let module = &cap[1];
1007 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1008 let import_id = make_id(&[&ps, "require", module]);
1009 result.nodes.push(GraphNode {
1010 id: import_id.clone(),
1011 label: module.to_string(),
1012 source_file: ps.clone(),
1013 source_location: Some(format!("L{line}")),
1014 node_type: NodeType::Module,
1015 community: None,
1016 extra: HashMap::new(),
1017 });
1018 result.edges.push(make_edge(
1019 &file_id,
1020 &import_id,
1021 "imports",
1022 path,
1023 Confidence::Extracted,
1024 ));
1025 }
1026
1027 let call_edges = infer_calls(&functions, &lines, path);
1028 result.edges.extend(call_edges);
1029
1030 result
1031}
1032
1033fn extract_csharp(path: &Path, source: &str) -> ExtractionResult {
1038 let mut result = ExtractionResult::default();
1039 let file_node = make_file_node(path);
1040 let file_id = file_node.id.clone();
1041 result.nodes.push(file_node);
1042
1043 let lines: Vec<&str> = source.lines().collect();
1044 let ps = path_str(path);
1045
1046 let re_class = Regex::new(
1048 r"(?m)(?:public\s+|private\s+|protected\s+|internal\s+)?(?:abstract\s+|static\s+|sealed\s+|partial\s+)*(class|interface|struct|enum)\s+(\w+)",
1049 )
1050 .unwrap();
1051 for cap in re_class.captures_iter(source) {
1052 let kind = &cap[1];
1053 let name = &cap[2];
1054 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1055 let node_type = match kind {
1056 "interface" => NodeType::Interface,
1057 "struct" => NodeType::Struct,
1058 "enum" => NodeType::Enum,
1059 _ => NodeType::Class,
1060 };
1061 let node = make_node(name, path, node_type, line);
1062 let node_id = node.id.clone();
1063 result.nodes.push(node);
1064 result.edges.push(make_edge(
1065 &file_id,
1066 &node_id,
1067 "defines",
1068 path,
1069 Confidence::Extracted,
1070 ));
1071 }
1072
1073 let re_method = Regex::new(
1075 r"(?m)^\s+(?:public\s+|private\s+|protected\s+|internal\s+)?(?:static\s+)?(?:virtual\s+)?(?:override\s+)?(?:async\s+)?(?:\w+(?:<[^>]*>)?)\s+(\w+)\s*\(",
1076 )
1077 .unwrap();
1078 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
1079 let func_matches: Vec<_> = re_method.captures_iter(source).collect();
1080 for (i, cap) in func_matches.iter().enumerate() {
1081 let name = cap[1].to_string();
1082 if [
1083 "if", "for", "while", "switch", "catch", "return", "new", "throw",
1084 ]
1085 .contains(&name.as_str())
1086 {
1087 continue;
1088 }
1089 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1090 let end_line = if i + 1 < func_matches.len() {
1091 source[..func_matches[i + 1].get(0).unwrap().start()]
1092 .lines()
1093 .count()
1094 } else {
1095 lines.len()
1096 };
1097
1098 let node = make_node(&name, path, NodeType::Method, start_line);
1099 let node_id = node.id.clone();
1100 functions.push((name, node_id.clone(), start_line, end_line));
1101 result.nodes.push(node);
1102 result.edges.push(make_edge(
1103 &file_id,
1104 &node_id,
1105 "defines",
1106 path,
1107 Confidence::Extracted,
1108 ));
1109 }
1110
1111 let re_using = Regex::new(r"(?m)^using\s+([\w.]+)\s*;").unwrap();
1113 for cap in re_using.captures_iter(source) {
1114 let ns = &cap[1];
1115 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1116 let import_id = make_id(&[&ps, "using", ns]);
1117 result.nodes.push(GraphNode {
1118 id: import_id.clone(),
1119 label: ns.to_string(),
1120 source_file: ps.clone(),
1121 source_location: Some(format!("L{line}")),
1122 node_type: NodeType::Namespace,
1123 community: None,
1124 extra: HashMap::new(),
1125 });
1126 result.edges.push(make_edge(
1127 &file_id,
1128 &import_id,
1129 "imports",
1130 path,
1131 Confidence::Extracted,
1132 ));
1133 }
1134
1135 let call_edges = infer_calls(&functions, &lines, path);
1136 result.edges.extend(call_edges);
1137
1138 result
1139}
1140
1141fn extract_kotlin(path: &Path, source: &str) -> ExtractionResult {
1146 let mut result = ExtractionResult::default();
1147 let file_node = make_file_node(path);
1148 let file_id = file_node.id.clone();
1149 result.nodes.push(file_node);
1150
1151 let lines: Vec<&str> = source.lines().collect();
1152 let ps = path_str(path);
1153
1154 let re_class = Regex::new(
1156 r"(?m)(?:open\s+|abstract\s+|data\s+|sealed\s+)?(?:class|object|interface)\s+(\w+)",
1157 )
1158 .unwrap();
1159 for cap in re_class.captures_iter(source) {
1160 let name = &cap[1];
1161 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1162 let node = make_node(name, path, NodeType::Class, line);
1163 let node_id = node.id.clone();
1164 result.nodes.push(node);
1165 result.edges.push(make_edge(
1166 &file_id,
1167 &node_id,
1168 "defines",
1169 path,
1170 Confidence::Extracted,
1171 ));
1172 }
1173
1174 let re_func = Regex::new(r"(?m)^\s*(?:(?:private|public|protected|internal|override|open|suspend)\s+)*fun\s+(?:<[^>]+>\s+)?(\w+)\s*\(").unwrap();
1176 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
1177 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
1178 for (i, cap) in func_matches.iter().enumerate() {
1179 let name = cap[1].to_string();
1180 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1181 let end_line = if i + 1 < func_matches.len() {
1182 source[..func_matches[i + 1].get(0).unwrap().start()]
1183 .lines()
1184 .count()
1185 } else {
1186 lines.len()
1187 };
1188
1189 let node = make_node(&name, path, NodeType::Function, start_line);
1190 let node_id = node.id.clone();
1191 functions.push((name, node_id.clone(), start_line, end_line));
1192 result.nodes.push(node);
1193 result.edges.push(make_edge(
1194 &file_id,
1195 &node_id,
1196 "defines",
1197 path,
1198 Confidence::Extracted,
1199 ));
1200 }
1201
1202 let re_import = Regex::new(r"(?m)^import\s+([\w.]+)").unwrap();
1204 for cap in re_import.captures_iter(source) {
1205 let module = &cap[1];
1206 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1207 let import_id = make_id(&[&ps, "import", module]);
1208 result.nodes.push(GraphNode {
1209 id: import_id.clone(),
1210 label: module.to_string(),
1211 source_file: ps.clone(),
1212 source_location: Some(format!("L{line}")),
1213 node_type: NodeType::Package,
1214 community: None,
1215 extra: HashMap::new(),
1216 });
1217 result.edges.push(make_edge(
1218 &file_id,
1219 &import_id,
1220 "imports",
1221 path,
1222 Confidence::Extracted,
1223 ));
1224 }
1225
1226 let call_edges = infer_calls(&functions, &lines, path);
1227 result.edges.extend(call_edges);
1228
1229 result
1230}
1231
1232fn extract_generic(path: &Path, source: &str, _lang: &str) -> ExtractionResult {
1237 let mut result = ExtractionResult::default();
1238 let file_node = make_file_node(path);
1239 let file_id = file_node.id.clone();
1240 result.nodes.push(file_node);
1241
1242 let lines: Vec<&str> = source.lines().collect();
1243 let ps = path_str(path);
1244
1245 let re_class =
1247 Regex::new(r"(?m)^\s*(?:(?:pub|public|private|protected|internal|open|abstract|sealed|partial|static|final|export)\s+)*(?:class|struct|module|object|interface|trait|protocol|enum|defmodule)\s+(\w+(?:::\w+)*)")
1248 .unwrap();
1249 for cap in re_class.captures_iter(source) {
1250 let name = &cap[1];
1251 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1252 let node = make_node(name, path, NodeType::Class, line);
1253 let node_id = node.id.clone();
1254 result.nodes.push(node);
1255 result.edges.push(make_edge(
1256 &file_id,
1257 &node_id,
1258 "defines",
1259 path,
1260 Confidence::Extracted,
1261 ));
1262 }
1263
1264 let re_func = Regex::new(
1266 r"(?m)^\s*(?:(?:pub|public|private|protected|internal|open|override|suspend|static|async|export|def|defp)\s+)*(?:func|function|fn|def|defp|fun|sub)\s+(\w+[?!]?)\s*[\(<]",
1267 )
1268 .unwrap();
1269 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
1270 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
1271 for (i, cap) in func_matches.iter().enumerate() {
1272 let name = cap[1].to_string();
1273 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1274 let end_line = if i + 1 < func_matches.len() {
1275 source[..func_matches[i + 1].get(0).unwrap().start()]
1276 .lines()
1277 .count()
1278 } else {
1279 lines.len()
1280 };
1281
1282 let node = make_node(&name, path, NodeType::Function, start_line);
1283 let node_id = node.id.clone();
1284 functions.push((name, node_id.clone(), start_line, end_line));
1285 result.nodes.push(node);
1286 result.edges.push(make_edge(
1287 &file_id,
1288 &node_id,
1289 "defines",
1290 path,
1291 Confidence::Extracted,
1292 ));
1293 }
1294
1295 let re_import =
1297 Regex::new(r#"(?m)^\s*(?:import|use|using|require|include|from)\s+['"]?([\w./:-]+)['"]?"#)
1298 .unwrap();
1299 for cap in re_import.captures_iter(source) {
1300 let module = &cap[1];
1301 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1302 let import_id = make_id(&[&ps, "import", module]);
1303 result.nodes.push(GraphNode {
1304 id: import_id.clone(),
1305 label: module.to_string(),
1306 source_file: ps.clone(),
1307 source_location: Some(format!("L{line}")),
1308 node_type: NodeType::Module,
1309 community: None,
1310 extra: HashMap::new(),
1311 });
1312 result.edges.push(make_edge(
1313 &file_id,
1314 &import_id,
1315 "imports",
1316 path,
1317 Confidence::Extracted,
1318 ));
1319 }
1320
1321 let call_edges = infer_calls(&functions, &lines, path);
1322 result.edges.extend(call_edges);
1323
1324 result
1325}
1326
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Runs the extractor on an in-memory snippet as if it were stored at `file_name`.
    fn run(file_name: &str, code: &str, lang: &str) -> ExtractionResult {
        extract_file(Path::new(file_name), code, lang)
    }

    /// Labels of every node in `extraction`, in the order they were emitted.
    fn node_labels(extraction: &ExtractionResult) -> Vec<&str> {
        extraction
            .nodes
            .iter()
            .map(|node| node.label.as_str())
            .collect()
    }

    /// All edges in `extraction` whose relation equals `relation`.
    fn edges_with_relation<'a>(
        extraction: &'a ExtractionResult,
        relation: &str,
    ) -> Vec<&'a GraphEdge> {
        extraction
            .edges
            .iter()
            .filter(|edge| edge.relation == relation)
            .collect()
    }

    /// Whether at least one node in `extraction` has the given type.
    fn has_node_of_type(extraction: &ExtractionResult, wanted: NodeType) -> bool {
        extraction.nodes.iter().any(|node| node.node_type == wanted)
    }

    // ---- Python ----

    /// A class, its methods and a free function are all reported as nodes.
    #[test]
    fn python_extracts_class_and_methods() {
        let snippet = r#"
class MyClass:
 def __init__(self):
 pass

 def greet(self, name):
 return f"Hello {name}"

def standalone():
 pass
"#;
        let extraction = run("test.py", snippet, "python");
        let labels = node_labels(&extraction);

        assert!(labels.contains(&"MyClass"), "missing MyClass: {labels:?}");
        assert!(labels.contains(&"__init__"), "missing __init__: {labels:?}");
        assert!(labels.contains(&"greet"), "missing greet: {labels:?}");
        assert!(
            labels.contains(&"standalone"),
            "missing standalone: {labels:?}"
        );

        assert!(has_node_of_type(&extraction, NodeType::File));
        assert!(has_node_of_type(&extraction, NodeType::Class));
    }

    /// `import` and `from ... import` statements yield `imports` edges.
    #[test]
    fn python_extracts_imports() {
        let snippet = r#"
import os
from pathlib import Path
from collections import defaultdict, OrderedDict
"#;
        let extraction = run("test.py", snippet, "python");
        let import_edges = edges_with_relation(&extraction, "imports");

        assert!(
            import_edges.len() >= 2,
            "expected at least 2 import edges, got {}",
            import_edges.len()
        );
    }

    /// A call between two functions in the same file yields an inferred `calls` edge.
    #[test]
    fn python_infers_calls() {
        let snippet = r#"
def foo():
 bar()

def bar():
 pass
"#;
        let extraction = run("test.py", snippet, "python");
        let call_edges = edges_with_relation(&extraction, "calls");

        assert!(!call_edges.is_empty(), "expected call edges, got none");
        assert_eq!(call_edges[0].confidence, Confidence::Inferred);
    }

    // ---- Rust ----

    /// Structs, enums, traits, functions, `impl ... for` and `use` are all picked up.
    #[test]
    fn rust_extracts_structs_and_functions() {
        let snippet = r#"
use std::collections::HashMap;

pub struct Config {
 name: String,
}

pub enum Status {
 Active,
 Inactive,
}

pub trait Runnable {
 fn run(&self);
}

impl Runnable for Config {
 fn run(&self) {
 println!("{}", self.name);
 }
}

pub fn main() {
 let c = Config { name: "test".into() };
 c.run();
}
"#;
        let extraction = run("lib.rs", snippet, "rust");
        let labels = node_labels(&extraction);

        assert!(labels.contains(&"Config"), "missing Config: {labels:?}");
        assert!(labels.contains(&"Status"), "missing Status: {labels:?}");
        assert!(labels.contains(&"Runnable"), "missing Runnable: {labels:?}");
        assert!(labels.contains(&"main"), "missing main: {labels:?}");
        assert!(labels.contains(&"run"), "missing run: {labels:?}");

        assert!(has_node_of_type(&extraction, NodeType::Struct));
        assert!(has_node_of_type(&extraction, NodeType::Enum));
        assert!(has_node_of_type(&extraction, NodeType::Trait));

        assert!(
            !edges_with_relation(&extraction, "implements").is_empty(),
            "missing implements edge"
        );

        assert!(
            labels.iter().any(|label| label.contains("std")),
            "missing use statement node"
        );
    }

    // ---- JavaScript / TypeScript ----

    /// Classes, named functions and ES module imports are reported for JavaScript.
    #[test]
    fn js_extracts_functions_and_classes() {
        let snippet = r#"
import { useState } from 'react';
import axios from 'axios';

export class ApiClient {
 constructor(baseUrl) {
 this.baseUrl = baseUrl;
 }
}

export function fetchData(url) {
 return axios.get(url);
}

const processData = (data) => {
 return data.map(x => x * 2);
};
"#;
        let extraction = run("api.js", snippet, "javascript");
        let labels = node_labels(&extraction);

        assert!(
            labels.contains(&"ApiClient"),
            "missing ApiClient: {labels:?}"
        );
        assert!(
            labels.contains(&"fetchData"),
            "missing fetchData: {labels:?}"
        );

        let import_count = edges_with_relation(&extraction, "imports").len();
        assert!(
            import_count >= 2,
            "expected >=2 imports, got {import_count}"
        );
    }

    /// TypeScript input goes through the same extractor and finds the function.
    #[test]
    fn ts_extracts_same_as_js() {
        let snippet = "export function hello(): string { return 'hi'; }\n";
        let extraction = run("hello.ts", snippet, "typescript");
        assert!(node_labels(&extraction).contains(&"hello"));
    }

    // ---- Go ----

    /// Struct and interface types, methods, functions and grouped imports are found.
    #[test]
    fn go_extracts_types_and_functions() {
        let snippet = r#"
package main

import (
 "fmt"
 "os"
)

type Server struct {
 host string
 port int
}

type Handler interface {
 Handle()
}

func (s *Server) Start() {
 fmt.Println("starting")
}

func main() {
 s := Server{host: "localhost", port: 8080}
 s.Start()
}
"#;
        let extraction = run("main.go", snippet, "go");
        let labels = node_labels(&extraction);

        assert!(labels.contains(&"Server"), "missing Server: {labels:?}");
        assert!(labels.contains(&"Handler"), "missing Handler: {labels:?}");
        assert!(labels.contains(&"Start"), "missing Start: {labels:?}");
        assert!(labels.contains(&"main"), "missing main: {labels:?}");

        assert!(has_node_of_type(&extraction, NodeType::Interface));
        assert!(has_node_of_type(&extraction, NodeType::Struct));

        assert!(labels.contains(&"fmt"), "missing fmt import");
    }

    // ---- Java ----

    /// The class, its methods and both imports are reported.
    #[test]
    fn java_extracts_class_and_methods() {
        let snippet = r#"
import java.util.List;
import java.util.ArrayList;

public class UserService {
 private List<String> users;

 public UserService() {
 this.users = new ArrayList<>();
 }

 public void addUser(String name) {
 users.add(name);
 }

 public List<String> getUsers() {
 return users;
 }
}
"#;
        let extraction = run("UserService.java", snippet, "java");
        let labels = node_labels(&extraction);

        assert!(
            labels.contains(&"UserService"),
            "missing UserService: {labels:?}"
        );
        assert!(labels.contains(&"addUser"), "missing addUser: {labels:?}");
        assert!(labels.contains(&"getUsers"), "missing getUsers: {labels:?}");

        let import_count = edges_with_relation(&extraction, "imports").len();
        assert!(
            import_count >= 2,
            "expected >=2 imports, got {import_count}"
        );
    }

    // ---- C ----

    /// `#include` lines produce `includes` edges and function definitions are found.
    #[test]
    fn c_extracts_includes_and_functions() {
        let snippet = r#"
#include <stdio.h>
#include "myheader.h"

typedef struct Point {
 int x;
 int y;
} Point;

int add(int a, int b) {
 return a + b;
}

int main() {
 printf("%d\n", add(1, 2));
 return 0;
}
"#;
        let extraction = run("main.c", snippet, "c");

        assert!(
            !edges_with_relation(&extraction, "includes").is_empty(),
            "missing includes edge"
        );
        let labels = node_labels(&extraction);
        assert!(labels.contains(&"main"), "missing main: {labels:?}");
        assert!(labels.contains(&"add"), "missing add: {labels:?}");
    }

    // ---- Generic fallback ----

    /// A language without a dedicated extractor still yields at least a file node.
    #[test]
    fn generic_extracts_basic_patterns() {
        let snippet = r#"
defmodule MyApp.Worker do
 def start(args) do
 process(args)
 end

 def process(data) do
 IO.puts(data)
 end
end
"#;
        let extraction = run("worker.ex", snippet, "elixir");

        assert!(!extraction.nodes.is_empty());
        assert!(
            has_node_of_type(&extraction, NodeType::File),
            "missing file node"
        );
    }

    // ---- Cross-cutting properties ----

    /// Extracting the same input twice gives the same node ids in the same order.
    #[test]
    fn node_ids_are_deterministic() {
        let snippet = "def foo():\n pass\n";
        let first = run("test.py", snippet, "python");
        let second = run("test.py", snippet, "python");

        assert_eq!(first.nodes.len(), second.nodes.len());
        for (lhs, rhs) in first.nodes.iter().zip(&second.nodes) {
            assert_eq!(lhs.id, rhs.id);
        }
    }

    /// Every emitted edge records the file it came from.
    #[test]
    fn all_edges_have_source_file() {
        let snippet = "def foo():\n bar()\ndef bar():\n pass\n";
        let extraction = run("x.py", snippet, "python");

        assert!(extraction
            .edges
            .iter()
            .all(|edge| !edge.source_file.is_empty()));
    }

    // ---- Ruby ----

    /// The class and both of its methods are reported.
    #[test]
    fn ruby_extracts_class_and_methods() {
        let snippet = r#"
require 'json'

class Greeter
 def initialize(name)
 @name = name
 end

 def greet
 "Hello, #{@name}!"
 end
end
"#;
        let extraction = run("greeter.rb", snippet, "ruby");
        let labels = node_labels(&extraction);

        assert!(labels.contains(&"Greeter"), "missing Greeter: {labels:?}");
        assert!(
            labels.contains(&"initialize"),
            "missing initialize: {labels:?}"
        );
        assert!(labels.contains(&"greet"), "missing greet: {labels:?}");
    }

    // ---- Kotlin ----

    /// A data class and a top-level function are reported.
    #[test]
    fn kotlin_extracts_class_and_functions() {
        let snippet = r#"
import kotlin.math.sqrt

data class Point(val x: Double, val y: Double)

fun distance(a: Point, b: Point): Double {
 return sqrt((a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y))
}
"#;
        let extraction = run("geometry.kt", snippet, "kotlin");
        let labels = node_labels(&extraction);

        assert!(labels.contains(&"Point"), "missing Point: {labels:?}");
        assert!(labels.contains(&"distance"), "missing distance: {labels:?}");
    }

    // ---- C# ----

    /// The class and its method are reported.
    #[test]
    fn csharp_extracts_class_and_methods() {
        let snippet = r#"
using System;
using System.Collections.Generic;

public class Calculator
{
 public int Add(int a, int b)
 {
 return a + b;
 }
}
"#;
        let extraction = run("Calculator.cs", snippet, "csharp");
        let labels = node_labels(&extraction);

        assert!(
            labels.contains(&"Calculator"),
            "missing Calculator: {labels:?}"
        );
        assert!(labels.contains(&"Add"), "missing Add: {labels:?}");
    }
}