1use std::collections::HashMap;
15use std::path::Path;
16
17use graphify_core::confidence::Confidence;
18use graphify_core::id::make_id;
19use graphify_core::model::{ExtractionResult, GraphEdge, GraphNode, NodeType};
20use regex::Regex;
21use tracing::trace;
22
23pub fn extract_file(path: &Path, source: &str, lang: &str) -> ExtractionResult {
29 match lang {
30 "python" => extract_python(path, source),
31 "javascript" | "typescript" => extract_js_ts(path, source, lang),
32 "rust" => extract_rust(path, source),
33 "go" => extract_go(path, source),
34 "java" => extract_java(path, source),
35 "c" | "cpp" => extract_c_cpp(path, source, lang),
36 "ruby" => extract_ruby(path, source),
37 "csharp" => extract_csharp(path, source),
38 "kotlin" => extract_kotlin(path, source),
39 _ => extract_generic(path, source, lang),
40 }
41}
42
/// Returns the file name of `path` without its final extension.
///
/// Falls back to `"unknown"` when the path has no file name or the stem is
/// not valid UTF-8.
fn file_stem(path: &Path) -> String {
    match path.file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_owned(),
        None => String::from("unknown"),
    }
}
53
/// Renders `path` as an owned `String`, using the lossy conversion of
/// `Path::to_string_lossy` for any non-UTF-8 parts.
fn path_str(path: &Path) -> String {
    String::from(path.to_string_lossy())
}
57
58fn make_file_node(path: &Path) -> GraphNode {
59 let ps = path_str(path);
60 GraphNode {
61 id: make_id(&[&ps]),
62 label: file_stem(path),
63 source_file: ps,
64 source_location: None,
65 node_type: NodeType::File,
66 community: None,
67 extra: HashMap::new(),
68 }
69}
70
71fn make_node(name: &str, path: &Path, node_type: NodeType, line: usize) -> GraphNode {
72 let ps = path_str(path);
73 GraphNode {
74 id: make_id(&[&ps, name]),
75 label: name.to_string(),
76 source_file: ps,
77 source_location: Some(format!("L{line}")),
78 node_type,
79 community: None,
80 extra: HashMap::new(),
81 }
82}
83
84fn make_edge(
85 source_id: &str,
86 target_id: &str,
87 relation: &str,
88 path: &Path,
89 confidence: Confidence,
90) -> GraphEdge {
91 GraphEdge {
92 source: source_id.to_string(),
93 target: target_id.to_string(),
94 relation: relation.to_string(),
95 confidence: confidence.clone(),
96 confidence_score: confidence.default_score(),
97 source_file: path_str(path),
98 source_location: None,
99 weight: 1.0,
100 extra: HashMap::new(),
101 }
102}
103
104fn infer_calls(
107 functions: &[(String, String, usize, usize)], source_lines: &[&str],
109 path: &Path,
110) -> Vec<GraphEdge> {
111 let mut edges = Vec::new();
112 for (_caller_name, caller_id, start, end) in functions {
113 let body = source_lines
114 .get(*start..*end)
115 .unwrap_or_default()
116 .join("\n");
117 for (callee_name, callee_id, _, _) in functions {
118 if caller_id == callee_id {
119 continue;
120 }
121 let pattern = format!(r"\b{}\s*\(", regex::escape(callee_name));
123 if let Ok(re) = Regex::new(&pattern)
124 && re.is_match(&body)
125 {
126 edges.push(make_edge(
127 caller_id,
128 callee_id,
129 "calls",
130 path,
131 Confidence::Inferred,
132 ));
133 }
134 }
135 }
136 edges
137}
138
139fn extract_python(path: &Path, source: &str) -> ExtractionResult {
144 let mut result = ExtractionResult::default();
145 let file_node = make_file_node(path);
146 let file_id = file_node.id.clone();
147 result.nodes.push(file_node);
148
149 let lines: Vec<&str> = source.lines().collect();
150 let ps = path_str(path);
151
152 let re_class = Regex::new(r"(?m)^(\s*)class\s+(\w+)").unwrap();
154 let re_class_lookup = Regex::new(r"^(\s*)class\s+(\w+)").unwrap();
155 let mut class_ids: HashMap<String, String> = HashMap::new();
156 for cap in re_class.captures_iter(source) {
157 let name = &cap[2];
158 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
159 let node = make_node(name, path, NodeType::Class, line);
160 let node_id = node.id.clone();
161 class_ids.insert(name.to_string(), node_id.clone());
162 result.nodes.push(node);
163 result.edges.push(make_edge(
164 &file_id,
165 &node_id,
166 "defines",
167 path,
168 Confidence::Extracted,
169 ));
170 }
171
172 let re_func = Regex::new(r"(?m)^(\s*)def\s+(\w+)\s*\(").unwrap();
174 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
175 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
176 for (i, cap) in func_matches.iter().enumerate() {
177 let indent = cap[1].len();
178 let name = cap[2].to_string();
179 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
180
181 let node_type = if indent > 0 {
182 NodeType::Method
183 } else {
184 NodeType::Function
185 };
186 let node = make_node(&name, path, node_type, start_line);
187 let node_id = node.id.clone();
188
189 let parent_id = if indent > 0 {
191 let mut parent = None;
193 for line_idx in (0..start_line.saturating_sub(1)).rev() {
194 if let Some(line) = lines.get(line_idx)
195 && let Some(cls_cap) = re_class_lookup.captures(line)
196 && cls_cap[1].len() < indent
197 {
198 parent = class_ids.get(&cls_cap[2]).cloned();
199 break;
200 }
201 }
202 parent.unwrap_or_else(|| file_id.clone())
203 } else {
204 file_id.clone()
205 };
206
207 let end_line = if i + 1 < func_matches.len() {
209 source[..func_matches[i + 1].get(0).unwrap().start()]
210 .lines()
211 .count()
212 } else {
213 lines.len()
214 };
215
216 functions.push((name.clone(), node_id.clone(), start_line, end_line));
217 result.nodes.push(node);
218 result.edges.push(make_edge(
219 &parent_id,
220 &node_id,
221 "defines",
222 path,
223 Confidence::Extracted,
224 ));
225 }
226
227 let re_import = Regex::new(r"(?m)^(?:from\s+([\w.]+)\s+)?import\s+([\w.,\s*]+)").unwrap();
229 for cap in re_import.captures_iter(source) {
230 let module = cap.get(1).map_or("", |m| m.as_str());
231 let names_str = &cap[2];
232 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
233
234 for name in names_str.split(',') {
235 let name = name.trim().split(" as ").next().unwrap_or("").trim();
236 if name.is_empty() || name == "*" {
237 continue;
238 }
239 let full_name = if module.is_empty() {
240 name.to_string()
241 } else {
242 format!("{module}.{name}")
243 };
244 let import_id = make_id(&[&ps, "import", &full_name]);
245 result.nodes.push(GraphNode {
246 id: import_id.clone(),
247 label: full_name,
248 source_file: ps.clone(),
249 source_location: Some(format!("L{line}")),
250 node_type: NodeType::Module,
251 community: None,
252 extra: HashMap::new(),
253 });
254 result.edges.push(make_edge(
255 &file_id,
256 &import_id,
257 "imports",
258 path,
259 Confidence::Extracted,
260 ));
261 }
262 }
263
264 let call_edges = infer_calls(&functions, &lines, path);
266 result.edges.extend(call_edges);
267
268 trace!(
269 "python: {} nodes, {} edges from {}",
270 result.nodes.len(),
271 result.edges.len(),
272 ps
273 );
274 result
275}
276
277fn extract_js_ts(path: &Path, source: &str, lang: &str) -> ExtractionResult {
282 let mut result = ExtractionResult::default();
283 let file_node = make_file_node(path);
284 let file_id = file_node.id.clone();
285 result.nodes.push(file_node);
286
287 let lines: Vec<&str> = source.lines().collect();
288 let ps = path_str(path);
289
290 let re_class = Regex::new(r"(?m)(?:export\s+)?(?:default\s+)?class\s+(\w+)").unwrap();
292 for cap in re_class.captures_iter(source) {
293 let name = &cap[1];
294 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
295 let node = make_node(name, path, NodeType::Class, line);
296 let node_id = node.id.clone();
297 result.nodes.push(node);
298 result.edges.push(make_edge(
299 &file_id,
300 &node_id,
301 "defines",
302 path,
303 Confidence::Extracted,
304 ));
305 }
306
307 let re_func = Regex::new(
310 r"(?m)(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+(\w+)\s*\(|(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(?:\([^)]*\)|[^=])\s*=>"
311 )
312 .unwrap();
313 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
314 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
315
316 for (i, cap) in func_matches.iter().enumerate() {
317 let name = cap
318 .get(1)
319 .or(cap.get(2))
320 .map(|m| m.as_str().to_string())
321 .unwrap_or_default();
322 if name.is_empty() {
323 continue;
324 }
325 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
326 let end_line = if i + 1 < func_matches.len() {
327 source[..func_matches[i + 1].get(0).unwrap().start()]
328 .lines()
329 .count()
330 } else {
331 lines.len()
332 };
333
334 let node = make_node(&name, path, NodeType::Function, start_line);
335 let node_id = node.id.clone();
336 functions.push((name, node_id.clone(), start_line, end_line));
337 result.nodes.push(node);
338 result.edges.push(make_edge(
339 &file_id,
340 &node_id,
341 "defines",
342 path,
343 Confidence::Extracted,
344 ));
345 }
346
347 let re_import = Regex::new(
349 r#"(?m)import\s+(?:\{([^}]+)\}|(\w+))\s+from\s+['"]([^'"]+)['"]|import\s+['"]([^'"]+)['"]"#,
350 )
351 .unwrap();
352 for cap in re_import.captures_iter(source) {
353 let module = cap.get(3).or(cap.get(4)).map(|m| m.as_str()).unwrap_or("");
354 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
355
356 if let Some(names) = cap.get(1) {
357 for name in names.as_str().split(',') {
358 let name = name.trim().split(" as ").next().unwrap_or("").trim();
359 if name.is_empty() {
360 continue;
361 }
362 let full = format!("{module}/{name}");
363 let import_id = make_id(&[&ps, "import", &full]);
364 result.nodes.push(GraphNode {
365 id: import_id.clone(),
366 label: full,
367 source_file: ps.clone(),
368 source_location: Some(format!("L{line}")),
369 node_type: NodeType::Module,
370 community: None,
371 extra: HashMap::new(),
372 });
373 result.edges.push(make_edge(
374 &file_id,
375 &import_id,
376 "imports",
377 path,
378 Confidence::Extracted,
379 ));
380 }
381 } else if let Some(default_name) = cap.get(2) {
382 let name = default_name.as_str();
383 let import_id = make_id(&[&ps, "import", module]);
384 result.nodes.push(GraphNode {
385 id: import_id.clone(),
386 label: name.to_string(),
387 source_file: ps.clone(),
388 source_location: Some(format!("L{line}")),
389 node_type: NodeType::Module,
390 community: None,
391 extra: HashMap::new(),
392 });
393 result.edges.push(make_edge(
394 &file_id,
395 &import_id,
396 "imports",
397 path,
398 Confidence::Extracted,
399 ));
400 }
401 }
402
403 if lang == "javascript" {
405 let re_require = Regex::new(
406 r#"(?m)(?:const|let|var)\s+(\w+)\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)"#,
407 )
408 .unwrap();
409 for cap in re_require.captures_iter(source) {
410 let name = &cap[1];
411 let module = &cap[2];
412 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
413 let import_id = make_id(&[&ps, "import", module]);
414 result.nodes.push(GraphNode {
415 id: import_id.clone(),
416 label: name.to_string(),
417 source_file: ps.clone(),
418 source_location: Some(format!("L{line}")),
419 node_type: NodeType::Module,
420 community: None,
421 extra: HashMap::new(),
422 });
423 result.edges.push(make_edge(
424 &file_id,
425 &import_id,
426 "imports",
427 path,
428 Confidence::Extracted,
429 ));
430 }
431 }
432
433 let call_edges = infer_calls(&functions, &lines, path);
434 result.edges.extend(call_edges);
435
436 result
437}
438
439fn extract_rust(path: &Path, source: &str) -> ExtractionResult {
444 let mut result = ExtractionResult::default();
445 let file_node = make_file_node(path);
446 let file_id = file_node.id.clone();
447 result.nodes.push(file_node);
448
449 let lines: Vec<&str> = source.lines().collect();
450 let ps = path_str(path);
451
452 let re_struct = Regex::new(r"(?m)^(?:\s*pub(?:\([^)]*\))?\s+)?struct\s+(\w+)").unwrap();
454 for cap in re_struct.captures_iter(source) {
455 let name = &cap[1];
456 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
457 let node = make_node(name, path, NodeType::Struct, line);
458 let node_id = node.id.clone();
459 result.nodes.push(node);
460 result.edges.push(make_edge(
461 &file_id,
462 &node_id,
463 "defines",
464 path,
465 Confidence::Extracted,
466 ));
467 }
468
469 let re_enum = Regex::new(r"(?m)^(?:\s*pub(?:\([^)]*\))?\s+)?enum\s+(\w+)").unwrap();
471 for cap in re_enum.captures_iter(source) {
472 let name = &cap[1];
473 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
474 let node = make_node(name, path, NodeType::Enum, line);
475 let node_id = node.id.clone();
476 result.nodes.push(node);
477 result.edges.push(make_edge(
478 &file_id,
479 &node_id,
480 "defines",
481 path,
482 Confidence::Extracted,
483 ));
484 }
485
486 let re_trait = Regex::new(r"(?m)^(?:\s*pub(?:\([^)]*\))?\s+)?trait\s+(\w+)").unwrap();
488 for cap in re_trait.captures_iter(source) {
489 let name = &cap[1];
490 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
491 let node = make_node(name, path, NodeType::Trait, line);
492 let node_id = node.id.clone();
493 result.nodes.push(node);
494 result.edges.push(make_edge(
495 &file_id,
496 &node_id,
497 "defines",
498 path,
499 Confidence::Extracted,
500 ));
501 }
502
503 let re_impl = Regex::new(r"(?m)^(?:\s*)impl(?:<[^>]*>)?\s+(?:(\w+)\s+for\s+)?(\w+)").unwrap();
505 for cap in re_impl.captures_iter(source) {
506 let _trait_name = cap.get(1).map(|m| m.as_str());
507 let type_name = &cap[2];
508 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
509 if let Some(trait_m) = cap.get(1) {
511 let trait_id = make_id(&[&ps, trait_m.as_str()]);
512 let type_id = make_id(&[&ps, type_name]);
513 result.edges.push(make_edge(
514 &type_id,
515 &trait_id,
516 "implements",
517 path,
518 Confidence::Extracted,
519 ));
520 }
521 let _ = line;
522 }
523
524 let re_func = Regex::new(
527 r"(?m)^(\s*)(?:pub(?:\([^)]*\))?\s+)?(?:async\s+)?(?:unsafe\s+)?(?:const\s+)?fn\s+(\w+)",
528 )
529 .unwrap();
530 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
531 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
532 for (i, cap) in func_matches.iter().enumerate() {
533 let indent = cap[1].len();
534 let name = cap[2].to_string();
535 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
536 let end_line = if i + 1 < func_matches.len() {
537 source[..func_matches[i + 1].get(0).unwrap().start()]
538 .lines()
539 .count()
540 } else {
541 lines.len()
542 };
543
544 let node_type = if indent > 0 {
545 NodeType::Method
546 } else {
547 NodeType::Function
548 };
549 let node = make_node(&name, path, node_type, start_line);
550 let node_id = node.id.clone();
551 functions.push((name, node_id.clone(), start_line, end_line));
552 result.nodes.push(node);
553 result.edges.push(make_edge(
554 &file_id,
555 &node_id,
556 "defines",
557 path,
558 Confidence::Extracted,
559 ));
560 }
561
562 let re_use = Regex::new(r"(?m)^(?:\s*)(?:pub\s+)?use\s+([\w:]+)").unwrap();
564 for cap in re_use.captures_iter(source) {
565 let module = &cap[1];
566 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
567 let import_id = make_id(&[&ps, "use", module]);
568 result.nodes.push(GraphNode {
569 id: import_id.clone(),
570 label: module.to_string(),
571 source_file: ps.clone(),
572 source_location: Some(format!("L{line}")),
573 node_type: NodeType::Module,
574 community: None,
575 extra: HashMap::new(),
576 });
577 result.edges.push(make_edge(
578 &file_id,
579 &import_id,
580 "imports",
581 path,
582 Confidence::Extracted,
583 ));
584 }
585
586 let call_edges = infer_calls(&functions, &lines, path);
587 result.edges.extend(call_edges);
588
589 result
590}
591
592fn extract_go(path: &Path, source: &str) -> ExtractionResult {
597 let mut result = ExtractionResult::default();
598 let file_node = make_file_node(path);
599 let file_id = file_node.id.clone();
600 result.nodes.push(file_node);
601
602 let lines: Vec<&str> = source.lines().collect();
603 let ps = path_str(path);
604
605 let re_type = Regex::new(r"(?m)^type\s+(\w+)\s+(struct|interface)").unwrap();
607 for cap in re_type.captures_iter(source) {
608 let name = &cap[1];
609 let kind = &cap[2];
610 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
611 let node_type = match kind {
612 "interface" => NodeType::Interface,
613 _ => NodeType::Struct,
614 };
615 let node = make_node(name, path, node_type, line);
616 let node_id = node.id.clone();
617 result.nodes.push(node);
618 result.edges.push(make_edge(
619 &file_id,
620 &node_id,
621 "defines",
622 path,
623 Confidence::Extracted,
624 ));
625 }
626
627 let re_func = Regex::new(r"(?m)^func\s+(?:\([^)]+\)\s+)?(\w+)\s*\(").unwrap();
629 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
630 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
631 for (i, cap) in func_matches.iter().enumerate() {
632 let name = cap[1].to_string();
633 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
634 let end_line = if i + 1 < func_matches.len() {
635 source[..func_matches[i + 1].get(0).unwrap().start()]
636 .lines()
637 .count()
638 } else {
639 lines.len()
640 };
641
642 let full_match = cap.get(0).unwrap().as_str();
644 let node_type = if full_match.contains('(') && full_match.find('(') < full_match.find(&name)
645 {
646 NodeType::Method
647 } else {
648 NodeType::Function
649 };
650
651 let node = make_node(&name, path, node_type, start_line);
652 let node_id = node.id.clone();
653 functions.push((name, node_id.clone(), start_line, end_line));
654 result.nodes.push(node);
655 result.edges.push(make_edge(
656 &file_id,
657 &node_id,
658 "defines",
659 path,
660 Confidence::Extracted,
661 ));
662 }
663
664 let re_import_single = Regex::new(r#"(?m)^import\s+"([^"]+)""#).unwrap();
666 let re_import_block = Regex::new(r"(?s)import\s*\(([^)]+)\)").unwrap();
667 let re_import_line = Regex::new(r#""([^"]+)""#).unwrap();
668
669 for cap in re_import_single.captures_iter(source) {
670 let module = &cap[1];
671 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
672 let import_id = make_id(&[&ps, "import", module]);
673 result.nodes.push(GraphNode {
674 id: import_id.clone(),
675 label: module.to_string(),
676 source_file: ps.clone(),
677 source_location: Some(format!("L{line}")),
678 node_type: NodeType::Package,
679 community: None,
680 extra: HashMap::new(),
681 });
682 result.edges.push(make_edge(
683 &file_id,
684 &import_id,
685 "imports",
686 path,
687 Confidence::Extracted,
688 ));
689 }
690
691 for cap in re_import_block.captures_iter(source) {
692 let block = &cap[1];
693 let block_start = source[..cap.get(0).unwrap().start()].lines().count() + 1;
694 for (idx, imp_cap) in re_import_line.captures_iter(block).enumerate() {
695 let module = &imp_cap[1];
696 let import_id = make_id(&[&ps, "import", module]);
697 result.nodes.push(GraphNode {
698 id: import_id.clone(),
699 label: module.to_string(),
700 source_file: ps.clone(),
701 source_location: Some(format!("L{}", block_start + idx + 1)),
702 node_type: NodeType::Package,
703 community: None,
704 extra: HashMap::new(),
705 });
706 result.edges.push(make_edge(
707 &file_id,
708 &import_id,
709 "imports",
710 path,
711 Confidence::Extracted,
712 ));
713 }
714 }
715
716 let call_edges = infer_calls(&functions, &lines, path);
717 result.edges.extend(call_edges);
718
719 result
720}
721
722fn extract_java(path: &Path, source: &str) -> ExtractionResult {
727 let mut result = ExtractionResult::default();
728 let file_node = make_file_node(path);
729 let file_id = file_node.id.clone();
730 result.nodes.push(file_node);
731
732 let lines: Vec<&str> = source.lines().collect();
733 let ps = path_str(path);
734
735 let re_class = Regex::new(
737 r"(?m)(?:public\s+|private\s+|protected\s+)?(?:abstract\s+|static\s+|final\s+)*(class|interface|enum)\s+(\w+)",
738 )
739 .unwrap();
740 for cap in re_class.captures_iter(source) {
741 let kind = &cap[1];
742 let name = &cap[2];
743 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
744 let node_type = match kind {
745 "interface" => NodeType::Interface,
746 "enum" => NodeType::Enum,
747 _ => NodeType::Class,
748 };
749 let node = make_node(name, path, node_type, line);
750 let node_id = node.id.clone();
751 result.nodes.push(node);
752 result.edges.push(make_edge(
753 &file_id,
754 &node_id,
755 "defines",
756 path,
757 Confidence::Extracted,
758 ));
759 }
760
761 let re_method = Regex::new(
763 r"(?m)^\s+(?:public\s+|private\s+|protected\s+)?(?:static\s+)?(?:final\s+)?(?:synchronized\s+)?(?:abstract\s+)?(?:\w+(?:<[^>]*>)?)\s+(\w+)\s*\(",
764 )
765 .unwrap();
766 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
767 let func_matches: Vec<_> = re_method.captures_iter(source).collect();
768 for (i, cap) in func_matches.iter().enumerate() {
769 let name = cap[1].to_string();
770 if [
772 "if", "for", "while", "switch", "catch", "return", "new", "throw",
773 ]
774 .contains(&name.as_str())
775 {
776 continue;
777 }
778 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
779 let end_line = if i + 1 < func_matches.len() {
780 source[..func_matches[i + 1].get(0).unwrap().start()]
781 .lines()
782 .count()
783 } else {
784 lines.len()
785 };
786
787 let node = make_node(&name, path, NodeType::Method, start_line);
788 let node_id = node.id.clone();
789 functions.push((name, node_id.clone(), start_line, end_line));
790 result.nodes.push(node);
791 result.edges.push(make_edge(
792 &file_id,
793 &node_id,
794 "defines",
795 path,
796 Confidence::Extracted,
797 ));
798 }
799
800 let re_import = Regex::new(r"(?m)^import\s+(?:static\s+)?([\w.]+)\s*;").unwrap();
802 for cap in re_import.captures_iter(source) {
803 let module = &cap[1];
804 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
805 let import_id = make_id(&[&ps, "import", module]);
806 result.nodes.push(GraphNode {
807 id: import_id.clone(),
808 label: module.to_string(),
809 source_file: ps.clone(),
810 source_location: Some(format!("L{line}")),
811 node_type: NodeType::Package,
812 community: None,
813 extra: HashMap::new(),
814 });
815 result.edges.push(make_edge(
816 &file_id,
817 &import_id,
818 "imports",
819 path,
820 Confidence::Extracted,
821 ));
822 }
823
824 let call_edges = infer_calls(&functions, &lines, path);
825 result.edges.extend(call_edges);
826
827 result
828}
829
830fn extract_c_cpp(path: &Path, source: &str, lang: &str) -> ExtractionResult {
835 let mut result = ExtractionResult::default();
836 let file_node = make_file_node(path);
837 let file_id = file_node.id.clone();
838 result.nodes.push(file_node);
839
840 let lines: Vec<&str> = source.lines().collect();
841 let ps = path_str(path);
842
843 let re_include = Regex::new(r#"(?m)^#include\s+[<"]([^>"]+)[>"]"#).unwrap();
845 for cap in re_include.captures_iter(source) {
846 let header = &cap[1];
847 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
848 let import_id = make_id(&[&ps, "include", header]);
849 result.nodes.push(GraphNode {
850 id: import_id.clone(),
851 label: header.to_string(),
852 source_file: ps.clone(),
853 source_location: Some(format!("L{line}")),
854 node_type: NodeType::Module,
855 community: None,
856 extra: HashMap::new(),
857 });
858 result.edges.push(make_edge(
859 &file_id,
860 &import_id,
861 "includes",
862 path,
863 Confidence::Extracted,
864 ));
865 }
866
867 if lang == "cpp" {
869 let re_class = Regex::new(r"(?m)^(?:\s*)(?:class|struct|namespace)\s+(\w+)").unwrap();
870 for cap in re_class.captures_iter(source) {
871 let name = &cap[1];
872 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
873 let node = make_node(name, path, NodeType::Class, line);
874 let node_id = node.id.clone();
875 result.nodes.push(node);
876 result.edges.push(make_edge(
877 &file_id,
878 &node_id,
879 "defines",
880 path,
881 Confidence::Extracted,
882 ));
883 }
884 }
885
886 if lang == "c" {
888 let re_struct = Regex::new(r"(?m)^(?:typedef\s+)?struct\s+(\w+)").unwrap();
889 for cap in re_struct.captures_iter(source) {
890 let name = &cap[1];
891 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
892 let node = make_node(name, path, NodeType::Struct, line);
893 let node_id = node.id.clone();
894 result.nodes.push(node);
895 result.edges.push(make_edge(
896 &file_id,
897 &node_id,
898 "defines",
899 path,
900 Confidence::Extracted,
901 ));
902 }
903 }
904
905 let re_func = Regex::new(
907 r"(?m)^(?:static\s+)?(?:inline\s+)?(?:extern\s+)?(?:const\s+)?(?:unsigned\s+)?(?:signed\s+)?(?:\w+(?:\s*\*\s*|\s+))(\w+)\s*\([^;]*\)\s*\{",
908 )
909 .unwrap();
910 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
911 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
912 for (i, cap) in func_matches.iter().enumerate() {
913 let name = cap[1].to_string();
914 if ["if", "for", "while", "switch", "return", "sizeof"].contains(&name.as_str()) {
915 continue;
916 }
917 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
918 let end_line = if i + 1 < func_matches.len() {
919 source[..func_matches[i + 1].get(0).unwrap().start()]
920 .lines()
921 .count()
922 } else {
923 lines.len()
924 };
925
926 let node = make_node(&name, path, NodeType::Function, start_line);
927 let node_id = node.id.clone();
928 functions.push((name, node_id.clone(), start_line, end_line));
929 result.nodes.push(node);
930 result.edges.push(make_edge(
931 &file_id,
932 &node_id,
933 "defines",
934 path,
935 Confidence::Extracted,
936 ));
937 }
938
939 let call_edges = infer_calls(&functions, &lines, path);
940 result.edges.extend(call_edges);
941
942 result
943}
944
945fn extract_ruby(path: &Path, source: &str) -> ExtractionResult {
950 let mut result = ExtractionResult::default();
951 let file_node = make_file_node(path);
952 let file_id = file_node.id.clone();
953 result.nodes.push(file_node);
954
955 let lines: Vec<&str> = source.lines().collect();
956 let ps = path_str(path);
957
958 let re_class = Regex::new(r"(?m)^\s*(class|module)\s+(\w+(?:::\w+)*)").unwrap();
960 for cap in re_class.captures_iter(source) {
961 let name = &cap[2];
962 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
963 let node = make_node(name, path, NodeType::Class, line);
964 let node_id = node.id.clone();
965 result.nodes.push(node);
966 result.edges.push(make_edge(
967 &file_id,
968 &node_id,
969 "defines",
970 path,
971 Confidence::Extracted,
972 ));
973 }
974
975 let re_func = Regex::new(r"(?m)^\s*def\s+(self\.)?(\w+[?!=]?)").unwrap();
977 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
978 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
979 for (i, cap) in func_matches.iter().enumerate() {
980 let name = cap[2].to_string();
981 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
982 let end_line = if i + 1 < func_matches.len() {
983 source[..func_matches[i + 1].get(0).unwrap().start()]
984 .lines()
985 .count()
986 } else {
987 lines.len()
988 };
989
990 let node = make_node(&name, path, NodeType::Method, start_line);
991 let node_id = node.id.clone();
992 functions.push((name, node_id.clone(), start_line, end_line));
993 result.nodes.push(node);
994 result.edges.push(make_edge(
995 &file_id,
996 &node_id,
997 "defines",
998 path,
999 Confidence::Extracted,
1000 ));
1001 }
1002
1003 let re_require = Regex::new(r#"(?m)^\s*require(?:_relative)?\s+['"]([^'"]+)['"]"#).unwrap();
1005 for cap in re_require.captures_iter(source) {
1006 let module = &cap[1];
1007 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1008 let import_id = make_id(&[&ps, "require", module]);
1009 result.nodes.push(GraphNode {
1010 id: import_id.clone(),
1011 label: module.to_string(),
1012 source_file: ps.clone(),
1013 source_location: Some(format!("L{line}")),
1014 node_type: NodeType::Module,
1015 community: None,
1016 extra: HashMap::new(),
1017 });
1018 result.edges.push(make_edge(
1019 &file_id,
1020 &import_id,
1021 "imports",
1022 path,
1023 Confidence::Extracted,
1024 ));
1025 }
1026
1027 let call_edges = infer_calls(&functions, &lines, path);
1028 result.edges.extend(call_edges);
1029
1030 result
1031}
1032
1033fn extract_csharp(path: &Path, source: &str) -> ExtractionResult {
1038 let mut result = ExtractionResult::default();
1039 let file_node = make_file_node(path);
1040 let file_id = file_node.id.clone();
1041 result.nodes.push(file_node);
1042
1043 let lines: Vec<&str> = source.lines().collect();
1044 let ps = path_str(path);
1045
1046 let re_class = Regex::new(
1048 r"(?m)(?:public\s+|private\s+|protected\s+|internal\s+)?(?:abstract\s+|static\s+|sealed\s+|partial\s+)*(class|interface|struct|enum)\s+(\w+)",
1049 )
1050 .unwrap();
1051 for cap in re_class.captures_iter(source) {
1052 let kind = &cap[1];
1053 let name = &cap[2];
1054 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1055 let node_type = match kind {
1056 "interface" => NodeType::Interface,
1057 "struct" => NodeType::Struct,
1058 "enum" => NodeType::Enum,
1059 _ => NodeType::Class,
1060 };
1061 let node = make_node(name, path, node_type, line);
1062 let node_id = node.id.clone();
1063 result.nodes.push(node);
1064 result.edges.push(make_edge(
1065 &file_id,
1066 &node_id,
1067 "defines",
1068 path,
1069 Confidence::Extracted,
1070 ));
1071 }
1072
1073 let re_method = Regex::new(
1075 r"(?m)^\s+(?:public\s+|private\s+|protected\s+|internal\s+)?(?:static\s+)?(?:virtual\s+)?(?:override\s+)?(?:async\s+)?(?:\w+(?:<[^>]*>)?)\s+(\w+)\s*\(",
1076 )
1077 .unwrap();
1078 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
1079 let func_matches: Vec<_> = re_method.captures_iter(source).collect();
1080 for (i, cap) in func_matches.iter().enumerate() {
1081 let name = cap[1].to_string();
1082 if [
1083 "if", "for", "while", "switch", "catch", "return", "new", "throw",
1084 ]
1085 .contains(&name.as_str())
1086 {
1087 continue;
1088 }
1089 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1090 let end_line = if i + 1 < func_matches.len() {
1091 source[..func_matches[i + 1].get(0).unwrap().start()]
1092 .lines()
1093 .count()
1094 } else {
1095 lines.len()
1096 };
1097
1098 let node = make_node(&name, path, NodeType::Method, start_line);
1099 let node_id = node.id.clone();
1100 functions.push((name, node_id.clone(), start_line, end_line));
1101 result.nodes.push(node);
1102 result.edges.push(make_edge(
1103 &file_id,
1104 &node_id,
1105 "defines",
1106 path,
1107 Confidence::Extracted,
1108 ));
1109 }
1110
1111 let re_using = Regex::new(r"(?m)^using\s+([\w.]+)\s*;").unwrap();
1113 for cap in re_using.captures_iter(source) {
1114 let ns = &cap[1];
1115 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1116 let import_id = make_id(&[&ps, "using", ns]);
1117 result.nodes.push(GraphNode {
1118 id: import_id.clone(),
1119 label: ns.to_string(),
1120 source_file: ps.clone(),
1121 source_location: Some(format!("L{line}")),
1122 node_type: NodeType::Namespace,
1123 community: None,
1124 extra: HashMap::new(),
1125 });
1126 result.edges.push(make_edge(
1127 &file_id,
1128 &import_id,
1129 "imports",
1130 path,
1131 Confidence::Extracted,
1132 ));
1133 }
1134
1135 let call_edges = infer_calls(&functions, &lines, path);
1136 result.edges.extend(call_edges);
1137
1138 result
1139}
1140
1141fn extract_kotlin(path: &Path, source: &str) -> ExtractionResult {
1146 let mut result = ExtractionResult::default();
1147 let file_node = make_file_node(path);
1148 let file_id = file_node.id.clone();
1149 result.nodes.push(file_node);
1150
1151 let lines: Vec<&str> = source.lines().collect();
1152 let ps = path_str(path);
1153
1154 let re_class = Regex::new(
1156 r"(?m)(?:open\s+|abstract\s+|data\s+|sealed\s+)?(?:class|object|interface)\s+(\w+)",
1157 )
1158 .unwrap();
1159 for cap in re_class.captures_iter(source) {
1160 let name = &cap[1];
1161 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1162 let node = make_node(name, path, NodeType::Class, line);
1163 let node_id = node.id.clone();
1164 result.nodes.push(node);
1165 result.edges.push(make_edge(
1166 &file_id,
1167 &node_id,
1168 "defines",
1169 path,
1170 Confidence::Extracted,
1171 ));
1172 }
1173
1174 let re_func = Regex::new(r"(?m)^\s*(?:(?:private|public|protected|internal|override|open|suspend)\s+)*fun\s+(?:<[^>]+>\s+)?(\w+)\s*\(").unwrap();
1176 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
1177 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
1178 for (i, cap) in func_matches.iter().enumerate() {
1179 let name = cap[1].to_string();
1180 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1181 let end_line = if i + 1 < func_matches.len() {
1182 source[..func_matches[i + 1].get(0).unwrap().start()]
1183 .lines()
1184 .count()
1185 } else {
1186 lines.len()
1187 };
1188
1189 let node = make_node(&name, path, NodeType::Function, start_line);
1190 let node_id = node.id.clone();
1191 functions.push((name, node_id.clone(), start_line, end_line));
1192 result.nodes.push(node);
1193 result.edges.push(make_edge(
1194 &file_id,
1195 &node_id,
1196 "defines",
1197 path,
1198 Confidence::Extracted,
1199 ));
1200 }
1201
1202 let re_import = Regex::new(r"(?m)^import\s+([\w.]+)").unwrap();
1204 for cap in re_import.captures_iter(source) {
1205 let module = &cap[1];
1206 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1207 let import_id = make_id(&[&ps, "import", module]);
1208 result.nodes.push(GraphNode {
1209 id: import_id.clone(),
1210 label: module.to_string(),
1211 source_file: ps.clone(),
1212 source_location: Some(format!("L{line}")),
1213 node_type: NodeType::Package,
1214 community: None,
1215 extra: HashMap::new(),
1216 });
1217 result.edges.push(make_edge(
1218 &file_id,
1219 &import_id,
1220 "imports",
1221 path,
1222 Confidence::Extracted,
1223 ));
1224 }
1225
1226 let call_edges = infer_calls(&functions, &lines, path);
1227 result.edges.extend(call_edges);
1228
1229 result
1230}
1231
1232fn extract_generic(path: &Path, source: &str, _lang: &str) -> ExtractionResult {
1237 let mut result = ExtractionResult::default();
1238 let file_node = make_file_node(path);
1239 let file_id = file_node.id.clone();
1240 result.nodes.push(file_node);
1241
1242 let lines: Vec<&str> = source.lines().collect();
1243 let ps = path_str(path);
1244
1245 let re_class =
1247 Regex::new(r"(?m)^\s*(?:(?:pub|public|private|protected|internal|open|abstract|sealed|partial|static|final|export)\s+)*(?:class|struct|module|object|interface|trait|protocol|enum|defmodule)\s+(\w+(?:::\w+)*)")
1248 .unwrap();
1249 for cap in re_class.captures_iter(source) {
1250 let name = &cap[1];
1251 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1252 let node = make_node(name, path, NodeType::Class, line);
1253 let node_id = node.id.clone();
1254 result.nodes.push(node);
1255 result.edges.push(make_edge(
1256 &file_id,
1257 &node_id,
1258 "defines",
1259 path,
1260 Confidence::Extracted,
1261 ));
1262 }
1263
1264 let re_func = Regex::new(
1266 r"(?m)^\s*(?:(?:pub|public|private|protected|internal|open|override|suspend|static|async|export|def|defp)\s+)*(?:func|function|fn|def|defp|fun|sub)\s+(\w+[?!]?)\s*[\(<]",
1267 )
1268 .unwrap();
1269 let mut functions: Vec<(String, String, usize, usize)> = Vec::new();
1270 let func_matches: Vec<_> = re_func.captures_iter(source).collect();
1271 for (i, cap) in func_matches.iter().enumerate() {
1272 let name = cap[1].to_string();
1273 let start_line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1274 let end_line = if i + 1 < func_matches.len() {
1275 source[..func_matches[i + 1].get(0).unwrap().start()]
1276 .lines()
1277 .count()
1278 } else {
1279 lines.len()
1280 };
1281
1282 let node = make_node(&name, path, NodeType::Function, start_line);
1283 let node_id = node.id.clone();
1284 functions.push((name, node_id.clone(), start_line, end_line));
1285 result.nodes.push(node);
1286 result.edges.push(make_edge(
1287 &file_id,
1288 &node_id,
1289 "defines",
1290 path,
1291 Confidence::Extracted,
1292 ));
1293 }
1294
1295 let re_import =
1297 Regex::new(r#"(?m)^\s*(?:import|use|using|require|include|from)\s+['"]?([\w./:-]+)['"]?"#)
1298 .unwrap();
1299 for cap in re_import.captures_iter(source) {
1300 let module = &cap[1];
1301 let line = source[..cap.get(0).unwrap().start()].lines().count() + 1;
1302 let import_id = make_id(&[&ps, "import", module]);
1303 result.nodes.push(GraphNode {
1304 id: import_id.clone(),
1305 label: module.to_string(),
1306 source_file: ps.clone(),
1307 source_location: Some(format!("L{line}")),
1308 node_type: NodeType::Module,
1309 community: None,
1310 extra: HashMap::new(),
1311 });
1312 result.edges.push(make_edge(
1313 &file_id,
1314 &import_id,
1315 "imports",
1316 path,
1317 Confidence::Extracted,
1318 ));
1319 }
1320
1321 let call_edges = infer_calls(&functions, &lines, path);
1322 result.edges.extend(call_edges);
1323
1324 result
1325}
1326
1327