1use std::collections::{HashMap, HashSet};
2use std::fmt;
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5
6use futures::stream::{FuturesUnordered, StreamExt};
7use neo4rs::{query, Graph};
8use thiserror::Error;
9use tokio::sync::Mutex;
10use tree_sitter::{Node, Tree};
11
12use crate::compress::{compress_full_source, compress_snippet, CompressorClient};
13use crate::go_resolve::{
14 discover_go_modules, discover_go_replaces, is_likely_third_party_go_import,
15 resolve_go_import_to_known_go_file, GoModule, GoReplace,
16};
17use crate::python_common_external::is_python_common_external_top_level;
18use crate::go_stdlib::is_go_stdlib_import;
19use crate::ir::{
20 api_endpoint_key, external_api_key, module_key, ClassIr, EdgeIr,
21 EdgeKind, FunctionIr, ProjectIr, PropertyIr,
22};
23use crate::python_stdlib::is_python_stdlib_top_level;
24use crate::schema::props;
25use crate::scanner::ParsedFile;
26use crate::LanguageId;
27
/// Connection settings for the Neo4j database that stores the code graph.
///
/// The three fields are passed unchanged to `Graph::new` whenever this module
/// opens a connection.
#[derive(Debug, Clone)]
pub struct Neo4jConfig {
    /// Connection URI of the Neo4j server.
    pub uri: String,
    /// User name used to authenticate.
    pub user: String,
    /// Password used to authenticate.
    pub password: String,
}
38
/// Tunables that control logging and optional source compression while the
/// graph is being persisted.
#[derive(Debug, Clone)]
pub struct GraphPersistenceOptions {
    /// Verbosity switch for import logging.
    /// NOTE(review): presumably forwarded to `should_log_unresolved_import` by the
    /// per-language persisters; confirm against those callers.
    pub verbose_imports: bool,
    /// Maximum number of parse warnings printed per file; `0` means unlimited
    /// (see `emit_limited_parse_warnings`).
    pub max_parse_warnings_per_file: usize,
    /// Compressor settings; a client is only created when `compressor.enabled` is true.
    pub compressor: CompressorConfig,
}
49
50impl Default for GraphPersistenceOptions {
51 fn default() -> Self {
52 Self {
53 verbose_imports: false,
54 max_parse_warnings_per_file: 50,
55 compressor: CompressorConfig::default(),
56 }
57 }
58}
59
60pub use crate::compress::{CompressorConfig, DEFAULT_COMPRESSOR_URL};
61
/// The kinds of graph node this module distinguishes by label.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NodeKind {
    /// A source file node.
    File,
    /// A class node.
    Class,
    /// A function or method node.
    Function,
}

impl fmt::Display for NodeKind {
    /// Writes the variant name (`File`, `Class` or `Function`) verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match *self {
            NodeKind::File => "File",
            NodeKind::Class => "Class",
            NodeKind::Function => "Function",
        };
        f.write_str(label)
    }
}
82
/// A class-like declaration extracted from a parsed source file.
#[derive(Debug, Clone)]
struct ClassSymbol {
    /// Simple (unqualified) name; stored as the `name` property of the `Class` node.
    name: String,
    /// Fully qualified name; used as the merge key of the `Class` node.
    fqn: String,
    /// Optional kind label. The C# persister falls back to `"class"` when this is `None`.
    kind: Option<&'static str>,
}
94
/// A property declaration extracted from a parsed source file.
#[derive(Debug, Clone)]
struct PropertySymbol {
    /// Fully qualified name of the class that declares the property.
    class_fqn: String,
    /// Simple name of the property.
    name: String,
    /// Fully qualified name of the property itself.
    fqn: String,
    /// Declared type of the property, when one was extracted.
    declared_type: Option<String>,
}
104
/// A function or method declaration extracted from a parsed source file.
#[derive(Debug, Clone)]
struct FunctionSymbol {
    /// Simple name; stored as the `name` property of the `Function` node.
    name: String,
    /// Fully qualified name; used as the merge key of the `Function` node.
    fqn: String,
    /// Fully qualified name of the owning class, or `None` for a function that
    /// is not attached to a class.
    class_fqn: Option<String>,
    /// Return type text, when one was extracted.
    return_type: Option<String>,
    /// Parameter type texts.
    param_types: Vec<String>,
    /// Number of parameters; written to the graph as an `i64`.
    param_count: usize,
    /// Modifier keywords attached to the declaration.
    /// NOTE(review): not read anywhere in the visible part of this file.
    modifiers: Vec<String>,
    /// NOTE(review): presumably set only for Go methods (pointer vs. value
    /// receiver); not read in the visible part of this file, so confirm.
    is_pointer_receiver: Option<bool>,
}
127
/// One callback signature associated with an Erlang behaviour.
///
/// NOTE(review): field meanings below are read off the field names and the way
/// `build_erlang_module_index` consumes `name`/`arity`; confirm against
/// `collect_callback_contracts_for_module`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct CallbackContract {
    /// Name of the behaviour the callback belongs to.
    behaviour: String,
    /// Callback function name.
    name: String,
    /// Callback arity.
    arity: u32,
    /// Whether the callback is optional.
    optional: bool,
}
136
/// Per-module summary built by `build_erlang_module_index`.
#[derive(Debug, Default, Clone)]
struct ErlangModuleSnapshot {
    /// `(name, arity)` of every function extracted from the module.
    implemented_signatures: HashSet<(String, u32)>,
    /// `(name, arity)` of every callback contract collected for the module.
    callback_signatures: HashSet<(String, u32)>,
}
142
/// Behaviour-related facts extracted from one Erlang syntax tree.
///
/// NOTE(review): only `behaviour_usages`, `declared_callbacks` and
/// `optional_callbacks` are read in the visible part of this file; the
/// descriptions of the other fields are inferred from their names.
#[derive(Debug, Default)]
struct ErlangBehaviourMetadata {
    /// Names of behaviours the module uses.
    behaviour_usages: HashSet<String>,
    /// `(name, arity)` pairs of callbacks the module declares.
    declared_callbacks: Vec<(String, u32)>,
    /// `(name, arity)` pairs of callbacks marked optional.
    optional_callbacks: HashSet<(String, u32)>,
    /// Presumably names of behaviours this module extends (TODO confirm).
    behaviour_extensions: HashSet<String>,
    /// Presumably `(name, arity)` pairs of callbacks the module overrides (TODO confirm).
    overridden_callbacks: HashSet<(String, u32)>,
}
152
/// Errors returned by the graph persistence functions in this module.
#[derive(Debug, Error)]
pub enum GraphError {
    /// Any error reported by the `neo4rs` driver. Because of `#[from]`, both
    /// connection failures and failed queries convert into this variant via `?`.
    #[error("neo4j connection error: {0}")]
    Connection(#[from] neo4rs::Error),
}
159
/// Size at which any single relationship set of a `BatchAccumulator` makes
/// `should_flush` return `true`.
const BATCH_FLUSH_THRESHOLD: usize = 3000;
163
/// Chunk size used when C# classes and properties are written to Neo4j in
/// batched `UNWIND` queries.
const CSHARP_NODE_BATCH_FLUSH_THRESHOLD: usize = 500;
166
/// In-memory buffer of relationships waiting to be written to Neo4j in bulk.
///
/// Each set de-duplicates its entries, so adding the same edge twice results
/// in a single pending relationship.
#[derive(Debug, Default)]
struct BatchAccumulator {
    /// `(caller_fqn, callee_fqn)` pairs for `CALLS_FUNCTION` edges.
    calls_function: HashSet<(String, String)>,
    /// `(function_fqn, class_fqn)` pairs for function-to-class `USES_CLASS` edges.
    uses_class: HashSet<(String, String)>,
    /// `(derived_fqn, base_fqn)` pairs for class-to-class `USES_CLASS` edges.
    class_uses_class: HashSet<(String, String)>,
    /// `(function_fqn, base_url, norm_path)` triples for `CALLS_EXTERNAL_API` edges.
    calls_external_api: HashSet<(String, String, String)>,
}
181
/// A `BatchAccumulator` behind `Arc<tokio::sync::Mutex<_>>`, shared by the
/// Erlang persistence futures that run concurrently.
type SharedBatchAccumulator = Arc<Mutex<BatchAccumulator>>;
183
184impl BatchAccumulator {
185 fn new() -> Self {
186 Self::default()
187 }
188
189 fn add_calls_function(&mut self, caller_fqn: String, callee_fqn: String) {
191 self.calls_function.insert((caller_fqn, callee_fqn));
192 }
193
194 fn add_uses_class(&mut self, fn_fqn: String, class_fqn: String) {
196 self.uses_class.insert((fn_fqn, class_fqn));
197 }
198
199 fn add_class_uses_class(&mut self, derived_fqn: String, base_fqn: String) {
201 self.class_uses_class.insert((derived_fqn, base_fqn));
202 }
203
204 fn add_calls_external_api(&mut self, fn_fqn: String, base_url: String, norm_path: String) {
206 self.calls_external_api.insert((fn_fqn, base_url, norm_path));
207 }
208
209 fn should_flush(&self) -> bool {
211 self.calls_function.len() >= BATCH_FLUSH_THRESHOLD
212 || self.uses_class.len() >= BATCH_FLUSH_THRESHOLD
213 || self.class_uses_class.len() >= BATCH_FLUSH_THRESHOLD
214 || self.calls_external_api.len() >= BATCH_FLUSH_THRESHOLD
215 }
216
217 fn total_size(&self) -> usize {
219 self.calls_function.len()
220 + self.uses_class.len()
221 + self.class_uses_class.len()
222 + self.calls_external_api.len()
223 }
224
225 async fn flush(&mut self, graph: &Graph) -> Result<(), GraphError> {
227 let total = self.total_size();
228 if total == 0 {
229 return Ok(());
230 }
231
232 println!("Neo4j: flushing {} accumulated relationships...", total);
233
234 if !self.calls_function.is_empty() {
236 let caller_fqns: Vec<String> = self.calls_function.iter().map(|(c, _)| c.clone()).collect();
237 let callee_fqns: Vec<String> = self.calls_function.iter().map(|(_, c)| c.clone()).collect();
238
239 let batch_query = query(
240 "
241 UNWIND range(0, size($caller_fqns) - 1) AS i
242 WITH $caller_fqns[i] AS caller_fqn, $callee_fqns[i] AS callee_fqn
243 MERGE (caller:Function { fqn: caller_fqn })
244 MERGE (callee:Function { fqn: callee_fqn })
245 MERGE (caller)-[:CALLS_FUNCTION]->(callee)
246 ",
247 )
248 .param("caller_fqns", caller_fqns)
249 .param("callee_fqns", callee_fqns);
250
251 graph.run(batch_query).await?;
252 self.calls_function.clear();
253 }
254
255 if !self.uses_class.is_empty() {
257 let fn_fqns: Vec<String> = self.uses_class.iter().map(|(f, _)| f.clone()).collect();
258 let cls_fqns: Vec<String> = self.uses_class.iter().map(|(_, c)| c.clone()).collect();
259
260 let batch_query = query(
261 "
262 UNWIND range(0, size($fn_fqns) - 1) AS i
263 WITH $fn_fqns[i] AS fn_fqn, $cls_fqns[i] AS cls_fqn
264 MERGE (fn:Function { fqn: fn_fqn })
265 MERGE (cls:Class { fqn: cls_fqn })
266 MERGE (fn)-[:USES_CLASS]->(cls)
267 ",
268 )
269 .param("fn_fqns", fn_fqns)
270 .param("cls_fqns", cls_fqns);
271
272 graph.run(batch_query).await?;
273 self.uses_class.clear();
274 }
275
276 if !self.class_uses_class.is_empty() {
278 let derived: Vec<String> = self.class_uses_class.iter().map(|(d, _)| d.clone()).collect();
279 let bases: Vec<String> = self.class_uses_class.iter().map(|(_, b)| b.clone()).collect();
280
281 let batch_query = query(
282 "
283 UNWIND range(0, size($derived_fqns) - 1) AS i
284 WITH $derived_fqns[i] AS derived_fqn, $base_fqns[i] AS base_fqn
285 MERGE (d:Class { fqn: derived_fqn })
286 MERGE (b:Class { fqn: base_fqn })
287 MERGE (d)-[:USES_CLASS]->(b)
288 ",
289 )
290 .param("derived_fqns", derived)
291 .param("base_fqns", bases);
292
293 graph.run(batch_query).await?;
294 self.class_uses_class.clear();
295 }
296
297 if !self.calls_external_api.is_empty() {
299 let fn_fqns: Vec<String> = self.calls_external_api.iter().map(|(f, _, _)| f.clone()).collect();
300 let base_urls: Vec<String> = self.calls_external_api.iter().map(|(_, b, _)| b.clone()).collect();
301 let norm_paths: Vec<String> = self.calls_external_api.iter().map(|(_, _, n)| n.clone()).collect();
302
303 let batch_query = query(
304 "
305 UNWIND range(0, size($fn_fqns) - 1) AS i
306 WITH $fn_fqns[i] AS fn_fqn, $base_urls[i] AS base_url, $norm_paths[i] AS norm_path
307 MERGE (fn:Function { fqn: fn_fqn })
308 MERGE (ext:ExternalApi { base_url: base_url, norm_path: norm_path })
309 MERGE (fn)-[:CALLS_EXTERNAL_API]->(ext)
310 ",
311 )
312 .param("fn_fqns", fn_fqns)
313 .param("base_urls", base_urls)
314 .param("norm_paths", norm_paths);
315
316 graph.run(batch_query).await?;
317 self.calls_external_api.clear();
318 }
319
320 Ok(())
321 }
322}
323
324async fn flush_shared_accumulator_if_needed(
325 shared_accumulator: &SharedBatchAccumulator,
326 graph: &Graph,
327) -> Result<(), GraphError> {
328 let mut local_batch = BatchAccumulator::new();
329 {
330 let mut guard = shared_accumulator.lock().await;
331 if !guard.should_flush() {
332 return Ok(());
333 }
334 std::mem::swap(&mut *guard, &mut local_batch);
335 }
336 local_batch.flush(graph).await
337}
338
339async fn flush_shared_accumulator_force(
340 shared_accumulator: &SharedBatchAccumulator,
341 graph: &Graph,
342) -> Result<(), GraphError> {
343 let mut local_batch = BatchAccumulator::new();
344 {
345 let mut guard = shared_accumulator.lock().await;
346 if guard.total_size() == 0 {
347 return Ok(());
348 }
349 std::mem::swap(&mut *guard, &mut local_batch);
350 }
351 local_batch.flush(graph).await
352}
353
/// Number of queued Erlang persistence futures at which
/// `persist_files_to_neo4j` awaits one of them before queuing more.
const MAX_CONCURRENT_ERLANG_WRITES: usize = 8;
358
/// Expresses `file_path` relative to `root`.
///
/// A relative `file_path` is first joined onto `root`. Both paths are then
/// canonicalized (falling back to the un-canonicalized path when that fails,
/// e.g. because the file does not exist) and the root prefix is stripped. If
/// that yields nothing usable, progressively cruder prefix strips are tried;
/// when none applies, `file_path` is returned unchanged.
fn repo_relative_file_path(root: &Path, file_path: &Path) -> PathBuf {
    let joined: PathBuf = if file_path.is_absolute() {
        file_path.to_path_buf()
    } else {
        root.join(file_path)
    };
    let canonical_root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
    let canonical_file = joined.canonicalize().unwrap_or_else(|_| joined.clone());

    // Preferred answer: canonical file below canonical root, and not the root itself.
    match canonical_file.strip_prefix(&canonical_root) {
        Ok(rel) if !rel.as_os_str().is_empty() => return rel.to_path_buf(),
        _ => {}
    }

    // Fallbacks, tried in order; the first successful strip wins.
    joined
        .strip_prefix(root)
        .or_else(|_| file_path.strip_prefix(root))
        .or_else(|_| file_path.strip_prefix(&canonical_root))
        .map(Path::to_path_buf)
        .unwrap_or_else(|_| file_path.to_path_buf())
}
387
388fn neo4j_path_string(root: &Path, file_path: &Path) -> String {
390 path_str_slash(&repo_relative_file_path(root, file_path))
391}
392
/// Renders `p` as a string (lossily for non-UTF-8 content) with every
/// backslash replaced by a forward slash.
fn path_str_slash(p: &Path) -> String {
    let text = p.to_string_lossy();
    let segments: Vec<&str> = text.split('\\').collect();
    segments.join("/")
}
396
397pub(crate) fn derive_project_name(file_path: &Path, root: &Path) -> Option<String> {
399 use std::path::Component;
400 let rel = repo_relative_file_path(root, file_path);
401 for c in rel.components() {
402 if let Component::Normal(s) = c {
403 return s.to_str().map(|x| x.to_string());
404 }
405 }
406 None
407}
408
409fn build_erlang_module_index(files: &[ParsedFile]) -> HashMap<String, ErlangModuleSnapshot> {
410 let mut index: HashMap<String, ErlangModuleSnapshot> = HashMap::new();
411
412 for file in files {
413 if file.language != LanguageId::Erlang {
414 continue;
415 }
416
417 let Some(module_name) = resolve_erlang_module_name(&file.path, &file.tree, &file.source) else {
418 continue;
419 };
420
421 let functions = extract_erlang_functions(&module_name, &file.tree, &file.source);
422 let meta = extract_erlang_behaviour_metadata_from_tree(&file.tree, &file.source);
423 let contracts = collect_callback_contracts_for_module(
424 Some(module_name.as_str()),
425 &meta.behaviour_usages,
426 &meta.declared_callbacks,
427 &meta.optional_callbacks,
428 );
429
430 let snapshot = index.entry(module_name).or_default();
431 for (name, arity, _fqn) in functions {
432 snapshot.implemented_signatures.insert((name, arity));
433 }
434 for contract in contracts {
435 snapshot
436 .callback_signatures
437 .insert((contract.name, contract.arity));
438 }
439 }
440
441 index
442}
443
444pub fn append_csharp_structural_ir(
447 ir: &mut ProjectIr,
448 file_path: &str,
449 project_name: Option<String>,
450 tree: &Tree,
451 source: &str,
452) {
453 let (classes, _, properties) = extract_csharp_symbols(tree, source);
454 let language = LanguageId::CSharp.to_string();
455 for c in classes {
456 ir.classes.push(ClassIr {
457 fqn: c.fqn.clone(),
458 name: c.name,
459 path: file_path.to_string(),
460 language: language.clone(),
461 project_name: project_name.clone(),
462 kind: c.kind.map(str::to_string),
463 });
464 ir.edges.push(EdgeIr {
465 kind: EdgeKind::DeclaresClass,
466 from_label: String::from("File"),
467 from_key: file_path.to_string(),
468 to_label: String::from("Class"),
469 to_key: c.fqn,
470 });
471 }
472 for p in properties {
473 ir.properties.push(PropertyIr {
474 fqn: p.fqn.clone(),
475 name: p.name,
476 class_fqn: p.class_fqn.clone(),
477 path: file_path.to_string(),
478 language: language.clone(),
479 project_name: project_name.clone(),
480 declared_type: p.declared_type,
481 });
482 ir.edges.push(EdgeIr {
483 kind: EdgeKind::DeclaresProperty,
484 from_label: String::from("Class"),
485 from_key: p.class_fqn,
486 to_label: String::from("Property"),
487 to_key: p.fqn,
488 });
489 }
490}
491
492pub fn append_java_class_ir(
494 ir: &mut ProjectIr,
495 file_path: &str,
496 project_name: Option<String>,
497 tree: &Tree,
498 source: &str,
499) {
500 let package = extract_java_package(source);
501 let (classes, _) = extract_java_symbols(tree, source, package.as_deref());
502 let language = LanguageId::Java.to_string();
503 for c in classes {
504 ir.classes.push(ClassIr {
505 fqn: c.fqn.clone(),
506 name: c.name,
507 path: file_path.to_string(),
508 language: language.clone(),
509 project_name: project_name.clone(),
510 kind: c.kind.map(str::to_string),
511 });
512 ir.edges.push(EdgeIr {
513 kind: EdgeKind::DeclaresClass,
514 from_label: String::from("File"),
515 from_key: file_path.to_string(),
516 to_label: String::from("Class"),
517 to_key: c.fqn,
518 });
519 }
520}
521
522pub async fn cleanup_incremental_targets_in_neo4j(
535 cfg: &Neo4jConfig,
536 root: &Path,
537 cleanup_targets: &[String],
538) -> Result<(), GraphError> {
539 if cleanup_targets.is_empty() {
540 println!("Neo4j cleanup: no cleanup targets, skipping.");
541 return Ok(());
542 }
543
544 let normalized_paths: Vec<String> = cleanup_targets
545 .iter()
546 .map(|target| neo4j_path_string(root, Path::new(target)))
547 .collect::<HashSet<_>>()
548 .into_iter()
549 .collect();
550
551 if normalized_paths.is_empty() {
552 println!("Neo4j cleanup: no normalized paths, skipping.");
553 return Ok(());
554 }
555
556 println!(
557 "Neo4j cleanup: deleting stale graph scope for {} path(s)...",
558 normalized_paths.len()
559 );
560
561 let graph = Graph::new(&cfg.uri, &cfg.user, &cfg.password).await?;
562
563 let delete_files = query(
564 "
565 UNWIND $paths AS path
566 OPTIONAL MATCH (f:File { path: path })
567 DETACH DELETE f
568 ",
569 )
570 .param("paths", normalized_paths.clone());
571 graph.run(delete_files).await?;
572
573 let delete_modules = query(
574 "
575 UNWIND $paths AS path
576 OPTIONAL MATCH (m:Module { path: path })
577 DETACH DELETE m
578 ",
579 )
580 .param("paths", normalized_paths.clone());
581 graph.run(delete_modules).await?;
582
583 let delete_classes = query(
584 "
585 UNWIND $paths AS path
586 OPTIONAL MATCH (c:Class { path: path })
587 DETACH DELETE c
588 ",
589 )
590 .param("paths", normalized_paths.clone());
591 graph.run(delete_classes).await?;
592
593 let delete_functions = query(
594 "
595 UNWIND $paths AS path
596 OPTIONAL MATCH (fn:Function { path: path })
597 DETACH DELETE fn
598 ",
599 )
600 .param("paths", normalized_paths);
601 graph.run(delete_functions).await?;
602
603 println!("Neo4j cleanup: stale graph scope deleted.");
604 Ok(())
605}
606
/// Decides whether parse warnings should be printed for `file_path`.
///
/// Returns `false` when the path contains a directory named `vendordocs` or
/// `refund_issue`; the comparison is case-insensitive and accepts both `/` and
/// `\` separators. Returns `true` otherwise.
///
/// Callers in this file pass repo-relative paths, which have no leading
/// separator. The path is therefore prefixed with `/` before matching so that
/// a suppressed directory in the first position (`vendordocs/Foo.java`) is
/// recognised just like one deeper in the tree (`src/vendordocs/Foo.java`).
fn should_emit_parse_warnings_for_path(file_path: &str) -> bool {
    const SUPPRESSED_DIR_MARKERS: [&str; 2] = ["/vendordocs/", "/refund_issue/"];

    let normalized = format!("/{}", file_path.replace('\\', "/").to_lowercase());
    !SUPPRESSED_DIR_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
}
645
646fn emit_limited_parse_warnings(
647 label: &str,
648 file_path: &str,
649 warnings: Vec<(usize, usize, String)>,
650 max_per_file: usize,
651) {
652 if !should_emit_parse_warnings_for_path(file_path) {
653 return;
654 }
655 let total = warnings.len();
656 if max_per_file == 0 {
657 for (line, col, snippet) in warnings {
658 println!(
659 "{label} parse warning {file_path}:{line}:{col} - {snippet}",
660 );
661 }
662 return;
663 }
664 for (i, (line, col, snippet)) in warnings.into_iter().enumerate() {
665 if i >= max_per_file {
666 let rest = total.saturating_sub(i);
667 if rest > 0 {
668 println!(
669 "{label} parse warning {file_path}: ... {rest} more suppressed (set max_parse_warnings_per_file to 0 for unlimited)",
670 );
671 }
672 break;
673 }
674 println!(
675 "{label} parse warning {file_path}:{line}:{col} - {snippet}",
676 );
677 }
678}
679
/// Decides whether an unresolved import is worth logging.
///
/// With `verbose_imports` set, everything is logged. Otherwise only imports
/// that are neither standard-library nor third-party are logged.
fn should_log_unresolved_import(
    verbose_imports: bool,
    is_stdlib: bool,
    is_third_party: bool,
) -> bool {
    if verbose_imports {
        return true;
    }
    let is_external = is_stdlib || is_third_party;
    !is_external
}
687
/// Persists every parsed file, together with the symbols and relationships
/// extracted from it, into Neo4j.
///
/// * `cfg` – connection settings.
/// * `root` – repository root; file paths are stored relative to it.
/// * `files` – the parsed files to persist.
/// * `clean` – when `true`, every node and relationship in the database is
///   deleted before anything is written.
/// * `follow_symlinks` – forwarded to the `go.mod` discovery helpers.
/// * `persistence` – logging and compression options.
///
/// Each file gets a `File` node and is then handed to the persister for its
/// language. Relationships are buffered in a `BatchAccumulator` and written in
/// bulk. Erlang files are queued as futures that share a separate, mutex-guarded
/// accumulator. After all files are processed, `ApiEndpoint` and `ExternalApi`
/// nodes with equal `norm_path` are linked by `SAME_API`.
///
/// # Errors
/// Returns `GraphError` on the first connection or query failure. Failures of
/// `go.mod` discovery and of compressor setup are only printed to stderr.
pub async fn persist_files_to_neo4j(
    cfg: &Neo4jConfig,
    root: &Path,
    files: &[ParsedFile],
    clean: bool,
    follow_symlinks: bool,
    persistence: &GraphPersistenceOptions,
) -> Result<(), GraphError> {
    let graph = Graph::new(&cfg.uri, &cfg.user, &cfg.password).await?;

    if clean {
        // Wipe the whole database, not just the nodes belonging to `files`.
        println!("Neo4j: deleting all existing nodes and relationships...");
        let delete_query = query("MATCH (n) DETACH DELETE n");
        graph.run(delete_query).await?;
        println!("Neo4j: database cleaned, starting fresh graph construction");
    }

    // Repo-relative paths of all files in this run; persisters use the set to
    // check whether a dependency points at a file that is part of the run.
    let mut known_paths: HashSet<String> = HashSet::new();
    for file in files {
        known_paths.insert(neo4j_path_string(root, &file.path));
    }

    // Go module discovery is best-effort: on failure, warn and continue with an empty list.
    let go_modules = discover_go_modules(root, follow_symlinks).unwrap_or_else(|e| {
        eprintln!("Neo4j: warning: could not discover go.mod modules: {e}");
        Vec::new()
    });

    let go_replaces = discover_go_replaces(root, follow_symlinks).unwrap_or_else(|e| {
        eprintln!("Neo4j: warning: could not discover go.mod replace directives: {e}");
        Vec::new()
    });

    let csharp_batch_index = build_csharp_batch_index(files, root);

    // Optional compressor: a failed health check is only reported (the client is
    // still used); a failed construction disables compression for this run.
    let compressor_client = if persistence.compressor.enabled {
        match CompressorClient::from_config(&persistence.compressor) {
            Ok(client) => {
                if let Err(e) = client.health_check().await {
                    eprintln!("RedCompressor: health check failed ({e}); compression may be unavailable");
                }
                Some(client)
            }
            Err(e) => {
                eprintln!("RedCompressor: failed to create client ({e}); skipping code_bytes");
                None
            }
        }
    } else {
        None
    };
    let compressor = compressor_client.as_ref();

    // Relationship buffer for the languages that are processed sequentially.
    let mut accumulator = BatchAccumulator::new();
    let erlang_module_index = build_erlang_module_index(files);

    let total_files = files.len();
    // Erlang persistence futures plus the accumulator they share.
    let mut erlang_futures: FuturesUnordered<_> = FuturesUnordered::new();
    let erlang_accumulator: SharedBatchAccumulator =
        Arc::new(Mutex::new(BatchAccumulator::new()));

    for (idx, file) in files.iter().enumerate() {
        let language = file.language.to_string();
        let path = neo4j_path_string(root, &file.path);
        let project_name = derive_project_name(&file.path, root);

        println!(
            "Neo4j: processing file {}/{} ({})",
            idx + 1,
            total_files,
            path
        );

        // Upsert the File node; the same properties are set on create and on match.
        let q = query(
            "
            MERGE (f:File { path: $path })
            ON CREATE SET f.language = $language,
                          f.project_name = $project_name,
                          f.is_test = $is_test
            ON MATCH SET f.language = $language,
                         f.project_name = $project_name,
                         f.is_test = $is_test
            ",
        )
        .param("path", path.clone())
        .param("language", language.clone())
        .param("project_name", project_name.clone())
        .param("is_test", file.is_test);

        graph.run(q).await?;

        let source = &file.source;
        // Dispatch to the language-specific persister.
        match file.language {
            LanguageId::Java => {
                persist_java_structure(
                    &graph,
                    &path,
                    file,
                    source,
                    &known_paths,
                    project_name.clone(),
                    &mut accumulator,
                    persistence,
                    compressor,
                )
                .await?;
            }
            LanguageId::CSharp => {
                persist_csharp_structure(
                    &graph,
                    &path,
                    file,
                    source,
                    project_name.clone(),
                    &known_paths,
                    &csharp_batch_index,
                    &mut accumulator,
                    compressor,
                )
                .await?;
            }
            LanguageId::Erlang => {
                // Queued rather than awaited here; the future only makes
                // progress while `erlang_futures` is being polled.
                erlang_futures.push(persist_erlang_structure(
                    &graph,
                    file,
                    path.clone(),
                    source,
                    project_name.clone(),
                    &erlang_module_index,
                    erlang_accumulator.clone(),
                    compressor,
                ));

                // Once the queue reaches the limit, wait for one future to
                // finish before accepting more work.
                if erlang_futures.len() >= MAX_CONCURRENT_ERLANG_WRITES {
                    if let Some(res) = erlang_futures.next().await {
                        res?;
                    }
                    flush_shared_accumulator_if_needed(&erlang_accumulator, &graph).await?;
                }
            }
            LanguageId::Go => {
                persist_go_structure(
                    &graph,
                    &path,
                    file,
                    source,
                    &known_paths,
                    project_name.clone(),
                    &mut accumulator,
                    root,
                    &go_modules,
                    &go_replaces,
                    persistence,
                    compressor,
                )
                .await?;
            }
            _ => {
                // Every other language goes through the generic function persister.
                persist_non_java_functions(
                    &graph,
                    &path,
                    file,
                    source,
                    project_name.clone(),
                    &known_paths,
                    &mut accumulator,
                    persistence,
                    compressor,
                )
                .await?;
            }
        }

        // Write out buffered relationships once a batch threshold is reached.
        if accumulator.should_flush() {
            accumulator.flush(&graph).await?;
        }
        flush_shared_accumulator_if_needed(&erlang_accumulator, &graph).await?;
    }

    // Drain the Erlang futures that are still pending.
    while let Some(res) = erlang_futures.next().await {
        res?;
        flush_shared_accumulator_if_needed(&erlang_accumulator, &graph).await?;
    }

    // Final flush of whatever is left, regardless of thresholds.
    accumulator.flush(&graph).await?;
    flush_shared_accumulator_force(&erlang_accumulator, &graph).await?;

    // Link endpoints exposed by the code base to external APIs it calls when
    // both have the same non-null normalized path.
    let same_api_query = query(
        "
        MATCH (ep:ApiEndpoint)
        MATCH (ext:ExternalApi)
        WHERE ep.norm_path IS NOT NULL
          AND ext.norm_path IS NOT NULL
          AND ep.norm_path = ext.norm_path
        MERGE (ep)-[:SAME_API]->(ext)
        ",
    );

    graph.run(same_api_query).await?;

    println!("Neo4j: finished processing {} files.", total_files);

    Ok(())
}
912
/// Persists the structure of one Java file.
///
/// Written immediately (one query each):
/// * a `Class` node per declared class, linked from the file by `DECLARES_CLASS`;
/// * a `Function` node per method, linked from the file (and from its class,
///   when it has one) by `DECLARES_FUNCTION`;
/// * `DEPENDS_ON_FILE` edges for internal imports that map to a path in `known_paths`;
/// * `ApiEndpoint` nodes for Spring endpoints, with `HANDLED_BY` edges to the
///   methods whose simple name equals the handler name;
/// * `ExternalApi` nodes for external HTTP URLs found in the source.
///
/// Buffered in `accumulator` for a later bulk write: inheritance and
/// injected-dependency edges, call edges, function-to-class usage edges and
/// function-to-external-API edges.
///
/// `file_path` is the graph path of the file; the queries below `MATCH` the
/// `File` node with that path, so it must already exist.
///
/// # Errors
/// Returns `GraphError` on the first failing query.
async fn persist_java_structure(
    graph: &Graph,
    file_path: &str,
    file: &ParsedFile,
    source: &str,
    known_paths: &HashSet<String>,
    project_name: Option<String>,
    accumulator: &mut BatchAccumulator,
    persistence: &GraphPersistenceOptions,
    compressor: Option<&CompressorClient>,
) -> Result<(), GraphError> {
    let package = extract_java_package(source);
    let (classes, methods) = extract_java_symbols(&file.tree, source, package.as_deref());
    let class_spans = extract_java_class_spans(&file.tree, source, package.as_deref());
    let method_spans = extract_java_method_body_spans(&file.tree, source, package.as_deref());

    emit_limited_parse_warnings(
        "Java",
        file_path,
        extract_java_parse_warnings(&file.tree, source),
        persistence.max_parse_warnings_per_file,
    );

    // Annotation lists keyed by the fqn of the class / method they belong to.
    let class_ann_map: HashMap<String, Vec<String>> =
        extract_java_class_annotations(&file.tree, source, package.as_deref())
            .into_iter()
            .collect();
    let method_ann_map: HashMap<String, Vec<String>> =
        extract_java_method_annotations(&file.tree, source, package.as_deref())
            .into_iter()
            .collect();

    for class in &classes {
        // Classes without recorded annotations get an empty list.
        let annotations = class_ann_map
            .get(&class.fqn)
            .cloned()
            .unwrap_or_default();
        let code_bytes = code_bytes_for_span(
            compressor,
            source,
            class_spans.get(&class.fqn).copied(),
            LanguageId::Java,
        )
        .await;
        // On match, `coalesce` keeps the stored code_bytes when the new value is null.
        let q = query(
            "
            MATCH (f:File { path: $path })
            MERGE (c:Class { fqn: $class_fqn })
            ON CREATE SET c.name = $class_name,
                          c.path = $path,
                          c.project_name = $project_name,
                          c.annotations = $annotations,
                          c.code_bytes = $code_bytes
            ON MATCH SET c.name = $class_name,
                         c.path = $path,
                         c.project_name = $project_name,
                         c.annotations = $annotations,
                         c.code_bytes = coalesce($code_bytes, c.code_bytes)
            MERGE (f)-[:DECLARES_CLASS]->(c)
            ",
        )
        .param("path", file_path.to_string())
        .param("class_fqn", class.fqn.clone())
        .param("class_name", class.name.clone())
        .param("project_name", project_name.clone())
        .param("annotations", annotations)
        .param(props::CODE_BYTES, code_bytes);

        graph.run(q).await?;
    }

    for func in &methods {
        let fn_annotations = method_ann_map
            .get(&func.fqn)
            .cloned()
            .unwrap_or_default();
        let code_bytes = code_bytes_for_span(
            compressor,
            source,
            method_spans.get(&func.fqn).copied(),
            LanguageId::Java,
        )
        .await;
        // The two branches differ only in whether the owning class is merged and
        // linked to the function.
        match &func.class_fqn {
            Some(class_fqn) => {
                let q = query(
                    "
                    MATCH (f:File { path: $path })
                    MERGE (cls:Class { fqn: $class_fqn })
                    MERGE (fn:Function { fqn: $fn_fqn })
                    ON CREATE SET fn.name = $fn_name,
                                  fn.path = $path,
                                  fn.project_name = $project_name,
                                  fn.return_type = $return_type,
                                  fn.param_types = $param_types,
                                  fn.param_count = $param_count,
                                  fn.annotations = $fn_annotations,
                                  fn.code_bytes = $code_bytes
                    ON MATCH SET fn.name = $fn_name,
                                 fn.path = $path,
                                 fn.project_name = $project_name,
                                 fn.return_type = coalesce($return_type, fn.return_type),
                                 fn.param_types = coalesce($param_types, fn.param_types),
                                 fn.param_count = coalesce($param_count, fn.param_count),
                                 fn.annotations = $fn_annotations,
                                 fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
                    MERGE (f)-[:DECLARES_FUNCTION]->(fn)
                    MERGE (cls)-[:DECLARES_FUNCTION]->(fn)
                    ",
                )
                .param("path", file_path.to_string())
                .param("class_fqn", class_fqn.clone())
                .param("fn_fqn", func.fqn.clone())
                .param("fn_name", func.name.clone())
                .param("project_name", project_name.clone())
                .param("return_type", func.return_type.clone())
                .param("param_types", func.param_types.clone())
                .param("param_count", func.param_count as i64)
                .param("fn_annotations", fn_annotations)
                .param(props::CODE_BYTES, code_bytes.clone());

                graph.run(q).await?;
            }
            None => {
                let q = query(
                    "
                    MATCH (f:File { path: $path })
                    MERGE (fn:Function { fqn: $fn_fqn })
                    ON CREATE SET fn.name = $fn_name,
                                  fn.path = $path,
                                  fn.project_name = $project_name,
                                  fn.return_type = $return_type,
                                  fn.param_types = $param_types,
                                  fn.param_count = $param_count,
                                  fn.annotations = $fn_annotations,
                                  fn.code_bytes = $code_bytes
                    ON MATCH SET fn.name = $fn_name,
                                 fn.path = $path,
                                 fn.project_name = $project_name,
                                 fn.return_type = coalesce($return_type, fn.return_type),
                                 fn.param_types = coalesce($param_types, fn.param_types),
                                 fn.param_count = coalesce($param_count, fn.param_count),
                                 fn.annotations = $fn_annotations,
                                 fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
                    MERGE (f)-[:DECLARES_FUNCTION]->(fn)
                    ",
                )
                .param("path", file_path.to_string())
                .param("fn_fqn", func.fqn.clone())
                .param("fn_name", func.name.clone())
                .param("project_name", project_name.clone())
                .param("return_type", func.return_type.clone())
                .param("param_types", func.param_types.clone())
                .param("param_count", func.param_count as i64)
                .param("fn_annotations", fn_annotations)
                .param(props::CODE_BYTES, code_bytes);

                graph.run(q).await?;
            }
        }
    }

    // Inheritance and injected dependencies are both buffered as class-to-class
    // USES_CLASS edges.
    for (derived, base) in extract_java_inheritance_edges(&file.tree, source, package.as_deref()) {
        accumulator.add_class_uses_class(derived, base);
    }
    for (cls, dep) in extract_java_injected_dependencies(&file.tree, source, package.as_deref()) {
        accumulator.add_class_uses_class(cls, dep);
    }

    let calls = extract_java_calls(&file.tree, source, package.as_deref());
    for (caller_fqn, callee_fqn) in calls {
        accumulator.add_calls_function(caller_fqn, callee_fqn);
    }

    let internal_imports = extract_internal_java_imports(source);
    for import_fqn in internal_imports {
        if let Some(dep_path) = map_import_to_project_path(file_path, &import_fqn) {
            // Only link to files that are part of this run.
            if !known_paths.contains(&dep_path) {
                continue;
            }

            let dep_query = query(
                "
                MERGE (src:File { path: $src_path })
                MERGE (dst:File { path: $dst_path })
                MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
                ",
            )
            .param("src_path", file_path.to_string())
            .param("dst_path", dep_path.clone());

            graph.run(dep_query).await?;
        }
    }

    let endpoints = extract_java_spring_endpoints(source);
    for (http_methods, path_template, handler_name) in endpoints {
        let norm_path = normalize_api_path(&path_template);

        // On match, `methods` is overwritten while the other properties keep
        // their existing non-null values.
        let api_query = query(
            "
            MERGE (api:ApiEndpoint { path: $path })
            ON CREATE SET api.methods = $methods,
                          api.protocol = 'http',
                          api.framework = 'spring',
                          api.project_name = $project_name,
                          api.norm_path = $norm_path
            ON MATCH SET api.methods = $methods,
                         api.protocol = coalesce(api.protocol, 'http'),
                         api.framework = coalesce(api.framework, 'spring'),
                         api.project_name = coalesce(api.project_name, $project_name),
                         api.norm_path = coalesce(api.norm_path, $norm_path)
            ",
        )
        .param("path", path_template.clone())
        .param("methods", http_methods.clone())
        .param("project_name", project_name.clone())
        .param("norm_path", norm_path.clone());

        graph.run(api_query).await?;

        for func in &methods {
            // Handlers are matched by simple name, so every method in this file
            // with that name is linked.
            if func.name != handler_name {
                continue;
            }

            let rel_query = query(
                "
                MERGE (fn:Function { fqn: $fn_fqn })
                MERGE (api:ApiEndpoint { path: $path })
                MERGE (api)-[:HANDLED_BY]->(fn)
                ",
            )
            .param("fn_fqn", func.fqn.clone())
            .param("path", path_template.clone());

            graph.run(rel_query).await?;
        }
    }

    let external_urls = extract_external_http_urls(source);
    for full_url in external_urls {
        let (protocol_opt, host, path) = split_url_protocol_host_and_path(&full_url);
        // URLs without an explicit protocol are treated as http.
        let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
        let base_url = format!("{protocol}://{host}");
        // The host doubles as both the display name and the provider.
        let name = host.clone();
        let norm_path = normalize_api_path(&path);

        let ext_query = query(
            "
            MERGE (ext:ExternalApi { base_url: $base_url, norm_path: $norm_path })
            ON CREATE SET ext.name = $name,
                          ext.path = $path,
                          ext.protocol = $protocol,
                          ext.provider = $provider,
                          ext.project_name = $project_name
            ON MATCH SET ext.name = coalesce(ext.name, $name),
                         ext.path = coalesce(ext.path, $path),
                         ext.protocol = coalesce(ext.protocol, $protocol),
                         ext.provider = coalesce(ext.provider, $provider),
                         ext.project_name = coalesce(ext.project_name, $project_name)
            ",
        )
        .param("name", name.clone())
        .param("base_url", base_url.clone())
        .param("path", path.clone())
        .param("norm_path", norm_path.clone())
        .param("protocol", protocol.clone())
        .param("provider", name.clone())
        .param("project_name", project_name.clone());

        graph.run(ext_query).await?;

        // Every method in the file is linked to every external URL found in the
        // file; the URL is not attributed to the specific method that uses it.
        for func in &methods {
            accumulator.add_calls_external_api(
                func.fqn.clone(),
                base_url.clone(),
                norm_path.clone(),
            );
        }
    }

    let used_classes = extract_java_used_classes(&file.tree, source, package.as_deref());
    for (fn_fqn, class_fqn) in used_classes {
        accumulator.add_uses_class(fn_fqn, class_fqn);
    }

    Ok(())
}
1231
/// Persists everything extracted from one C# file into the graph.
///
/// Written directly to Neo4j through `graph`, in this order:
/// 1. `Class` nodes with `File -[:DECLARES_CLASS]-> Class`
/// 2. `Property` nodes with `Class -[:DECLARES_PROPERTY]-> Property`
/// 3. `Function` nodes for methods that have an owning class (linked from both the
///    `File` and the `Class`)
/// 4. `Function` nodes for methods without an owning class (linked from the `File` only)
/// 5. `ApiEndpoint` nodes and `ApiEndpoint -[:HANDLED_BY]-> Function`
/// 6. `ExternalApi` nodes
/// 7. `File -[:DEPENDS_ON_FILE]-> File` for namespaces imported via `using`
///
/// Inheritance, class-usage, call and external-API-call edges are not written here;
/// they are pushed into `accumulator`.
///
/// Node batches (steps 1-4) are sent in chunks of `CSHARP_NODE_BATCH_FLUSH_THRESHOLD`
/// as parallel arrays that the Cypher statement walks with `UNWIND range(...)`.
/// Those statements `MATCH` the `File` (or `Class`) node rather than creating it, so
/// rows whose anchor node does not exist yet produce no writes.
///
/// # Errors
///
/// Propagates the first failing `graph.run` call via `?`. Statements that ran before
/// the failure are not undone by this function.
async fn persist_csharp_structure(
    graph: &Graph,
    file_path: &str,
    file: &ParsedFile,
    source: &str,
    project_name: Option<String>,
    known_paths: &HashSet<String>,
    csharp_index: &CSharpBatchIndex,
    accumulator: &mut BatchAccumulator,
    compressor: Option<&CompressorClient>,
) -> Result<(), GraphError> {
    let language = file.language.to_string();
    let namespace = extract_csharp_namespace(&file.tree, source);
    let using_summary = extract_csharp_using_summary(&file.tree, source);

    // Symbols plus the per-symbol spans that are looked up by FQN below to build the
    // `code_bytes` payloads.
    let (classes, methods, property_symbols) = extract_csharp_symbols(&file.tree, source);
    let class_spans = extract_csharp_class_spans(&file.tree, source);
    let property_spans = extract_csharp_property_spans(&file.tree, source);
    let method_spans = extract_csharp_method_body_spans_map(&file.tree, source, namespace.as_deref());

    // --- Classes -------------------------------------------------------------------
    // `.max(1)` keeps the chunk size non-zero; `slice::chunks` panics on a size of 0.
    for chunk in classes.chunks(CSHARP_NODE_BATCH_FLUSH_THRESHOLD.max(1)) {
        let class_fqns: Vec<String> = chunk.iter().map(|c| c.fqn.clone()).collect();
        let class_names: Vec<String> = chunk.iter().map(|c| c.name.clone()).collect();
        // Symbols without an explicit kind are stored as "class".
        let class_kinds: Vec<String> = chunk
            .iter()
            .map(|c| c.kind.unwrap_or("class").to_string())
            .collect();
        // One entry per class, in chunk order, so index `i` lines up across all arrays.
        let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(chunk.len());
        for c in chunk {
            code_bytes_list.push(
                code_bytes_for_span(
                    compressor,
                    source,
                    class_spans.get(&c.fqn).copied(),
                    LanguageId::CSharp,
                )
                .await,
            );
        }
        // On an existing node `c.path` is left untouched and `code_bytes` is only
        // replaced when a new non-null value is supplied (`coalesce`).
        let q = query(
            "
            UNWIND range(0, size($class_fqns) - 1) AS i
            WITH $file_path AS path, $class_fqns[i] AS class_fqn, $class_names[i] AS class_name,
            $class_kinds[i] AS class_kind, $project_name AS project_name, $language AS language,
            $code_bytes_list[i] AS code_bytes
            MATCH (f:File { path: path })
            MERGE (c:Class { fqn: class_fqn })
            ON CREATE SET c.name = class_name, c.path = path, c.project_name = project_name,
            c.language = language, c.kind = class_kind, c.code_bytes = code_bytes
            ON MATCH SET c.name = class_name, c.project_name = project_name, c.language = language,
            c.kind = class_kind, c.code_bytes = coalesce(code_bytes, c.code_bytes)
            MERGE (f)-[:DECLARES_CLASS]->(c)
            ",
        )
        .param("file_path", file_path.to_string())
        .param("class_fqns", class_fqns)
        .param("class_names", class_names)
        .param("class_kinds", class_kinds)
        .param("project_name", project_name.clone())
        .param("language", language.clone())
        .param("code_bytes_list", code_bytes_list);

        graph.run(q).await?;
    }

    // --- Properties ----------------------------------------------------------------
    // Anchored on the owning `Class` node, which the class batch above is expected to
    // have created for classes declared in this file.
    for chunk in property_symbols.chunks(CSHARP_NODE_BATCH_FLUSH_THRESHOLD.max(1)) {
        let class_fqns: Vec<String> = chunk.iter().map(|p| p.class_fqn.clone()).collect();
        let prop_fqns: Vec<String> = chunk.iter().map(|p| p.fqn.clone()).collect();
        let prop_names: Vec<String> = chunk.iter().map(|p| p.name.clone()).collect();
        let decl_types: Vec<Option<String>> = chunk.iter().map(|p| p.declared_type.clone()).collect();
        let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(chunk.len());
        for p in chunk {
            code_bytes_list.push(
                code_bytes_for_span(
                    compressor,
                    source,
                    property_spans.get(&p.fqn).copied(),
                    LanguageId::CSharp,
                )
                .await,
            );
        }
        let q = query(
            "
            UNWIND range(0, size($prop_fqns) - 1) AS i
            WITH $class_fqns[i] AS class_fqn, $prop_fqns[i] AS prop_fqn, $prop_names[i] AS prop_name,
            $decl_types[i] AS declared_type, $file_path AS path,
            $project_name AS project_name, $language AS language,
            $code_bytes_list[i] AS code_bytes
            MATCH (c:Class { fqn: class_fqn })
            MERGE (p:Property { fqn: prop_fqn })
            ON CREATE SET p.name = prop_name, p.path = path, p.project_name = project_name,
            p.language = language, p.declared_type = declared_type, p.code_bytes = code_bytes
            ON MATCH SET p.name = prop_name, p.project_name = project_name, p.language = language,
            p.declared_type = coalesce(declared_type, p.declared_type),
            p.code_bytes = coalesce(code_bytes, p.code_bytes)
            MERGE (c)-[:DECLARES_PROPERTY]->(p)
            ",
        )
        .param("class_fqns", class_fqns)
        .param("prop_fqns", prop_fqns)
        .param("prop_names", prop_names)
        .param("decl_types", decl_types)
        .param("file_path", file_path.to_string())
        .param("project_name", project_name.clone())
        .param("language", language.clone())
        .param("code_bytes_list", code_bytes_list);

        graph.run(q).await?;
    }

    // --- Methods that belong to a class --------------------------------------------
    let methods_with_class: Vec<&FunctionSymbol> =
        methods.iter().filter(|f| f.class_fqn.is_some()).collect();
    for chunk in methods_with_class.chunks(CSHARP_NODE_BATCH_FLUSH_THRESHOLD.max(1)) {
        // The filter above guarantees `class_fqn` is `Some`, so the default is never used.
        let class_fqns: Vec<String> = chunk
            .iter()
            .map(|f| f.class_fqn.clone().unwrap_or_default())
            .collect();
        let fn_fqns: Vec<String> = chunk.iter().map(|f| f.fqn.clone()).collect();
        let fn_names: Vec<String> = chunk.iter().map(|f| f.name.clone()).collect();
        let return_types: Vec<Option<String>> = chunk.iter().map(|f| f.return_type.clone()).collect();
        let param_types_list: Vec<Vec<String>> = chunk.iter().map(|f| f.param_types.clone()).collect();
        let param_counts: Vec<i64> = chunk.iter().map(|f| f.param_count as i64).collect();
        let modifiers_list: Vec<Vec<String>> = chunk.iter().map(|f| f.modifiers.clone()).collect();
        let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(chunk.len());
        for f in chunk {
            code_bytes_list.push(
                code_bytes_for_span(
                    compressor,
                    source,
                    method_spans.get(&f.fqn).copied(),
                    LanguageId::CSharp,
                )
                .await,
            );
        }
        // The owning class is `MERGE`d (not `MATCH`ed), so a method still gets a class
        // node even if the class batch did not create one.
        let q = query(
            "
            UNWIND range(0, size($fn_fqns) - 1) AS i
            WITH $file_path AS path, $class_fqns[i] AS class_fqn, $fn_fqns[i] AS fn_fqn,
            $fn_names[i] AS fn_name, $return_types[i] AS return_type,
            $param_types_list[i] AS param_types, $param_counts[i] AS param_count,
            $modifiers_list[i] AS modifiers, $project_name AS project_name, $language AS language,
            $code_bytes_list[i] AS code_bytes
            MATCH (f:File { path: path })
            MERGE (cls:Class { fqn: class_fqn })
            MERGE (fn:Function { fqn: fn_fqn })
            ON CREATE SET fn.name = fn_name, fn.path = path, fn.project_name = project_name,
            fn.language = language, fn.return_type = return_type,
            fn.param_types = param_types, fn.param_count = param_count,
            fn.modifiers = modifiers, fn.code_bytes = code_bytes
            ON MATCH SET fn.name = fn_name, fn.project_name = project_name, fn.language = language,
            fn.return_type = coalesce(return_type, fn.return_type),
            fn.param_types = coalesce(param_types, fn.param_types),
            fn.param_count = coalesce(param_count, fn.param_count),
            fn.modifiers = coalesce(modifiers, fn.modifiers),
            fn.code_bytes = coalesce(code_bytes, fn.code_bytes)
            MERGE (f)-[:DECLARES_FUNCTION]->(fn)
            MERGE (cls)-[:DECLARES_FUNCTION]->(fn)
            ",
        )
        .param("file_path", file_path.to_string())
        .param("class_fqns", class_fqns)
        .param("fn_fqns", fn_fqns)
        .param("fn_names", fn_names)
        .param("return_types", return_types)
        .param("param_types_list", param_types_list)
        .param("param_counts", param_counts)
        .param("modifiers_list", modifiers_list)
        .param("project_name", project_name.clone())
        .param("language", language.clone())
        .param("code_bytes_list", code_bytes_list);

        graph.run(q).await?;
    }

    // --- Methods without an owning class -------------------------------------------
    // Same statement as above minus the `Class` node and its edge.
    let methods_top: Vec<&FunctionSymbol> = methods.iter().filter(|f| f.class_fqn.is_none()).collect();
    for chunk in methods_top.chunks(CSHARP_NODE_BATCH_FLUSH_THRESHOLD.max(1)) {
        let fn_fqns: Vec<String> = chunk.iter().map(|f| f.fqn.clone()).collect();
        let fn_names: Vec<String> = chunk.iter().map(|f| f.name.clone()).collect();
        let return_types: Vec<Option<String>> = chunk.iter().map(|f| f.return_type.clone()).collect();
        let param_types_list: Vec<Vec<String>> = chunk.iter().map(|f| f.param_types.clone()).collect();
        let param_counts: Vec<i64> = chunk.iter().map(|f| f.param_count as i64).collect();
        let modifiers_list: Vec<Vec<String>> = chunk.iter().map(|f| f.modifiers.clone()).collect();
        let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(chunk.len());
        for f in chunk {
            code_bytes_list.push(
                code_bytes_for_span(
                    compressor,
                    source,
                    method_spans.get(&f.fqn).copied(),
                    LanguageId::CSharp,
                )
                .await,
            );
        }
        let q = query(
            "
            UNWIND range(0, size($fn_fqns) - 1) AS i
            WITH $file_path AS path, $fn_fqns[i] AS fn_fqn, $fn_names[i] AS fn_name,
            $return_types[i] AS return_type, $param_types_list[i] AS param_types,
            $param_counts[i] AS param_count, $modifiers_list[i] AS modifiers,
            $project_name AS project_name, $language AS language,
            $code_bytes_list[i] AS code_bytes
            MATCH (f:File { path: path })
            MERGE (fn:Function { fqn: fn_fqn })
            ON CREATE SET fn.name = fn_name, fn.path = path, fn.project_name = project_name,
            fn.language = language, fn.return_type = return_type,
            fn.param_types = param_types, fn.param_count = param_count,
            fn.modifiers = modifiers, fn.code_bytes = code_bytes
            ON MATCH SET fn.name = fn_name, fn.project_name = project_name, fn.language = language,
            fn.return_type = coalesce(return_type, fn.return_type),
            fn.param_types = coalesce(param_types, fn.param_types),
            fn.param_count = coalesce(param_count, fn.param_count),
            fn.modifiers = coalesce(modifiers, fn.modifiers),
            fn.code_bytes = coalesce(code_bytes, fn.code_bytes)
            MERGE (f)-[:DECLARES_FUNCTION]->(fn)
            ",
        )
        .param("file_path", file_path.to_string())
        .param("fn_fqns", fn_fqns)
        .param("fn_names", fn_names)
        .param("return_types", return_types)
        .param("param_types_list", param_types_list)
        .param("param_counts", param_counts)
        .param("modifiers_list", modifiers_list)
        .param("project_name", project_name.clone())
        .param("language", language.clone())
        .param("code_bytes_list", code_bytes_list);

        graph.run(q).await?;
    }

    // --- API endpoints declared in this file ----------------------------------------
    let endpoints = extract_csharp_api_endpoints_from_tree(&file.tree, source);
    for (methods_http, path_template, handler_name) in endpoints {
        let norm_path = normalize_api_path(&path_template);

        // Endpoints are keyed by the raw path template only. On an existing node the
        // HTTP methods are overwritten; the other properties are only filled if unset.
        let api_query = query(
            "
            MERGE (api:ApiEndpoint { path: $path })
            ON CREATE SET api.methods = $methods,
            api.protocol = 'http',
            api.framework = 'aspnet',
            api.project_name = $project_name,
            api.norm_path = $norm_path
            ON MATCH SET api.methods = $methods,
            api.protocol = coalesce(api.protocol, 'http'),
            api.framework = coalesce(api.framework, 'aspnet'),
            api.project_name = coalesce(api.project_name, $project_name),
            api.norm_path = coalesce(api.norm_path, $norm_path)
            ",
        )
        .param("path", path_template.clone())
        .param("methods", methods_http.clone())
        .param("project_name", project_name.clone())
        .param("norm_path", norm_path.clone());

        graph.run(api_query).await?;

        // Handlers are matched by simple method name, so every method in this file
        // with that name (e.g. overloads) is linked to the endpoint.
        for func in &methods {
            if func.name != handler_name {
                continue;
            }

            let rel_query = query(
                "
                MERGE (fn:Function { fqn: $fn_fqn })
                MERGE (api:ApiEndpoint { path: $path })
                MERGE (api)-[:HANDLED_BY]->(fn)
                ",
            )
            .param("fn_fqn", func.fqn.clone())
            .param("path", path_template.clone());

            graph.run(rel_query).await?;
        }
    }

    // --- External HTTP URLs ----------------------------------------------------------
    let url_spans = extract_csharp_external_http_urls_with_spans(&file.tree, source);
    // Shadows the earlier `method_spans` map: here the spans come as `(fqn, lo, hi)`
    // triples and are grouped per FQN, so one FQN can carry several ranges.
    let method_spans = csharp_method_body_spans(&file.tree, source, namespace.as_deref());
    let mut spans_by_fqn: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
    for (fqn, lo, hi) in method_spans {
        spans_by_fqn.entry(fqn).or_default().push((lo, hi));
    }

    for (full_url, u_start, u_end) in url_spans {
        let (protocol_opt, host, path) = split_url_protocol_host_and_path(&full_url);
        // URLs without an explicit scheme are recorded as plain "http".
        let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
        let base_url = format!("{protocol}://{host}");
        // The host doubles as both the display name and the provider.
        let name = host.clone();
        let norm_path = normalize_api_path(&path);

        // Existing `ExternalApi` nodes keep any property that is already set.
        let ext_query = query(
            "
            MERGE (ext:ExternalApi { base_url: $base_url, norm_path: $norm_path })
            ON CREATE SET ext.name = $name,
            ext.path = $path,
            ext.protocol = $protocol,
            ext.provider = $provider,
            ext.project_name = $project_name
            ON MATCH SET ext.name = coalesce(ext.name, $name),
            ext.path = coalesce(ext.path, $path),
            ext.protocol = coalesce(ext.protocol, $protocol),
            ext.provider = coalesce(ext.provider, $provider),
            ext.project_name = coalesce(ext.project_name, $project_name)
            ",
        )
        .param("name", name.clone())
        .param("base_url", base_url.clone())
        .param("path", path.clone())
        .param("norm_path", norm_path.clone())
        .param("protocol", protocol.clone())
        .param("provider", name.clone())
        .param("project_name", project_name.clone());

        graph.run(ext_query).await?;

        // Only methods with a body range that fully contains the URL's span are
        // recorded as callers of the external API.
        for func in &methods {
            let Some(ranges) = spans_by_fqn.get(&func.fqn) else {
                continue;
            };
            if !ranges
                .iter()
                .any(|(lo, hi)| *lo <= u_start && u_end <= *hi)
            {
                continue;
            }
            accumulator.add_calls_external_api(
                func.fqn.clone(),
                base_url.clone(),
                norm_path.clone(),
            );
        }
    }

    // --- File dependencies from `using` namespaces -----------------------------------
    for ns in &using_summary.namespace_imports {
        // Namespaces that the batch index does not know are skipped.
        let Some(dep_paths) = csharp_index.namespace_to_paths.get(ns) else {
            continue;
        };
        for dep_path in dep_paths {
            // Skip self-references and paths outside the set of known paths.
            if dep_path == file_path || !known_paths.contains(dep_path) {
                continue;
            }
            let dep_query = query(
                "
                MERGE (src:File { path: $src_path })
                MERGE (dst:File { path: $dst_path })
                MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
                ",
            )
            .param("src_path", file_path.to_string())
            .param("dst_path", dep_path.clone());

            graph.run(dep_query).await?;
        }
    }

    // --- Edges deferred to the batch accumulator --------------------------------------
    for (derived, base) in extract_csharp_class_inheritance_edges(
        &file.tree,
        source,
        namespace.as_deref(),
        &using_summary,
        csharp_index,
    ) {
        accumulator.add_class_uses_class(derived, base);
    }

    let used_classes = extract_csharp_used_classes(
        &file.tree,
        source,
        namespace.as_deref(),
        &using_summary,
        csharp_index,
    );
    for (fn_fqn, class_fqn) in used_classes {
        accumulator.add_uses_class(fn_fqn, class_fqn);
    }

    let calls = extract_csharp_calls(
        &file.tree,
        source,
        namespace.as_deref(),
        &using_summary,
        csharp_index,
    );
    for (caller_fqn, callee_fqn) in calls {
        accumulator.add_calls_function(caller_fqn, callee_fqn);
    }

    Ok(())
}
1647
1648async fn persist_erlang_structure(
1658 graph: &Graph,
1659 file: &ParsedFile,
1660 file_path: String,
1661 source: &str,
1662 project_name: Option<String>,
1663 erlang_module_index: &HashMap<String, ErlangModuleSnapshot>,
1664 erlang_accumulator: SharedBatchAccumulator,
1665 compressor: Option<&CompressorClient>,
1666) -> Result<(), GraphError> {
1667 let language = file.language.to_string();
1668
1669 let module_name = resolve_erlang_module_name(&file.path, &file.tree, source);
1673 let erlang_meta = extract_erlang_behaviour_metadata_from_tree(&file.tree, source);
1674
1675 if let Some(module_name) = module_name.as_ref() {
1677 let module_code_bytes = match compressor {
1678 Some(client) => compress_full_source(source, LanguageId::Erlang, client).await,
1679 None => None,
1680 };
1681 let module_query = query(
1682 "
1683 MATCH (f:File { path: $path })
1684 MERGE (m:Module { name: $module_name, path: $path })
1685 ON CREATE SET m.language = $language,
1686 m.project_name = $project_name,
1687 m.code_bytes = $code_bytes
1688 ON MATCH SET m.language = $language,
1689 m.project_name = $project_name,
1690 m.code_bytes = coalesce($code_bytes, m.code_bytes)
1691 MERGE (f)-[:DECLARES_MODULE]->(m)
1692 ",
1693 )
1694 .param("path", file_path.to_string())
1695 .param("module_name", module_name.clone())
1696 .param("language", language.clone())
1697 .param("project_name", project_name.clone())
1698 .param(props::CODE_BYTES, module_code_bytes);
1699
1700 graph.run(module_query).await?;
1701 }
1702
1703 let functions = if let Some(module_name) = module_name.as_ref() {
1705 extract_erlang_functions(module_name, &file.tree, source)
1706 } else {
1707 Vec::new()
1708 };
1709 let function_spans = module_name.as_ref().map(|module_name| {
1710 extract_erlang_function_spans(module_name, &file.tree, source)
1711 }).unwrap_or_default();
1712 if !functions.is_empty() {
1714 let module_name = module_name.as_ref().expect("module must exist when functions exist");
1715 let mut fn_fqns: Vec<String> = Vec::with_capacity(functions.len());
1718 let mut fn_names: Vec<String> = Vec::with_capacity(functions.len());
1719 let mut fn_arities: Vec<i64> = Vec::with_capacity(functions.len());
1720 let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(functions.len());
1721
1722 for (fun_name, arity, fqn) in &functions {
1723 fn_fqns.push(fqn.clone());
1724 fn_names.push(fun_name.clone());
1725 fn_arities.push(*arity as i64);
1726 code_bytes_list.push(
1727 code_bytes_for_span(
1728 compressor,
1729 source,
1730 function_spans.get(fqn).copied(),
1731 LanguageId::Erlang,
1732 )
1733 .await,
1734 );
1735 }
1736
1737 let fun_query = query(
1738 "
1739 MATCH (f:File { path: $path })
1740 MATCH (m:Module { name: $module_name, path: $path })
1741 WITH f, m,
1742 $fn_fqns AS fn_fqns,
1743 $fn_names AS fn_names,
1744 $fn_arities AS fn_arities,
1745 $code_bytes_list AS code_bytes_list,
1746 $language AS language,
1747 $path AS path,
1748 $project_name AS project_name
1749 UNWIND range(0, size(fn_fqns) - 1) AS idx
1750 WITH f, m, language, path, project_name,
1751 fn_fqns[idx] AS fn_fqn,
1752 fn_names[idx] AS fn_name,
1753 fn_arities[idx] AS arity,
1754 code_bytes_list[idx] AS code_bytes
1755 MERGE (fn:Function { fqn: fn_fqn })
1756 ON CREATE SET fn.name = fn_name,
1757 fn.path = path,
1758 fn.language = language,
1759 fn.project_name = project_name,
1760 fn.arity = arity,
1761 fn.code_bytes = code_bytes
1762 ON MATCH SET fn.name = fn_name,
1763 fn.path = path,
1764 fn.language = language,
1765 fn.project_name = project_name,
1766 fn.arity = arity,
1767 fn.code_bytes = coalesce(code_bytes, fn.code_bytes)
1768 MERGE (f)-[:DECLARES_FUNCTION]->(fn)
1769 MERGE (m)-[:DECLARES_FUNCTION]->(fn)
1770 ",
1771 )
1772 .param("path", file_path.to_string())
1773 .param("module_name", module_name.clone())
1774 .param("language", language.clone())
1775 .param("fn_fqns", fn_fqns)
1776 .param("fn_names", fn_names)
1777 .param("fn_arities", fn_arities)
1778 .param("code_bytes_list", code_bytes_list)
1779 .param("project_name", project_name.clone());
1780
1781 graph.run(fun_query).await?;
1782 }
1783
1784 let callback_contracts = collect_callback_contracts_for_module(
1787 module_name.as_deref(),
1788 &erlang_meta.behaviour_usages,
1789 &erlang_meta.declared_callbacks,
1790 &erlang_meta.optional_callbacks,
1791 );
1792
1793 if let Some(module_name) = module_name.as_ref() {
1794 for behaviour in &erlang_meta.behaviour_usages {
1796 let behaviour_query = query(
1797 "
1798 MATCH (m:Module { name: $module_name, path: $path })
1799 MERGE (b:Behaviour { name: $behaviour })
1800 ON CREATE SET b.language = $language,
1801 b.project_name = $project_name
1802 ON MATCH SET b.language = coalesce(b.language, $language),
1803 b.project_name = coalesce(b.project_name, $project_name)
1804 MERGE (m)-[:IMPLEMENTS_BEHAVIOUR]->(b)
1805 ",
1806 )
1807 .param("module_name", module_name.clone())
1808 .param("path", file_path.to_string())
1809 .param("behaviour", behaviour.clone())
1810 .param("language", language.clone())
1811 .param("project_name", project_name.clone());
1812 graph.run(behaviour_query).await?;
1813
1814 let dep_path = guess_erlang_file_path_from_module(&file_path, behaviour);
1818 let module_dep_query = query(
1819 "
1820 MATCH (m:Module { name: $module_name, path: $path })
1821 MERGE (dst:File { path: $dst_path })
1822 MERGE (m)-[:DEPENDS_ON_FILE]->(dst)
1823 ",
1824 )
1825 .param("module_name", module_name.clone())
1826 .param("path", file_path.to_string())
1827 .param("dst_path", dep_path);
1828 graph.run(module_dep_query).await?;
1829 }
1830
1831 if !erlang_meta.declared_callbacks.is_empty() {
1834 let file_declares_behaviour_query = query(
1835 "
1836 MATCH (f:File { path: $path })
1837 MERGE (b:Behaviour { name: $behaviour })
1838 ON CREATE SET b.path = $path,
1839 b.language = $language,
1840 b.project_name = $project_name
1841 ON MATCH SET b.path = coalesce(b.path, $path),
1842 b.language = coalesce(b.language, $language),
1843 b.project_name = coalesce(b.project_name, $project_name)
1844 MERGE (f)-[:DECLARES_BEHAVIOUR]->(b)
1845 ",
1846 )
1847 .param("path", file_path.to_string())
1848 .param("behaviour", module_name.clone())
1849 .param("language", language.clone())
1850 .param("project_name", project_name.clone());
1851 graph.run(file_declares_behaviour_query).await?;
1852 }
1853
1854 for parent_behaviour in &erlang_meta.behaviour_extensions {
1856 let extends_query = query(
1857 "
1858 MERGE (child:Behaviour { name: $child })
1859 MERGE (parent:Behaviour { name: $parent })
1860 MERGE (child)-[:EXTENDS_BEHAVIOUR]->(parent)
1861 ",
1862 )
1863 .param("child", module_name.clone())
1864 .param("parent", parent_behaviour.clone());
1865 graph.run(extends_query).await?;
1866 }
1867 }
1868
1869 for contract in &callback_contracts {
1871 let callback_fqn = format!(
1872 "{behaviour}:{name}/{arity}",
1873 behaviour = contract.behaviour,
1874 name = contract.name,
1875 arity = contract.arity
1876 );
1877 let callback_query = query(
1878 "
1879 MERGE (b:Behaviour { name: $behaviour })
1880 MERGE (cb:Callback { fqn: $cb_fqn })
1881 ON CREATE SET cb.name = $cb_name,
1882 cb.arity = $cb_arity,
1883 cb.optional = $cb_optional,
1884 cb.language = $language,
1885 cb.project_name = $project_name
1886 ON MATCH SET cb.name = coalesce(cb.name, $cb_name),
1887 cb.arity = coalesce(cb.arity, $cb_arity),
1888 cb.optional = $cb_optional,
1889 cb.language = coalesce(cb.language, $language),
1890 cb.project_name = coalesce(cb.project_name, $project_name)
1891 MERGE (b)-[:DECLARES_CALLBACK]->(cb)
1892 ",
1893 )
1894 .param("behaviour", contract.behaviour.clone())
1895 .param("cb_fqn", callback_fqn.clone())
1896 .param("cb_name", contract.name.clone())
1897 .param("cb_arity", contract.arity as i64)
1898 .param("cb_optional", contract.optional)
1899 .param("language", language.clone())
1900 .param("project_name", project_name.clone());
1901 graph.run(callback_query).await?;
1902 }
1903
1904 let function_by_sig: HashMap<(String, u32), String> = functions
1907 .iter()
1908 .map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
1909 .collect();
1910
1911 for contract in &callback_contracts {
1912 let key = (contract.name.clone(), contract.arity);
1913 let Some(fn_fqn) = function_by_sig.get(&key) else {
1914 continue;
1915 };
1916
1917 let callback_fqn = format!(
1918 "{behaviour}:{name}/{arity}",
1919 behaviour = contract.behaviour,
1920 name = contract.name,
1921 arity = contract.arity
1922 );
1923 let implements_query = query(
1924 "
1925 MERGE (fn:Function { fqn: $fn_fqn })
1926 MERGE (cb:Callback { fqn: $cb_fqn })
1927 MERGE (fn)-[:IMPLEMENTS_CALLBACK]->(cb)
1928 ",
1929 )
1930 .param("fn_fqn", fn_fqn.clone())
1931 .param("cb_fqn", callback_fqn.clone());
1932 graph.run(implements_query).await?;
1933 }
1934
1935 for (name, arity) in erlang_meta.overridden_callbacks {
1937 let key = (name.clone(), arity);
1938 let Some(fn_fqn) = function_by_sig.get(&key) else {
1939 continue;
1940 };
1941 for contract in callback_contracts
1942 .iter()
1943 .filter(|c| c.name == name && c.arity == arity)
1944 {
1945 let callback_fqn = format!(
1946 "{behaviour}:{name}/{arity}",
1947 behaviour = contract.behaviour,
1948 name = contract.name,
1949 arity = contract.arity
1950 );
1951 let overrides_query = query(
1952 "
1953 MERGE (fn:Function { fqn: $fn_fqn })
1954 MERGE (cb:Callback { fqn: $cb_fqn })
1955 MERGE (fn)-[:OVERRIDES_CALLBACK]->(cb)
1956 ",
1957 )
1958 .param("fn_fqn", fn_fqn.clone())
1959 .param("cb_fqn", callback_fqn);
1960 graph.run(overrides_query).await?;
1961 }
1962 }
1963
1964 let endpoints = extract_erlang_api_endpoints(&file.tree, source);
1972 for (methods, path_template, handler_module) in endpoints {
1973 let norm_path = normalize_api_path(&path_template);
1974
1975 let api_query = query(
1977 "
1978 MERGE (api:ApiEndpoint { path: $path })
1979 ON CREATE SET api.methods = $methods,
1980 api.protocol = 'http',
1981 api.framework = 'cowboy',
1982 api.project_name = $project_name,
1983 api.norm_path = $norm_path
1984 ON MATCH SET api.methods = $methods,
1985 api.protocol = coalesce(api.protocol, 'http'),
1986 api.framework = coalesce(api.framework, 'cowboy'),
1987 api.project_name = coalesce(api.project_name, $project_name),
1988 api.norm_path = coalesce(api.norm_path, $norm_path)
1989 ",
1990 )
1991 .param("path", path_template.clone())
1992 .param("methods", methods.clone())
1993 .param("project_name", project_name.clone())
1994 .param("norm_path", norm_path.clone());
1995
1996 graph.run(api_query).await?;
1997
1998 let candidate_fqns = select_endpoint_handler_fqns(&handler_module, erlang_module_index);
2002 for fqn in candidate_fqns {
2003 let rel_query = query(
2004 "
2005 MERGE (fn:Function { fqn: $fn_fqn })
2006 MERGE (api:ApiEndpoint { path: $path })
2007 MERGE (api)-[:HANDLED_BY]->(fn)
2008 ",
2009 )
2010 .param("fn_fqn", fqn)
2011 .param("path", path_template.clone());
2012
2013 graph.run(rel_query).await?;
2014 }
2015 }
2016
2017 let external_urls = extract_external_http_urls_from_tree(&file.tree, source);
2023 for full_url in external_urls {
2024 let (protocol_opt, host, path) = split_url_protocol_host_and_path(&full_url);
2025 let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
2026 let base_url = format!("{protocol}://{host}");
2027 let name = host.clone();
2028 let norm_path = normalize_api_path(&path);
2029
2030 let ext_query = query(
2031 "
2032 MERGE (ext:ExternalApi { base_url: $base_url, norm_path: $norm_path })
2033 ON CREATE SET ext.name = $name,
2034 ext.path = $path,
2035 ext.protocol = $protocol,
2036 ext.provider = $provider,
2037 ext.project_name = $project_name
2038 ON MATCH SET ext.name = coalesce(ext.name, $name),
2039 ext.path = coalesce(ext.path, $path),
2040 ext.protocol = coalesce(ext.protocol, $protocol),
2041 ext.provider = coalesce(ext.provider, $provider),
2042 ext.project_name = coalesce(ext.project_name, $project_name)
2043 ",
2044 )
2045 .param("name", name.clone())
2046 .param("base_url", base_url.clone())
2047 .param("path", path.clone())
2048 .param("norm_path", norm_path.clone())
2049 .param("protocol", protocol.clone())
2050 .param("provider", name.clone())
2051 .param("project_name", project_name.clone());
2052
2053 graph.run(ext_query).await?;
2054
2055 if !functions.is_empty() {
2059 let mut guard = erlang_accumulator.lock().await;
2060 for (_fun_name, _arity, fqn) in &functions {
2061 guard.add_calls_external_api(fqn.clone(), base_url.clone(), norm_path.clone());
2062 }
2063 }
2064 }
2065
2066 let function_by_sig: HashMap<(String, u32), String> = functions
2075 .iter()
2076 .map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
2077 .collect();
2078 let call_edges: Vec<(String, String)> = extract_erlang_call_edges(
2079 &file.tree,
2080 source,
2081 module_name.as_deref(),
2082 &function_by_sig,
2083 )
2084 .into_iter()
2085 .collect();
2086
2087 if !call_edges.is_empty() {
2088 let mut guard = erlang_accumulator.lock().await;
2089 for (caller_fqn, callee_fqn) in call_edges {
2090 guard.add_calls_function(caller_fqn, callee_fqn);
2091 }
2092 }
2093
2094 let called_modules = extract_erlang_called_modules_from_tree(&file.tree, source);
2100 for callee_mod in called_modules {
2101 if module_name.as_deref() == Some(callee_mod.as_str()) {
2102 continue;
2103 }
2104 let dep_path = guess_erlang_file_path_from_module(&file_path, &callee_mod);
2105 let dep_query = query(
2106 "
2107 MERGE (src:File { path: $src_path })
2108 MERGE (dst:File { path: $dst_path })
2109 MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
2110 ",
2111 )
2112 .param("src_path", file_path.to_string())
2113 .param("dst_path", dep_path);
2114
2115 graph.run(dep_query).await?;
2116
2117 if let Some(module_name) = module_name.as_ref() {
2118 let module_dep_query = query(
2119 "
2120 MATCH (m:Module { name: $module_name, path: $path })
2121 MERGE (dst:File { path: $dst_path })
2122 MERGE (m)-[:DEPENDS_ON_FILE]->(dst)
2123 ",
2124 )
2125 .param("module_name", module_name.clone())
2126 .param("path", file_path.to_string())
2127 .param(
2128 "dst_path",
2129 guess_erlang_file_path_from_module(&file_path, &callee_mod),
2130 );
2131
2132 graph.run(module_dep_query).await?;
2133 }
2134 }
2135
2136 Ok(())
2137}
2138
2139async fn persist_non_java_functions(
2143 graph: &Graph,
2144 file_path: &str,
2145 file: &ParsedFile,
2146 source: &str,
2147 project_name: Option<String>,
2148 known_paths: &HashSet<String>,
2149 accumulator: &mut BatchAccumulator,
2150 persistence: &GraphPersistenceOptions,
2151 compressor: Option<&CompressorClient>,
2152) -> Result<(), GraphError> {
2153 let language = file.language.to_string();
2154
2155 match file.language {
2156 LanguageId::Python => {
2157 emit_limited_parse_warnings(
2158 "Python",
2159 file_path,
2160 extract_python_parse_warnings(&file.tree, source),
2161 persistence.max_parse_warnings_per_file,
2162 );
2163 }
2164 LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
2165 emit_limited_parse_warnings(
2166 "JS/TS",
2167 file_path,
2168 extract_js_ts_parse_warnings(&file.tree, source),
2169 persistence.max_parse_warnings_per_file,
2170 );
2171 }
2172 _ => {}
2173 }
2174
2175 let functions = extract_non_java_function_symbols(file, source, file_path);
2176 let function_spans = extract_non_java_function_body_spans(file, source, file_path);
2177
2178 let mut name_to_fqn_depth: HashMap<String, (String, usize)> = HashMap::new();
2179 for f in &functions {
2180 let logical = f
2181 .fqn
2182 .split_once("::")
2183 .map(|(_, l)| l)
2184 .unwrap_or(f.fqn.as_str());
2185 let short = logical
2186 .rsplit_once('.')
2187 .map(|(_, s)| s)
2188 .unwrap_or(logical);
2189 let depth = logical.matches('.').count();
2190 name_to_fqn_depth
2191 .entry(short.to_string())
2192 .and_modify(|(existing_fqn, existing_depth)| {
2193 if depth > *existing_depth {
2194 *existing_fqn = f.fqn.clone();
2195 *existing_depth = depth;
2196 }
2197 })
2198 .or_insert_with(|| (f.fqn.clone(), depth));
2199 }
2200 let name_to_fqn: HashMap<String, String> = name_to_fqn_depth
2201 .into_iter()
2202 .map(|(k, (v, _))| (k, v))
2203 .collect();
2204
2205 for func in &functions {
2206 let code_bytes = code_bytes_for_span(
2207 compressor,
2208 source,
2209 function_spans.get(&func.fqn).copied(),
2210 file.language,
2211 )
2212 .await;
2213 let q = query(
2214 "
2215 MATCH (f:File { path: $path })
2216 MERGE (fn:Function { fqn: $fn_fqn })
2217 ON CREATE SET fn.name = $fn_name,
2218 fn.path = $path,
2219 fn.project_name = $project_name,
2220 fn.language = $language,
2221 fn.code_bytes = $code_bytes
2222 ON MATCH SET fn.name = $fn_name,
2223 fn.path = $path,
2224 fn.project_name = $project_name,
2225 fn.language = $language,
2226 fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
2227 MERGE (f)-[:DECLARES_FUNCTION]->(fn)
2228 ",
2229 )
2230 .param("path", file_path.to_string())
2231 .param("fn_fqn", func.fqn.clone())
2232 .param("fn_name", func.name.clone())
2233 .param("project_name", project_name.clone())
2234 .param("language", language.clone())
2235 .param(props::CODE_BYTES, code_bytes);
2236
2237 graph.run(q).await?;
2238 }
2239
2240 match file.language {
2241 LanguageId::Python => {
2242 for imp in extract_python_import_modules(&file.tree, source) {
2243 if let Some(dep) = resolve_python_import_to_known_file(&imp, known_paths) {
2244 let dep_query = query(
2245 "
2246 MERGE (src:File { path: $src_path })
2247 MERGE (dst:File { path: $dst_path })
2248 MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
2249 ",
2250 )
2251 .param("src_path", file_path.to_string())
2252 .param("dst_path", dep.clone());
2253 graph.run(dep_query).await?;
2254 } else if should_log_unresolved_import(
2255 persistence.verbose_imports,
2256 is_python_stdlib_top_level(&imp),
2257 is_python_common_external_top_level(&imp),
2258 ) {
2259 println!(
2260 "Python import (unresolved to scanned files): `{}` in {}",
2261 imp, file_path
2262 );
2263 }
2264 }
2265 for (caller, callee) in extract_python_intrafile_calls(
2266 &file.tree,
2267 source,
2268 file_path,
2269 &name_to_fqn,
2270 ) {
2271 accumulator.add_calls_function(caller, callee);
2272 }
2273 }
2274 LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
2275 for spec in extract_js_ts_import_specifiers(&file.tree, source) {
2276 if let Some(dep) =
2277 resolve_js_ts_import_to_known_file(&spec, file_path, known_paths)
2278 {
2279 let dep_query = query(
2280 "
2281 MERGE (src:File { path: $src_path })
2282 MERGE (dst:File { path: $dst_path })
2283 MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
2284 ",
2285 )
2286 .param("src_path", file_path.to_string())
2287 .param("dst_path", dep.clone());
2288 graph.run(dep_query).await?;
2289 } else if persistence.verbose_imports {
2290 println!(
2291 "JS/TS import (unresolved to scanned files): `{}` in {}",
2292 spec, file_path
2293 );
2294 }
2295 }
2296 for (caller, callee) in extract_js_ts_intrafile_calls(
2297 &file.tree,
2298 source,
2299 file_path,
2300 file.language,
2301 &name_to_fqn,
2302 ) {
2303 accumulator.add_calls_function(caller, callee);
2304 }
2305 }
2306 _ => {}
2307 }
2308
2309 Ok(())
2310}
2311
/// Returns the fully-qualified targets of every Java `import` statement in
/// `source` that points into the `com.redbus.genai.` package tree.
///
/// Only single-line statements of the form `import [static] <target>;` are
/// recognised. The `static` keyword is dropped and the target is trimmed.
fn extract_internal_java_imports(source: &str) -> Vec<String> {
    const IMPORT_KEYWORD: &str = "import ";
    const STATIC_KEYWORD: &str = "static ";
    const INTERNAL_ROOT: &str = "com.redbus.genai.";

    let mut imports = Vec::new();
    for raw_line in source.lines() {
        let statement = raw_line.trim();
        let Some(after_keyword) = statement.strip_prefix(IMPORT_KEYWORD) else {
            continue;
        };
        let Some(target) = after_keyword.strip_suffix(';') else {
            continue;
        };

        let target = target.trim();
        let target = match target.strip_prefix(STATIC_KEYWORD) {
            Some(member) => member.trim(),
            None => target,
        };

        if target.starts_with(INTERNAL_ROOT) {
            imports.push(target.to_string());
        }
    }
    imports
}
2345
/// Maps an internal Java import to a guessed source-file path.
///
/// The directory prefix is everything in `current_path` that precedes the
/// first `com/redbus/genai/` segment. The import's dots are turned into `/`
/// and `.java` is appended. Returns `None` when `current_path` does not
/// contain that segment.
fn map_import_to_project_path(current_path: &str, import_fqn: &str) -> Option<String> {
    let marker = "com/redbus/genai/";
    let idx = current_path.find(marker)?;
    // `idx` comes from `find`, so it is a valid char boundary.
    let prefix = &current_path[..idx];

    let relative = import_fqn.replace('.', "/") + ".java";
    Some(format!("{prefix}{relative}"))
}
2362
/// Finds the first non-empty `package <name>;` declaration in `source`.
///
/// Only single-line declarations are recognised. The returned name is
/// trimmed. Returns `None` when no such line exists.
fn extract_java_package(source: &str) -> Option<String> {
    source.lines().find_map(|raw_line| {
        let declaration = raw_line
            .trim()
            .strip_prefix("package ")?
            .strip_suffix(';')?;
        let name = declaration.trim();
        if name.is_empty() {
            None
        } else {
            Some(name.to_string())
        }
    })
}
2380
/// Text-based lookup of the Erlang module name.
///
/// Scans `source` line by line for a `-module(<name>).` attribute that fills
/// the whole trimmed line, and returns the first non-empty trimmed `<name>`.
fn extract_erlang_module_name(source: &str) -> Option<String> {
    source.lines().find_map(|raw_line| {
        let inner = raw_line
            .trim()
            .strip_prefix("-module(")?
            .strip_suffix(").")?;
        let name = inner.trim();
        if name.is_empty() {
            None
        } else {
            Some(name.to_string())
        }
    })
}
2398
2399fn extract_erlang_module_name_from_tree(tree: &Tree, source: &str) -> Option<String> {
2401 let mut out: Option<String> = None;
2402 let root = tree.root_node();
2403
2404 walk_tree(root, |node| {
2405 if out.is_some() || node.kind() != "module_attribute" {
2406 return;
2407 }
2408 if let Some(name_node) = node.child_by_field_name("name") {
2409 let start = name_node.start_byte() as usize;
2410 let end = name_node.end_byte() as usize;
2411 if end <= source.len() && start < end {
2412 let name = source[start..end].trim().trim_matches('\'').to_string();
2413 if !name.is_empty() {
2414 out = Some(name);
2415 }
2416 }
2417 }
2418 });
2419
2420 out
2421}
2422
/// Returns `true` when `path` has the extension `hrl`, compared
/// ASCII-case-insensitively.
fn is_erlang_header_file(path: &Path) -> bool {
    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ext.eq_ignore_ascii_case("hrl"),
        None => false,
    }
}
2429
2430fn resolve_erlang_module_name(path: &Path, tree: &Tree, source: &str) -> Option<String> {
2435 if let Some(module_name) = extract_erlang_module_name_from_tree(tree, source)
2436 .or_else(|| extract_erlang_module_name(source))
2437 {
2438 return Some(module_name);
2439 }
2440
2441 if is_erlang_header_file(path) {
2442 None
2443 } else {
2444 Some(guess_erlang_module_name_from_path(&path.display().to_string()))
2445 }
2446}
2447
/// Guesses an Erlang module name from a file path by taking the file stem.
///
/// Falls back to `"unknown_module"` when the path has no file stem or the
/// stem is not valid UTF-8.
fn guess_erlang_module_name_from_path(path: &str) -> String {
    match Path::new(path).file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_owned(),
        None => String::from("unknown_module"),
    }
}
2457
/// Collects behaviour-related metadata from an Erlang syntax tree.
///
/// Every node of `tree` is visited and dispatched on its kind:
///
/// * `behaviour_attribute`: the `name` field is recorded in `behaviour_usages`.
/// * `callback`: for each `sigs` child that has an `args` field, the pair
///   (function name, number of named children of `args`) is appended to
///   `declared_callbacks`, skipping pairs already seen.
/// * `optional_callbacks_attribute`: each `callbacks` child that parses as a
///   name/arity pair is recorded in `optional_callbacks`.
/// * `wild_attribute`: `extends_behaviour` records a single name in
///   `behaviour_extensions`; `override_callback` records name/arity pairs in
///   `overridden_callbacks`.
///
/// Nodes with missing fields or unparseable names are skipped silently.
// NOTE(review): the node kinds and field names presumably come from the
// tree-sitter Erlang grammar in use. Confirm against the grammar.
fn extract_erlang_behaviour_metadata_from_tree(tree: &Tree, source: &str) -> ErlangBehaviourMetadata {
    let mut meta = ErlangBehaviourMetadata::default();
    // Pairs already pushed, so `declared_callbacks` keeps first-seen order without duplicates.
    let mut seen_declared_callbacks: HashSet<(String, u32)> = HashSet::new();
    let root = tree.root_node();

    walk_tree(root, |node| match node.kind() {
        "behaviour_attribute" => {
            if let Some(name_node) = node.child_by_field_name("name") {
                if let Some(name) = extract_erlang_name(name_node, source) {
                    meta.behaviour_usages.insert(name);
                }
            }
        }
        "callback" => {
            let Some(fun_node) = node.child_by_field_name("fun") else {
                return;
            };
            let Some(fun_name) = extract_erlang_name(fun_node, source) else {
                return;
            };

            // One callback may carry several signatures; each contributes its own arity.
            let mut cursor = node.walk();
            for sig_node in node.children_by_field_name("sigs", &mut cursor) {
                let Some(args_node) = sig_node.child_by_field_name("args") else {
                    continue;
                };
                let key = (fun_name.clone(), args_node.named_child_count() as u32);
                if seen_declared_callbacks.insert(key.clone()) {
                    meta.declared_callbacks.push(key);
                }
            }
        }
        "optional_callbacks_attribute" => {
            let mut cursor = node.walk();
            for fa_node in node.children_by_field_name("callbacks", &mut cursor) {
                if let Some((name, arity)) = extract_erlang_fa_pair(fa_node, source) {
                    meta.optional_callbacks.insert((name, arity));
                }
            }
        }
        "wild_attribute" => {
            let Some(attr_node) = node.child_by_field_name("name") else {
                return;
            };
            let Some(attr_name) = extract_wild_attribute_name(attr_node, source) else {
                return;
            };
            let Some(value_node) = node.child_by_field_name("value") else {
                return;
            };

            match attr_name.as_str() {
                "extends_behaviour" => {
                    if let Some(parent_behaviour) = extract_single_name_expr(value_node, source) {
                        meta.behaviour_extensions.insert(parent_behaviour);
                    }
                }
                "override_callback" => {
                    // Prefer structured `fa` nodes anywhere below the value expression.
                    let mut pairs: HashSet<(String, u32)> = HashSet::new();
                    walk_tree(value_node, |child| {
                        if child.kind() == "fa" {
                            if let Some((name, arity)) = extract_erlang_fa_pair(child, source) {
                                pairs.insert((name, arity));
                            }
                        }
                    });
                    // If no `fa` node yielded a pair, fall back to parsing the raw value text.
                    if pairs.is_empty() {
                        if let Some(raw_value) = node_text(value_node, source) {
                            pairs.extend(parse_fa_pairs_from_expr_text(raw_value));
                        }
                    }
                    meta.overridden_callbacks.extend(pairs);
                }
                _ => {}
            }
        }
        _ => {}
    });

    meta
}
2539
2540fn extract_wild_attribute_name(attr_node: Node, source: &str) -> Option<String> {
2541 let name_node = attr_node.child_by_field_name("name").unwrap_or(attr_node);
2542 let mut name = extract_erlang_name(name_node, source)?;
2543 if let Some(stripped) = name.strip_prefix('-') {
2544 name = stripped.to_string();
2545 }
2546 Some(name)
2547}
2548
2549fn extract_erlang_fa_pair(fa_node: Node, source: &str) -> Option<(String, u32)> {
2550 if fa_node.kind() != "fa" {
2551 return None;
2552 }
2553 let fun_node = fa_node.child_by_field_name("fun")?;
2554 let arity_node = fa_node.child_by_field_name("arity")?;
2555
2556 let fun_name = extract_erlang_name(fun_node, source)?;
2557 let raw_arity = node_text(arity_node, source)?;
2558 let arity = parse_erlang_arity(raw_arity)?;
2559 Some((fun_name, arity))
2560}
2561
/// Parses an arity such as `2` or `/2`: surrounding whitespace and any
/// leading `/` characters are removed before parsing as `u32`.
fn parse_erlang_arity(raw: &str) -> Option<u32> {
    let digits = raw.trim().trim_start_matches('/');
    match digits.parse::<u32>() {
        Ok(arity) => Some(arity),
        Err(_) => None,
    }
}
2565
2566fn extract_single_name_expr(node: Node, source: &str) -> Option<String> {
2567 let mut raw = node_text(node, source)?.trim();
2568 while raw.starts_with('(') && raw.ends_with(')') && raw.len() >= 2 {
2569 raw = raw[1..raw.len() - 1].trim();
2570 }
2571 normalize_erlang_name(raw)
2572}
2573
2574fn parse_fa_pairs_from_expr_text(expr_text: &str) -> HashSet<(String, u32)> {
2575 let mut out = HashSet::new();
2576 let mut raw = expr_text.trim();
2577 while raw.starts_with('(') && raw.ends_with(')') && raw.len() >= 2 {
2578 raw = raw[1..raw.len() - 1].trim();
2579 }
2580
2581 for token in raw.split(',') {
2582 let token = token.trim();
2583 let Some((name_raw, arity_raw)) = token.split_once('/') else {
2584 continue;
2585 };
2586 let Some(name) = normalize_erlang_name(name_raw) else {
2587 continue;
2588 };
2589 let Some(arity) = parse_erlang_arity(arity_raw) else {
2590 continue;
2591 };
2592 out.insert((name, arity));
2593 }
2594
2595 out
2596}
2597
2598fn extract_erlang_name(node: Node, source: &str) -> Option<String> {
2599 let raw = node_text(node, source)?;
2600 normalize_erlang_name(raw)
2601}
2602
2603fn node_text<'a>(node: Node, source: &'a str) -> Option<&'a str> {
2604 let start = node.start_byte() as usize;
2605 let end = node.end_byte() as usize;
2606 if end <= source.len() && start < end {
2607 Some(&source[start..end])
2608 } else {
2609 None
2610 }
2611}
2612
/// Normalises raw text into an Erlang atom-like name.
///
/// The text is trimmed and one pair of enclosing single quotes is removed.
/// The result is rejected (`None`) when it is empty, starts with `?`, starts
/// with an ASCII uppercase letter, or contains any whitespace.
fn normalize_erlang_name(raw: &str) -> Option<String> {
    let trimmed = raw.trim();
    let quoted = trimmed.len() >= 2 && trimmed.starts_with('\'') && trimmed.ends_with('\'');
    let candidate = if quoted {
        &trimmed[1..trimmed.len() - 1]
    } else {
        trimmed
    };

    // An empty candidate has no first character and is rejected here.
    let first = candidate.chars().next()?;
    let rejected = first == '?'
        || first.is_ascii_uppercase()
        || candidate.chars().any(char::is_whitespace);
    if rejected {
        None
    } else {
        Some(candidate.to_string())
    }
}
2637
/// Built-in table of `(callback name, arity)` pairs for a handful of
/// behaviours. Unknown behaviour names yield an empty slice.
fn known_behaviour_callbacks(behaviour: &str) -> &'static [(&'static str, u32)] {
    const GEN_SERVER: &[(&str, u32)] = &[
        ("init", 1),
        ("handle_call", 3),
        ("handle_cast", 2),
        ("handle_info", 2),
        ("terminate", 2),
        ("code_change", 3),
        ("format_status", 2),
    ];
    const SUPERVISOR: &[(&str, u32)] = &[("init", 1)];
    const GEN_STATEM: &[(&str, u32)] = &[
        ("init", 1),
        ("callback_mode", 0),
        ("state_name", 3),
        ("state_name", 2),
        ("terminate", 3),
        ("code_change", 4),
        ("format_status", 2),
    ];
    const COWBOY_HANDLER: &[(&str, u32)] = &[("init", 2)];
    const COWBOY_LOOP: &[(&str, u32)] = &[("init", 2), ("info", 3), ("terminate", 3)];
    const COWBOY_WEBSOCKET: &[(&str, u32)] = &[
        ("init", 2),
        ("websocket_init", 1),
        ("websocket_handle", 2),
        ("websocket_info", 2),
        ("terminate", 3),
    ];
    const COWBOY_REST: &[(&str, u32)] = &[
        ("init", 2),
        ("allowed_methods", 2),
        ("content_types_provided", 2),
        ("content_types_accepted", 2),
        ("resource_exists", 2),
        ("is_authorized", 2),
        ("forbidden", 2),
        ("malformed_request", 2),
        ("delete_resource", 2),
        ("generate_etag", 2),
        ("last_modified", 2),
    ];

    match behaviour {
        "gen_server" => GEN_SERVER,
        "supervisor" => SUPERVISOR,
        "gen_statem" => GEN_STATEM,
        "cowboy_handler" => COWBOY_HANDLER,
        "cowboy_loop" => COWBOY_LOOP,
        "cowboy_websocket" => COWBOY_WEBSOCKET,
        "cowboy_rest" => COWBOY_REST,
        _ => &[],
    }
}
2684
2685fn collect_callback_contracts_for_module(
2686 module_name: Option<&str>,
2687 behaviour_usages: &HashSet<String>,
2688 declared_callbacks: &[(String, u32)],
2689 optional_callbacks: &HashSet<(String, u32)>,
2690) -> Vec<CallbackContract> {
2691 let mut contracts = HashSet::new();
2692
2693 if let Some(module_name) = module_name {
2695 for (name, arity) in declared_callbacks {
2696 contracts.insert(CallbackContract {
2697 behaviour: module_name.to_string(),
2698 name: name.clone(),
2699 arity: *arity,
2700 optional: optional_callbacks.contains(&(name.clone(), *arity)),
2701 });
2702 }
2703 }
2704
2705 for behaviour in behaviour_usages {
2707 for (name, arity) in known_behaviour_callbacks(behaviour) {
2708 contracts.insert(CallbackContract {
2709 behaviour: behaviour.clone(),
2710 name: (*name).to_string(),
2711 arity: *arity,
2712 optional: false,
2713 });
2714 }
2715 }
2716
2717 contracts.into_iter().collect()
2718}
2719
2720fn select_endpoint_handler_fqns(
2721 handler_module: &str,
2722 erlang_module_index: &HashMap<String, ErlangModuleSnapshot>,
2723) -> Vec<String> {
2724 let Some(snapshot) = erlang_module_index.get(handler_module) else {
2725 return Vec::new();
2726 };
2727
2728 let mut fqns: Vec<String> = snapshot
2729 .callback_signatures
2730 .iter()
2731 .filter(|sig| snapshot.implemented_signatures.contains(*sig))
2732 .map(|(name, arity)| format!("{module}:{name}/{arity}", module = handler_module))
2733 .collect();
2734
2735 fqns.sort();
2736 fqns.dedup();
2737 fqns
2738}
2739
2740fn extract_erlang_functions(
2745 module_name: &str,
2746 tree: &Tree,
2747 source: &str,
2748) -> Vec<(String, u32, String)> {
2749 let mut seen: HashSet<(String, u32)> = HashSet::new();
2750 let mut out: Vec<(String, u32, String)> = Vec::new();
2751
2752 let root = tree.root_node();
2753 walk_tree(root, |node| {
2754 if node.kind() != "function_clause" || !is_top_level_erlang_function_clause(node) {
2755 return;
2756 }
2757
2758 let Some(name_node) = node.child_by_field_name("name") else {
2759 return;
2760 };
2761 let Some(fun_name) = extract_erlang_name(name_node, source) else {
2762 return;
2763 };
2764 let Some(args_node) = node.child_by_field_name("args") else {
2765 return;
2766 };
2767 let arity = args_node.named_child_count() as u32;
2768
2769 if seen.insert((fun_name.clone(), arity)) {
2770 let fqn = format!("{module}:{name}/{arity}", module = module_name, name = fun_name);
2771 out.push((fun_name, arity, fqn));
2772 }
2773 });
2774
2775 out
2776}
2777
2778fn is_top_level_erlang_function_clause(node: Node) -> bool {
2779 let mut parent = node.parent();
2780 let mut has_source_file_ancestor = false;
2781 while let Some(p) = parent {
2782 match p.kind() {
2783 "fun_expr" => return false,
2786 "source_file" => {
2787 has_source_file_ancestor = true;
2788 break;
2789 }
2790 _ => {
2791 parent = p.parent();
2792 }
2793 }
2794 }
2795 has_source_file_ancestor
2796}
2797
2798fn extract_erlang_call_edges(
2799 tree: &Tree,
2800 source: &str,
2801 module_name: Option<&str>,
2802 function_by_sig: &HashMap<(String, u32), String>,
2803) -> HashSet<(String, String)> {
2804 let Some(module_name) = module_name else {
2805 return HashSet::new();
2806 };
2807 let mut edges = HashSet::new();
2808 let root = tree.root_node();
2809
2810 walk_tree(root, |node| {
2811 if node.kind() != "call" {
2812 return;
2813 }
2814 let Some(caller_clause) = find_enclosing_function_clause(node) else {
2815 return;
2816 };
2817 let Some(caller_name_node) = caller_clause.child_by_field_name("name") else {
2818 return;
2819 };
2820 let Some(caller_name) = extract_erlang_name(caller_name_node, source) else {
2821 return;
2822 };
2823 let Some(caller_args) = caller_clause.child_by_field_name("args") else {
2824 return;
2825 };
2826 let caller_arity = caller_args.named_child_count() as u32;
2827 let Some(caller_fqn) = function_by_sig.get(&(caller_name, caller_arity)) else {
2828 return;
2829 };
2830
2831 let Some(call_expr) = node.child_by_field_name("expr") else {
2832 return;
2833 };
2834 let Some(call_args) = node.child_by_field_name("args") else {
2835 return;
2836 };
2837 let callee_arity = call_args.named_child_count() as u32;
2838 let Some(callee_name) = extract_local_call_name(call_expr, source) else {
2839 return;
2840 };
2841
2842 let callee_key = (callee_name, callee_arity);
2843 let Some(callee_fqn) = function_by_sig.get(&callee_key) else {
2844 return;
2845 };
2846 if caller_fqn != callee_fqn && caller_fqn.starts_with(module_name) && callee_fqn.starts_with(module_name) {
2847 edges.insert((caller_fqn.clone(), callee_fqn.clone()));
2848 }
2849 });
2850
2851 edges
2852}
2853
2854fn find_enclosing_function_clause(node: Node) -> Option<Node> {
2855 let mut cur = node.parent();
2856 while let Some(parent) = cur {
2857 if parent.kind() == "function_clause" {
2858 return Some(parent);
2859 }
2860 if parent.kind() == "source_file" {
2861 break;
2862 }
2863 cur = parent.parent();
2864 }
2865 None
2866}
2867
2868fn extract_local_call_name(expr_node: Node, source: &str) -> Option<String> {
2869 match expr_node.kind() {
2870 "_name" | "atom" | "var" => extract_erlang_name(expr_node, source),
2872 "remote" => None,
2874 _ => extract_erlang_name(expr_node, source),
2875 }
2876}
2877
2878fn extract_erlang_api_endpoints(
2884 tree: &Tree,
2885 source: &str,
2886) -> Vec<(Vec<String>, String, String)> {
2887 let mut endpoints = Vec::new();
2888 let mut seen: HashSet<(String, String)> = HashSet::new();
2889 let root = tree.root_node();
2890 walk_tree(root, |node| {
2891 if node.kind() != "tuple" {
2892 return;
2893 }
2894 let mut cursor = node.walk();
2895 let exprs: Vec<Node> = node.children_by_field_name("expr", &mut cursor).collect();
2896 if exprs.len() < 2 {
2897 return;
2898 }
2899 let Some(path_raw) = extract_erlang_string_literal(exprs[0], source) else {
2900 return;
2901 };
2902 let path = normalize_api_path(&path_raw);
2903 if !path.starts_with('/') {
2904 return;
2905 }
2906 let Some(handler_name) = extract_erlang_name(exprs[1], source) else {
2907 return;
2908 };
2909 let key = (path.clone(), handler_name.clone());
2910 if seen.insert(key) {
2911 endpoints.push((vec!["ANY".to_string()], path, handler_name));
2912 }
2913 });
2914
2915 endpoints
2916}
2917
/// Extracts the distinct `http://` / `https://` URLs found in `source`.
///
/// The scan is line-based. A URL starts at the earliest occurrence of either
/// scheme and runs until the next `"`, `'`, `)` or whitespace character, or to
/// the end of the line. The result comes from a `HashSet`, so its order is
/// unspecified.
fn extract_external_http_urls(source: &str) -> Vec<String> {
    let mut urls = HashSet::new();

    for line in source.lines() {
        let mut rest = line;
        loop {
            // Use whichever scheme appears first. Preferring `http://`
            // unconditionally would skip an `https://` URL that precedes an
            // `http://` URL on the same line.
            let start = match (rest.find("http://"), rest.find("https://")) {
                (Some(plain), Some(secure)) => plain.min(secure),
                (Some(only), None) | (None, Some(only)) => only,
                (None, None) => break,
            };
            let after = &rest[start..];
            let end = after
                .find(|c: char| c == '"' || c.is_whitespace() || c == '\'' || c == ')')
                .unwrap_or(after.len());
            let url = &after[..end];
            if !url.is_empty() {
                urls.insert(url.to_string());
            }
            // `end` is at least the scheme length, so the scan always advances.
            rest = &after[end..];
        }
    }

    urls.into_iter().collect()
}
2946
2947fn extract_external_http_urls_from_tree(tree: &Tree, source: &str) -> Vec<String> {
2949 let mut urls = HashSet::new();
2950 let root = tree.root_node();
2951 walk_tree(root, |node| {
2952 let Some(s) = extract_erlang_string_literal(node, source) else {
2953 return;
2954 };
2955 if s.starts_with("http://") || s.starts_with("https://") {
2956 urls.insert(s);
2957 }
2958 });
2959
2960 urls.into_iter().collect()
2961}
2962
2963fn scan_http_urls_in_span(source: &str, span_start: usize, span_end: usize) -> Vec<(String, usize, usize)> {
2966 let mut out = Vec::new();
2967 if span_start >= span_end || span_end > source.len() {
2968 return out;
2969 }
2970 let mut rest_start = span_start;
2971 while rest_start < span_end {
2972 let Some(slice) = source.get(rest_start..span_end) else {
2973 break;
2974 };
2975 let rel = match slice.find("http://").or_else(|| slice.find("https://")) {
2976 Some(i) => i,
2977 None => break,
2978 };
2979 let abs_start = rest_start + rel;
2980 let Some(after) = source.get(abs_start..span_end) else {
2981 break;
2982 };
2983 let end_rel = after
2984 .find(|c: char| c == '"' || c.is_whitespace() || c == '\'' || c == ')')
2985 .unwrap_or(after.len());
2986 let url = after[..end_rel].trim();
2987 if !url.is_empty() {
2988 out.push((url.to_string(), abs_start, abs_start + end_rel));
2989 }
2990 rest_start = abs_start + end_rel.max(1);
2991 }
2992 out
2993}
2994
2995fn extract_csharp_external_http_urls_with_spans(tree: &Tree, source: &str) -> Vec<(String, usize, usize)> {
2997 let mut seen: HashSet<(String, usize, usize)> = HashSet::new();
2998 let root = tree.root_node();
2999 walk_tree(root, |node| {
3000 let kind = node.kind();
3001 if !matches!(
3002 kind,
3003 "string_literal"
3004 | "verbatim_string_literal"
3005 | "interpolated_string_text"
3006 | "interpolated_verbatim_string_text"
3007 ) {
3008 return;
3009 }
3010 let start = node.start_byte() as usize;
3011 let end = node.end_byte() as usize;
3012 for triple in scan_http_urls_in_span(source, start, end) {
3013 seen.insert(triple);
3014 }
3015 });
3016 seen.into_iter().collect()
3017}
3018
/// Lists `(fqn, body_start, body_end)` byte spans for C# callable members.
///
/// Three node kinds are handled while walking the tree:
///
/// * `method_declaration`: the FQN is `<enclosing type>.<method>`. Without an
///   enclosing type it is `<namespace>.<method>`, or the bare method name if
///   there is no namespace either. Methods without a `body` field are skipped.
/// * `constructor_declaration`: the FQN comes from
///   `csharp_constructor_symbol_fqn`. An enclosing type and a block body are
///   both required.
/// * `accessor_declaration`: the FQN is `<enclosing type>.<prefix>_<property>`.
///   An enclosing type, a property name and a block body are all required.
///
/// `namespace` is the fallback namespace, used when a method has no enclosing
/// namespace of its own.
fn csharp_method_body_spans(tree: &Tree, source: &str, namespace: Option<&str>) -> Vec<(String, usize, usize)> {
    let mut out = Vec::new();
    let root = tree.root_node();
    walk_tree(root, |node| {
        match node.kind() {
            "method_declaration" => {
                let Some(body) = node.child_by_field_name("body") else {
                    return;
                };
                let Some(method_name) = csharp_method_simple_name(node, source) else {
                    return;
                };
                let class_fqn = csharp_enclosing_type_fqn(node, source, namespace);
                // Prefer the namespace enclosing the node over the caller-supplied one.
                let effective_ns = csharp_enclosing_namespace_prefix(node, source)
                    .or_else(|| namespace.map(|s| s.to_string()));
                let fqn = match class_fqn {
                    Some(cf) => format!("{cf}.{method_name}"),
                    None => effective_ns
                        .as_ref()
                        .map(|ns| format!("{ns}.{method_name}"))
                        .unwrap_or(method_name),
                };
                let lo = body.start_byte() as usize;
                let hi = body.end_byte() as usize;
                out.push((fqn, lo, hi));
            }
            "constructor_declaration" => {
                let Some(cf) = csharp_enclosing_type_fqn(node, source, namespace) else {
                    return;
                };
                // Only the second tuple element (the FQN) is needed here.
                let (_, fqn) = csharp_constructor_symbol_fqn(node, &cf, source);
                let Some(body) = csharp_block_body(node) else {
                    return;
                };
                let lo = body.start_byte() as usize;
                let hi = body.end_byte() as usize;
                out.push((fqn, lo, hi));
            }
            "accessor_declaration" => {
                let Some(cf) = csharp_enclosing_type_fqn(node, source, namespace) else {
                    return;
                };
                let Some(prop_name) = csharp_property_name_for_accessor(node, source) else {
                    return;
                };
                // NOTE(review): the prefix is presumably the accessor kind (e.g. get/set).
                // Confirm against `csharp_accessor_kind_prefix`.
                let prefix = csharp_accessor_kind_prefix(node, source);
                let fqn = format!("{cf}.{}_{}", prefix, prop_name);
                let Some(body) = csharp_block_body(node) else {
                    return;
                };
                let lo = body.start_byte() as usize;
                let hi = body.end_byte() as usize;
                out.push((fqn, lo, hi));
            }
            _ => {}
        }
    });
    out
}
3080
3081fn extract_erlang_called_modules_from_tree(tree: &Tree, source: &str) -> HashSet<String> {
3083 let mut modules = HashSet::new();
3084 let root = tree.root_node();
3085 walk_tree(root, |node| {
3086 if node.kind() != "remote" {
3087 return;
3088 }
3089 let Some(remote_module) = node.child_by_field_name("module") else {
3090 return;
3091 };
3092 let Some(module_expr) = remote_module.child_by_field_name("module") else {
3093 return;
3094 };
3095 if let Some(name) = extract_erlang_name(module_expr, source) {
3096 modules.insert(name);
3097 }
3098 });
3099
3100 modules
3101}
3102
/// Splits a URL into `(protocol, host, path)`.
///
/// * Without a `://` separator the whole input is returned as the host, with
///   no protocol and path `/`.
/// * Otherwise the host ends at the first `/` or `?` after the separator. The
///   path is everything from that character on, or `/` if there is none.
fn split_url_protocol_host_and_path(url: &str) -> (Option<String>, String, String) {
    let Some((proto, rest)) = url.split_once("://") else {
        return (None, url.to_string(), "/".to_string());
    };

    let (host, path) = match rest.find(|c: char| c == '/' || c == '?') {
        Some(cut) => rest.split_at(cut),
        None => (rest, "/"),
    };

    (Some(proto.to_string()), host.to_string(), path.to_string())
}
3136
/// Normalises a raw route or URL into a canonical API path.
///
/// Steps, in order:
/// 1. Trim surrounding whitespace.
/// 2. Drop the query string and fragment (everything from the first `?` or `#`).
/// 3. If a `scheme://host` prefix remains, keep only what follows the host,
///    or return `/` when nothing does.
/// 4. Ensure a leading `/`.
/// 5. Remove one trailing `/` unless the path is just `/`.
///
/// The query/fragment is removed before the scheme is looked for, so a URL
/// embedded in a query parameter (for example `/login?next=http://x/y`) is not
/// mistaken for the scheme of the path itself.
fn normalize_api_path(raw: &str) -> String {
    let trimmed = raw.trim();

    let without_query = match trimmed.find(|c: char| c == '?' || c == '#') {
        Some(idx) => &trimmed[..idx],
        None => trimmed,
    };

    let path_part = match without_query.find("://") {
        Some(idx) => {
            let after = &without_query[idx + 3..];
            match after.find('/') {
                Some(slash) => &after[slash..],
                // A URL with a host but no path normalises to the root.
                None => return "/".to_string(),
            }
        }
        None => without_query,
    };

    let mut p = String::with_capacity(path_part.len() + 1);
    if !path_part.starts_with('/') {
        p.push('/');
    }
    p.push_str(path_part);

    if p.len() > 1 && p.ends_with('/') {
        p.pop();
    }

    p
}
3177
3178fn extract_erlang_string_literal(node: Node, source: &str) -> Option<String> {
3179 if node.kind() != "string" {
3180 return None;
3181 }
3182 let raw = node_text(node, source)?.trim();
3183 if raw.starts_with('"') && raw.ends_with('"') && raw.len() >= 2 {
3185 return Some(raw[1..raw.len() - 1].to_string());
3186 }
3187 let first = raw.find('"')?;
3190 let last = raw.rfind('"')?;
3191 if last > first {
3192 return Some(raw[first + 1..last].to_string());
3193 }
3194 None
3195}
3196
3197fn guess_erlang_file_path_from_module(current_path: &str, module_name: &str) -> String {
3200 let base = Path::new(current_path)
3201 .parent()
3202 .map(|p| p.to_path_buf())
3203 .unwrap_or_else(|| Path::new(".").to_path_buf());
3204 path_str_slash(&base.join(format!("{module}.erl", module = module_name)))
3205}
3206
3207fn node_byte_span(node: Node) -> (usize, usize) {
3208 (
3209 node.start_byte() as usize,
3210 node.end_byte() as usize,
3211 )
3212}
3213
/// Records `span` under `key`, widening any existing entry so that it covers
/// both the old and the new span (minimum start, maximum end).
fn insert_widest_span(map: &mut HashMap<String, (usize, usize)>, key: String, span: (usize, usize)) {
    match map.entry(key) {
        std::collections::hash_map::Entry::Occupied(mut slot) => {
            let current = slot.get_mut();
            current.0 = current.0.min(span.0);
            current.1 = current.1.max(span.1);
        }
        std::collections::hash_map::Entry::Vacant(slot) => {
            slot.insert(span);
        }
    }
}
3222
3223fn extract_java_class_spans(tree: &Tree, source: &str, package: Option<&str>) -> HashMap<String, (usize, usize)> {
3224 let mut out = HashMap::new();
3225 walk_tree(tree.root_node(), |node| {
3226 if node.kind() != "class_declaration" && node.kind() != "interface_declaration" {
3227 return;
3228 }
3229 let Some(name) = identifier_text_from_children(node, source) else {
3230 return;
3231 };
3232 let fqn = package
3233 .map(|pkg| format!("{pkg}.{name}"))
3234 .unwrap_or(name);
3235 insert_widest_span(&mut out, fqn, node_byte_span(node));
3236 });
3237 out
3238}
3239
3240fn extract_java_method_body_spans(
3241 tree: &Tree,
3242 source: &str,
3243 package: Option<&str>,
3244) -> HashMap<String, (usize, usize)> {
3245 let mut out = HashMap::new();
3246 walk_tree(tree.root_node(), |node| {
3247 if node.kind() != "method_declaration" {
3248 return;
3249 }
3250 let Some(method_name) = identifier_text_from_children(node, source) else {
3251 return;
3252 };
3253 let mut parent = node.parent();
3254 let mut class_fqn: Option<String> = None;
3255 while let Some(p) = parent {
3256 let pk = p.kind();
3257 if pk == "class_declaration" || pk == "interface_declaration" {
3258 if let Some(class_name) = identifier_text_from_children(p, source) {
3259 class_fqn = Some(
3260 package
3261 .map(|pkg| format!("{pkg}.{class_name}"))
3262 .unwrap_or(class_name),
3263 );
3264 }
3265 break;
3266 }
3267 parent = p.parent();
3268 }
3269 let fqn = if let Some(ref cls) = class_fqn {
3270 format!("{cls}.{method_name}")
3271 } else if let Some(pkg) = package {
3272 format!("{pkg}.{method_name}")
3273 } else {
3274 method_name.clone()
3275 };
3276 let span = node
3277 .child_by_field_name("body")
3278 .map(node_byte_span)
3279 .unwrap_or_else(|| node_byte_span(node));
3280 insert_widest_span(&mut out, fqn, span);
3281 });
3282 out
3283}
3284
3285fn extract_csharp_class_spans(tree: &Tree, source: &str) -> HashMap<String, (usize, usize)> {
3286 let mut out = HashMap::new();
3287 let namespace = extract_csharp_namespace(tree, source);
3288 walk_tree(tree.root_node(), |node| {
3289 match node.kind() {
3290 "class_declaration"
3291 | "interface_declaration"
3292 | "struct_declaration"
3293 | "enum_declaration"
3294 | "record_declaration"
3295 | "record_struct_declaration" => {
3296 if let Some(fqn) = csharp_fqn_for_type_declaration(node, source, namespace.as_deref()) {
3297 insert_widest_span(&mut out, fqn, node_byte_span(node));
3298 }
3299 }
3300 _ => {}
3301 }
3302 });
3303 out
3304}
3305
3306fn extract_csharp_property_spans(tree: &Tree, source: &str) -> HashMap<String, (usize, usize)> {
3307 let mut out = HashMap::new();
3308 let namespace = extract_csharp_namespace(tree, source);
3309 walk_tree(tree.root_node(), |node| {
3310 if node.kind() != "property_declaration" {
3311 return;
3312 }
3313 let Some(class_fqn) = csharp_enclosing_type_fqn(node, source, namespace.as_deref()) else {
3314 return;
3315 };
3316 let Some(prop_name_node) = node.child_by_field_name("name") else {
3317 return;
3318 };
3319 let Some(prop_name) = csharp_node_text(prop_name_node, source).filter(|s| !s.is_empty()) else {
3320 return;
3321 };
3322 let fqn = format!("{class_fqn}.{prop_name}");
3323 insert_widest_span(&mut out, fqn, node_byte_span(node));
3324 });
3325 out
3326}
3327
3328fn extract_csharp_method_body_spans_map(
3329 tree: &Tree,
3330 source: &str,
3331 namespace: Option<&str>,
3332) -> HashMap<String, (usize, usize)> {
3333 csharp_method_body_spans(tree, source, namespace)
3334 .into_iter()
3335 .map(|(fqn, lo, hi)| (fqn, (lo, hi)))
3336 .collect()
3337}
3338
3339fn extract_erlang_function_spans(
3340 module_name: &str,
3341 tree: &Tree,
3342 source: &str,
3343) -> HashMap<String, (usize, usize)> {
3344 let mut out = HashMap::new();
3345 walk_tree(tree.root_node(), |node| {
3346 if node.kind() != "function_clause" || !is_top_level_erlang_function_clause(node) {
3347 return;
3348 }
3349 let Some(name_node) = node.child_by_field_name("name") else {
3350 return;
3351 };
3352 let Some(fun_name) = extract_erlang_name(name_node, source) else {
3353 return;
3354 };
3355 let Some(args_node) = node.child_by_field_name("args") else {
3356 return;
3357 };
3358 let arity = args_node.named_child_count() as u32;
3359 let fqn = format!("{module_name}:{fun_name}/{arity}");
3360 insert_widest_span(&mut out, fqn, node_byte_span(node));
3361 });
3362 out
3363}
3364
3365fn extract_go_class_spans(
3366 tree: &Tree,
3367 source: &str,
3368 package: Option<&str>,
3369) -> HashMap<String, (usize, usize)> {
3370 let mut out = HashMap::new();
3371 walk_tree(tree.root_node(), |node| {
3372 if node.kind() != "type_declaration" {
3373 return;
3374 }
3375 let mut i = 0usize;
3376 while let Some(child) = node.child(i) {
3377 i += 1;
3378 if child.kind() != "type_spec" {
3379 continue;
3380 }
3381 let Some(type_n) = child.child_by_field_name("type") else {
3382 continue;
3383 };
3384 if type_n.kind() != "struct_type" && type_n.kind() != "interface_type" {
3385 continue;
3386 }
3387 let Some(name_node) = child.child_by_field_name("name") else {
3388 continue;
3389 };
3390 let start = name_node.start_byte() as usize;
3391 let end = (name_node.end_byte() as usize).min(source.len());
3392 if start >= end {
3393 continue;
3394 }
3395 let name = source[start..end].to_string();
3396 let fqn = package
3397 .map(|pkg| format!("{pkg}.{name}"))
3398 .unwrap_or(name);
3399 insert_widest_span(&mut out, fqn, node_byte_span(child));
3400 }
3401 });
3402 out
3403}
3404
3405fn extract_go_function_body_spans(
3406 tree: &Tree,
3407 source: &str,
3408 package: Option<&str>,
3409) -> HashMap<String, (usize, usize)> {
3410 let mut out = HashMap::new();
3411 walk_tree(tree.root_node(), |node| {
3412 let kind = node.kind();
3413 if kind != "function_declaration" && kind != "method_declaration" {
3414 return;
3415 }
3416 let fqn = match kind {
3417 "function_declaration" => go_decl_fqn_from_function_declaration(node, source, package),
3418 "method_declaration" => go_decl_fqn_from_method_declaration(node, source, package),
3419 _ => None,
3420 };
3421 let Some(fqn) = fqn else {
3422 return;
3423 };
3424 let span = node
3425 .child_by_field_name("body")
3426 .map(node_byte_span)
3427 .unwrap_or_else(|| node_byte_span(node));
3428 insert_widest_span(&mut out, fqn, span);
3429 });
3430 out
3431}
3432
3433fn extract_non_java_function_body_spans(
3434 file: &ParsedFile,
3435 source: &str,
3436 file_path: &str,
3437) -> HashMap<String, (usize, usize)> {
3438 let mut out = HashMap::new();
3439 match file.language {
3440 LanguageId::Python => {
3441 walk_tree(file.tree.root_node(), |node| {
3442 if node.kind() != "function_definition" || python_node_inside_class(node) {
3443 return;
3444 }
3445 let Some(logical) = python_function_logical_name(node, source) else {
3446 return;
3447 };
3448 let fqn = non_java_file_scoped_fqn(file_path, &logical);
3449 let span = node
3450 .child_by_field_name("body")
3451 .map(node_byte_span)
3452 .unwrap_or_else(|| node_byte_span(node));
3453 insert_widest_span(&mut out, fqn, span);
3454 });
3455 }
3456 LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
3457 walk_tree(file.tree.root_node(), |node| {
3458 if node.kind() != "function_declaration" {
3459 return;
3460 }
3461 let Some(name) = js_ts_function_name(node, source) else {
3462 return;
3463 };
3464 let fqn = non_java_file_scoped_fqn(file_path, &name);
3465 let span = node
3466 .child_by_field_name("body")
3467 .map(node_byte_span)
3468 .unwrap_or_else(|| node_byte_span(node));
3469 insert_widest_span(&mut out, fqn, span);
3470 });
3471 }
3472 LanguageId::Rust => {
3473 walk_tree(file.tree.root_node(), |node| {
3474 if node.kind() != "function_item" {
3475 return;
3476 }
3477 let Some(name) = rust_function_name(node, source) else {
3478 return;
3479 };
3480 let fqn = non_java_file_scoped_fqn(file_path, &name);
3481 let span = node
3482 .child_by_field_name("body")
3483 .map(node_byte_span)
3484 .unwrap_or_else(|| node_byte_span(node));
3485 insert_widest_span(&mut out, fqn, span);
3486 });
3487 }
3488 _ => {}
3489 }
3490 out
3491}
3492
3493fn js_ts_function_name(node: Node, source: &str) -> Option<String> {
3494 node.child_by_field_name("name")
3495 .and_then(|n| node_text_slice(n, source))
3496 .map(|s| s.trim().to_string())
3497 .filter(|s| !s.is_empty())
3498}
3499
3500fn rust_function_name(node: Node, source: &str) -> Option<String> {
3501 node.child_by_field_name("name")
3502 .and_then(|n| node_text_slice(n, source))
3503 .map(|s| s.trim().to_string())
3504 .filter(|s| !s.is_empty())
3505}
3506
3507async fn code_bytes_for_span(
3508 compressor: Option<&CompressorClient>,
3509 source: &str,
3510 span: Option<(usize, usize)>,
3511 language: LanguageId,
3512) -> Option<Vec<u8>> {
3513 let client = compressor?;
3514 compress_snippet(source, span, language, client).await
3515}
3516
3517fn walk_tree(root: Node, mut f: impl FnMut(Node)) {
3519 let mut stack = vec![root];
3520 while let Some(node) = stack.pop() {
3521 f(node);
3522 let child_count = node.child_count();
3523 for i in (0..child_count).rev() {
3524 if let Some(child) = node.child(i) {
3525 stack.push(child);
3526 }
3527 }
3528 }
3529}
3530
3531fn walk_tree_skip_local_functions(root: Node, mut f: impl FnMut(Node)) {
3534 let mut stack = vec![root];
3535 while let Some(node) = stack.pop() {
3536 f(node);
3537 if node.kind() == "local_function_statement" {
3538 continue;
3539 }
3540 let child_count = node.child_count();
3541 for i in (0..child_count).rev() {
3542 if let Some(child) = node.child(i) {
3543 stack.push(child);
3544 }
3545 }
3546 }
3547}
3548
3549fn identifier_text_from_children(node: Node, source: &str) -> Option<String> {
3551 let child_count = node.child_count();
3552 for i in 0..child_count {
3553 if let Some(child) = node.child(i) {
3554 let kind = child.kind();
3555 if kind == "identifier" || kind == "name" {
3556 let start = child.start_byte() as usize;
3557 let end = child.end_byte() as usize;
3558 if end <= source.len() && start < end {
3559 return Some(source[start..end].to_string());
3560 }
3561 }
3562 }
3563 }
3564 None
3565}
3566
3567fn extract_java_symbols(
3569 tree: &Tree,
3570 source: &str,
3571 package: Option<&str>,
3572) -> (Vec<ClassSymbol>, Vec<FunctionSymbol>) {
3573 let mut classes: Vec<ClassSymbol> = Vec::new();
3574 let mut methods: Vec<FunctionSymbol> = Vec::new();
3575 let mut seen_class_fqns: HashSet<String> = HashSet::new();
3576
3577 let root = tree.root_node();
3578 walk_tree(root, |node| {
3579 let kind = node.kind();
3580 match kind {
3581 "class_declaration" | "interface_declaration" => {
3582 if let Some(name) = identifier_text_from_children(node, source) {
3583 let fqn = if let Some(pkg) = package {
3584 format!("{pkg}.{name}")
3585 } else {
3586 name.clone()
3587 };
3588 if seen_class_fqns.insert(fqn.clone()) {
3589 classes.push(ClassSymbol {
3590 name,
3591 fqn,
3592 kind: None,
3593 });
3594 }
3595 }
3596 }
3597 "method_declaration" => {
3598 if let Some(method_name) = identifier_text_from_children(node, source) {
3599 let mut parent = node.parent();
3601 let mut class_fqn: Option<String> = None;
3602 while let Some(p) = parent {
3603 let pk = p.kind();
3604 if pk == "class_declaration" || pk == "interface_declaration" {
3605 if let Some(class_name) = identifier_text_from_children(p, source) {
3606 let full = if let Some(pkg) = package {
3607 format!("{pkg}.{class_name}")
3608 } else {
3609 class_name
3610 };
3611 class_fqn = Some(full);
3612 }
3613 break;
3614 }
3615 parent = p.parent();
3616 }
3617
3618 let fqn = if let Some(ref cls) = class_fqn {
3619 format!("{cls}.{}", method_name)
3620 } else if let Some(pkg) = package {
3621 format!("{pkg}.{}", method_name)
3622 } else {
3623 method_name.clone()
3624 };
3625
3626 let (return_type, param_types, param_count) =
3627 extract_java_method_signature_types(node, source);
3628
3629 methods.push(FunctionSymbol {
3630 name: method_name,
3631 fqn,
3632 class_fqn,
3633 return_type,
3634 param_types,
3635 param_count,
3636 modifiers: Vec::new(),
3637 is_pointer_receiver: None,
3638 });
3639 }
3640 }
3641 _ => {}
3642 }
3643 });
3644
3645 (classes, methods)
3646}
3647
3648fn extract_java_parse_warnings(tree: &Tree, source: &str) -> Vec<(usize, usize, String)> {
3650 let mut out = Vec::new();
3651 let root = tree.root_node();
3652 walk_tree(root, |node| {
3653 if node.kind() == "ERROR" || node.is_missing() {
3654 let pos = node.start_position();
3655 let s = node.start_byte() as usize;
3656 let e = (node.end_byte() as usize).min(source.len());
3657 let snippet = if s < e {
3658 source[s..e]
3659 .chars()
3660 .take(120)
3661 .collect::<String>()
3662 .replace('\n', " ")
3663 } else {
3664 String::new()
3665 };
3666 out.push((pos.row + 1, pos.column + 1, snippet));
3667 }
3668 });
3669 out
3670}
3671
/// Reduces a Java type's source text to a simple name.
///
/// Everything from the first `<` onward is dropped, and of what remains only
/// the last whitespace-separated token is kept (so leading modifiers or
/// annotations are discarded). When nothing precedes the `<`, the whole
/// trimmed input is returned unchanged.
fn java_simple_type_name(type_text: &str) -> String {
    let whole = type_text.trim();
    let erased = match whole.find('<') {
        Some(angle) => &whole[..angle],
        None => whole,
    };
    match erased.split_whitespace().next_back() {
        Some(token) => token.to_string(),
        None => whole.to_string(),
    }
}
3685
3686fn java_type_node_display(node: Node, source: &str) -> Option<String> {
3688 let s = node.start_byte() as usize;
3689 let e = (node.end_byte() as usize).min(source.len());
3690 if s >= e {
3691 return None;
3692 }
3693 Some(source[s..e].trim().to_string())
3694}
3695
3696fn extract_java_method_signature_types(
3698 method: Node,
3699 source: &str,
3700) -> (Option<String>, Vec<String>, usize) {
3701 let return_type = method
3702 .child_by_field_name("type")
3703 .and_then(|n| java_type_node_display(n, source))
3704 .map(|full| java_simple_type_name(&full));
3705
3706 let mut param_types: Vec<String> = Vec::new();
3707 if let Some(params) = method.child_by_field_name("parameters") {
3708 let mut i = 0usize;
3709 while let Some(child) = params.child(i) {
3710 i += 1;
3711 if child.kind() == "formal_parameter" || child.kind() == "spread_parameter" {
3712 if let Some(tn) = child.child_by_field_name("type") {
3713 if let Some(full) = java_type_node_display(tn, source) {
3714 param_types.push(java_simple_type_name(&full));
3715 }
3716 }
3717 }
3718 }
3719 }
3720
3721 let param_count = param_types.len();
3722 (return_type, param_types, param_count)
3723}
3724
3725fn extract_java_inheritance_edges(
3727 tree: &Tree,
3728 source: &str,
3729 package: Option<&str>,
3730) -> Vec<(String, String)> {
3731 let mut edges = Vec::new();
3732 let import_map = build_internal_import_map(source);
3733 let root = tree.root_node();
3734
3735 walk_tree(root, |node| {
3736 if node.kind() != "class_declaration" {
3737 return;
3738 }
3739 let Some(class_name) = identifier_text_from_children(node, source) else {
3740 return;
3741 };
3742 let child_fqn = match package {
3743 Some(pkg) => format!("{pkg}.{class_name}"),
3744 None => class_name,
3745 };
3746
3747 let resolve = |type_node: Node| {
3748 java_resolve_type_fqn(type_node, source, package, &import_map)
3749 };
3750
3751 if let Some(super_c) = node.child_by_field_name("superclass") {
3752 if let Some(p) = resolve(super_c) {
3753 edges.push((child_fqn.clone(), p));
3754 }
3755 }
3756 if let Some(ifs) = node.child_by_field_name("interfaces") {
3757 let mut i = 0usize;
3758 while let Some(ch) = ifs.child(i) {
3759 i += 1;
3760 if ch.kind() == "type_list" {
3761 let mut j = 0usize;
3762 while let Some(t) = ch.child(j) {
3763 j += 1;
3764 if let Some(p) = resolve(t) {
3765 edges.push((child_fqn.clone(), p));
3766 }
3767 }
3768 }
3769 }
3770 }
3771 });
3772
3773 edges
3774}
3775
3776fn java_resolve_type_fqn(
3777 type_node: Node,
3778 source: &str,
3779 package: Option<&str>,
3780 import_map: &HashMap<String, String>,
3781) -> Option<String> {
3782 let s = type_node.start_byte() as usize;
3783 let e = (type_node.end_byte() as usize).min(source.len());
3784 if s < e && type_node.kind() == "scoped_type_identifier" {
3785 let text = source[s..e].trim();
3786 if text.contains('.') && !text.is_empty() {
3787 return Some(text.to_string());
3788 }
3789 }
3790
3791 let simple: String = match type_node.kind() {
3792 "integral_type" | "floating_point_type" | "boolean_type" | "void_type" => {
3793 if s >= e {
3794 return None;
3795 }
3796 source[s..e].trim().to_string()
3797 }
3798 _ => type_node
3799 .child_by_field_name("name")
3800 .and_then(|n| {
3801 let s = n.start_byte() as usize;
3802 let e = (n.end_byte() as usize).min(source.len());
3803 if s < e {
3804 Some(source[s..e].to_string())
3805 } else {
3806 None
3807 }
3808 })
3809 .or_else(|| {
3810 let mut found: Option<String> = None;
3811 walk_tree(type_node, |n| {
3812 if found.is_some() {
3813 return;
3814 }
3815 if n.kind() == "type_identifier" {
3816 let s = n.start_byte() as usize;
3817 let e = (n.end_byte() as usize).min(source.len());
3818 if s < e {
3819 found = Some(source[s..e].to_string());
3820 }
3821 }
3822 });
3823 found
3824 })?,
3825 };
3826
3827 if is_java_primitive_or_builtin(&simple) {
3828 return None;
3829 }
3830 Some(
3831 import_map
3832 .get(&simple)
3833 .cloned()
3834 .unwrap_or_else(|| match package {
3835 Some(pkg) => format!("{pkg}.{simple}"),
3836 None => simple,
3837 }),
3838 )
3839}
3840
3841fn extract_java_class_annotations(
3843 tree: &Tree,
3844 source: &str,
3845 package: Option<&str>,
3846) -> Vec<(String, Vec<String>)> {
3847 let mut out = Vec::new();
3848 let root = tree.root_node();
3849 walk_tree(root, |node| {
3850 if node.kind() != "class_declaration" && node.kind() != "interface_declaration" {
3851 return;
3852 }
3853 let Some(class_name) = identifier_text_from_children(node, source) else {
3854 return;
3855 };
3856 let class_fqn = match package {
3857 Some(pkg) => format!("{pkg}.{class_name}"),
3858 None => class_name,
3859 };
3860 let anns = java_modifiers_annotations(node, source);
3861 if !anns.is_empty() {
3862 out.push((class_fqn, anns));
3863 }
3864 });
3865 out
3866}
3867
3868fn extract_java_method_annotations(
3870 tree: &Tree,
3871 source: &str,
3872 package: Option<&str>,
3873) -> Vec<(String, Vec<String>)> {
3874 let mut out = Vec::new();
3875 let root = tree.root_node();
3876 walk_tree(root, |node| {
3877 if node.kind() != "method_declaration" {
3878 return;
3879 }
3880 let Some(method_name) = identifier_text_from_children(node, source) else {
3881 return;
3882 };
3883 let mut parent = node.parent();
3884 let mut class_fqn: Option<String> = None;
3885 while let Some(p) = parent {
3886 let pk = p.kind();
3887 if pk == "class_declaration" || pk == "interface_declaration" {
3888 if let Some(class_name) = identifier_text_from_children(p, source) {
3889 class_fqn = Some(match package {
3890 Some(pkg) => format!("{pkg}.{class_name}"),
3891 None => class_name,
3892 });
3893 }
3894 break;
3895 }
3896 parent = p.parent();
3897 }
3898 let fqn = match &class_fqn {
3899 Some(cls) => format!("{cls}.{method_name}"),
3900 None => match package {
3901 Some(pkg) => format!("{pkg}.{method_name}"),
3902 None => method_name.clone(),
3903 },
3904 };
3905 let anns = java_modifiers_annotations(node, source);
3906 if !anns.is_empty() {
3907 out.push((fqn, anns));
3908 }
3909 });
3910 out
3911}
3912
3913fn java_modifiers_annotations(node: Node, source: &str) -> Vec<String> {
3914 let mut names = Vec::new();
3915 let mut i = 0usize;
3916 while let Some(ch) = node.child(i) {
3917 i += 1;
3918 if ch.kind() == "modifiers" {
3919 let mut j = 0usize;
3920 while let Some(m) = ch.child(j) {
3921 j += 1;
3922 if m.kind() == "marker_annotation" || m.kind() == "annotation" {
3923 if let Some(n) = m.named_child(0) {
3924 if n.kind() == "identifier" || n.kind() == "scoped_identifier" {
3925 let s = n.start_byte() as usize;
3926 let e = (n.end_byte() as usize).min(source.len());
3927 if s < e {
3928 let raw = source[s..e].to_string();
3929 let simple = raw.rsplit('.').next().unwrap_or(&raw).to_string();
3930 names.push(simple);
3931 }
3932 }
3933 }
3934 }
3935 }
3936 }
3937 }
3938 names
3939}
3940
3941fn java_type_has_autowired(type_node: Node, source: &str) -> bool {
3942 let mut found = false;
3943 walk_tree(type_node, |n| {
3944 if n.kind() == "modifiers" {
3945 let mut j = 0usize;
3946 while let Some(m) = n.child(j) {
3947 j += 1;
3948 if m.kind() == "marker_annotation" || m.kind() == "annotation" {
3949 if let Some(id) = m.named_child(0) {
3950 let s = id.start_byte() as usize;
3951 let e = (id.end_byte() as usize).min(source.len());
3952 if s < e {
3953 let raw = &source[s..e];
3954 if raw == "Autowired" || raw.ends_with(".Autowired") {
3955 found = true;
3956 }
3957 }
3958 }
3959 }
3960 }
3961 }
3962 });
3963 found
3964}
3965
3966fn extract_java_injected_dependencies(
3968 tree: &Tree,
3969 source: &str,
3970 package: Option<&str>,
3971) -> Vec<(String, String)> {
3972 let mut out = Vec::new();
3973 let import_map = build_internal_import_map(source);
3974 let root = tree.root_node();
3975
3976 walk_tree(root, |node| {
3977 if node.kind() != "class_declaration" {
3978 return;
3979 }
3980 let Some(class_name) = identifier_text_from_children(node, source) else {
3981 return;
3982 };
3983 let class_fqn = match package {
3984 Some(pkg) => format!("{pkg}.{class_name}"),
3985 None => class_name,
3986 };
3987
3988 let mut i = 0usize;
3989 while let Some(body) = node.child(i) {
3990 i += 1;
3991 if body.kind() != "class_body" {
3992 continue;
3993 }
3994 let mut j = 0usize;
3995 while let Some(member) = body.child(j) {
3996 j += 1;
3997 match member.kind() {
3998 "constructor_declaration" => {
3999 if let Some(params) = member.child_by_field_name("parameters") {
4000 let mut k = 0usize;
4001 while let Some(p) = params.child(k) {
4002 k += 1;
4003 if p.kind() == "formal_parameter" {
4004 if let Some(tn) = p.child_by_field_name("type") {
4005 if let Some(fqn) =
4006 java_resolve_type_fqn(tn, source, package, &import_map)
4007 {
4008 out.push((class_fqn.clone(), fqn));
4009 }
4010 }
4011 }
4012 }
4013 }
4014 }
4015 "field_declaration" => {
4016 if !java_type_has_autowired(member, source) {
4017 continue;
4018 }
4019 let type_node = member
4020 .child_by_field_name("type")
4021 .or_else(|| {
4022 let mut k = 0usize;
4023 while let Some(ch) = member.child(k) {
4024 if matches!(
4025 ch.kind(),
4026 "type_identifier"
4027 | "generic_type"
4028 | "array_type"
4029 | "integral_type"
4030 | "floating_point_type"
4031 | "boolean_type"
4032 | "void_type"
4033 ) {
4034 return Some(ch);
4035 }
4036 k += 1;
4037 }
4038 None
4039 });
4040 if let Some(tn) = type_node {
4041 if let Some(fqn) = java_resolve_type_fqn(tn, source, package, &import_map)
4042 {
4043 out.push((class_fqn.clone(), fqn));
4044 }
4045 }
4046 }
4047 _ => {}
4048 }
4049 }
4050 }
4051 });
4052
4053 out
4054}
4055
/// Text-based fallback that finds the first `namespace <Name>` line.
///
/// A line qualifies when, after leading whitespace, it starts with the
/// `namespace` keyword followed by at least one whitespace character. The
/// name runs up to the next `{`, `;`, or whitespace. Any amount of spaces or
/// tabs may separate the keyword from the name. Lines with no name after the
/// keyword are skipped and the search continues.
///
/// Returns `None` when no line declares a namespace.
fn extract_csharp_namespace_line_fallback(source: &str) -> Option<String> {
    source.lines().find_map(|line| {
        let after_keyword = line.trim_start().strip_prefix("namespace")?;
        // Require a separator so identifiers such as `namespaces` don't match.
        if !after_keyword.starts_with(char::is_whitespace) {
            return None;
        }
        let name = after_keyword
            .trim_start()
            .split(|c: char| c == '{' || c == ';' || c.is_whitespace())
            .next()
            .unwrap_or("");
        (!name.is_empty()).then(|| name.to_string())
    })
}
4073
4074fn extract_csharp_namespace_from_ast(tree: &Tree, source: &str) -> Option<String> {
4076 let root = tree.root_node();
4077 for i in 0..root.named_child_count() {
4078 let c = root.named_child(i)?;
4079 if c.kind() == "file_scoped_namespace_declaration" {
4080 return csharp_namespace_declaration_name(c, source);
4081 }
4082 }
4083 for i in 0..root.named_child_count() {
4084 let c = root.named_child(i)?;
4085 if c.kind() == "namespace_declaration" {
4086 return csharp_namespace_declaration_name(c, source);
4087 }
4088 }
4089 None
4090}
4091
4092fn csharp_namespace_declaration_name(decl: Node, source: &str) -> Option<String> {
4094 if !matches!(
4095 decl.kind(),
4096 "namespace_declaration" | "file_scoped_namespace_declaration"
4097 ) {
4098 return None;
4099 }
4100 decl.child_by_field_name("name")
4101 .and_then(|n| csharp_node_text(n, source))
4102 .filter(|s| !s.is_empty())
4103}
4104
4105fn csharp_enclosing_namespace_prefix(node: Node, source: &str) -> Option<String> {
4107 let mut segments: Vec<String> = Vec::new();
4108 let mut cur = node.parent();
4109 while let Some(p) = cur {
4110 if matches!(
4111 p.kind(),
4112 "namespace_declaration" | "file_scoped_namespace_declaration"
4113 ) {
4114 if let Some(name) = csharp_namespace_declaration_name(p, source) {
4115 segments.push(name);
4116 }
4117 }
4118 cur = p.parent();
4119 }
4120 segments.reverse();
4121 if segments.is_empty() {
4122 None
4123 } else {
4124 Some(segments.join("."))
4125 }
4126}
4127
4128fn collect_csharp_file_namespace_strings(tree: &Tree, source: &str) -> Vec<String> {
4130 let mut seen: HashSet<String> = HashSet::new();
4131 let root = tree.root_node();
4132 walk_tree(root, |n| {
4133 if matches!(
4134 n.kind(),
4135 "class_declaration"
4136 | "interface_declaration"
4137 | "struct_declaration"
4138 | "enum_declaration"
4139 | "record_declaration"
4140 | "record_struct_declaration"
4141 ) {
4142 if let Some(p) = csharp_enclosing_namespace_prefix(n, source) {
4143 seen.insert(p);
4144 }
4145 }
4146 });
4147 if seen.is_empty() {
4148 if let Some(p) = extract_csharp_namespace_from_ast(tree, source) {
4149 seen.insert(p);
4150 } else if let Some(p) = extract_csharp_namespace_line_fallback(source) {
4151 seen.insert(p);
4152 }
4153 }
4154 let mut v: Vec<String> = seen.into_iter().collect();
4155 v.sort();
4156 v
4157}
4158
4159fn extract_csharp_namespace(tree: &Tree, source: &str) -> Option<String> {
4161 extract_csharp_namespace_from_ast(tree, source).or_else(|| extract_csharp_namespace_line_fallback(source))
4162}
4163
/// Classification of a single C# `using` directive, as produced by
/// `parse_csharp_using_directive`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum CSharpUsingKind {
    /// Plain import (`using Foo.Bar;`); holds the imported name with any
    /// `global::` qualifier removed.
    Namespace(String),
    /// Static import (`using static Foo.Bar;`); holds the target with any
    /// `global::` qualifier removed.
    Static(String),
    /// Alias directive (`using Alias = Foo.Bar;`): `alias` is the identifier
    /// left of `=`, `target` the right-hand side with `global::` removed.
    Alias { alias: String, target: String },
}
4174
/// Per-file summary of `using` directives, as built by
/// `extract_csharp_using_summary`.
#[derive(Debug, Clone, Default)]
struct CSharpUsingSummary {
    /// Imported namespaces, de-duplicated, in first-seen order; namespaces
    /// rooted at `System` or `Microsoft` are excluded.
    namespace_imports: Vec<String>,
    /// Alias name -> alias target for `using Alias = Target;` directives.
    alias_map: HashMap<String, String>,
}
4181
/// Removes every occurrence of `global::` from `s` and trims surrounding
/// whitespace from the result.
fn normalize_csharp_global_prefix(s: &str) -> String {
    let without_global = s.replace("global::", "");
    without_global.trim().to_owned()
}
4185
/// Reports whether the first dot-separated segment of `ns` is exactly
/// `System` or `Microsoft`.
///
/// Surrounding whitespace and one leading `global::` qualifier are ignored.
fn is_csharp_system_or_microsoft_namespace(ns: &str) -> bool {
    let trimmed = ns.trim();
    let unqualified = trimmed.strip_prefix("global::").unwrap_or(trimmed).trim();
    let root_segment = unqualified.split('.').next();
    matches!(root_segment, Some("System") | Some("Microsoft"))
}
4195
4196fn parse_csharp_using_directive(node: Node, source: &str) -> Option<CSharpUsingKind> {
4198 let start = node.start_byte() as usize;
4199 let end = node.end_byte() as usize;
4200 let raw = source.get(start..end)?.trim();
4201 let mut body = raw.strip_suffix(';')?.trim();
4202
4203 if let Some(rest) = body.strip_prefix("global") {
4204 if rest.starts_with(char::is_whitespace) {
4205 body = rest.trim_start();
4206 }
4207 }
4208
4209 body = body.strip_prefix("using")?.trim();
4210
4211 if let Some(rest) = body.strip_prefix("static") {
4212 if rest.starts_with(char::is_whitespace) {
4213 let target = normalize_csharp_global_prefix(rest.trim_start());
4214 return if target.is_empty() {
4215 None
4216 } else {
4217 Some(CSharpUsingKind::Static(target))
4218 };
4219 }
4220 }
4221
4222 if let Some(eq_pos) = body.find('=') {
4223 let left = body[..eq_pos].trim();
4224 let right = body[eq_pos + 1..].trim();
4225 if !left.is_empty()
4226 && !right.is_empty()
4227 && left.chars().all(|c| c.is_alphanumeric() || c == '_')
4228 {
4229 return Some(CSharpUsingKind::Alias {
4230 alias: left.to_string(),
4231 target: normalize_csharp_global_prefix(right),
4232 });
4233 }
4234 }
4235
4236 let ns = normalize_csharp_global_prefix(body);
4237 if ns.is_empty() {
4238 None
4239 } else {
4240 Some(CSharpUsingKind::Namespace(ns))
4241 }
4242}
4243
4244fn extract_csharp_using_summary(tree: &Tree, source: &str) -> CSharpUsingSummary {
4246 let mut namespace_imports: Vec<String> = Vec::new();
4247 let mut seen_ns: HashSet<String> = HashSet::new();
4248 let mut alias_map: HashMap<String, String> = HashMap::new();
4249
4250 walk_tree(tree.root_node(), |node| {
4251 if node.kind() != "using_directive" {
4252 return;
4253 }
4254 let Some(kind) = parse_csharp_using_directive(node, source) else {
4255 return;
4256 };
4257 match kind {
4258 CSharpUsingKind::Namespace(ns) => {
4259 if !is_csharp_system_or_microsoft_namespace(&ns) && seen_ns.insert(ns.clone()) {
4260 namespace_imports.push(ns);
4261 }
4262 }
4263 CSharpUsingKind::Static(_) => {}
4264 CSharpUsingKind::Alias { alias, target } => {
4265 alias_map.insert(alias, target);
4266 }
4267 }
4268 });
4269
4270 CSharpUsingSummary {
4271 namespace_imports,
4272 alias_map,
4273 }
4274}
4275
/// Cross-file lookup tables for C# sources, as built by
/// `build_csharp_batch_index`.
#[derive(Debug, Default, Clone)]
struct CSharpBatchIndex {
    /// Namespace -> sorted, de-duplicated paths of files declaring types in it.
    namespace_to_paths: HashMap<String, Vec<String>>,
    /// Simple type name -> sorted, de-duplicated FQNs of types with that name.
    simple_name_to_fqns: HashMap<String, Vec<String>>,
    /// FQNs of all types found across the indexed files.
    class_fqns: HashSet<String>,
}
4283
4284fn build_csharp_batch_index(files: &[ParsedFile], root: &Path) -> CSharpBatchIndex {
4285 let mut namespace_to_paths: HashMap<String, Vec<String>> = HashMap::new();
4286 let mut simple_name_to_fqns: HashMap<String, Vec<String>> = HashMap::new();
4287 let mut class_fqns: HashSet<String> = HashSet::new();
4288
4289 for file in files {
4290 if file.language != LanguageId::CSharp {
4291 continue;
4292 }
4293 let path_str = neo4j_path_string(root, &file.path);
4294 let source = &file.source;
4295 let tree = &file.tree;
4296 for ns in collect_csharp_file_namespace_strings(tree, source) {
4297 namespace_to_paths
4298 .entry(ns)
4299 .or_default()
4300 .push(path_str.clone());
4301 }
4302 let (classes, _, _) = extract_csharp_symbols(tree, source);
4303 for c in classes {
4304 class_fqns.insert(c.fqn.clone());
4305 simple_name_to_fqns
4306 .entry(c.name.clone())
4307 .or_default()
4308 .push(c.fqn.clone());
4309 }
4310 }
4311
4312 for v in namespace_to_paths.values_mut() {
4313 v.sort();
4314 v.dedup();
4315 }
4316 for v in simple_name_to_fqns.values_mut() {
4317 v.sort();
4318 v.dedup();
4319 }
4320
4321 CSharpBatchIndex {
4322 namespace_to_paths,
4323 simple_name_to_fqns,
4324 class_fqns,
4325 }
4326}
4327
4328fn csharp_effective_import_namespaces(
4330 namespace_imports: &[String],
4331 alias_map: &HashMap<String, String>,
4332 class_fqns: &HashSet<String>,
4333) -> Vec<String> {
4334 let mut out: Vec<String> = namespace_imports.to_vec();
4335 for target in alias_map.values() {
4336 if !class_fqns.contains(target) && !is_csharp_system_or_microsoft_namespace(target) {
4337 out.push(target.clone());
4338 }
4339 }
4340 out.sort();
4341 out.dedup();
4342 out
4343}
4344
4345fn resolve_csharp_type_fqn(
4347 simple: &str,
4348 current_ns: Option<&str>,
4349 namespace_imports: &[String],
4350 alias_map: &HashMap<String, String>,
4351 index: &CSharpBatchIndex,
4352) -> Option<String> {
4353 if let Some(target) = alias_map.get(simple) {
4354 if index.class_fqns.contains(target) {
4355 return Some(target.clone());
4356 }
4357 }
4358
4359 let imports = csharp_effective_import_namespaces(namespace_imports, alias_map, &index.class_fqns);
4360
4361 let candidates: Vec<String> = index
4362 .simple_name_to_fqns
4363 .get(simple)
4364 .cloned()
4365 .unwrap_or_default();
4366
4367 let mut filtered: Vec<String> = candidates
4368 .into_iter()
4369 .filter(|fqn| {
4370 let Some((decl_ns, base_name)) = fqn.rsplit_once('.') else {
4371 return false;
4372 };
4373 if base_name != simple {
4374 return false;
4375 }
4376 if Some(decl_ns) == current_ns {
4377 return true;
4378 }
4379 imports.iter().any(|im| im.as_str() == decl_ns)
4380 })
4381 .collect();
4382
4383 if filtered.is_empty() {
4384 return current_ns.map(|ns| format!("{ns}.{simple}"));
4385 }
4386
4387 if filtered.len() == 1 {
4388 return Some(filtered.pop().expect("one element"));
4389 }
4390
4391 if let Some(ns) = current_ns {
4392 if let Some(hit) = filtered
4393 .iter()
4394 .find(|fqn| fqn.rsplit_once('.').map(|(d, _)| d) == Some(ns))
4395 {
4396 return Some(hit.clone());
4397 }
4398 }
4399
4400 filtered.sort();
4401 Some(filtered[0].clone())
4402}
4403
/// Maps a handful of well-known BCL type names to their `System.*` FQN.
///
/// The lookup is case-sensitive; unknown names yield `None`.
fn csharp_well_known_static_type(name: &str) -> Option<&'static str> {
    const WELL_KNOWN: [(&str, &str); 5] = [
        ("Console", "System.Console"),
        ("String", "System.String"),
        ("Math", "System.Math"),
        ("Object", "System.Object"),
        ("Environment", "System.Environment"),
    ];
    WELL_KNOWN
        .iter()
        .find(|(short, _)| *short == name)
        .map(|(_, full)| *full)
}
4415
4416fn csharp_node_text(node: Node, source: &str) -> Option<String> {
4417 let s = node.start_byte() as usize;
4418 let e = node.end_byte() as usize;
4419 source.get(s..e).map(str::trim).map(String::from)
4420}
4421
/// Reduces C# type text to the simple name used for resolution: generic
/// arguments (everything from the first `<`) are dropped, then only the
/// segment after the last `.` is kept, trimmed.
fn csharp_type_text_to_simple_for_resolve(type_text: &str) -> String {
    let without_generics = match type_text.find('<') {
        Some(angle) => &type_text[..angle],
        None => type_text,
    };
    let base = without_generics.trim();
    let leaf = match base.rfind('.') {
        Some(dot) => &base[dot + 1..],
        None => base,
    };
    leaf.trim().to_string()
}
4427
4428fn csharp_resolve_type_node_to_fqn(
4429 type_node: Node,
4430 source: &str,
4431 namespace: Option<&str>,
4432 using_summary: &CSharpUsingSummary,
4433 index: &CSharpBatchIndex,
4434) -> Option<String> {
4435 let raw = csharp_node_text(type_node, source)?;
4436 if raw == "var" || raw.is_empty() {
4437 return None;
4438 }
4439 let simple = csharp_type_text_to_simple_for_resolve(&raw);
4440 if simple.is_empty() {
4441 return None;
4442 }
4443 resolve_csharp_type_fqn(
4444 &simple,
4445 namespace,
4446 &using_summary.namespace_imports,
4447 &using_summary.alias_map,
4448 index,
4449 )
4450}
4451
4452fn csharp_variable_declaration_child(node: Node) -> Option<Node> {
4453 for i in 0..node.child_count() {
4454 if let Some(c) = node.child(i) {
4455 if c.kind() == "variable_declaration" {
4456 return Some(c);
4457 }
4458 }
4459 }
4460 None
4461}
4462
4463fn csharp_collect_variable_declaration_bindings(
4464 var_decl: Node,
4465 source: &str,
4466 namespace: Option<&str>,
4467 using_summary: &CSharpUsingSummary,
4468 index: &CSharpBatchIndex,
4469 out: &mut HashMap<String, String>,
4470) {
4471 let Some(type_node) = var_decl.child_by_field_name("type") else {
4472 return;
4473 };
4474 let Some(type_fqn) =
4475 csharp_resolve_type_node_to_fqn(type_node, source, namespace, using_summary, index)
4476 else {
4477 return;
4478 };
4479 for i in 0..var_decl.child_count() {
4480 let Some(child) = var_decl.child(i) else {
4481 continue;
4482 };
4483 if child.kind() != "variable_declarator" {
4484 continue;
4485 }
4486 let Some(id) = child.child(0).filter(|c| c.kind() == "identifier") else {
4487 continue;
4488 };
4489 if let Some(name) = csharp_node_text(id, source) {
4490 out.insert(name, type_fqn.clone());
4491 }
4492 }
4493}
4494
4495fn csharp_collect_fields_for_type_declaration(
4496 type_decl: Node,
4497 source: &str,
4498 namespace: Option<&str>,
4499 using_summary: &CSharpUsingSummary,
4500 index: &CSharpBatchIndex,
4501) -> HashMap<String, String> {
4502 let mut fields = HashMap::new();
4503 if !matches!(
4504 type_decl.kind(),
4505 "class_declaration"
4506 | "interface_declaration"
4507 | "struct_declaration"
4508 | "enum_declaration"
4509 | "record_declaration"
4510 | "record_struct_declaration"
4511 ) {
4512 return fields;
4513 }
4514 let Some(body) = type_decl.child_by_field_name("body") else {
4515 return fields;
4516 };
4517 for i in 0..body.child_count() {
4518 let Some(member) = body.child(i) else {
4519 continue;
4520 };
4521 if member.kind() != "field_declaration" {
4522 continue;
4523 }
4524 let Some(vd) = csharp_variable_declaration_child(member) else {
4525 continue;
4526 };
4527 csharp_collect_variable_declaration_bindings(
4528 vd,
4529 source,
4530 namespace,
4531 using_summary,
4532 index,
4533 &mut fields,
4534 );
4535 }
4536 fields
4537}
4538
4539fn csharp_collect_locals_in_scope(
4540 scope_root: Node,
4541 source: &str,
4542 namespace: Option<&str>,
4543 using_summary: &CSharpUsingSummary,
4544 index: &CSharpBatchIndex,
4545) -> HashMap<String, String> {
4546 let mut locals = HashMap::new();
4547 walk_tree(scope_root, |n| {
4548 if n.kind() != "local_declaration_statement" {
4549 return;
4550 }
4551 let Some(vd) = csharp_variable_declaration_child(n) else {
4552 return;
4553 };
4554 csharp_collect_variable_declaration_bindings(
4555 vd,
4556 source,
4557 namespace,
4558 using_summary,
4559 index,
4560 &mut locals,
4561 );
4562 });
4563 locals
4564}
4565
/// Maps a tree-sitter C# declaration kind to a short type-kind label.
///
/// `record_struct_declaration` is reported as `"struct"`; any kind not listed
/// is reported as `"class"`.
fn csharp_type_declaration_kind_str(decl_kind: &str) -> &'static str {
    match decl_kind.strip_suffix("_declaration") {
        Some("enum") => "enum",
        Some("interface") => "interface",
        Some("struct") | Some("record_struct") => "struct",
        Some("record") => "record",
        _ => "class",
    }
}
4576
4577fn csharp_fqn_for_type_declaration(
4581 decl: Node,
4582 source: &str,
4583 legacy_file_namespace: Option<&str>,
4584) -> Option<String> {
4585 let leaf_name = identifier_text_from_children(decl, source)?;
4586 let mut segments = vec![leaf_name];
4587 let mut cur = decl.parent();
4588 while let Some(p) = cur {
4589 let pk = p.kind();
4590 if matches!(
4591 pk,
4592 "class_declaration"
4593 | "interface_declaration"
4594 | "struct_declaration"
4595 | "enum_declaration"
4596 | "record_declaration"
4597 | "record_struct_declaration"
4598 ) {
4599 if let Some(n) = identifier_text_from_children(p, source) {
4600 segments.push(n);
4601 }
4602 }
4603 cur = p.parent();
4604 }
4605 segments.reverse();
4606 let qualified = segments.join(".");
4607 let ns = csharp_enclosing_namespace_prefix(decl, source)
4608 .or_else(|| legacy_file_namespace.map(|s| s.to_string()));
4609 Some(match ns {
4610 Some(n) => format!("{n}.{qualified}"),
4611 None => qualified,
4612 })
4613}
4614
4615fn csharp_innermost_enclosing_type_declaration(from: Node) -> Option<Node> {
4616 let mut cur = from.parent();
4617 while let Some(p) = cur {
4618 let pk = p.kind();
4619 if matches!(
4620 pk,
4621 "class_declaration"
4622 | "interface_declaration"
4623 | "struct_declaration"
4624 | "enum_declaration"
4625 | "record_declaration"
4626 | "record_struct_declaration"
4627 ) {
4628 return Some(p);
4629 }
4630 cur = p.parent();
4631 }
4632 None
4633}
4634
/// Builds the FQN of an instance constructor: `<class_fqn>.ctor#<arity>`.
fn csharp_constructor_function_fqn(class_fqn: &str, arity: usize) -> String {
    let arity_text = arity.to_string();
    [class_fqn, ".ctor#", arity_text.as_str()].concat()
}
4639
4640fn csharp_constructor_symbol_fqn(ctor: Node, class_fqn: &str, source: &str) -> (String, String) {
4642 let modifiers = csharp_collect_method_modifiers(ctor, source);
4643 if modifiers.iter().any(|m| m == "static") {
4644 return ("cctor".to_string(), format!("{class_fqn}.cctor"));
4645 }
4646 let param_types = csharp_method_parameter_types(ctor, source);
4647 let arity = param_types.len();
4648 (
4649 format!("ctor#{arity}"),
4650 csharp_constructor_function_fqn(class_fqn, arity),
4651 )
4652}
4653
4654fn csharp_method_simple_name(method: Node, source: &str) -> Option<String> {
4655 method
4656 .child_by_field_name("name")
4657 .and_then(|n| csharp_node_text(n, source))
4658 .filter(|s| !s.is_empty())
4659 .or_else(|| identifier_text_from_children(method, source))
4660}
4661
4662fn csharp_collect_method_modifiers(method: Node, source: &str) -> Vec<String> {
4663 let mut out = Vec::new();
4664 for i in 0..method.child_count() {
4665 let Some(c) = method.child(i) else {
4666 continue;
4667 };
4668 if c.kind() == "modifier" {
4669 if let Some(t) = csharp_node_text(c, source) {
4670 if !t.is_empty() {
4671 out.push(t);
4672 }
4673 }
4674 }
4675 }
4676 out
4677}
4678
4679fn csharp_method_return_type_node(method: Node) -> Option<Node> {
4681 method
4682 .child_by_field_name("returns")
4683 .or_else(|| method.child_by_field_name("type"))
4684}
4685
4686fn csharp_method_return_type_string(method: Node, source: &str) -> Option<String> {
4687 let t = csharp_method_return_type_node(method)?;
4688 if t.kind() == "void_keyword" {
4689 return Some("void".to_string());
4690 }
4691 csharp_node_text(t, source)
4692}
4693
4694fn csharp_method_parameter_types(method: Node, source: &str) -> Vec<String> {
4695 let mut out = Vec::new();
4696 let Some(params) = method.child_by_field_name("parameters") else {
4697 return out;
4698 };
4699 for i in 0..params.child_count() {
4700 let Some(p) = params.child(i) else {
4701 continue;
4702 };
4703 if p.kind() != "parameter" {
4704 continue;
4705 }
4706 let Some(ty) = p.child_by_field_name("type") else {
4707 continue;
4708 };
4709 if let Some(s) = csharp_node_text(ty, source) {
4710 if !s.is_empty() {
4711 out.push(s);
4712 }
4713 }
4714 }
4715 out
4716}
4717
4718fn csharp_enclosing_type_fqn(node: Node, source: &str, namespace: Option<&str>) -> Option<String> {
4719 let inner = csharp_innermost_enclosing_type_declaration(node)?;
4720 csharp_fqn_for_type_declaration(inner, source, namespace)
4721}
4722
4723fn csharp_block_body(node: Node) -> Option<Node> {
4724 for i in 0..node.child_count() {
4725 if let Some(c) = node.child(i) {
4726 if c.kind() == "block" {
4727 return Some(c);
4728 }
4729 }
4730 }
4731 None
4732}
4733
4734fn csharp_property_name_for_accessor(acc: Node, source: &str) -> Option<String> {
4735 let list = acc.parent()?;
4736 let prop = list.parent()?;
4737 if prop.kind() != "property_declaration" {
4738 return None;
4739 }
4740 let name_node = prop.child_by_field_name("name")?;
4741 csharp_node_text(name_node, source)
4742}
4743
4744fn csharp_accessor_kind_prefix(acc: Node, source: &str) -> &'static str {
4745 for i in 0..acc.child_count() {
4746 if let Some(c) = acc.child(i) {
4747 if let Some(t) = csharp_node_text(c, source) {
4748 match t.as_str() {
4749 "get" => return "get",
4750 "set" => return "set",
4751 "init" => return "init",
4752 _ => {}
4753 }
4754 }
4755 }
4756 }
4757 "accessor"
4758}
4759
4760fn csharp_method_name_from_simple_name_node(node: Node, source: &str) -> Option<String> {
4761 match node.kind() {
4762 "identifier" => csharp_node_text(node, source),
4763 "generic_name" => node
4764 .child(0)
4765 .filter(|c| c.kind() == "identifier")
4766 .and_then(|c| csharp_node_text(c, source)),
4767 _ => {
4768 let t = csharp_node_text(node, source)?;
4769 Some(csharp_type_text_to_simple_for_resolve(&t))
4770 }
4771 }
4772}
4773
4774fn csharp_unwrap_parenthesized_invoke_expr(mut expr: Node) -> Node {
4775 while expr.kind() == "parenthesized_expression" {
4776 if let Some(inner) = expr.child(1) {
4777 expr = inner;
4778 } else {
4779 break;
4780 }
4781 }
4782 expr
4783}
4784
4785fn csharp_resolve_member_receiver_to_type_fqn(
4786 mut receiver: Node,
4787 source: &str,
4788 class_fqn: Option<&str>,
4789 field_map: &HashMap<String, String>,
4790 local_map: &HashMap<String, String>,
4791 namespace: Option<&str>,
4792 using_summary: &CSharpUsingSummary,
4793 index: &CSharpBatchIndex,
4794) -> Option<String> {
4795 while receiver.kind() == "member_access_expression" {
4796 receiver = receiver.child_by_field_name("expression")?;
4797 }
4798 match receiver.kind() {
4799 "this_expression" | "base_expression" | "this" | "base" => class_fqn.map(String::from),
4801 "identifier" => {
4802 let name = csharp_node_text(receiver, source)?;
4803 if let Some(t) = local_map.get(&name) {
4804 return Some(t.clone());
4805 }
4806 if let Some(t) = field_map.get(&name) {
4807 return Some(t.clone());
4808 }
4809 if let Some(wk) = csharp_well_known_static_type(&name) {
4810 return Some(wk.to_string());
4811 }
4812 resolve_csharp_type_fqn(
4813 &name,
4814 namespace,
4815 &using_summary.namespace_imports,
4816 &using_summary.alias_map,
4817 index,
4818 )
4819 }
4820 _ => None,
4821 }
4822}
4823
4824fn csharp_resolve_invocation_callee_fqn(
4825 fn_expr: Node,
4826 source: &str,
4827 class_fqn: Option<&str>,
4828 field_map: &HashMap<String, String>,
4829 local_map: &HashMap<String, String>,
4830 namespace: Option<&str>,
4831 using_summary: &CSharpUsingSummary,
4832 index: &CSharpBatchIndex,
4833) -> Option<String> {
4834 let fn_expr = csharp_unwrap_parenthesized_invoke_expr(fn_expr);
4835 match fn_expr.kind() {
4836 "identifier" => {
4837 let name = csharp_node_text(fn_expr, source)?;
4838 if let Some(cls) = class_fqn {
4839 Some(format!("{cls}.{name}"))
4840 } else if let Some(ns) = namespace {
4841 Some(format!("{ns}.{name}"))
4842 } else {
4843 Some(name)
4844 }
4845 }
4846 "generic_name" => {
4847 let name = csharp_method_name_from_simple_name_node(fn_expr, source)?;
4848 if let Some(cls) = class_fqn {
4849 Some(format!("{cls}.{name}"))
4850 } else if let Some(ns) = namespace {
4851 Some(format!("{ns}.{name}"))
4852 } else {
4853 Some(name)
4854 }
4855 }
4856 "member_access_expression" => {
4857 let name_node = fn_expr.child_by_field_name("name")?;
4858 let method_name = csharp_method_name_from_simple_name_node(name_node, source)?;
4859 let recv = fn_expr.child_by_field_name("expression")?;
4860 let recv_ty = csharp_resolve_member_receiver_to_type_fqn(
4861 recv,
4862 source,
4863 class_fqn,
4864 field_map,
4865 local_map,
4866 namespace,
4867 using_summary,
4868 index,
4869 )?;
4870 Some(format!("{recv_ty}.{method_name}"))
4871 }
4872 _ => None,
4873 }
4874}
4875
4876fn csharp_collect_calls_from_body(
4877 body: Node,
4878 caller_fqn: &str,
4879 class_fqn: Option<&str>,
4880 field_map: &HashMap<String, String>,
4881 source: &str,
4882 namespace: Option<&str>,
4883 using_summary: &CSharpUsingSummary,
4884 index: &CSharpBatchIndex,
4885 calls: &mut Vec<(String, String)>,
4886) {
4887 let locals = csharp_collect_locals_in_scope(body, source, namespace, using_summary, index);
4888 walk_tree(body, |n| {
4889 if n.kind() != "invocation_expression" {
4890 return;
4891 }
4892 let Some(fn_node) = n.child_by_field_name("function") else {
4893 return;
4894 };
4895 let Some(callee) = csharp_resolve_invocation_callee_fqn(
4896 fn_node,
4897 source,
4898 class_fqn,
4899 field_map,
4900 &locals,
4901 namespace,
4902 using_summary,
4903 index,
4904 ) else {
4905 return;
4906 };
4907 calls.push((caller_fqn.to_string(), callee));
4908 });
4909}
4910
4911fn extract_csharp_symbols(
4917 tree: &Tree,
4918 source: &str,
4919) -> (
4920 Vec<ClassSymbol>,
4921 Vec<FunctionSymbol>,
4922 Vec<PropertySymbol>,
4923) {
4924 let mut classes: Vec<ClassSymbol> = Vec::new();
4925 let mut methods: Vec<FunctionSymbol> = Vec::new();
4926 let mut properties: Vec<PropertySymbol> = Vec::new();
4927 let mut seen_class_fqns: HashSet<String> = HashSet::new();
4928 let mut seen_property_fqns: HashSet<String> = HashSet::new();
4929
4930 let namespace = extract_csharp_namespace(tree, source);
4931 let root = tree.root_node();
4932
4933 walk_tree(root, |node| {
4934 let nk = node.kind();
4935 match nk {
4936 "class_declaration"
4937 | "interface_declaration"
4938 | "struct_declaration"
4939 | "enum_declaration"
4940 | "record_declaration"
4941 | "record_struct_declaration" => {
4942 let Some(name) = identifier_text_from_children(node, source) else {
4943 return;
4944 };
4945 let Some(fqn) = csharp_fqn_for_type_declaration(node, source, namespace.as_deref()) else {
4946 return;
4947 };
4948 if seen_class_fqns.insert(fqn.clone()) {
4949 let kind = Some(csharp_type_declaration_kind_str(nk));
4950 classes.push(ClassSymbol { name, fqn, kind });
4951 }
4952 }
4953 "method_declaration" => {
4954 let Some(method_name) = csharp_method_simple_name(node, source) else {
4955 return;
4956 };
4957 let class_fqn = csharp_enclosing_type_fqn(node, source, namespace.as_deref());
4958 let fqn = if let Some(ref cls) = class_fqn {
4959 format!("{cls}.{method_name}")
4960 } else if let Some(ref ns) = namespace {
4961 format!("{ns}.{method_name}")
4962 } else {
4963 method_name.clone()
4964 };
4965
4966 let param_types = csharp_method_parameter_types(node, source);
4967 let param_count = param_types.len();
4968 let return_type = csharp_method_return_type_string(node, source);
4969 let modifiers = csharp_collect_method_modifiers(node, source);
4970
4971 methods.push(FunctionSymbol {
4972 name: method_name,
4973 fqn,
4974 class_fqn,
4975 return_type,
4976 param_types,
4977 param_count,
4978 modifiers,
4979 is_pointer_receiver: None,
4980 });
4981 }
4982 "constructor_declaration" => {
4983 let Some(class_fqn) =
4984 csharp_enclosing_type_fqn(node, source, namespace.as_deref())
4985 else {
4986 return;
4987 };
4988 let (name, fqn) = csharp_constructor_symbol_fqn(node, &class_fqn, source);
4989 let param_types = csharp_method_parameter_types(node, source);
4990 let param_count = param_types.len();
4991 let modifiers = csharp_collect_method_modifiers(node, source);
4992 methods.push(FunctionSymbol {
4993 name,
4994 fqn,
4995 class_fqn: Some(class_fqn),
4996 return_type: None,
4997 param_types,
4998 param_count,
4999 modifiers,
5000 is_pointer_receiver: None,
5001 });
5002 }
5003 "property_declaration" => {
5004 let Some(class_fqn) =
5005 csharp_enclosing_type_fqn(node, source, namespace.as_deref())
5006 else {
5007 return;
5008 };
5009 let Some(prop_name_node) = node.child_by_field_name("name") else {
5010 return;
5011 };
5012 let Some(prop_name) = csharp_node_text(prop_name_node, source).filter(|s| !s.is_empty())
5013 else {
5014 return;
5015 };
5016 let prop_fqn = format!("{class_fqn}.{prop_name}");
5017 if seen_property_fqns.insert(prop_fqn.clone()) {
5018 let declared_type = node
5019 .child_by_field_name("type")
5020 .and_then(|t| csharp_node_text(t, source))
5021 .filter(|s| !s.is_empty());
5022 properties.push(PropertySymbol {
5023 class_fqn: class_fqn.clone(),
5024 name: prop_name.clone(),
5025 fqn: prop_fqn,
5026 declared_type,
5027 });
5028 }
5029 let prop_type_text = node
5030 .child_by_field_name("type")
5031 .and_then(|t| csharp_node_text(t, source))
5032 .filter(|s| !s.is_empty());
5033 let Some(accessor_list) = node.child_by_field_name("accessors") else {
5034 return;
5035 };
5036 for i in 0..accessor_list.child_count() {
5037 let Some(acc) = accessor_list.child(i) else {
5038 continue;
5039 };
5040 if acc.kind() != "accessor_declaration" {
5041 continue;
5042 }
5043 let prefix = csharp_accessor_kind_prefix(acc, source);
5044 let fn_name = format!("{prefix}_{prop_name}");
5045 let fqn = format!("{class_fqn}.{fn_name}");
5046 let return_type = match prefix {
5047 "get" => prop_type_text.clone(),
5048 "set" | "init" => Some("void".to_string()),
5049 _ => prop_type_text.clone(),
5050 };
5051 let param_types = csharp_method_parameter_types(acc, source);
5052 let param_count = param_types.len();
5053 let modifiers = csharp_collect_method_modifiers(acc, source);
5054 methods.push(FunctionSymbol {
5055 name: fn_name,
5056 fqn,
5057 class_fqn: Some(class_fqn.clone()),
5058 return_type,
5059 param_types,
5060 param_count,
5061 modifiers,
5062 is_pointer_receiver: None,
5063 });
5064 }
5065 }
5066 _ => {}
5067 }
5068 });
5069
5070 (classes, methods, properties)
5071}
5072
5073fn csharp_for_each_unique_type_root(method: Node, seen: &mut HashSet<(usize, usize)>, f: &mut impl FnMut(Node)) {
5076 let mut push = |n: Option<Node>| {
5077 let Some(t) = n else {
5078 return;
5079 };
5080 if t.kind() == "void_keyword" {
5081 return;
5082 }
5083 let key = (t.start_byte() as usize, t.end_byte() as usize);
5084 if seen.insert(key) {
5085 f(t);
5086 }
5087 };
5088
5089 push(csharp_method_return_type_node(method));
5090
5091 walk_tree_skip_local_functions(method, |n| {
5092 match n.kind() {
5093 "local_declaration_statement" => {
5094 if let Some(vd) = csharp_variable_declaration_child(n) {
5095 push(vd.child_by_field_name("type"));
5096 }
5097 }
5098 "variable_declaration" | "parameter" | "object_creation_expression" | "cast_expression"
5099 | "default_expression" => {
5100 push(n.child_by_field_name("type"));
5101 }
5102 "as_expression" | "is_expression" => {
5103 push(n.child_by_field_name("right"));
5104 }
5105 "type_pattern" => {
5106 if let Some(c) = n.named_child(0) {
5107 push(Some(c));
5108 }
5109 }
5110 _ => {}
5111 }
5112 });
5113}
5114
5115fn csharp_collect_simple_type_names_from_type_node(node: Node, source: &str, out: &mut Vec<String>) {
5117 match node.kind() {
5118 "type_identifier" | "identifier" => {
5119 if let Some(t) = csharp_node_text(node, source) {
5120 out.push(t);
5121 }
5122 }
5123 "generic_name" => {
5124 for i in 0..node.child_count() {
5125 let Some(c) = node.child(i) else {
5126 continue;
5127 };
5128 match c.kind() {
5129 "identifier" => {
5130 if let Some(t) = csharp_node_text(c, source) {
5131 out.push(t);
5132 }
5133 }
5134 "type_argument_list" => {
5135 for j in 0..c.named_child_count() {
5136 if let Some(arg) = c.named_child(j) {
5137 csharp_collect_simple_type_names_from_type_node(arg, source, out);
5138 }
5139 }
5140 }
5141 _ => {}
5142 }
5143 }
5144 }
5145 "qualified_name" => {
5146 for i in 0..node.child_count() {
5147 let Some(c) = node.child(i) else {
5148 continue;
5149 };
5150 match c.kind() {
5151 "qualified_name" | "alias_qualified_name" | "generic_name" | "type_identifier"
5152 | "identifier" => {
5153 csharp_collect_simple_type_names_from_type_node(c, source, out);
5154 }
5155 _ => {}
5156 }
5157 }
5158 }
5159 "alias_qualified_name" => {
5160 for i in 0..node.child_count() {
5161 let Some(c) = node.child(i) else {
5162 continue;
5163 };
5164 if matches!(c.kind(), "generic_name" | "identifier" | "type_identifier") {
5165 csharp_collect_simple_type_names_from_type_node(c, source, out);
5166 }
5167 }
5168 }
5169 "nullable_type" => {
5170 if let Some(c) = node.named_child(0) {
5171 csharp_collect_simple_type_names_from_type_node(c, source, out);
5172 }
5173 }
5174 "array_type" => {
5175 if let Some(c) = node.child_by_field_name("type") {
5176 csharp_collect_simple_type_names_from_type_node(c, source, out);
5177 }
5178 }
5179 "pointer_type" => {
5180 if let Some(c) = node.named_child(0) {
5181 csharp_collect_simple_type_names_from_type_node(c, source, out);
5182 }
5183 }
5184 "tuple_type" => {
5185 for i in 0..node.named_child_count() {
5186 let Some(el) = node.named_child(i) else {
5187 continue;
5188 };
5189 if el.kind() == "tuple_element" {
5190 if let Some(tn) = el.child_by_field_name("type") {
5191 csharp_collect_simple_type_names_from_type_node(tn, source, out);
5192 }
5193 }
5194 }
5195 }
5196 "predefined_type" => {}
5197 _ => {
5198 if let Some(inner) = node.child_by_field_name("type") {
5199 csharp_collect_simple_type_names_from_type_node(inner, source, out);
5200 }
5201 }
5202 }
5203}
5204
5205fn csharp_for_each_base_type_in_list(base_list: Node, mut f: impl FnMut(Node)) {
5206 for i in 0..base_list.child_count() {
5207 let Some(c) = base_list.child(i) else {
5208 continue;
5209 };
5210 match c.kind() {
5211 ":" | "," => continue,
5212 "argument_list" => continue,
5214 _ => f(c),
5215 }
5216 }
5217}
5218
5219fn csharp_type_declaration_base_list(decl: Node) -> Option<Node> {
5221 decl.child_by_field_name("bases")
5222 .filter(|b| !b.is_missing())
5223 .or_else(|| {
5224 (0..decl.named_child_count())
5225 .filter_map(|i| decl.named_child(i))
5226 .find(|c| c.kind() == "base_list")
5227 })
5228}
5229
5230fn extract_csharp_class_inheritance_edges(
5232 tree: &Tree,
5233 source: &str,
5234 legacy_ns: Option<&str>,
5235 using_summary: &CSharpUsingSummary,
5236 index: &CSharpBatchIndex,
5237) -> Vec<(String, String)> {
5238 let mut out = Vec::new();
5239 let mut seen: HashSet<(String, String)> = HashSet::new();
5240 let root = tree.root_node();
5241 walk_tree(root, |n| {
5242 if !matches!(
5243 n.kind(),
5244 "class_declaration" | "interface_declaration" | "struct_declaration" | "record_declaration"
5245 ) {
5246 return;
5247 }
5248 let Some(derived_fqn) = csharp_fqn_for_type_declaration(n, source, legacy_ns) else {
5249 return;
5250 };
5251 let Some(bases_field) = csharp_type_declaration_base_list(n) else {
5252 return;
5253 };
5254 if bases_field.is_missing() {
5255 return;
5256 }
5257 let ns_for = csharp_enclosing_namespace_prefix(n, source)
5258 .or_else(|| legacy_ns.map(|s| s.to_string()));
5259 csharp_for_each_base_type_in_list(bases_field, |ty_node| {
5260 let mut names: Vec<String> = Vec::new();
5261 csharp_collect_simple_type_names_from_type_node(ty_node, source, &mut names);
5262 let Some(simple) = names.first() else {
5263 return;
5264 };
5265 if is_csharp_builtin_type(simple) {
5266 return;
5267 }
5268 let base_fqn = resolve_csharp_type_fqn(
5269 simple,
5270 ns_for.as_deref(),
5271 &using_summary.namespace_imports,
5272 &using_summary.alias_map,
5273 index,
5274 )
5275 .unwrap_or_else(|| {
5276 ns_for
5277 .as_ref()
5278 .map(|ns| format!("{ns}.{simple}"))
5279 .unwrap_or_else(|| simple.clone())
5280 });
5281 if base_fqn != derived_fqn && seen.insert((derived_fqn.clone(), base_fqn.clone())) {
5282 out.push((derived_fqn.clone(), base_fqn));
5283 }
5284 });
5285 });
5286 out
5287}
5288
/// Heuristic: a name consisting of exactly one ASCII uppercase letter (`T`, `K`, ...)
/// is treated as a generic type parameter.
fn csharp_is_likely_type_parameter_name(name: &str) -> bool {
    // A one-byte string is necessarily a single ASCII character.
    matches!(name.as_bytes(), [only] if only.is_ascii_uppercase())
}
5297
5298fn extract_csharp_used_classes(
5301 tree: &Tree,
5302 source: &str,
5303 namespace: Option<&str>,
5304 using_summary: &CSharpUsingSummary,
5305 csharp_index: &CSharpBatchIndex,
5306) -> Vec<(String, String)> {
5307 let mut uses: Vec<(String, String)> = Vec::new();
5308 let mut pair_seen: HashSet<(String, String)> = HashSet::new();
5309 let root = tree.root_node();
5310
5311 walk_tree(root, |node| {
5312 if node.kind() != "method_declaration" {
5313 return;
5314 }
5315
5316 let Some(method_name) = csharp_method_simple_name(node, source) else {
5317 return;
5318 };
5319
5320 let effective_ns = csharp_enclosing_namespace_prefix(node, source)
5321 .or_else(|| namespace.map(|s| s.to_string()));
5322 let class_fqn = csharp_enclosing_type_fqn(node, source, namespace);
5323 let func_fqn = if let Some(ref cls) = class_fqn {
5324 format!("{cls}.{method_name}")
5325 } else if let Some(ref ns) = effective_ns {
5326 format!("{ns}.{method_name}")
5327 } else {
5328 method_name
5329 };
5330
5331 let mut root_seen: HashSet<(usize, usize)> = HashSet::new();
5332 let mut simple_names: Vec<String> = Vec::new();
5333 csharp_for_each_unique_type_root(node, &mut root_seen, &mut |tr| {
5334 csharp_collect_simple_type_names_from_type_node(tr, source, &mut simple_names);
5335 });
5336
5337 for type_name in simple_names {
5338 if is_csharp_builtin_type(&type_name) {
5339 continue;
5340 }
5341 if csharp_is_likely_type_parameter_name(&type_name)
5342 && !using_summary.alias_map.contains_key(&type_name)
5343 {
5344 continue;
5345 }
5346 if !type_name
5347 .chars()
5348 .next()
5349 .map(|c| c.is_uppercase())
5350 .unwrap_or(false)
5351 {
5352 continue;
5353 }
5354 let type_fqn = resolve_csharp_type_fqn(
5355 &type_name,
5356 effective_ns.as_deref(),
5357 &using_summary.namespace_imports,
5358 &using_summary.alias_map,
5359 csharp_index,
5360 )
5361 .unwrap_or_else(|| {
5362 effective_ns
5363 .as_ref()
5364 .map(|ns| format!("{ns}.{type_name}"))
5365 .unwrap_or_else(|| type_name.clone())
5366 });
5367 if pair_seen.insert((func_fqn.clone(), type_fqn.clone())) {
5368 uses.push((func_fqn.clone(), type_fqn));
5369 }
5370 }
5371 });
5372
5373 uses
5374}
5375
/// Returns `true` when `name` is a C# built-in type keyword or the matching CLR
/// primitive type name.
///
/// Keyword aliases and their CLR counterparts are kept in step: the unsigned/signed
/// keywords `uint`, `ulong`, `ushort` and `sbyte` are recognised alongside `UInt32`,
/// `UInt64`, `UInt16` and `SByte`, and `IntPtr` / `UIntPtr` alongside `nint` / `nuint`.
/// Matching is case-sensitive.
fn is_csharp_primitive_or_alias(name: &str) -> bool {
    matches!(
        name,
        // Language keywords.
        "int" | "long" | "short" | "byte" | "float" | "double" | "decimal"
            | "uint" | "ulong" | "ushort" | "sbyte"
            | "bool" | "char" | "string" | "object" | "void" | "dynamic" | "var"
            | "nint" | "nuint"
            // CLR type names.
            | "Int32" | "Int64" | "Int16" | "UInt32" | "UInt64" | "UInt16"
            | "Byte" | "SByte" | "Single" | "Double" | "Decimal"
            | "IntPtr" | "UIntPtr"
            | "Boolean" | "Char" | "String" | "Object" | "Void"
    )
}
5388
/// Returns `true` when `name` is one of a fixed set of commonly used library type
/// names (collections, tasks, delegates, date/time, text, I/O, exceptions, spans,
/// core interfaces, hosting/HTTP). Matching is exact and case-sensitive.
fn is_csharp_common_bcl_or_framework_type(name: &str) -> bool {
    const KNOWN_TYPE_NAMES: &[&str] = &[
        // Collections
        "List", "Dictionary", "IEnumerable", "IEnumerator", "IList", "ICollection",
        "IDictionary", "IReadOnlyList", "IReadOnlyCollection", "IReadOnlyDictionary",
        "ISet", "HashSet", "SortedSet", "Queue", "Stack", "LinkedList", "SortedList",
        "ConcurrentBag", "ConcurrentQueue", "ConcurrentStack", "ConcurrentDictionary",
        "ObservableCollection", "ImmutableArray", "ImmutableList", "ImmutableDictionary",
        "IOrderedEnumerable",
        // Tasks and threading
        "Task", "ValueTask", "CancellationToken", "CancellationTokenSource",
        "IAsyncEnumerable", "IAsyncEnumerator", "Parallel", "Thread", "Interlocked",
        // Delegates
        "Func", "Action", "MulticastDelegate", "Delegate",
        // Date, time and identifiers
        "DateTime", "DateTimeOffset", "TimeSpan", "DateOnly", "TimeOnly", "Guid",
        // Text
        "StringBuilder", "Encoding", "UTF8Encoding", "ASCIIEncoding", "UnicodeEncoding",
        // I/O and environment
        "Stream", "MemoryStream", "FileStream", "BufferedStream", "TextReader",
        "TextWriter", "StringReader", "StringWriter", "BinaryReader", "BinaryWriter",
        "File", "Path", "Directory", "Environment",
        // Core runtime helpers
        "Uri", "Version", "Type", "Enum", "Array", "Nullable", "Lazy", "Tuple",
        "ValueTuple", "Console", "Math", "Convert", "BitConverter", "GC", "WeakReference",
        "RuntimeHelpers", "Activator",
        // Exceptions
        "Exception", "ArgumentException", "ArgumentNullException",
        "ArgumentOutOfRangeException", "InvalidOperationException",
        "NotSupportedException", "NotImplementedException", "IOException",
        "UnauthorizedAccessException", "TimeoutException", "AggregateException",
        "OperationCanceledException", "ObjectDisposedException", "FormatException",
        // Spans and memory
        "Span", "ReadOnlySpan", "Memory", "ReadOnlyMemory",
        // Core interfaces
        "IDisposable", "IAsyncDisposable", "IComparable", "IEquatable", "IFormattable",
        // Hosting, configuration and HTTP
        "IServiceProvider", "ILogger", "IConfiguration", "IHost", "IHostedService",
        "IHttpClientFactory", "HttpClient", "HttpRequestMessage", "HttpResponseMessage",
    ];

    KNOWN_TYPE_NAMES.contains(&name)
}
5428
5429fn is_csharp_builtin_type(name: &str) -> bool {
5430 is_csharp_primitive_or_alias(name) || is_csharp_common_bcl_or_framework_type(name)
5431}
5432
5433fn extract_csharp_calls(
5435 tree: &Tree,
5436 source: &str,
5437 namespace: Option<&str>,
5438 using_summary: &CSharpUsingSummary,
5439 csharp_index: &CSharpBatchIndex,
5440) -> Vec<(String, String)> {
5441 let mut calls: Vec<(String, String)> = Vec::new();
5442 let root = tree.root_node();
5443 let mut class_field_maps: HashMap<String, HashMap<String, String>> = HashMap::new();
5444
5445 walk_tree(root, |node| {
5446 if matches!(
5447 node.kind(),
5448 "class_declaration"
5449 | "interface_declaration"
5450 | "struct_declaration"
5451 | "enum_declaration"
5452 | "record_declaration"
5453 | "record_struct_declaration"
5454 ) {
5455 if let Some(cfqn) = csharp_fqn_for_type_declaration(node, source, namespace) {
5456 let ns_for = csharp_enclosing_namespace_prefix(node, source)
5457 .or_else(|| namespace.map(|s| s.to_string()));
5458 let fm = csharp_collect_fields_for_type_declaration(
5459 node,
5460 source,
5461 ns_for.as_deref(),
5462 using_summary,
5463 csharp_index,
5464 );
5465 class_field_maps.insert(cfqn, fm);
5466 }
5467 }
5468 });
5469
5470 let empty_fields: HashMap<String, String> = HashMap::new();
5471
5472 walk_tree(root, |node| {
5473 match node.kind() {
5474 "method_declaration" => {
5475 let Some(method_name) = csharp_method_simple_name(node, source) else {
5476 return;
5477 };
5478 let effective_ns = csharp_enclosing_namespace_prefix(node, source)
5479 .or_else(|| namespace.map(|s| s.to_string()));
5480 let class_fqn = csharp_enclosing_type_fqn(node, source, namespace);
5481 let caller_fqn = match &class_fqn {
5482 Some(cf) => format!("{cf}.{method_name}"),
5483 None => effective_ns
5484 .as_ref()
5485 .map(|ns| format!("{ns}.{method_name}"))
5486 .unwrap_or(method_name),
5487 };
5488 let Some(body) = csharp_block_body(node) else {
5489 return;
5490 };
5491 let field_map = class_fqn
5492 .as_ref()
5493 .and_then(|c| class_field_maps.get(c))
5494 .unwrap_or(&empty_fields);
5495 csharp_collect_calls_from_body(
5496 body,
5497 &caller_fqn,
5498 class_fqn.as_deref(),
5499 field_map,
5500 source,
5501 effective_ns.as_deref(),
5502 using_summary,
5503 csharp_index,
5504 &mut calls,
5505 );
5506 }
5507 "constructor_declaration" => {
5508 let class_fqn = csharp_enclosing_type_fqn(node, source, namespace);
5509 let Some(cf) = class_fqn.clone() else {
5510 return;
5511 };
5512 let effective_ns = csharp_enclosing_namespace_prefix(node, source)
5513 .or_else(|| namespace.map(|s| s.to_string()));
5514 let (_, caller_fqn) = csharp_constructor_symbol_fqn(node, &cf, source);
5515 let Some(body) = csharp_block_body(node) else {
5516 return;
5517 };
5518 let field_map = class_field_maps.get(&cf).unwrap_or(&empty_fields);
5519 csharp_collect_calls_from_body(
5520 body,
5521 &caller_fqn,
5522 Some(cf.as_str()),
5523 field_map,
5524 source,
5525 effective_ns.as_deref(),
5526 using_summary,
5527 csharp_index,
5528 &mut calls,
5529 );
5530 }
5531 "accessor_declaration" => {
5532 let class_fqn = csharp_enclosing_type_fqn(node, source, namespace);
5533 let Some(cf) = class_fqn.clone() else {
5534 return;
5535 };
5536 let effective_ns = csharp_enclosing_namespace_prefix(node, source)
5537 .or_else(|| namespace.map(|s| s.to_string()));
5538 let Some(prop_name) = csharp_property_name_for_accessor(node, source) else {
5539 return;
5540 };
5541 let prefix = csharp_accessor_kind_prefix(node, source);
5542 let caller_fqn = format!("{cf}.{}_{}", prefix, prop_name);
5543 let Some(body) = csharp_block_body(node) else {
5544 return;
5545 };
5546 let field_map = class_field_maps.get(&cf).unwrap_or(&empty_fields);
5547 csharp_collect_calls_from_body(
5548 body,
5549 &caller_fqn,
5550 Some(cf.as_str()),
5551 field_map,
5552 source,
5553 effective_ns.as_deref(),
5554 using_summary,
5555 csharp_index,
5556 &mut calls,
5557 );
5558 }
5559 _ => {}
5560 }
5561 });
5562
5563 calls
5564}
5565
5566fn csharp_string_value_from_literal_node(node: Node, source: &str) -> Option<String> {
5568 let s = source.get(node.start_byte() as usize..node.end_byte() as usize)?;
5569 let t = s.trim();
5570 match node.kind() {
5571 "string_literal" => {
5572 if t.starts_with('"') && t.ends_with('"') && t.len() >= 2 {
5573 Some(t[1..t.len() - 1].to_string())
5574 } else {
5575 None
5576 }
5577 }
5578 "verbatim_string_literal" => {
5579 if t.starts_with("@\"") && t.ends_with('"') && t.len() >= 3 {
5580 Some(t[2..t.len() - 1].replace("\"\"", "\""))
5581 } else {
5582 None
5583 }
5584 }
5585 _ => None,
5586 }
5587}
5588
5589fn csharp_first_string_in_subtree(root: Node, source: &str) -> Option<String> {
5591 let mut stack = vec![root];
5592 while let Some(n) = stack.pop() {
5593 match n.kind() {
5594 "string_literal" | "verbatim_string_literal" => {
5595 return csharp_string_value_from_literal_node(n, source);
5596 }
5597 _ => {
5598 let cc = n.child_count();
5599 for i in (0..cc).rev() {
5600 if let Some(c) = n.child(i) {
5601 stack.push(c);
5602 }
5603 }
5604 }
5605 }
5606 }
5607 None
5608}
5609
5610fn csharp_first_string_in_attribute(attr: Node, source: &str) -> Option<String> {
5611 for i in 0..attr.child_count() {
5612 let Some(c) = attr.child(i) else {
5613 continue;
5614 };
5615 if c.kind() != "attribute_argument_list" {
5616 continue;
5617 }
5618 for j in 0..c.named_child_count() {
5619 let Some(arg) = c.named_child(j) else {
5620 continue;
5621 };
5622 if arg.kind() == "attribute_argument" {
5623 if let Some(s) = csharp_first_string_in_subtree(arg, source) {
5624 return Some(s);
5625 }
5626 }
5627 }
5628 }
5629 None
5630}
5631
5632fn csharp_attribute_simple_name_from_name_node(n: Node, source: &str) -> Option<String> {
5634 match n.kind() {
5635 "identifier" => csharp_node_text(n, source),
5636 "generic_name" => {
5637 let id = n.child(0).filter(|c| c.kind() == "identifier")?;
5638 csharp_node_text(id, source)
5639 }
5640 "qualified_name" | "alias_qualified_name" => {
5641 let mut last = None;
5642 for i in 0..n.child_count() {
5643 let Some(c) = n.child(i) else {
5644 continue;
5645 };
5646 if !c.is_named() {
5647 continue;
5648 }
5649 if let Some(s) = csharp_attribute_simple_name_from_name_node(c, source) {
5650 last = Some(s);
5651 }
5652 }
5653 last
5654 }
5655 _ => None,
5656 }
5657}
5658
/// Normalises a raw route template into a path that starts with `/`.
///
/// Surrounding whitespace is trimmed; an empty result yields `None`. A template that
/// already starts with `/` is returned as is, otherwise a `/` is prepended.
fn csharp_route_path_from_str(path: &str) -> Option<String> {
    match path.trim() {
        "" => None,
        rooted if rooted.starts_with('/') => Some(rooted.to_string()),
        relative => Some(["/", relative].concat()),
    }
}
5669
5670fn csharp_type_level_route_template(type_node: Node, source: &str) -> Option<String> {
5672 let mut last: Option<String> = None;
5673 for i in 0..type_node.child_count() {
5674 let Some(c) = type_node.child(i) else {
5675 continue;
5676 };
5677 if c.kind() != "attribute_list" {
5678 continue;
5679 }
5680 for j in 0..c.child_count() {
5681 let Some(attr) = c.child(j) else {
5682 continue;
5683 };
5684 if attr.kind() != "attribute" {
5685 continue;
5686 }
5687 let Some(name_n) = attr.child_by_field_name("name") else {
5688 continue;
5689 };
5690 let Some(simple) = csharp_attribute_simple_name_from_name_node(name_n, source) else {
5691 continue;
5692 };
5693 if simple != "Route" {
5694 continue;
5695 }
5696 if let Some(raw) = csharp_first_string_in_attribute(attr, source) {
5697 last = csharp_route_path_from_str(&raw);
5698 }
5699 }
5700 }
5701 last
5702}
5703
5704fn csharp_enclosing_route_type_for_method(method: Node) -> Option<Node> {
5705 let mut p = method.parent();
5706 while let Some(n) = p {
5707 let k = n.kind();
5708 if k == "class_declaration" || k == "record_declaration" {
5709 return Some(n);
5710 }
5711 p = n.parent();
5712 }
5713 None
5714}
5715
/// Appends `v` to `verbs` unless an equal entry is already present (order-preserving,
/// case-sensitive de-duplication).
fn csharp_push_http_verb(verbs: &mut Vec<String>, v: &str) {
    let already_present = verbs.iter().any(|existing| existing.as_str() == v);
    if already_present {
        return;
    }
    verbs.push(v.to_owned());
}
5721
5722fn csharp_parse_method_api_attributes(
5724 method: Node,
5725 source: &str,
5726) -> (Vec<String>, Option<String>, Option<String>) {
5727 let mut verbs: Vec<String> = Vec::new();
5728 let mut http_template: Option<String> = None;
5729 let mut route_attr: Option<String> = None;
5730
5731 const VERB_ATTRS: &[(&str, &str)] = &[
5732 ("HttpGet", "GET"),
5733 ("HttpPost", "POST"),
5734 ("HttpPut", "PUT"),
5735 ("HttpDelete", "DELETE"),
5736 ("HttpPatch", "PATCH"),
5737 ("HttpHead", "HEAD"),
5738 ];
5739
5740 for i in 0..method.child_count() {
5741 let Some(c) = method.child(i) else {
5742 continue;
5743 };
5744 if c.kind() != "attribute_list" {
5745 continue;
5746 }
5747 for j in 0..c.child_count() {
5748 let Some(attr) = c.child(j) else {
5749 continue;
5750 };
5751 if attr.kind() != "attribute" {
5752 continue;
5753 }
5754 let Some(name_n) = attr.child_by_field_name("name") else {
5755 continue;
5756 };
5757 let Some(simple) = csharp_attribute_simple_name_from_name_node(name_n, source) else {
5758 continue;
5759 };
5760
5761 if simple == "Route" {
5762 if let Some(raw) = csharp_first_string_in_attribute(attr, source) {
5763 route_attr = csharp_route_path_from_str(&raw);
5764 }
5765 continue;
5766 }
5767
5768 if let Some((_, verb)) = VERB_ATTRS.iter().find(|(a, _)| *a == simple.as_str()) {
5769 csharp_push_http_verb(&mut verbs, verb);
5770 if let Some(raw) = csharp_first_string_in_attribute(attr, source) {
5771 http_template = csharp_route_path_from_str(&raw);
5772 }
5773 }
5774 }
5775 }
5776
5777 (verbs, http_template, route_attr)
5778}
5779
/// Joins a type-level route and a method-level template into one path.
///
/// The method segment is `method_template` with surrounding whitespace and slashes
/// removed; when that is absent or empty, the trimmed `method_name` is used instead.
/// The result is `/<base>/<segment>` when `class_route` contains anything besides
/// whitespace and slashes, and `/<segment>` otherwise.
fn csharp_join_route_parts(
    class_route: Option<&str>,
    method_template: Option<&str>,
    method_name: &str,
) -> String {
    /// Trims whitespace, then leading and trailing `/` characters.
    fn without_outer_slashes(text: &str) -> &str {
        text.trim().trim_matches('/')
    }

    let segment_source = match method_template
        .map(without_outer_slashes)
        .filter(|template| !template.is_empty())
    {
        Some(template) => template,
        None => method_name.trim(),
    };
    // Stripped a second time on purpose: removing slashes can expose whitespace
    // (e.g. "/ x /") that the first pass left in place.
    let segment = without_outer_slashes(segment_source);

    match class_route
        .map(without_outer_slashes)
        .filter(|base| !base.is_empty())
    {
        Some(base) => format!("/{}/{}", base, segment),
        None => format!("/{}", segment),
    }
}
5809
5810fn extract_csharp_api_endpoints_from_tree(tree: &Tree, source: &str) -> Vec<(Vec<String>, String, String)> {
5815 let mut endpoints: Vec<(Vec<String>, String, String)> = Vec::new();
5816 let root = tree.root_node();
5817
5818 walk_tree(root, |node| {
5819 if node.kind() != "method_declaration" {
5820 return;
5821 }
5822
5823 let method_name = node
5824 .child_by_field_name("name")
5825 .and_then(|n| csharp_node_text(n, source))
5826 .or_else(|| identifier_text_from_children(node, source));
5827
5828 let Some(method_name) = method_name else {
5829 return;
5830 };
5831
5832 let (verbs, http_template, route_attr) = csharp_parse_method_api_attributes(node, source);
5833
5834 if verbs.is_empty() && http_template.is_none() && route_attr.is_none() {
5835 return;
5836 }
5837
5838 let methods_http = if verbs.is_empty() {
5839 vec!["ANY".to_string()]
5840 } else {
5841 verbs
5842 };
5843
5844 let method_segment = http_template.or(route_attr);
5845 let class_route = csharp_enclosing_route_type_for_method(node)
5846 .and_then(|t| csharp_type_level_route_template(t, source));
5847
5848 let path_template = csharp_join_route_parts(
5849 class_route.as_deref(),
5850 method_segment.as_deref(),
5851 &method_name,
5852 );
5853
5854 endpoints.push((methods_http, path_template, method_name));
5855 });
5856
5857 endpoints
5858}
5859
5860fn build_internal_import_map(source: &str) -> HashMap<String, String> {
5864 let mut map = HashMap::new();
5865 for fqn in extract_internal_java_imports(source) {
5866 if let Some(simple) = fqn.rsplit('.').next() {
5867 map.insert(simple.to_string(), fqn.clone());
5868 }
5869 }
5870 map
5871}
5872
5873fn collect_local_var_types_for_method(
5879 method_node: Node,
5880 source: &str,
5881 package: Option<&str>,
5882 import_map: &HashMap<String, String>,
5883) -> HashMap<String, String> {
5884 let mut vars: HashMap<String, String> = HashMap::new();
5885
5886 walk_tree(method_node, |node| {
5887 if node.kind() != "local_variable_declaration" {
5888 return;
5889 }
5890
5891 let start = node.start_byte() as usize;
5892 let end = node.end_byte() as usize;
5893 if end > source.len() || start >= end {
5894 return;
5895 }
5896
5897 let stmt = &source[start..end];
5898 let before_eq = stmt.split('=').next().unwrap_or("").trim();
5899 let mut parts = before_eq.split_whitespace();
5901 let type_part = match parts.next() {
5902 Some(t) => t,
5903 None => return,
5904 };
5905 let var_part = match parts.next() {
5906 Some(v) => v,
5907 None => return,
5908 };
5909
5910 let type_simple = type_part
5912 .split('<')
5913 .next()
5914 .unwrap_or(type_part)
5915 .split('.')
5916 .last()
5917 .unwrap_or(type_part)
5918 .trim();
5919
5920 let var_name = var_part
5922 .trim_end_matches(';')
5923 .trim_end_matches(',')
5924 .trim();
5925
5926 if var_name.is_empty() || type_simple.is_empty() {
5927 return;
5928 }
5929
5930 let fqn = if let Some(import_fqn) = import_map.get(type_simple) {
5935 import_fqn.clone()
5936 } else if let Some(pkg_prefix) = import_map
5937 .values()
5938 .find(|v| v.ends_with(".*"))
5939 {
5940 let base = pkg_prefix.trim_end_matches(".*");
5941 format!("{base}.{type_simple}")
5942 } else if let Some(pkg) = package {
5943 format!("{pkg}.{type_simple}")
5944 } else {
5945 type_simple.to_string()
5946 };
5947
5948 vars.insert(var_name.to_string(), fqn);
5949 });
5950
5951 vars
5952}
5953
5954fn collect_identifiers(node: Node, source: &str, out: &mut Vec<String>) {
5958 walk_tree(node, |n| {
5959 if n.kind() == "identifier" {
5960 let start = n.start_byte() as usize;
5961 let end = n.end_byte() as usize;
5962 if end <= source.len() && start < end {
5963 out.push(source[start..end].to_string());
5964 }
5965 }
5966 });
5967}
5968
/// Extracts `(caller_fqn, callee_fqn)` pairs for every `method_invocation`
/// found inside a `method_declaration` of `tree`.
///
/// * `source` – text the tree is sliced against.
/// * `package` – package prefix for names that cannot be resolved via imports.
///
/// The caller FQN is `<class>.<method>`, where the class is the nearest
/// enclosing `class_declaration` / `interface_declaration`, qualified with
/// `package` when one is given. The callee FQN is built from, in order: the
/// resolved receiver type, the caller's own class, the package, or the bare
/// method name.
fn extract_java_calls(
    tree: &Tree,
    source: &str,
    package: Option<&str>,
) -> Vec<(String, String)> {
    let mut calls: Vec<(String, String)> = Vec::new();
    let root = tree.root_node();

    // simple name -> imported FQN, shared by all methods in the file
    let import_map = build_internal_import_map(source);

    walk_tree(root, |node| {
        if node.kind() != "method_declaration" {
            return;
        }

        let method_name = match identifier_text_from_children(node, source) {
            Some(name) => name,
            None => return,
        };

        // Walk up to the nearest class/interface declaration. The search stops
        // at the first one even if its name cannot be read.
        let mut parent = node.parent();
        let mut class_fqn: Option<String> = None;
        while let Some(p) = parent {
            let pk = p.kind();
            if pk == "class_declaration" || pk == "interface_declaration" {
                if let Some(class_name) = identifier_text_from_children(p, source) {
                    let full = if let Some(pkg) = package {
                        format!("{pkg}.{class_name}")
                    } else {
                        class_name
                    };
                    class_fqn = Some(full);
                }
                break;
            }
            parent = p.parent();
        }

        let caller_fqn = if let Some(ref cls) = class_fqn {
            format!("{cls}.{}", method_name)
        } else if let Some(pkg) = package {
            format!("{pkg}.{}", method_name)
        } else {
            method_name.clone()
        };

        // variable name -> type FQN for locals declared in this method
        let local_var_types =
            collect_local_var_types_for_method(node, source, package, &import_map);

        walk_tree(node, |child| {
            if child.kind() != "method_invocation" {
                return;
            }

            // Prefer the grammar's `name` field; fall back to the generic
            // child-identifier lookup when it is missing or has an unusable
            // byte range.
            let callee_name = if let Some(name_node) = child.child_by_field_name("name") {
                let start = name_node.start_byte() as usize;
                let end = name_node.end_byte() as usize;
                if end <= source.len() && start < end {
                    source[start..end].to_string()
                } else {
                    match identifier_text_from_children(child, source) {
                        Some(name) => name,
                        None => return,
                    }
                }
            } else {
                match identifier_text_from_children(child, source) {
                    Some(name) => name,
                    None => return,
                }
            };

            let mut receiver_type_fqn: Option<String> = None;
            if let Some(object_node) = child.child_by_field_name("object") {
                let mut recv_idents: Vec<String> = Vec::new();
                collect_identifiers(object_node, source, &mut recv_idents);

                // First choice: any identifier in the receiver expression that
                // names a known local variable.
                for ident in &recv_idents {
                    if let Some(ty) = local_var_types.get(ident) {
                        receiver_type_fqn = Some(ty.clone());
                        break;
                    }
                }

                // Otherwise treat the first identifier as a type name and
                // resolve it via explicit import, wildcard import, or package.
                if receiver_type_fqn.is_none() {
                    if let Some(first_ident) = recv_idents.first() {
                        if let Some(import_fqn) = import_map.get(first_ident) {
                            receiver_type_fqn = Some(import_fqn.clone());
                        } else if let Some(pkg_prefix) = import_map
                            .values()
                            .find(|v| v.ends_with(".*"))
                        {
                            let base = pkg_prefix.trim_end_matches(".*");
                            receiver_type_fqn =
                                Some(format!("{base}.{first_ident}"));
                        } else if let Some(pkg) = package {
                            receiver_type_fqn =
                                Some(format!("{pkg}.{first_ident}"));
                        }
                    }
                }
            }

            // No receiver type: assume a call on the caller's own class.
            let callee_fqn = if let Some(ref recv_ty) = receiver_type_fqn {
                format!("{recv_ty}.{}", callee_name)
            } else if let Some(ref cls) = class_fqn {
                format!("{cls}.{}", callee_name)
            } else if let Some(pkg) = package {
                format!("{pkg}.{}", callee_name)
            } else {
                callee_name.clone()
            };

            calls.push((caller_fqn.clone(), callee_fqn));
        });
    });

    calls
}
6122
6123fn node_text_slice(n: Node, source: &str) -> Option<String> {
6130 let s = n.start_byte() as usize;
6131 let e = (n.end_byte() as usize).min(source.len());
6132 (s < e).then(|| source[s..e].to_string())
6133}
6134
/// Builds a file-scoped identifier of the form `<file_path>::<logical_name>`.
fn non_java_file_scoped_fqn(file_path: &str, logical_name: &str) -> String {
    [file_path, logical_name].join("::")
}
6138
6139fn extract_non_java_function_symbols(
6140 file: &ParsedFile,
6141 source: &str,
6142 file_path: &str,
6143) -> Vec<FunctionSymbol> {
6144 match file.language {
6145 LanguageId::Rust => extract_top_level_functions(LanguageId::Rust, &file.tree, source),
6146 LanguageId::Python => extract_python_graph_symbols(&file.tree, source, file_path),
6147 LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
6148 extract_js_ts_graph_symbols(&file.tree, source, file_path, file.language)
6149 }
6150 _ => Vec::new(),
6151 }
6152}
6153
6154fn extract_python_parse_warnings(tree: &Tree, source: &str) -> Vec<(usize, usize, String)> {
6155 let mut out = Vec::new();
6156 walk_tree(tree.root_node(), |node| {
6157 if node.kind() == "ERROR" || node.is_missing() {
6158 let pos = node.start_position();
6159 let s = node.start_byte() as usize;
6160 let e = (node.end_byte() as usize).min(source.len());
6161 let snippet = if s < e {
6162 source[s..e]
6163 .chars()
6164 .take(120)
6165 .collect::<String>()
6166 .replace('\n', " ")
6167 } else {
6168 String::new()
6169 };
6170 out.push((pos.row + 1, pos.column + 1, snippet));
6171 }
6172 });
6173 out
6174}
6175
/// Lists parse problems in a JavaScript/TypeScript tree as
/// `(line, column, snippet)` triples.
///
/// Delegates to `extract_python_parse_warnings`, so the result format is
/// identical to the Python variant.
fn extract_js_ts_parse_warnings(tree: &Tree, source: &str) -> Vec<(usize, usize, String)> {
    extract_python_parse_warnings(tree, source)
}
6179
6180fn python_node_inside_class(mut node: Node) -> bool {
6181 while let Some(p) = node.parent() {
6182 if p.kind() == "class_definition" {
6183 return true;
6184 }
6185 node = p;
6186 }
6187 false
6188}
6189
6190fn python_function_definition_name(fn_node: Node, source: &str) -> Option<String> {
6191 fn_node
6192 .child_by_field_name("name")
6193 .and_then(|n| node_text_slice(n, source))
6194 .map(|s| s.trim().to_string())
6195 .filter(|s| !s.is_empty())
6196}
6197
6198fn python_enclosing_function_prefixes(fn_node: Node, source: &str) -> Vec<String> {
6199 let mut prefixes = Vec::new();
6200 let mut cur = fn_node.parent();
6201 while let Some(p) = cur {
6202 if p.kind() == "function_definition" && !python_node_inside_class(p) {
6203 if let Some(n) = python_function_definition_name(p, source) {
6204 prefixes.insert(0, n);
6205 }
6206 }
6207 cur = p.parent();
6208 }
6209 prefixes
6210}
6211
6212fn python_function_logical_name(fn_node: Node, source: &str) -> Option<String> {
6213 let name = python_function_definition_name(fn_node, source)?;
6214 let prefixes = python_enclosing_function_prefixes(fn_node, source);
6215 Some(if prefixes.is_empty() {
6216 name
6217 } else {
6218 format!("{}.{}", prefixes.join("."), name)
6219 })
6220}
6221
6222fn extract_python_graph_symbols(tree: &Tree, source: &str, file_path: &str) -> Vec<FunctionSymbol> {
6223 let mut out = Vec::new();
6224 let mut seen = HashSet::new();
6225 walk_tree(tree.root_node(), |node| {
6226 if node.kind() != "function_definition" {
6227 return;
6228 }
6229 if python_node_inside_class(node) {
6230 return;
6231 }
6232 let Some(logical) = python_function_logical_name(node, source) else {
6233 return;
6234 };
6235 let fqn = non_java_file_scoped_fqn(file_path, &logical);
6236 if !seen.insert(fqn.clone()) {
6237 return;
6238 }
6239 let name = python_function_definition_name(node, source).unwrap_or_default();
6240 out.push(FunctionSymbol {
6241 name,
6242 fqn,
6243 class_fqn: None,
6244 return_type: None,
6245 param_types: Vec::new(),
6246 param_count: 0,
6247 modifiers: Vec::new(),
6248 is_pointer_receiver: None,
6249 });
6250 });
6251 out
6252}
6253
6254fn python_innermost_enclosing_function(call: Node) -> Option<Node> {
6255 let mut cur = call.parent();
6256 while let Some(p) = cur {
6257 if p.kind() == "function_definition" && !python_node_inside_class(p) {
6258 return Some(p);
6259 }
6260 cur = p.parent();
6261 }
6262 None
6263}
6264
6265fn extract_python_intrafile_calls(
6266 tree: &Tree,
6267 source: &str,
6268 file_path: &str,
6269 name_to_fqn: &HashMap<String, String>,
6270) -> Vec<(String, String)> {
6271 let mut calls = Vec::new();
6272 walk_tree(tree.root_node(), |inner| {
6273 if inner.kind() != "call" {
6274 return;
6275 }
6276 let Some(encl) = python_innermost_enclosing_function(inner) else {
6277 return;
6278 };
6279 let Some(logical) = python_function_logical_name(encl, source) else {
6280 return;
6281 };
6282 let caller_fqn = non_java_file_scoped_fqn(file_path, &logical);
6283 let Some(func_n) = inner.child_by_field_name("function") else {
6284 return;
6285 };
6286 if func_n.kind() != "identifier" {
6287 return;
6288 }
6289 let Some(callee_name) = node_text_slice(func_n, source) else {
6290 return;
6291 };
6292 let callee_name = callee_name.trim();
6293 if let Some(callee_fqn) = name_to_fqn.get(callee_name) {
6294 calls.push((caller_fqn, callee_fqn.clone()));
6295 }
6296 });
6297 calls
6298}
6299
6300fn extract_python_import_modules(tree: &Tree, source: &str) -> Vec<String> {
6301 let mut out = Vec::new();
6302 walk_tree(tree.root_node(), |node| match node.kind() {
6303 "import_from_statement" => {
6304 if let Some(mod_n) = node.child_by_field_name("module_name") {
6305 let t = node_text_slice(mod_n, source).unwrap_or_default();
6306 let t = t.trim();
6307 if !t.is_empty() && t != "." && !t.starts_with('.') {
6308 out.push(t.to_string());
6309 }
6310 }
6311 }
6312 "import_statement" => {
6313 let mut c = node.walk();
6314 if !c.goto_first_child() {
6315 return;
6316 }
6317 loop {
6318 let ch = c.node();
6319 match ch.kind() {
6320 "dotted_name" => {
6321 if let Some(t) = node_text_slice(ch, source) {
6322 let t = t.trim();
6323 if !t.is_empty() {
6324 out.push(t.to_string());
6325 }
6326 }
6327 }
6328 "aliased_import" => {
6329 if let Some(name_n) = ch.child_by_field_name("name") {
6330 if name_n.kind() == "dotted_name" {
6331 if let Some(t) = node_text_slice(name_n, source) {
6332 let t = t.trim();
6333 if !t.is_empty() {
6334 out.push(t.to_string());
6335 }
6336 }
6337 }
6338 }
6339 }
6340 _ => {}
6341 }
6342 if !c.goto_next_sibling() {
6343 break;
6344 }
6345 }
6346 }
6347 _ => {}
6348 });
6349 out.sort();
6350 out.dedup();
6351 out
6352}
6353
/// Resolves a dotted Python module path to one of the scanned files.
///
/// `module_path` (e.g. `pkg.mod`) is turned into a relative path (`pkg/mod`)
/// and matched against `known_paths` on path-component boundaries, so `os`
/// no longer resolves to `src/cosmos.py`. Candidates are ranked:
///
/// 1. the module file itself: `.../pkg/mod.py`;
/// 2. the package initializer: `.../pkg/mod/__init__.py`;
/// 3. any `.py` file below a directory `.../pkg/mod/` (package without an
///    `__init__.py`).
///
/// Within a rank the shortest path wins, and equal lengths are broken
/// lexicographically so the result does not depend on `HashSet` iteration
/// order. Backslashes in known paths are treated as `/` for matching; the
/// returned string is the original, unmodified entry.
///
/// Returns `None` for a blank module path or when nothing matches.
fn resolve_python_import_to_known_file(
    module_path: &str,
    known_paths: &HashSet<String>,
) -> Option<String> {
    /// True when `path` is exactly `tail` or ends with `/tail`.
    fn ends_at_boundary(path: &str, tail: &str) -> bool {
        path == tail
            || path
                .strip_suffix(tail)
                .map_or(false, |head| head.ends_with('/'))
    }

    let norm = module_path.trim();
    if norm.is_empty() {
        return None;
    }

    let needle = norm.replace('.', "/");
    let module_file = format!("{needle}.py");
    let package_init = format!("{needle}/__init__.py");
    let dir_prefix = format!("{needle}/");
    let dir_infix = format!("/{needle}/");

    known_paths
        .iter()
        .filter_map(|original| {
            let path = original.replace('\\', "/");
            if !path.ends_with(".py") {
                return None;
            }
            let rank: u8 = if ends_at_boundary(&path, &module_file) {
                0
            } else if ends_at_boundary(&path, &package_init) {
                1
            } else if path.starts_with(&dir_prefix) || path.contains(&dir_infix) {
                2
            } else {
                return None;
            };
            Some((rank, original.len(), original))
        })
        .min()
        .map(|(_, _, original)| original.clone())
}
6372
6373fn js_inside_class(mut node: Node) -> bool {
6374 while let Some(p) = node.parent() {
6375 if p.kind() == "class_declaration" {
6376 return true;
6377 }
6378 node = p;
6379 }
6380 false
6381}
6382
6383fn js_function_declaration_name(node: Node, source: &str) -> Option<String> {
6384 node.child_by_field_name("name")
6385 .and_then(|n| node_text_slice(n, source))
6386 .map(|s| s.trim().to_string())
6387 .filter(|s| !s.is_empty())
6388}
6389
6390fn js_nested_function_declaration_prefix(fn_node: Node, source: &str, self_name: &str) -> String {
6391 let mut prefixes = Vec::new();
6392 let mut cur = fn_node.parent();
6393 while let Some(p) = cur {
6394 if p.kind() == "function_declaration" && !js_inside_class(p) {
6395 if let Some(n) = js_function_declaration_name(p, source) {
6396 prefixes.insert(0, n);
6397 }
6398 }
6399 cur = p.parent();
6400 }
6401 if prefixes.is_empty() {
6402 self_name.to_string()
6403 } else {
6404 format!("{}.{}", prefixes.join("."), self_name)
6405 }
6406}
6407
6408fn js_prefix_from_ancestors_for_expr(expr_node: Node, source: &str, var_name: &str) -> String {
6409 let mut prefixes = Vec::new();
6410 let mut cur = expr_node.parent();
6411 while let Some(p) = cur {
6412 if p.kind() == "function_declaration" && !js_inside_class(p) {
6413 if let Some(n) = js_function_declaration_name(p, source) {
6414 prefixes.insert(0, n);
6415 }
6416 }
6417 cur = p.parent();
6418 }
6419 if prefixes.is_empty() {
6420 var_name.to_string()
6421 } else {
6422 format!("{}.{}", prefixes.join("."), var_name)
6423 }
6424}
6425
6426fn js_class_declaration_name_from(class_node: Node, source: &str) -> Option<String> {
6427 class_node
6428 .child_by_field_name("name")
6429 .and_then(|n| node_text_slice(n, source))
6430 .map(|s| s.trim().to_string())
6431 .filter(|s| !s.is_empty())
6432}
6433
6434fn js_enclosing_class_declaration_name(start: Node, source: &str) -> Option<String> {
6435 let mut cur = start.parent();
6436 while let Some(p) = cur {
6437 if p.kind() == "class_declaration" {
6438 return js_class_declaration_name_from(p, source);
6439 }
6440 cur = p.parent();
6441 }
6442 None
6443}
6444
6445fn js_property_name_text(method_node: Node, source: &str) -> Option<String> {
6446 let name_node = method_node
6447 .child_by_field_name("name")
6448 .or_else(|| method_node.child_by_field_name("property"))?;
6449 match name_node.kind() {
6450 "property_identifier" | "identifier" | "private_property_identifier" => {
6451 node_text_slice(name_node, source)
6452 }
6453 _ => None,
6454 }
6455 .map(|s| s.trim().to_string())
6456 .filter(|s| !s.is_empty())
6457}
6458
/// Reports whether `kind` is one of the node kinds treated as a class member
/// that may define a function: a method definition or one of the field
/// definition variants.
fn is_js_ts_class_member_fn(kind: &str) -> bool {
    const MEMBER_KINDS: [&str; 5] = [
        "method_definition",
        "field_definition",
        "public_field_definition",
        "private_field_definition",
        "protected_field_definition",
    ];
    MEMBER_KINDS.contains(&kind)
}
6469
/// Strips one pair of matching `"` or `'` quotes from a string literal's text.
///
/// The input is trimmed first. When it does not both start and end with the
/// same quote character, the trimmed text is returned unchanged.
fn js_ts_string_inner(spec: &str) -> String {
    let trimmed = spec.trim();
    for quote in ['"', '\''] {
        // A lone quote character has nothing after the prefix to close it,
        // so `strip_suffix` fails and the text is left alone.
        let unquoted = trimmed
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = unquoted {
            return inner.to_string();
        }
    }
    trimmed.to_string()
}
6481
6482fn extract_js_ts_import_specifiers(tree: &Tree, source: &str) -> Vec<String> {
6483 let mut out = Vec::new();
6484 walk_tree(tree.root_node(), |node| {
6485 if node.kind() != "import_statement" && node.kind() != "export_statement" {
6486 return;
6487 }
6488 if let Some(src) = node.child_by_field_name("source") {
6489 if src.kind() == "string" {
6490 if let Some(raw) = node_text_slice(src, source) {
6491 let inner = js_ts_string_inner(&raw);
6492 if !inner.is_empty() {
6493 out.push(inner);
6494 }
6495 }
6496 }
6497 }
6498 });
6499 out.sort();
6500 out.dedup();
6501 out
6502}
6503
/// Lexically normalizes `path` into a `/`-separated string.
///
/// `.` components are dropped and each `..` removes the preceding component
/// (a `..` with nothing before it is ignored). Platform prefixes are
/// discarded. A leading root is preserved as a single leading `/`. The
/// filesystem is never consulted.
fn normalized_logical_path(path: &Path) -> String {
    use std::path::Component;
    let mut parts: Vec<String> = Vec::new();
    let mut starts_root = false;
    for c in path.components() {
        match c {
            Component::RootDir => starts_root = true,
            Component::Prefix(_) => {}
            Component::CurDir => {}
            Component::Normal(s) => parts.push(s.to_string_lossy().into_owned()),
            Component::ParentDir => {
                parts.pop();
            }
        }
    }
    let s = parts.join("/");
    if starts_root {
        format!("/{s}")
    } else {
        s
    }
}

/// Resolves a JavaScript/TypeScript import specifier to one of the scanned
/// files.
///
/// * Blank specifiers and specifiers starting with `@` are never resolved.
/// * Relative specifiers (starting with `.`) are joined onto the directory of
///   `current_file`, normalized, and looked up exactly in `known_paths`: first
///   as written and with `.ts`/`.tsx`/`.js`/`.jsx` appended, then as
///   `<base>/index.<ext>`.
/// * Any other specifier is matched against known `.ts`/`.tsx`/`.js`/`.jsx`
///   files on path-component boundaries, so `react` no longer resolves to
///   `src/components/reactive.ts`. Candidates are ranked:
///   1. the file itself (`.../spec` or `.../spec.<ext>`);
///   2. a directory index (`.../spec/index.<ext>`);
///   3. any source file below a directory `.../spec/`.
///
///   Within a rank the shortest path wins and equal lengths are broken
///   lexicographically, so the result does not depend on `HashSet` iteration
///   order. Backslashes are treated as `/` for matching; the returned string
///   is the original entry.
fn resolve_js_ts_import_to_known_file(
    spec: &str,
    current_file: &str,
    known_paths: &HashSet<String>,
) -> Option<String> {
    const SOURCE_EXTS: [&str; 4] = [".ts", ".tsx", ".js", ".jsx"];

    /// True when `path` is exactly `tail` or ends with `/tail`.
    fn ends_at_boundary(path: &str, tail: &str) -> bool {
        path == tail
            || path
                .strip_suffix(tail)
                .map_or(false, |head| head.ends_with('/'))
    }

    let spec = spec.trim();
    if spec.is_empty() || spec.starts_with('@') {
        return None;
    }

    if spec.starts_with('.') {
        let dir = Path::new(current_file).parent()?;
        let base = normalized_logical_path(&dir.join(spec)).replace('\\', "/");
        let direct = ["", ".ts", ".tsx", ".js", ".jsx"]
            .iter()
            .map(|ext| format!("{base}{ext}"));
        let index = SOURCE_EXTS.iter().map(|ext| format!("{base}/index{ext}"));
        // Direct file candidates are all tried before any index candidate.
        return direct.chain(index).find(|cand| known_paths.contains(cand));
    }

    let needle_owned = spec.replace('\\', "/");
    let needle = needle_owned.trim_matches('/');
    if needle.is_empty() {
        return None;
    }
    let index_stem = format!("{needle}/index");
    let dir_prefix = format!("{needle}/");
    let dir_infix = format!("/{needle}/");

    known_paths
        .iter()
        .filter_map(|original| {
            let path = original.replace('\\', "/");
            let ext = SOURCE_EXTS.iter().find(|ext| path.ends_with(**ext))?;
            let stem = &path[..path.len() - ext.len()];
            let rank: u8 = if ends_at_boundary(&path, needle) || ends_at_boundary(stem, needle) {
                0
            } else if ends_at_boundary(stem, &index_stem) {
                1
            } else if path.starts_with(&dir_prefix) || path.contains(&dir_infix) {
                2
            } else {
                return None;
            };
            Some((rank, original.len(), original))
        })
        .min()
        .map(|(_, _, original)| original.clone())
}
6579
6580fn extract_js_ts_graph_symbols(
6581 tree: &Tree,
6582 source: &str,
6583 file_path: &str,
6584 language: LanguageId,
6585) -> Vec<FunctionSymbol> {
6586 let _ = language;
6587 let mut out = Vec::new();
6588 let mut seen = HashSet::new();
6589 let root = tree.root_node();
6590
6591 let mut push = |logical: String, name: String| {
6592 let fqn = non_java_file_scoped_fqn(file_path, &logical);
6593 if seen.insert(fqn.clone()) {
6594 out.push(FunctionSymbol {
6595 name,
6596 fqn,
6597 class_fqn: None,
6598 return_type: None,
6599 param_types: Vec::new(),
6600 param_count: 0,
6601 modifiers: Vec::new(),
6602 is_pointer_receiver: None,
6603 });
6604 }
6605 };
6606
6607 walk_tree(root, |node| {
6608 let kind = node.kind();
6609 if kind == "function_declaration" {
6610 if js_inside_class(node) {
6611 return;
6612 }
6613 let Some(nm) = js_function_declaration_name(node, source) else {
6614 return;
6615 };
6616 let logical = js_nested_function_declaration_prefix(node, source, &nm);
6617 push(logical, nm);
6618 return;
6619 }
6620
6621 if is_js_ts_class_member_fn(kind) {
6622 let Some(meth) = js_property_name_text(node, source) else {
6623 return;
6624 };
6625 let cls = js_enclosing_class_declaration_name(node, source)
6626 .unwrap_or_else(|| "anonymous_class".to_string());
6627 let logical = format!("{cls}.{meth}");
6628 push(logical, meth);
6629 return;
6630 }
6631
6632 if kind == "variable_declarator" {
6633 let Some(val) = node.child_by_field_name("value") else {
6634 return;
6635 };
6636 if !matches!(val.kind(), "arrow_function" | "function_expression") {
6637 return;
6638 }
6639 let Some(name_n) = node.child_by_field_name("name") else {
6640 return;
6641 };
6642 if name_n.kind() != "identifier" {
6643 return;
6644 }
6645 let Some(var_name) = node_text_slice(name_n, source) else {
6646 return;
6647 };
6648 let var_name = var_name.trim().to_string();
6649 if var_name.is_empty() {
6650 return;
6651 }
6652 if js_inside_class(node) {
6653 let cls = js_enclosing_class_declaration_name(node, source)
6654 .unwrap_or_else(|| "anonymous_class".to_string());
6655 let logical = format!("{cls}.{var_name}");
6656 push(logical, var_name);
6657 } else {
6658 let logical = js_prefix_from_ancestors_for_expr(val, source, &var_name);
6659 push(logical, var_name);
6660 }
6661 }
6662 });
6663
6664 out
6665}
6666
/// Determines the logical name of the function-like construct that encloses
/// `call`, walking up through its ancestors.
///
/// The first ancestor that matches one of these shapes decides the result:
/// * a `function_declaration` outside a class -> its nested dotted name;
/// * a class member (see `is_js_ts_class_member_fn`) -> `Class.member`, with
///   `anonymous_class` when the class name is unavailable;
/// * an arrow function / function expression that has a `variable_declarator`
///   somewhere above it -> the variable's nested dotted name.
///
/// Returns `None` when no ancestor matches, and also when the matching
/// ancestor's name cannot be read (the `?` operators below stop the search
/// instead of continuing upwards).
fn js_ts_innermost_enclosing_logical(call: Node, source: &str) -> Option<String> {
    let mut cur = call.parent();
    while let Some(p) = cur {
        match p.kind() {
            "function_declaration" => {
                // Declarations nested in a class are skipped; the walk
                // continues so the enclosing member can match instead.
                if js_inside_class(p) {
                    cur = p.parent();
                    continue;
                }
                let nm = js_function_declaration_name(p, source)?;
                return Some(js_nested_function_declaration_prefix(p, source, &nm));
            }
            k if is_js_ts_class_member_fn(k) => {
                let meth = js_property_name_text(p, source)?;
                let cls = js_enclosing_class_declaration_name(p, source)
                    .unwrap_or_else(|| "anonymous_class".to_string());
                return Some(format!("{cls}.{meth}"));
            }
            "arrow_function" | "function_expression" => {
                // Look for a variable declarator at or above this function.
                // NOTE(review): the search is not limited to the direct
                // parent, so a callback passed inside `const x = f(() => ..)`
                // is attributed to `x` — confirm this is intended.
                let mut up = Some(p);
                while let Some(x) = up {
                    if x.kind() == "variable_declarator" {
                        let name_n = x.child_by_field_name("name")?;
                        if name_n.kind() != "identifier" {
                            return None;
                        }
                        let vn = node_text_slice(name_n, source)?;
                        let vn = vn.trim();
                        if vn.is_empty() {
                            return None;
                        }
                        return Some(js_prefix_from_ancestors_for_expr(p, source, vn));
                    }
                    up = x.parent();
                }
                // No declarator found: fall through and keep walking up.
            }
            _ => {}
        }
        cur = p.parent();
    }
    None
}
6709
6710fn extract_js_ts_intrafile_calls(
6711 tree: &Tree,
6712 source: &str,
6713 file_path: &str,
6714 language: LanguageId,
6715 name_to_fqn: &HashMap<String, String>,
6716) -> Vec<(String, String)> {
6717 let _ = language;
6718 let mut calls = Vec::new();
6719 walk_tree(tree.root_node(), |inner| {
6720 if inner.kind() != "call_expression" {
6721 return;
6722 }
6723 let Some(logical) = js_ts_innermost_enclosing_logical(inner, source) else {
6724 return;
6725 };
6726 let caller_fqn = non_java_file_scoped_fqn(file_path, &logical);
6727 let Some(func_n) = inner.child_by_field_name("function") else {
6728 return;
6729 };
6730 if func_n.kind() != "identifier" {
6731 return;
6732 }
6733 let Some(callee_name) = node_text_slice(func_n, source) else {
6734 return;
6735 };
6736 let callee_name = callee_name.trim();
6737 if let Some(callee_fqn) = name_to_fqn.get(callee_name) {
6738 calls.push((caller_fqn, callee_fqn.clone()));
6739 }
6740 });
6741 calls
6742}
6743
6744fn extract_top_level_functions(
6748 language: LanguageId,
6749 tree: &Tree,
6750 source: &str,
6751) -> Vec<FunctionSymbol> {
6752 let mut functions: Vec<FunctionSymbol> = Vec::new();
6753 let root = tree.root_node();
6754
6755 walk_tree(root, |node| {
6756 let kind = node.kind();
6757 let is_function = match language {
6758 LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
6759 kind == "function_declaration"
6760 }
6761 LanguageId::Python => kind == "function_definition",
6762 LanguageId::Rust => kind == "function_item",
6763 LanguageId::Go => kind == "function_declaration",
6764 LanguageId::Java | LanguageId::Erlang | LanguageId::CSharp => false,
6768 };
6769
6770 if !is_function {
6771 return;
6772 }
6773
6774 if let Some(name) = identifier_text_from_children(node, source) {
6775 functions.push(FunctionSymbol {
6778 name: name.clone(),
6779 fqn: name,
6780 class_fqn: None,
6781 return_type: None,
6782 param_types: Vec::new(),
6783 param_count: 0,
6784 modifiers: Vec::new(),
6785 is_pointer_receiver: None,
6786 });
6787 }
6788 });
6789
6790 functions
6791}
6792
/// Extracts Spring MVC endpoints from Java source with a line-based scan.
///
/// Each entry is `(http_methods, path_template, method_name)`.
///
/// How lines are interpreted:
/// * `@RequestMapping` without the text `method` sets the class-level base
///   path (when a quoted path is present).
/// * `@GetMapping` / `@PostMapping` / `@PutMapping` / `@DeleteMapping` /
///   `@PatchMapping` record their verb and path (`/` when no path is quoted).
/// * `@RequestMapping` containing `method` records its path and **every**
///   `RequestMethod.<VERB>` it mentions, including `PATCH`, `HEAD`, `OPTIONS`
///   and `TRACE`. If no verb is mentioned, a path is pending and no verb was
///   recorded yet, the verb is `ANY`.
/// * A line starting with `public `, `private ` or `protected ` that contains
///   `(` closes the pending annotations: the token before `(` is the method
///   name, and the pending state is reset.
///
/// The path is the base path joined with the method path; without a base
/// path it is the method path, or `/<method_name>` when none is pending.
///
/// This is a heuristic: annotations spanning several lines, package-private
/// handler methods and method-level `@RequestMapping` without `method` are
/// not recognised as such.
fn extract_java_spring_endpoints(source: &str) -> Vec<(Vec<String>, String, String)> {
    const SHORTCUT_MAPPINGS: [(&str, &str); 5] = [
        ("@GetMapping", "GET"),
        ("@PostMapping", "POST"),
        ("@PutMapping", "PUT"),
        ("@DeleteMapping", "DELETE"),
        ("@PatchMapping", "PATCH"),
    ];
    // Constants of Spring's `RequestMethod` enum, in the order verbs are
    // reported when one annotation lists several.
    const REQUEST_METHODS: [&str; 8] = [
        "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS", "TRACE",
    ];

    let mut endpoints: Vec<(Vec<String>, String, String)> = Vec::new();
    let mut class_base_path: Option<String> = None;
    let mut pending_methods: Vec<String> = Vec::new();
    let mut pending_path: Option<String> = None;

    for line in source.lines() {
        let trimmed = line.trim();

        if trimmed.starts_with("@RequestMapping") && !trimmed.contains("method") {
            if let Some(path) = extract_java_annotation_path(trimmed) {
                class_base_path = Some(path);
            }
            continue;
        }

        if trimmed.starts_with("@RestController") || trimmed.starts_with("@Controller") {
            continue;
        }

        if trimmed.starts_with('@') {
            for (attr, verb) in SHORTCUT_MAPPINGS {
                if trimmed.starts_with(attr) {
                    pending_methods.push(verb.to_string());
                    pending_path = Some(
                        extract_java_annotation_path(trimmed).unwrap_or_else(|| "/".to_string()),
                    );
                }
            }

            if trimmed.starts_with("@RequestMapping") {
                if let Some(path) = extract_java_annotation_path(trimmed) {
                    pending_path = Some(path);
                }
                // Record every verb named on the line, not just the first.
                pending_methods.extend(
                    REQUEST_METHODS
                        .iter()
                        .filter(|verb| trimmed.contains(&format!("RequestMethod.{verb}")))
                        .map(|verb| verb.to_string()),
                );
                if pending_methods.is_empty() && pending_path.is_some() {
                    pending_methods.push("ANY".to_string());
                }
            }
            continue;
        }

        let has_visibility = trimmed.starts_with("public ")
            || trimmed.starts_with("private ")
            || trimmed.starts_with("protected ");
        if !has_visibility || pending_methods.is_empty() {
            continue;
        }
        let Some((before_paren, _)) = trimmed.split_once('(') else {
            continue;
        };
        let Some(method_name) = before_paren.split_whitespace().last().map(str::to_string) else {
            continue;
        };

        let path_template = match class_base_path {
            Some(ref base) => {
                let method_path = pending_path.clone().unwrap_or_else(|| "/".to_string());
                let base = base.trim_end_matches('/');
                if method_path.starts_with('/') {
                    format!("{base}{method_path}")
                } else {
                    format!("{base}/{method_path}")
                }
            }
            None => pending_path
                .clone()
                .unwrap_or_else(|| format!("/{method_name}")),
        };

        endpoints.push((pending_methods.clone(), path_template, method_name));

        pending_methods.clear();
        pending_path = None;
    }

    endpoints
}

/// Returns the contents of the first double-quoted literal on `attr_line`.
///
/// Yields `None` when there is no opening quote, no closing quote, or the
/// literal is empty.
fn extract_java_annotation_path(attr_line: &str) -> Option<String> {
    let (_, after_open) = attr_line.split_once('"')?;
    let (literal, _) = after_open.split_once('"')?;
    (!literal.is_empty()).then(|| literal.to_string())
}
6916
6917fn extract_java_used_classes(
6926 tree: &Tree,
6927 source: &str,
6928 package: Option<&str>,
6929) -> Vec<(String, String)> {
6930 let mut uses: Vec<(String, String)> = Vec::new();
6931 let root = tree.root_node();
6932 let import_map = build_internal_import_map(source);
6933
6934 walk_tree(root, |node| {
6935 if node.kind() != "method_declaration" {
6936 return;
6937 }
6938
6939 let method_name = match identifier_text_from_children(node, source) {
6940 Some(name) => name,
6941 None => return,
6942 };
6943
6944 let mut parent = node.parent();
6946 let mut class_fqn: Option<String> = None;
6947 while let Some(p) = parent {
6948 let pk = p.kind();
6949 if pk == "class_declaration" || pk == "interface_declaration" {
6950 if let Some(class_name) = identifier_text_from_children(p, source) {
6951 let full = if let Some(pkg) = package {
6952 format!("{pkg}.{class_name}")
6953 } else {
6954 class_name
6955 };
6956 class_fqn = Some(full);
6957 }
6958 break;
6959 }
6960 parent = p.parent();
6961 }
6962
6963 let func_fqn = if let Some(ref cls) = class_fqn {
6964 format!("{cls}.{}", method_name)
6965 } else if let Some(pkg) = package {
6966 format!("{pkg}.{}", method_name)
6967 } else {
6968 method_name.clone()
6969 };
6970
6971 walk_tree(node, |child| {
6973 let kind = child.kind();
6974
6975 if kind == "type_identifier" || kind == "object_creation_expression" {
6977 let type_name = if kind == "object_creation_expression" {
6978 child.child_by_field_name("type")
6980 .and_then(|t| {
6981 let start = t.start_byte() as usize;
6982 let end = t.end_byte() as usize;
6983 if end <= source.len() && start < end {
6984 Some(source[start..end].to_string())
6985 } else {
6986 None
6987 }
6988 })
6989 } else {
6990 let start = child.start_byte() as usize;
6991 let end = child.end_byte() as usize;
6992 if end <= source.len() && start < end {
6993 Some(source[start..end].to_string())
6994 } else {
6995 None
6996 }
6997 };
6998
6999 if let Some(type_name) = type_name {
7000 if is_java_primitive_or_builtin(&type_name) {
7002 return;
7003 }
7004
7005 let used_class_fqn = if let Some(fqn) = import_map.get(&type_name) {
7007 fqn.clone()
7008 } else if let Some(pkg) = package {
7009 format!("{pkg}.{type_name}")
7010 } else {
7011 type_name
7012 };
7013
7014 uses.push((func_fqn.clone(), used_class_fqn));
7015 }
7016 }
7017 });
7018 });
7019
7020 uses
7021}
7022
/// Reports whether `name` is a Java primitive, a boxed/core `java.lang` type,
/// or one of the common collection types that are not worth recording as a
/// class dependency. The comparison is exact and case-sensitive.
fn is_java_primitive_or_builtin(name: &str) -> bool {
    const PRIMITIVES: [&str; 9] = [
        "int", "long", "short", "byte", "float", "double", "boolean", "char", "void",
    ];
    const CORE_TYPES: [&str; 12] = [
        "String", "Integer", "Long", "Short", "Byte", "Float", "Double", "Boolean",
        "Character", "Object", "Class", "Void",
    ];
    const COLLECTIONS: [&str; 9] = [
        "List", "ArrayList", "Map", "HashMap", "Set", "HashSet", "Optional", "Collection",
        "Iterator",
    ];

    PRIMITIVES.contains(&name) || CORE_TYPES.contains(&name) || COLLECTIONS.contains(&name)
}
7034
/// Persists everything extracted from one Go source file into Neo4j.
///
/// In order, this function:
/// 1. emits parse warnings for the file (capped by
///    `persistence.max_parse_warnings_per_file`),
/// 2. merges a `Class` node per struct/interface and a `Function` node per
///    function/method and links them to the file's `File` node,
/// 3. merges `ApiEndpoint` nodes for detected HTTP route registrations and links
///    them to functions whose simple name equals the handler name,
/// 4. merges `ExternalApi` nodes for URLs found in the source,
/// 5. queues uses / calls / embedding edges on `accumulator`,
/// 6. merges `DEPENDS_ON_FILE` edges for imports that resolve to a known file.
///
/// The class and function statements `MATCH` the `File` node by `path`, so they
/// write nothing if that node does not exist yet.
///
/// # Errors
/// Returns a `GraphError` as soon as any `graph.run(..)` call fails.
async fn persist_go_structure(
    graph: &Graph,
    file_path: &str,
    file: &ParsedFile,
    source: &str,
    known_paths: &HashSet<String>,
    project_name: Option<String>,
    accumulator: &mut BatchAccumulator,
    scan_root: &Path,
    go_modules: &[GoModule],
    go_replaces: &[GoReplace],
    persistence: &GraphPersistenceOptions,
    compressor: Option<&CompressorClient>,
) -> Result<(), GraphError> {
    let language = file.language.to_string();
    let package_name = extract_go_package(source);

    // Report syntax problems found by the parser before persisting anything.
    emit_limited_parse_warnings(
        "Go",
        file_path,
        extract_go_parse_warnings(&file.tree, source),
        persistence.max_parse_warnings_per_file,
    );

    let (structs, functions) = extract_go_symbols(&file.tree, source, package_name.as_deref());
    // Span maps are keyed by FQN and looked up per symbol below.
    let class_spans = extract_go_class_spans(&file.tree, source, package_name.as_deref());
    let function_spans = extract_go_function_body_spans(&file.tree, source, package_name.as_deref());

    // --- Structs and interfaces -> `Class` nodes --------------------------------
    for strct in &structs {
        let class_kind = strct.kind.map(|k| k.to_string());
        // NOTE(review): presumably the (optionally compressed) source of the span;
        // see `code_bytes_for_span` for the exact contract.
        let code_bytes = code_bytes_for_span(
            compressor,
            source,
            class_spans.get(&strct.fqn).copied(),
            LanguageId::Go,
        )
        .await;
        // On MATCH, `coalesce` keeps the stored `kind` / `code_bytes` when the new
        // value is null, so a re-run without those values does not erase them.
        let q = query(
            "
            MATCH (f:File { path: $path })
            MERGE (c:Class { fqn: $class_fqn })
            ON CREATE SET c.name = $class_name,
            c.path = $path,
            c.project_name = $project_name,
            c.language = $language,
            c.kind = $class_kind,
            c.code_bytes = $code_bytes
            ON MATCH SET c.name = $class_name,
            c.path = $path,
            c.project_name = $project_name,
            c.language = $language,
            c.kind = coalesce($class_kind, c.kind),
            c.code_bytes = coalesce($code_bytes, c.code_bytes)
            MERGE (f)-[:DECLARES_CLASS]->(c)
            ",
        )
        .param("path", file_path.to_string())
        .param("class_fqn", strct.fqn.clone())
        .param("class_name", strct.name.clone())
        .param("project_name", project_name.clone())
        .param("language", language.clone())
        .param("class_kind", class_kind)
        // NOTE(review): the query text references `$code_bytes`, so
        // `props::CODE_BYTES` is assumed to equal "code_bytes" — confirm.
        .param(props::CODE_BYTES, code_bytes);

        graph.run(q).await?;
    }

    // --- Functions and methods -> `Function` nodes ------------------------------
    for func in &functions {
        let code_bytes = code_bytes_for_span(
            compressor,
            source,
            function_spans.get(&func.fqn).copied(),
            LanguageId::Go,
        )
        .await;
        match &func.class_fqn {
            // Method: also MERGE the receiver's `Class` node (it may be declared in
            // another file) and link it to the function.
            Some(class_fqn) => {
                let q = query(
                    "
                    MATCH (f:File { path: $path })
                    MERGE (cls:Class { fqn: $class_fqn })
                    MERGE (fn:Function { fqn: $fn_fqn })
                    ON CREATE SET fn.name = $fn_name,
                    fn.path = $path,
                    fn.project_name = $project_name,
                    fn.language = $language,
                    fn.is_pointer_receiver = $is_pointer_receiver,
                    fn.code_bytes = $code_bytes
                    ON MATCH SET fn.name = $fn_name,
                    fn.path = $path,
                    fn.project_name = $project_name,
                    fn.language = $language,
                    fn.is_pointer_receiver = coalesce($is_pointer_receiver, fn.is_pointer_receiver),
                    fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
                    MERGE (f)-[:DECLARES_FUNCTION]->(fn)
                    MERGE (cls)-[:DECLARES_FUNCTION]->(fn)
                    ",
                )
                .param("path", file_path.to_string())
                .param("class_fqn", class_fqn.clone())
                .param("fn_fqn", func.fqn.clone())
                .param("fn_name", func.name.clone())
                .param("project_name", project_name.clone())
                .param("language", language.clone())
                .param("is_pointer_receiver", func.is_pointer_receiver)
                .param(props::CODE_BYTES, code_bytes.clone());

                graph.run(q).await?;
            }
            // Plain function: only the file declares it.
            None => {
                let q = query(
                    "
                    MATCH (f:File { path: $path })
                    MERGE (fn:Function { fqn: $fn_fqn })
                    ON CREATE SET fn.name = $fn_name,
                    fn.path = $path,
                    fn.project_name = $project_name,
                    fn.language = $language,
                    fn.is_pointer_receiver = $is_pointer_receiver,
                    fn.code_bytes = $code_bytes
                    ON MATCH SET fn.name = $fn_name,
                    fn.path = $path,
                    fn.project_name = $project_name,
                    fn.language = $language,
                    fn.is_pointer_receiver = coalesce($is_pointer_receiver, fn.is_pointer_receiver),
                    fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
                    MERGE (f)-[:DECLARES_FUNCTION]->(fn)
                    ",
                )
                .param("path", file_path.to_string())
                .param("fn_fqn", func.fqn.clone())
                .param("fn_name", func.name.clone())
                .param("project_name", project_name.clone())
                .param("language", language.clone())
                .param("is_pointer_receiver", func.is_pointer_receiver)
                .param(props::CODE_BYTES, code_bytes);

                graph.run(q).await?;
            }
        }
    }

    // --- HTTP route registrations -> `ApiEndpoint` nodes ------------------------
    let endpoints = extract_go_http_endpoints(source);
    for (methods, path_template, handler_name) in endpoints {
        let norm_path = normalize_api_path(&path_template);

        // The endpoint is keyed by its raw path template; `methods` is overwritten
        // on every match while the other properties are only filled when missing.
        let api_query = query(
            "
            MERGE (api:ApiEndpoint { path: $path })
            ON CREATE SET api.methods = $methods,
            api.protocol = 'http',
            api.framework = 'go-http',
            api.project_name = $project_name,
            api.norm_path = $norm_path
            ON MATCH SET api.methods = $methods,
            api.protocol = coalesce(api.protocol, 'http'),
            api.framework = coalesce(api.framework, 'go-http'),
            api.project_name = coalesce(api.project_name, $project_name),
            api.norm_path = coalesce(api.norm_path, $norm_path)
            ",
        )
        .param("path", path_template.clone())
        .param("methods", methods.clone())
        .param("project_name", project_name.clone())
        .param("norm_path", norm_path.clone());

        graph.run(api_query).await?;

        // Link the endpoint to every function in this file whose simple name
        // equals the handler text; qualified handlers such as `h.Show` will not
        // match a bare function name.
        for func in &functions {
            if func.name == handler_name {
                let rel_query = query(
                    "
                    MERGE (fn:Function { fqn: $fn_fqn })
                    MERGE (api:ApiEndpoint { path: $path })
                    MERGE (api)-[:HANDLED_BY]->(fn)
                    ",
                )
                .param("fn_fqn", func.fqn.clone())
                .param("path", path_template.clone());

                graph.run(rel_query).await?;
            }
        }
    }

    // --- Outbound URLs -> `ExternalApi` nodes -----------------------------------
    let external_urls = extract_external_http_urls(source);
    for full_url in external_urls {
        let (protocol_opt, host, path) = split_url_protocol_host_and_path(&full_url);
        // URLs without an explicit scheme are treated as plain HTTP.
        let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
        let base_url = format!("{protocol}://{host}");
        // The host doubles as both the display name and the provider.
        let name = host.clone();
        let norm_path = normalize_api_path(&path);

        let ext_query = query(
            "
            MERGE (ext:ExternalApi { base_url: $base_url, norm_path: $norm_path })
            ON CREATE SET ext.name = $name,
            ext.path = $path,
            ext.protocol = $protocol,
            ext.provider = $provider,
            ext.project_name = $project_name
            ON MATCH SET ext.name = coalesce(ext.name, $name),
            ext.path = coalesce(ext.path, $path),
            ext.protocol = coalesce(ext.protocol, $protocol),
            ext.provider = coalesce(ext.provider, $provider),
            ext.project_name = coalesce(ext.project_name, $project_name)
            ",
        )
        .param("name", name.clone())
        .param("base_url", base_url.clone())
        .param("path", path.clone())
        .param("norm_path", norm_path.clone())
        .param("protocol", protocol.clone())
        .param("provider", name.clone())
        .param("project_name", project_name.clone());

        graph.run(ext_query).await?;

        // NOTE(review): every function in the file is recorded as calling every
        // URL found in the file; the URL is not attributed to a specific function.
        for func in &functions {
            accumulator.add_calls_external_api(
                func.fqn.clone(),
                base_url.clone(),
                norm_path.clone(),
            );
        }
    }

    // --- Relationship edges handed to the accumulator ---------------------------
    let used_structs = extract_go_used_types(&file.tree, source, package_name.as_deref());
    for (fn_fqn, struct_fqn) in used_structs {
        accumulator.add_uses_class(fn_fqn, struct_fqn);
    }

    let calls = extract_go_calls(&file.tree, source, package_name.as_deref());
    for (caller_fqn, callee_fqn) in calls {
        accumulator.add_calls_function(caller_fqn, callee_fqn);
    }

    // Struct embedding is recorded as a class-uses-class edge.
    for (struct_fqn, embedded_fqn) in extract_go_embedding(&file.tree, source, package_name.as_deref())
    {
        accumulator.add_class_uses_class(struct_fqn, embedded_fqn);
    }

    // Calls launched with `go ...` are recorded as ordinary function calls.
    for (caller_fqn, callee_fqn) in
        extract_go_goroutine_calls(&file.tree, source, package_name.as_deref())
    {
        accumulator.add_calls_function(caller_fqn, callee_fqn);
    }

    // --- Imports -> `DEPENDS_ON_FILE` edges -------------------------------------
    for imp in extract_go_imports(&file.tree, source) {
        if let Some(dep_path) =
            resolve_go_import_to_known_go_file(
                &imp,
                known_paths,
                go_modules,
                go_replaces,
                Some(scan_root),
            )
        {
            let dep_query = query(
                "
                MERGE (src:File { path: $src_path })
                MERGE (dst:File { path: $dst_path })
                MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
                ",
            )
            .param("src_path", file_path.to_string())
            .param("dst_path", dep_path.clone());

            graph.run(dep_query).await?;
        } else if should_log_unresolved_import(
            persistence.verbose_imports,
            is_go_stdlib_import(&imp),
            is_likely_third_party_go_import(&imp),
        ) {
            // Unresolved imports are only reported, never treated as an error.
            println!("Go import (unresolved to scanned files): `{}` in {}", imp, file_path);
        }
    }

    Ok(())
}
7328
7329fn extract_go_parse_warnings(tree: &Tree, source: &str) -> Vec<(usize, usize, String)> {
7331 let mut out = Vec::new();
7332 let root = tree.root_node();
7333 walk_tree(root, |node| {
7334 if node.kind() == "ERROR" || node.is_missing() {
7335 let pos = node.start_position();
7336 let s = node.start_byte() as usize;
7337 let e = (node.end_byte() as usize).min(source.len());
7338 let snippet = if s < e {
7339 source[s..e]
7340 .chars()
7341 .take(120)
7342 .collect::<String>()
7343 .replace('\n', " ")
7344 } else {
7345 String::new()
7346 };
7347 out.push((pos.row + 1, pos.column + 1, snippet));
7348 }
7349 });
7350 out
7351}
7352
7353fn go_go_callee_fqn_from_call_expression(
7354 call: Node,
7355 source: &str,
7356 package: Option<&str>,
7357) -> Option<String> {
7358 let func_node = call.child_by_field_name("function")?;
7359 let s = func_node.start_byte() as usize;
7360 let e = (func_node.end_byte() as usize).min(source.len());
7361 if s >= e {
7362 return None;
7363 }
7364 let callee_name = source[s..e].trim().to_string();
7365 if callee_name.is_empty() {
7366 return None;
7367 }
7368 Some(if callee_name.contains('.') {
7369 callee_name
7370 } else if let Some(pkg) = package {
7371 format!("{pkg}.{callee_name}")
7372 } else {
7373 callee_name
7374 })
7375}
7376
7377fn go_decl_fqn_from_function_declaration(
7378 decl: Node,
7379 source: &str,
7380 package: Option<&str>,
7381) -> Option<String> {
7382 let name_node = decl.child_by_field_name("name")?;
7383 let s = name_node.start_byte() as usize;
7384 let e = (name_node.end_byte() as usize).min(source.len());
7385 if s >= e {
7386 return None;
7387 }
7388 let name = source[s..e].to_string();
7389 Some(match package {
7390 Some(pkg) => format!("{pkg}.{name}"),
7391 None => name,
7392 })
7393}
7394
7395fn go_decl_fqn_from_method_declaration(
7396 decl: Node,
7397 source: &str,
7398 package: Option<&str>,
7399) -> Option<String> {
7400 let name_node = decl.child_by_field_name("name")?;
7401 let s = name_node.start_byte() as usize;
7402 let e = (name_node.end_byte() as usize).min(source.len());
7403 if s >= e {
7404 return None;
7405 }
7406 let method_name = source[s..e].to_string();
7407 let receiver_type = decl.child_by_field_name("receiver").and_then(|r| {
7408 let mut type_name = None;
7409 walk_tree(r, |n| {
7410 if n.kind() == "type_identifier" {
7411 let s = n.start_byte() as usize;
7412 let e = (n.end_byte() as usize).min(source.len());
7413 if e <= source.len() && s < e {
7414 type_name = Some(source[s..e].to_string());
7415 }
7416 }
7417 });
7418 type_name
7419 })?;
7420 let class_fqn = match package {
7421 Some(pkg) => format!("{pkg}.{receiver_type}"),
7422 None => receiver_type,
7423 };
7424 Some(format!("{class_fqn}.{method_name}"))
7425}
7426
7427fn go_enclosing_func_decl_fqn_from_inner(node: Node, source: &str, package: Option<&str>) -> Option<String> {
7428 let mut cur = node.parent();
7429 while let Some(n) = cur {
7430 match n.kind() {
7431 "function_declaration" => return go_decl_fqn_from_function_declaration(n, source, package),
7432 "method_declaration" => return go_decl_fqn_from_method_declaration(n, source, package),
7433 _ => cur = n.parent(),
7434 }
7435 }
7436 None
7437}
7438
7439fn extract_go_goroutine_calls(
7441 tree: &Tree,
7442 source: &str,
7443 package: Option<&str>,
7444) -> Vec<(String, String)> {
7445 let mut out = Vec::new();
7446 let root = tree.root_node();
7447 walk_tree(root, |node| {
7448 if node.kind() != "go_statement" {
7449 return;
7450 }
7451 let Some(caller) = go_enclosing_func_decl_fqn_from_inner(node, source, package) else {
7452 return;
7453 };
7454 let mut i = 0usize;
7455 while let Some(expr) = node.named_child(i) {
7456 i += 1;
7457 if expr.kind() == "call_expression" {
7458 if let Some(callee) = go_go_callee_fqn_from_call_expression(expr, source, package) {
7459 out.push((caller.clone(), callee));
7460 }
7461 break;
7462 }
7463 }
7464 });
7465 out
7466}
7467
7468fn go_type_leaf_for_embedding(mut n: Node) -> Node {
7469 loop {
7470 if n.kind() == "pointer_type" {
7471 if let Some(inner) = n.named_child(0) {
7472 n = inner;
7473 continue;
7474 }
7475 }
7476 return n;
7477 }
7478}
7479
7480fn go_embedding_type_fqn(type_node: Node, source: &str, package: Option<&str>) -> Option<String> {
7481 let leaf = go_type_leaf_for_embedding(type_node);
7482 let s = leaf.start_byte() as usize;
7483 let e = (leaf.end_byte() as usize).min(source.len());
7484 if s >= e {
7485 return None;
7486 }
7487 let raw = source[s..e].trim().to_string();
7488 if raw.is_empty() {
7489 return None;
7490 }
7491 Some(if raw.contains('.') {
7492 raw
7493 } else if let Some(pkg) = package {
7494 format!("{pkg}.{raw}")
7495 } else {
7496 raw
7497 })
7498}
7499
7500fn extract_go_embedding(
7502 tree: &Tree,
7503 source: &str,
7504 package: Option<&str>,
7505) -> Vec<(String, String)> {
7506 let mut out = Vec::new();
7507 let root = tree.root_node();
7508 walk_tree(root, |node| {
7509 if node.kind() != "type_declaration" {
7510 return;
7511 }
7512 let mut i = 0usize;
7513 while let Some(child) = node.child(i) {
7514 i += 1;
7515 if child.kind() != "type_spec" {
7516 continue;
7517 }
7518 let Some(type_n) = child.child_by_field_name("type") else {
7519 continue;
7520 };
7521 if type_n.kind() != "struct_type" {
7522 continue;
7523 }
7524 let Some(name_node) = child.child_by_field_name("name") else {
7525 continue;
7526 };
7527 let s = name_node.start_byte() as usize;
7528 let e = (name_node.end_byte() as usize).min(source.len());
7529 if s >= e {
7530 continue;
7531 }
7532 let struct_name = source[s..e].to_string();
7533 let struct_fqn = match package {
7534 Some(pkg) => format!("{pkg}.{struct_name}"),
7535 None => struct_name,
7536 };
7537 walk_tree(type_n, |fd| {
7538 if fd.kind() != "field_declaration" {
7539 return;
7540 }
7541 if fd.child_by_field_name("name").is_some() {
7542 return;
7543 }
7544 let Some(ty) = fd.child_by_field_name("type") else {
7545 return;
7546 };
7547 if let Some(emb) = go_embedding_type_fqn(ty, source, package) {
7548 out.push((struct_fqn.clone(), emb));
7549 }
7550 });
7551 }
7552 });
7553 out
7554}
7555
7556fn extract_go_imports(tree: &Tree, source: &str) -> Vec<String> {
7558 let mut out = Vec::new();
7559 let root = tree.root_node();
7560 walk_tree(root, |node| {
7561 if node.kind() != "import_spec" {
7562 return;
7563 }
7564 let Some(path_node) = node.child_by_field_name("path") else {
7565 return;
7566 };
7567 let s = path_node.start_byte() as usize;
7568 let e = (path_node.end_byte() as usize).min(source.len());
7569 if s >= e {
7570 return;
7571 }
7572 let raw = source[s..e].trim();
7573 let path = raw.trim_matches('`').trim_matches('"').to_string();
7574 if !path.is_empty() {
7575 out.push(path);
7576 }
7577 });
7578 out
7579}
7580
/// Extracts the package name from the first `package <name>` line in `source`.
///
/// The `package` keyword must be followed by whitespace (space or tab). Only the
/// identifier that follows is returned, so a trailing comment or semicolon
/// (`package main // entry point`, `package main;`) is not part of the name.
///
/// Returns `None` when no such line exists.
fn extract_go_package(source: &str) -> Option<String> {
    source.lines().find_map(|line| {
        let rest = line.trim_start().strip_prefix("package")?;
        // Reject identifiers that merely start with "package", e.g. `packageFoo`.
        if !rest.starts_with(char::is_whitespace) {
            return None;
        }
        // Go identifiers consist of letters, digits and underscores.
        let name: String = rest
            .trim_start()
            .chars()
            .take_while(|c| c.is_alphanumeric() || *c == '_')
            .collect();
        (!name.is_empty()).then_some(name)
    })
}
7594
7595fn extract_go_symbols(
7597 tree: &Tree,
7598 source: &str,
7599 package: Option<&str>,
7600) -> (Vec<ClassSymbol>, Vec<FunctionSymbol>) {
7601 let mut structs: Vec<ClassSymbol> = Vec::new();
7602 let mut functions: Vec<FunctionSymbol> = Vec::new();
7603 let mut seen_struct_names: HashSet<String> = HashSet::new();
7604
7605 let root = tree.root_node();
7606 walk_tree(root, |node| {
7607 let kind = node.kind();
7608 match kind {
7609 "type_declaration" => {
7610 let mut i = 0usize;
7611 while let Some(child) = node.child(i) {
7612 i += 1;
7613 if child.kind() != "type_spec" {
7614 continue;
7615 }
7616 let Some(type_n) = child.child_by_field_name("type") else {
7617 continue;
7618 };
7619 let kind_label: Option<&'static str> = if type_n.kind() == "struct_type" {
7620 Some("struct")
7621 } else if type_n.kind() == "interface_type" {
7622 Some("interface")
7623 } else {
7624 continue;
7625 };
7626 let Some(name_node) = child.child_by_field_name("name") else {
7627 continue;
7628 };
7629 let start = name_node.start_byte() as usize;
7630 let end = (name_node.end_byte() as usize).min(source.len());
7631 if start >= end {
7632 continue;
7633 }
7634 let name = source[start..end].to_string();
7635 let fqn = if let Some(pkg) = package {
7636 format!("{pkg}.{name}")
7637 } else {
7638 name.clone()
7639 };
7640 if seen_struct_names.insert(fqn.clone()) {
7641 structs.push(ClassSymbol {
7642 name,
7643 fqn,
7644 kind: kind_label,
7645 });
7646 }
7647 }
7648 }
7649 "function_declaration" => {
7650 if let Some(name_node) = node.child_by_field_name("name") {
7651 let start = name_node.start_byte() as usize;
7652 let end = (name_node.end_byte() as usize).min(source.len());
7653 if end <= source.len() && start < end {
7654 let name = source[start..end].to_string();
7655 let fqn = if let Some(pkg) = package {
7656 format!("{pkg}.{name}")
7657 } else {
7658 name.clone()
7659 };
7660 functions.push(FunctionSymbol {
7661 name,
7662 fqn,
7663 class_fqn: None,
7664 return_type: None,
7665 param_types: Vec::new(),
7666 param_count: 0,
7667 modifiers: Vec::new(),
7668 is_pointer_receiver: None,
7669 });
7670 }
7671 }
7672 }
7673 "method_declaration" => {
7674 if let Some(name_node) = node.child_by_field_name("name") {
7676 let start = name_node.start_byte() as usize;
7677 let end = (name_node.end_byte() as usize).min(source.len());
7678 if end <= source.len() && start < end {
7679 let method_name = source[start..end].to_string();
7680
7681 let is_pointer_receiver = node
7682 .child_by_field_name("receiver")
7683 .map(|r| go_receiver_has_pointer(r))
7684 .unwrap_or(false);
7685
7686 let receiver_type = node
7688 .child_by_field_name("receiver")
7689 .and_then(|r| {
7690 let mut type_name = None;
7691 walk_tree(r, |n| {
7692 if n.kind() == "type_identifier" {
7693 let s = n.start_byte() as usize;
7694 let e = (n.end_byte() as usize).min(source.len());
7695 if e <= source.len() && s < e {
7696 type_name = Some(source[s..e].to_string());
7697 }
7698 }
7699 });
7700 type_name
7701 });
7702
7703 let class_fqn = receiver_type.map(|t| {
7704 if let Some(pkg) = package {
7705 format!("{pkg}.{t}")
7706 } else {
7707 t
7708 }
7709 });
7710
7711 let fqn = if let Some(ref cls) = class_fqn {
7712 format!("{cls}.{method_name}")
7713 } else if let Some(pkg) = package {
7714 format!("{pkg}.{method_name}")
7715 } else {
7716 method_name.clone()
7717 };
7718
7719 functions.push(FunctionSymbol {
7720 name: method_name,
7721 fqn,
7722 class_fqn,
7723 return_type: None,
7724 param_types: Vec::new(),
7725 param_count: 0,
7726 modifiers: Vec::new(),
7727 is_pointer_receiver: Some(is_pointer_receiver),
7728 });
7729 }
7730 }
7731 }
7732 _ => {}
7733 }
7734 });
7735
7736 (structs, functions)
7737}
7738
7739fn go_receiver_has_pointer(receiver: Node) -> bool {
7740 let mut ptr = false;
7741 walk_tree(receiver, |n| {
7742 if n.kind() == "pointer_type" {
7743 ptr = true;
7744 }
7745 });
7746 ptr
7747}
7748
7749fn extract_go_http_endpoints(source: &str) -> Vec<(Vec<String>, String, String)> {
7751 let mut endpoints: Vec<(Vec<String>, String, String)> = Vec::new();
7752
7753 for line in source.lines() {
7754 let trimmed = line.trim();
7755
7756 if trimmed.contains("HandleFunc(") || trimmed.contains("Handle(") {
7758 if let Some((path, handler)) = extract_go_handler_call(trimmed) {
7759 endpoints.push((vec!["ANY".to_string()], path, handler));
7760 }
7761 }
7762
7763 for (pattern, method) in [
7765 (".Get(", "GET"),
7766 (".Post(", "POST"),
7767 (".Put(", "PUT"),
7768 (".Delete(", "DELETE"),
7769 (".Patch(", "PATCH"),
7770 ] {
7771 if trimmed.contains(pattern) {
7772 if let Some((path, handler)) = extract_go_handler_call(trimmed) {
7773 endpoints.push((vec![method.to_string()], path, handler));
7774 }
7775 }
7776 }
7777
7778 for (pattern, method) in [
7780 (".GET(", "GET"),
7781 (".POST(", "POST"),
7782 (".PUT(", "PUT"),
7783 (".DELETE(", "DELETE"),
7784 (".PATCH(", "PATCH"),
7785 ] {
7786 if trimmed.contains(pattern) {
7787 if let Some((path, handler)) = extract_go_handler_call(trimmed) {
7788 endpoints.push((vec![method.to_string()], path, handler));
7789 }
7790 }
7791 }
7792
7793 }
7796
7797 endpoints
7798}
7799
/// Pulls `(path, handler)` out of a route-registration line such as
/// `mux.HandleFunc("/users", listUsers)`.
///
/// `path` is the text between the first two double quotes. `handler` is the
/// text after the next comma, cut at the first `(`, `)` or `,` and trimmed.
/// Returns `None` when the line lacks two quotes or a following comma, or when
/// either part is empty.
fn extract_go_handler_call(line: &str) -> Option<(String, String)> {
    let (_, after_open_quote) = line.split_once('"')?;
    let (path, after_path) = after_open_quote.split_once('"')?;
    let (_, handler_part) = after_path.split_once(',')?;

    // Stop at the first delimiter so wrappers like `http.HandlerFunc(f)` yield
    // only the outer name.
    let handler = handler_part
        .split(&['(', ')', ','][..])
        .next()?
        .trim();

    if path.is_empty() || handler.is_empty() {
        None
    } else {
        Some((path.to_string(), handler.to_string()))
    }
}
7826
7827fn extract_go_used_types(
7829 tree: &Tree,
7830 source: &str,
7831 package: Option<&str>,
7832) -> Vec<(String, String)> {
7833 let mut uses: Vec<(String, String)> = Vec::new();
7834 let root = tree.root_node();
7835
7836 walk_tree(root, |node| {
7837 let kind = node.kind();
7838 if kind != "function_declaration" && kind != "method_declaration" {
7839 return;
7840 }
7841
7842 let func_name = node
7843 .child_by_field_name("name")
7844 .and_then(|n| {
7845 let s = n.start_byte() as usize;
7846 let e = n.end_byte() as usize;
7847 if e <= source.len() && s < e {
7848 Some(source[s..e].to_string())
7849 } else {
7850 None
7851 }
7852 });
7853
7854 let func_name = match func_name {
7855 Some(n) => n,
7856 None => return,
7857 };
7858
7859 let func_fqn = if let Some(pkg) = package {
7860 format!("{pkg}.{func_name}")
7861 } else {
7862 func_name
7863 };
7864
7865 walk_tree(node, |child| {
7867 if child.kind() == "type_identifier" {
7868 let s = child.start_byte() as usize;
7869 let e = child.end_byte() as usize;
7870 if e <= source.len() && s < e {
7871 let type_name = source[s..e].to_string();
7872 if !is_go_builtin_type(&type_name) {
7874 let type_fqn = if let Some(pkg) = package {
7875 format!("{pkg}.{type_name}")
7876 } else {
7877 type_name
7878 };
7879 uses.push((func_fqn.clone(), type_fqn));
7880 }
7881 }
7882 }
7883 });
7884 });
7885
7886 uses
7887}
7888
/// Reports whether `name` is one of Go's predeclared types.
///
/// Covers the numeric, boolean and string types, the `byte` / `rune` aliases,
/// `error`, and the `any` / `comparable` identifiers introduced with generics.
/// The match is exact and case-sensitive.
fn is_go_builtin_type(name: &str) -> bool {
    matches!(
        name,
        "int" | "int8" | "int16" | "int32" | "int64"
            | "uint" | "uint8" | "uint16" | "uint32" | "uint64" | "uintptr"
            | "float32" | "float64" | "complex64" | "complex128"
            | "bool" | "string" | "byte" | "rune" | "error"
            // Predeclared since Go 1.18; without these they would be reported as
            // package-local types.
            | "any" | "comparable"
    )
}
7899
7900fn extract_go_calls(
7902 tree: &Tree,
7903 source: &str,
7904 package: Option<&str>,
7905) -> Vec<(String, String)> {
7906 let mut calls: Vec<(String, String)> = Vec::new();
7907 let root = tree.root_node();
7908
7909 walk_tree(root, |node| {
7910 let kind = node.kind();
7911 if kind != "function_declaration" && kind != "method_declaration" {
7912 return;
7913 }
7914
7915 let func_name = node
7916 .child_by_field_name("name")
7917 .and_then(|n| {
7918 let s = n.start_byte() as usize;
7919 let e = n.end_byte() as usize;
7920 if e <= source.len() && s < e {
7921 Some(source[s..e].to_string())
7922 } else {
7923 None
7924 }
7925 });
7926
7927 let func_name = match func_name {
7928 Some(n) => n,
7929 None => return,
7930 };
7931
7932 let caller_fqn = if let Some(pkg) = package {
7933 format!("{pkg}.{func_name}")
7934 } else {
7935 func_name
7936 };
7937
7938 walk_tree(node, |child| {
7940 if child.kind() == "call_expression" {
7941 if let Some(func_node) = child.child_by_field_name("function") {
7942 let s = func_node.start_byte() as usize;
7943 let e = func_node.end_byte() as usize;
7944 if e <= source.len() && s < e {
7945 let callee_name = source[s..e].to_string();
7946 let callee_fqn = if callee_name.contains('.') {
7948 callee_name
7950 } else if let Some(pkg) = package {
7951 format!("{pkg}.{callee_name}")
7952 } else {
7953 callee_name
7954 };
7955 calls.push((caller_fqn.clone(), callee_fqn));
7956 }
7957 }
7958 }
7959 });
7960 });
7961
7962 calls
7963}
7964
/// Options accepted by `build_project_ir`.
///
/// The derived `Default` yields `verbose_imports = false` and
/// `max_parse_warnings_per_file = 0`.
#[derive(Debug, Clone, Default)]
pub struct ExtractOptions {
    // NOTE(review): presumably mirrors `GraphPersistenceOptions::verbose_imports`
    // (log imports that cannot be resolved) — confirm where it is read.
    pub verbose_imports: bool,
    // NOTE(review): presumably the per-file cap on reported parse warnings, as in
    // `GraphPersistenceOptions` — confirm where it is read.
    pub max_parse_warnings_per_file: usize,
}
7971
/// De-duplicating buffer for relationship edges gathered while building the
/// project IR.
///
/// Each set stores `(from_key, to_key)` pairs, so the same edge reported more
/// than once is kept a single time.
#[derive(Debug, Default)]
struct IrEdgeAccumulator {
    // `(caller, callee)` pairs, emitted as Function -> Function edges.
    calls_function: HashSet<(String, String)>,
    // `(function, class)` pairs, emitted as Function -> Class edges.
    uses_class: HashSet<(String, String)>,
    // `(class, other class)` pairs, emitted as Class -> Class edges.
    class_uses_class: HashSet<(String, String)>,
}
7978
7979impl IrEdgeAccumulator {
7980 fn flush_into(&self, edges: &mut Vec<EdgeIr>) {
7981 for (caller, callee) in &self.calls_function {
7982 edges.push(EdgeIr {
7983 kind: EdgeKind::CallsFunction,
7984 from_label: "Function".into(),
7985 from_key: caller.clone(),
7986 to_label: "Function".into(),
7987 to_key: callee.clone(),
7988 });
7989 }
7990 for (fn_fqn, cls_fqn) in &self.uses_class {
7991 edges.push(EdgeIr {
7992 kind: EdgeKind::UsesClass,
7993 from_label: "Function".into(),
7994 from_key: fn_fqn.clone(),
7995 to_label: "Class".into(),
7996 to_key: cls_fqn.clone(),
7997 });
7998 }
7999 for (derived, base) in &self.class_uses_class {
8000 edges.push(EdgeIr {
8001 kind: EdgeKind::ClassUsesClass,
8002 from_label: "Class".into(),
8003 from_key: derived.clone(),
8004 to_label: "Class".into(),
8005 to_key: base.clone(),
8006 });
8007 }
8008 }
8009}
8010
8011fn push_depends_on_file(edges: &mut Vec<EdgeIr>, src: &str, dst: &str) {
8012 edges.push(EdgeIr {
8013 kind: EdgeKind::DependsOnFile,
8014 from_label: "File".into(),
8015 from_key: src.to_string(),
8016 to_label: "File".into(),
8017 to_key: dst.to_string(),
8018 });
8019}
8020
8021fn push_declares_function(edges: &mut Vec<EdgeIr>, from_label: &str, from_key: &str, fqn: &str) {
8022 edges.push(EdgeIr {
8023 kind: EdgeKind::DeclaresFunction,
8024 from_label: from_label.into(),
8025 from_key: from_key.to_string(),
8026 to_label: "Function".into(),
8027 to_key: fqn.to_string(),
8028 });
8029}
8030
8031pub fn build_project_ir(
8033 root: &Path,
8034 files: &[ParsedFile],
8035 _options: &ExtractOptions,
8036) -> ProjectIr {
8037 use crate::ir::{ApiEndpointIr, FileIr};
8038
8039 let mut ir = ProjectIr::empty();
8040 let mut accumulator = IrEdgeAccumulator::default();
8041
8042 let known_paths: HashSet<String> = files
8043 .iter()
8044 .map(|f| neo4j_path_string(root, &f.path))
8045 .collect();
8046
8047 let go_modules = discover_go_modules(root, false).unwrap_or_default();
8048 let go_replaces = discover_go_replaces(root, false).unwrap_or_default();
8049 let csharp_batch_index = build_csharp_batch_index(files, root);
8050 let erlang_module_index = build_erlang_module_index(files);
8051
8052 for file in files {
8053 let path = neo4j_path_string(root, &file.path);
8054 let language = file.language.to_string();
8055 let project_name = derive_project_name(&file.path, root);
8056 let source = &file.source;
8057
8058 ir.files.push(FileIr {
8059 path: path.clone(),
8060 language: language.clone(),
8061 framework: None,
8062 project_name: project_name.clone(),
8063 });
8064
8065 match file.language {
8066 LanguageId::Java => {
8067 append_java_class_ir(&mut ir, &path, project_name.clone(), &file.tree, source);
8068 let package = extract_java_package(source);
8069 let (_, methods) =
8070 extract_java_symbols(&file.tree, source, package.as_deref());
8071 for func in &methods {
8072 ir.functions.push(FunctionIr {
8073 name: func.name.clone(),
8074 fqn: func.fqn.clone(),
8075 path: path.clone(),
8076 language: language.clone(),
8077 framework: None,
8078 project_name: project_name.clone(),
8079 arity: None,
8080 return_type: func.return_type.clone(),
8081 param_count: Some(func.param_count as u32),
8082 param_types: func.param_types.clone(),
8083 });
8084 if let Some(class_fqn) = &func.class_fqn {
8085 push_declares_function(&mut ir.edges, "Class", class_fqn, &func.fqn);
8086 }
8087 push_declares_function(&mut ir.edges, "File", &path, &func.fqn);
8088 }
8089 for (derived, base) in
8090 extract_java_inheritance_edges(&file.tree, source, package.as_deref())
8091 {
8092 accumulator.class_uses_class.insert((derived, base));
8093 }
8094 for (cls, dep) in
8095 extract_java_injected_dependencies(&file.tree, source, package.as_deref())
8096 {
8097 accumulator.class_uses_class.insert((cls, dep));
8098 }
8099 for (caller, callee) in extract_java_calls(&file.tree, source, package.as_deref()) {
8100 accumulator.calls_function.insert((caller, callee));
8101 }
8102 for import_fqn in extract_internal_java_imports(source) {
8103 if let Some(dep_path) = map_import_to_project_path(&path, &import_fqn) {
8104 if known_paths.contains(&dep_path) {
8105 push_depends_on_file(&mut ir.edges, &path, &dep_path);
8106 }
8107 }
8108 }
8109 for (http_methods, path_template, handler_name) in extract_java_spring_endpoints(source)
8110 {
8111 let norm_path = normalize_api_path(&path_template);
8112 ir.api_endpoints.push(ApiEndpointIr {
8113 methods: http_methods.clone(),
8114 path: path_template.clone(),
8115 protocol: Some("http".into()),
8116 framework: Some("spring".into()),
8117 project_name: project_name.clone(),
8118 });
8119 if let Some(handler_fqn) =
8120 resolve_java_handler_fqn(&http_methods, &handler_name, &ir)
8121 {
8122 ir.edges.push(EdgeIr {
8123 kind: EdgeKind::HandlesApi,
8124 from_label: "ApiEndpoint".into(),
8125 from_key: api_endpoint_key(&http_methods, &path_template),
8126 to_label: "Function".into(),
8127 to_key: handler_fqn,
8128 });
8129 }
8130 let _ = norm_path;
8131 }
8132 }
8133 LanguageId::CSharp => {
8134 append_csharp_structural_ir(
8135 &mut ir,
8136 &path,
8137 project_name.clone(),
8138 &file.tree,
8139 source,
8140 );
8141 let namespace = extract_csharp_namespace(&file.tree, source);
8142 let using_summary = extract_csharp_using_summary(&file.tree, source);
8143 let (classes, methods, _) = extract_csharp_symbols(&file.tree, source);
8144 for func in &methods {
8145 ir.functions.push(FunctionIr {
8146 name: func.name.clone(),
8147 fqn: func.fqn.clone(),
8148 path: path.clone(),
8149 language: language.clone(),
8150 framework: None,
8151 project_name: project_name.clone(),
8152 arity: None,
8153 return_type: func.return_type.clone(),
8154 param_count: Some(func.param_count as u32),
8155 param_types: func.param_types.clone(),
8156 });
8157 if let Some(class_fqn) = &func.class_fqn {
8158 push_declares_function(&mut ir.edges, "Class", class_fqn, &func.fqn);
8159 }
8160 push_declares_function(&mut ir.edges, "File", &path, &func.fqn);
8161 }
8162 for ns in &using_summary.namespace_imports {
8163 let Some(dep_paths) = csharp_batch_index.namespace_to_paths.get(ns) else {
8164 continue;
8165 };
8166 for dep_path in dep_paths {
8167 if dep_path == &path || !known_paths.contains(dep_path) {
8168 continue;
8169 }
8170 push_depends_on_file(&mut ir.edges, &path, dep_path);
8171 }
8172 }
8173 for (derived, base) in extract_csharp_class_inheritance_edges(
8174 &file.tree,
8175 source,
8176 namespace.as_deref(),
8177 &using_summary,
8178 &csharp_batch_index,
8179 ) {
8180 accumulator.class_uses_class.insert((derived, base));
8181 }
8182 for (fn_fqn, class_fqn) in extract_csharp_used_classes(
8183 &file.tree,
8184 source,
8185 namespace.as_deref(),
8186 &using_summary,
8187 &csharp_batch_index,
8188 ) {
8189 accumulator.uses_class.insert((fn_fqn, class_fqn));
8190 }
8191 for (caller, callee) in extract_csharp_calls(
8192 &file.tree,
8193 source,
8194 namespace.as_deref(),
8195 &using_summary,
8196 &csharp_batch_index,
8197 ) {
8198 accumulator.calls_function.insert((caller, callee));
8199 }
8200 let _ = classes;
8201 }
8202 LanguageId::Erlang => {
8203 extract_erlang_to_ir(
8204 &mut ir,
8205 &mut accumulator,
8206 file,
8207 &path,
8208 source,
8209 project_name.clone(),
8210 &language,
8211 &erlang_module_index,
8212 );
8213 }
8214 LanguageId::Go => {
8215 let package_name = extract_go_package(source);
8216 let (structs, functions) =
8217 extract_go_symbols(&file.tree, source, package_name.as_deref());
8218 for s in &structs {
8219 ir.classes.push(ClassIr {
8220 fqn: s.fqn.clone(),
8221 name: s.name.clone(),
8222 path: path.clone(),
8223 language: language.clone(),
8224 project_name: project_name.clone(),
8225 kind: s.kind.map(str::to_string),
8226 });
8227 ir.edges.push(EdgeIr {
8228 kind: EdgeKind::DeclaresClass,
8229 from_label: "File".into(),
8230 from_key: path.clone(),
8231 to_label: "Class".into(),
8232 to_key: s.fqn.clone(),
8233 });
8234 }
8235 for func in &functions {
8236 ir.functions.push(FunctionIr {
8237 name: func.name.clone(),
8238 fqn: func.fqn.clone(),
8239 path: path.clone(),
8240 language: language.clone(),
8241 framework: None,
8242 project_name: project_name.clone(),
8243 arity: None,
8244 return_type: func.return_type.clone(),
8245 param_count: Some(func.param_count as u32),
8246 param_types: func.param_types.clone(),
8247 });
8248 if let Some(class_fqn) = &func.class_fqn {
8249 push_declares_function(&mut ir.edges, "Class", class_fqn, &func.fqn);
8250 }
8251 push_declares_function(&mut ir.edges, "File", &path, &func.fqn);
8252 }
8253 for import_path in extract_go_imports(&file.tree, source) {
8254 if let Some(dep) = resolve_go_import_to_known_go_file(
8255 &import_path,
8256 &known_paths,
8257 &go_modules,
8258 &go_replaces,
8259 Some(root),
8260 ) {
8261 push_depends_on_file(&mut ir.edges, &path, &dep);
8262 }
8263 }
8264 }
8265 _ => {
8266 extract_non_java_to_ir(
8267 &mut ir,
8268 &mut accumulator,
8269 file,
8270 &path,
8271 source,
8272 project_name.clone(),
8273 &language,
8274 &known_paths,
8275 _options,
8276 );
8277 }
8278 }
8279 }
8280
8281 accumulator.flush_into(&mut ir.edges);
8282 apply_same_api_edges(&mut ir);
8283 ir
8284}
8285
8286fn resolve_java_handler_fqn(
8287 _methods: &[String],
8288 handler_name: &str,
8289 ir: &ProjectIr,
8290) -> Option<String> {
8291 ir.functions
8292 .iter()
8293 .find(|f| f.name == handler_name)
8294 .map(|f| f.fqn.clone())
8295}
8296
8297fn extract_erlang_to_ir(
8298 ir: &mut ProjectIr,
8299 accumulator: &mut IrEdgeAccumulator,
8300 file: &ParsedFile,
8301 file_path: &str,
8302 source: &str,
8303 project_name: Option<String>,
8304 language: &str,
8305 erlang_module_index: &HashMap<String, ErlangModuleSnapshot>,
8306) {
8307 use crate::ir::{ApiEndpointIr, BehaviourIr, CallbackIr, ExternalApiIr, ModuleIr};
8308
8309 let module_name = resolve_erlang_module_name(&file.path, &file.tree, source);
8310 let erlang_meta = extract_erlang_behaviour_metadata_from_tree(&file.tree, source);
8311
8312 if let Some(module_name) = module_name.as_ref() {
8313 ir.modules.push(ModuleIr {
8314 name: module_name.clone(),
8315 path: file_path.to_string(),
8316 language: language.to_string(),
8317 framework: None,
8318 project_name: project_name.clone(),
8319 });
8320 ir.edges.push(EdgeIr {
8321 kind: EdgeKind::DeclaresModule,
8322 from_label: "File".into(),
8323 from_key: file_path.to_string(),
8324 to_label: "Module".into(),
8325 to_key: module_key(module_name, file_path),
8326 });
8327 }
8328
8329 let functions = if let Some(module_name) = module_name.as_ref() {
8330 extract_erlang_functions(module_name, &file.tree, source)
8331 } else {
8332 Vec::new()
8333 };
8334
8335 for (fun_name, arity, fqn) in &functions {
8336 ir.functions.push(FunctionIr {
8337 name: fun_name.clone(),
8338 fqn: fqn.clone(),
8339 path: file_path.to_string(),
8340 language: language.to_string(),
8341 framework: None,
8342 project_name: project_name.clone(),
8343 arity: Some(*arity),
8344 return_type: None,
8345 param_count: None,
8346 param_types: vec![],
8347 });
8348 push_declares_function(&mut ir.edges, "File", file_path, fqn);
8349 if let Some(module_name) = module_name.as_ref() {
8350 push_declares_function(&mut ir.edges, "Module", &module_key(module_name, file_path), fqn);
8351 }
8352 }
8353
8354 let callback_contracts = collect_callback_contracts_for_module(
8355 module_name.as_deref(),
8356 &erlang_meta.behaviour_usages,
8357 &erlang_meta.declared_callbacks,
8358 &erlang_meta.optional_callbacks,
8359 );
8360
8361 if let Some(module_name) = module_name.as_ref() {
8362 for behaviour in &erlang_meta.behaviour_usages {
8363 ir.behaviours.push(BehaviourIr {
8364 name: behaviour.clone(),
8365 path: None,
8366 language: Some(language.to_string()),
8367 project_name: project_name.clone(),
8368 });
8369 ir.edges.push(EdgeIr {
8370 kind: EdgeKind::ImplementsBehaviour,
8371 from_label: "Module".into(),
8372 from_key: module_key(module_name, file_path),
8373 to_label: "Behaviour".into(),
8374 to_key: behaviour.clone(),
8375 });
8376 let dep_path = guess_erlang_file_path_from_module(file_path, behaviour);
8377 push_depends_on_file(&mut ir.edges, file_path, &dep_path);
8378 }
8379
8380 if !erlang_meta.declared_callbacks.is_empty() {
8381 ir.behaviours.push(BehaviourIr {
8382 name: module_name.clone(),
8383 path: Some(file_path.to_string()),
8384 language: Some(language.to_string()),
8385 project_name: project_name.clone(),
8386 });
8387 ir.edges.push(EdgeIr {
8388 kind: EdgeKind::DeclaresBehaviour,
8389 from_label: "File".into(),
8390 from_key: file_path.to_string(),
8391 to_label: "Behaviour".into(),
8392 to_key: module_name.clone(),
8393 });
8394 }
8395
8396 for parent in &erlang_meta.behaviour_extensions {
8397 ir.edges.push(EdgeIr {
8398 kind: EdgeKind::ExtendsBehaviour,
8399 from_label: "Behaviour".into(),
8400 from_key: module_name.clone(),
8401 to_label: "Behaviour".into(),
8402 to_key: parent.clone(),
8403 });
8404 }
8405 }
8406
8407 for contract in &callback_contracts {
8408 let callback_fqn = format!(
8409 "{}:{}/{}",
8410 contract.behaviour, contract.name, contract.arity
8411 );
8412 ir.callbacks.push(CallbackIr {
8413 name: contract.name.clone(),
8414 fqn: callback_fqn.clone(),
8415 arity: contract.arity,
8416 optional: contract.optional,
8417 language: Some(language.to_string()),
8418 project_name: project_name.clone(),
8419 });
8420 ir.edges.push(EdgeIr {
8421 kind: EdgeKind::DeclaresCallback,
8422 from_label: "Behaviour".into(),
8423 from_key: contract.behaviour.clone(),
8424 to_label: "Callback".into(),
8425 to_key: callback_fqn.clone(),
8426 });
8427 }
8428
8429 let function_by_sig: HashMap<(String, u32), String> = functions
8430 .iter()
8431 .map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
8432 .collect();
8433
8434 for contract in &callback_contracts {
8435 let key = (contract.name.clone(), contract.arity);
8436 let Some(fn_fqn) = function_by_sig.get(&key) else {
8437 continue;
8438 };
8439 let callback_fqn = format!(
8440 "{}:{}/{}",
8441 contract.behaviour, contract.name, contract.arity
8442 );
8443 ir.edges.push(EdgeIr {
8444 kind: EdgeKind::ImplementsCallback,
8445 from_label: "Function".into(),
8446 from_key: fn_fqn.clone(),
8447 to_label: "Callback".into(),
8448 to_key: callback_fqn.clone(),
8449 });
8450 }
8451
8452 for (name, arity) in &erlang_meta.overridden_callbacks {
8453 let key = (name.clone(), *arity);
8454 let Some(fn_fqn) = function_by_sig.get(&key) else {
8455 continue;
8456 };
8457 for contract in callback_contracts
8458 .iter()
8459 .filter(|c| c.name == *name && c.arity == *arity)
8460 {
8461 let callback_fqn = format!(
8462 "{}:{}/{}",
8463 contract.behaviour, contract.name, contract.arity
8464 );
8465 ir.edges.push(EdgeIr {
8466 kind: EdgeKind::OverridesCallback,
8467 from_label: "Function".into(),
8468 from_key: fn_fqn.clone(),
8469 to_label: "Callback".into(),
8470 to_key: callback_fqn,
8471 });
8472 }
8473 }
8474
8475 for (methods, path_template, handler_module) in extract_erlang_api_endpoints(&file.tree, source)
8476 {
8477 let methods_owned: Vec<String> = methods.iter().map(|s| s.to_string()).collect();
8478 ir.api_endpoints.push(ApiEndpointIr {
8479 methods: methods_owned.clone(),
8480 path: path_template.clone(),
8481 protocol: Some("http".into()),
8482 framework: Some("cowboy".into()),
8483 project_name: project_name.clone(),
8484 });
8485 for fqn in select_endpoint_handler_fqns(&handler_module, erlang_module_index) {
8486 ir.edges.push(EdgeIr {
8487 kind: EdgeKind::HandlesApi,
8488 from_label: "ApiEndpoint".into(),
8489 from_key: api_endpoint_key(&methods_owned, &path_template),
8490 to_label: "Function".into(),
8491 to_key: fqn,
8492 });
8493 }
8494 }
8495
8496 for full_url in extract_external_http_urls_from_tree(&file.tree, source) {
8497 let (protocol_opt, host, url_path) = split_url_protocol_host_and_path(&full_url);
8498 let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
8499 let base_url = format!("{protocol}://{host}");
8500 let norm_path = normalize_api_path(&url_path);
8501 ir.external_apis.push(ExternalApiIr {
8502 name: host.clone(),
8503 base_url: Some(base_url.clone()),
8504 protocol: Some(protocol),
8505 provider: Some(host),
8506 service: None,
8507 norm_path: Some(norm_path.clone()),
8508 });
8509 for (_, _, fqn) in &functions {
8510 ir.edges.push(EdgeIr {
8511 kind: EdgeKind::CallsExternalApi,
8512 from_label: "Function".into(),
8513 from_key: fqn.clone(),
8514 to_label: "ExternalApi".into(),
8515 to_key: external_api_key(&base_url, &norm_path),
8516 });
8517 }
8518 }
8519
8520 if let Some(module_name) = module_name.as_ref() {
8521 let function_by_sig: HashMap<(String, u32), String> = functions
8522 .iter()
8523 .map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
8524 .collect();
8525 for (caller, callee) in extract_erlang_call_edges(
8526 &file.tree,
8527 source,
8528 Some(module_name.as_str()),
8529 &function_by_sig,
8530 ) {
8531 accumulator.calls_function.insert((caller, callee));
8532 }
8533 }
8534}
8535
8536fn extract_non_java_to_ir(
8537 ir: &mut ProjectIr,
8538 accumulator: &mut IrEdgeAccumulator,
8539 file: &ParsedFile,
8540 file_path: &str,
8541 source: &str,
8542 project_name: Option<String>,
8543 language: &str,
8544 known_paths: &HashSet<String>,
8545 options: &ExtractOptions,
8546) {
8547 let functions = extract_non_java_function_symbols(file, source, file_path);
8548 let mut name_to_fqn_depth: HashMap<String, (String, usize)> = HashMap::new();
8549 for f in &functions {
8550 let logical = f
8551 .fqn
8552 .split_once("::")
8553 .map(|(_, l)| l)
8554 .unwrap_or(f.fqn.as_str());
8555 let short = logical
8556 .rsplit_once('.')
8557 .map(|(_, s)| s)
8558 .unwrap_or(logical);
8559 let depth = logical.matches('.').count();
8560 name_to_fqn_depth
8561 .entry(short.to_string())
8562 .and_modify(|(existing_fqn, existing_depth)| {
8563 if depth > *existing_depth {
8564 *existing_fqn = f.fqn.clone();
8565 *existing_depth = depth;
8566 }
8567 })
8568 .or_insert_with(|| (f.fqn.clone(), depth));
8569 }
8570 let name_to_fqn: HashMap<String, String> = name_to_fqn_depth
8571 .into_iter()
8572 .map(|(k, (v, _))| (k, v))
8573 .collect();
8574
8575 for func in &functions {
8576 ir.functions.push(FunctionIr {
8577 name: func.name.clone(),
8578 fqn: func.fqn.clone(),
8579 path: file_path.to_string(),
8580 language: language.to_string(),
8581 framework: None,
8582 project_name: project_name.clone(),
8583 arity: None,
8584 return_type: func.return_type.clone(),
8585 param_count: Some(func.param_count as u32),
8586 param_types: func.param_types.clone(),
8587 });
8588 push_declares_function(&mut ir.edges, "File", file_path, &func.fqn);
8589 }
8590
8591 match file.language {
8592 LanguageId::Python => {
8593 for imp in extract_python_import_modules(&file.tree, source) {
8594 if let Some(dep) = resolve_python_import_to_known_file(&imp, known_paths) {
8595 push_depends_on_file(&mut ir.edges, file_path, &dep);
8596 } else if should_log_unresolved_import(
8597 options.verbose_imports,
8598 is_python_stdlib_top_level(&imp),
8599 is_python_common_external_top_level(&imp),
8600 ) {
8601 eprintln!(
8602 "Python import (unresolved to scanned files): `{imp}` in {file_path}"
8603 );
8604 }
8605 }
8606 for (caller, callee) in
8607 extract_python_intrafile_calls(&file.tree, source, file_path, &name_to_fqn)
8608 {
8609 accumulator.calls_function.insert((caller, callee));
8610 }
8611 }
8612 LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
8613 for spec in extract_js_ts_import_specifiers(&file.tree, source) {
8614 if let Some(dep) =
8615 resolve_js_ts_import_to_known_file(&spec, file_path, known_paths)
8616 {
8617 push_depends_on_file(&mut ir.edges, file_path, &dep);
8618 } else if options.verbose_imports {
8619 eprintln!("JS/TS import (unresolved): `{spec}` in {file_path}");
8620 }
8621 }
8622 for (caller, callee) in extract_js_ts_intrafile_calls(
8623 &file.tree,
8624 source,
8625 file_path,
8626 file.language,
8627 &name_to_fqn,
8628 ) {
8629 accumulator.calls_function.insert((caller, callee));
8630 }
8631 }
8632 _ => {}
8633 }
8634}
8635
8636fn apply_same_api_edges(ir: &mut ProjectIr) {
8637 use crate::ir::{api_endpoint_key, external_api_key};
8638 for ep in &ir.api_endpoints {
8639 let ep_norm = normalize_api_path(&ep.path);
8640 for ext in &ir.external_apis {
8641 if ext.norm_path.as_deref() == Some(ep_norm.as_str()) {
8642 ir.edges.push(EdgeIr {
8643 kind: EdgeKind::SameApi,
8644 from_label: "ApiEndpoint".into(),
8645 from_key: api_endpoint_key(&ep.methods, &ep.path),
8646 to_label: "ExternalApi".into(),
8647 to_key: if let (Some(b), Some(n)) = (&ext.base_url, &ext.norm_path) {
8648 external_api_key(b, n)
8649 } else {
8650 ext.name.clone()
8651 },
8652 });
8653 }
8654 }
8655 }
8656}
8657
8658pub async fn persist_project_ir_to_neo4j(
8663 cfg: &Neo4jConfig,
8664 ir: &ProjectIr,
8665 clean: bool,
8666) -> Result<(), GraphError> {
8667 let graph = Graph::new(&cfg.uri, &cfg.user, &cfg.password).await?;
8668
8669 if clean {
8670 graph.run(query("MATCH (n) DETACH DELETE n")).await?;
8671 }
8672
8673 for f in &ir.files {
8674 let q = query(
8675 "
8676 MERGE (n:File { path: $path })
8677 SET n.language = $language, n.project_name = $project_name
8678 ",
8679 )
8680 .param("path", f.path.clone())
8681 .param("language", f.language.clone())
8682 .param("project_name", f.project_name.clone());
8683 graph.run(q).await?;
8684 }
8685
8686 for m in &ir.modules {
8687 let q = query(
8688 "
8689 MERGE (n:Module { name: $name, path: $path })
8690 SET n.language = $language, n.project_name = $project_name
8691 ",
8692 )
8693 .param("name", m.name.clone())
8694 .param("path", m.path.clone())
8695 .param("language", m.language.clone())
8696 .param("project_name", m.project_name.clone());
8697 graph.run(q).await?;
8698 }
8699
8700 for c in &ir.classes {
8701 let q = query(
8702 "
8703 MERGE (n:Class { fqn: $fqn })
8704 SET n.name = $name, n.path = $path, n.language = $language, n.project_name = $project_name
8705 ",
8706 )
8707 .param("fqn", c.fqn.clone())
8708 .param("name", c.name.clone())
8709 .param("path", c.path.clone())
8710 .param("language", c.language.clone())
8711 .param("project_name", c.project_name.clone());
8712 graph.run(q).await?;
8713 }
8714
8715 for f in &ir.functions {
8716 let q = query(
8717 "
8718 MERGE (n:Function { fqn: $fqn })
8719 SET n.name = $name, n.path = $path, n.language = $language,
8720 n.project_name = $project_name, n.arity = $arity,
8721 n.return_type = $return_type, n.param_count = $param_count
8722 ",
8723 )
8724 .param("fqn", f.fqn.clone())
8725 .param("name", f.name.clone())
8726 .param("path", f.path.clone())
8727 .param("language", f.language.clone())
8728 .param("project_name", f.project_name.clone())
8729 .param("arity", f.arity.map(|a| a as i64))
8730 .param("return_type", f.return_type.clone())
8731 .param("param_count", f.param_count.map(|c| c as i64));
8732 graph.run(q).await?;
8733 }
8734
8735 for edge in &ir.edges {
8736 let rel = edge.kind.to_rel_type().to_string();
8737 let q = match edge.from_label.as_str() {
8738 "Module" if edge.to_label == "Function" || edge.to_label == "Module" => {
8739 let (name, path) = parse_module_key(&edge.from_key).unwrap_or_default();
8740 let cypher = format!(
8741 "
8742 MERGE (a:Module {{ name: $from_name, path: $from_path }})
8743 MERGE (b:{lbl_b} {{ {key_b}: $to_key }})
8744 MERGE (a)-[:{rel}]->(b)
8745 ",
8746 lbl_b = edge.to_label,
8747 key_b = stable_key_property(&edge.to_label),
8748 rel = rel,
8749 );
8750 query(&cypher)
8751 .param("from_name", name)
8752 .param("from_path", path)
8753 .param("to_key", edge.to_key.clone())
8754 }
8755 _ => {
8756 let cypher = format!(
8757 "
8758 MERGE (a:{lbl_a} {{ {key_a}: $from_key }})
8759 MERGE (b:{lbl_b} {{ {key_b}: $to_key }})
8760 MERGE (a)-[:{rel}]->(b)
8761 ",
8762 lbl_a = edge.from_label,
8763 key_a = stable_key_property(&edge.from_label),
8764 lbl_b = edge.to_label,
8765 key_b = stable_key_property(&edge.to_label),
8766 rel = rel,
8767 );
8768 query(&cypher)
8769 .param("from_key", edge.from_key.clone())
8770 .param("to_key", edge.to_key.clone())
8771 }
8772 };
8773 graph.run(q).await?;
8774 }
8775
8776 Ok(())
8777}
8778
/// Splits a module key of the form `name@path` at the first `@`.
///
/// Returns `(name, path)` as owned strings, or `None` when `key` contains no
/// `@`. Any further `@` characters remain part of `path`.
fn parse_module_key(key: &str) -> Option<(String, String)> {
    let (name, path) = key.split_once('@')?;
    Some((name.to_owned(), path.to_owned()))
}
8782
/// Returns the node property used to identify a node with the given label.
///
/// `File` and `ApiEndpoint` are identified by `path`; `Function`, `Class`,
/// `Property` and `Callback` by `fqn`; every other label (including `Module`,
/// `Behaviour`, `ExternalApi` and unknown labels) by `name`.
fn stable_key_property(label: &str) -> &'static str {
    match label {
        "File" | "ApiEndpoint" => "path",
        "Function" | "Class" | "Property" | "Callback" => "fqn",
        _ => "name",
    }
}
8794
8795#[cfg(test)] mod tests {
8798 use super::*;
8799 use crate::ir::{EdgeKind, ProjectIr};
8800 use crate::scanner::ParsedFile;
8801 use crate::{parse_once, LanguageId};
8802 use std::collections::HashSet;
8803 use std::path::{Path, PathBuf};
8804 #[test]
8807 fn append_csharp_structural_ir_populates_classes_properties_and_edges() {
8808 let src = r#"
8809namespace Ns {
8810 public class C {
8811 public string Name { get; set; }
8812 }
8813}
8814"#;
8815 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
8816 let mut ir = ProjectIr::empty();
8817 append_csharp_structural_ir(&mut ir, "/src/C.cs", None, &tree, src);
8818 assert!(ir.classes.iter().any(|c| c.fqn == "Ns.C"));
8819 assert!(ir.properties.iter().any(|p| p.fqn == "Ns.C.Name"));
8820 assert!(
8821 ir.edges
8822 .iter()
8823 .any(|e| e.kind == EdgeKind::DeclaresClass && e.to_key == "Ns.C")
8824 );
8825 assert!(ir
8826 .edges
8827 .iter()
8828 .any(|e| e.kind == EdgeKind::DeclaresProperty && e.to_key == "Ns.C.Name"));
8829 }
8830
8831 #[test]
8832 fn csharp_using_summary_parses_ast_and_filters_system_microsoft() {
8833 let src = r#"
8834using System;
8835using System.Collections.Generic;
8836using Microsoft.Extensions.Logging;
8837using OtherNs;
8838using static System.Math;
8839using AliasType = OtherNs.SomeType;
8840
8841namespace ConsumerNs { class C { void M() { } } }
8842"#;
8843 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
8844 let sum = extract_csharp_using_summary(&tree, src);
8845 assert!(sum.namespace_imports.contains(&"OtherNs".to_string()));
8846 assert!(!sum
8847 .namespace_imports
8848 .iter()
8849 .any(|n| n.starts_with("System") || n.starts_with("Microsoft")));
8850 assert_eq!(
8851 sum.alias_map.get("AliasType").map(String::as_str),
8852 Some("OtherNs.SomeType")
8853 );
8854 }
8855
8856 #[test]
8857 fn csharp_global_using_included_in_summary() {
8858 let src = r#"
8859global using SharedNs;
8860
8861namespace N { class C { } }
8862"#;
8863 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
8864 let sum = extract_csharp_using_summary(&tree, src);
8865 assert!(sum.namespace_imports.contains(&"SharedNs".to_string()));
8866 }
8867
8868 #[test]
8869 fn csharp_api_endpoints_multiline_http_get_attribute() {
8870 let src = r#"
8871namespace N {
8872 public class C {
8873 [HttpGet(
8874 "/x")]
8875 public void GetIt() { }
8876 }
8877}
8878"#;
8879 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
8880 let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
8881 assert_eq!(eps.len(), 1);
8882 assert_eq!(eps[0].0, vec!["GET"]);
8883 assert_eq!(eps[0].1, "/x");
8884 assert_eq!(eps[0].2, "GetIt");
8885 }
8886
8887 #[test]
8888 fn csharp_api_endpoints_class_route_plus_method_http_get() {
8889 let src = r#"
8890namespace N {
8891 [Route("api/v1")]
8892 public class OrdersController {
8893 [HttpGet("orders")]
8894 public void Get() { }
8895 }
8896}
8897"#;
8898 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
8899 let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
8900 assert_eq!(eps.len(), 1);
8901 assert_eq!(eps[0].0, vec!["GET"]);
8902 assert_eq!(eps[0].1, "/api/v1/orders");
8903 assert_eq!(eps[0].2, "Get");
8904 }
8905
8906 #[test]
8907 fn csharp_api_endpoints_single_line_http_get_regression() {
8908 let src = r#"
8909namespace N {
8910 public class C {
8911 [HttpGet("/api/orders")]
8912 public void List() { }
8913 }
8914}
8915"#;
8916 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
8917 let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
8918 assert_eq!(eps.len(), 1);
8919 assert_eq!(eps[0].1, "/api/orders");
8920 assert_eq!(eps[0].2, "List");
8921 }
8922
8923 #[test]
8924 fn csharp_api_endpoints_comment_with_fake_attribute_not_parsed() {
8925 let src = r#"
8926namespace N {
8927 public class C {
8928 // [HttpGet("/fake")]
8929 [HttpGet("/real")]
8930 public void A() { }
8931 }
8932}
8933"#;
8934 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
8935 let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
8936 assert_eq!(eps.len(), 1);
8937 assert_eq!(eps[0].1, "/real");
8938 }
8939
8940 #[test]
8941 fn csharp_api_endpoints_comment_only_does_not_create_endpoint() {
8942 let src = r#"
8943namespace N {
8944 public class C {
8945 // [HttpGet("/fake")]
8946 public void M() { }
8947 }
8948}
8949"#;
8950 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
8951 let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
8952 assert!(eps.is_empty(), "expected no endpoints, got {eps:?}");
8953 }
8954
8955 #[test]
8956 fn csharp_symbols_nested_class_fqns() {
8957 let src = r#"
8958namespace N {
8959 public class Outer {
8960 public class Inner {
8961 public void M() { }
8962 }
8963 }
8964}
8965"#;
8966 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
8967 let (classes, methods, _) = extract_csharp_symbols(&tree, src);
8968 let fqns: Vec<&str> = classes.iter().map(|c| c.fqn.as_str()).collect();
8969 assert!(fqns.contains(&"N.Outer"));
8970 assert!(fqns.contains(&"N.Outer.Inner"));
8971 let m = methods.iter().find(|f| f.name == "M").expect("method M");
8972 assert_eq!(m.class_fqn.as_deref(), Some("N.Outer.Inner"));
8973 assert_eq!(m.fqn, "N.Outer.Inner.M");
8974 }
8975
8976 #[test]
8977 fn csharp_symbols_enum_kind_and_fqn() {
8978 let src = r#"
8979namespace N {
8980 public enum Color { Red, Green }
8981}
8982"#;
8983 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
8984 let (classes, _, _) = extract_csharp_symbols(&tree, src);
8985 let e = classes.iter().find(|c| c.name == "Color").expect("enum Color");
8986 assert_eq!(e.fqn, "N.Color");
8987 assert_eq!(e.kind, Some("enum"));
8988 }
8989
8990 #[test]
8991 fn csharp_symbols_record_kind_and_fqn() {
8992 let src = r#"
8993namespace N {
8994 public record Person(string Name);
8995}
8996"#;
8997 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
8998 let (classes, _, _) = extract_csharp_symbols(&tree, src);
8999 let r = classes.iter().find(|c| c.name == "Person").expect("record Person");
9000 assert_eq!(r.fqn, "N.Person");
9001 assert_eq!(r.kind, Some("record"));
9002 }
9003
9004 #[test]
9005 fn csharp_namespace_nested_blocks_in_fqn() {
9006 let src = r#"
9007namespace A {
9008 namespace B {
9009 public class C {
9010 public void M() {}
9011 }
9012 }
9013}
9014"#;
9015 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9016 let (classes, methods, _) = extract_csharp_symbols(&tree, src);
9017 let c = classes.iter().find(|x| x.name == "C").expect("class C");
9018 assert_eq!(c.fqn, "A.B.C");
9019 let m = methods.iter().find(|f| f.name == "M").expect("method M");
9020 assert_eq!(m.fqn, "A.B.C.M");
9021 }
9022
9023 #[test]
9024 fn csharp_namespace_file_scoped_extracts() {
9025 let src = "namespace Ns;\npublic class X { }\n";
9026 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9027 assert_eq!(
9028 extract_csharp_namespace(&tree, src).as_deref(),
9029 Some("Ns")
9030 );
9031 let (classes, _, _) = extract_csharp_symbols(&tree, src);
9032 let x = classes.iter().find(|c| c.name == "X").expect("class X");
9033 assert_eq!(x.fqn, "Ns.X");
9034 }
9035
9036 #[test]
9037 fn csharp_collect_file_namespace_strings_two_roots() {
9038 let src = r#"
9039namespace A { public class Ca { } }
9040namespace B { public class Cb { } }
9041"#;
9042 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9043 let mut ns = collect_csharp_file_namespace_strings(&tree, src);
9044 ns.sort();
9045 assert_eq!(ns, vec!["A".to_string(), "B".to_string()]);
9046 }
9047
9048 #[test]
9049 fn csharp_class_inheritance_edges_resolved() {
9050 let src_base = "namespace N { public class Base { } }";
9051 let src_der = "namespace N { public class Derived : Base { } }";
9052 let tree_b = parse_once(LanguageId::CSharp, src_base).expect("parse");
9053 let tree_d = parse_once(LanguageId::CSharp, src_der).expect("parse");
9054 let files = vec![
9055 ParsedFile {
9056 path: PathBuf::from("/b/Base.cs"),
9057 language: LanguageId::CSharp,
9058 tree: tree_b,
9059 source: src_base.to_string(),
9060 is_test: false,
9061 },
9062 ParsedFile {
9063 path: PathBuf::from("/d/Derived.cs"),
9064 language: LanguageId::CSharp,
9065 tree: tree_d,
9066 source: src_der.to_string(),
9067 is_test: false,
9068 },
9069 ];
9070 let index = build_csharp_batch_index(&files, Path::new("."));
9071 let using = extract_csharp_using_summary(&files[1].tree, &files[1].source);
9072 let edges = extract_csharp_class_inheritance_edges(
9073 &files[1].tree,
9074 &files[1].source,
9075 Some("N"),
9076 &using,
9077 &index,
9078 );
9079 assert!(
9080 edges.contains(&(String::from("N.Derived"), String::from("N.Base"))),
9081 "edges={edges:?}"
9082 );
9083 }
9084
9085 #[test]
9086 fn csharp_symbols_constructor_function_matches_call_graph_fqn() {
9087 let src = r#"
9088namespace Ns {
9089 public class C {
9090 public C() { M(); }
9091 public void M() { }
9092 }
9093}
9094"#;
9095 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9096 let files = vec![ParsedFile {
9097 path: PathBuf::from("/t/C.cs"),
9098 language: LanguageId::CSharp,
9099 tree,
9100 source: src.to_string(),
9101 is_test: false,
9102 }];
9103 let index = build_csharp_batch_index(&files, Path::new("."));
9104 let using_summary = extract_csharp_using_summary(&files[0].tree, &files[0].source);
9105 let (_, methods, _) = extract_csharp_symbols(&files[0].tree, &files[0].source);
9106 let ctor = methods
9107 .iter()
9108 .find(|f| f.fqn == "Ns.C.ctor#0")
9109 .expect("ctor#0 symbol");
9110 assert_eq!(ctor.name, "ctor#0");
9111 assert_eq!(ctor.class_fqn.as_deref(), Some("Ns.C"));
9112 let calls = extract_csharp_calls(
9113 &files[0].tree,
9114 &files[0].source,
9115 Some("Ns"),
9116 &using_summary,
9117 &index,
9118 );
9119 assert!(
9120 calls.iter().any(|(a, b)| a == "Ns.C.ctor#0" && b == "Ns.C.M"),
9121 "expected persisted ctor fqn as caller, got {calls:?}"
9122 );
9123 }
9124
9125 #[test]
9126 fn csharp_symbols_property_declares_accessors_and_property_symbol() {
9127 let src = r#"
9128namespace Ns {
9129 public class C {
9130 public string Name { get; set; }
9131 }
9132}
9133"#;
9134 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9135 let (_, methods, properties) = extract_csharp_symbols(&tree, src);
9136 let pname = properties.iter().find(|p| p.name == "Name").expect("property Name");
9137 assert_eq!(pname.fqn, "Ns.C.Name");
9138 assert_eq!(pname.class_fqn, "Ns.C");
9139 assert!(pname.declared_type.as_deref() == Some("string"));
9140 let get_f = methods
9141 .iter()
9142 .find(|f| f.fqn == "Ns.C.get_Name")
9143 .expect("getter function");
9144 assert_eq!(get_f.name, "get_Name");
9145 let set_f = methods
9146 .iter()
9147 .find(|f| f.fqn == "Ns.C.set_Name")
9148 .expect("setter function");
9149 assert_eq!(set_f.name, "set_Name");
9150 }
9151
9152 #[test]
9153 fn csharp_symbols_method_modifiers_params_return_type() {
9154 let src = r#"
9155namespace N {
9156 public class Api {
9157 public static async System.Threading.Tasks.Task<int> Foo(string s, OrderDto o) { return 0; }
9158 }
9159}
9160"#;
9161 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9162 let (_, methods, _) = extract_csharp_symbols(&tree, src);
9163 let f = methods.iter().find(|m| m.name == "Foo").expect("Foo");
9164 assert!(f.modifiers.contains(&"public".to_string()));
9165 assert!(f.modifiers.contains(&"static".to_string()));
9166 assert!(f.modifiers.contains(&"async".to_string()));
9167 assert_eq!(f.param_count, 2);
9168 assert_eq!(f.param_types, vec!["string", "OrderDto"]);
9169 assert!(f.return_type.as_deref().unwrap_or("").contains("Task<int>"));
9170 }
9171
9172 #[test]
9173 fn csharp_method_body_spans_nested_method_fqn() {
9174 let src = r#"
9175namespace N {
9176 public class Outer {
9177 public class Inner {
9178 public void M() { int x = 1; }
9179 }
9180 }
9181}
9182"#;
9183 let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
9184 let spans = csharp_method_body_spans(&tree, src, Some("N"));
9185 let m = spans.iter().find(|(fqn, _, _)| fqn.ends_with(".M")).expect("span M");
9186 assert_eq!(m.0, "N.Outer.Inner.M");
9187 }
9188
9189 #[test]
9190 fn csharp_uses_class_resolves_type_via_using_and_batch_index() {
9191 let src_other = "namespace OtherNs { public class RemoteDto { } }\n";
9192 let src_consumer = r#"
9193using OtherNs;
9194namespace ConsumerNs {
9195 public class Consumer {
9196 public void M() {
9197 RemoteDto x;
9198 }
9199 }
9200}
9201"#;
9202 let tree_other = parse_once(LanguageId::CSharp, src_other).expect("parse");
9203 let tree_consumer = parse_once(LanguageId::CSharp, src_consumer).expect("parse");
9204 let files = vec![
9205 ParsedFile {
9206 path: PathBuf::from("/repo/OtherNs/RemoteDto.cs"),
9207 language: LanguageId::CSharp,
9208 tree: tree_other,
9209 source: src_other.to_string(),
9210 is_test: false,
9211 },
9212 ParsedFile {
9213 path: PathBuf::from("/repo/Consumer.cs"),
9214 language: LanguageId::CSharp,
9215 tree: tree_consumer,
9216 source: src_consumer.to_string(),
9217 is_test: false,
9218 },
9219 ];
9220 let index = build_csharp_batch_index(&files, Path::new("."));
9221 let using_summary = extract_csharp_using_summary(&files[1].tree, &files[1].source);
9222 let uses = extract_csharp_used_classes(
9223 &files[1].tree,
9224 &files[1].source,
9225 Some("ConsumerNs"),
9226 &using_summary,
9227 &index,
9228 );
9229 assert!(
9230 uses.iter().any(|(_, cls)| cls == "OtherNs.RemoteDto"),
9231 "expected USES_CLASS to OtherNs.RemoteDto, got {uses:?}"
9232 );
9233 }
9234
/// A type alias (`using R = OtherNs.RemoteDto;`) used as a local variable type
/// must be reported under the aliased class' fully qualified name.
#[test]
fn csharp_type_alias_using_resolves_to_aliased_class_fqn() {
    let aliased_text = "namespace OtherNs { public class RemoteDto { } }\n";
    let consumer_text = r#"
using R = OtherNs.RemoteDto;
namespace ConsumerNs {
    public class Consumer {
        public void M() {
            R x;
        }
    }
}
"#;
    let aliased_tree = parse_once(LanguageId::CSharp, aliased_text).expect("parse");
    let consumer_tree = parse_once(LanguageId::CSharp, consumer_text).expect("parse");

    let mut files = Vec::new();
    files.push(ParsedFile {
        path: PathBuf::from("/p/Other.cs"),
        language: LanguageId::CSharp,
        tree: aliased_tree,
        source: aliased_text.to_owned(),
        is_test: false,
    });
    files.push(ParsedFile {
        path: PathBuf::from("/p/Consumer.cs"),
        language: LanguageId::CSharp,
        tree: consumer_tree,
        source: consumer_text.to_owned(),
        is_test: false,
    });

    let index = build_csharp_batch_index(&files, Path::new("."));
    // Only the consumer (second entry) is inspected for used classes.
    let consumer = &files[1];
    let using_summary = extract_csharp_using_summary(&consumer.tree, &consumer.source);
    let uses = extract_csharp_used_classes(
        &consumer.tree,
        &consumer.source,
        Some("ConsumerNs"),
        &using_summary,
        &index,
    );
    assert!(uses.iter().any(|(_, cls)| cls == "OtherNs.RemoteDto"));
}
9277
/// Invoking `GetOrder()` must not be mistaken for a reference to a class named
/// `GetOrder` in the enclosing namespace.
#[test]
fn csharp_uses_class_does_not_use_method_name_as_type_from_invocation() {
    let text = r#"
namespace Ns {
    public class C {
        public void GetOrder() { }
        public void M() {
            var x = GetOrder();
        }
    }
}
"#;
    let parsed_tree = parse_once(LanguageId::CSharp, text).expect("parse");
    let files = vec![ParsedFile {
        path: PathBuf::from("/t/C.cs"),
        language: LanguageId::CSharp,
        tree: parsed_tree,
        source: text.to_owned(),
        is_test: false,
    }];
    let unit = &files[0];

    let index = build_csharp_batch_index(&files, Path::new("."));
    let using_summary = extract_csharp_using_summary(&unit.tree, &unit.source);
    let uses = extract_csharp_used_classes(
        &unit.tree,
        &unit.source,
        Some("Ns"),
        &using_summary,
        &index,
    );

    // Every reported class must differ from the synthetic `Ns.GetOrder`.
    let no_synthetic_type = uses.iter().all(|(_, cls)| cls != "Ns.GetOrder");
    assert!(
        no_synthetic_type,
        "GetOrder() invocation must not create USES_CLASS to a synthetic type, got {uses:?}"
    );
}
9312
/// A `System.DateTime` parameter must not show up among the used classes.
#[test]
fn csharp_uses_class_skips_bcl_datetime_in_parameter() {
    let text = r#"
namespace Ns {
    public class C {
        public void M(System.DateTime d) { }
    }
}
"#;
    let parsed_tree = parse_once(LanguageId::CSharp, text).expect("parse");
    let only_file = ParsedFile {
        path: PathBuf::from("/t/C.cs"),
        language: LanguageId::CSharp,
        tree: parsed_tree,
        source: String::from(text),
        is_test: false,
    };
    let files = vec![only_file];

    let index = build_csharp_batch_index(&files, Path::new("."));
    let unit = &files[0];
    let using_summary = extract_csharp_using_summary(&unit.tree, &unit.source);
    let uses = extract_csharp_used_classes(
        &unit.tree,
        &unit.source,
        Some("Ns"),
        &using_summary,
        &index,
    );

    // No reported class name may mention DateTime at all.
    assert!(
        uses.iter().all(|(_, cls)| !cls.contains("DateTime")),
        "DateTime parameter should be filtered as BCL noise, got {uses:?}"
    );
}
9344
/// A type referenced only inside a local function must not be attributed to the
/// enclosing method `Ns.Outer.M`.
#[test]
fn csharp_uses_class_does_not_include_types_only_used_in_local_function() {
    let text = r#"
namespace Ns {
    public class Outer {
        public void M() {
            void Local() {
                InnerOnly x;
            }
        }
    }
    public class InnerOnly { }
}
"#;
    let parsed_tree = parse_once(LanguageId::CSharp, text).expect("parse");
    let files = vec![ParsedFile {
        path: PathBuf::from("/t/Types.cs"),
        language: LanguageId::CSharp,
        tree: parsed_tree,
        source: text.to_owned(),
        is_test: false,
    }];
    let unit = &files[0];

    let index = build_csharp_batch_index(&files, Path::new("."));
    let using_summary = extract_csharp_using_summary(&unit.tree, &unit.source);
    let uses = extract_csharp_used_classes(
        &unit.tree,
        &unit.source,
        Some("Ns"),
        &using_summary,
        &index,
    );

    // Look for the forbidden (outer method, InnerOnly) pairing.
    let leaked_to_outer = uses
        .iter()
        .any(|(caller, cls)| caller == "Ns.Outer.M" && cls.contains("InnerOnly"));
    assert!(
        !leaked_to_outer,
        "InnerOnly only appears inside local function; outer M must not USES_CLASS it, got {uses:?}"
    );
}
9383
/// A call through a field (`_repo.Get(1)`) must resolve to the method on the
/// field's declared type, which lives in another file of the batch.
#[test]
fn csharp_calls_resolve_field_receiver_to_type_fqn() {
    let repo_text =
        "namespace OtherNs { public class OrderRepo { public void Get(int id) { } } }\n";
    let svc_text = r#"
using OtherNs;
namespace ConsumerNs {
    public class Svc {
        private OrderRepo _repo;
        public void M() { _repo.Get(1); }
    }
}
"#;
    let repo_tree = parse_once(LanguageId::CSharp, repo_text).expect("parse");
    let svc_tree = parse_once(LanguageId::CSharp, svc_text).expect("parse");
    let repo_file = ParsedFile {
        path: PathBuf::from("/r/OrderRepo.cs"),
        language: LanguageId::CSharp,
        tree: repo_tree,
        source: String::from(repo_text),
        is_test: false,
    };
    let svc_file = ParsedFile {
        path: PathBuf::from("/r/Svc.cs"),
        language: LanguageId::CSharp,
        tree: svc_tree,
        source: String::from(svc_text),
        is_test: false,
    };
    let files = vec![repo_file, svc_file];

    let index = build_csharp_batch_index(&files, Path::new("."));
    // Calls are extracted from the service file (second entry).
    let svc = &files[1];
    let using_summary = extract_csharp_using_summary(&svc.tree, &svc.source);
    let calls = extract_csharp_calls(
        &svc.tree,
        &svc.source,
        Some("ConsumerNs"),
        &using_summary,
        &index,
    );

    let found_callee = calls.iter().any(|(_, c)| c == "OtherNs.OrderRepo.Get");
    assert!(
        found_callee,
        "expected callee OtherNs.OrderRepo.Get, got {calls:?}"
    );
}
9429
/// `this.Helper()` must resolve to the method on the enclosing class.
#[test]
fn csharp_calls_this_receiver_resolves_to_class_method() {
    let text = r#"
namespace Ns {
    public class C {
        void Helper() { }
        public void Run() { this.Helper(); }
    }
}
"#;
    let parsed_tree = parse_once(LanguageId::CSharp, text).expect("parse");
    let only_file = ParsedFile {
        path: PathBuf::from("/t/C.cs"),
        language: LanguageId::CSharp,
        tree: parsed_tree,
        source: text.to_owned(),
        is_test: false,
    };
    let files = vec![only_file];

    let index = build_csharp_batch_index(&files, Path::new("."));
    let unit = &files[0];
    let using_summary = extract_csharp_using_summary(&unit.tree, &unit.source);
    let calls = extract_csharp_calls(
        &unit.tree,
        &unit.source,
        Some("Ns"),
        &using_summary,
        &index,
    );

    let found_helper = calls.iter().any(|(_, c)| c == "Ns.C.Helper");
    assert!(found_helper, "expected callee Ns.C.Helper, got {calls:?}");
}
9462
/// A call made from a constructor body must be reported with the constructor
/// (`Ns.C.ctor#0`) as the caller.
#[test]
fn csharp_calls_constructor_invocations_extracted() {
    let text = r#"
namespace Ns {
    public class C {
        public C() { M(); }
        public void M() { }
    }
}
"#;
    let parsed_tree = parse_once(LanguageId::CSharp, text).expect("parse");
    let files = vec![ParsedFile {
        path: PathBuf::from("/t/C.cs"),
        language: LanguageId::CSharp,
        tree: parsed_tree,
        source: String::from(text),
        is_test: false,
    }];
    let unit = &files[0];

    let index = build_csharp_batch_index(&files, Path::new("."));
    let using_summary = extract_csharp_using_summary(&unit.tree, &unit.source);
    let calls = extract_csharp_calls(
        &unit.tree,
        &unit.source,
        Some("Ns"),
        &using_summary,
        &index,
    );

    let ctor_calls_m = calls
        .iter()
        .any(|(caller, callee)| caller == "Ns.C.ctor#0" && callee == "Ns.C.M");
    assert!(
        ctor_calls_m,
        "expected ctor caller Ns.C.ctor#0 -> Ns.C.M, got {calls:?}"
    );
}
9495
/// A call made inside a property getter must be reported with `get_Prop` as the
/// caller.
#[test]
fn csharp_calls_property_getter_invocations_extracted() {
    let text = r#"
namespace Ns {
    public class C {
        public int Prop {
            get { Helper(); return 1; }
        }
        void Helper() { }
    }
}
"#;
    let parsed_tree = parse_once(LanguageId::CSharp, text).expect("parse");
    let only_file = ParsedFile {
        path: PathBuf::from("/t/C.cs"),
        language: LanguageId::CSharp,
        tree: parsed_tree,
        source: text.to_owned(),
        is_test: false,
    };
    let files = vec![only_file];

    let index = build_csharp_batch_index(&files, Path::new("."));
    let unit = &files[0];
    let using_summary = extract_csharp_using_summary(&unit.tree, &unit.source);
    let calls = extract_csharp_calls(
        &unit.tree,
        &unit.source,
        Some("Ns"),
        &using_summary,
        &index,
    );

    let getter_calls_helper = calls
        .iter()
        .any(|(caller, callee)| caller == "Ns.C.get_Prop" && callee == "Ns.C.Helper");
    assert!(
        getter_calls_helper,
        "expected getter caller Ns.C.get_Prop -> Ns.C.Helper, got {calls:?}"
    );
}
9530
/// `Console.WriteLine(...)` without any `using` must still be reported as
/// `System.Console.WriteLine`.
#[test]
fn csharp_calls_console_writeline_uses_well_known_type() {
    let text = r#"
namespace Ns {
    public class C {
        public void M() { Console.WriteLine("x"); }
    }
}
"#;
    let parsed_tree = parse_once(LanguageId::CSharp, text).expect("parse");
    let files = vec![ParsedFile {
        path: PathBuf::from("/t/C.cs"),
        language: LanguageId::CSharp,
        tree: parsed_tree,
        source: String::from(text),
        is_test: false,
    }];
    let unit = &files[0];

    let index = build_csharp_batch_index(&files, Path::new("."));
    let using_summary = extract_csharp_using_summary(&unit.tree, &unit.source);
    let calls = extract_csharp_calls(
        &unit.tree,
        &unit.source,
        Some("Ns"),
        &using_summary,
        &index,
    );

    let found_writeline = calls.iter().any(|(_, c)| c == "System.Console.WriteLine");
    assert!(
        found_writeline,
        "expected System.Console.WriteLine, got {calls:?}"
    );
}
9562
/// URLs inside line and block comments are ignored; only the URL in the string
/// literal is returned.
#[test]
fn csharp_external_http_urls_ignore_comments_not_string_literals() {
    let text = r#"
namespace Ns {
    class C {
        // https://evil-line.example/x
        void M() {
            /* https://evil-block.example/y */
            var x = "https://good.example/only";
        }
    }
}
"#;
    let tree = parse_once(LanguageId::CSharp, text).expect("parse C#");
    let urls = extract_csharp_external_http_urls_with_spans(&tree, text);

    // Exactly one hit, and it is the string-literal URL.
    assert_eq!(urls.len(), 1, "expected one URL from string literal, got {urls:?}");
    let first_url = &urls[0].0;
    assert!(
        first_url.contains("good.example"),
        "unexpected url {:?}",
        first_url
    );

    let no_comment_urls = urls.iter().all(|(u, _, _)| !u.contains("evil"));
    assert!(no_comment_urls, "comment URLs must not appear: {urls:?}");
}
9589
/// Pairs each URL literal with the methods whose body range encloses it and
/// checks that `A` and `B` never pick up each other's URL.
#[test]
fn csharp_external_api_links_url_only_to_methods_whose_body_contains_literal() {
    let text = r#"
namespace Ns {
    public class C {
        void A() { var x = "https://api-a.example/v1"; }
        void B() { var y = "https://api-b.example/v2"; }
    }
}
"#;
    let tree = parse_once(LanguageId::CSharp, text).expect("parse C#");
    let (_, methods, _) = extract_csharp_symbols(&tree, text);
    let url_spans = extract_csharp_external_http_urls_with_spans(&tree, text);

    // Group body ranges by method FQN; one FQN may own several ranges.
    let mut spans_by_fqn: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
    for (fqn, lo, hi) in csharp_method_body_spans(&tree, text, Some("Ns")) {
        spans_by_fqn.entry(fqn).or_default().push((lo, hi));
    }

    // A (method, url) pair is kept only when one of the method's ranges
    // fully encloses the URL span.
    let mut pairs: Vec<(String, String)> = Vec::new();
    for (full_url, u_start, u_end) in &url_spans {
        for func in &methods {
            let encloses_url = spans_by_fqn.get(&func.fqn).map_or(false, |ranges| {
                ranges
                    .iter()
                    .any(|&(lo, hi)| lo <= *u_start && *u_end <= hi)
            });
            if encloses_url {
                pairs.push((func.fqn.clone(), full_url.clone()));
            }
        }
    }
    pairs.sort();

    let a_to_a = ("Ns.C.A".to_string(), "https://api-a.example/v1".to_string());
    let b_to_b = ("Ns.C.B".to_string(), "https://api-b.example/v2".to_string());
    let a_to_b = ("Ns.C.A".to_string(), "https://api-b.example/v2".to_string());
    let b_to_a = ("Ns.C.B".to_string(), "https://api-a.example/v1".to_string());

    assert!(pairs.contains(&a_to_a), "missing A->api-a, got {pairs:?}");
    assert!(pairs.contains(&b_to_b), "missing B->api-b, got {pairs:?}");
    assert!(
        !pairs.contains(&a_to_b),
        "N×M leak: A linked to B's URL: {pairs:?}"
    );
    assert!(
        !pairs.contains(&b_to_a),
        "N×M leak: B linked to A's URL: {pairs:?}"
    );
}
9641
/// The `-module(...)` attribute wins over a non-matching file name.
#[test]
fn resolves_module_name_from_erl_attribute() {
    let erl_text = "-module(real_mod).\nfoo() -> ok.\n";
    let erl_path = Path::new("/tmp/not_matching.erl");
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let module_name = resolve_erlang_module_name(erl_path, &tree, erl_text);
    assert_eq!(module_name.as_deref(), Some("real_mod"));
}
/// Without a `-module(...)` attribute, the `.erl` file stem is used as the
/// module name.
#[test]
fn resolves_module_name_from_erl_basename_fallback() {
    let erl_text = "foo() -> ok.\n";
    let erl_path = Path::new("/tmp/fallback_name.erl");
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let module_name = resolve_erlang_module_name(erl_path, &tree, erl_text);
    assert_eq!(module_name.as_deref(), Some("fallback_name"));
}
/// A `.hrl` header without a `-module(...)` attribute yields no module name.
#[test]
fn does_not_fallback_module_name_for_hrl() {
    let hrl_text = "-define(FLAG, true).\n";
    let hrl_path = Path::new("/tmp/records.hrl");
    let tree = parse_once(LanguageId::Erlang, hrl_text).expect("Erlang parse should succeed");
    let module_name = resolve_erlang_module_name(hrl_path, &tree, hrl_text);
    assert_eq!(module_name, None);
}
9666
/// Both spellings of the attribute (`-behaviour` and `-behavior`) are collected
/// as behaviour usages.
#[test]
fn extracts_erlang_behaviour_usages_from_ast() {
    let erl_text = r#"
        -behaviour(gen_server).
        -behavior(custom_behaviour).
        foo() -> ok.
    "#;
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let extracted = extract_erlang_behaviour_metadata_from_tree(&tree, erl_text);
    let usages = &extracted.behaviour_usages;
    assert!(usages.contains("gen_server"));
    assert!(usages.contains("custom_behaviour"));
}
9679
/// `-callback` attributes populate the declared set; `-optional_callbacks`
/// marks only the listed name/arity pairs as optional.
#[test]
fn extracts_declared_and_optional_callbacks_from_ast() {
    let erl_text = r#"
        -callback init(term()) -> {ok, state()}.
        -callback handle_call(term(), term(), term()) -> {reply, ok, term()}.
        -optional_callbacks([handle_call/3]).
    "#;
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let extracted = extract_erlang_behaviour_metadata_from_tree(&tree, erl_text);
    let declared = &extracted.declared_callbacks;
    let optional = &extracted.optional_callbacks;

    // Both callbacks are declared.
    assert!(declared.contains(&("init".to_string(), 1)));
    assert!(declared.contains(&("handle_call".to_string(), 3)));

    // Only handle_call/3 is optional.
    assert!(optional.contains(&("handle_call".to_string(), 3)));
    assert!(!optional.contains(&("init".to_string(), 1)));
}
9701
/// `-extends_behaviour` and `-override_callback` attributes are captured as
/// behaviour extensions and overridden callbacks respectively.
#[test]
fn extracts_behaviour_extension_and_override_hints_from_ast() {
    let erl_text = r#"
        -extends_behaviour(base_handler).
        -override_callback(handle_call/3).
    "#;
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let extracted = extract_erlang_behaviour_metadata_from_tree(&tree, erl_text);

    assert!(extracted.behaviour_extensions.contains("base_handler"));

    let overridden = ("handle_call".to_string(), 3);
    assert!(extracted.overridden_callbacks.contains(&overridden));
}
9716
/// Quoted atoms and attributes spread over several lines are extracted the same
/// way as their plain single-line forms.
#[test]
fn ast_extracts_multiline_and_quoted_attributes() {
    let erl_text = r#"
        -'behaviour'('gen_server').
        -callback
            'handle_call'(
                term(),
                term(),
                term()
            ) ->
                {reply, ok, term()}.
        -optional_callbacks([
            'handle_call'/3
        ]).
    "#;
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let extracted = extract_erlang_behaviour_metadata_from_tree(&tree, erl_text);

    assert!(extracted.behaviour_usages.contains("gen_server"));

    // The quoted callback name is reported without its quotes.
    let declared = &extracted.declared_callbacks;
    assert!(declared.contains(&("handle_call".to_string(), 3)));
    let optional = &extracted.optional_callbacks;
    assert!(optional.contains(&("handle_call".to_string(), 3)));
}
9744
/// Attributes whose argument is a macro (`?NAME`) or a variable produce no
/// metadata entries at all.
#[test]
fn ast_skips_macro_or_variable_names_for_safety() {
    let erl_text = r#"
        -behaviour(?DYN_BEHAVIOUR).
        -optional_callbacks([?CALLBACK/2]).
        -extends_behaviour(ParentVar).
        -override_callback(?OVERRIDE/3).
    "#;
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let extracted = extract_erlang_behaviour_metadata_from_tree(&tree, erl_text);

    // Checked in the same order as the attributes appear above.
    assert!(extracted.behaviour_usages.is_empty());
    assert!(extracted.optional_callbacks.is_empty());
    assert!(extracted.behaviour_extensions.is_empty());
    assert!(extracted.overridden_callbacks.is_empty());
}
9760
/// Contracts collected for a module include a `gen_server` `handle_call/3`
/// entry for the used behaviour and an optional `local_cb/1` entry declared by
/// the module itself.
#[test]
fn maps_functions_to_callback_contracts() {
    let erl_text = r#"
        -behaviour(gen_server).
        -callback local_cb(term()) -> ok.
        -optional_callbacks([local_cb/1]).
        init(Args) -> {ok, Args}.
        handle_call(_Req, _From, State) -> {reply, ok, State}.
        local_cb(X) -> X.
    "#;
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let extracted = extract_erlang_behaviour_metadata_from_tree(&tree, erl_text);
    let contracts = collect_callback_contracts_for_module(
        Some("my_behaviour"),
        &extracted.behaviour_usages,
        &extracted.declared_callbacks,
        &extracted.optional_callbacks,
    );

    let has_gen_server_handle_call = contracts.iter().any(|contract| {
        contract.behaviour == "gen_server" && contract.name == "handle_call" && contract.arity == 3
    });
    assert!(has_gen_server_handle_call);

    let has_optional_local_cb = contracts.iter().any(|contract| {
        contract.behaviour == "my_behaviour"
            && contract.name == "local_cb"
            && contract.arity == 1
            && contract.optional
    });
    assert!(has_optional_local_cb);
}
9792
/// Function heads that are quoted or split across lines are still extracted
/// with the right name and arity.
#[test]
fn extracts_erlang_functions_from_ast_multiline_and_quoted() {
    let erl_text = r#"
        -module(my_handler).
        'special_name'(
            Req,
            State
        ) ->
            {ok, State}.
        websocket_handle(Frame, State) ->
            {ok, State}.
    "#;
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let functions = extract_erlang_functions("my_handler", &tree, erl_text);

    // Reduce each entry to its (name, arity) signature.
    let mut sigs: HashSet<(String, u32)> = HashSet::new();
    for (name, arity, _) in functions.iter() {
        sigs.insert((name.clone(), *arity));
    }

    assert!(sigs.contains(&("special_name".to_string(), 2)));
    assert!(sigs.contains(&("websocket_handle".to_string(), 2)));
}
9815
/// Function spans are looked up by `module:name/arity` and the span covers the
/// function head.
#[test]
fn erlang_function_spans_keyed_by_fqn() {
    let erl_text = r#"
-module(m).
handle(Req, State) ->
    {ok, State}.
"#;
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("parse");
    let spans = extract_erlang_function_spans("m", &tree, erl_text);
    let (start, end) = *spans.get("m:handle/2").expect("span for handle/2");
    let covered = &erl_text[start..end];
    assert!(covered.contains("handle(Req, State)"));
}
9829
/// The span stored for `com.example.A.m` covers the method's `return`
/// statement.
#[test]
fn java_method_body_spans_include_method_body() {
    let java_text = r#"
package com.example;
class A {
    void m() { return; }
}
"#;
    let tree = parse_once(LanguageId::Java, java_text).expect("parse");
    let spans = extract_java_method_body_spans(&tree, java_text, Some("com.example"));
    let (start, end) = *spans.get("com.example.A.m").expect("span");
    let covered = &java_text[start..end];
    assert!(covered.contains("return"));
}
9843
/// Only signatures that are both implemented and listed as callbacks are
/// selected, and they come back as `module:name/arity` strings.
#[test]
fn selects_endpoint_handler_callbacks_from_contracts_and_implemented_signatures() {
    // `not_a_callback/1` is implemented but not a callback;
    // `websocket_info/2` is a callback but not implemented.
    let implemented = HashSet::from([
        ("init".to_string(), 2),
        ("websocket_handle".to_string(), 2),
        ("not_a_callback".to_string(), 1),
    ]);
    let callbacks = HashSet::from([
        ("init".to_string(), 2),
        ("websocket_handle".to_string(), 2),
        ("websocket_info".to_string(), 2),
    ]);
    let snapshot = ErlangModuleSnapshot {
        implemented_signatures: implemented,
        callback_signatures: callbacks,
    };
    let idx = HashMap::from([("omega_ws_handler".to_string(), snapshot)]);

    let fqns = select_endpoint_handler_fqns("omega_ws_handler", &idx);
    let expected = vec![
        "omega_ws_handler:init/2".to_string(),
        "omega_ws_handler:websocket_handle/2".to_string(),
    ];
    assert_eq!(fqns, expected);
}
9872
/// Nothing is selected for a module missing from the index, nor for a module
/// whose callback signature set is empty.
#[test]
fn endpoint_handler_callback_selection_is_strict_when_metadata_missing() {
    let snapshot = ErlangModuleSnapshot {
        implemented_signatures: HashSet::from([("init".to_string(), 2)]),
        callback_signatures: HashSet::new(),
    };
    let idx = HashMap::from([("router_only".to_string(), snapshot)]);

    // Module absent from the index.
    let unknown_module_fqns = select_endpoint_handler_fqns("missing_module", &idx);
    assert!(unknown_module_fqns.is_empty());

    // Module present, but without any callback contracts.
    let no_contract_fqns = select_endpoint_handler_fqns("router_only", &idx);
    assert!(no_contract_fqns.is_empty());
}
9890
/// Local calls produce caller -> callee edges, while the remote call
/// `lists:map/2` does not create an edge to a local `map/2`.
#[test]
fn extracts_precise_erlang_call_edges_from_ast() {
    let erl_text = r#"
        -module(my_mod).
        a() -> b(), c(), ok.
        b() -> ok.
        c() -> lists:map(fun(X) -> X end, [1,2]).
    "#;
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let functions = extract_erlang_functions("my_mod", &tree, erl_text);

    // Signature -> FQN lookup consumed by the edge extractor.
    let mut function_by_sig: HashMap<(String, u32), String> = HashMap::new();
    for (name, arity, fqn) in functions.iter() {
        function_by_sig.insert((name.clone(), *arity), fqn.clone());
    }
    let edges = extract_erlang_call_edges(&tree, erl_text, Some("my_mod"), &function_by_sig);

    let a_to_b = ("my_mod:a/0".to_string(), "my_mod:b/0".to_string());
    let a_to_c = ("my_mod:a/0".to_string(), "my_mod:c/0".to_string());
    let c_to_map = ("my_mod:c/0".to_string(), "my_mod:map/2".to_string());
    assert!(edges.contains(&a_to_b));
    assert!(edges.contains(&a_to_c));
    assert!(!edges.contains(&c_to_map));
}
9912
/// With a single local call in the module, exactly one edge is produced;
/// uncalled functions contribute nothing.
#[test]
fn does_not_create_nm_edges_for_sparse_local_calls() {
    let erl_text = r#"
        -module(my_mod).
        a() -> b().
        b() -> ok.
        c() -> ok.
        d() -> ok.
    "#;
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let functions = extract_erlang_functions("my_mod", &tree, erl_text);

    let mut function_by_sig: HashMap<(String, u32), String> = HashMap::new();
    for (name, arity, fqn) in functions.iter() {
        function_by_sig.insert((name.clone(), *arity), fqn.clone());
    }
    let edges = extract_erlang_call_edges(&tree, erl_text, Some("my_mod"), &function_by_sig);

    // The whole edge set must equal the single a/0 -> b/0 pair.
    let only_edge = ("my_mod:a/0".to_string(), "my_mod:b/0".to_string());
    let expected = HashSet::from([only_edge]);
    assert_eq!(edges, expected);
}
9933
/// Calls made from different clauses of `foo/1` are all attributed to the one
/// function `my_mod:foo/1`.
#[test]
fn attributes_calls_to_enclosing_multi_clause_function() {
    let erl_text = r#"
        -module(my_mod).
        foo(0) -> bar();
        foo(N) -> baz(N).
        bar() -> ok.
        baz(_N) -> ok.
    "#;
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let functions = extract_erlang_functions("my_mod", &tree, erl_text);

    let mut function_by_sig: HashMap<(String, u32), String> = HashMap::new();
    for (name, arity, fqn) in functions.iter() {
        function_by_sig.insert((name.clone(), *arity), fqn.clone());
    }
    let edges = extract_erlang_call_edges(&tree, erl_text, Some("my_mod"), &function_by_sig);

    let foo_to_bar = ("my_mod:foo/1".to_string(), "my_mod:bar/0".to_string());
    let foo_to_baz = ("my_mod:foo/1".to_string(), "my_mod:baz/1".to_string());
    assert!(edges.contains(&foo_to_bar));
    assert!(edges.contains(&foo_to_baz));
    // No further edges beyond the two above.
    assert_eq!(edges.len(), 2);
}
9955
/// The module part of every `Mod:Fun(...)` call is collected.
#[test]
fn extracts_called_modules_from_ast_remote_calls() {
    let erl_text = r#"
        -module(my_mod).
        a() -> lists:map(fun(X) -> X end, [1,2]), my_dep:run().
    "#;
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let called_modules = extract_erlang_called_modules_from_tree(&tree, erl_text);
    assert!(called_modules.contains("lists"));
    assert!(called_modules.contains("my_dep"));
}
9967
/// Route tuples inside `cowboy_router:compile` yield (path, handler) pairs,
/// whether the tuple is on one line or spread over several.
#[test]
fn extracts_cowboy_endpoints_from_ast_multiline_tuples() {
    let erl_text = r#"
        Dispatch = cowboy_router:compile([
            {'_', [
                {"/v1/ping", ping_handler, []},
                {
                    "/v1/ws",
                    websocket_handler,
                    []
                }
            ]}
        ]).
    "#;
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let endpoints = extract_erlang_api_endpoints(&tree, erl_text);

    // Drop the first tuple element; only path and handler are compared.
    let mut keyset: HashSet<(String, String)> = HashSet::new();
    for (_, path, handler) in endpoints {
        keyset.insert((path, handler));
    }

    let ping = ("/v1/ping".to_string(), "ping_handler".to_string());
    let ws = ("/v1/ws".to_string(), "websocket_handler".to_string());
    assert!(keyset.contains(&ping));
    assert!(keyset.contains(&ws));
}
9991
/// The URL in the string literal is returned in full; the URL that appears only
/// in a `%` comment is not.
#[test]
fn extracts_external_urls_from_ast_strings_only() {
    let erl_text = r#"
        -module(my_mod).
        a() ->
            Url = "https://api.example.com/v1/orders?x=1",
            io:format("~p", [Url]).
        % "https://comment.only/should/not/appear"
    "#;
    let tree = parse_once(LanguageId::Erlang, erl_text).expect("Erlang parse should succeed");
    let urls = extract_external_http_urls_from_tree(&tree, erl_text);

    let literal_url = "https://api.example.com/v1/orders?x=1".to_string();
    assert!(urls.contains(&literal_url));
    assert!(urls.iter().all(|u| !u.contains("comment.only")));
}
10006
/// A truncated Java class yields at least one parse warning, and at least one
/// warning is reported on line 2 or later.
#[test]
fn java_parse_warnings_surface_error_nodes() {
    let broken_java = r#"
package p;
class Broken { void m( // missing close paren and brace
"#;
    let tree = parse_once(LanguageId::Java, broken_java).expect("parse");
    let w = extract_java_parse_warnings(&tree, broken_java);

    let has_warnings = !w.is_empty();
    assert!(has_warnings, "expected ERROR/missing warnings, got {w:?}");

    let any_on_line_two_or_later = w.iter().any(|(line, _, _)| *line >= 2);
    assert!(any_on_line_two_or_later, "got {w:?}");
}
10023
/// For `getOrders`, the return type is recorded as `ResponseEntity` (generic
/// arguments dropped) and the parameter types as `String` and `int`.
#[test]
fn java_method_extracts_return_and_param_types() {
    let java_text = r#"
package com.example;
import java.util.List;
import org.springframework.http.ResponseEntity;
class Order {}
class C {
    public ResponseEntity<List<Order>> getOrders(String id, int page) { return null; }
}
"#;
    let tree = parse_once(LanguageId::Java, java_text).expect("parse");
    let (_, methods) = extract_java_symbols(&tree, java_text, Some("com.example"));
    let get_orders = methods
        .iter()
        .find(|method| method.name == "getOrders")
        .expect("getOrders");

    assert_eq!(get_orders.return_type.as_deref(), Some("ResponseEntity"));
    assert_eq!(get_orders.param_types, vec!["String", "int"]);
    assert_eq!(get_orders.param_count, 2);
}
10045
/// `extends` and `implements` both produce child -> parent edges; the
/// qualified interface name is kept as written.
#[test]
fn java_inheritance_edges_extends_and_implements() {
    let java_text = r#"
package p;
class Parent {}
class Child extends Parent implements java.io.Serializable {}
"#;
    let tree = parse_once(LanguageId::Java, java_text).expect("parse");
    let edges = extract_java_inheritance_edges(&tree, java_text, Some("p"));

    let extends_parent = ("p.Child".to_string(), "p.Parent".to_string());
    let implements_serializable = ("p.Child".to_string(), "java.io.Serializable".to_string());
    assert!(edges.contains(&extends_parent));
    assert!(edges.contains(&implements_serializable));
}
10061
/// Class-level and method-level annotations are reported by simple name,
/// keyed by the annotated element's FQN.
#[test]
fn java_class_and_method_annotations_extracted() {
    let java_text = r#"
package p;
@Service
class Svc {
    @Override
    @Deprecated
    void run() {}
}
"#;
    let tree = parse_once(LanguageId::Java, java_text).expect("parse");

    // Class level: `p.Svc` carries `Service`.
    let class_annotations = extract_java_class_annotations(&tree, java_text, Some("p"));
    let service = String::from("Service");
    let svc_is_service = class_annotations
        .iter()
        .any(|(fqn, names)| fqn == "p.Svc" && names.contains(&service));
    assert!(svc_is_service);

    // Method level: `run` carries both `Override` and `Deprecated`.
    let method_annotations = extract_java_method_annotations(&tree, java_text, Some("p"));
    let (_, run_annotations) = method_annotations
        .iter()
        .find(|(fqn, _)| fqn.ends_with(".run"))
        .expect("run");
    assert!(run_annotations.contains(&"Override".to_string()));
    assert!(run_annotations.contains(&"Deprecated".to_string()));
}
10081
/// Dependencies are detected for `MyService` from its `@Autowired` field type
/// and from its constructor parameter types.
#[test]
fn java_injected_dependencies_constructor_and_autowired_field() {
    let java_text = r#"
package p;
class OrderRepo {}
class UserService {}
class MyService {
    @Autowired
    OrderRepo repo;
    public MyService(UserService svc, OrderRepo r2) {}
}
"#;
    let tree = parse_once(LanguageId::Java, java_text).expect("parse");
    let deps = extract_java_injected_dependencies(&tree, java_text, Some("p"));

    let on_order_repo = ("p.MyService".to_string(), "p.OrderRepo".to_string());
    let on_user_service = ("p.MyService".to_string(), "p.UserService".to_string());
    assert!(deps.contains(&on_order_repo));
    assert!(deps.contains(&on_user_service));
}
10099
/// An unfinished Go statement yields at least one parse warning.
#[test]
fn go_parse_warnings_surface_error_nodes() {
    let broken_go = r#"package main
func main() { x :=
"#;
    let tree = parse_once(LanguageId::Go, broken_go).expect("parse");
    let w = extract_go_parse_warnings(&tree, broken_go);
    let has_warnings = !w.is_empty();
    assert!(has_warnings, "expected warnings, got {w:?}");
}
10109
/// Type declarations are tagged `interface` or `struct`, and an embedded field
/// produces a struct -> embedded-type pair.
#[test]
fn go_extracts_interface_kind_and_struct_embedding() {
    let go_text = r#"package main
import "io"
type Reader interface { Read(p []byte) (n int, err error) }
type MyStruct struct {
    io.Reader
    Name string
}
"#;
    let tree = parse_once(LanguageId::Go, go_text).expect("parse");
    let (classes, _) = extract_go_symbols(&tree, go_text, Some("main"));

    let reader_is_interface = classes
        .iter()
        .any(|class| class.name == "Reader" && class.kind == Some("interface"));
    assert!(reader_is_interface);
    let my_struct_is_struct = classes
        .iter()
        .any(|class| class.name == "MyStruct" && class.kind == Some("struct"));
    assert!(my_struct_is_struct);

    let embeddings = extract_go_embedding(&tree, go_text, Some("main"));
    let embeds_reader = ("main.MyStruct".to_string(), "io.Reader".to_string());
    assert!(embeddings.contains(&embeds_reader));
}
10127
/// `go worker()` inside `main` is reported as the pair
/// (`main.main`, `main.worker`).
#[test]
fn go_goroutine_call_extracted_as_calls_function_pair() {
    let go_text = r#"package main
func worker() {}
func main() { go worker() }
"#;
    let tree = parse_once(LanguageId::Go, go_text).expect("parse");
    let g = extract_go_goroutine_calls(&tree, go_text, Some("main"));
    let main_spawns_worker = ("main.main".to_string(), "main.worker".to_string());
    assert!(g.contains(&main_spawns_worker), "got {g:?}");
}
10141
/// Methods record whether their receiver is a pointer (`*User`) or a value
/// (`User`).
#[test]
fn go_method_pointer_receiver_flag() {
    let go_text = r#"package main
type User struct{}
func (u *User) GetName() string { return "" }
func (u User) String() string { return "" }
"#;
    let tree = parse_once(LanguageId::Go, go_text).expect("parse");
    let (_, funcs) = extract_go_symbols(&tree, go_text, Some("main"));

    // Pointer receiver.
    let get_name = funcs
        .iter()
        .find(|func| func.name == "GetName")
        .expect("GetName");
    assert_eq!(get_name.is_pointer_receiver, Some(true));

    // Value receiver.
    let stringer = funcs
        .iter()
        .find(|func| func.name == "String")
        .expect("String");
    assert_eq!(stringer.is_pointer_receiver, Some(false));
}
10156
/// Every path inside a parenthesised `import ( ... )` group is extracted.
#[test]
fn go_import_paths_extracted_from_grouped_import() {
    let go_text = r#"package main
import (
    "fmt"
    "github.com/gorilla/mux"
    "myproject/internal/handler"
)
func main() {}
"#;
    let tree = parse_once(LanguageId::Go, go_text).expect("parse");
    let import_paths = extract_go_imports(&tree, go_text);
    assert!(import_paths.contains(&"fmt".to_string()));
    assert!(import_paths.contains(&"github.com/gorilla/mux".to_string()));
    assert!(import_paths.contains(&"myproject/internal/handler".to_string()));
}
10173
/// With no modules, replaces or extra context supplied, an import path still
/// resolves to the known file whose path contains that package directory.
#[test]
fn go_import_resolves_to_known_scanned_file_path() {
    let handler_file = "/repo/myproject/internal/handler/api.go";
    let known = HashSet::from([String::from(handler_file)]);
    let resolved =
        resolve_go_import_to_known_go_file("myproject/internal/handler", &known, &[], &[], None);
    assert_eq!(
        resolved.as_deref(),
        Some("/repo/myproject/internal/handler/api.go")
    );
}
10182
/// For a Rust file, the single extracted function keeps its bare name as FQN
/// (no file-path prefix).
#[test]
fn rust_non_java_symbols_remain_bare_fqn() {
    let rust_text = "fn hello() {}";
    let parsed_tree = parse_once(LanguageId::Rust, rust_text).expect("parse");
    let file = ParsedFile {
        path: PathBuf::from("/x/a.rs"),
        language: LanguageId::Rust,
        tree: parsed_tree,
        source: String::from(rust_text),
        is_test: false,
    };
    let symbols = extract_non_java_function_symbols(&file, rust_text, "/x/a.rs");
    assert_eq!(symbols.len(), 1);
    assert_eq!(symbols[0].fqn, "hello");
}
10198
/// Python function FQNs are prefixed with `<file path>::`, and a nested
/// function is qualified by its enclosing function (`outer.inner`).
#[test]
fn python_graph_symbols_file_scoped_fqn_and_nested() {
    let py_text = r#"
def top():
    pass
def outer():
    def inner():
        pass
    pass
"#;
    let parsed_tree = parse_once(LanguageId::Python, py_text).expect("parse");
    let file = ParsedFile {
        path: PathBuf::from("/app/mod.py"),
        language: LanguageId::Python,
        tree: parsed_tree,
        source: String::from(py_text),
        is_test: false,
    };
    let symbols = extract_non_java_function_symbols(&file, py_text, "/app/mod.py");

    let mut fqns: Vec<&str> = Vec::new();
    for symbol in symbols.iter() {
        fqns.push(symbol.fqn.as_str());
    }
    assert!(fqns.contains(&"/app/mod.py::top"));
    assert!(fqns.contains(&"/app/mod.py::outer"));
    assert!(fqns.contains(&"/app/mod.py::outer.inner"));
}
10223
/// A method defined inside a Python class is not returned, while a
/// module-level function is.
#[test]
fn python_class_methods_excluded_from_graph_symbols() {
    let py_text = r#"
class C:
    def meth(self):
        pass
def global_fn():
    pass
"#;
    let parsed_tree = parse_once(LanguageId::Python, py_text).expect("parse");
    let file = ParsedFile {
        path: PathBuf::from("/app/c.py"),
        language: LanguageId::Python,
        tree: parsed_tree,
        source: String::from(py_text),
        is_test: false,
    };
    let symbols = extract_non_java_function_symbols(&file, py_text, "/app/c.py");

    let method_absent = symbols.iter().all(|symbol| symbol.name != "meth");
    assert!(method_absent);
    let global_present = symbols.iter().any(|symbol| symbol.name == "global_fn");
    assert!(global_present);
}
10245
/// JavaScript symbol extraction reports class methods and class-field arrow
/// functions as `Class.member`, plus top-level arrow functions and function
/// declarations, each with a `::`-separated FQN suffix.
#[test]
fn js_graph_symbols_class_method_arrow_and_top_level() {
    const FILE_PATH: &str = "/app/box.js";
    let source = r#"
class Box {
 run() { return 1; }
 go = () => 2;
}
const top = () => {};
function decl() {}
"#;

    let parsed = ParsedFile {
        path: PathBuf::from(FILE_PATH),
        language: LanguageId::JavaScript,
        tree: parse_once(LanguageId::JavaScript, source).expect("parse"),
        source: source.to_owned(),
        is_test: false,
    };

    let symbols = extract_non_java_function_symbols(&parsed, source, FILE_PATH);
    let fqns: Vec<&str> = symbols.iter().map(|sym| sym.fqn.as_str()).collect();

    // Each expected symbol is matched by suffix only.
    for suffix in ["::Box.run", "::Box.go", "::top", "::decl"] {
        assert!(fqns.iter().any(|fqn| fqn.ends_with(suffix)), "got {fqns:?}");
    }
}
10277
/// TypeScript class methods appear among the extracted symbols with an FQN
/// ending in `::Class.method`.
#[test]
fn ts_graph_symbols_include_class_method() {
    const FILE_PATH: &str = "/svc/h.ts";
    let source = r#"
class Svc {
 handle(): void {}
}
"#;

    let parsed = ParsedFile {
        path: PathBuf::from(FILE_PATH),
        language: LanguageId::TypeScript,
        tree: parse_once(LanguageId::TypeScript, source).expect("parse"),
        source: source.to_owned(),
        is_test: false,
    };

    let symbols = extract_non_java_function_symbols(&parsed, source, FILE_PATH);
    let fqns: Vec<_> = symbols.iter().map(|sym| &sym.fqn).collect();

    assert!(
        fqns.iter().any(|fqn| fqn.ends_with("::Svc.handle")),
        "got {fqns:?}"
    );
}
10300
/// Syntactically broken Python (an unterminated parameter list) still parses
/// to a tree, and that tree yields at least one parse warning.
#[test]
fn python_parse_warnings_surface_error_nodes() {
    let broken_source = "def foo(\n";
    let tree = parse_once(LanguageId::Python, broken_source).expect("parse");

    let warnings = extract_python_parse_warnings(&tree, broken_source);

    assert!(
        !warnings.is_empty(),
        "expected warnings, got {warnings:?}"
    );
}
10308
/// Syntactically broken JavaScript (an unterminated parameter list) still
/// parses to a tree, and that tree yields at least one parse warning.
#[test]
fn js_parse_warnings_surface_error_nodes() {
    let broken_source = "function f( {";
    let tree = parse_once(LanguageId::JavaScript, broken_source).expect("parse");

    let warnings = extract_js_ts_parse_warnings(&tree, broken_source);

    assert!(
        !warnings.is_empty(),
        "expected warnings, got {warnings:?}"
    );
}
10316
/// The dotted Python import `pkg.helper` resolves to the known file whose
/// path is `/repo/pkg/helper.py`.
#[test]
fn python_import_resolves_to_known_py_file() {
    let helper_path = "/repo/pkg/helper.py";
    let known_files = HashSet::from([helper_path.to_owned()]);

    let resolved = resolve_python_import_to_known_file("pkg.helper", &known_files);

    assert_eq!(resolved.as_deref(), Some(helper_path));
}
10324
/// A call from one Python function to another in the same file is reported
/// as a `(caller FQN, callee FQN)` edge.
#[test]
fn python_intrafile_call_edge() {
    let source = r#"
def callee():
 pass
def caller():
 callee()
"#;
    let fp = "/t/a.py";
    let tree = parse_once(LanguageId::Python, source).expect("parse");

    // Map each bare function name to its file-scoped FQN.
    let mut name_to_fqn = HashMap::new();
    for name in ["callee", "caller"] {
        name_to_fqn.insert(name.into(), format!("{fp}::{name}"));
    }

    let calls = extract_python_intrafile_calls(&tree, source, fp, &name_to_fqn);

    let expected_edge = (format!("{fp}::caller"), format!("{fp}::callee"));
    assert!(calls.contains(&expected_edge), "got {calls:?}");
}
10344
/// The relative import `./util`, written in `/repo/src/main.ts`, resolves to
/// the known sibling file `/repo/src/util.ts`.
#[test]
fn js_ts_relative_import_resolves_to_known_file() {
    let util_path = "/repo/src/util.ts";
    let known_files = HashSet::from([util_path.to_owned()]);

    let resolved =
        resolve_js_ts_import_to_known_file("./util", "/repo/src/main.ts", &known_files);

    assert_eq!(resolved.as_deref(), Some(util_path));
}
10352
/// A call from one JavaScript function to another in the same file is
/// reported as a `(caller FQN, callee FQN)` edge.
#[test]
fn js_intrafile_call_edge() {
    let source = r#"
function callee() {}
function caller() { callee(); }
"#;
    let fp = "/t/b.js";
    let tree = parse_once(LanguageId::JavaScript, source).expect("parse");

    // Map each bare function name to its file-scoped FQN.
    let mut name_to_fqn = HashMap::new();
    for name in ["callee", "caller"] {
        name_to_fqn.insert(name.into(), format!("{fp}::{name}"));
    }

    let calls =
        extract_js_ts_intrafile_calls(&tree, source, fp, LanguageId::JavaScript, &name_to_fqn);

    let expected_edge = (format!("{fp}::caller"), format!("{fp}::callee"));
    assert!(calls.contains(&expected_edge), "got {calls:?}");
}
10376}
10377